From df768b67e937da6aa255e12b8e59651938e36728 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Thu, 3 Dec 2020 20:57:26 -0500 Subject: [PATCH 001/501] [batch] get regions from global-config --- batch/batch/batch_configuration.py | 7 +++++ batch/batch/driver/instance_pool.py | 5 +++ batch/batch/driver/zone_monitor.py | 47 +++++++++-------------------- batch/deployment.yaml | 10 ++++++ 4 files changed, 37 insertions(+), 32 deletions(-) diff --git a/batch/batch/batch_configuration.py b/batch/batch/batch_configuration.py index 6d9df307237..8d76f65f0ea 100644 --- a/batch/batch/batch_configuration.py +++ b/batch/batch/batch_configuration.py @@ -1,9 +1,16 @@ import os +import json KUBERNETES_TIMEOUT_IN_SECONDS = float(os.environ.get('KUBERNETES_TIMEOUT_IN_SECONDS', 5.0)) REFRESH_INTERVAL_IN_SECONDS = int(os.environ.get('REFRESH_INTERVAL_IN_SECONDS', 5 * 60)) DEFAULT_NAMESPACE = os.environ['HAIL_DEFAULT_NAMESPACE'] PROJECT = os.environ['PROJECT'] + +GPC_REGION = os.environ['HAIL_GPC_REGION'] + +BATCH_GPC_REGIONS = set(json.loads(os.environ['HAIL_BATCH_GPC_REGIONS'])) +BATCH_GPC_REGIONS.add(GPC_REGION) + assert PROJECT != '' KUBERNETES_SERVER_URL = os.environ['KUBERNETES_SERVER_URL'] BATCH_BUCKET_NAME = os.environ['HAIL_BATCH_BUCKET_NAME'] diff --git a/batch/batch/driver/instance_pool.py b/batch/batch/driver/instance_pool.py index 2c0fecb5159..8b85537cac6 100644 --- a/batch/batch/driver/instance_pool.py +++ b/batch/batch/driver/instance_pool.py @@ -207,10 +207,15 @@ async def create_instance(self, cores=None, max_idle_time_msecs=None): break if self.live_total_cores_mcpu // 1000 < 4_000: + if not self.zone_monitor.init_zones: + return + zone = random.choice(self.zone_monitor.init_zones) else: zone_weights = self.zone_monitor.zone_weights(self.worker_cores, self.worker_local_ssd_data_disk, self.worker_pd_ssd_data_disk_size_gb) + if not zone_weights: + return zones = [zw.zone for zw in zone_weights] diff --git a/batch/batch/driver/zone_monitor.py b/batch/batch/driver/zone_monitor.py index 7e303d88855..6f7c307976e 100644 --- a/batch/batch/driver/zone_monitor.py +++ b/batch/batch/driver/zone_monitor.py @@ -48,33 +48,14 @@ def __repr__(self): class ZoneMonitor: - def __init__(self, app, regions=None, init_zones=None): + def __init__(self, app): self.app = app self.compute_client = app['compute_client'] self.zone_success_rate = ZoneSuccessRate() - # default until we update zones - # /regions is slow, don't make it synchronous on startup - if init_zones is None: - init_zones = ['us-central1-a', 'us-central1-b', 'us-central1-c', 'us-central1-f'] - - if regions is None: - regions = { - # 'northamerica-northeast1', - 'us-central1', - 'us-east1', - 'us-east4', - 'us-west1', - 'us-west2', - 'us-west3', - 'us-west4' - } - - self.init_zones = init_zones - self.regions = regions - - self.init_zone_weights = [ZoneWeight(z, 1) for z in self.init_zones] + self.init_zones = None + self.init_zone_weights = None self.region_info = None @@ -89,8 +70,8 @@ def shutdown(self): self.task_manager.shutdown() def zone_weights(self, worker_cores, worker_local_ssd_data_disk, worker_pd_ssd_data_disk_size_gb): - if self.region_info is None: - return self.init_zone_weights + if not self.region_info: + return None _zone_weights = [] for r in self.region_info.values(): @@ -114,15 +95,17 @@ def zone_weights(self, worker_cores, worker_local_ssd_data_disk, worker_pd_ssd_d return _zone_weights async def update_region_quotas(self): - new_region_info = {} - async for r in await self.compute_client.list('/regions'): - name = r['name'] - if 
name not in self.regions: - continue - - new_region_info[name] = r + self.region_info = { + name: await self.compute_client.get(f'/regions/{name}') + for name in BATCH_GPC_REGIONS + } + + self.init_zones = [ + os.path.basename(urllib.parse.urlparse(z).path) + for z in self.region_info[GPC_REGION]['zones'] + ] + self.init_zone_weights = [ZoneWeight(z, 1) for z in self.init_zones] - self.region_info = new_region_info log.info('updated region quotas') async def update_region_quotas_loop(self): diff --git a/batch/deployment.yaml b/batch/deployment.yaml index 58ad51811f6..1051917d2db 100644 --- a/batch/deployment.yaml +++ b/batch/deployment.yaml @@ -53,6 +53,16 @@ spec: value: "{{ default_ns.name }}" - name: PROJECT value: "{{ global.project }}" + - name: HAIL_GPC_REGION + valueFrom: + secretKeyRef: + name: global-config + key: gpc_region + - name: HAIL_BATCH_GPC_REGIONS + valueFrom: + secretKeyRef: + name: global-config + key: batch_gpc_regions - name: KUBERNETES_SERVER_URL value: "{{ global.k8s_server_url }}" - name: HAIL_SHA From 9926e74d74d7025186d313254b292ad0b74807be Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Thu, 3 Dec 2020 22:59:48 -0500 Subject: [PATCH 002/501] [ci] use global config in ci test deployment --- ci/test/resources/deployment.yaml | 12 +++++++++--- ci/test/resources/hello.py | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/ci/test/resources/deployment.yaml b/ci/test/resources/deployment.yaml index 068144fc3bf..87deecf4d00 100644 --- a/ci/test/resources/deployment.yaml +++ b/ci/test/resources/deployment.yaml @@ -49,12 +49,18 @@ spec: readOnly: true env: - name: HAIL_IP - value: "{{ global.ip }}" + valueFrom: + secretKeyRef: + name: global-config + key: ip - name: HAIL_DOMAIN - value: "{{ global.domain }}" + valueFrom: + secretKeyRef: + name: global-config + key: domain - name: HAIL_DEPLOY_CONFIG_FILE value: /deploy-config/deploy-config.json - - name: SHA + - name: HAIL_SHA value: "{{ code.sha }}" volumes: - name: deploy-config diff --git a/ci/test/resources/hello.py b/ci/test/resources/hello.py index 4338c00dfe5..0e914e097e0 100644 --- a/ci/test/resources/hello.py +++ b/ci/test/resources/hello.py @@ -10,7 +10,7 @@ app = web.Application() routes = web.RouteTableDef() -SHA = os.environ['SHA'] +SHA = os.environ['HAIL_SHA'] @routes.get('/healthcheck') From df4d4441a3376da09990d00f2b8f60777194b1a5 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Thu, 3 Dec 2020 23:04:58 -0500 Subject: [PATCH 003/501] fix --- batch/batch/driver/zone_monitor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/batch/batch/driver/zone_monitor.py b/batch/batch/driver/zone_monitor.py index 6f7c307976e..38d6ec26fa3 100644 --- a/batch/batch/driver/zone_monitor.py +++ b/batch/batch/driver/zone_monitor.py @@ -6,6 +6,7 @@ from hailtop.utils import retry_long_running from ..utils import WindowFractionCounter +from ..batch_configuration import GPC_REGION, BATCH_GPC_REGIONS log = logging.getLogger('zone_monitor') From 86827ce0784bd27a50d49b559196ed59401c5224 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Thu, 3 Dec 2020 23:31:43 -0500 Subject: [PATCH 004/501] Add domain to the deploy config. Modify hailctl dev config to let you set the domain. Note, this is an interface change since I changed `hailctl dev config` to act like gcloud/kubectl `... set property value`. 
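For example, with this change a typical session looks roughly like the following (a sketch assuming the default external configuration; `example.org` stands in for whatever domain is being configured):

    $ hailctl dev config set domain example.org
    $ hailctl dev config show
      location: external
      default: default
      domain: example.org

The properties accepted by `set` are `location`, `default` (the default namespace) and `domain`.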
--- hail/python/hailtop/config/deploy_config.py | 29 +++++----- hail/python/hailtop/hailctl/dev/cli.py | 47 +++++++--------- hail/python/hailtop/hailctl/dev/config/cli.py | 54 ++++++++----------- 3 files changed, 60 insertions(+), 70 deletions(-) diff --git a/hail/python/hailtop/config/deploy_config.py b/hail/python/hailtop/config/deploy_config.py index cf8e3403209..62738380d17 100644 --- a/hail/python/hailtop/config/deploy_config.py +++ b/hail/python/hailtop/config/deploy_config.py @@ -14,7 +14,15 @@ class DeployConfig: @staticmethod def from_config(config): - return DeployConfig(config['location'], config['default_namespace'], config['service_namespace']) + domain = config.get('domain', 'hail.is') + return DeployConfig(config['location'], config['default_namespace'], config['domain']) + + def get_config(self): + return { + 'location': self._location, + 'default_namespace': self._default_namespace, + 'domain': self._domain + } @staticmethod def from_config_file(config_file=None): @@ -33,24 +41,21 @@ def from_config_file(config_file=None): config = { 'location': 'external', 'default_namespace': 'default', - 'service_namespace': {} + 'domain': 'hail.is' } return DeployConfig.from_config(config) - def __init__(self, location, default_namespace, service_namespace): + def __init__(self, location, default_namespace, domain): assert location in ('external', 'k8s', 'gce') self._location = location self._default_namespace = default_namespace - self._service_namespace = service_namespace - - def with_service(self, service, ns): - return DeployConfig(self._location, self._default_namespace, {**self._service_namespace, service: ns}) + self._domain = domain def location(self): return self._location def service_ns(self, service): - return self._service_namespace.get(service, self._default_namespace) + return self._default_namespace def scheme(self, base_scheme='http'): # FIXME: should depend on ssl context @@ -66,8 +71,8 @@ def domain(self, service): return 'internal.hail' assert self._location == 'external' if ns == 'default': - return f'{service}.hail.is' - return 'internal.hail.is' + return f'{service}.{self._domain}' + return f'internal.{self._domain}' def base_path(self, service): ns = self.service_ns(service) @@ -90,8 +95,8 @@ def auth_session_cookie_name(self): def external_url(self, service, path, base_scheme='http'): ns = self.service_ns(service) if ns == 'default': - return f'{base_scheme}s://{service}.hail.is{path}' - return f'{base_scheme}s://internal.hail.is/{ns}/{service}{path}' + return f'{base_scheme}s://{service}.{self._domain}{path}' + return f'{base_scheme}s://internal.{self._domain}/{ns}/{service}{path}' def prefix_application(self, app, service, **kwargs): base_path = self.base_path(service) diff --git a/hail/python/hailtop/hailctl/dev/cli.py b/hail/python/hailtop/hailctl/dev/cli.py index e5ea415de89..fd692fc3710 100644 --- a/hail/python/hailtop/hailctl/dev/cli.py +++ b/hail/python/hailtop/hailctl/dev/cli.py @@ -9,9 +9,15 @@ def parser(): main_parser = argparse.ArgumentParser( - prog='hailctl dev', + prog='hailctl', description='Manage Hail development utilities.') - subparsers = main_parser.add_subparsers() + main_subparsers = main_parser.add_subparsers(title='hailctl subcommand', dest='hailctl subcommand', required=True) + + dev_parser = main_subparsers.add_parser( + 'dev', + help='Developer tools.', + description='Developer tools.') + subparsers = dev_parser.add_subparsers(title='hailctl dev subcommand', dest='hailctl dev subcommand', required=True) config_parser = 
subparsers.add_parser( 'config', @@ -24,14 +30,14 @@ def parser(): 'deploy', help='Deploy a branch', description='Deploy a branch') - + deploy_parser.set_defaults(module='deploy') deploy.cli.init_parser(deploy_parser) query_parser = subparsers.add_parser( 'query', help='Set dev settings on query service', description='Set dev settings on query service') - + deploy_parser.set_defaults(module='query') query.cli.init_parser(query_parser) return main_parser @@ -39,27 +45,14 @@ def parser(): def main(args): p = parser() - - if not args: - p.print_help() - sys.exit(0) + args = p.parse_args() + if args.module == 'deploy': + from .deploy import cli # pylint: disable=import-outside-toplevel + cli.main(args) + elif args.module.startswith('hailctl dev config'): + from .config import cli # pylint: disable=import-outside-toplevel + cli.main(args) else: - module = args[0] - if module == 'deploy': - from .deploy import cli # pylint: disable=import-outside-toplevel - args, _ = p.parse_known_args(args=args) - cli.main(args) - elif module == 'config': - from .config import cli # pylint: disable=import-outside-toplevel - args, _ = p.parse_known_args(args=args) - cli.main(args) - elif module == 'query': - from .query import cli # pylint: disable=import-outside-toplevel - args, _ = p.parse_known_args(args=args) - cli.main(args) - elif module in ('-h', '--help', 'help'): - p.print_help() - else: - sys.stderr.write(f"ERROR: no such module: {module!r}") - p.print_help() - sys.exit(1) + assert args.module == 'query' + from .query import cli # pylint: disable=import-outside-toplevel + cli.main(args) diff --git a/hail/python/hailtop/hailctl/dev/config/cli.py b/hail/python/hailtop/hailctl/dev/config/cli.py index 1f698caad62..519a3e8e954 100644 --- a/hail/python/hailtop/hailctl/dev/config/cli.py +++ b/hail/python/hailtop/hailctl/dev/config/cli.py @@ -1,40 +1,32 @@ import os import json -from hailtop.config import get_deploy_config +from . import set_property +from . import show -def init_parser(parser): - parser.add_argument("namespace", type=str, nargs='?', - help="Default namespace. Show the current configuration if not specified.") - parser.add_argument("--location", "-l", type=str, default='external', - choices=['external', 'gce', 'k8s'], - help="Location. (default: external)") - parser.add_argument("--override", "-o", type=str, default='', - help="List of comma-separated service=namespace overrides. 
(default: none)") +def init_parser(config_parser): + subparsers = config_parser.add_subparsers(title='hailctl dev config subcommand', dest='hailctl dev config subcommand', required=True) + set_parser = subparsers.add_parser( + 'set', + help='Set deploy configuration property.', + description='Set deploy configuration property.') -def main(args): - if not args.namespace: - deploy_config = get_deploy_config() - print(f' location: {deploy_config.location()}') - print(f' default: {deploy_config._default_namespace}') - if deploy_config._service_namespace: - print(' overrides:') - for service, ns in deploy_config._service_namespace.items(): - print(f' {service}: {ns}') - return + set_parser.set_defaults(module='hailctl dev config set') + set_property.init_parser(set_parser) + + show_parser = subparsers.add_parser( + 'show', + help='Set deploy configuration property.', + description='Set deploy configuration property.') - override = args.override.split(',') - override = [o.split('=') for o in override if o] - service_namespace = {o[0]: o[1] for o in override} + show_parser.set_defaults(module='hailctl dev config show') + show.init_parser(show_parser) - config = { - 'location': args.location, - 'default_namespace': args.namespace, - 'service_namespace': service_namespace - } +def main(args): + if args.module == 'hailctl dev config set': + set_property.main(args) + return - config_file = os.environ.get( - 'HAIL_DEPLOY_CONFIG_FILE', os.path.expanduser('~/.hail/deploy-config.json')) - with open(config_file, 'w') as f: - json.dump(config, f) + assert args.module == 'hailctl dev config show' + show.main(args) From ed9149d17f6a05f2f1eb255ded1db7ef98348304 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Thu, 3 Dec 2020 23:38:04 -0500 Subject: [PATCH 005/501] comment --- hail/python/hailtop/hailctl/dev/cli.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hail/python/hailtop/hailctl/dev/cli.py b/hail/python/hailtop/hailctl/dev/cli.py index fd692fc3710..2627a92be43 100644 --- a/hail/python/hailtop/hailctl/dev/cli.py +++ b/hail/python/hailtop/hailctl/dev/cli.py @@ -11,6 +11,8 @@ def parser(): main_parser = argparse.ArgumentParser( prog='hailctl', description='Manage Hail development utilities.') + # we have to set dest becuase of a rendering bug in argparse + # https://bugs.python.org/issue29298 main_subparsers = main_parser.add_subparsers(title='hailctl subcommand', dest='hailctl subcommand', required=True) dev_parser = main_subparsers.add_parser( From 749af5278252b01dc96c9d52fc047af023fd90cb Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Thu, 3 Dec 2020 23:49:55 -0500 Subject: [PATCH 006/501] add domain to Scala DeployConfig --- .../main/scala/is/hail/services/DeployConfig.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hail/src/main/scala/is/hail/services/DeployConfig.scala b/hail/src/main/scala/is/hail/services/DeployConfig.scala index 1e85edbcecc..17a9623f8f9 100644 --- a/hail/src/main/scala/is/hail/services/DeployConfig.scala +++ b/hail/src/main/scala/is/hail/services/DeployConfig.scala @@ -43,7 +43,7 @@ object DeployConfig { new DeployConfig( "external", "default", - Map()) + "hail.is") } def fromConfig(config: JValue): DeployConfig = { @@ -51,14 +51,14 @@ object DeployConfig { new DeployConfig( (config \ "location").extract[String], (config \ "default_namespace").extract[String], - (config \ "service_namespace").extract[Map[String, String]]) + (config \ "domain").extract[String]) } } class DeployConfig( val location: String, val defaultNamespace: String, 
- val serviceNamespace: Map[String, String]) { + val domain: String) { import DeployConfig._ def scheme(baseScheme: String = "http"): String = { @@ -69,7 +69,7 @@ class DeployConfig( } def getServiceNamespace(service: String): String = { - serviceNamespace.getOrElse(service, defaultNamespace) + defaultNamespace } def domain(service: String): String = { @@ -84,9 +84,9 @@ class DeployConfig( "internal.hail" case "external" => if (ns == "default") - s"$service.hail.is" + s"$service.$domain" else - "internal.hail.is" + s"internal.$domain" } } From f7f9c98a4b0b432e4a6ee1aa493eb66b5ace6719 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Fri, 4 Dec 2020 00:53:17 -0500 Subject: [PATCH 007/501] typos --- batch/deployment.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/batch/deployment.yaml b/batch/deployment.yaml index 1051917d2db..61ccfb28ab0 100644 --- a/batch/deployment.yaml +++ b/batch/deployment.yaml @@ -53,12 +53,12 @@ spec: value: "{{ default_ns.name }}" - name: PROJECT value: "{{ global.project }}" - - name: HAIL_GPC_REGION + - name: HAIL_GCP_REGION valueFrom: secretKeyRef: name: global-config key: gpc_region - - name: HAIL_BATCH_GPC_REGIONS + - name: HAIL_BATCH_GCP_REGIONS valueFrom: secretKeyRef: name: global-config From f40fe4ae5ebfa2b6849a10b1180db02eb96a04cb Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Fri, 4 Dec 2020 01:12:37 -0500 Subject: [PATCH 008/501] add missing files --- .../hailctl/dev/config/set_property.py | 27 +++++++++++++++++++ .../python/hailtop/hailctl/dev/config/show.py | 10 +++++++ 2 files changed, 37 insertions(+) create mode 100644 hail/python/hailtop/hailctl/dev/config/set_property.py create mode 100644 hail/python/hailtop/hailctl/dev/config/show.py diff --git a/hail/python/hailtop/hailctl/dev/config/set_property.py b/hail/python/hailtop/hailctl/dev/config/set_property.py new file mode 100644 index 00000000000..9ef060622c1 --- /dev/null +++ b/hail/python/hailtop/hailctl/dev/config/set_property.py @@ -0,0 +1,27 @@ +import os +import json + +from hailtop.config import get_deploy_config + + +def init_parser(parser): + parser.add_argument("property", type=str, + help="Property to set.", + choices=['location', 'default', 'domain']) + parser.add_argument("value", type=str, + help="Value to set property to.") + + +def main(args): + deploy_config = get_deploy_config() + config = deploy_config.get_config() + + p = args.property + if p == 'default': + p = 'default_namespace' + config[p] = args.value + + config_file = os.environ.get( + 'HAIL_DEPLOY_CONFIG_FILE', os.path.expanduser('~/.hail/deploy-config.json')) + with open(config_file, 'w') as f: + json.dump(config, f) diff --git a/hail/python/hailtop/hailctl/dev/config/show.py b/hail/python/hailtop/hailctl/dev/config/show.py new file mode 100644 index 00000000000..e7ce9f492ef --- /dev/null +++ b/hail/python/hailtop/hailctl/dev/config/show.py @@ -0,0 +1,10 @@ +from hailtop.config import get_deploy_config + +def init_parser(parser): + pass + +def main(args): + deploy_config = get_deploy_config() + print(f' location: {deploy_config.location()}') + print(f' default: {deploy_config._default_namespace}') + print(f' domain: {deploy_config._domain}') From 9fa00c6d527897c63b4b1fd3f9ce55de766ad7db Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Fri, 4 Dec 2020 01:17:25 -0500 Subject: [PATCH 009/501] delint --- hail/python/hailtop/auth/auth.py | 13 +++++++------ hail/python/hailtop/config/deploy_config.py | 10 ++++++++-- hail/python/hailtop/hailctl/auth/login.py | 17 +++++++---------- 
hail/python/hailtop/hailctl/curl.py | 3 ++- hail/python/hailtop/hailctl/dev/cli.py | 4 +--- hail/python/hailtop/hailctl/dev/config/cli.py | 5 ++--- hail/python/hailtop/hailctl/dev/config/show.py | 6 ++++-- 7 files changed, 31 insertions(+), 27 deletions(-) diff --git a/hail/python/hailtop/auth/auth.py b/hail/python/hailtop/auth/auth.py index 73b8feb1e02..b309c3ca32b 100644 --- a/hail/python/hailtop/auth/auth.py +++ b/hail/python/hailtop/auth/auth.py @@ -59,12 +59,13 @@ def copy_paste_login(copy_paste_token, namespace=None): async def async_copy_paste_login(copy_paste_token, namespace=None): deploy_config = get_deploy_config() + if namespace is not None: - auth_ns = namespace - deploy_config = deploy_config.with_service('auth', auth_ns) + deploy_config = deploy_config.with_default_namespace(namespace) else: - auth_ns = deploy_config.service_ns('auth') - headers = namespace_auth_headers(deploy_config, auth_ns, authorize_target=False) + namespace = deploy_config.default_namespace() + + headers = namespace_auth_headers(deploy_config, namespace, authorize_target=False) async with aiohttp.ClientSession( raise_for_status=True, @@ -78,10 +79,10 @@ async def async_copy_paste_login(copy_paste_token, namespace=None): username = resp['username'] tokens = get_tokens() - tokens[auth_ns] = token + tokens[namespace] = token dot_hail_dir = os.path.expanduser('~/.hail') if not os.path.exists(dot_hail_dir): os.mkdir(dot_hail_dir, mode=0o700) tokens.write() - return auth_ns, username + return namespace, username diff --git a/hail/python/hailtop/config/deploy_config.py b/hail/python/hailtop/config/deploy_config.py index 62738380d17..92e0e5641c5 100644 --- a/hail/python/hailtop/config/deploy_config.py +++ b/hail/python/hailtop/config/deploy_config.py @@ -15,7 +15,7 @@ class DeployConfig: @staticmethod def from_config(config): domain = config.get('domain', 'hail.is') - return DeployConfig(config['location'], config['default_namespace'], config['domain']) + return DeployConfig(config['location'], config['default_namespace'], domain) def get_config(self): return { @@ -51,10 +51,16 @@ def __init__(self, location, default_namespace, domain): self._default_namespace = default_namespace self._domain = domain + def with_default_namespace(self, default_namespace): + return DeployConfig(self._location, default_namespace, self._domain) + + def default_namespace(self): + return self._default_namespace + def location(self): return self._location - def service_ns(self, service): + def service_ns(self, service): # pylint: disable=unused-argument return self._default_namespace def scheme(self, base_scheme='http'): diff --git a/hail/python/hailtop/hailctl/auth/login.py b/hail/python/hailtop/hailctl/auth/login.py index 930f01e0b9a..20736c35502 100644 --- a/hail/python/hailtop/hailctl/auth/login.py +++ b/hail/python/hailtop/hailctl/auth/login.py @@ -44,7 +44,7 @@ async def start_server(): return (runner, port) -async def auth_flow(deploy_config, auth_ns, session): +async def auth_flow(deploy_config, default_ns, session): runner, port = await start_server() async with session.get(deploy_config.url('auth', '/api/v1alpha/login'), @@ -77,29 +77,26 @@ async def auth_flow(deploy_config, auth_ns, session): username = resp['username'] tokens = get_tokens() - tokens[auth_ns] = token + tokens[default_ns] = token dot_hail_dir = os.path.expanduser('~/.hail') if not os.path.exists(dot_hail_dir): os.mkdir(dot_hail_dir, mode=0o700) tokens.write() - if auth_ns == 'default': + if default_ns == 'default': print(f'Logged in as {username}.') else: - 
print(f'Logged into namespace {auth_ns} as {username}.') + print(f'Logged into namespace {default_ns} as {username}.') async def async_main(args): deploy_config = get_deploy_config() if args.namespace: - auth_ns = args.namespace - deploy_config = deploy_config.with_service('auth', auth_ns) - else: - auth_ns = deploy_config.service_ns('auth') - headers = namespace_auth_headers(deploy_config, auth_ns, authorize_target=False) + deploy_config = deploy_config.with_default_namespace(args.namespace) + headers = namespace_auth_headers(deploy_config, deploy_config.default_namespace(), authorize_target=False) async with get_context_specific_ssl_client_session( raise_for_status=True, timeout=aiohttp.ClientTimeout(total=60), headers=headers) as session: - await auth_flow(deploy_config, auth_ns, session) + await auth_flow(deploy_config, deploy_config.default_namespace(), session) def main(args, pass_through_args): # pylint: disable=unused-argument diff --git a/hail/python/hailtop/hailctl/curl.py b/hail/python/hailtop/hailctl/curl.py index 9939555d4e7..b48dcb31dec 100644 --- a/hail/python/hailtop/hailctl/curl.py +++ b/hail/python/hailtop/hailctl/curl.py @@ -13,9 +13,10 @@ def main(args): svc = args[1] path = args[2] deploy_config = get_deploy_config() + deploy_config = deploy_config.with_default_namespace(ns) headers = namespace_auth_headers(deploy_config, ns) headers = [x for k, v in headers.items() for x in ['-H', f'{k}: {v}']] - path = deploy_config.with_service(svc, ns).url(svc, path) + path = deploy_config.url(svc, path) os.execvp('curl', ['curl', *headers, *args[3:], path]) diff --git a/hail/python/hailtop/hailctl/dev/cli.py b/hail/python/hailtop/hailctl/dev/cli.py index 2627a92be43..e775bb65323 100644 --- a/hail/python/hailtop/hailctl/dev/cli.py +++ b/hail/python/hailtop/hailctl/dev/cli.py @@ -1,5 +1,3 @@ -import sys - import argparse from . import config @@ -14,7 +12,7 @@ def parser(): # we have to set dest becuase of a rendering bug in argparse # https://bugs.python.org/issue29298 main_subparsers = main_parser.add_subparsers(title='hailctl subcommand', dest='hailctl subcommand', required=True) - + dev_parser = main_subparsers.add_parser( 'dev', help='Developer tools.', diff --git a/hail/python/hailtop/hailctl/dev/config/cli.py b/hail/python/hailtop/hailctl/dev/config/cli.py index 519a3e8e954..6d3eb411e1c 100644 --- a/hail/python/hailtop/hailctl/dev/config/cli.py +++ b/hail/python/hailtop/hailctl/dev/config/cli.py @@ -1,9 +1,7 @@ -import os -import json - from . import set_property from . 
import show + def init_parser(config_parser): subparsers = config_parser.add_subparsers(title='hailctl dev config subcommand', dest='hailctl dev config subcommand', required=True) @@ -23,6 +21,7 @@ def init_parser(config_parser): show_parser.set_defaults(module='hailctl dev config show') show.init_parser(show_parser) + def main(args): if args.module == 'hailctl dev config set': set_property.main(args) diff --git a/hail/python/hailtop/hailctl/dev/config/show.py b/hail/python/hailtop/hailctl/dev/config/show.py index e7ce9f492ef..f2f87d89278 100644 --- a/hail/python/hailtop/hailctl/dev/config/show.py +++ b/hail/python/hailtop/hailctl/dev/config/show.py @@ -1,9 +1,11 @@ from hailtop.config import get_deploy_config -def init_parser(parser): + +def init_parser(parser): # pylint: disable=unused-argument pass -def main(args): + +def main(args): # pylint: disable=unused-argument deploy_config = get_deploy_config() print(f' location: {deploy_config.location()}') print(f' default: {deploy_config._default_namespace}') From bfc9667b06858febd9c8eede1433e4149fc93d08 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Fri, 4 Dec 2020 01:25:51 -0500 Subject: [PATCH 010/501] fix --- hail/python/hailtop/hailctl/dev/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hail/python/hailtop/hailctl/dev/cli.py b/hail/python/hailtop/hailctl/dev/cli.py index e775bb65323..30cea025971 100644 --- a/hail/python/hailtop/hailctl/dev/cli.py +++ b/hail/python/hailtop/hailctl/dev/cli.py @@ -37,7 +37,7 @@ def parser(): 'query', help='Set dev settings on query service', description='Set dev settings on query service') - deploy_parser.set_defaults(module='query') + query_parser.set_defaults(module='query') query.cli.init_parser(query_parser) return main_parser From 05188fa4cccb92996efad508e9483a33eda9f8b8 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Fri, 4 Dec 2020 23:52:45 -0500 Subject: [PATCH 011/501] fix tests --- .../test/hailtop/config/test_deploy_config.py | 42 +++++++++++++------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/hail/python/test/hailtop/config/test_deploy_config.py b/hail/python/test/hailtop/config/test_deploy_config.py index 5bb23f15c70..3b9af7c5cb4 100644 --- a/hail/python/test/hailtop/config/test_deploy_config.py +++ b/hail/python/test/hailtop/config/test_deploy_config.py @@ -7,27 +7,36 @@ def test_deploy_external_default(self): self.assertEqual(deploy_config.location(), 'external') self.assertEqual(deploy_config.service_ns('quam'), 'default') - self.assertEqual(deploy_config.service_ns('foo'), 'bar') + self.assertEqual(deploy_config.service_ns('foo'), 'default') self.assertEqual(deploy_config.scheme(), 'https') self.assertEqual(deploy_config.auth_session_cookie_name(), 'session') - self.assertEqual(deploy_config.domain('quam'), 'quam.hail.is') + self.assertEqual(deploy_config.domain('quam'), 'quam.organization.tld') self.assertEqual(deploy_config.base_path('quam'), '') - self.assertEqual(deploy_config.base_url('quam'), 'https://quam.hail.is') - self.assertEqual(deploy_config.url('quam', '/moo'), 'https://quam.hail.is/moo') - self.assertEqual(deploy_config.external_url('quam', '/moo'), 'https://quam.hail.is/moo') + self.assertEqual(deploy_config.base_url('quam'), 'https://quam.organization.tld') + self.assertEqual(deploy_config.url('quam', '/moo'), 'https://quam.organization.tld/moo') + self.assertEqual(deploy_config.external_url('quam', '/moo'), 'https://quam.organization.tld/moo') + + def test_deploy_external_bar(self): + deploy_config = DeployConfig('external', 
'bar') + + self.assertEqual(deploy_config.location(), 'external') + self.assertEqual(deploy_config.service_ns('quam'), 'bar') + self.assertEqual(deploy_config.service_ns('foo'), 'bar') + self.assertEqual(deploy_config.scheme(), 'https') + self.assertEqual(deploy_config.auth_session_cookie_name(), 'session') self.assertEqual(deploy_config.base_path('foo'), '/bar/foo') - self.assertEqual(deploy_config.base_url('foo'), 'https://internal.hail.is/bar/foo') - self.assertEqual(deploy_config.url('foo', '/moo'), 'https://internal.hail.is/bar/foo/moo') - self.assertEqual(deploy_config.external_url('foo', '/moo'), 'https://internal.hail.is/bar/foo/moo') + self.assertEqual(deploy_config.base_url('foo'), 'https://internal.organization.tld/bar/foo') + self.assertEqual(deploy_config.url('foo', '/moo'), 'https://internal.organization.tld/bar/foo/moo') + self.assertEqual(deploy_config.external_url('foo', '/moo'), 'https://internal.organization.tld/bar/foo/moo') def test_deploy_k8s_default(self): - deploy_config = DeployConfig('k8s', 'default', {'foo': 'bar'}) + deploy_config = DeployConfig('k8s', 'default', 'organization.tld') self.assertEqual(deploy_config.location(), 'k8s') self.assertEqual(deploy_config.service_ns('quam'), 'default') - self.assertEqual(deploy_config.service_ns('foo'), 'bar') + self.assertEqual(deploy_config.service_ns('foo'), 'default') self.assertEqual(deploy_config.scheme(), 'https') self.assertEqual(deploy_config.auth_session_cookie_name(), 'session') @@ -35,9 +44,18 @@ def test_deploy_k8s_default(self): self.assertEqual(deploy_config.base_path('quam'), '') self.assertEqual(deploy_config.base_url('quam'), 'https://quam.default') self.assertEqual(deploy_config.url('quam', '/moo'), 'https://quam.default/moo') - self.assertEqual(deploy_config.external_url('quam', '/moo'), 'https://quam.hail.is/moo') + self.assertEqual(deploy_config.external_url('quam', '/moo'), 'https://quam.organization.tld/moo') + + def test_deploy_k8s_bar(self): + deploy_config = DeployConfig('k8s', 'bar', 'organization.tld') + + self.assertEqual(deploy_config.location(), 'k8s') + self.assertEqual(deploy_config.service_ns('quam'), 'default') + self.assertEqual(deploy_config.service_ns('foo'), 'default') + self.assertEqual(deploy_config.scheme(), 'https') + self.assertEqual(deploy_config.auth_session_cookie_name(), 'session') self.assertEqual(deploy_config.base_path('foo'), '/bar/foo') self.assertEqual(deploy_config.base_url('foo'), 'https://foo.bar/bar/foo') self.assertEqual(deploy_config.url('foo', '/moo'), 'https://foo.bar/bar/foo/moo') - self.assertEqual(deploy_config.external_url('foo', '/moo'), 'https://internal.hail.is/bar/foo/moo') + self.assertEqual(deploy_config.external_url('foo', '/moo'), 'https://internal.organization.tld/bar/foo/moo') From fa982c80c080b8fa88d86d7f1f30206defdfb21c Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Fri, 4 Dec 2020 23:57:49 -0500 Subject: [PATCH 012/501] more tyops --- batch/batch/batch_configuration.py | 6 +++--- batch/batch/driver/zone_monitor.py | 6 +++--- batch/deployment.yaml | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/batch/batch/batch_configuration.py b/batch/batch/batch_configuration.py index 8d76f65f0ea..2fb845e0445 100644 --- a/batch/batch/batch_configuration.py +++ b/batch/batch/batch_configuration.py @@ -6,10 +6,10 @@ DEFAULT_NAMESPACE = os.environ['HAIL_DEFAULT_NAMESPACE'] PROJECT = os.environ['PROJECT'] -GPC_REGION = os.environ['HAIL_GPC_REGION'] +GCP_REGION = os.environ['HAIL_GCP_REGION'] -BATCH_GPC_REGIONS = 
set(json.loads(os.environ['HAIL_BATCH_GPC_REGIONS'])) -BATCH_GPC_REGIONS.add(GPC_REGION) +BATCH_GCP_REGIONS = set(json.loads(os.environ['HAIL_BATCH_GCP_REGIONS'])) +BATCH_GCP_REGIONS.add(GCP_REGION) assert PROJECT != '' KUBERNETES_SERVER_URL = os.environ['KUBERNETES_SERVER_URL'] diff --git a/batch/batch/driver/zone_monitor.py b/batch/batch/driver/zone_monitor.py index 38d6ec26fa3..355afd93ea1 100644 --- a/batch/batch/driver/zone_monitor.py +++ b/batch/batch/driver/zone_monitor.py @@ -6,7 +6,7 @@ from hailtop.utils import retry_long_running from ..utils import WindowFractionCounter -from ..batch_configuration import GPC_REGION, BATCH_GPC_REGIONS +from ..batch_configuration import GCP_REGION, BATCH_GCP_REGIONS log = logging.getLogger('zone_monitor') @@ -98,12 +98,12 @@ def zone_weights(self, worker_cores, worker_local_ssd_data_disk, worker_pd_ssd_d async def update_region_quotas(self): self.region_info = { name: await self.compute_client.get(f'/regions/{name}') - for name in BATCH_GPC_REGIONS + for name in BATCH_GCP_REGIONS } self.init_zones = [ os.path.basename(urllib.parse.urlparse(z).path) - for z in self.region_info[GPC_REGION]['zones'] + for z in self.region_info[GCP_REGION]['zones'] ] self.init_zone_weights = [ZoneWeight(z, 1) for z in self.init_zones] diff --git a/batch/deployment.yaml b/batch/deployment.yaml index 61ccfb28ab0..47b58acd6ba 100644 --- a/batch/deployment.yaml +++ b/batch/deployment.yaml @@ -57,12 +57,12 @@ spec: valueFrom: secretKeyRef: name: global-config - key: gpc_region + key: gcp_region - name: HAIL_BATCH_GCP_REGIONS valueFrom: secretKeyRef: name: global-config - key: batch_gpc_regions + key: batch_gcp_regions - name: KUBERNETES_SERVER_URL value: "{{ global.k8s_server_url }}" - name: HAIL_SHA From 6f76dd7aec7ff7e0af1f2946949af430fd053eb8 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Sat, 5 Dec 2020 00:39:29 -0500 Subject: [PATCH 013/501] fix --- hail/python/test/hailtop/config/test_deploy_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hail/python/test/hailtop/config/test_deploy_config.py b/hail/python/test/hailtop/config/test_deploy_config.py index 3b9af7c5cb4..c8ebbc6dab8 100644 --- a/hail/python/test/hailtop/config/test_deploy_config.py +++ b/hail/python/test/hailtop/config/test_deploy_config.py @@ -3,7 +3,7 @@ class Test(unittest.TestCase): def test_deploy_external_default(self): - deploy_config = DeployConfig('external', 'default', {'foo': 'bar'}) + deploy_config = DeployConfig('external', 'default', 'organization.tld') self.assertEqual(deploy_config.location(), 'external') self.assertEqual(deploy_config.service_ns('quam'), 'default') From bcdbec25a8dcbf2ece901739ff668e84509f09fa Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Sat, 5 Dec 2020 00:42:14 -0500 Subject: [PATCH 014/501] fix config --- batch/deployment.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/batch/deployment.yaml b/batch/deployment.yaml index 47b58acd6ba..e9b365400c0 100644 --- a/batch/deployment.yaml +++ b/batch/deployment.yaml @@ -184,6 +184,16 @@ spec: value: "{{ default_ns.name }}" - name: PROJECT value: "{{ global.project }}" + - name: HAIL_GCP_REGION + valueFrom: + secretKeyRef: + name: global-config + key: gcp_region + - name: HAIL_BATCH_GCP_REGIONS + valueFrom: + secretKeyRef: + name: global-config + key: batch_gcp_regions - name: KUBERNETES_SERVER_URL value: "{{ global.k8s_server_url }}" - name: HAIL_SHA From cfccc931230dc80000e08fabbfcd2f9445bdd4c8 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Sat, 5 Dec 2020 03:00:44 -0500 
Subject: [PATCH 015/501] fix --- hail/python/test/hailtop/config/test_deploy_config.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/hail/python/test/hailtop/config/test_deploy_config.py b/hail/python/test/hailtop/config/test_deploy_config.py index c8ebbc6dab8..01e4dd7659c 100644 --- a/hail/python/test/hailtop/config/test_deploy_config.py +++ b/hail/python/test/hailtop/config/test_deploy_config.py @@ -18,13 +18,13 @@ def test_deploy_external_default(self): self.assertEqual(deploy_config.external_url('quam', '/moo'), 'https://quam.organization.tld/moo') def test_deploy_external_bar(self): - deploy_config = DeployConfig('external', 'bar') + deploy_config = DeployConfig('external', 'bar', 'organization.tld') self.assertEqual(deploy_config.location(), 'external') self.assertEqual(deploy_config.service_ns('quam'), 'bar') self.assertEqual(deploy_config.service_ns('foo'), 'bar') self.assertEqual(deploy_config.scheme(), 'https') - self.assertEqual(deploy_config.auth_session_cookie_name(), 'session') + self.assertEqual(deploy_config.auth_session_cookie_name(), 'sesh') self.assertEqual(deploy_config.base_path('foo'), '/bar/foo') self.assertEqual(deploy_config.base_url('foo'), 'https://internal.organization.tld/bar/foo') @@ -50,10 +50,10 @@ def test_deploy_k8s_bar(self): deploy_config = DeployConfig('k8s', 'bar', 'organization.tld') self.assertEqual(deploy_config.location(), 'k8s') - self.assertEqual(deploy_config.service_ns('quam'), 'default') - self.assertEqual(deploy_config.service_ns('foo'), 'default') + self.assertEqual(deploy_config.service_ns('quam'), 'bar') + self.assertEqual(deploy_config.service_ns('foo'), 'bar') self.assertEqual(deploy_config.scheme(), 'https') - self.assertEqual(deploy_config.auth_session_cookie_name(), 'session') + self.assertEqual(deploy_config.auth_session_cookie_name(), 'sesh') self.assertEqual(deploy_config.base_path('foo'), '/bar/foo') self.assertEqual(deploy_config.base_url('foo'), 'https://foo.bar/bar/foo') From 56f63cd006edd18b873a0b322f379baf2fedf721 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Sat, 5 Dec 2020 04:16:24 -0500 Subject: [PATCH 016/501] fix --- hail/src/main/scala/is/hail/services/DeployConfig.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hail/src/main/scala/is/hail/services/DeployConfig.scala b/hail/src/main/scala/is/hail/services/DeployConfig.scala index 17a9623f8f9..213646a934b 100644 --- a/hail/src/main/scala/is/hail/services/DeployConfig.scala +++ b/hail/src/main/scala/is/hail/services/DeployConfig.scala @@ -51,7 +51,7 @@ object DeployConfig { new DeployConfig( (config \ "location").extract[String], (config \ "default_namespace").extract[String], - (config \ "domain").extract[String]) + (config \ "domain").extract[Option[String]].getOrElse("hail.is")) } } From 77694cba164619a97cfd9f0601490e1b52695206 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Tue, 17 Nov 2020 18:05:57 -0500 Subject: [PATCH 017/501] Add infra/ using Terraform. Remove vdc/. Right now this spins up K8s, a database, some of the networking stuff, and creates a default/global-config secret that includes the information stored in $HAIL/config.mk. This isn't used yet and will probably change. 
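As a rough sketch of the bring-up this enables (values are illustrative; the authoritative steps are in the infra/README.md added below):

    $ cat $HOME/.hail/global.tfvars
    gcp_project = "my-project"
    gcp_region  = "us-central1"
    gcp_zone    = "us-central1-a"
    domain      = "example.org"
    $ terraform init
    $ terraform apply -var-file="$HOME/.hail/global.tfvars"

Terraform reads its service-account credentials from ~/.hail/terraform_sa_key.json, as declared in the google provider block.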
--- infra/.gitignore | 3 + infra/README.md | 31 ++++ infra/main.tf | 190 +++++++++++++++++++ vdc/.gitignore | 5 - vdc/Makefile | 121 ------------- vdc/README.md | 50 ----- vdc/delete-gcr-images.sh | 12 -- vdc/gcp-address.yaml | 9 - vdc/gcp-config.yaml.in | 117 ------------ vdc/k8s-config.yaml | 371 -------------------------------------- vdc/list-gcp-sa-keys | 14 -- vdc/make-k8s-gcp-sa-key | 28 --- vdc/priority-classes.yaml | 31 ---- vdc/remove-k8s-gcp-sa-key | 21 --- 14 files changed, 224 insertions(+), 779 deletions(-) create mode 100644 infra/.gitignore create mode 100644 infra/README.md create mode 100644 infra/main.tf delete mode 100644 vdc/.gitignore delete mode 100644 vdc/Makefile delete mode 100644 vdc/README.md delete mode 100644 vdc/delete-gcr-images.sh delete mode 100644 vdc/gcp-address.yaml delete mode 100644 vdc/gcp-config.yaml.in delete mode 100644 vdc/k8s-config.yaml delete mode 100755 vdc/list-gcp-sa-keys delete mode 100755 vdc/make-k8s-gcp-sa-key delete mode 100644 vdc/priority-classes.yaml delete mode 100755 vdc/remove-k8s-gcp-sa-key diff --git a/infra/.gitignore b/infra/.gitignore new file mode 100644 index 00000000000..6388da3f5cc --- /dev/null +++ b/infra/.gitignore @@ -0,0 +1,3 @@ +/.terraform +/terraform.tfstate +/terraform.tfstate*.backup diff --git a/infra/README.md b/infra/README.md new file mode 100644 index 00000000000..852391db53e --- /dev/null +++ b/infra/README.md @@ -0,0 +1,31 @@ +This is a work in progress to use Terraform to manage our cloud +infrastructure. + +Instructions: + +- You will need a GCP project. Create a service account for Terraform + with Editor role, and create a service account key and place it in + `$HOME/.hail/terraform_sa_key.json`. + +- Install terraform. + +- Create `$HOME/.hail/global.tfvars` that looks like: + + ``` + gcp_project = "" + gcp_region = "" + gcp_zone = "" + domain = "" + ``` + +- Run `terraform init`. + +- Run `terraform apply -var-file="$HOME/.hail/global.tfvars"`. + +You can now install Hail. Everything beyond this point assumes gcloud +and kubectl point to your GCP project and the cluster created by +Terraform. + +- Run `$HAIL/docker/third-party/copy_images.sh`. This copies some + base images from Dockerhub (which now has rate limits) to GCR. + diff --git a/infra/main.tf b/infra/main.tf new file mode 100644 index 00000000000..9e42fab57e0 --- /dev/null +++ b/infra/main.tf @@ -0,0 +1,190 @@ +terraform { + required_providers { + google = { + source = "hashicorp/google" + version = "3.48.0" + } + kubernetes = { + source = "hashicorp/kubernetes" + version = "1.13.3" + } + } +} + +variable "gcp_project" {} +variable "gcp_region" {} +variable "gcp_zone" {} +variable "domain" {} + +provider "google" { + credentials = file("~/.hail/terraform_sa_key.json") + + project = var.gcp_project + region = var.gcp_region + zone = var.gcp_zone +} + +data "google_client_config" "provider" {} + +resource "google_project_service" "service_networking" { + service = "servicenetworking.googleapis.com" +} + +resource "google_compute_network" "internal" { + name = "internal" +} + +data "google_compute_subnetwork" "internal_default_region" { + name = "internal" + region = var.gcp_region + depends_on = [google_compute_network.internal] +} + +resource "google_container_cluster" "vdc" { + name = "vdc" + location = var.gcp_zone + network = google_compute_network.internal.name + + # We can't create a cluster with no node pool defined, but we want to only use + # separately managed node pools. 
So we create the smallest possible default + # node pool and immediately delete it. + remove_default_node_pool = true + initial_node_count = 1 + + master_auth { + username = "" + password = "" + + client_certificate_config { + issue_client_certificate = false + } + } +} + +resource "google_container_node_pool" "vdc_preemptible_pool" { + name = "preemptible-pool" + location = var.gcp_zone + cluster = google_container_cluster.vdc.name + + autoscaling { + min_node_count = 0 + max_node_count = 200 + } + + node_config { + preemptible = true + machine_type = "n1-standard-2" + + metadata = { + disable-legacy-endpoints = "true" + } + + oauth_scopes = [ + "https://www.googleapis.com/auth/cloud-platform" + ] + } +} + +resource "google_container_node_pool" "vdc_nonpreemptible_pool" { + name = "nonpreemptible-pool" + location = var.gcp_zone + cluster = google_container_cluster.vdc.name + + autoscaling { + min_node_count = 0 + max_node_count = 200 + } + + node_config { + preemptible = false + machine_type = "n1-standard-2" + + metadata = { + disable-legacy-endpoints = "true" + } + + oauth_scopes = [ + "https://www.googleapis.com/auth/cloud-platform" + ] + } +} + +resource "google_compute_global_address" "db_ip_address" { + name = "db-ip-address" + purpose = "VPC_PEERING" + address_type = "INTERNAL" + prefix_length = 16 + network = google_compute_network.internal.id +} + +resource "google_service_networking_connection" "private_vpc_connection" { + network = google_compute_network.internal.id + service = "servicenetworking.googleapis.com" + reserved_peering_ranges = [google_compute_global_address.db_ip_address.name] +} + +resource "random_id" "db_name_suffix" { + byte_length = 4 +} + +resource "google_sql_database_instance" "db" { + name = "db-${random_id.db_name_suffix.hex}" + database_version = "MYSQL_5_7" + region = var.gcp_region + + depends_on = [google_service_networking_connection.private_vpc_connection] + + settings { + # Second-generation instance tiers are based on the machine + # type. See argument reference below. 
+ tier = "db-n1-standard-1" + + ip_configuration { + ipv4_enabled = false + private_network = google_compute_network.internal.id + require_ssl = true + } + } +} + +# FIXME rename this to gateway +resource "google_compute_address" "gateway" { + name = "gateway" +} + +resource "google_compute_address" "internal_gateway" { + name = "internal-gateway" + subnetwork = data.google_compute_subnetwork.internal_default_region.id + address_type = "INTERNAL" + region = var.gcp_region +} + +provider "kubernetes" { + load_config_file = false + + host = "https://${google_container_cluster.vdc.endpoint}" + token = data.google_client_config.provider.access_token + cluster_ca_certificate = base64decode( + google_container_cluster.vdc.master_auth[0].cluster_ca_certificate, + ) +} + +resource "kubernetes_secret" "global_config" { + metadata { + name = "global-config" + } + + data = { + "config.json" = < gcp-config.yaml - gcloud beta -q deployment-manager deployments create default --config gcp-config.yaml - kubectl create clusterrolebinding deploy-cluster-admin-binding --clusterrole cluster-admin --user deploy@$(PROJECT).iam.gserviceaccount.com -# FIXME still trying to figure out out to add this to config - gsutil iam ch serviceAccount:gcr-push@$(PROJECT).iam.gserviceaccount.com:admin gs://artifacts.$(PROJECT).appspot.com - gsutil iam ch serviceAccount:gcr-pull@$(PROJECT).iam.gserviceaccount.com:objectViewer gs://artifacts.$(PROJECT).appspot.com - gsutil iam ch serviceAccount:vdc-sa@$(PROJECT).iam.gserviceaccount.com:objectViewer gs://artifacts.$(PROJECT).appspot.com - -build-out-k8s: k8s-config create-k8s-secrets run-letsencrypt deploy-k8s - -k8s-config: - kubectl apply -f k8s-config.yaml - -create-k8s-secrets: - kubectl apply -f secrets.yaml - kubectl delete secrets --ignore-not-found=true gcr-push-service-account-key gcr-pull-key letsencrypt-config - gcloud iam service-accounts keys create \ - --iam-account=gcr-push@$(PROJECT).iam.gserviceaccount.com \ - gcr-push-service-account-key.json - kubectl create secret generic gcr-push-service-account-key --from-file=gcr-push-service-account-key.json - rm -f gcr-push-service-account-key.json - gcloud iam service-accounts keys create \ - --iam-account=gcr-pull@$(PROJECT).iam.gserviceaccount.com \ - gcr-pull.json - kubectl create secret generic gcr-pull-key --from-file=gcr-pull.json - rm -f gcr-pull.json -# empty secret to be filled in by letsencrypt - kubectl create secret generic letsencrypt-config - -ifeq ($(RUN_LETSENCRYPT),1) -run-letsencrypt: - $(MAKE) -C ../letsencrypt start-service - $(MAKE) -C ../gateway LETSENCRYPT_ONLY=1 deploy - kubectl -n default rollout status -w deployment gateway-deployment - $(MAKE) -C ../letsencrypt run -else -run-letsencrypt: - kubectl apply -f letsencrypt-config.yaml -endif - -deploy-k8s: - $(MAKE) -C ../letsencrypt start-service - $(MAKE) -C ../batch deploy -# FIXME ci can't run as a secondary yet - $(MAKE) -C ../ci run-service - $(MAKE) -C ../notebook deploy - $(MAKE) -C ../image-fetcher deploy - $(MAKE) -C ../scorecard deploy - $(MAKE) -C ../site deploy - $(MAKE) -C ../upload deploy -# last so the services are up - $(MAKE) -C ../gateway deploy - -clean-gcr: - bash delete-gcr-images.sh -# don't fail if doesn't exist - -gsutil -m rm -r gs://artifacts.$(PROJECT).appspot.com - -tear-down: delete-deployment clean-gcr - -delete-deployment: gcloud-config - gcloud beta -q deployment-manager deployments delete default - -create-address: gcloud-config - gcloud beta -q deployment-manager deployments create address --config 
gcp-address.yaml - -delete-address: gcloud-config - gcloud beta -q deployment-manager deployments delete address diff --git a/vdc/README.md b/vdc/README.md deleted file mode 100644 index 56d0019fe06..00000000000 --- a/vdc/README.md +++ /dev/null @@ -1,50 +0,0 @@ -Playbook for building the Hail GCP project. - -### Setup - -Deployment assumes the following things have been set up by hand -beforehand: - - - In the GCP console, go to APIs & Services > Library and enable the - following APIs: - - - Enable Identity and Access Management (IAM) API - - Enable Cloud SQL Admin API - - Google Cloud Deployment Manager V2 API - - - Reserve a static IP address `site` by running `make create-address`. - - - Update the domain's DNS to point to `site`'s external IP address. - You can print the IP address by running `make echo-ip`. - - - Create a service account - `deploy@.iam.gserviceaccount.com` with the project - owner role. - - - Activate the deploy service account in `gcloud` by running `make - activate-deploy`. - -### Deploy - - - Put secrets.yaml in `./secrets.yaml`. - - - Run, for example: - -``` -make PROJECT=hail-vdc IP=35.188.91.25 DOMAIN=hail.is build-out -``` - - Warning: modifies gcloud, kubectl configuration setting - - - Add `vdc-sa@.iam.gserviceaccount.com` service account - to broad-ctsa/artifacts.broad-ctsa.appspot.com to Storage Object - Viewer role. - -### Finish - - - destroy the privileged deploy service account with `make destroy-deploy` - -### FIXME - - - Doesn't deploy ci, which can't have multiple running instances. - - Describe secrets.yaml. diff --git a/vdc/delete-gcr-images.sh b/vdc/delete-gcr-images.sh deleted file mode 100644 index f567281af2d..00000000000 --- a/vdc/delete-gcr-images.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash -set -ex - -PROJECT=$(gcloud config get-value project) -echo PROJECT=${PROJECT} - -for IMAGE in $(gcloud container images list --format 'get(name)'); do - echo IMAGE=${IMAGE} - for DIGEST in $(gcloud container images list-tags ${IMAGE} --format 'get(digest)'); do - gcloud container images delete -q --force-delete-tags ${IMAGE}@${DIGEST} - done -done diff --git a/vdc/gcp-address.yaml b/vdc/gcp-address.yaml deleted file mode 100644 index 61a17e9cd44..00000000000 --- a/vdc/gcp-address.yaml +++ /dev/null @@ -1,9 +0,0 @@ -resources: -# address -# name is not taken from property -- name: site - type: compute.beta.address - properties: - name: site - region: us-central1 - networkTier: PREMIUM diff --git a/vdc/gcp-config.yaml.in b/vdc/gcp-config.yaml.in deleted file mode 100644 index ec5aa1f452b..00000000000 --- a/vdc/gcp-config.yaml.in +++ /dev/null @@ -1,117 +0,0 @@ -resources: -# service accounts -- name: vdc-sa - type: iam.v1.serviceAccount - properties: -# must be 6-30 characters - accountId: vdc-sa - displayName: for vdc -- name: gcr-push - type: iam.v1.serviceAccount - properties: - accountId: gcr-push - displayName: push to gcr.io -- name: gcr-pull - type: iam.v1.serviceAccount - properties: - accountId: gcr-pull - displayName: pull from gcr.io -- name: k8s-admin - type: iam.v1.serviceAccount - properties: - accountId: k8s-admin - displayName: k8s admin -# buckets -# deployment manager cannot create artifacts.@project@.appspot.com -# sql instance -- name: sql-instance - type: sqladmin.v1beta4.instance - properties: - name: db-gh0um - region: us-central1 - settings: - activationPolicy: ALWAYS - ipConfiguration: - ipv4Enabled: false - privateNetwork: projects/@project@/global/networks/default - backupConfiguration: - enabled: true - startTime: 02:00 - 
storageAutoResize: true - dataDiskSizeGb: 10 - dataDiskType: PD_SSD - tier: db-f1-micro - databaseVersion: MYSQL_5_6 -# gke cluster -- name: vdc-gke-cluster - type: container.v1.cluster - metadata: - dependsOn: - - vdc-sa - properties: - zone: @zone@ - cluster: - addonsConfig: - httpLoadBalancing: {} - kubernetesDashboard: {disabled: true} - initialClusterVersion: 1.10.7-gke.6 - ipAllocationPolicy: {useIpAliases: true} - location: @zone@ - loggingService: logging.googleapis.com - masterAuth: - clientCertificateConfig: {} - masterAuthorizedNetworksConfig: {} - monitoringService: monitoring.googleapis.com - name: vdc - network: projects/@project@/global/networks/default - networkPolicy: {} - nodePools: - - autoscaling: {} - config: - diskSizeGb: 100 - diskType: pd-standard - imageType: COS - serviceAccount: vdc-sa@@project@.iam.gserviceaccount.com - machineType: n1-standard-1 - oauthScopes: ['https://www.googleapis.com/auth/devstorage.read_only', 'https://www.googleapis.com/auth/logging.write', - 'https://www.googleapis.com/auth/monitoring', 'https://www.googleapis.com/auth/servicecontrol', - 'https://www.googleapis.com/auth/service.management.readonly', 'https://www.googleapis.com/auth/trace.append'] - initialNodeCount: 3 - management: {autoRepair: true} - name: default-pool - version: 1.10.7-gke.6 - - autoscaling: {enabled: true, maxNodeCount: 6} - config: - diskSizeGb: 100 - diskType: pd-standard - imageType: COS - labels: {preemptible: 'true'} - serviceAccount: vdc-sa@@project@.iam.gserviceaccount.com - machineType: n1-standard-8 - oauthScopes: ['https://www.googleapis.com/auth/devstorage.read_only', 'https://www.googleapis.com/auth/logging.write', - 'https://www.googleapis.com/auth/monitoring', 'https://www.googleapis.com/auth/servicecontrol', - 'https://www.googleapis.com/auth/service.management.readonly', 'https://www.googleapis.com/auth/trace.append'] - preemptible: true - taints: - - {effect: NO_SCHEDULE, key: preemptible, value: 'true'} - management: {autoRepair: true} - name: preemptible-pool - version: 1.10.7-gke.6 - - autoscaling: {} - config: - diskSizeGb: 100 - diskType: pd-standard - imageType: COS - labels: {preemptible: 'false'} - serviceAccount: vdc-sa@@project@.iam.gserviceaccount.com - machineType: n1-standard-8 - oauthScopes: ['https://www.googleapis.com/auth/devstorage.read_only', 'https://www.googleapis.com/auth/logging.write', - 'https://www.googleapis.com/auth/monitoring', 'https://www.googleapis.com/auth/servicecontrol', - 'https://www.googleapis.com/auth/service.management.readonly', 'https://www.googleapis.com/auth/trace.append'] - preemptible: false - initialNodeCount: 1 - management: {autoRepair: true} - name: non-preemptible-pool - version: 1.10.7-gke.6 - privateClusterConfig: {} - subnetwork: projects/@project@/regions/@region@/subnetworks/default diff --git a/vdc/k8s-config.yaml b/vdc/k8s-config.yaml deleted file mode 100644 index a9569bcdc1e..00000000000 --- a/vdc/k8s-config.yaml +++ /dev/null @@ -1,371 +0,0 @@ -apiVersion: v1 -kind: Namespace -metadata: - name: batch-pods ---- -apiVersion: v1 -kind: Namespace -metadata: - name: test ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: batch-svc ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: gateway ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: notebook ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: letsencrypt ---- -kind: Role -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - namespace: default - name: read-letsencrypt-config -rules: -- apiGroups: 
[""] - resources: ["secrets"] - resourceNames: ["letsencrypt-config"] - verbs: ["get"] ---- -kind: RoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - namespace: default - name: gateway-read-letsencrypt-config -subjects: -- kind: ServiceAccount - name: gateway - namespace: default -roleRef: - kind: Role - name: read-letsencrypt-config - apiGroup: "" ---- -kind: Role -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - namespace: default - name: create-services-and-pods -rules: -- apiGroups: [""] - resources: ["services"] - verbs: ["*"] -- apiGroups: [""] - resources: ["pods"] - verbs: ["*"] ---- -kind: RoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - namespace: default - name: notebook-create-services-and-pods -subjects: -- kind: ServiceAccount - name: notebook - namespace: default -roleRef: - kind: Role - name: create-services-and-pods - apiGroup: "" ---- -kind: Role -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - namespace: default - name: read-get-user-secret -rules: -- apiGroups: [""] - resources: ["secrets"] - resourceNames: ["get-users"] - verbs: ["get"] ---- -kind: RoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - namespace: default - name: notebook-read-get-users-secret -subjects: -- kind: ServiceAccount - name: notebook - namespace: default -roleRef: - kind: Role - name: read-get-user-secret - apiGroup: "" ---- -kind: Role -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - namespace: default - name: update-letsencrypt-config -rules: -- apiGroups: [""] - resources: ["secrets"] - resourceNames: ["letsencrypt-config"] - verbs: ["get", "update", "patch"] ---- -kind: RoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - namespace: default - name: letsencrypt-update-letsencrypt-config -subjects: -- kind: ServiceAccount - name: letsencrypt - namespace: default -roleRef: - kind: Role - name: update-letsencrypt-config - apiGroup: "" ---- -kind: Role -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - namespace: batch-pods - name: batch-pods-admin -rules: -- apiGroups: [""] - resources: ["pods"] - verbs: ["*"] -- apiGroups: [""] - resources: ["pods/log"] - verbs: ["get"] -- apiGroups: [""] - resources: ["services"] - verbs: ["*"] -- apiGroups: [""] - resources: ["persistentvolumeclaims"] - verbs: ["*"] ---- -kind: RoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: batch-svc-batch-pods-admin-binding - namespace: batch-pods -subjects: -- kind: ServiceAccount - name: batch-svc - namespace: default -roleRef: - kind: Role - name: batch-pods-admin - apiGroup: "" ---- -kind: RoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: test-svc-batch-pods-admin-binding - namespace: batch-pods -subjects: -- kind: ServiceAccount - name: test-svc - namespace: batch-pods -roleRef: - kind: Role - name: batch-pods-admin - apiGroup: "" ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: deploy-svc - namespace: batch-pods ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: test-svc - namespace: batch-pods ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: deploy-svc - namespace: test ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: test-svc - namespace: test ---- -kind: Role -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - namespace: default - name: default-deploy -rules: -- apiGroups: ["*"] - resources: ["*"] - verbs: ["*"] ---- -kind: Role -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - namespace: test - name: test-test -rules: -- 
apiGroups: ["*"] - resources: ["*"] - verbs: ["*"] ---- -kind: RoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: deploy-svc-default-deploy-binding - namespace: default -subjects: -- kind: ServiceAccount - name: deploy-svc - namespace: batch-pods -roleRef: - kind: Role - name: default-deploy - apiGroup: "rbac.authorization.k8s.io" ---- -kind: RoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: test-svc-test-test-binding - namespace: test -subjects: -- kind: ServiceAccount - name: test-svc - namespace: batch-pods -roleRef: - kind: Role - name: test-test - apiGroup: "rbac.authorization.k8s.io" ---- -kind: RoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: batch-svc-default-deploy-binding - namespace: default -subjects: -- kind: ServiceAccount - name: batch-svc - namespace: default -roleRef: - kind: Role - name: default-deploy - apiGroup: "rbac.authorization.k8s.io" ---- -kind: RoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: batch-svc-test-test-binding - namespace: default -subjects: -- kind: ServiceAccount - name: batch-svc - namespace: default -roleRef: - kind: Role - name: test-test - apiGroup: "rbac.authorization.k8s.io" ---- -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: letsencrypt-certs -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 10Mi ---- -apiVersion: v1 -kind: Role -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - namespace: test - name: delete-test-pvc -rules: -- apiGroups: [""] - resources: ["persistentvolumeclaims"] - verbs: ["list", "delete"] ---- -apiVersion: v1 -kind: RoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: deploy-svc-delete-test-pvc - namespace: test -subjects: -- kind: ServiceAccount - name: deploy-svc - namespace: batch-pods -roleRef: - kind: Role - name: delete-test-pvc - apiGroup: "rbac.authorization.k8s.io" ---- -apiVersion: v1 -kind: Role -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - namespace: batch-pods - name: delete-batch-pods-pvc -rules: -- apiGroups: [""] - resources: ["persistentvolumeclaims"] - verbs: ["list", "delete"] ---- -apiVersion: v1 -kind: RoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: deploy-svc-delete-batch-pods-pvc - namespace: batch-pods -subjects: -- kind: ServiceAccount - name: deploy-svc - namespace: batch-pods -roleRef: - kind: Role - name: delete-batch-pods-pvc - apiGroup: "rbac.authorization.k8s.io" ---- -kind: StorageClass -apiVersion: storage.k8s.io/v1 -metadata: - name: batch - namespace: batch-pods -provisioner: kubernetes.io/gce-pd -parameters: - type: pd-standard - replication-type: none ---- -kind: StorageClass -apiVersion: storage.k8s.io/v1 -metadata: - name: batch - namespace: test -provisioner: kubernetes.io/gce-pd -parameters: - type: pd-standard - replication-type: none ---- -apiVersion: v1 -kind: ResourceQuota -metadata: - name: batch-storage-class-quota - namespace: test -spec: - hard: - batch.storageclass.storage.k8s.io/requests.storage: 100Gi - batch.storageclass.storage.k8s.io/persistentvolumeclaims: 100 diff --git a/vdc/list-gcp-sa-keys b/vdc/list-gcp-sa-keys deleted file mode 100755 index 7328495651e..00000000000 --- a/vdc/list-gcp-sa-keys +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/sh - -set -ex - -if [ ! 
$# -eq 1 ] -then - echo "USAGE: list-gcp-sa-keys SERIVCE_ACCOUNT_NAME" - exit 1 -fi - -service_account_name=$1 - -gcloud iam service-accounts keys list \ - --iam-account="${service_account_name}@$(gcloud config get-value project).iam.gserviceaccount.com" diff --git a/vdc/make-k8s-gcp-sa-key b/vdc/make-k8s-gcp-sa-key deleted file mode 100755 index 22adead466e..00000000000 --- a/vdc/make-k8s-gcp-sa-key +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/sh - -set -ex - -if [ ! $# -eq 2 ] -then - echo "USAGE: make-k8s-gcp-sa-key SERIVCE_ACCOUNT_NAME NAMESPACE" - exit 1 -fi - -service_account_name=$1 -namespace=$2 -temp_file=$(mktemp /tmp/key.json.XXXXXX) - -cleanup() { - set +e - trap "" INT TERM - rm -rf ${temp_file} -} -trap cleanup EXIT -trap "exit 24" INT TERM - -gcloud iam service-accounts keys create ${temp_file} \ - --iam-account=${service_account_name}@hail-vdc.iam.gserviceaccount.com -kubectl create secret generic \ - gcp-sa-key-${service_account_name} \ - --namespace=${namespace} \ - --from-file=key.json="${temp_file}" diff --git a/vdc/priority-classes.yaml b/vdc/priority-classes.yaml deleted file mode 100644 index 05d4fc542da..00000000000 --- a/vdc/priority-classes.yaml +++ /dev/null @@ -1,31 +0,0 @@ -apiVersion: scheduling.k8s.io/v1 -kind: PriorityClass -metadata: - name: infrastructure -value: 1000000 -globalDefault: false -description: "For infrastructure like gateway." ---- -apiVersion: scheduling.k8s.io/v1 -kind: PriorityClass -metadata: - name: production -value: 900000 -globalDefault: false -description: "For production services like apiserver, batch, etc." ---- -apiVersion: scheduling.k8s.io/v1 -kind: PriorityClass -metadata: - name: user -value: 500000 -globalDefault: true -description: "For user activities like user batch jobs." ---- -apiVersion: scheduling.k8s.io/v1 -kind: PriorityClass -metadata: - name: test -value: 0 -globalDefault: false -description: "For tests." diff --git a/vdc/remove-k8s-gcp-sa-key b/vdc/remove-k8s-gcp-sa-key deleted file mode 100755 index 36026e1846b..00000000000 --- a/vdc/remove-k8s-gcp-sa-key +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/sh - -set -ex - -if [ ! 
$# -eq 2 ] -then - echo "USAGE: remove-k8s-gcp-sa-key SERIVCE_ACCOUNT_NAME NAMESPACE" - exit 1 -fi - -service_account_name=$1 -namespace=$2 -private_key_id=$(kubectl get secret gcp-sa-key-${service_account_name} \ - -o json \ - --namespace=$namespace \ - | jq -r '.data["key.json"]' \ - | base64 -D \ - | jq -r '.private_key_id') - -gcloud iam service-accounts keys delete ${private_key_id} \ - --iam-account="${service_account_name}@$(gcloud config get-value project).iam.gserviceaccount.com" From 4ec8b9dc3e9d5d60647b6b5b9628711a52e6e7db Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Wed, 18 Nov 2020 12:03:57 -0500 Subject: [PATCH 018/501] wip --- ci/bootstrap.py | 78 +++++++++++++++++++++++++++++++++++++++++++++++++ ci/ci/github.py | 2 +- infra/README.md | 25 ++++++++++++---- infra/main.tf | 66 ++++++++++++++++++++++++++++++++++------- 4 files changed, 155 insertions(+), 16 deletions(-) create mode 100644 ci/bootstrap.py diff --git a/ci/bootstrap.py b/ci/bootstrap.py new file mode 100644 index 00000000000..7630255a3b9 --- /dev/null +++ b/ci/bootstrap.py @@ -0,0 +1,78 @@ +from shlex import quote as shq + +from ci.build import BuildConfiguration, Code +from ci.github import clone_or_fetch_script +from ci.utils import generate_token + + +class LocalJob: + def __init__(self, image, command, **kwargs): + self._image = image + self._command = command + self._kwargs = kwargs + + print(f'job: {image}, {command}, {kwargs}') + +class LocalBatchBuilder: + def __init__(self, attributes, callback): + self._attributes = attributes + self._callback = callback + self._jobs = [] + + @property + def attributes(self): + return self._attributes + + @property + def callback(self): + return self._callback + + def create_job(self, image, command, **kwargs): + job = LocalJob(image, command, **kwargs) + self._jobs.append(job) + return job + +class Branch(Code): + def __init__(self, owner, repo, branch, sha): + self._owner = owner + self._repo = repo + self._branch = branch + self._sha = sha + + def short_str(self): + return f'br-{self._owner}-{self._repo}-{self._branch}' + + def repo_dir(self): + return '.' + + def branch_url(self): + return f'https://github.com/{self._owner}/{self._repo}' + + def config(self): + return { + 'checkout_script': self.checkout_script(), + 'branch': self._branch, + 'repo': f'{self._owner}/{self._repo}', + 'repo_url': self.branch_url(), + 'sha': self._sha + } + + def checkout_script(self): + return f''' +{clone_or_fetch_script(self.branch_url())} + +git checkout {shq(self._sha)} +''' + +scope = 'deploy' +code = Branch('cseed', 'hail', 'infra-1', '04cbbf10928aa88ee8be30b65c80388801cdcd32') + +with open(f'build.yaml', 'r') as f: + config = BuildConfiguration(code, f.read(), scope) + +token = generate_token() +batch = LocalBatchBuilder( + attributes={ + 'token': token + }, callback=None) +config.build(batch, code, scope) diff --git a/ci/ci/github.py b/ci/ci/github.py index e517dde615d..ce909528f44 100644 --- a/ci/ci/github.py +++ b/ci/ci/github.py @@ -24,7 +24,7 @@ CALLBACK_URL = deploy_config.url('ci', '/api/v1alpha/batch_callback') -zulip_client = zulip.Client(config_file="/zulip-config/.zuliprc") +zulip_client = None # zulip.Client(config_file="/zulip-config/.zuliprc") class Repo: diff --git a/infra/README.md b/infra/README.md index 852391db53e..348c2d94713 100644 --- a/infra/README.md +++ b/infra/README.md @@ -3,10 +3,25 @@ infrastructure. Instructions: -- You will need a GCP project. 
Create a service account for Terraform - with Editor role, and create a service account key and place it in +- You will need a GCP project. We assume `gcloud` is configured to + point at your project. + +- Create a service account for Terraform with Owner role, create a + service account key and place it in `$HOME/.hail/terraform_sa_key.json`. +- Enable the the GCP services needed by Hail: + + ``` + gcloud services enable \ + compute.googleapis.com \ + cloudresourcemanager.googleapis.com \ + servicenetworking.googleapis.com \ + sqladmin.googleapis.com \ + container.googleapis.com \ + serviceusage.googleapis.com + ``` + - Install terraform. - Create `$HOME/.hail/global.tfvars` that looks like: @@ -21,10 +36,10 @@ Instructions: - Run `terraform init`. - Run `terraform apply -var-file="$HOME/.hail/global.tfvars"`. + Terraform has created a GKE cluster named `vdc`. We assume + `kubectl` is configured to point at this cluster. -You can now install Hail. Everything beyond this point assumes gcloud -and kubectl point to your GCP project and the cluster created by -Terraform. +You can now install Hail: - Run `$HAIL/docker/third-party/copy_images.sh`. This copies some base images from Dockerhub (which now has rate limits) to GCR. diff --git a/infra/main.tf b/infra/main.tf index 9e42fab57e0..f7662d5eea1 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -147,8 +147,7 @@ resource "google_sql_database_instance" "db" { } } -# FIXME rename this to gateway -resource "google_compute_address" "gateway" { +resource "google_compute_global_address" "gateway" { name = "gateway" } @@ -175,15 +174,62 @@ resource "kubernetes_secret" "global_config" { } data = { - "config.json" = < Date: Wed, 18 Nov 2020 13:59:20 -0500 Subject: [PATCH 019/501] wip --- ci/bootstrap.py | 56 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 53 insertions(+), 3 deletions(-) diff --git a/ci/bootstrap.py b/ci/bootstrap.py index 7630255a3b9..be90760e1ce 100644 --- a/ci/bootstrap.py +++ b/ci/bootstrap.py @@ -1,3 +1,4 @@ +import os from shlex import quote as shq from ci.build import BuildConfiguration, Code @@ -6,12 +7,24 @@ class LocalJob: - def __init__(self, image, command, **kwargs): + def __init__(self, index, image, command, *, + env=None, mount_docker_socket=False, secrets=None, parents=None, + input_files=None, output_files=None, + **kwargs): + self._index = index self._image = image self._command = command + + self._env = env + self._mount_docker_socket = mount_docker_socket + self._parents = parents + self._input_files = input_files + self._output_files = output_files self._kwargs = kwargs - print(f'job: {image}, {command}, {kwargs}') + self._done = False + + print(f'job: {image}, {command}, {env}, {mount_docker_socket}, {secrets}, {parents}, {input_files}, {output_files}, {kwargs}') class LocalBatchBuilder: def __init__(self, attributes, callback): @@ -28,10 +41,44 @@ def callback(self): return self._callback def create_job(self, image, command, **kwargs): - job = LocalJob(image, command, **kwargs) + index = len(self._jobs) + job = LocalJob(index, image, command, **kwargs) self._jobs.append(job) return job + def run(self): + os.makedirs(f'_/shared') + + for j in self._jobs: + if j._parents: + for p in j._parents: + assert p._done + + os.makedirs(f'_/{j._index}/secrets') + + # localize secrets + # copy inputs + # copy outputs + + mount_options = [ + '-v', '_/shared:/shared' + ] + if j._mount_docker_socket: + mount_options.extend(['-v', '/var/run/docker.sock:/var/run/docker.sock']) + + docker_cmd = [ + 'docker', + 
'run', + *mount_options, + j._image, + *[shq(c) for c in j._command] + ] + + print(docker_cmd) + + j._done = True + + class Branch(Code): def __init__(self, owner, repo, branch, sha): self._owner = owner @@ -71,8 +118,11 @@ def checkout_script(self): config = BuildConfiguration(code, f.read(), scope) token = generate_token() +print(f'token {token}') batch = LocalBatchBuilder( attributes={ 'token': token }, callback=None) config.build(batch, code, scope) + +batch.run() From a4f758be9163552e280a3c3f947184d1d51732ab Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Wed, 18 Nov 2020 16:46:35 -0500 Subject: [PATCH 020/501] wip --- ci/bootstrap.py | 63 ++++++++++++++++++++++++++++++++++++++----------- infra/main.tf | 53 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+), 14 deletions(-) diff --git a/ci/bootstrap.py b/ci/bootstrap.py index be90760e1ce..05fe5e37cb2 100644 --- a/ci/bootstrap.py +++ b/ci/bootstrap.py @@ -1,10 +1,15 @@ import os from shlex import quote as shq +import asyncio + +import kubernetes_asyncio as kube from ci.build import BuildConfiguration, Code from ci.github import clone_or_fetch_script from ci.utils import generate_token +from batch.driver.k8s_cache import K8sCache + class LocalJob: def __init__(self, index, image, command, *, @@ -18,6 +23,7 @@ def __init__(self, index, image, command, *, self._env = env self._mount_docker_socket = mount_docker_socket self._parents = parents + self._secrets = secrets self._input_files = input_files self._output_files = output_files self._kwargs = kwargs @@ -26,6 +32,7 @@ def __init__(self, index, image, command, *, print(f'job: {image}, {command}, {env}, {mount_docker_socket}, {secrets}, {parents}, {input_files}, {output_files}, {kwargs}') + class LocalBatchBuilder: def __init__(self, attributes, callback): self._attributes = attributes @@ -46,7 +53,11 @@ def create_job(self, image, command, **kwargs): self._jobs.append(job) return job - def run(self): + async def run(self): + await kube.config.load_kube_config() + k8s_client = kube.client.CoreV1Api() + k8s_cache = K8sCache(k8s_client, refresh_time=5) + os.makedirs(f'_/shared') for j in self._jobs: @@ -54,7 +65,8 @@ def run(self): for p in j._parents: assert p._done - os.makedirs(f'_/{j._index}/secrets') + secrets_host_path = f'_/{j._index}/secrets' + os.makedirs(secrets_host_path) # localize secrets # copy inputs @@ -63,6 +75,26 @@ def run(self): mount_options = [ '-v', '_/shared:/shared' ] + + secrets = j._secrets + if secrets: + print(secrets) + k8s_secrets = await asyncio.gather(*[ + k8s_cache.read_secret( + secret['name'], secret['namespace'], + 5) + for secret in secrets + ]) + + for k8s_secret in k8s_secrets: + secret_host_path = f'{secrets_host_path}/{secret["name"]}' + + populate_secret_host_path(secret_host_path, k8s_secret['data']) + + mount_options.extend([ + '-v', f'{secrets_host_path}:{secret["mount_path"]}' + ]) + if j._mount_docker_socket: mount_options.extend(['-v', '/var/run/docker.sock:/var/run/docker.sock']) @@ -111,18 +143,21 @@ def checkout_script(self): git checkout {shq(self._sha)} ''' -scope = 'deploy' -code = Branch('cseed', 'hail', 'infra-1', '04cbbf10928aa88ee8be30b65c80388801cdcd32') +async def main(): + scope = 'deploy' + code = Branch('cseed', 'hail', 'infra-1', '04cbbf10928aa88ee8be30b65c80388801cdcd32') + + with open(f'build.yaml', 'r') as f: + config = BuildConfiguration(code, f.read(), scope, requested_step_names=['deploy_batch']) -with open(f'build.yaml', 'r') as f: - config = BuildConfiguration(code, f.read(), scope) + token 
= generate_token() + print(f'token {token}') + batch = LocalBatchBuilder( + attributes={ + 'token': token + }, callback=None) + config.build(batch, code, scope) -token = generate_token() -print(f'token {token}') -batch = LocalBatchBuilder( - attributes={ - 'token': token - }, callback=None) -config.build(batch, code, scope) + await batch.run() -batch.run() +asyncio.get_event_loop().run_until_complete(main()) diff --git a/infra/main.tf b/infra/main.tf index f7662d5eea1..b430a915c4c 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -234,3 +234,56 @@ END END } } + +resource "google_container_registry" "registry" { +} + +resource "google_service_account" "gcr_pull" { + account_id = "gcr-pull" + display_name = "pull from gcr.io" +} + +resource "google_service_account_key" "gcr_pull_key" { + service_account_id = google_service_account.gcr_pull.name +} + +resource "google_service_account" "gcr_push" { + account_id = "gcr-push" + display_name = "push to gcr.io" +} + +resource "google_service_account_key" "gcr_push_key" { + service_account_id = google_service_account.gcr_push.name +} + +resource "google_storage_bucket_iam_member" "gcr_pull_viewer" { + bucket = google_container_registry.registry.id + role = "roles/storage.objectViewer" + member = "serviceAccount:${google_service_account.gcr_pull.email}" +} + +resource "google_storage_bucket_iam_member" "gcr_push_admin" { + bucket = google_container_registry.registry.id + role = "roles/storage.objectAdmin" + member = "serviceAccount:${google_service_account.gcr_push.email}" +} + +resource "kubernetes_secret" "gcr_pull_key" { + metadata { + name = "gcr-pull-key" + } + + data = { + "gcr-pull.json" = base64decode(google_service_account_key.gcr_pull_key.private_key) + } +} + +resource "kubernetes_secret" "gcr_push_key" { + metadata { + name = "gcr-push-service-account-key" + } + + data = { + "gcr-push-service-account-key.json" = base64decode(google_service_account_key.gcr_push_key.private_key) + } +} From 36c257599d4b694608ce98ea45752e394b251e5e Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Wed, 18 Nov 2020 21:16:21 -0500 Subject: [PATCH 021/501] wip --- ci/bootstrap.py | 42 +++++++++++++++++++++++++++++++++--------- config.mk | 10 +++++----- infra/README.md | 6 ++++++ 3 files changed, 44 insertions(+), 14 deletions(-) diff --git a/ci/bootstrap.py b/ci/bootstrap.py index 05fe5e37cb2..f404789746e 100644 --- a/ci/bootstrap.py +++ b/ci/bootstrap.py @@ -1,9 +1,12 @@ import os from shlex import quote as shq +import base64 import asyncio import kubernetes_asyncio as kube +from hailtop.utils import check_shell_output + from ci.build import BuildConfiguration, Code from ci.github import clone_or_fetch_script from ci.utils import generate_token @@ -11,6 +14,14 @@ from batch.driver.k8s_cache import K8sCache +def populate_secret_host_path(host_path, secret_data): + os.makedirs(host_path) + if secret_data is not None: + for filename, data in secret_data.items(): + with open(f'{host_path}/{filename}', 'wb') as f: + f.write(base64.b64decode(data)) + + class LocalJob: def __init__(self, index, image, command, *, env=None, mount_docker_socket=False, secrets=None, parents=None, @@ -54,18 +65,28 @@ def create_job(self, image, command, **kwargs): return job async def run(self): + print('in run') + await kube.config.load_kube_config() k8s_client = kube.client.CoreV1Api() k8s_cache = K8sCache(k8s_client, refresh_time=5) - os.makedirs(f'_/shared') + cwd = os.getcwd() + + os.makedirs(f'{cwd}/_/shared') for j in self._jobs: + print(j._index) + + if j._index > 0: + # bail + 
return + if j._parents: for p in j._parents: assert p._done - secrets_host_path = f'_/{j._index}/secrets' + secrets_host_path = f'{cwd}/_/{j._index}/secrets' os.makedirs(secrets_host_path) # localize secrets @@ -73,12 +94,11 @@ async def run(self): # copy outputs mount_options = [ - '-v', '_/shared:/shared' + '-v', f'{cwd}/_/shared:/shared' ] secrets = j._secrets if secrets: - print(secrets) k8s_secrets = await asyncio.gather(*[ k8s_cache.read_secret( secret['name'], secret['namespace'], @@ -86,10 +106,10 @@ async def run(self): for secret in secrets ]) - for k8s_secret in k8s_secrets: - secret_host_path = f'{secrets_host_path}/{secret["name"]}' + for secret, k8s_secret in zip(secrets, k8s_secrets): + secret_host_path = f'{secrets_host_path}/{k8s_secret.metadata.name}' - populate_secret_host_path(secret_host_path, k8s_secret['data']) + populate_secret_host_path(secret_host_path, k8s_secret.data) mount_options.extend([ '-v', f'{secrets_host_path}:{secret["mount_path"]}' @@ -105,8 +125,12 @@ async def run(self): j._image, *[shq(c) for c in j._command] ] - - print(docker_cmd) + + docker_cmd = ' '.join(docker_cmd) + + outerr = await check_shell_output(f'bash -c {shq(docker_cmd)}', echo=True) + print(f'>>>OUT{outerr[0].decode("utf-8")}<<<\n') + print(f'>>>ERR{outerr[1].decode("utf-8")}<<<\n') j._done = True diff --git a/config.mk b/config.mk index be79ac137c4..e9b137ab8bb 100644 --- a/config.mk +++ b/config.mk @@ -1,9 +1,9 @@ -PROJECT := hail-vdc +PROJECT := hail-vdc-staging DOCKER_ROOT_IMAGE := gcr.io/$(PROJECT)/ubuntu:18.04 -DOMAIN := hail.is -INTERNAL_IP := 10.128.0.57 -IP := 35.188.91.25 -KUBERNETES_SERVER_URL := https://104.198.230.143 +DOMAIN := staging.hail.is +INTERNAL_IP := 10.128.0.2 +IP := 34.120.221.136 +KUBERNETES_SERVER_URL := https://34.71.246.49 REGION := us-central1 ZONE := us-central1-a ifeq ($(NAMESPACE),default) diff --git a/infra/README.md b/infra/README.md index 348c2d94713..e3aa46e1980 100644 --- a/infra/README.md +++ b/infra/README.md @@ -44,3 +44,9 @@ You can now install Hail: - Run `$HAIL/docker/third-party/copy_images.sh`. This copies some base images from Dockerhub (which now has rate limits) to GCR. +- Generate TLS certificates. See ../dev-docs/tls-cookbook.md. + +- Update $HAIL/config.mk with your infrastructure settings. You can + get settings from the default/global-config secret. + +- Build the CI utils image. Run `make push-ci-utils` in $HAIL/ci. 
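The README step above tells the reader to fill $HAIL/config.mk from the default/global-config secret but does not show the lookup. A minimal sketch of that step, assuming the secret keys created by the Terraform config earlier in this series (gcp_project, domain, internal_ip, ip, kubernetes_server_url, gcp_region, gcp_zone); the field names and the PROJECT/DOMAIN/INTERNAL_IP/IP/KUBERNETES_SERVER_URL/REGION/ZONE mapping are taken from the diffs above, not from any script shipped in the patches:

```
# Print the global-config values needed by config.mk; each value is stored
# base64-encoded in the secret's data map.
for field in gcp_project domain internal_ip ip kubernetes_server_url gcp_region gcp_zone; do
    value=$(kubectl -n default get secret global-config \
        -o jsonpath="{.data.$field}" | base64 --decode)
    echo "$field = $value"
done
```

The output can then be copied into config.mk by hand (PROJECT from gcp_project, DOMAIN from domain, and so on), matching the values shown in the config.mk hunk of this patch.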
From 6c9977489dcb78f348b23d01e326f10fa6b50006 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Wed, 18 Nov 2020 23:19:55 -0500 Subject: [PATCH 022/501] wip --- ci/bootstrap.py | 206 +++++++++++++++++++++++++++++++++------------- ci/bootstrap.yaml | 49 +++++++++++ infra/README.md | 3 + infra/main.tf | 2 +- 4 files changed, 202 insertions(+), 58 deletions(-) create mode 100644 ci/bootstrap.yaml diff --git a/ci/bootstrap.py b/ci/bootstrap.py index f404789746e..c061180aabb 100644 --- a/ci/bootstrap.py +++ b/ci/bootstrap.py @@ -13,6 +13,8 @@ from batch.driver.k8s_cache import K8sCache +KUBERNETES_SERVER_URL = 'https://34.71.246.49' + def populate_secret_host_path(host_path, secret_data): os.makedirs(host_path) @@ -24,7 +26,7 @@ def populate_secret_host_path(host_path, secret_data): class LocalJob: def __init__(self, index, image, command, *, - env=None, mount_docker_socket=False, secrets=None, parents=None, + env=None, mount_docker_socket=False, secrets=None, service_account=None, parents=None, input_files=None, output_files=None, **kwargs): self._index = index @@ -35,13 +37,25 @@ def __init__(self, index, image, command, *, self._mount_docker_socket = mount_docker_socket self._parents = parents self._secrets = secrets + self._service_account = service_account self._input_files = input_files self._output_files = output_files self._kwargs = kwargs self._done = False + self._succeeded = None + - print(f'job: {image}, {command}, {env}, {mount_docker_socket}, {secrets}, {parents}, {input_files}, {output_files}, {kwargs}') +async def docker_run(*args): + script = ' '.join([shq(a) for a in args]) + outerr = await check_shell_output(script) + + cid = outerr[0].decode('ascii').strip() + + outerr = await check_shell_output(f'docker wait {cid}') + + exit_code = int(outerr[0].decode('ascii').strip()) + return cid, exit_code == 0 class LocalBatchBuilder: @@ -64,74 +78,152 @@ def create_job(self, image, command, **kwargs): self._jobs.append(job) return job - async def run(self): - print('in run') + async def run(self, token): + cwd = os.getcwd() + assert cwd.startswith('/') + + token = generate_token() + root = f'{cwd}/_/{token}' await kube.config.load_kube_config() k8s_client = kube.client.CoreV1Api() k8s_cache = K8sCache(k8s_client, refresh_time=5) - - cwd = os.getcwd() - - os.makedirs(f'{cwd}/_/shared') + + os.makedirs(f'{root}/shared') + + # FIXME hack + prefix = 'gs://dummy/build/{token}' for j in self._jobs: - print(j._index) - - if j._index > 0: - # bail - return - if j._parents: for p in j._parents: assert p._done - - secrets_host_path = f'{cwd}/_/{j._index}/secrets' - os.makedirs(secrets_host_path) - - # localize secrets - # copy inputs - # copy outputs - - mount_options = [ - '-v', f'{cwd}/_/shared:/shared' - ] - - secrets = j._secrets - if secrets: - k8s_secrets = await asyncio.gather(*[ - k8s_cache.read_secret( - secret['name'], secret['namespace'], - 5) - for secret in secrets - ]) + if not p._succeeded: + print(f'{j._index}: skipping: parent {p._index} failed') + j._done = True + j._failed = True + + job_root = f'{root}/{j._index}' + + os.makedirs(f'{job_root}/io') + os.makedirs(f'{job_root}/secrets') + + if j._input_files: + copy_script = 'set -ex\n' + for src, dest in j._input_files: + assert src.startswith(prefix) + src = f'/shared{src[len(prefix):]}' + copy_script = copy_script + 'cp -a {src} {dest}\n' + input_cid, input_ok = await docker_run( + 'docker', 'run', '-d', 'ubuntu:18.04', '-v', f'{root}/shared:/shared', '-v', f'{job_root}/io:/io', copy_script) + + print(f'{j._index}/input: 
{input_cid} {input_ok}') + else: + input_ok = True + + if input_ok: + mount_options = [ + '-v', f'{root}/io:/io' + ] - for secret, k8s_secret in zip(secrets, k8s_secrets): - secret_host_path = f'{secrets_host_path}/{k8s_secret.metadata.name}' + env_options = [] + if j._env: + for key, value in j._env: + env_options.extend([ + '-e', f'{key}={value}']) + if j._service_account: + namespace = j._service_account['namespace'] + name = j._service_account['name'] + + sa = await k8s_cache.read_service_account(name, namespace, 5) + assert len(sa.secrets) == 1 + + token_secret_name = sa.secrets[0].name + + secret = await k8s_cache.read_secret(token_secret_name, namespace, 5) + + token = base64.b64decode(secret.data['token']).decode() + cert = secret.data['ca.crt'] + + kube_config = f''' +apiVersion: v1 +clusters: +- cluster: + certificate-authority: /.kube/ca.crt + server: {KUBERNETES_SERVER_URL} + name: default-cluster +contexts: +- context: + cluster: default-cluster + user: {namespace}-{name} + namespace: {namespace} + name: default-context +current-context: default-context +kind: Config +preferences: {{}} +users: +- name: {namespace}-{name} + user: + token: {token} +''' - populate_secret_host_path(secret_host_path, k8s_secret.data) + dot_kube_dir = f'{job_root}/secrets/.kube' + os.makedirs(dot_kube_dir) + with open(f'{dot_kube_dir}/config', 'w') as f: + f.write(kube_config) + with open(f'{dot_kube_dir}/ca.crt', 'w') as f: + f.write(base64.b64decode(cert).decode()) mount_options.extend([ - '-v', f'{secrets_host_path}:{secret["mount_path"]}' + '-v', f'{dot_kube_dir}:/.kube' + ]) + env_options.extend([ + '-e', 'KUBECONFIG=/.kube/config']) + + secrets = j._secrets + if secrets: + k8s_secrets = await asyncio.gather(*[ + k8s_cache.read_secret( + secret['name'], secret['namespace'], + 5) + for secret in secrets ]) - if j._mount_docker_socket: - mount_options.extend(['-v', '/var/run/docker.sock:/var/run/docker.sock']) - - docker_cmd = [ - 'docker', - 'run', - *mount_options, - j._image, - *[shq(c) for c in j._command] - ] - - docker_cmd = ' '.join(docker_cmd) - - outerr = await check_shell_output(f'bash -c {shq(docker_cmd)}', echo=True) - print(f'>>>OUT{outerr[0].decode("utf-8")}<<<\n') - print(f'>>>ERR{outerr[1].decode("utf-8")}<<<\n') - + for secret, k8s_secret in zip(secrets, k8s_secrets): + secret_host_path = f'{job_root}/secrets/{k8s_secret.metadata.name}' + + populate_secret_host_path(secret_host_path, k8s_secret.data) + + mount_options.extend([ + '-v', f'{secret_host_path}:{secret["mount_path"]}' + ]) + + if j._mount_docker_socket: + mount_options.extend(['-v', '/var/run/docker.sock:/var/run/docker.sock']) + + main_cid, main_ok = await docker_run( + 'docker', 'run', '-d', + *env_options, *mount_options, j._image, *j._command) + print(f'{j._index}/main: {main_cid} {main_ok}') + else: + main_ok = False + print(f'{j._index}/main: skipping: input failed') + + if j._output_files: + if main_ok: + copy_script = 'set -ex\n' + for src, dest in j._input_files: + copy_script = copy_script + 'cp -a {src} {dest}\n' + output_cid, output_ok = await docker_run( + 'docker', 'run', '-d', 'ubuntu:18.04', '-v', f'{root}/shared:/shared', '-v', f'{job_root}/io:/io', copy_script) + print(f'{j._index}/output: {output_cid} {output_ok}') + else: + output_ok = False + print(f'{j._index}/output: skipping: main failed') + else: + output_ok = True + + j._succeeded = (input_ok and main_ok and output_ok) j._done = True @@ -169,7 +261,7 @@ def checkout_script(self): async def main(): scope = 'deploy' - code = Branch('cseed', 
'hail', 'infra-1', '04cbbf10928aa88ee8be30b65c80388801cdcd32') + code = Branch('cseed', 'hail', 'infra-1', 'dd8c84ee1601d9dd5643ec78fd9996cb51472e18') with open(f'build.yaml', 'r') as f: config = BuildConfiguration(code, f.read(), scope, requested_step_names=['deploy_batch']) @@ -182,6 +274,6 @@ async def main(): }, callback=None) config.build(batch, code, scope) - await batch.run() + await batch.run(token) asyncio.get_event_loop().run_until_complete(main()) diff --git a/ci/bootstrap.yaml b/ci/bootstrap.yaml new file mode 100644 index 00000000000..3d2ad51ea87 --- /dev/null +++ b/ci/bootstrap.yaml @@ -0,0 +1,49 @@ +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: infrastructure +value: 1000000 +globalDefault: false +description: "For infrastructure like gateway." +--- +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: production +value: 900000 +globalDefault: false +description: "For production services like apiserver, batch, etc." +--- +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: user +value: 500000 +globalDefault: true +description: "For user activities like user batch jobs." +--- +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: test +value: 0 +globalDefault: false +description: "For tests." +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: ci-agent +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: ci-agent-cluster-admin +subjects: +- kind: ServiceAccount + name: ci-agent + namespace: default +roleRef: + kind: ClusterRole + name: cluster-admin + apiGroup: "" diff --git a/infra/README.md b/infra/README.md index e3aa46e1980..d6a9ace4396 100644 --- a/infra/README.md +++ b/infra/README.md @@ -50,3 +50,6 @@ You can now install Hail: get settings from the default/global-config secret. - Build the CI utils image. Run `make push-ci-utils` in $HAIL/ci. + +- Run `kubectl -n default apply -f bootstrap.yaml`. 
+ diff --git a/infra/main.tf b/infra/main.tf index b430a915c4c..bd89fa6f24b 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -264,7 +264,7 @@ resource "google_storage_bucket_iam_member" "gcr_pull_viewer" { resource "google_storage_bucket_iam_member" "gcr_push_admin" { bucket = google_container_registry.registry.id - role = "roles/storage.objectAdmin" + role = "roles/storage.admin" member = "serviceAccount:${google_service_account.gcr_push.email}" } From c7e04e2e5f244421f80fac22d7a6311450b3ab1a Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Thu, 19 Nov 2020 00:35:39 -0500 Subject: [PATCH 023/501] wip --- build.yaml | 10 ++++---- ci/bootstrap.py | 62 ++++++++++++++++++++++++++++++------------------- ci/ci/build.py | 4 ++-- 3 files changed, 45 insertions(+), 31 deletions(-) diff --git a/build.yaml b/build.yaml index eab378950e0..2e7f3307d6e 100644 --- a/build.yaml +++ b/build.yaml @@ -130,11 +130,11 @@ steps: - from: /io/repo/atgu/sql to: /repo/atgu/ - from: /io/repo/hail/python/hailtop - to: /repo/hailtop/ + to: /repo/ - from: /io/repo/hail/python/test - to: /repo/test/ - - from: /io/repo/hail/python/cluster-tests/ - to: /repo/hail/python/cluster-tests + to: /repo/ + - from: /io/repo/hail/python/cluster-tests + to: /repo/hail/python/ - from: /io/repo/hail/python/hail/hail_version to: /hail_version - from: /io/repo/hail/python/hail/hail_pip_version @@ -142,7 +142,7 @@ steps: - from: /io/repo/git_version to: /git_version - from: /io/repo/address/test - to: /repo/address/test + to: /repo/address/ dependsOn: - base_image - kind: buildImage diff --git a/ci/bootstrap.py b/ci/bootstrap.py index c061180aabb..9d4f22ab1f7 100644 --- a/ci/bootstrap.py +++ b/ci/bootstrap.py @@ -26,7 +26,7 @@ def populate_secret_host_path(host_path, secret_data): class LocalJob: def __init__(self, index, image, command, *, - env=None, mount_docker_socket=False, secrets=None, service_account=None, parents=None, + env=None, mount_docker_socket=False, secrets=None, service_account=None, attributes=None, parents=None, input_files=None, output_files=None, **kwargs): self._index = index @@ -38,6 +38,7 @@ def __init__(self, index, image, command, *, self._parents = parents self._secrets = secrets self._service_account = service_account + self._attributes = attributes self._input_files = input_files self._output_files = output_files self._kwargs = kwargs @@ -78,28 +79,29 @@ def create_job(self, image, command, **kwargs): self._jobs.append(job) return job - async def run(self, token): + async def run(self): cwd = os.getcwd() assert cwd.startswith('/') - token = generate_token() - root = f'{cwd}/_/{token}' + batch_token = self._attributes['token'] + root = f'{cwd}/_/{batch_token}' await kube.config.load_kube_config() k8s_client = kube.client.CoreV1Api() k8s_cache = K8sCache(k8s_client, refresh_time=5) os.makedirs(f'{root}/shared') - - # FIXME hack - prefix = 'gs://dummy/build/{token}' + + prefix = f'gs://dummy/build/{batch_token}' for j in self._jobs: + job_name = j._attributes.get('name') + if j._parents: for p in j._parents: assert p._done if not p._succeeded: - print(f'{j._index}: skipping: parent {p._index} failed') + print(f'{j._index}: {job_name}: SKIPPED: parent {p._index} failed') j._done = True j._failed = True @@ -111,26 +113,31 @@ async def run(self, token): if j._input_files: copy_script = 'set -ex\n' for src, dest in j._input_files: - assert src.startswith(prefix) + assert src.startswith(prefix), (prefix, src) src = f'/shared{src[len(prefix):]}' - copy_script = copy_script + 'cp -a {src} {dest}\n' + if 
dest.endswith('/'): + copy_script = copy_script + f'mkdir -p {dest}\n' + else: + copy_script = copy_script + f'mkdir -p {os.path.dirname(dest)}\n' + copy_script = copy_script + f'cp -a {src} {dest}\n' input_cid, input_ok = await docker_run( - 'docker', 'run', '-d', 'ubuntu:18.04', '-v', f'{root}/shared:/shared', '-v', f'{job_root}/io:/io', copy_script) + 'docker', 'run', '-d', '-v', f'{root}/shared:/shared', '-v', f'{job_root}/io:/io', 'ubuntu:18.04', '/bin/bash', '-c', copy_script) - print(f'{j._index}/input: {input_cid} {input_ok}') + print(f'{j._index}: {job_name}/input: {input_cid} {"OK" if input_ok else "FAILED"}') else: input_ok = True if input_ok: mount_options = [ - '-v', f'{root}/io:/io' + '-v', f'{job_root}/io:/io' ] - + env_options = [] if j._env: for key, value in j._env: env_options.extend([ '-e', f'{key}={value}']) + if j._service_account: namespace = j._service_account['namespace'] name = j._service_account['name'] @@ -204,22 +211,28 @@ async def run(self, token): main_cid, main_ok = await docker_run( 'docker', 'run', '-d', *env_options, *mount_options, j._image, *j._command) - print(f'{j._index}/main: {main_cid} {main_ok}') + print(f'{j._index}: {job_name}/main: {main_cid} {"OK" if main_ok else "FAILED"}') else: main_ok = False - print(f'{j._index}/main: skipping: input failed') + print(f'{j._index}: {job_name}/main: SKIPPED: input failed') if j._output_files: if main_ok: copy_script = 'set -ex\n' - for src, dest in j._input_files: - copy_script = copy_script + 'cp -a {src} {dest}\n' - output_cid, output_ok = await docker_run( - 'docker', 'run', '-d', 'ubuntu:18.04', '-v', f'{root}/shared:/shared', '-v', f'{job_root}/io:/io', copy_script) - print(f'{j._index}/output: {output_cid} {output_ok}') + for src, dest in j._output_files: + assert dest.startswith(prefix), (prefix, dest) + dest = f'/shared{dest[len(prefix):]}' + if dest.endswith('/'): + copy_script = copy_script + f'mkdir -p {dest}\n' + else: + copy_script = copy_script + f'mkdir -p {os.path.dirname(dest)}\n' + copy_script = copy_script + f'cp -a {src} {dest}\n' + output_cid, output_ok = await docker_run( + 'docker', 'run', '-d', '-v', f'{root}/shared:/shared', '-v', f'{job_root}/io:/io', 'ubuntu:18.04', '/bin/bash', '-c', copy_script) + print(f'{j._index}: {job_name}/output: {output_cid} {"OK" if output_ok else "FAILED"}') else: output_ok = False - print(f'{j._index}/output: skipping: main failed') + print(f'{j._index}: {job_name}/output: SKIPPED: main failed') else: output_ok = True @@ -259,6 +272,7 @@ def checkout_script(self): git checkout {shq(self._sha)} ''' + async def main(): scope = 'deploy' code = Branch('cseed', 'hail', 'infra-1', 'dd8c84ee1601d9dd5643ec78fd9996cb51472e18') @@ -267,13 +281,13 @@ async def main(): config = BuildConfiguration(code, f.read(), scope, requested_step_names=['deploy_batch']) token = generate_token() - print(f'token {token}') batch = LocalBatchBuilder( attributes={ 'token': token }, callback=None) config.build(batch, code, scope) - await batch.run(token) + await batch.run() + asyncio.get_event_loop().run_until_complete(main()) diff --git a/ci/ci/build.py b/ci/ci/build.py index d29327b0424..140c47ea727 100644 --- a/ci/ci/build.py +++ b/ci/ci/build.py @@ -954,8 +954,8 @@ def build(self, batch, code, scope): # pylint: disable=unused-argument if not self.cant_create_database: password_files_input = [ - (f'gs://{BUCKET}/build/{batch.attributes["token"]}/{self.admin_password_file}', self.admin_password_file), - 
(f'gs://{BUCKET}/build/{batch.attributes["token"]}/{self.user_password_file}', self.user_password_file)] + (f'gs://{BUCKET}/build/{batch.attributes["token"]}{self.admin_password_file}', self.admin_password_file), + (f'gs://{BUCKET}/build/{batch.attributes["token"]}{self.user_password_file}', self.user_password_file)] input_files.extend(password_files_input) self.create_passwords_job = batch.create_job( From ec8c7443234d70807239f56ad7600c1b1ce75969 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Thu, 19 Nov 2020 03:26:04 -0500 Subject: [PATCH 024/501] wip --- infra/README.md | 17 +++++ infra/main.tf | 162 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 179 insertions(+) diff --git a/infra/README.md b/infra/README.md index d6a9ace4396..b0c27c3a6d7 100644 --- a/infra/README.md +++ b/infra/README.md @@ -53,3 +53,20 @@ You can now install Hail: - Run `kubectl -n default apply -f bootstrap.yaml`. +- On an instance on the internal network, 100gb, Ubuntu 20.04 TLS, enable + full API and run: + + ``` + sudo apt update + sudo apt install -y docker.io python3-pip mysql-client-core-8.0 + sudo snap install --classic kubectl + gcloud -q auth configure-docker + gcloud container clusters get-credentials --zone us-central1-a vdc + git clone https://github.com/cseed/hail.git + python3 -m pip install -r $HOME/hail/docker/requirements.txt + ``` + +add yourself to the docker group +add ssh firewall rule to internal + +python3 ./ci/bootstrap.py diff --git a/infra/main.tf b/infra/main.tf index bd89fa6f24b..1a530f4fe14 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -75,6 +75,16 @@ resource "google_container_node_pool" "vdc_preemptible_pool" { preemptible = true machine_type = "n1-standard-2" + labels = { + "preemptible" = "true" + } + + taint { + key = "preemptible" + value = "true" + effect = "NO_SCHEDULE" + } + metadata = { disable-legacy-endpoints = "true" } @@ -99,6 +109,10 @@ resource "google_container_node_pool" "vdc_nonpreemptible_pool" { preemptible = false machine_type = "n1-standard-2" + labels = { + preemptible = "false" + } + metadata = { disable-legacy-endpoints = "true" } @@ -287,3 +301,151 @@ resource "kubernetes_secret" "gcr_push_key" { "gcr-push-service-account-key.json" = base64decode(google_service_account_key.gcr_push_key.private_key) } } + +resource "kubernetes_namespace" "ukbb_rg" { + metadata { + name = "ukbb-rg" + } +} + +resource "kubernetes_service" "ukbb_rb_browser" { + metadata { + name = "ukbb-rg-browser" + namespace = "ukbb-rg" + labels = { + app = "ukbb-rg-browser" + } + } + spec { + port { + port = 80 + protocol = "TCP" + target_port = 80 + } + selector = { + app = "ukbb-rg-browser" + } + } +} + +resource "kubernetes_service" "ukbb_rb_static" { + metadata { + name = "ukbb-rg-static" + namespace = "ukbb-rg" + labels = { + app = "ukbb-rg-static" + } + } + spec { + port { + port = 80 + protocol = "TCP" + target_port = 80 + } + selector = { + app = "ukbb-rg-static" + } + } +} + +resource "random_id" "atgu_name_suffix" { + byte_length = 2 +} + +resource "google_service_account" "atgu" { + account_id = "atgu-${random_id.atgu_name_suffix.hex}" +} + +resource "google_service_account_key" "atgu_key" { + service_account_id = google_service_account.atgu.name +} + +resource "kubernetes_secret" "atgu_gsa_key" { + metadata { + name = "atgu-gsa-key" + } + + data = { + "key.json" = base64decode(google_service_account_key.atgu_key.private_key) + } +} + +resource "random_id" "auth_name_suffix" { + byte_length = 2 +} + +resource "google_service_account" "auth" { + account_id = 
"auth-${random_id.auth_name_suffix.hex}" +} + +resource "google_service_account_key" "auth_key" { + service_account_id = google_service_account.auth.name +} + +resource "kubernetes_secret" "auth_gsa_key" { + metadata { + name = "auth-gsa-key" + } + + data = { + "key.json" = base64decode(google_service_account_key.auth_key.private_key) + } +} + +resource "random_id" "batch_name_suffix" { + byte_length = 2 +} + +resource "google_service_account" "batch" { + account_id = "batch-${random_id.batch_name_suffix.hex}" +} + +resource "google_service_account_key" "batch_key" { + service_account_id = google_service_account.batch.name +} + +resource "kubernetes_secret" "batch_gsa_key" { + metadata { + name = "batch-gsa-key" + } + + data = { + "key.json" = base64decode(google_service_account_key.batch_key.private_key) + } +} + +resource "google_service_account" "benchmark" { + account_id = "benchmark" +} + +resource "google_service_account_key" "benchmark_key" { + service_account_id = google_service_account.benchmark.name +} + +resource "kubernetes_secret" "benchmark_gsa_key" { + metadata { + name = "benchmark-gsa-key" + } + + data = { + "key.json" = base64decode(google_service_account_key.benchmark_key.private_key) + } +} + +resource "google_service_account" "monitoring" { + account_id = "monitoring" +} + +resource "google_service_account_key" "monitoring_key" { + service_account_id = google_service_account.monitoring.name +} + +resource "kubernetes_secret" "monitoring_gsa_key" { + metadata { + name = "monitoring-gsa-key" + } + + data = { + "key.json" = base64decode(google_service_account_key.monitoring_key.private_key) + } +} From 76a89d34277e9fb122063950eb2bd3025bda0128 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Thu, 19 Nov 2020 12:36:14 -0500 Subject: [PATCH 025/501] wip --- infra/README.md | 22 +++++++++++++++++++++- infra/main.tf | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/infra/README.md b/infra/README.md index b0c27c3a6d7..6300c4878d7 100644 --- a/infra/README.md +++ b/infra/README.md @@ -39,6 +39,10 @@ Instructions: Terraform has created a GKE cluster named `vdc`. We assume `kubectl` is configured to point at this cluster. +- Go to the Google Cloud console, VPC networks > internal > Private + service connection > Private connections to services, and enable + Export custom routes to both connections. + You can now install Hail: - Run `$HAIL/docker/third-party/copy_images.sh`. This copies some @@ -51,7 +55,19 @@ You can now install Hail: - Build the CI utils image. Run `make push-ci-utils` in $HAIL/ci. -- Run `kubectl -n default apply -f bootstrap.yaml`. +- Run `kubectl -n default apply -f $HAIL/ci/bootstrap.yaml`. + +- Go to the Google Cloud console, API & Services, Credentials. + Configure the consent screen. Add the scope: + https://www.googleapis.com/auth/userinfo.email. Create an OAuth + client ID. Authorize the redirect URI: + https://auth./oauth2callback. Download the client secret + as client_secret.json. 
Create the auth-oauth2-client-secret secret + with: + + ``` + kubectl -n default create secret generic auth-oauth2-client-secret --from-file=./client_secret.json + ``` - On an instance on the internal network, 100gb, Ubuntu 20.04 TLS, enable full API and run: @@ -69,4 +85,8 @@ You can now install Hail: add yourself to the docker group add ssh firewall rule to internal +create users + python3 ./ci/bootstrap.py + +cotton@admin:~/hail$ (sudo rm -rf _) && HAIL_CI_UTILS_IMAGE=gcr.io/hail-vdc-staging/ci-utils:latest HAIL_CI_BUCKET_NAME='dummy' KUBERNETES_SERVER_URL='http://34.71.246.49' HAIL_DEFAULT_NAMESPACE='default' HAIL_DOMAIN=staging.hail.is HAIL_GCP_ZONE=us-central1-a HAIL_GCP_PROJECT=hail-vdc-staging PYTHONPATH=$HOME/hail/ci:$HOME/hail/batch:$HOME/hail/hail/python python3 ./ci/bootstrap.py^C diff --git a/infra/main.tf b/infra/main.tf index 1a530f4fe14..18dfaf3e223 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -223,6 +223,7 @@ resource "kubernetes_secret" "database_server_config" { "client-cert.pem" = google_sql_ssl_cert.root_client_cert.cert "client-key.pem" = google_sql_ssl_cert.root_client_cert.private_key "sql-config.cnf" = < Date: Thu, 19 Nov 2020 19:35:12 -0500 Subject: [PATCH 026/501] wip --- bootstrap-gateway/Dockerfile | 13 +++++ bootstrap-gateway/Makefile | 24 ++++++++++ bootstrap-gateway/deployment.yaml | 71 ++++++++++++++++++++++++++++ bootstrap-gateway/gateway.nginx.conf | 18 +++++++ bootstrap-gateway/gzip.conf | 28 +++++++++++ bootstrap-gateway/nginx.conf | 48 +++++++++++++++++++ bootstrap-gateway/service.yaml | 37 +++++++++++++++ build.yaml | 19 ++++++-- config.mk | 2 +- infra/README.md | 11 ++++- infra/main.tf | 5 +- letsencrypt/Dockerfile | 13 +++-- letsencrypt/Makefile | 10 ++-- letsencrypt/service.yaml | 32 +++++++++++++ 14 files changed, 312 insertions(+), 19 deletions(-) create mode 100644 bootstrap-gateway/Dockerfile create mode 100644 bootstrap-gateway/Makefile create mode 100644 bootstrap-gateway/deployment.yaml create mode 100644 bootstrap-gateway/gateway.nginx.conf create mode 100644 bootstrap-gateway/gzip.conf create mode 100644 bootstrap-gateway/nginx.conf create mode 100644 bootstrap-gateway/service.yaml diff --git a/bootstrap-gateway/Dockerfile b/bootstrap-gateway/Dockerfile new file mode 100644 index 00000000000..a2fe4d140e1 --- /dev/null +++ b/bootstrap-gateway/Dockerfile @@ -0,0 +1,13 @@ +FROM {{ hail_ubuntu_image.image }} + +RUN hail-apt-get-install nginx + +RUN rm -f /etc/nginx/sites-enabled/default +ADD nginx.conf /etc/nginx/ +ADD gateway.nginx.conf /etc/nginx/conf.d/gateway.conf +ADD gzip.conf /etc/nginx/conf.d/gzip.conf + +RUN ln -sf /dev/stdout /var/log/nginx/access.log +RUN ln -sf /dev/stderr /var/log/nginx/error.log + +CMD ["nginx", "-g", "daemon off;"] diff --git a/bootstrap-gateway/Makefile b/bootstrap-gateway/Makefile new file mode 100644 index 00000000000..b5cffe53a91 --- /dev/null +++ b/bootstrap-gateway/Makefile @@ -0,0 +1,24 @@ +include ../config.mk + +.PHONY: build push deploy + +GATEWAY_LATEST = gcr.io/$(PROJECT)/gateway:latest +GATEWAY_IMAGE = gcr.io/$(PROJECT)/gateway:$(shell docker images -q --no-trunc gateway | sed -e 's,[^:]*:,,') + +build: + $(MAKE) -C ../docker hail-ubuntu + -docker pull $(GATEWAY_LATEST) + python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"hail-ubuntu"}}' Dockerfile Dockerfile.out + docker build -t gateway -f Dockerfile.out --cache-from gateway,$(GATEWAY_LATEST),hail-ubuntu . 
+ +push: build + docker tag gateway $(GATEWAY_LATEST) + docker push $(GATEWAY_LATEST) + docker tag gateway $(GATEWAY_IMAGE) + docker push $(GATEWAY_IMAGE) + +deploy: push + python3 ../ci/jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":true,"global":{"ip":"$(IP)"}}' service.yaml service.yaml.out + kubectl -n default apply -f service.yaml.out + python3 ../ci/jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":true,"gateway_image":{"image":"$(GATEWAY_IMAGE)"},"global":{"project":"$(PROJECT)"}}' deployment.yaml deployment.yaml.out + kubectl -n default apply -f deployment.yaml.out diff --git a/bootstrap-gateway/deployment.yaml b/bootstrap-gateway/deployment.yaml new file mode 100644 index 00000000000..73d7d466547 --- /dev/null +++ b/bootstrap-gateway/deployment.yaml @@ -0,0 +1,71 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: gateway-deployment + labels: + app: gateway + hail.is/sha: "{{ code.sha }}" +spec: + selector: + matchLabels: + app: gateway + replicas: 3 + template: + metadata: + labels: + app: gateway + hail.is/sha: "{{ code.sha }}" + spec: + priorityClassName: infrastructure + nodeSelector: + preemptible: "true" + tolerations: + - key: preemptible + value: "true" + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: "app" + operator: In + values: + - gateway + topologyKey: "kubernetes.io/hostname" + containers: + - name: gateway + image: "{{ gateway_image.image }}" + resources: + requests: + memory: "250M" + cpu: "100m" + ports: + - containerPort: 80 + - containerPort: 443 +--- +apiVersion: autoscaling/v2beta1 +kind: HorizontalPodAutoscaler +metadata: + name: gateway +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: gateway + minReplicas: 3 + maxReplicas: 10 + metrics: + - type: Resource + resource: + name: cpu + targetAverageUtilization: 80 +--- +apiVersion: policy/v1beta1 +kind: PodDisruptionBudget +metadata: + name: gateway +spec: + minAvailable: 2 + selector: + matchLabels: + app: gateway diff --git a/bootstrap-gateway/gateway.nginx.conf b/bootstrap-gateway/gateway.nginx.conf new file mode 100644 index 00000000000..e1c73fb8488 --- /dev/null +++ b/bootstrap-gateway/gateway.nginx.conf @@ -0,0 +1,18 @@ +server { + listen 80 default_server; + listen [::]:80 default_server; + server_name _; + + location /.well-known/acme-challenge { + proxy_pass http://letsencrypt; + + proxy_set_header Host $http_host; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Host $http_host; + proxy_set_header X-Forwarded-Proto https; + } + + location / { + return 301 https://$host$request_uri; + } +} diff --git a/bootstrap-gateway/gzip.conf b/bootstrap-gateway/gzip.conf new file mode 100644 index 00000000000..2c8366a97d8 --- /dev/null +++ b/bootstrap-gateway/gzip.conf @@ -0,0 +1,28 @@ +# Re-enable when we migrate to 10.15.8: gzip on; +gzip_vary on; +gzip_comp_level 6; +gzip_buffers 16 8k; +gzip_http_version 1.1; +gzip_min_length 512; +gzip_proxied any; + +# suggestions: https://www.fastly.com/blog/new-gzip-settings-and-deciding-what-compress +# defaults: https://nginx.org/en/docs/http/ngx_http_gzip_module.html +gzip_types + text/plain + text/css + text/javascript + application/json + application/javascript + application/x-font-ttf + application/x-font-truetype + application/font-woff + font/woff2 + woff + font/eot + font/otf + font/opentype + image/svg+xml + svg + svgz + image/x-icon; \ 
No newline at end of file diff --git a/bootstrap-gateway/nginx.conf b/bootstrap-gateway/nginx.conf new file mode 100644 index 00000000000..d0df92f52a9 --- /dev/null +++ b/bootstrap-gateway/nginx.conf @@ -0,0 +1,48 @@ +user www-data; +worker_processes auto; +pid /run/nginx.pid; +include /etc/nginx/modules-enabled/*.conf; + +events { + worker_connections 768; +} + + +http { + sendfile on; + tcp_nopush on; + tcp_nodelay on; + keepalive_timeout 65; + types_hash_max_size 2048; + server_names_hash_bucket_size 128; + + include /etc/nginx/mime.types; + default_type application/octet-stream; + + ssl_protocols TLSv1 TLSv1.1 TLSv1.2; # Dropping SSLv3, ref: POODLE + ssl_prefer_server_ciphers on; + + log_format combined_real_ip '$http_x_real_ip - $remote_addr - $remote_user [$time_local] ' + '$scheme "$request" $status $body_bytes_sent ' + '"$http_referer" "$http_user_agent"'; + access_log /var/log/nginx/access.log combined_real_ip; + error_log /var/log/nginx/error.log; + + gzip on; + + map $http_x_forwarded_proto $updated_scheme { + default $http_x_forwarded_proto; + '' $scheme; + } + map $http_x_forwarded_host $updated_host { + default $http_x_forwarded_host; + '' $http_host; + } + map $http_upgrade $connection_upgrade { + default upgrade; + '' close; + } + + include /etc/nginx/conf.d/*.conf; + include /etc/nginx/sites-enabled/*; +} diff --git a/bootstrap-gateway/service.yaml b/bootstrap-gateway/service.yaml new file mode 100644 index 00000000000..6f27cdc7e90 --- /dev/null +++ b/bootstrap-gateway/service.yaml @@ -0,0 +1,37 @@ +apiVersion: v1 +kind: Service +metadata: + name: letsencrypt + labels: + app: letsencrypt +spec: + ports: + - name: http + port: 80 + protocol: TCP + targetPort: 80 + selector: + app: letsencrypt +--- +apiVersion: v1 +kind: Service +metadata: + name: gateway + labels: + app: gateway +spec: + ports: + - name: http + port: 80 + protocol: TCP + targetPort: 80 + - name: https + port: 443 + protocol: TCP + targetPort: 443 + selector: + app: gateway + loadBalancerIP: "{{ global.ip }}" + # https://kubernetes.io/docs/tutorials/services/source-ip/#source-ip-for-services-with-type-loadbalancer + externalTrafficPolicy: Local + type: LoadBalancer diff --git a/build.yaml b/build.yaml index 2e7f3307d6e..a8f6f796e7f 100644 --- a/build.yaml +++ b/build.yaml @@ -339,6 +339,19 @@ steps: dependsOn: - default_ns - base_image + - kind: runImage + name: create_test_gsa_keys + image: + valueFrom: service_base_image.image + script: | + # batch, benchmark, auth gsa keys + kubectl -n {{ default_ns.name }} get -o json secret test-gsa-key | jq '{apiVersion, kind, type, data, metadata: {name: "auth-gsa-key"}}' | kubectl -n {{ default_ns.name }} apply -f - + kubectl -n {{ default_ns.name }} get -o json secret test-gsa-key | jq '{apiVersion, kind, type, data, metadata: {name: "batch-gsa-key"}}' | kubectl -n {{ default_ns.name }} apply -f - + kubectl -n {{ default_ns.name }} get -o json secret test-gsa-key | jq '{apiVersion, kind, type, data, metadata: {name: "atgu-gsa-key"}}' | kubectl -n {{ default_ns.name }} apply -f - + kubectl -n {{ default_ns.name }} get -o json secret test-gsa-key | jq '{apiVersion, kind, type, data, metadata: {name: "benchmark-gsa-key"}}' | kubectl -n {{ default_ns.name }} apply -f - + scopes: + - test + - dev - kind: runImage name: create_accounts image: @@ -370,11 +383,6 @@ steps: await db.async_close() async_to_blocking(main()) EOF - # batch, benchmark, auth gsa keys - kubectl -n {{ default_ns.name }} get -o json secret test-gsa-key | jq '{apiVersion, kind, type, data, metadata: 
{name: "auth-gsa-key"}}' | kubectl -n {{ default_ns.name }} apply -f - - kubectl -n {{ default_ns.name }} get -o json secret test-gsa-key | jq '{apiVersion, kind, type, data, metadata: {name: "batch-gsa-key"}}' | kubectl -n {{ default_ns.name }} apply -f - - kubectl -n {{ default_ns.name }} get -o json secret test-gsa-key | jq '{apiVersion, kind, type, data, metadata: {name: "atgu-gsa-key"}}' | kubectl -n {{ default_ns.name }} apply -f - - kubectl -n {{ default_ns.name }} get -o json secret test-gsa-key | jq '{apiVersion, kind, type, data, metadata: {name: "benchmark-gsa-key"}}' | kubectl -n {{ default_ns.name }} apply -f - # create auth N=$(kubectl -n {{ default_ns.name }} get secret --ignore-not-found=true --no-headers auth-tokens | wc -l | tr -d '[:space:]') if [[ $N = 0 ]]; then @@ -443,6 +451,7 @@ steps: - auth_database - service_base_image - create_deploy_config + - create_test_gsa_keys - kind: buildImage name: hail_build_image dockerFile: hail/Dockerfile.hail-build diff --git a/config.mk b/config.mk index e9b137ab8bb..218fd309548 100644 --- a/config.mk +++ b/config.mk @@ -2,7 +2,7 @@ PROJECT := hail-vdc-staging DOCKER_ROOT_IMAGE := gcr.io/$(PROJECT)/ubuntu:18.04 DOMAIN := staging.hail.is INTERNAL_IP := 10.128.0.2 -IP := 34.120.221.136 +IP := 34.121.206.184 KUBERNETES_SERVER_URL := https://34.71.246.49 REGION := us-central1 ZONE := us-central1-a diff --git a/infra/README.md b/infra/README.md index 6300c4878d7..669ba80501a 100644 --- a/infra/README.md +++ b/infra/README.md @@ -53,9 +53,18 @@ You can now install Hail: - Update $HAIL/config.mk with your infrastructure settings. You can get settings from the default/global-config secret. +- Run `kubectl -n default apply -f $HAIL/ci/bootstrap.yaml`. + - Build the CI utils image. Run `make push-ci-utils` in $HAIL/ci. -- Run `kubectl -n default apply -f $HAIL/ci/bootstrap.yaml`. +- Deploy the bootstrap gateway. Run `make deploy` in + $HAIL/bootstrap-gateway. + +- Create Let's Encrypt certs. Run `make run` in $HAIL/letsencrypt. + +- Deploy the gateway. Run `make deploy` in $HAIL/gateway. + +- Deploy the internal-gateway. Run `make deploy` in $HAIL/internal-gateway. - Go to the Google Cloud console, API & Services, Credentials. Configure the consent screen. 
Add the scope: diff --git a/infra/main.tf b/infra/main.tf index 18dfaf3e223..fafde5ab341 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -161,8 +161,9 @@ resource "google_sql_database_instance" "db" { } } -resource "google_compute_global_address" "gateway" { +resource "google_compute_address" "gateway" { name = "gateway" + region = var.gcp_region } resource "google_compute_address" "internal_gateway" { @@ -191,7 +192,7 @@ resource "kubernetes_secret" "global_config" { gcp_project = var.gcp_project domain = var.domain internal_ip = google_compute_address.internal_gateway.address - ip = google_compute_global_address.gateway.address + ip = google_compute_address.gateway.address kubernetes_server_url = "https://${google_container_cluster.vdc.endpoint}" gcp_region = var.gcp_region gcp_zone = var.gcp_zone diff --git a/letsencrypt/Dockerfile b/letsencrypt/Dockerfile index ef1f3017edb..29f5388260c 100644 --- a/letsencrypt/Dockerfile +++ b/letsencrypt/Dockerfile @@ -1,10 +1,13 @@ FROM {{ hail_ubuntu_image.image }} -# get add-apt-repository -RUN hail-apt-get-install nginx software-properties-common && \ - bash -c 'DEBIAN_FRONTEND=noninteractive apt-get install -y tzdata' && \ - add-apt-repository -y ppa:certbot/certbot && \ - hail-apt-get-install python-certbot-nginx wget +RUN apt update && \ + apt install -y nginx software-properties-common && \ + bash -c 'DEBIAN_FRONTEND=noninteractive apt install -y tzdata' && \ + python3.6 /usr/bin/add-apt-repository -y ppa:certbot/certbot && \ + apt install -y python-certbot-nginx wget && \ + rm -rf /var/lib/apt/lists/* + +RUN hail-pip-install cffi RUN wget -O /usr/local/bin/kubectl https://storage.googleapis.com/kubernetes-release/release/v1.11.3/bin/linux/amd64/kubectl && \ chmod +x /usr/local/bin/kubectl diff --git a/letsencrypt/Makefile b/letsencrypt/Makefile index 2a6c748e72f..f98c277b1e3 100644 --- a/letsencrypt/Makefile +++ b/letsencrypt/Makefile @@ -5,8 +5,6 @@ LETSENCRYPT_IMAGE = gcr.io/$(PROJECT)/letsencrypt:$(shell docker images -q --no- STATIC_CONFIG = letsencrypt-pod.yaml letsencrypt.sh -.PHONY: $(STATIC_CONFIG) build push start-service run clean - $(STATIC_CONFIG): %: %.in sed -e "s,@project@,$(PROJECT),g" \ -e "s;@domains@;$(shell paste -s -d, domains.txt);g" \ @@ -14,23 +12,25 @@ $(STATIC_CONFIG): %: %.in -e "s,@ip@,$(IP),g" \ < $< > $@ +.PHONY: build build: letsencrypt.sh $(MAKE) -C ../docker hail-ubuntu python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"hail-ubuntu"}}' Dockerfile Dockerfile.out docker build -f Dockerfile.out -t letsencrypt --cache-from letsencrypt,$(LETSENCRYPT_LATEST),hail-ubuntu . 
+.PHONY: push push: build docker tag letsencrypt $(LETSENCRYPT_LATEST) docker push $(LETSENCRYPT_LATEST) docker tag letsencrypt $(LETSENCRYPT_IMAGE) docker push $(LETSENCRYPT_IMAGE) -start-service: service.yaml - kubectl -n default apply -f service.yaml - +.PHONY: run run: letsencrypt-pod.yaml service.yaml push + kubectl -n default apply -f service.yaml python3 ../ci/jinja2_render.py '{"letsencrypt_image":{"image":"$(LETSENCRYPT_IMAGE)"}}' letsencrypt-pod.yaml letsencrypt-pod.yaml.out /bin/bash run-letsencrypt.sh +.PHONY: clean clean: rm -rf $(STATIC_CONFIG) diff --git a/letsencrypt/service.yaml b/letsencrypt/service.yaml index cb28855e5bc..a46e1ba3573 100644 --- a/letsencrypt/service.yaml +++ b/letsencrypt/service.yaml @@ -1,4 +1,36 @@ apiVersion: v1 +kind: ServiceAccount +metadata: + name: letsencrypt +--- +apiVersion: v1 +kind: Secret +metadata: + name: letsencrypt-config +--- +kind: Role +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: update-letsencrypt-config +rules: +- apiGroups: [""] + resources: ["secrets"] + resourceNames: ["letsencrypt-config"] + verbs: ["get", "update", "patch"] +--- +kind: RoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: letsencrypt-update-letsencrypt-config +subjects: +- kind: ServiceAccount + name: letsencrypt +roleRef: + kind: Role + name: update-letsencrypt-config + apiGroup: "" +--- +apiVersion: v1 kind: Service metadata: name: letsencrypt From 0cbf817ddbebbc6422cf3c5f48a68098e537cd59 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Thu, 19 Nov 2020 19:35:56 -0500 Subject: [PATCH 027/501] add bootstrap-gateway/.gitignore --- bootstrap-gateway/.gitignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 bootstrap-gateway/.gitignore diff --git a/bootstrap-gateway/.gitignore b/bootstrap-gateway/.gitignore new file mode 100644 index 00000000000..d244fa2f7cb --- /dev/null +++ b/bootstrap-gateway/.gitignore @@ -0,0 +1,2 @@ +/deployment.yaml.out +/service.yaml.out From ad5c9886522bc0273b17ddfba4848f666b288ea4 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Thu, 19 Nov 2020 20:34:57 -0500 Subject: [PATCH 028/501] wip --- auth/auth/driver/driver.py | 2 +- build.yaml | 105 ++++++++------------------------ ci/bootstrap_create_accounts.py | 74 ++++++++++++++++++++++ 3 files changed, 102 insertions(+), 79 deletions(-) create mode 100644 ci/bootstrap_create_accounts.py diff --git a/auth/auth/driver/driver.py b/auth/auth/driver/driver.py index 4ce707e0c7d..35fe339afc0 100644 --- a/auth/auth/driver/driver.py +++ b/auth/auth/driver/driver.py @@ -449,7 +449,7 @@ async def create_user(app, user): try: await _create_user(app, user, cleanup) except Exception: - log.exception(f'create user {user} failed, will retry') + log.exception(f'create user {user} failed') for f in cleanup: try: diff --git a/build.yaml b/build.yaml index a8f6f796e7f..5bbed7520eb 100644 --- a/build.yaml +++ b/build.yaml @@ -143,6 +143,8 @@ steps: to: /git_version - from: /io/repo/address/test to: /repo/address/ + - from: /io/repo/ci/bootstrap_create_accounts.py + to: /repo/ci/ dependsOn: - base_image - kind: buildImage @@ -352,82 +354,25 @@ steps: scopes: - test - dev + - kind: buildImage + name: auth_image + dockerFile: auth/Dockerfile + contextPath: . 
+ publishAs: auth + dependsOn: + - service_base_image - kind: runImage name: create_accounts image: - valueFrom: service_base_image.image + valueFrom: auth_image.image script: | set -ex - # create accounts - mkdir /user-tokens - cat > create-session.py < Date: Thu, 19 Nov 2020 20:37:38 -0500 Subject: [PATCH 029/501] fix --- build.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/build.yaml b/build.yaml index 5bbed7520eb..f7cbf26786d 100644 --- a/build.yaml +++ b/build.yaml @@ -354,6 +354,9 @@ steps: scopes: - test - dev + dependsOn: + - default_ns + - service_base_image - kind: buildImage name: auth_image dockerFile: auth/Dockerfile @@ -405,7 +408,7 @@ steps: - default_ns - deploy_test_batch_sa - auth_database - - service_base_image + - auth_image - create_deploy_config - create_test_gsa_keys - kind: buildImage From 1c7eb849b9cb1f5c555e2fe3dda6595482a25e7f Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Thu, 19 Nov 2020 20:59:07 -0500 Subject: [PATCH 030/501] fixes --- build.yaml | 3 --- infra/README.md | 1 + 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/build.yaml b/build.yaml index f7cbf26786d..05dd84546c4 100644 --- a/build.yaml +++ b/build.yaml @@ -376,9 +376,6 @@ steps: export HAIL_GSA_EMAIL=$(kubectl get secret test-gsa-key -o jsonpath={.data.key\\.json} | base64 -d | jq -r '."client_email"') {% endif %} /io/bootstrap_create_accounts.py - scopes: - - test - - dev serviceAccount: name: admin namespace: diff --git a/infra/README.md b/infra/README.md index 669ba80501a..bcfa103b7ff 100644 --- a/infra/README.md +++ b/infra/README.md @@ -93,6 +93,7 @@ You can now install Hail: add yourself to the docker group add ssh firewall rule to internal +fix up sha, other config in bootstrap.py create users From 2bda09422565d08db507ed938eaabb236f00fd48 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Thu, 19 Nov 2020 21:03:34 -0500 Subject: [PATCH 031/501] fix --- build.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.yaml b/build.yaml index 05dd84546c4..c56ca1c9fd7 100644 --- a/build.yaml +++ b/build.yaml @@ -375,7 +375,7 @@ steps: export HAIL_NAMESPACE_NAME={{ default_ns.name }} export HAIL_GSA_EMAIL=$(kubectl get secret test-gsa-key -o jsonpath={.data.key\\.json} | base64 -d | jq -r '."client_email"') {% endif %} - /io/bootstrap_create_accounts.py + python3 /io/bootstrap_create_accounts.py serviceAccount: name: admin namespace: From 653a933a5260ded15508db29e06a22d2c4c5632c Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Thu, 19 Nov 2020 21:08:45 -0500 Subject: [PATCH 032/501] fix --- build.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/build.yaml b/build.yaml index c56ca1c9fd7..6ea42feadac 100644 --- a/build.yaml +++ b/build.yaml @@ -370,7 +370,10 @@ steps: valueFrom: auth_image.image script: | set -ex + export PROJECT={{ global.project }} + export ZONE={{ global.zone }} export HAIL_PROJECT={{ global.project }} + export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} {% if not deploy %} export HAIL_NAMESPACE_NAME={{ default_ns.name }} export HAIL_GSA_EMAIL=$(kubectl get secret test-gsa-key -o jsonpath={.data.key\\.json} | base64 -d | jq -r '."client_email"') From c31180ad228801ea145ced53a1fe92aa6ae22e1a Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Thu, 19 Nov 2020 21:18:57 -0500 Subject: [PATCH 033/501] fix --- ci/bootstrap_create_accounts.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/bootstrap_create_accounts.py b/ci/bootstrap_create_accounts.py index 68517bba49f..b8a18e0beb1 100644 --- 
a/ci/bootstrap_create_accounts.py +++ b/ci/bootstrap_create_accounts.py @@ -55,7 +55,8 @@ async def main(): await db_instance.async_init(maxsize=50, config_file='/database-server-config/sql-config.json') app['db_instance'] = db_instance - kube.config.load_incluster_config() + # kube.config.load_incluster_config() + await kube.config.load_kube_config() k8s_client = kube.client.CoreV1Api() app['k8s_client'] = k8s_client From 87b96808c4719dfb86fe91ee14447795965107e9 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Thu, 19 Nov 2020 21:51:19 -0500 Subject: [PATCH 034/501] fix --- auth/auth/driver/driver.py | 23 ++++++++++++----------- ci/bootstrap_create_accounts.py | 2 +- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/auth/auth/driver/driver.py b/auth/auth/driver/driver.py index 35fe339afc0..26c8635e4a5 100644 --- a/auth/auth/driver/driver.py +++ b/auth/auth/driver/driver.py @@ -349,7 +349,7 @@ async def delete(self): self.billing_project = None -async def _create_user(app, user, cleanup): +async def _create_user(app, user, skip_trial_bp, cleanup): db_instance = app['db_instance'] db = app['db'] k8s_client = app['k8s_client'] @@ -424,14 +424,15 @@ async def _create_user(app, user, cleanup): await db_secret.create( 'database-server-config', namespace_name, db_resource.secret_data()) - trial_bp = user['trial_bp_name'] - if trial_bp is None: - username = user['username'] - billing_project_name = f'{username}-trial' - billing_project = BillingProjectResource(batch_client) - cleanup.append(billing_project.delete) - await billing_project.create(username, billing_project_name) - updates['trial_bp_name'] = billing_project_name + if not skip_trial_bp: + trial_bp = user['trial_bp_name'] + if trial_bp is None: + username = user['username'] + billing_project_name = f'{username}-trial' + billing_project = BillingProjectResource(batch_client) + cleanup.append(billing_project.delete) + await billing_project.create(username, billing_project_name) + updates['trial_bp_name'] = billing_project_name n_rows = await db.execute_update(f''' UPDATE users @@ -444,10 +445,10 @@ async def _create_user(app, user, cleanup): raise DatabaseConflictError -async def create_user(app, user): +async def create_user(app, user, skip_trial_bp=False): cleanup = [] try: - await _create_user(app, user, cleanup) + await _create_user(app, user, skip_trial_bp, cleanup) except Exception: log.exception(f'create user {user} failed') diff --git a/ci/bootstrap_create_accounts.py b/ci/bootstrap_create_accounts.py index b8a18e0beb1..940ba70b2bf 100644 --- a/ci/bootstrap_create_accounts.py +++ b/ci/bootstrap_create_accounts.py @@ -69,7 +69,7 @@ async def main(): user_id = await insert_user_if_not_exists(db, username, email, is_developer, is_service_account) db_user = await db.execute_and_fetchone('SELECT * FROM users where id = %s;', (user_id,)) - await create_user(app, db_user) + await create_user(app, db_user, skip_trial_bp=True) async_to_blocking(main()) From 5eaa35c355fc11f88853bd0bff8f4a68e1ccf689 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Thu, 19 Nov 2020 21:57:49 -0500 Subject: [PATCH 035/501] fix --- auth/auth/driver/driver.py | 4 ++-- ci/bootstrap_create_accounts.py | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/auth/auth/driver/driver.py b/auth/auth/driver/driver.py index 26c8635e4a5..8cd118e96a4 100644 --- a/auth/auth/driver/driver.py +++ b/auth/auth/driver/driver.py @@ -354,7 +354,6 @@ async def _create_user(app, user, skip_trial_bp, cleanup): db = app['db'] k8s_client = app['k8s_client'] 
iam_client = app['iam_client'] - batch_client = app['batch_client'] if user['is_service_account'] != 1: token = secret_alnum_string(5, case='lower') @@ -427,6 +426,7 @@ async def _create_user(app, user, skip_trial_bp, cleanup): if not skip_trial_bp: trial_bp = user['trial_bp_name'] if trial_bp is None: + batch_client = app['batch_client'] username = user['username'] billing_project_name = f'{username}-trial' billing_project = BillingProjectResource(batch_client) @@ -466,7 +466,6 @@ async def delete_user(app, user): db = app['db'] k8s_client = app['k8s_client'] iam_client = app['iam_client'] - batch_client = app['batch_client'] tokens_secret_name = user['tokens_secret_name'] if tokens_secret_name is not None: @@ -499,6 +498,7 @@ async def delete_user(app, user): trial_bp_name = user['trial_bp_name'] if trial_bp_name is not None: + batch_client = app['batch_client'] bp = BillingProjectResource(batch_client, user['username'], trial_bp_name) await bp.delete() diff --git a/ci/bootstrap_create_accounts.py b/ci/bootstrap_create_accounts.py index 940ba70b2bf..3e85cc85df8 100644 --- a/ci/bootstrap_create_accounts.py +++ b/ci/bootstrap_create_accounts.py @@ -63,8 +63,6 @@ async def main(): app['iam_client'] = aiogoogle.IAmClient( PROJECT, credentials=aiogoogle.Credentials.from_file('/gsa-key/key.json')) - app['batch_client'] = await bc.aioclient.BatchClient(None) - for username, email, is_developer, is_service_account in users: user_id = await insert_user_if_not_exists(db, username, email, is_developer, is_service_account) From 0e62d8b3d27fe52c5ed4c3bf819d3551ba1fd7e2 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Thu, 19 Nov 2020 22:14:56 -0500 Subject: [PATCH 036/501] fix --- ci/bootstrap_create_accounts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/bootstrap_create_accounts.py b/ci/bootstrap_create_accounts.py index 3e85cc85df8..2dfab10a6f1 100644 --- a/ci/bootstrap_create_accounts.py +++ b/ci/bootstrap_create_accounts.py @@ -30,7 +30,7 @@ async def insert_user_if_not_exists(db, username, email, is_developer, is_servic return await db.execute_insertone( ''' INSERT INTO users (state, username, email, is_developer, is_service_account, gsa_email, gsa_key_secret_name, namespace_name) -VALUES (%s, %s, %s, %s, %s); +VALUES (%s, %s, %s, %s, %s, %s, %s, %s); ''', ('creating', username, email, is_developer, is_service_account, gsa_email, gsa_key_secret_name, namespace_name)) From 91e4f005ff6be0df0ffd0e65d19caf7b56cceaf3 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Fri, 20 Nov 2020 00:47:06 -0500 Subject: [PATCH 037/501] fix --- auth/auth/driver/driver.py | 2 +- build.yaml | 7 ++----- ci/bootstrap.py | 4 ++-- ci/bootstrap_create_accounts.py | 29 +++++++++++++++++------------ 4 files changed, 22 insertions(+), 20 deletions(-) diff --git a/auth/auth/driver/driver.py b/auth/auth/driver/driver.py index 8cd118e96a4..41f04bf460b 100644 --- a/auth/auth/driver/driver.py +++ b/auth/auth/driver/driver.py @@ -423,7 +423,7 @@ async def _create_user(app, user, skip_trial_bp, cleanup): await db_secret.create( 'database-server-config', namespace_name, db_resource.secret_data()) - if not skip_trial_bp: + if not skip_trial_bp and user['is_service_account'] != 1: trial_bp = user['trial_bp_name'] if trial_bp is None: batch_client = app['batch_client'] diff --git a/build.yaml b/build.yaml index 6ea42feadac..e96ab30dec3 100644 --- a/build.yaml +++ b/build.yaml @@ -374,11 +374,8 @@ steps: export ZONE={{ global.zone }} export HAIL_PROJECT={{ global.project }} export HAIL_DEFAULT_NAMESPACE={{ 
default_ns.name }} - {% if not deploy %} - export HAIL_NAMESPACE_NAME={{ default_ns.name }} - export HAIL_GSA_EMAIL=$(kubectl get secret test-gsa-key -o jsonpath={.data.key\\.json} | base64 -d | jq -r '."client_email"') - {% endif %} - python3 /io/bootstrap_create_accounts.py + export HAIL_SCOPE={{ scope }} + python3 /io/bootstrap_create_accounts.py serviceAccount: name: admin namespace: diff --git a/ci/bootstrap.py b/ci/bootstrap.py index 9d4f22ab1f7..762e84f432f 100644 --- a/ci/bootstrap.py +++ b/ci/bootstrap.py @@ -14,7 +14,7 @@ from batch.driver.k8s_cache import K8sCache KUBERNETES_SERVER_URL = 'https://34.71.246.49' - +SHA = os.environ['HAIL_SHA'] def populate_secret_host_path(host_path, secret_data): os.makedirs(host_path) @@ -275,7 +275,7 @@ def checkout_script(self): async def main(): scope = 'deploy' - code = Branch('cseed', 'hail', 'infra-1', 'dd8c84ee1601d9dd5643ec78fd9996cb51472e18') + code = Branch('cseed', 'hail', 'infra-1', SHA) with open(f'build.yaml', 'r') as f: config = BuildConfiguration(code, f.read(), scope, requested_step_names=['deploy_batch']) diff --git a/ci/bootstrap_create_accounts.py b/ci/bootstrap_create_accounts.py index 2dfab10a6f1..f4ebc90dc6a 100644 --- a/ci/bootstrap_create_accounts.py +++ b/ci/bootstrap_create_accounts.py @@ -1,31 +1,36 @@ import os +import base64 +import json import kubernetes_asyncio as kube from hailtop import aiogoogle -from hailtop import batch_client as bc from hailtop.utils import async_to_blocking from gear import Database from auth.driver.driver import create_user +SCOPE = os.environ['HAIL_SCOPE'] PROJECT = os.environ['HAIL_PROJECT'] -GSA_EMAIL = os.environ.get('HAIL_GSA_EMAIL') -NAMESPACE_NAME = os.environ.get('HAIL_NAMESPACE_NAME') +DEFAULT_NAMESPACE = os.environ['HAIL_DEFAULT_NAMESPACE'] +async def insert_user_if_not_exists(app, username, email, is_developer, is_service_account): + db = app['db'] + k8s_client = app['k8s_client'] -async def insert_user_if_not_exists(db, username, email, is_developer, is_service_account): row = await db.execute_and_fetchone('SELECT id FROM users where username = %s;', (username,)) if row: return row['id'] - # If scope = test, dev we can't create our own service accounts or - # namespaces. Use the ones given to us. 
- gsa_email = GSA_EMAIL - if gsa_email: - gsa_key_secret_name = f'{username}-gsa-key' - else: - gsa_key_secret_name = None + gsa_key_secret_name = f'{username}-gsa-key' + + secret = await k8s_client.read_namespaced_secret(gsa_key_secret_name, DEFAULT_NAMESPACE) + key_json = base64.b64decode(secret.data['key.json']).decode() + key = json.loads(key_json) + gsa_email = key['client_email'] - namespace_name = NAMESPACE_NAME + if is_developer and SCOPE != 'deploy': + namespace_name = DEFAULT_NAMESPACE + else: + namespace_name = None return await db.execute_insertone( ''' From e1a62bd95a46c7419983f28be9b5a5a026027709 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Fri, 20 Nov 2020 01:12:30 -0500 Subject: [PATCH 038/501] fixes --- ci/bootstrap_create_accounts.py | 6 +++--- infra/main.tf | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/ci/bootstrap_create_accounts.py b/ci/bootstrap_create_accounts.py index f4ebc90dc6a..ae81611890a 100644 --- a/ci/bootstrap_create_accounts.py +++ b/ci/bootstrap_create_accounts.py @@ -69,10 +69,10 @@ async def main(): PROJECT, credentials=aiogoogle.Credentials.from_file('/gsa-key/key.json')) for username, email, is_developer, is_service_account in users: - user_id = await insert_user_if_not_exists(db, username, email, is_developer, is_service_account) + user_id = await insert_user_if_not_exists(app, username, email, is_developer, is_service_account) - db_user = await db.execute_and_fetchone('SELECT * FROM users where id = %s;', (user_id,)) - await create_user(app, db_user, skip_trial_bp=True) + db_user = await db.execute_and_fetchone('SELECT * FROM users where id = %s;', (user_id,)) + await create_user(app, db_user, skip_trial_bp=True) async_to_blocking(main()) diff --git a/infra/main.tf b/infra/main.tf index fafde5ab341..d74d0b92430 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -384,6 +384,21 @@ resource "google_service_account_key" "auth_key" { service_account_id = google_service_account.auth.name } +resource "google_project_iam_member" "auth_service_account_admin" { + role = "roles/iam.serviceAccountAdmin" + member = "serviceAccount:${google_service_account.auth.email}" +} + +resource "google_project_iam_member" "auth_service_account_key_admin" { + role = "roles/iam.serviceAccountKeyAdmin" + member = "serviceAccount:${google_service_account.auth.email}" +} + +resource "google_project_iam_member" "auth_storage_admin" { + role = "roles/storage.admin" + member = "serviceAccount:${google_service_account.auth.email}" +} + resource "kubernetes_secret" "auth_gsa_key" { metadata { name = "auth-gsa-key" From 3f839debbffd182a4c252c5b027eacbadf9176a2 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Fri, 20 Nov 2020 01:59:59 -0500 Subject: [PATCH 039/501] fix --- auth/cluster-role.yaml | 21 --------------------- ci/bootstrap.yaml | 22 ++++++++++++++++++++++ ci/bootstrap_create_accounts.py | 4 ++-- 3 files changed, 24 insertions(+), 23 deletions(-) delete mode 100644 auth/cluster-role.yaml diff --git a/auth/cluster-role.yaml b/auth/cluster-role.yaml deleted file mode 100644 index ac693717189..00000000000 --- a/auth/cluster-role.yaml +++ /dev/null @@ -1,21 +0,0 @@ -kind: ClusterRole -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: auth-driver -rules: -- apiGroups: [""] - resources: ["namespaces", "secrets"] - verbs: ["*"] ---- -kind: ClusterRoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: auth-driver -subjects: -- kind: ServiceAccount - name: auth-driver - namespace: default -roleRef: - kind: 
ClusterRole - name: auth-driver - apiGroup: rbac.authorization.k8s.io diff --git a/ci/bootstrap.yaml b/ci/bootstrap.yaml index 3d2ad51ea87..255d21dfaab 100644 --- a/ci/bootstrap.yaml +++ b/ci/bootstrap.yaml @@ -47,3 +47,25 @@ roleRef: kind: ClusterRole name: cluster-admin apiGroup: "" +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: auth-driver +rules: +- apiGroups: [""] + resources: ["namespaces", "secrets"] + verbs: ["*"] +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: auth-driver +subjects: +- kind: ServiceAccount + name: auth-driver + namespace: default +roleRef: + kind: ClusterRole + name: auth-driver + apiGroup: rbac.authorization.k8s.io diff --git a/ci/bootstrap_create_accounts.py b/ci/bootstrap_create_accounts.py index ae81611890a..0e94d46daed 100644 --- a/ci/bootstrap_create_accounts.py +++ b/ci/bootstrap_create_accounts.py @@ -20,13 +20,13 @@ async def insert_user_if_not_exists(app, username, email, is_developer, is_servi if row: return row['id'] - gsa_key_secret_name = f'{username}-gsa-key' - secret = await k8s_client.read_namespaced_secret(gsa_key_secret_name, DEFAULT_NAMESPACE) key_json = base64.b64decode(secret.data['key.json']).decode() key = json.loads(key_json) gsa_email = key['client_email'] + gsa_key_secret_name = f'{username}-gsa-key' + if is_developer and SCOPE != 'deploy': namespace_name = DEFAULT_NAMESPACE else: From 10a9d6dac9fdb9eb8680c620d49b438044db062b Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Fri, 20 Nov 2020 02:03:58 -0500 Subject: [PATCH 040/501] fix --- ci/bootstrap_create_accounts.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/bootstrap_create_accounts.py b/ci/bootstrap_create_accounts.py index 0e94d46daed..ae81611890a 100644 --- a/ci/bootstrap_create_accounts.py +++ b/ci/bootstrap_create_accounts.py @@ -20,13 +20,13 @@ async def insert_user_if_not_exists(app, username, email, is_developer, is_servi if row: return row['id'] + gsa_key_secret_name = f'{username}-gsa-key' + secret = await k8s_client.read_namespaced_secret(gsa_key_secret_name, DEFAULT_NAMESPACE) key_json = base64.b64decode(secret.data['key.json']).decode() key = json.loads(key_json) gsa_email = key['client_email'] - gsa_key_secret_name = f'{username}-gsa-key' - if is_developer and SCOPE != 'deploy': namespace_name = DEFAULT_NAMESPACE else: From 9d75561e97068d4ede4dc7f610808b6b74d503df Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Fri, 20 Nov 2020 02:12:35 -0500 Subject: [PATCH 041/501] fix --- auth/Makefile | 1 + auth/auth-driver-service-account.yaml | 25 +++++++++++++++++++++++++ auth/deployment.yaml | 26 -------------------------- build.yaml | 7 +++++++ 4 files changed, 33 insertions(+), 26 deletions(-) create mode 100644 auth/auth-driver-service-account.yaml diff --git a/auth/Makefile b/auth/Makefile index b1f0298df17..21e11abff90 100644 --- a/auth/Makefile +++ b/auth/Makefile @@ -29,6 +29,7 @@ push: build .PHONY: deploy deploy: push ! 
[ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default + kubectl -n $(NAMESPACE) apply -f auth-driver-service-account.yaml python3 ../ci/jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"default_ns":{"name":"$(NAMESPACE)"},"auth_image":{"image":"$(AUTH_IMAGE)"},"auth_database":{"user_secret_name":"sql-auth-user-config"},"global":{"project":"$(PROJECT)","zone":"$(ZONE)","domain":"$(DOMAIN)"}}' deployment.yaml deployment.yaml.out kubectl -n $(NAMESPACE) apply -f deployment.yaml.out diff --git a/auth/auth-driver-service-account.yaml b/auth/auth-driver-service-account.yaml new file mode 100644 index 00000000000..bd8006efc0b --- /dev/null +++ b/auth/auth-driver-service-account.yaml @@ -0,0 +1,25 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: auth-driver +--- +kind: Role +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: auth-driver +rules: + - apiGroups: [""] + resources: ["secrets"] + verbs: ["*"] +--- +kind: RoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: auth-driver +subjects: + - kind: ServiceAccount + name: auth-driver +roleRef: + kind: Role + name: auth-driver + apiGroup: "" diff --git a/auth/deployment.yaml b/auth/deployment.yaml index cede60c1528..659308670bd 100644 --- a/auth/deployment.yaml +++ b/auth/deployment.yaml @@ -1,29 +1,3 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - name: auth-driver ---- -kind: Role -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: auth-driver -rules: - - apiGroups: [""] - resources: ["secrets"] - verbs: ["*"] ---- -kind: RoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: auth-driver -subjects: - - kind: ServiceAccount - name: auth-driver -roleRef: - kind: Role - name: auth-driver - apiGroup: "" ---- apiVersion: apps/v1 kind: Deployment metadata: diff --git a/build.yaml b/build.yaml index e96ab30dec3..17ba49813a2 100644 --- a/build.yaml +++ b/build.yaml @@ -364,6 +364,13 @@ steps: publishAs: auth dependsOn: - service_base_image + - kind: deploy + name: deploy_auth_driver_service_account + namespace: + valueFrom: default_ns.name + config: auth/auth-driver-service-account.yaml + dependsOn: + - default_ns - kind: runImage name: create_accounts image: From 2d9941c6393fb815287d612de019ea212de8c5fd Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Fri, 20 Nov 2020 02:18:50 -0500 Subject: [PATCH 042/501] fix --- build.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/build.yaml b/build.yaml index 17ba49813a2..11a96bf3e41 100644 --- a/build.yaml +++ b/build.yaml @@ -384,7 +384,7 @@ steps: export HAIL_SCOPE={{ scope }} python3 /io/bootstrap_create_accounts.py serviceAccount: - name: admin + name: auth-driver namespace: valueFrom: default_ns.name secrets: @@ -414,6 +414,7 @@ steps: - auth_database - auth_image - create_deploy_config + - deploy_auth_driver_service_account - create_test_gsa_keys - kind: buildImage name: hail_build_image From a5379580a029876536fbca91351a7457c7b2d38c Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Fri, 20 Nov 2020 02:34:40 -0500 Subject: [PATCH 043/501] fix --- ci/bootstrap_create_accounts.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ci/bootstrap_create_accounts.py b/ci/bootstrap_create_accounts.py index ae81611890a..bf278b72167 100644 --- a/ci/bootstrap_create_accounts.py +++ b/ci/bootstrap_create_accounts.py @@ -18,6 +18,8 @@ async def insert_user_if_not_exists(app, username, email, is_developer, is_servi row = await 
db.execute_and_fetchone('SELECT id FROM users where username = %s;', (username,)) if row: + if row['state'] == 'active': + return None return row['id'] gsa_key_secret_name = f'{username}-gsa-key' @@ -71,8 +73,9 @@ async def main(): for username, email, is_developer, is_service_account in users: user_id = await insert_user_if_not_exists(app, username, email, is_developer, is_service_account) - db_user = await db.execute_and_fetchone('SELECT * FROM users where id = %s;', (user_id,)) - await create_user(app, db_user, skip_trial_bp=True) + if user_id is not None: + db_user = await db.execute_and_fetchone('SELECT * FROM users where id = %s;', (user_id,)) + await create_user(app, db_user, skip_trial_bp=True) async_to_blocking(main()) From 2a155619635a97fa9c64c9e454716c24c3efe876 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Fri, 20 Nov 2020 03:09:16 -0500 Subject: [PATCH 044/501] fix up readme --- infra/README.md | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/infra/README.md b/infra/README.md index bcfa103b7ff..1eb8eeaf29a 100644 --- a/infra/README.md +++ b/infra/README.md @@ -78,25 +78,36 @@ You can now install Hail: kubectl -n default create secret generic auth-oauth2-client-secret --from-file=./client_secret.json ``` -- On an instance on the internal network, 100gb, Ubuntu 20.04 TLS, enable - full API and run: +- Create a VM on the internal network, 100GB, Ubuntu 20.04 TLS, allow + full access to all Cloud APIs. 10GB will run out of space. + +- Install some dependencies: ``` sudo apt update sudo apt install -y docker.io python3-pip mysql-client-core-8.0 - sudo snap install --classic kubectl + sudo usermod -a -G docker $USER gcloud -q auth configure-docker gcloud container clusters get-credentials --zone us-central1-a vdc git clone https://github.com/cseed/hail.git python3 -m pip install -r $HOME/hail/docker/requirements.txt ``` -add yourself to the docker group -add ssh firewall rule to internal -fix up sha, other config in bootstrap.py + You will have to log out/in for usermod to take effect. -create users +- Bootstrap the cluster by running: -python3 ./ci/bootstrap.py + ``` + HAIL_SHA=$(git rev-parse HEAD) \ + HAIL_CI_UTILS_IMAGE=gcr.io//ci-utils:latest \ + HAIL_CI_BUCKET_NAME=dummy \ + KUBERNETES_SERVER_URL='' \ + HAIL_DEFAULT_NAMESPACE='default' \ + HAIL_DOMAIN= \ + HAIL_GCP_ZONE= \ + GCP_PROJECT= \ + PYTHONPATH=$HOME/hail/ci:$HOME/hail/batch:$HOME/hail/hail/python \ + python3 $HAIL/ci/bootstrap.py + ``` -cotton@admin:~/hail$ (sudo rm -rf _) && HAIL_CI_UTILS_IMAGE=gcr.io/hail-vdc-staging/ci-utils:latest HAIL_CI_BUCKET_NAME='dummy' KUBERNETES_SERVER_URL='http://34.71.246.49' HAIL_DEFAULT_NAMESPACE='default' HAIL_DOMAIN=staging.hail.is HAIL_GCP_ZONE=us-central1-a HAIL_GCP_PROJECT=hail-vdc-staging PYTHONPATH=$HOME/hail/ci:$HOME/hail/batch:$HOME/hail/hail/python python3 ./ci/bootstrap.py^C +- You may want to add a suitable ssh forward rule to the internal network. 
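The bootstrap invocation added to infra/README.md in the patch above takes several values (Kubernetes server URL, domain, zone, project) that the Terraform configuration already writes into the `global-config` Kubernetes secret. The following is a minimal, illustrative sketch — not part of the patch series — of filling those variables from the secret rather than copying them by hand. It assumes `kubectl` points at the `vdc` cluster, the secret lives in the `default` namespace, the repository is checked out at `$HOME/hail`, and the variable names are taken verbatim from the README command above:

```bash
# Decode one key from the global-config secret created by Terraform.
get_config() {
  kubectl -n default get secret global-config \
    -o jsonpath="{.data.$1}" | base64 -d
}

HAIL=$HOME/hail   # assumed checkout location

HAIL_SHA=$(git -C $HAIL rev-parse HEAD) \
HAIL_CI_UTILS_IMAGE=gcr.io/$(get_config gcp_project)/ci-utils:latest \
HAIL_CI_BUCKET_NAME=dummy \
KUBERNETES_SERVER_URL=$(get_config kubernetes_server_url) \
HAIL_DEFAULT_NAMESPACE=default \
HAIL_DOMAIN=$(get_config domain) \
HAIL_GCP_ZONE=$(get_config gcp_zone) \
GCP_PROJECT=$(get_config gcp_project) \
PYTHONPATH=$HAIL/ci:$HAIL/batch:$HAIL/hail/python \
python3 $HAIL/ci/bootstrap.py
```

If a key is missing from the secret, the corresponding variable will simply be empty, so it is worth echoing the values before running the bootstrap.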
From 7c1dc38a8aefbebd7b0cdccb60dda17ec361a72b Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Fri, 20 Nov 2020 10:28:52 -0500 Subject: [PATCH 045/501] fix spacing --- infra/README.md | 2 +- infra/main.tf | 72 ++++++++++++++++++++++++------------------------- 2 files changed, 37 insertions(+), 37 deletions(-) diff --git a/infra/README.md b/infra/README.md index 1eb8eeaf29a..be476aa019f 100644 --- a/infra/README.md +++ b/infra/README.md @@ -85,7 +85,7 @@ You can now install Hail: ``` sudo apt update - sudo apt install -y docker.io python3-pip mysql-client-core-8.0 + sudo apt install -y docker.io python3-pip sudo usermod -a -G docker $USER gcloud -q auth configure-docker gcloud container clusters get-credentials --zone us-central1-a vdc diff --git a/infra/main.tf b/infra/main.tf index d74d0b92430..ce47f1cbe4f 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -20,8 +20,8 @@ provider "google" { credentials = file("~/.hail/terraform_sa_key.json") project = var.gcp_project - region = var.gcp_region - zone = var.gcp_zone + region = var.gcp_region + zone = var.gcp_zone } data "google_client_config" "provider" {} @@ -49,7 +49,7 @@ resource "google_container_cluster" "vdc" { # separately managed node pools. So we create the smallest possible default # node pool and immediately delete it. remove_default_node_pool = true - initial_node_count = 1 + initial_node_count = 1 master_auth { username = "" @@ -62,9 +62,9 @@ resource "google_container_cluster" "vdc" { } resource "google_container_node_pool" "vdc_preemptible_pool" { - name = "preemptible-pool" - location = var.gcp_zone - cluster = google_container_cluster.vdc.name + name = "preemptible-pool" + location = var.gcp_zone + cluster = google_container_cluster.vdc.name autoscaling { min_node_count = 0 @@ -72,7 +72,7 @@ resource "google_container_node_pool" "vdc_preemptible_pool" { } node_config { - preemptible = true + preemptible = true machine_type = "n1-standard-2" labels = { @@ -96,9 +96,9 @@ resource "google_container_node_pool" "vdc_preemptible_pool" { } resource "google_container_node_pool" "vdc_nonpreemptible_pool" { - name = "nonpreemptible-pool" - location = var.gcp_zone - cluster = google_container_cluster.vdc.name + name = "nonpreemptible-pool" + location = var.gcp_zone + cluster = google_container_cluster.vdc.name autoscaling { min_node_count = 0 @@ -106,7 +106,7 @@ resource "google_container_node_pool" "vdc_nonpreemptible_pool" { } node_config { - preemptible = false + preemptible = false machine_type = "n1-standard-2" labels = { @@ -124,9 +124,9 @@ resource "google_container_node_pool" "vdc_nonpreemptible_pool" { } resource "google_compute_global_address" "db_ip_address" { - name = "db-ip-address" - purpose = "VPC_PEERING" - address_type = "INTERNAL" + name = "db-ip-address" + purpose = "VPC_PEERING" + address_type = "INTERNAL" prefix_length = 16 network = google_compute_network.internal.id } @@ -142,9 +142,9 @@ resource "random_id" "db_name_suffix" { } resource "google_sql_database_instance" "db" { - name = "db-${random_id.db_name_suffix.hex}" + name = "db-${random_id.db_name_suffix.hex}" database_version = "MYSQL_5_7" - region = var.gcp_region + region = var.gcp_region depends_on = [google_service_networking_connection.private_vpc_connection] @@ -154,7 +154,7 @@ resource "google_sql_database_instance" "db" { tier = "db-n1-standard-1" ip_configuration { - ipv4_enabled = false + ipv4_enabled = false private_network = google_compute_network.internal.id require_ssl = true } @@ -167,16 +167,16 @@ resource "google_compute_address" "gateway" 
{ } resource "google_compute_address" "internal_gateway" { - name = "internal-gateway" - subnetwork = data.google_compute_subnetwork.internal_default_region.id + name = "internal-gateway" + subnetwork = data.google_compute_subnetwork.internal_default_region.id address_type = "INTERNAL" - region = var.gcp_region + region = var.gcp_region } provider "kubernetes" { load_config_file = false - host = "https://${google_container_cluster.vdc.endpoint}" + host = "https://${google_container_cluster.vdc.endpoint}" token = data.google_client_config.provider.access_token cluster_ca_certificate = base64decode( google_container_cluster.vdc.master_auth[0].cluster_ca_certificate, @@ -255,7 +255,7 @@ resource "google_container_registry" "registry" { } resource "google_service_account" "gcr_pull" { - account_id = "gcr-pull" + account_id = "gcr-pull" display_name = "pull from gcr.io" } @@ -264,7 +264,7 @@ resource "google_service_account_key" "gcr_pull_key" { } resource "google_service_account" "gcr_push" { - account_id = "gcr-push" + account_id = "gcr-push" display_name = "push to gcr.io" } @@ -355,7 +355,7 @@ resource "random_id" "atgu_name_suffix" { } resource "google_service_account" "atgu" { - account_id = "atgu-${random_id.atgu_name_suffix.hex}" + account_id = "atgu-${random_id.atgu_name_suffix.hex}" } resource "google_service_account_key" "atgu_key" { @@ -377,7 +377,7 @@ resource "random_id" "auth_name_suffix" { } resource "google_service_account" "auth" { - account_id = "auth-${random_id.auth_name_suffix.hex}" + account_id = "auth-${random_id.auth_name_suffix.hex}" } resource "google_service_account_key" "auth_key" { @@ -385,18 +385,18 @@ resource "google_service_account_key" "auth_key" { } resource "google_project_iam_member" "auth_service_account_admin" { - role = "roles/iam.serviceAccountAdmin" - member = "serviceAccount:${google_service_account.auth.email}" + role = "roles/iam.serviceAccountAdmin" + member = "serviceAccount:${google_service_account.auth.email}" } resource "google_project_iam_member" "auth_service_account_key_admin" { - role = "roles/iam.serviceAccountKeyAdmin" - member = "serviceAccount:${google_service_account.auth.email}" + role = "roles/iam.serviceAccountKeyAdmin" + member = "serviceAccount:${google_service_account.auth.email}" } resource "google_project_iam_member" "auth_storage_admin" { - role = "roles/storage.admin" - member = "serviceAccount:${google_service_account.auth.email}" + role = "roles/storage.admin" + member = "serviceAccount:${google_service_account.auth.email}" } resource "kubernetes_secret" "auth_gsa_key" { @@ -414,7 +414,7 @@ resource "random_id" "batch_name_suffix" { } resource "google_service_account" "batch" { - account_id = "batch-${random_id.batch_name_suffix.hex}" + account_id = "batch-${random_id.batch_name_suffix.hex}" } resource "google_service_account_key" "batch_key" { @@ -432,7 +432,7 @@ resource "kubernetes_secret" "batch_gsa_key" { } resource "google_service_account" "benchmark" { - account_id = "benchmark" + account_id = "benchmark" } resource "google_service_account_key" "benchmark_key" { @@ -450,7 +450,7 @@ resource "kubernetes_secret" "benchmark_gsa_key" { } resource "google_service_account" "monitoring" { - account_id = "monitoring" + account_id = "monitoring" } resource "google_service_account_key" "monitoring_key" { @@ -472,7 +472,7 @@ resource "random_id" "test_name_suffix" { } resource "google_service_account" "test" { - account_id = "test-${random_id.test_name_suffix.hex}" + account_id = "test-${random_id.test_name_suffix.hex}" } 
resource "google_service_account_key" "test_key" { @@ -490,7 +490,7 @@ resource "kubernetes_secret" "test_gsa_key" { } resource "google_service_account" "test_dev" { - account_id = "test-dev" + account_id = "test-dev" } resource "google_service_account_key" "test_dev_key" { From 7831481d0e54f6a44a073dd416985442fa43b102 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Fri, 20 Nov 2020 10:32:15 -0500 Subject: [PATCH 046/501] change db-ip-address => google-managed-services-internal --- infra/main.tf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/infra/main.tf b/infra/main.tf index ce47f1cbe4f..458057d3d17 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -123,8 +123,8 @@ resource "google_container_node_pool" "vdc_nonpreemptible_pool" { } } -resource "google_compute_global_address" "db_ip_address" { - name = "db-ip-address" +resource "google_compute_global_address" "google_managed_services_internal" { + name = "google-managed-services-internal" purpose = "VPC_PEERING" address_type = "INTERNAL" prefix_length = 16 @@ -134,7 +134,7 @@ resource "google_compute_global_address" "db_ip_address" { resource "google_service_networking_connection" "private_vpc_connection" { network = google_compute_network.internal.id service = "servicenetworking.googleapis.com" - reserved_peering_ranges = [google_compute_global_address.db_ip_address.name] + reserved_peering_ranges = [google_compute_global_address.google_managed_services_internal.name] } resource "random_id" "db_name_suffix" { From 874e5fd52e5978d3f86bec7461d44cad82fb7eb6 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Fri, 20 Nov 2020 10:38:34 -0500 Subject: [PATCH 047/501] fix k8s namespace ref --- infra/main.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/infra/main.tf b/infra/main.tf index 458057d3d17..fc7297f4ac8 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -313,7 +313,7 @@ resource "kubernetes_namespace" "ukbb_rg" { resource "kubernetes_service" "ukbb_rb_browser" { metadata { name = "ukbb-rg-browser" - namespace = "ukbb-rg" + namespace = kubernetes_namespace.ukbb_rg.metadata[0].name labels = { app = "ukbb-rg-browser" } @@ -333,7 +333,7 @@ resource "kubernetes_service" "ukbb_rb_browser" { resource "kubernetes_service" "ukbb_rb_static" { metadata { name = "ukbb-rg-static" - namespace = "ukbb-rg" + namespace = kubernetes_namespace.ukbb_rg.metadata[0].name labels = { app = "ukbb-rg-static" } From 4d4671246c617dadc570df92b73bba5739f8fef7 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Fri, 20 Nov 2020 10:59:57 -0500 Subject: [PATCH 048/501] address comments --- ci/bootstrap.py | 2 +- infra/README.md | 69 ++++++++++++++++++++++++++++++------------------- 2 files changed, 44 insertions(+), 27 deletions(-) diff --git a/ci/bootstrap.py b/ci/bootstrap.py index 762e84f432f..e09f251ab55 100644 --- a/ci/bootstrap.py +++ b/ci/bootstrap.py @@ -13,7 +13,7 @@ from batch.driver.k8s_cache import K8sCache -KUBERNETES_SERVER_URL = 'https://34.71.246.49' +KUBERNETES_SERVER_URL = os.environ['KUBERNETES_SERVER_URL'] SHA = os.environ['HAIL_SHA'] def populate_secret_host_path(host_path, secret_data): diff --git a/infra/README.md b/infra/README.md index be476aa019f..b0d73f2955c 100644 --- a/infra/README.md +++ b/infra/README.md @@ -3,8 +3,12 @@ infrastructure. Instructions: -- You will need a GCP project. We assume `gcloud` is configured to - point at your project. +- You will need a GCP project. 
Configure `gcloud` to point at your project: + + ``` + gcloud config set project + gcloud config set compute/zone + ``` - Create a service account for Terraform with Owner role, create a service account key and place it in @@ -27,7 +31,7 @@ Instructions: - Create `$HOME/.hail/global.tfvars` that looks like: ``` - gcp_project = "" + gcp_project = "" gcp_region = "" gcp_zone = "" domain = "" @@ -36,8 +40,11 @@ Instructions: - Run `terraform init`. - Run `terraform apply -var-file="$HOME/.hail/global.tfvars"`. - Terraform has created a GKE cluster named `vdc`. We assume - `kubectl` is configured to point at this cluster. + Terraform has created a GKE cluster named `vdc`. Configure `kubectl` to point at the vdc cluster: + + ``` + gcloud container clusters get-credentials vdc + ``` - Go to the Google Cloud console, VPC networks > internal > Private service connection > Private connections to services, and enable @@ -45,13 +52,40 @@ Instructions: You can now install Hail: -- Run `$HAIL/docker/third-party/copy_images.sh`. This copies some - base images from Dockerhub (which now has rate limits) to GCR. +- Create a VM on the internal network, 100GB, Ubuntu 20.04 TLS, allow + full access to all Cloud APIs. 10GB will run out of space. We + assume the rest of the commands are run on the VM. -- Generate TLS certificates. See ../dev-docs/tls-cookbook.md. +- Standardize file permissions. This is for docker, which considers + permissions for caching. Run `echo 'umask 022' > ~/.profile`. You + will need to log out/in or run `umask 022`. - Update $HAIL/config.mk with your infrastructure settings. You can - get settings from the default/global-config secret. + get settings from the default/global-config secret: + + ``` + kubectl get secret global-config -o json | jq '.data | map_values(@base64d)' + ``` + +- Install some dependencies on the VM: + + ``` + sudo apt update + sudo apt install -y docker.io python3-pip + sudo usermod -a -G docker $USER + gcloud -q auth configure-docker + gcloud container clusters get-credentials --zone us-central1-a vdc + git clone https://github.com/cseed/hail.git + python3 -m pip install -r $HOME/hail/docker/requirements.txt + ``` + + You will have to log out/in for usermod to take effect. + +- Run `PROJECT= + $HAIL/docker/third-party/copy_images.sh`. This copies some base + images from Dockerhub (which now has rate limits) to GCR. + +- Generate TLS certificates. See ../dev-docs/tls-cookbook.md. - Run `kubectl -n default apply -f $HAIL/ci/bootstrap.yaml`. @@ -78,23 +112,6 @@ You can now install Hail: kubectl -n default create secret generic auth-oauth2-client-secret --from-file=./client_secret.json ``` -- Create a VM on the internal network, 100GB, Ubuntu 20.04 TLS, allow - full access to all Cloud APIs. 10GB will run out of space. - -- Install some dependencies: - - ``` - sudo apt update - sudo apt install -y docker.io python3-pip - sudo usermod -a -G docker $USER - gcloud -q auth configure-docker - gcloud container clusters get-credentials --zone us-central1-a vdc - git clone https://github.com/cseed/hail.git - python3 -m pip install -r $HOME/hail/docker/requirements.txt - ``` - - You will have to log out/in for usermod to take effect. 
- - Bootstrap the cluster by running: ``` From 812ff4b3cc1f64042d39a9062b012392180d36ac Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Fri, 20 Nov 2020 11:12:48 -0500 Subject: [PATCH 049/501] add roles for service accounts --- infra/main.tf | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/infra/main.tf b/infra/main.tf index fc7297f4ac8..638c9e7dba8 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -431,6 +431,21 @@ resource "kubernetes_secret" "batch_gsa_key" { } } +resource "google_project_iam_member" "batch_compute_instance_admin" { + role = "roles/compute.instanceAdmin.v1" + member = "serviceAccount:${google_service_account.batch.email}" +} + +resource "google_project_iam_member" "batch_service_account_user" { + role = "roles/iam.serviceAccountUser" + member = "serviceAccount:${google_service_account.batch.email}" +} + +resource "google_project_iam_member" "batch_logging_viewer" { + role = "roles/logging.viewer" + member = "serviceAccount:${google_service_account.batch.email}" +} + resource "google_service_account" "benchmark" { account_id = "benchmark" } @@ -489,6 +504,26 @@ resource "kubernetes_secret" "test_gsa_key" { } } +resource "google_project_iam_member" "test_compute_instance_admin" { + role = "roles/compute.instanceAdmin.v1" + member = "serviceAccount:${google_service_account.test.email}" +} + +resource "google_project_iam_member" "test_service_account_user" { + role = "roles/iam.serviceAccountUser" + member = "serviceAccount:${google_service_account.test.email}" +} + +resource "google_project_iam_member" "test_logging_viewer" { + role = "roles/logging.viewer" + member = "serviceAccount:${google_service_account.test.email}" +} + +resource "google_project_iam_member" "test_service_usage_consumer" { + role = "roles/serviceusage.serviceUsageConsumer" + member = "serviceAccount:${google_service_account.test.email}" +} + resource "google_service_account" "test_dev" { account_id = "test-dev" } @@ -506,3 +541,32 @@ resource "kubernetes_secret" "test_dev_gsa_key" { "key.json" = base64decode(google_service_account_key.test_dev_key.private_key) } } + +resource "google_service_account" "batch_agent" { + account_id = "batch2-agent" +} + +resource "google_project_iam_member" "batch_agent_compute_instance_admin" { + role = "roles/compute.instanceAdmin.v1" + member = "serviceAccount:${google_service_account.batch_agent.email}" +} + +resource "google_project_iam_member" "batch_agent_service_account_user" { + role = "roles/iam.serviceAccountUser" + member = "serviceAccount:${google_service_account.batch_agent.email}" +} + +resource "google_project_iam_member" "batch_agent_log_writer" { + role = "roles/logging.logWriter" + member = "serviceAccount:${google_service_account.batch_agent.email}" +} + +resource "google_project_iam_member" "batch_agent_object_creator" { + role = "roles/storage.objectCreator" + member = "serviceAccount:${google_service_account.batch_agent.email}" +} + +resource "google_project_iam_member" "batch_agent_object_viewer" { + role = "roles/storage.objectViewer" + member = "serviceAccount:${google_service_account.batch_agent.email}" +} From b3a60a0dfcc665c285bd6b16736455fa05ed0543 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Fri, 20 Nov 2020 11:46:50 -0500 Subject: [PATCH 050/501] added vdc to batch-worker firewall rule --- infra/main.tf | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/infra/main.tf b/infra/main.tf index 638c9e7dba8..ec0740c3614 100644 --- 
a/infra/main.tf +++ b/infra/main.tf @@ -67,7 +67,7 @@ resource "google_container_node_pool" "vdc_preemptible_pool" { cluster = google_container_cluster.vdc.name autoscaling { - min_node_count = 0 + min_node_count = 1 max_node_count = 200 } @@ -101,7 +101,7 @@ resource "google_container_node_pool" "vdc_nonpreemptible_pool" { cluster = google_container_cluster.vdc.name autoscaling { - min_node_count = 0 + min_node_count = 1 max_node_count = 200 } @@ -570,3 +570,26 @@ resource "google_project_iam_member" "batch_agent_object_viewer" { role = "roles/storage.objectViewer" member = "serviceAccount:${google_service_account.batch_agent.email}" } + +resource "google_compute_firewall" "vdc_to_batch_worker" { + name = "vdc-to-batch-worker" + network = google_compute_network.internal.name + + source_ranges = [google_container_cluster.vdc.cluster_ipv4_cidr] + + target_tags = ["batch2-worker"] + + allow { + protocol = "icmp" + } + + allow { + protocol = "tcp" + ports = ["1-65535"] + } + + allow { + protocol = "udp" + ports = ["1-65535"] + } +} From cf12f455ccf734408e154f7954f76114df048531 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Fri, 20 Nov 2020 12:55:15 -0500 Subject: [PATCH 051/501] fix --- ci/bootstrap_create_accounts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/bootstrap_create_accounts.py b/ci/bootstrap_create_accounts.py index bf278b72167..343a4214324 100644 --- a/ci/bootstrap_create_accounts.py +++ b/ci/bootstrap_create_accounts.py @@ -16,7 +16,7 @@ async def insert_user_if_not_exists(app, username, email, is_developer, is_servi db = app['db'] k8s_client = app['k8s_client'] - row = await db.execute_and_fetchone('SELECT id FROM users where username = %s;', (username,)) + row = await db.execute_and_fetchone('SELECT id, state FROM users where username = %s;', (username,)) if row: if row['state'] == 'active': return None From 82050ed7a9049e7bbc3c41e382d5f7136ddb23f8 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Sat, 5 Dec 2020 02:57:48 -0500 Subject: [PATCH 052/501] fixes --- infra/README.md | 29 +++++++-- infra/main.tf | 152 ++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 151 insertions(+), 30 deletions(-) diff --git a/infra/README.md b/infra/README.md index b0d73f2955c..6ac6c54cb7f 100644 --- a/infra/README.md +++ b/infra/README.md @@ -10,6 +10,8 @@ Instructions: gcloud config set compute/zone ``` +- Delete the default network if it exists. + - Create a service account for Terraform with Owner role, create a service account key and place it in `$HOME/.hail/terraform_sa_key.json`. @@ -23,7 +25,8 @@ Instructions: servicenetworking.googleapis.com \ sqladmin.googleapis.com \ container.googleapis.com \ - serviceusage.googleapis.com + serviceusage.googleapis.com \ + dns.googleapis.com ``` - Install terraform. @@ -31,7 +34,15 @@ Instructions: - Create `$HOME/.hail/global.tfvars` that looks like: ``` + gsuite_organization = "" + # batch_gcp_regions is a JSON array of string, the names of the gcp + # regions to schedule over in Batch. + batch_gcp_regions = "" gcp_project = "" + # gcp_location is the bucket location that spans the regions you're + # going to schedule across in Batch. If you are running on one + # region, it can just be that region. + gcp_location = "" gcp_region = "" gcp_zone = "" domain = "" @@ -40,7 +51,8 @@ Instructions: - Run `terraform init`. - Run `terraform apply -var-file="$HOME/.hail/global.tfvars"`. - Terraform has created a GKE cluster named `vdc`. 
Configure `kubectl` to point at the vdc cluster: + Terraform has created a GKE cluster named `vdc`. Configure + `kubectl` to point at the vdc cluster: ``` gcloud container clusters get-credentials vdc @@ -79,11 +91,16 @@ You can now install Hail: python3 -m pip install -r $HOME/hail/docker/requirements.txt ``` - You will have to log out/in for usermod to take effect. + You will have to log out/in for the usermod to take effect. + +- Run + + ``` + PROJECT= $HAIL/docker/third-party/copy_images.sh + ``` -- Run `PROJECT= - $HAIL/docker/third-party/copy_images.sh`. This copies some base - images from Dockerhub (which now has rate limits) to GCR. + This copies some base images from Dockerhub (which now has rate + limits) to GCR. - Generate TLS certificates. See ../dev-docs/tls-cookbook.md. diff --git a/infra/main.tf b/infra/main.tf index ec0740c3614..76c40c70de0 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -11,7 +11,10 @@ terraform { } } +variable "gsuite_organization" {} +variable "batch_gcp_regions" {} variable "gcp_project" {} +variable "gcp_location" {} variable "gcp_region" {} variable "gcp_zone" {} variable "domain" {} @@ -30,20 +33,20 @@ resource "google_project_service" "service_networking" { service = "servicenetworking.googleapis.com" } -resource "google_compute_network" "internal" { - name = "internal" +resource "google_compute_network" "default" { + name = "default" } -data "google_compute_subnetwork" "internal_default_region" { - name = "internal" +data "google_compute_subnetwork" "default_region" { + name = "default" region = var.gcp_region - depends_on = [google_compute_network.internal] + depends_on = [google_compute_network.default] } resource "google_container_cluster" "vdc" { name = "vdc" location = var.gcp_zone - network = google_compute_network.internal.name + network = google_compute_network.default.name # We can't create a cluster with no node pool defined, but we want to only use # separately managed node pools. So we create the smallest possible default @@ -66,8 +69,11 @@ resource "google_container_node_pool" "vdc_preemptible_pool" { location = var.gcp_zone cluster = google_container_cluster.vdc.name + # Allocate at least one node, so that autoscaling can take place. + initial_node_count = 1 + autoscaling { - min_node_count = 1 + min_node_count = 0 max_node_count = 200 } @@ -100,8 +106,11 @@ resource "google_container_node_pool" "vdc_nonpreemptible_pool" { location = var.gcp_zone cluster = google_container_cluster.vdc.name + # Allocate at least one node, so that autoscaling can take place. + initial_node_count = 1 + autoscaling { - min_node_count = 1 + min_node_count = 0 max_node_count = 200 } @@ -123,22 +132,24 @@ resource "google_container_node_pool" "vdc_nonpreemptible_pool" { } } -resource "google_compute_global_address" "google_managed_services_internal" { - name = "google-managed-services-internal" +resource "random_id" "db_name_suffix" { + byte_length = 4 +} + +# Without this, I get: +# Error: Error, failed to create instance because the network doesn't have at least 1 private services connection. Please see https://cloud.google.com/sql/docs/mysql/private-ip#network_requirements for how to create this connection. 
+resource "google_compute_global_address" "google_managed_services_default" { + name = "google-managed-services-default" purpose = "VPC_PEERING" address_type = "INTERNAL" prefix_length = 16 - network = google_compute_network.internal.id + network = google_compute_network.default.id } resource "google_service_networking_connection" "private_vpc_connection" { - network = google_compute_network.internal.id + network = google_compute_network.default.id service = "servicenetworking.googleapis.com" - reserved_peering_ranges = [google_compute_global_address.google_managed_services_internal.name] -} - -resource "random_id" "db_name_suffix" { - byte_length = 4 + reserved_peering_ranges = [google_compute_global_address.google_managed_services_default.name] } resource "google_sql_database_instance" "db" { @@ -155,7 +166,7 @@ resource "google_sql_database_instance" "db" { ip_configuration { ipv4_enabled = false - private_network = google_compute_network.internal.id + private_network = google_compute_network.default.id require_ssl = true } } @@ -168,7 +179,7 @@ resource "google_compute_address" "gateway" { resource "google_compute_address" "internal_gateway" { name = "internal-gateway" - subnetwork = data.google_compute_subnetwork.internal_default_region.id + subnetwork = data.google_compute_subnetwork.default_region.id address_type = "INTERNAL" region = var.gcp_region } @@ -189,13 +200,17 @@ resource "kubernetes_secret" "global_config" { } data = { - gcp_project = var.gcp_project + batch_gcp_regions = var.batch_gcp_regions + default_namespace = "default" + docker_root_image = "gcr.io/${var.gcp_project}/ubuntu:18.04" domain = var.domain + gcp_project = var.gcp_project + gcp_region = var.gcp_region + gcp_zone = var.gcp_zone + gsuite_organization = var.gsuite_organization internal_ip = google_compute_address.internal_gateway.address ip = google_compute_address.gateway.address kubernetes_server_url = "https://${google_container_cluster.vdc.endpoint}" - gcp_region = var.gcp_region - gcp_zone = var.gcp_zone } } @@ -464,6 +479,28 @@ resource "kubernetes_secret" "benchmark_gsa_key" { } } +resource "random_id" "ci_name_suffix" { + byte_length = 2 +} + +resource "google_service_account" "ci" { + account_id = "ci-${random_id.ci_name_suffix.hex}" +} + +resource "google_service_account_key" "ci_key" { + service_account_id = google_service_account.ci.name +} + +resource "kubernetes_secret" "ci_gsa_key" { + metadata { + name = "ci-gsa-key" + } + + data = { + "key.json" = base64decode(google_service_account_key.ci_key.private_key) + } +} + resource "google_service_account" "monitoring" { account_id = "monitoring" } @@ -571,13 +608,52 @@ resource "google_project_iam_member" "batch_agent_object_viewer" { member = "serviceAccount:${google_service_account.batch_agent.email}" } +resource "google_compute_firewall" "default_allow_internal" { + name = "default-allow-internal" + network = google_compute_network.default.name + + priority = 65534 + + source_ranges = ["10.128.0.0/9"] + + allow { + protocol = "tcp" + ports = ["0-65535"] + } + + allow { + protocol = "udp" + ports = ["0-65535"] + } + + allow { + protocol = "icmp" + } +} + +resource "google_compute_firewall" "allow_ssh" { + name = "allow-ssh" + network = google_compute_network.default.name + + priority = 65534 + + source_ranges = ["0.0.0.0/0"] + + target_tags = ["allow-ssh"] + + allow { + protocol = "tcp" + ports = ["22"] + } +} + resource "google_compute_firewall" "vdc_to_batch_worker" { name = "vdc-to-batch-worker" - network = google_compute_network.internal.name 
+ network = google_compute_network.default.name source_ranges = [google_container_cluster.vdc.cluster_ipv4_cidr] - target_tags = ["batch2-worker"] + target_tags = ["batch2-agent"] allow { protocol = "icmp" @@ -593,3 +669,31 @@ resource "google_compute_firewall" "vdc_to_batch_worker" { ports = ["1-65535"] } } + +resource "google_storage_bucket" "batch_logs" { + name = "batch-logs" + location = var.gcp_location + force_destroy = true + storage_class = "MULTI_REGIONAL" +} + +resource "google_dns_managed_zone" "dns_zone" { + name = "dns-zone" + dns_name = "hail." + visibility = "private" + + private_visibility_config { + networks { + network_url = google_compute_network.default.id + } + } +} + +resource "google_dns_record_set" "internal_gateway" { + name = "*.${google_dns_managed_zone.dns_zone.dns_name}" + managed_zone = google_dns_managed_zone.dns_zone.name + type = "A" + ttl = 300 + + rrdatas = [google_compute_address.internal_gateway.address] +} From 894c2661a8207253c75d0b1b3bb1d19c17f25b89 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Sat, 5 Dec 2020 11:55:53 -0500 Subject: [PATCH 053/501] fixes --- batch/Makefile | 2 +- config.mk | 4 ++-- monitoring/Makefile | 3 +-- query/Makefile | 2 +- site/Makefile | 2 +- 5 files changed, 6 insertions(+), 7 deletions(-) diff --git a/batch/Makefile b/batch/Makefile index 1b4072b981d..f07b3895549 100644 --- a/batch/Makefile +++ b/batch/Makefile @@ -51,7 +51,7 @@ deploy: push .PHONY: create-build-worker-image-instance create-build-worker-image-instance: -gcloud -q compute --project $(PROJECT) instances delete --zone=$(ZONE) build-batch-worker-image - gcloud -q compute --project $(PROJECT) instances create --zone=$(ZONE) build-batch-worker-image --machine-type=n1-standard-1 --network=default --network-tier=PREMIUM --metadata-from-file startup-script=build-batch-worker-image-startup.sh --no-restart-on-failure --maintenance-policy=MIGRATE --scopes=https://www.googleapis.com/auth/cloud-platform --image=ubuntu-minimal-1804-bionic-v20200923 --image-project=ubuntu-os-cloud --boot-disk-size=10GB --boot-disk-type=pd-ssd + gcloud -q compute --project $(PROJECT) instances create --zone=$(ZONE) build-batch-worker-image --machine-type=n1-standard-1 --network=default --network-tier=PREMIUM --metadata-from-file startup-script=build-batch-worker-image-startup.sh --no-restart-on-failure --maintenance-policy=MIGRATE --scopes=https://www.googleapis.com/auth/cloud-platform --image=$$(gcloud compute images list --standard-images --filter 'family="ubuntu-minimal-2004-lts"' --format='value(name)') --image-project=ubuntu-os-cloud --boot-disk-size=10GB --boot-disk-type=pd-ssd .PHONY: create-worker-image create-worker-image: diff --git a/config.mk b/config.mk index 218fd309548..bfa2b517038 100644 --- a/config.mk +++ b/config.mk @@ -2,8 +2,8 @@ PROJECT := hail-vdc-staging DOCKER_ROOT_IMAGE := gcr.io/$(PROJECT)/ubuntu:18.04 DOMAIN := staging.hail.is INTERNAL_IP := 10.128.0.2 -IP := 34.121.206.184 -KUBERNETES_SERVER_URL := https://34.71.246.49 +IP := 35.224.188.20 +KUBERNETES_SERVER_URL := https://34.123.185.11 REGION := us-central1 ZONE := us-central1-a ifeq ($(NAMESPACE),default) diff --git a/monitoring/Makefile b/monitoring/Makefile index decd1b428aa..91aac0736e5 100644 --- a/monitoring/Makefile +++ b/monitoring/Makefile @@ -1,5 +1,4 @@ -PROJECT = $(shell gcloud config get-value project) -DOMAIN ?= hail.is +include ../config.mk MONITORING_LATEST = gcr.io/$(PROJECT)/monitoring:latest MONITORING_IMAGE = gcr.io/$(PROJECT)/monitoring:$(shell docker images -q --no-trunc 
monitoring:latest | sed -e 's,[^:]*:,,') diff --git a/query/Makefile b/query/Makefile index 89e83a6485b..8c471292109 100644 --- a/query/Makefile +++ b/query/Makefile @@ -32,5 +32,5 @@ push: build deploy: push ! [ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default kubectl -n $(NAMESPACE) apply -f service-account.yaml - python3 ../ci/jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"query_image":{"image":"$(QUERY_IMAGE)"},"global":{"project":"$(PROJECT)","domain":"$(DOMAIN)"}}' deployment.yaml deployment.yaml.out + python3 ../ci/jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"default_ns":{"name":"$(NAMESPACE)"},"query_image":{"image":"$(QUERY_IMAGE)"},"global":{"project":"$(PROJECT)","domain":"$(DOMAIN)"}}' deployment.yaml deployment.yaml.out kubectl -n $(NAMESPACE) apply -f deployment.yaml.out diff --git a/site/Makefile b/site/Makefile index 406c68b943f..6c0942897c8 100644 --- a/site/Makefile +++ b/site/Makefile @@ -49,7 +49,7 @@ push: build deploy: push ! [ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default $(MAKE) -C ../docker hail-ubuntu - python3 ../ci/jinja2_render.py '{"global": {"domain":"hail.is"},"default_ns":{"name":"$(NAMESPACE)"},"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DPELOY),"site_image":{"image":"$(IMAGE)"},"hail_ubuntu_image":{"image":"hail-ubuntu"}' deployment.yaml deployment.yaml.out + python3 ../ci/jinja2_render.py '{"global": {"domain":"$(DOMAIN)"},"default_ns":{"name":"$(NAMESPACE)"},"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"site_image":{"image":"$(IMAGE)"},"hail_ubuntu_image":{"image":"hail-ubuntu"}' deployment.yaml deployment.yaml.out kubectl -n $(NAMESPACE) apply -f deployment.yaml.out clean: From b71cd6a3cc7b55cd85ac884093c736af50b6577d Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Sat, 5 Dec 2020 12:04:36 -0500 Subject: [PATCH 054/501] allow ssh --- infra/main.tf | 2 -- 1 file changed, 2 deletions(-) diff --git a/infra/main.tf b/infra/main.tf index 76c40c70de0..2978b2a115e 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -639,8 +639,6 @@ resource "google_compute_firewall" "allow_ssh" { source_ranges = ["0.0.0.0/0"] - target_tags = ["allow-ssh"] - allow { protocol = "tcp" ports = ["22"] From 64916bbe4afd26e8dc5e9f69e7500b4ac4d288e7 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Sat, 5 Dec 2020 16:13:58 -0500 Subject: [PATCH 055/501] wip --- batch/deployment.yaml | 10 ++++++++-- ci/bootstrap.py | 4 +++- dev-docs/tls-cookbook.md | 5 +++-- infra/README.md | 20 ++++++++++++-------- infra/main.tf | 7 ++++++- letsencrypt/Dockerfile | 5 +---- 6 files changed, 33 insertions(+), 18 deletions(-) diff --git a/batch/deployment.yaml b/batch/deployment.yaml index 58ad51811f6..a9b900c961e 100644 --- a/batch/deployment.yaml +++ b/batch/deployment.yaml @@ -67,7 +67,10 @@ spec: {% endif %} {% if deploy %} - name: HAIL_BATCH_BUCKET_NAME - value: hail-batch + valueFrom: + secretKeyRef: + name: global-config + key: batch_logs_bucket - name: STANDING_WORKER_MAX_IDLE_TIME_SECS value: "7200" # 2 hours {% else %} @@ -188,7 +191,10 @@ spec: {% endif %} {% if deploy %} - name: HAIL_BATCH_BUCKET_NAME - value: hail-batch + valueFrom: + secretKeyRef: + name: global-config + key: batch_logs_bucket - name: STANDING_WORKER_MAX_IDLE_TIME_SECS value: "7200" # 2 hours {% else %} diff --git a/ci/bootstrap.py b/ci/bootstrap.py index e09f251ab55..045e95092a0 100644 --- a/ci/bootstrap.py +++ b/ci/bootstrap.py @@ -96,7 
+96,9 @@ async def run(self): for j in self._jobs: job_name = j._attributes.get('name') - + + print(f'{j._index}: {job_name}: running...') + if j._parents: for p in j._parents: assert p._done diff --git a/dev-docs/tls-cookbook.md b/dev-docs/tls-cookbook.md index fe54e94613f..2ca392087a6 100644 --- a/dev-docs/tls-cookbook.md +++ b/dev-docs/tls-cookbook.md @@ -117,9 +117,10 @@ kubectl create secret generic \ 3. Update all the service certificates: ``` -python3 $HAIL_HOME/tls/create_certs.py \ +PYTHONPATH=$HAIL/hail/python \ + python3 $HAIL/tls/create_certs.py \ default \ - $HAIL_HOME/tls/config.yaml \ + $HAIL/tls/config.yaml \ hail-root-key.pem \ hail-root-cert.pem ``` diff --git a/infra/README.md b/infra/README.md index 6ac6c54cb7f..e8c0b56dbb1 100644 --- a/infra/README.md +++ b/infra/README.md @@ -72,31 +72,37 @@ You can now install Hail: permissions for caching. Run `echo 'umask 022' > ~/.profile`. You will need to log out/in or run `umask 022`. +- Clone the Hail Github repository: + + ``` + git clone https://github.com/hail-is/hail.git + ``` + - Update $HAIL/config.mk with your infrastructure settings. You can get settings from the default/global-config secret: ``` - kubectl get secret global-config -o json | jq '.data | map_values(@base64d)' + kubectl -n default get secret global-config -o json | jq '.data | map_values(@base64d)' ``` - Install some dependencies on the VM: ``` sudo apt update - sudo apt install -y docker.io python3-pip + sudo apt install -y docker.io python3-pip openjdk-8-jre-headless + sudo snap install --classic kubectl sudo usermod -a -G docker $USER gcloud -q auth configure-docker gcloud container clusters get-credentials --zone us-central1-a vdc - git clone https://github.com/cseed/hail.git python3 -m pip install -r $HOME/hail/docker/requirements.txt ``` You will have to log out/in for the usermod to take effect. -- Run +- In `$HAIL/docker/third-party` run: ``` - PROJECT= $HAIL/docker/third-party/copy_images.sh + PROJECT= ./copy_images.sh ``` This copies some base images from Dockerhub (which now has rate @@ -139,9 +145,7 @@ You can now install Hail: HAIL_DEFAULT_NAMESPACE='default' \ HAIL_DOMAIN= \ HAIL_GCP_ZONE= \ - GCP_PROJECT= \ + HAIL_GCP_PROJECT= \ PYTHONPATH=$HOME/hail/ci:$HOME/hail/batch:$HOME/hail/hail/python \ python3 $HAIL/ci/bootstrap.py ``` - -- You may want to add a suitable ssh forward rule to the internal network. 
diff --git a/infra/main.tf b/infra/main.tf index 2978b2a115e..e69c4adeeed 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -201,6 +201,7 @@ resource "kubernetes_secret" "global_config" { data = { batch_gcp_regions = var.batch_gcp_regions + batch_logs_bucket = google_storage_bucket.batch_logs.name default_namespace = "default" docker_root_image = "gcr.io/${var.gcp_project}/ubuntu:18.04" domain = var.domain @@ -668,8 +669,12 @@ resource "google_compute_firewall" "vdc_to_batch_worker" { } } +resource "random_id" "batch_logs_bucket_name_suffix" { + byte_length = 2 +} + resource "google_storage_bucket" "batch_logs" { - name = "batch-logs" + name = "batch-logs-${random_id.batch_logs_bucket_name_suffix.hex}" location = var.gcp_location force_destroy = true storage_class = "MULTI_REGIONAL" diff --git a/letsencrypt/Dockerfile b/letsencrypt/Dockerfile index 29f5388260c..4f46f6ece3a 100644 --- a/letsencrypt/Dockerfile +++ b/letsencrypt/Dockerfile @@ -1,10 +1,7 @@ FROM {{ hail_ubuntu_image.image }} RUN apt update && \ - apt install -y nginx software-properties-common && \ - bash -c 'DEBIAN_FRONTEND=noninteractive apt install -y tzdata' && \ - python3.6 /usr/bin/add-apt-repository -y ppa:certbot/certbot && \ - apt install -y python-certbot-nginx wget && \ + apt install -y nginx wget certbot python3-certbot-nginx && \ rm -rf /var/lib/apt/lists/* RUN hail-pip-install cffi From 4461fdc974bca6c1e2a65c6fc19dcd96b411112d Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Sat, 5 Dec 2020 21:45:43 -0500 Subject: [PATCH 056/501] fix --- auth/auth/driver/driver.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/auth/auth/driver/driver.py b/auth/auth/driver/driver.py index 41f04bf460b..193a49f81c1 100644 --- a/auth/auth/driver/driver.py +++ b/auth/auth/driver/driver.py @@ -355,15 +355,16 @@ async def _create_user(app, user, skip_trial_bp, cleanup): k8s_client = app['k8s_client'] iam_client = app['iam_client'] + username = user['username'] if user['is_service_account'] != 1: token = secret_alnum_string(5, case='lower') - ident_token = f'{user["username"]}-{token}' + ident_token = f'{username}-{token}' else: token = secret_alnum_string(3, case='numbers') - ident_token = f'{user["username"]}-{token}' + ident_token = f'{username}-{token}' - if user['is_developer'] == 1 or user['is_service_account'] == 1: - ident = user['username'] + if user['is_developer'] == 1 or user['is_service_account'] == 1 or username == 'test': + ident = username else: ident = ident_token @@ -427,7 +428,6 @@ async def _create_user(app, user, skip_trial_bp, cleanup): trial_bp = user['trial_bp_name'] if trial_bp is None: batch_client = app['batch_client'] - username = user['username'] billing_project_name = f'{username}-trial' billing_project = BillingProjectResource(batch_client) cleanup.append(billing_project.delete) From 5aedef1f25b586e6ad713bf624a5ce41ca9437d0 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Sun, 6 Dec 2020 11:47:19 -0500 Subject: [PATCH 057/501] make batch gsa storage.admin --- infra/main.tf | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/infra/main.tf b/infra/main.tf index e69c4adeeed..be0afffc82e 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -462,6 +462,11 @@ resource "google_project_iam_member" "batch_logging_viewer" { member = "serviceAccount:${google_service_account.batch.email}" } +resource "google_project_iam_member" "batch_storage_admin" { + role = "roles/storage.admin" + member = "serviceAccount:${google_service_account.batch.email}" +} + resource "google_service_account" 
"benchmark" { account_id = "benchmark" } From dde9478975bbf31a8e6e84ea6cbbbc923862b2fc Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Sun, 6 Dec 2020 12:37:36 -0500 Subject: [PATCH 058/501] fix up batch worker image --- batch/build-batch-worker-image-startup.sh | 5 ++++- infra/README.md | 12 ++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/batch/build-batch-worker-image-startup.sh b/batch/build-batch-worker-image-startup.sh index f2ffbafb731..9d6df2b02be 100644 --- a/batch/build-batch-worker-image-startup.sh +++ b/batch/build-batch-worker-image-startup.sh @@ -37,9 +37,12 @@ curl -fsSL "https://github.com/GoogleCloudPlatform/docker-credential-gcr/release | tar xz --to-stdout ./docker-credential-gcr \ > /usr/bin/docker-credential-gcr && chmod +x /usr/bin/docker-credential-gcr +# avoid "unable to get current user home directory: os/user lookup failed" +export HOME=/root docker-credential-gcr configure-docker -docker pull gcr.io/hail-vdc/ubuntu:18.04 +GCP_PROJECT=$(curl -s -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/project/project-id") +docker pull gcr.io/$GCP_PROJECT/ubuntu:18.04 docker pull gcr.io/google.com/cloudsdktool/cloud-sdk:310.0.0-alpine # add docker daemon debug logging diff --git a/infra/README.md b/infra/README.md index e8c0b56dbb1..2d835fe8405 100644 --- a/infra/README.md +++ b/infra/README.md @@ -135,6 +135,18 @@ You can now install Hail: kubectl -n default create secret generic auth-oauth2-client-secret --from-file=./client_secret.json ``` +- Create the batch worker image. In `$HAIL/batch`, run: + + ``` + make create-build-worker-image-instance + ``` + + Wait for the `build-batch-worker-image` instance to be stopped. Then run: + + ``` + make create-worker-image + ``` + - Bootstrap the cluster by running: ``` From a8c8725a030f12108488f7c199f0b029b12f64ba Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Sun, 6 Dec 2020 12:39:35 -0500 Subject: [PATCH 059/501] [infra] add option to create initial user in bootstrap --- build.yaml | 21 +++++++++++++++++ ci/bootstrap.py | 45 ++++++++++++++++++++++++++++++++---- ci/create_initial_account.py | 39 +++++++++++++++++++++++++++++++ infra/README.md | 17 +++++++------- 4 files changed, 108 insertions(+), 14 deletions(-) create mode 100644 ci/create_initial_account.py diff --git a/build.yaml b/build.yaml index 11a96bf3e41..6a60c4c0041 100644 --- a/build.yaml +++ b/build.yaml @@ -145,6 +145,8 @@ steps: to: /repo/address/ - from: /io/repo/ci/bootstrap_create_accounts.py to: /repo/ci/ + - from: /io/repo/ci/create_initial_account.py + to: /repo/ci/ dependsOn: - base_image - kind: buildImage @@ -416,6 +418,25 @@ steps: - create_deploy_config - deploy_auth_driver_service_account - create_test_gsa_keys + - kind: runImage + name: create_initial_user + image: + valueFrom: service_base_image.image + script: | + set -ex + python3 /io/create_initial_account.py {{ code.username }} {{ code.email }} + secrets: + - name: + valueFrom: auth_database.user_secret_name + namespace: + valueFrom: default_ns.name + mountPath: /sql-config + inputs: + - from: /repo/create_initial_account.py + to: /io/create_initial_account.py + dependsOn: + - auth_database + - service_base_image - kind: buildImage name: hail_build_image dockerFile: hail/Dockerfile.hail-build diff --git a/ci/bootstrap.py b/ci/bootstrap.py index 045e95092a0..f72d90023ef 100644 --- a/ci/bootstrap.py +++ b/ci/bootstrap.py @@ -2,6 +2,7 @@ from shlex import quote as shq import base64 import asyncio +import argparse import kubernetes_asyncio as 
kube @@ -14,7 +15,7 @@ from batch.driver.k8s_cache import K8sCache KUBERNETES_SERVER_URL = os.environ['KUBERNETES_SERVER_URL'] -SHA = os.environ['HAIL_SHA'] + def populate_secret_host_path(host_path, secret_data): os.makedirs(host_path) @@ -243,11 +244,12 @@ async def run(self): class Branch(Code): - def __init__(self, owner, repo, branch, sha): + def __init__(self, owner, repo, branch, sha, config_extra): self._owner = owner self._repo = repo self._branch = branch self._sha = sha + self._extra_config = extra_config def short_str(self): return f'br-{self._owner}-{self._repo}-{self._branch}' @@ -259,13 +261,15 @@ def branch_url(self): return f'https://github.com/{self._owner}/{self._repo}' def config(self): - return { + config = { 'checkout_script': self.checkout_script(), 'branch': self._branch, 'repo': f'{self._owner}/{self._repo}', 'repo_url': self.branch_url(), 'sha': self._sha } + config.extend(self._extra_config) + return extra_config def checkout_script(self): return f''' @@ -276,11 +280,42 @@ def checkout_script(self): async def main(): + parser = argparse.ArgumentParser(description='Create initial Hail as a service account.') + + parser.add_argument('--extra-code-config', dest='extra_code_config', + help='Extra code config in JSON format.') + parser.add_argument('branch', + help='Github branch to run. It should be the same branch bootstrap.py is being run from.') + parser.add_argument('sha', + help='SHA of the git commit to run. It should match the branch.') + parser.add_argument('steps', + help='The requested steps to execute.') + + args = parser.parse_args() + + branch_pieces = args.branch.split(":") + assert len(branch_pieces) == 2, f'{branch_pieces} {s}' + + repo_pieces = branch_pieces[0].split("/") + assert len(repo_pieces) == 2, f'{repo_pieces} {branch_pieces[0]}' + owner = repo_pieces[0] + repo_name = repo_pieces[1] + + branch_name = branch_pieces[1] + + extra_code_config = args.extra_code_config + if extra_code_config is not None: + extra_code_config = json.loads(extra_code_config) + else: + extra_code_config = {} + scope = 'deploy' - code = Branch('cseed', 'hail', 'infra-1', SHA) + code = Branch(owner, repo_name, branch_name, args.sha, extra_code_config) + + steps = [s.strip() for s in args.steps.split(',')] with open(f'build.yaml', 'r') as f: - config = BuildConfiguration(code, f.read(), scope, requested_step_names=['deploy_batch']) + config = BuildConfiguration(code, f.read(), scope, requested_step_names=steps) token = generate_token() batch = LocalBatchBuilder( diff --git a/ci/create_initial_account.py b/ci/create_initial_account.py new file mode 100644 index 00000000000..1bbc8e3bdc9 --- /dev/null +++ b/ci/create_initial_account.py @@ -0,0 +1,39 @@ +import sys +import argparse +from hailtop.utils import async_to_blocking +from gear import Database + + +async def insert_user_if_not_exists(db, username, email): + row = await db.execute_and_fetchone('SELECT id, state FROM users where username = %s;', (username,)) + if row: + if row['state'] == 'active': + return None + return row['id'] + + return await db.execute_insertone( + ''' +INSERT INTO users (state, username, email, is_developer, is_service_account) +VALUES (%s, %s, %s, %s, %s, %s, %s, %s); +''', + ('creating', username, email, 1, 0)) + + +async def main(): + parser = argparse.ArgumentParser(description='Create initial Hail as a service account.') + + parser.add_argument('username', + help='The username of the initial user.') + parser.add_argument('email', + help='The email of the initial user.') + + args = 
parser.parse_args() + + db = Database() + await db.async_init(maxsize=50) + app['db'] = db + + await insert_user_if_not_exists(db, args.username, args.email) + + +async_to_blocking(main()) diff --git a/infra/README.md b/infra/README.md index 2d835fe8405..5f44c181744 100644 --- a/infra/README.md +++ b/infra/README.md @@ -150,14 +150,13 @@ You can now install Hail: - Bootstrap the cluster by running: ``` - HAIL_SHA=$(git rev-parse HEAD) \ HAIL_CI_UTILS_IMAGE=gcr.io//ci-utils:latest \ - HAIL_CI_BUCKET_NAME=dummy \ - KUBERNETES_SERVER_URL='' \ - HAIL_DEFAULT_NAMESPACE='default' \ - HAIL_DOMAIN= \ - HAIL_GCP_ZONE= \ - HAIL_GCP_PROJECT= \ - PYTHONPATH=$HOME/hail/ci:$HOME/hail/batch:$HOME/hail/hail/python \ - python3 $HAIL/ci/bootstrap.py + HAIL_CI_BUCKET_NAME=dummy \ + KUBERNETES_SERVER_URL='' \ + HAIL_DEFAULT_NAMESPACE='default' \ + HAIL_DOMAIN= \ + HAIL_GCP_ZONE= \ + HAIL_GCP_PROJECT= \ + PYTHONPATH=$HOME/hail/ci:$HOME/hail/batch:$HOME/hail/hail/python \ + python3 $HAIL/ci/bootstrap.py hail-is/hail:main $(git rev-parse HEAD) test_batch_0 ``` From 2469f34cb21b454950e6bd2f8486d5f2e60bd729 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Sun, 6 Dec 2020 14:03:04 -0500 Subject: [PATCH 060/501] fixes --- build.yaml | 7 +++++-- ci/bootstrap.py | 12 ++++++------ ci/create_initial_account.py | 1 - infra/README.md | 20 +++++++++++++++++++- 4 files changed, 30 insertions(+), 10 deletions(-) diff --git a/build.yaml b/build.yaml index 6a60c4c0041..3b49b4efd83 100644 --- a/build.yaml +++ b/build.yaml @@ -420,6 +420,7 @@ steps: - create_test_gsa_keys - kind: runImage name: create_initial_user + runIfRequested: true image: valueFrom: service_base_image.image script: | @@ -432,11 +433,13 @@ steps: valueFrom: default_ns.name mountPath: /sql-config inputs: - - from: /repo/create_initial_account.py + - from: /repo/ci/create_initial_account.py to: /io/create_initial_account.py dependsOn: - - auth_database + - default_ns - service_base_image + - copy_files + - auth_database - kind: buildImage name: hail_build_image dockerFile: hail/Dockerfile.hail-build diff --git a/ci/bootstrap.py b/ci/bootstrap.py index f72d90023ef..e6aee4c1b69 100644 --- a/ci/bootstrap.py +++ b/ci/bootstrap.py @@ -1,4 +1,5 @@ import os +import json from shlex import quote as shq import base64 import asyncio @@ -244,7 +245,7 @@ async def run(self): class Branch(Code): - def __init__(self, owner, repo, branch, sha, config_extra): + def __init__(self, owner, repo, branch, sha, extra_config): self._owner = owner self._repo = repo self._branch = branch @@ -268,8 +269,8 @@ def config(self): 'repo_url': self.branch_url(), 'sha': self._sha } - config.extend(self._extra_config) - return extra_config + config.update(self._extra_config) + return config def checkout_script(self): return f''' @@ -303,9 +304,8 @@ async def main(): branch_name = branch_pieces[1] - extra_code_config = args.extra_code_config - if extra_code_config is not None: - extra_code_config = json.loads(extra_code_config) + if args.extra_code_config is not None: + extra_code_config = json.loads(args.extra_code_config) else: extra_code_config = {} diff --git a/ci/create_initial_account.py b/ci/create_initial_account.py index 1bbc8e3bdc9..e2529bacda7 100644 --- a/ci/create_initial_account.py +++ b/ci/create_initial_account.py @@ -31,7 +31,6 @@ async def main(): db = Database() await db.async_init(maxsize=50) - app['db'] = db await insert_user_if_not_exists(db, args.username, args.email) diff --git a/infra/README.md b/infra/README.md index 5f44c181744..5fd35462c2f 100644 --- 
a/infra/README.md +++ b/infra/README.md @@ -26,7 +26,9 @@ Instructions: sqladmin.googleapis.com \ container.googleapis.com \ serviceusage.googleapis.com \ - dns.googleapis.com + dns.googleapis.com \ + logging.googleapis.com \ + cloudprofiler.googleapis.com ``` - Install terraform. @@ -160,3 +162,19 @@ You can now install Hail: PYTHONPATH=$HOME/hail/ci:$HOME/hail/batch:$HOME/hail/hail/python \ python3 $HAIL/ci/bootstrap.py hail-is/hail:main $(git rev-parse HEAD) test_batch_0 ``` + +- Create the initial (developer) user: + + ``` + HAIL_CI_UTILS_IMAGE=gcr.io//ci-utils:latest \ + HAIL_CI_BUCKET_NAME=dummy \ + KUBERNETES_SERVER_URL='' \ + HAIL_DEFAULT_NAMESPACE='default' \ + HAIL_DOMAIN= \ + HAIL_GCP_ZONE= \ + HAIL_GCP_PROJECT= \ + PYTHONPATH=$HOME/hail/ci:$HOME/hail/batch:$HOME/hail/hail/python \ + python3 $HAIL/ci/bootstrap.py --extra-code-config '{"username":"","email":""}' hail-is/hail:main $(git rev-parse HEAD) create_initial_user + ``` + + Additional users can be added by the intial user by going to auth.hail.is/users. From 25fcfbac976a7687eb18378400dea0d25d6c06a7 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Sun, 6 Dec 2020 14:09:48 -0500 Subject: [PATCH 061/501] fix --- ci/create_initial_account.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/create_initial_account.py b/ci/create_initial_account.py index e2529bacda7..5f0ab53e147 100644 --- a/ci/create_initial_account.py +++ b/ci/create_initial_account.py @@ -14,7 +14,7 @@ async def insert_user_if_not_exists(db, username, email): return await db.execute_insertone( ''' INSERT INTO users (state, username, email, is_developer, is_service_account) -VALUES (%s, %s, %s, %s, %s, %s, %s, %s); +VALUES (%s, %s, %s, %s, %s); ''', ('creating', username, email, 1, 0)) From fb1989ccd49cf1b994bd3cbb5c55cb45705b8349 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Sun, 6 Dec 2020 14:20:12 -0500 Subject: [PATCH 062/501] fix deploy config --- build.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build.yaml b/build.yaml index 3b49b4efd83..302d71022e1 100644 --- a/build.yaml +++ b/build.yaml @@ -286,13 +286,13 @@ steps: set -ex # k8s deploy config cat > deploy-config.json < deploy-config.json < Date: Sun, 6 Dec 2020 15:15:49 -0500 Subject: [PATCH 063/501] fixes and workarounds --- ci/bootstrap.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/ci/bootstrap.py b/ci/bootstrap.py index e6aee4c1b69..b90b1666b58 100644 --- a/ci/bootstrap.py +++ b/ci/bootstrap.py @@ -52,11 +52,11 @@ def __init__(self, index, image, command, *, async def docker_run(*args): script = ' '.join([shq(a) for a in args]) outerr = await check_shell_output(script) - + cid = outerr[0].decode('ascii').strip() - + outerr = await check_shell_output(f'docker wait {cid}') - + exit_code = int(outerr[0].decode('ascii').strip()) return cid, exit_code == 0 @@ -84,18 +84,14 @@ def create_job(self, image, command, **kwargs): async def run(self): cwd = os.getcwd() assert cwd.startswith('/') - + batch_token = self._attributes['token'] root = f'{cwd}/_/{batch_token}' - - await kube.config.load_kube_config() - k8s_client = kube.client.CoreV1Api() - k8s_cache = K8sCache(k8s_client, refresh_time=5) os.makedirs(f'{root}/shared') - + prefix = f'gs://dummy/build/{batch_token}' - + for j in self._jobs: job_name = j._attributes.get('name') @@ -142,6 +138,14 @@ async def run(self): env_options.extend([ '-e', f'{key}={value}']) + # Reboot the cache on each use. 
The kube client isn't + # refreshing tokens correctly. + # https://github.com/kubernetes-client/python/issues/741 + # Note, that is in the kubenetes-client repo, the + # kubernetes_asyncio. I'm assuming it has the same + # issue. + k8s_cache = K8sCache(kube.client.CoreV1Api(), refresh_time=5) + if j._service_account: namespace = j._service_account['namespace'] name = j._service_account['name'] @@ -281,6 +285,8 @@ def checkout_script(self): async def main(): + await kube.config.load_kube_config() + parser = argparse.ArgumentParser(description='Create initial Hail as a service account.') parser.add_argument('--extra-code-config', dest='extra_code_config', @@ -295,7 +301,7 @@ async def main(): args = parser.parse_args() branch_pieces = args.branch.split(":") - assert len(branch_pieces) == 2, f'{branch_pieces} {s}' + assert len(branch_pieces) == 2, f'{branch_pieces} {args.branch}' repo_pieces = branch_pieces[0].split("/") assert len(repo_pieces) == 2, f'{repo_pieces} {branch_pieces[0]}' @@ -314,7 +320,7 @@ async def main(): steps = [s.strip() for s in args.steps.split(',')] - with open(f'build.yaml', 'r') as f: + with open('build.yaml', 'r') as f: config = BuildConfiguration(code, f.read(), scope, requested_step_names=steps) token = generate_token() From d06b1022ce504cdaac2e1f0b39761f685685e157 Mon Sep 17 00:00:00 2001 From: Cotton Seed Date: Sun, 6 Dec 2020 17:45:16 -0500 Subject: [PATCH 064/501] fixes --- build.yaml | 2 +- infra/README.md | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/build.yaml b/build.yaml index 302d71022e1..4b4f582a1ca 100644 --- a/build.yaml +++ b/build.yaml @@ -386,7 +386,7 @@ steps: export HAIL_SCOPE={{ scope }} python3 /io/bootstrap_create_accounts.py serviceAccount: - name: auth-driver + name: admin namespace: valueFrom: default_ns.name secrets: diff --git a/infra/README.md b/infra/README.md index 5fd35462c2f..fdca61863a8 100644 --- a/infra/README.md +++ b/infra/README.md @@ -57,7 +57,7 @@ Instructions: `kubectl` to point at the vdc cluster: ``` - gcloud container clusters get-credentials vdc + gcloud container clusters get-credentials --zone vdc ``` - Go to the Google Cloud console, VPC networks > internal > Private @@ -80,27 +80,27 @@ You can now install Hail: git clone https://github.com/hail-is/hail.git ``` -- Update $HAIL/config.mk with your infrastructure settings. You can - get settings from the default/global-config secret: - - ``` - kubectl -n default get secret global-config -o json | jq '.data | map_values(@base64d)' - ``` - - Install some dependencies on the VM: ``` sudo apt update - sudo apt install -y docker.io python3-pip openjdk-8-jre-headless + sudo apt install -y docker.io python3-pip openjdk-8-jre-headless jq sudo snap install --classic kubectl sudo usermod -a -G docker $USER gcloud -q auth configure-docker - gcloud container clusters get-credentials --zone us-central1-a vdc + gcloud container clusters get-credentials --zone vdc python3 -m pip install -r $HOME/hail/docker/requirements.txt ``` You will have to log out/in for the usermod to take effect. +- Update $HAIL/config.mk with your infrastructure settings. You can + get settings from the default/global-config secret: + + ``` + kubectl -n default get secret global-config -o json | jq '.data | map_values(@base64d)' + ``` + - In `$HAIL/docker/third-party` run: ``` From e5631d3dd0e98bcbcbb367535e0f971b670d9302 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Tue, 8 Dec 2020 16:46:37 +1100 Subject: [PATCH 065/501] Update config.mk for CPG. 
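
The values below have to mirror what Terraform actually provisioned. As a rough aid, here is a sketch (not part of this change) that prints config.mk-style settings from the default/global-config secret written by infra/main.tf; it assumes the synchronous `kubernetes` Python client is installed and that a working kubeconfig for the cluster is available, and the key names are the ones stored in that secret:

```
# Sketch only: render config.mk-style values from the global-config secret.
# Assumes `pip install kubernetes` and a working kubeconfig for the cluster.
import base64

from kubernetes import client, config

config.load_kube_config()
secret = client.CoreV1Api().read_namespaced_secret('global-config', 'default')
cfg = {k: base64.b64decode(v).decode() for k, v in secret.data.items()}

print(f"PROJECT := {cfg['gcp_project']}")
print(f"DOMAIN := {cfg['domain']}")
print(f"INTERNAL_IP := {cfg['internal_ip']}")
print(f"IP := {cfg['ip']}")
print(f"KUBERNETES_SERVER_URL := {cfg['kubernetes_server_url']}")
print(f"REGION := {cfg['gcp_region']}")
print(f"ZONE := {cfg['gcp_zone']}")
```
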
--- config.mk | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/config.mk b/config.mk index bfa2b517038..054983ada22 100644 --- a/config.mk +++ b/config.mk @@ -1,11 +1,11 @@ -PROJECT := hail-vdc-staging +PROJECT := hail-295901 DOCKER_ROOT_IMAGE := gcr.io/$(PROJECT)/ubuntu:18.04 -DOMAIN := staging.hail.is -INTERNAL_IP := 10.128.0.2 -IP := 35.224.188.20 -KUBERNETES_SERVER_URL := https://34.123.185.11 -REGION := us-central1 -ZONE := us-central1-a +DOMAIN := hail.populationgenomics.org.au +INTERNAL_IP := 10.152.0.2 +IP := 35.201.29.236 +KUBERNETES_SERVER_URL := https://34.87.199.41 +REGION := australia-southeast1 +ZONE := australia-southeast1-b ifeq ($(NAMESPACE),default) SCOPE = deploy DEPLOY = true From 6c09a31f149a762c29feaba19e3ea06a7affa83d Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Tue, 8 Dec 2020 16:32:56 +1100 Subject: [PATCH 066/501] For Australia, we can't have a multi-regional logs bucket. --- infra/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infra/main.tf b/infra/main.tf index be0afffc82e..722fe36db7b 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -682,7 +682,7 @@ resource "google_storage_bucket" "batch_logs" { name = "batch-logs-${random_id.batch_logs_bucket_name_suffix.hex}" location = var.gcp_location force_destroy = true - storage_class = "MULTI_REGIONAL" + storage_class = "STANDARD" } resource "google_dns_managed_zone" "dns_zone" { From 9a0167fc73289a42d3008c913d3a1f4670e540c7 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Tue, 15 Dec 2020 22:37:21 -0500 Subject: [PATCH 067/501] [k8s] set most resource requests to 100mCPU --- amundsen/deployment.yaml | 5 ++++- blog/deployment.yaml | 15 +++++++++++---- ci/deployment.yaml | 6 +++--- ci/test/resources/deployment.yaml | 7 +++++-- internal-gateway/deployment.yaml | 7 +++++-- memory/deployment.yaml | 22 +++++++++++----------- shuffler/deployment.yaml | 6 +++--- ukbb-rg/deployment.yaml | 12 ++++++------ 8 files changed, 48 insertions(+), 32 deletions(-) diff --git a/amundsen/deployment.yaml b/amundsen/deployment.yaml index f7297437a07..348635bb9b6 100644 --- a/amundsen/deployment.yaml +++ b/amundsen/deployment.yaml @@ -34,7 +34,10 @@ spec: imagePullPolicy: Always resources: requests: - memory: "1G" + memory: 375Mi + cpu: 100m + limits: + memory: 3750Mi cpu: "1" ports: - containerPort: 5000 diff --git a/blog/deployment.yaml b/blog/deployment.yaml index 8a1c0773d77..344f1bcab07 100644 --- a/blog/deployment.yaml +++ b/blog/deployment.yaml @@ -43,6 +43,13 @@ spec: - mountPath: /ssl-config name: ssl-config-blog readOnly: true + resources: + requests: + memory: 338Mi + cpu: 90m + limits: + memory: 3375Mi + cpu: 900m - name: blog image: ghost:3.0-alpine env: @@ -87,11 +94,11 @@ spec: name: blog-content resources: requests: - memory: 250Mi - cpu: 300m + memory: 37Mi + cpu: 10m limits: - memory: 250Mi - cpu: "1" + memory: 375Mi + cpu: 100m volumeClaimTemplates: - metadata: name: blog-content diff --git a/ci/deployment.yaml b/ci/deployment.yaml index ae63d55914c..cd7dc7ed6e2 100644 --- a/ci/deployment.yaml +++ b/ci/deployment.yaml @@ -33,10 +33,10 @@ spec: image: "{{ ci_image.image }}" resources: requests: - memory: "1G" - cpu: "400m" + memory: 375Mi + cpu: 100m limits: - memory: "1G" + memory: 3750Mi cpu: "1" env: - name: HAIL_DEPLOY_CONFIG_FILE diff --git a/ci/test/resources/deployment.yaml b/ci/test/resources/deployment.yaml index 87deecf4d00..7256c856727 100644 --- a/ci/test/resources/deployment.yaml +++ b/ci/test/resources/deployment.yaml @@ -26,8 +26,11 @@ 
spec: image: "{{ hello_image.image }}" resources: requests: - memory: "0.5G" - cpu: "0.5" + memory: 37Mi + cpu: 10m + limits: + memory: 375Mi + cpu: 100m ports: - containerPort: 5000 livenessProbe: diff --git a/internal-gateway/deployment.yaml b/internal-gateway/deployment.yaml index 9b50ea9f39b..9f7128e0683 100644 --- a/internal-gateway/deployment.yaml +++ b/internal-gateway/deployment.yaml @@ -37,8 +37,11 @@ spec: image: "{{ internal_gateway_image.image }}" resources: requests: - memory: "250M" - cpu: "200m" + memory: 375Mi + cpu: 100m + limits: + memory: 3750Mi + cpu: "1" ports: - containerPort: 80 volumeMounts: diff --git a/memory/deployment.yaml b/memory/deployment.yaml index 6c9faf53176..1a999e237d9 100644 --- a/memory/deployment.yaml +++ b/memory/deployment.yaml @@ -25,11 +25,11 @@ spec: image: gcr.io/{{ global.project }}/redis:6.0.6-alpine command: - redis-server - - --port + - --port - "0" - --unixsocket - /redis/redis.sock - - --timeout + - --timeout - "0" - --maxmemory - 2gb @@ -40,16 +40,16 @@ spec: mountPath: /redis resources: requests: - memory: "2.5G" - cpu: "400m" + memory: 187Mi + cpu: 50m limits: - memory: "3.75G" - cpu: "1" + memory: 1875i + cpu: 500m readinessProbe: exec: command: - redis-cli - - -s + - -s - /redis/redis.sock - ping initialDelaySeconds: 5 @@ -88,11 +88,11 @@ spec: readOnly: true resources: requests: - memory: "1.25G" - cpu: "400m" + memory: 187Mi + cpu: 50m limits: - memory: "3.75G" - cpu: "1" + memory: 1875i + cpu: 500m readinessProbe: tcpSocket: port: 5000 diff --git a/shuffler/deployment.yaml b/shuffler/deployment.yaml index 71f96e7d5e3..5fb226df10d 100644 --- a/shuffler/deployment.yaml +++ b/shuffler/deployment.yaml @@ -48,10 +48,10 @@ spec: readOnly: true resources: requests: - memory: "3.75G" - cpu: "800m" + memory: 375Mi + cpu: 100m limits: - memory: "3.75G" + memory: 3750Mi cpu: "1" volumes: - name: deploy-config diff --git a/ukbb-rg/deployment.yaml b/ukbb-rg/deployment.yaml index a1213a0286d..48997dec8d2 100644 --- a/ukbb-rg/deployment.yaml +++ b/ukbb-rg/deployment.yaml @@ -37,10 +37,10 @@ spec: imagePullPolicy: Always resources: requests: - memory: "3.75G" - cpu: "800m" + memory: 375Mi + cpu: 100m limits: - memory: "3.75G" + memory: 3750Mi cpu: "1" ports: - containerPort: 80 @@ -86,10 +86,10 @@ spec: imagePullPolicy: Always resources: requests: - memory: "3.75G" - cpu: "800m" + memory: 375Mi + cpu: 100m limits: - memory: "3.75G" + memory: 3750Mi cpu: "1" ports: - containerPort: 3838 From 59c0bdbb7989cf0850fb5ba58182de97da84a360 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Tue, 15 Dec 2020 23:06:34 -0500 Subject: [PATCH 068/501] [k8s] reduce batch request & reduce namespaced batch,auth,router cpu request --- auth/deployment.yaml | 22 ++++++++++++++++------ batch/deployment.yaml | 29 ++++++++++++++++++++--------- router/deployment.yaml | 11 ++++++++--- 3 files changed, 44 insertions(+), 18 deletions(-) diff --git a/auth/deployment.yaml b/auth/deployment.yaml index 3f8debec131..39864752116 100644 --- a/auth/deployment.yaml +++ b/auth/deployment.yaml @@ -81,10 +81,15 @@ spec: key: gsuite_organization resources: requests: - memory: "250M" - cpu: "100m" +{% if deploy %} + memory: 375Mi + cpu: 100m +{% else %} + memory: 37Mi + cpu: 10m +{% endif %} limits: - memory: "1G" + memory: 3750Mi cpu: "1" volumeMounts: - name: deploy-config @@ -191,10 +196,15 @@ spec: key: gsuite_organization resources: requests: - memory: "250M" - cpu: "100m" +{% if deploy %} + memory: 375Mi + cpu: 100m +{% else %} + memory: 37Mi + cpu: 10m +{% endif %} limits: - memory: 
"1G" + memory: 3750Mi cpu: "1" volumeMounts: - name: deploy-config diff --git a/batch/deployment.yaml b/batch/deployment.yaml index 245caa40b3e..d59d4a39eac 100644 --- a/batch/deployment.yaml +++ b/batch/deployment.yaml @@ -42,9 +42,15 @@ spec: - -m - batch.driver resources: - requests: - memory: "1G" - cpu: "1" +{% if deploy %} + requests: + memory: 375Mi + cpu: 100m +{% else %} + requests: + memory: 37Mi + cpu: 10m +{% endif %} env: - name: HAIL_DOMAIN value: "{{ global.domain }}" @@ -236,12 +242,17 @@ spec: ports: - containerPort: 5000 resources: - requests: - memory: "250M" - cpu: "100m" - limits: - memory: "1G" - cpu: "1" + requests: +{% if deploy %} + memory: 375Mi + cpu: 100m +{% else %} + memory: 37Mi + cpu: 10m +{% endif %} + limits: + memory: 3750Mi + cpu: "1" volumeMounts: - name: deploy-config mountPath: /deploy-config diff --git a/router/deployment.yaml b/router/deployment.yaml index 36479251257..b16fb314bfb 100644 --- a/router/deployment.yaml +++ b/router/deployment.yaml @@ -312,10 +312,15 @@ spec: image: {{ router_image.image }} resources: requests: - memory: "100M" - cpu: "100m" +{% if deploy %} + memory: 375Mi + cpu: 100m +{% else %} + memory: 37Mi + cpu: 10m +{% endif %} limits: - memory: "250M" + memory: 3750Mi cpu: "1" env: - name: HAIL_DOMAIN From 3d464747203d34c44631819703bda060b7e95ed4 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Fri, 18 Dec 2020 16:31:38 +1100 Subject: [PATCH 069/501] Add stable release channel to GKE cluster. --- infra/main.tf | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/infra/main.tf b/infra/main.tf index 722fe36db7b..cf60b2d675b 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -62,6 +62,10 @@ resource "google_container_cluster" "vdc" { issue_client_certificate = false } } + + release_channel { + channel = "STABLE" + } } resource "google_container_node_pool" "vdc_preemptible_pool" { From e0fefef5c981eb7b0ad5c0c12aebb1fac7c02220 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Tue, 12 Jan 2021 10:44:20 +1100 Subject: [PATCH 070/501] Add credentials also for worker images from the Artifact Registry. (#5) --- batch/batch/worker/worker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/batch/batch/worker/worker.py b/batch/batch/worker/worker.py index 2bbb0153471..abac4dfc4bc 100644 --- a/batch/batch/worker/worker.py +++ b/batch/batch/worker/worker.py @@ -342,7 +342,7 @@ async def get_container_status(self): async def run(self, worker): try: async with self.step('pulling'): - if self.image.startswith('gcr.io/'): + if self.image.startswith('gcr.io/') or self.image.contains('docker.pkg.dev/'): key = base64.b64decode( self.job.gsa_key['key.json']).decode() auth = { From 384c91a8b653f2c4709077572535933583223d14 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Tue, 12 Jan 2021 17:32:39 +1100 Subject: [PATCH 071/501] Fix Artifact Registry image path check. 
--- batch/batch/worker/worker.py | 2 +- hail/python/hailtop/batch/backend.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/batch/batch/worker/worker.py b/batch/batch/worker/worker.py index abac4dfc4bc..81a21b17f99 100644 --- a/batch/batch/worker/worker.py +++ b/batch/batch/worker/worker.py @@ -342,7 +342,7 @@ async def get_container_status(self): async def run(self, worker): try: async with self.step('pulling'): - if self.image.startswith('gcr.io/') or self.image.contains('docker.pkg.dev/'): + if self.image.startswith('gcr.io/') or 'docker.pkg.dev/' in self.image: key = base64.b64decode( self.job.gsa_key['key.json']).decode() auth = { diff --git a/hail/python/hailtop/batch/backend.py b/hail/python/hailtop/batch/backend.py index cf3a28986c0..ae85dc517c0 100644 --- a/hail/python/hailtop/batch/backend.py +++ b/hail/python/hailtop/batch/backend.py @@ -472,7 +472,7 @@ def _cp(src, dst): resources['storage'] = job._storage image = job._image if job._image else default_image - if not image.startswith('gcr.io/'): + if not (image.startswith('gcr.io/') or 'docker.pkg.dev/' in image): warnings.warn(f'Using an image {image} not in GCR. ' f'Jobs may fail due to Docker Hub rate limits.') From cb3249f2c6424783c3a3fc6e9948b58f0373f8d0 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Thu, 26 Nov 2020 00:49:42 +1100 Subject: [PATCH 072/501] Usernames can't contain dots, otherwise the GSA creation fails. --- auth/auth/auth.py | 1 + 1 file changed, 1 insertion(+) diff --git a/auth/auth/auth.py b/auth/auth/auth.py index 82359c7cb18..92c47e543d7 100644 --- a/auth/auth/auth.py +++ b/auth/auth/auth.py @@ -241,6 +241,7 @@ async def callback(request): assert caller == 'signup' username, domain = email.split('@') + username = ''.join(c for c in username if c.isalnum()) if domain != GSUITE_ORGANIZATION: raise web.HTTPUnauthorized() From d04ffdb98c80ab092f59478d47c2058ab03cd538 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Fri, 22 Jan 2021 18:13:45 +1100 Subject: [PATCH 073/501] Make batches visible to everyone. This is a temporary workaround for the CPG until there's a permission model for sharing batches. It will allow us to use a service account to submit prod jobs, while still showing users all the debugging information in the UI. --- batch/batch/front_end/front_end.py | 31 ++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/batch/batch/front_end/front_end.py b/batch/batch/front_end/front_end.py index 2bff994f706..9dbe181b497 100644 --- a/batch/batch/front_end/front_end.py +++ b/batch/batch/front_end/front_end.py @@ -244,11 +244,14 @@ async def get_jobs(request, userdata): user = userdata['username'] db = request.app['db'] + + # CPG-specific workaround until there's a permission model for sharing + # batches: don't check the user in this query. record = await db.select_and_fetchone( ''' SELECT * FROM batches -WHERE user = %s AND id = %s AND NOT deleted; -''', (user, batch_id)) +WHERE id = %s AND NOT deleted; +''', (batch_id,)) if not record: raise web.HTTPNotFound() @@ -311,6 +314,8 @@ async def _read_log_from_gcs(task): async def _get_job_log(app, batch_id, job_id, user): db = app['db'] + # CPG-specific workaround until there's a permission model for sharing + # batches: don't check the user in this query. 
record = await db.select_and_fetchone(''' SELECT jobs.state, jobs.spec, ip_address, format_version, jobs.attempt_id FROM jobs @@ -320,9 +325,9 @@ async def _get_job_log(app, batch_id, job_id, user): ON jobs.batch_id = attempts.batch_id AND jobs.job_id = attempts.job_id AND jobs.attempt_id = attempts.attempt_id LEFT JOIN instances ON attempts.instance_name = instances.name -WHERE user = %s AND jobs.batch_id = %s AND NOT deleted AND jobs.job_id = %s; +WHERE jobs.batch_id = %s AND NOT deleted AND jobs.job_id = %s; ''', - (user, batch_id, job_id)) + (batch_id, job_id)) if not record: raise web.HTTPNotFound() return await _get_job_log_from_record(app, batch_id, job_id, record) @@ -425,8 +430,10 @@ async def get_job_log(request, userdata): # pylint: disable=R1710 async def _query_batches(request, user): db = request.app['db'] - where_conditions = ['user = %s', 'NOT deleted'] - where_args = [user] + # CPG-specific workaround until there's a permission model for sharing + # batches: don't check the user in this query. + where_conditions = ['NOT deleted'] + where_args = [] last_batch_id = request.query.get('last_batch_id') if last_batch_id is not None: @@ -1083,6 +1090,8 @@ async def ui_batches(request, userdata): async def _get_job(app, batch_id, job_id, user): db = app['db'] + # CPG-specific workaround until there's a permission model for sharing + # batches: don't check the user in this query. record = await db.select_and_fetchone(''' SELECT jobs.*, ip_address, format_version, SUM(`usage` * rate) AS cost FROM jobs @@ -1097,10 +1106,10 @@ async def _get_job(app, batch_id, job_id, user): jobs.job_id = aggregated_job_resources.job_id LEFT JOIN resources ON aggregated_job_resources.resource = resources.resource -WHERE user = %s AND jobs.batch_id = %s AND NOT deleted AND jobs.job_id = %s +WHERE jobs.batch_id = %s AND NOT deleted AND jobs.job_id = %s GROUP BY jobs.batch_id, jobs.job_id; ''', - (user, batch_id, job_id)) + (batch_id, job_id)) if not record: raise web.HTTPNotFound() @@ -1121,14 +1130,16 @@ async def _get_job(app, batch_id, job_id, user): async def _get_attempts(app, batch_id, job_id, user): db = app['db'] + # CPG-specific workaround until there's a permission model for sharing + # batches: don't check the user in this query. attempts = db.select_and_fetchall(''' SELECT attempts.* FROM jobs INNER JOIN batches ON jobs.batch_id = batches.id LEFT JOIN attempts ON jobs.batch_id = attempts.batch_id and jobs.job_id = attempts.job_id -WHERE user = %s AND jobs.batch_id = %s AND NOT deleted AND jobs.job_id = %s; +WHERE jobs.batch_id = %s AND NOT deleted AND jobs.job_id = %s; ''', - (user, batch_id, job_id)) + (batch_id, job_id)) attempts = [attempt async for attempt in attempts] if len(attempts) == 0: From d3424e17407081449716ed559a13edcabe466e53 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Tue, 12 Jan 2021 18:02:14 +1100 Subject: [PATCH 074/501] Mount tokens when submitting a new batch, necessary for nested batches. 
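
For context, a minimal sketch of the nested-batch pattern this enables: the inner submission only works when the outer job has the user's tokens mounted into its container (the mount_tokens=True change below). The billing project, bucket and script path here are placeholders, not names from this repository.

```
# Sketch: an outer batch whose job submits another batch from inside the
# worker container, relying on the mounted token for authentication.
import hailtop.batch as hb

backend = hb.ServiceBackend(billing_project='my-billing-project',
                            bucket='my-hail-bucket')
outer = hb.Batch(backend=backend, name='outer')

j = outer.new_job(name='submit-inner')
# This hypothetical script builds and runs its own hb.Batch against the
# same batch service, using the credentials mounted into the container.
j.command('python3 /scripts/submit_inner_batch.py')

outer.run()
```
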
--- hail/python/hailtop/batch/backend.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hail/python/hailtop/batch/backend.py b/hail/python/hailtop/batch/backend.py index 272592b19a5..0dc5b2ecb1b 100644 --- a/hail/python/hailtop/batch/backend.py +++ b/hail/python/hailtop/batch/backend.py @@ -488,7 +488,8 @@ def _cp(src, dst): timeout=job._timeout, gcsfuse=job._gcsfuse if len(job._gcsfuse) > 0 else None, env=env_vars, - requester_pays_project=batch.requester_pays_project) + requester_pays_project=batch.requester_pays_project, + mount_tokens=True) n_jobs_submitted += 1 From df9040bdf298028eae6e18ea0ef529b94b762230 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Mon, 25 Jan 2021 13:50:55 +1100 Subject: [PATCH 075/501] Add conda recipe and Actions workflow --- .github/workflows/main.yaml | 36 ++++++++++++++++++++ conda/hail/build.sh | 7 ++++ conda/hail/meta.yaml | 65 +++++++++++++++++++++++++++++++++++++ 3 files changed, 108 insertions(+) create mode 100644 .github/workflows/main.yaml create mode 100755 conda/hail/build.sh create mode 100644 conda/hail/meta.yaml diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml new file mode 100644 index 00000000000..3f7dd35df54 --- /dev/null +++ b/.github/workflows/main.yaml @@ -0,0 +1,36 @@ +name: CI +on: [push, pull_request] + +jobs: + build-test-publish: + # For tag pushes, we want to assure only the tag event triggers CI, not the accompanying commit: + if: "! startsWith(github.event.head_commit.message, 'Bump ') || startsWith(github.ref, 'refs/tags/')" + strategy: + matrix: + os: [ubuntu-latest, macos-latest] + runs-on: ${{ matrix.os }} + defaults: + run: + shell: bash -l {0} + steps: + - uses: actions/checkout@main + + - uses: conda-incubator/setup-miniconda@v2 + with: + activate-environment: buildenv + channels: cpg,conda-forge,bioconda,defaults + channel-priority: true + mamba-version: "*" + python-version: 3.7 + + - name: Setup build env + run: mamba install pip conda-build anaconda-client + + - name: Build package + run: mamba build conda/$(basename $GITHUB_REPOSITORY | tr '[:upper:]' '[:lower:]') + + - name: Upload to anaconda package repository + if: "startsWith(github.ref, 'refs/tags/')" + run: | + anaconda -t ${{ secrets.ANACONDA_TOKEN }} \ + upload ${CONDA_PREFIX}/conda-bld/**/*.tar.bz2 diff --git a/conda/hail/build.sh b/conda/hail/build.sh new file mode 100755 index 00000000000..434df6b5a3e --- /dev/null +++ b/conda/hail/build.sh @@ -0,0 +1,7 @@ +#! 
/bin/bash + +# Build instructions +# https://hail.is/docs/0.2/getting_started_developing.html#requirements +pushd $SRC_DIR/hail +make install HAIL_COMPILE_NATIVES='build' -j ${CPU_COUNT} +popd diff --git a/conda/hail/meta.yaml b/conda/hail/meta.yaml new file mode 100644 index 00000000000..0d270df332f --- /dev/null +++ b/conda/hail/meta.yaml @@ -0,0 +1,65 @@ +{% set name = 'hail' %} +{% set version = '0.2.61' %} + +package: + name: {{ name|lower }} + version: {{ version }} + +source: + git_url: https://github.com/hail-is/hail.git + git_rev: {{ version }} + sha256: unused + +build: + number: 1 + +requirements: + build: + - {{ compiler('cxx') }} + - make + - rsync + host: + - python + - pyspark >=2.4,<2.4.2 + - openjdk 8.* + - lz4 + - pytest-runner + - pip + run: + - python + - openjdk 8.* + - pyspark >=2.4,<2.4.2 + - aiohttp + - aiohttp-session + - bokeh >1.1,<1.3 + - decorator <5 + - deprecated + - gcsfs + - humanize + - hurry.filesize + - nest-asyncio + - parsimonious + - pyjwt + - python-json-logger ==0.1.11 + - requests + - scipy + - tabulate ==0.8.3 + - tqdm ==4.42.1 + - dill + - asyncinit + - google-cloud-sdk + - google-cloud-storage + - google-api-core + +test: + imports: + - hail + - hailtop.batch + +about: + home: https://hail.is + dev_url: https://github.com/populationgenomics/hail + license: MIT + license_file: LICENSE + summary: | + Hail is Python-based data analysis tool for working with genomic data. From ef9255ac0423df73d47a5ff6b72f561799b421fb Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Mon, 25 Jan 2021 16:22:21 +1100 Subject: [PATCH 076/501] Remove mamba --- .github/workflows/main.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 3f7dd35df54..bd51c15ec12 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -20,14 +20,13 @@ jobs: activate-environment: buildenv channels: cpg,conda-forge,bioconda,defaults channel-priority: true - mamba-version: "*" python-version: 3.7 - name: Setup build env - run: mamba install pip conda-build anaconda-client + run: conda install pip conda-build anaconda-client - name: Build package - run: mamba build conda/$(basename $GITHUB_REPOSITORY | tr '[:upper:]' '[:lower:]') + run: conda build conda/$(basename $GITHUB_REPOSITORY | tr '[:upper:]' '[:lower:]') - name: Upload to anaconda package repository if: "startsWith(github.ref, 'refs/tags/')" From 67ab02baa722b4cc65234f45c8f55fabda5b8a0b Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Mon, 25 Jan 2021 17:23:30 +1100 Subject: [PATCH 077/501] Support passing an authorization token to the ServiceBackend. --- hail/python/hailtop/batch/backend.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/hail/python/hailtop/batch/backend.py b/hail/python/hailtop/batch/backend.py index cd5b8b9cb78..0e0ac1b8b67 100644 --- a/hail/python/hailtop/batch/backend.py +++ b/hail/python/hailtop/batch/backend.py @@ -281,10 +281,12 @@ class ServiceBackend(Backend): bucket: Name of bucket to use. Should not include the ``gs://`` prefix. - + token: + The authorization token to pass to the batch client. + Should only be set for user delegation purposes. 
""" - def __init__(self, billing_project: str = None, bucket: str = None): + def __init__(self, billing_project: str = None, bucket: str = None, token: str = None): if billing_project is None: billing_project = get_user_config().get('batch', 'billing_project', fallback=None) if billing_project is None: @@ -292,7 +294,7 @@ def __init__(self, billing_project: str = None, bucket: str = None): 'the billing_project parameter of ServiceBackend must be set ' 'or run `hailctl config set batch/billing_project ' 'MY_BILLING_PROJECT`') - self._batch_client = BatchClient(billing_project) + self._batch_client = BatchClient(billing_project, _token=token) if bucket is None: bucket = get_user_config().get('batch', 'bucket', fallback=None) From 8fd0ea439b1db33a6679c2c7c4a338b4f60f29f7 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Mon, 25 Jan 2021 19:48:24 +1100 Subject: [PATCH 078/501] Remove assertion that breaks the usage of Artifact Registry images. (#12) Those image paths start with e.g. australia-southeast1-docker.pkg.dev/. --- batch/batch/worker/worker.py | 1 - 1 file changed, 1 deletion(-) diff --git a/batch/batch/worker/worker.py b/batch/batch/worker/worker.py index 5e117a3ae66..b19af75e2c9 100644 --- a/batch/batch/worker/worker.py +++ b/batch/batch/worker/worker.py @@ -386,7 +386,6 @@ async def run(self, worker): auth = await self.batch_worker_access_token() await self.ensure_image_is_pulled(auth=auth) else: - assert self.image.startswith('gcr.io/') # Pull to verify this user has access to this # image. # FIXME improve the performance of this with a From 365af29743aea9535658d90ca0fddeb1006d7fdb Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Mon, 25 Jan 2021 19:48:36 +1100 Subject: [PATCH 079/501] Remove leftover user check. (#13) Follow-up to https://github.com/populationgenomics/hail/commit/d04ffdb98c80ab092f59478d47c2058ab03cd538. --- batch/batch/front_end/front_end.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/batch/batch/front_end/front_end.py b/batch/batch/front_end/front_end.py index 7536e894835..8a160ab500c 100644 --- a/batch/batch/front_end/front_end.py +++ b/batch/batch/front_end/front_end.py @@ -939,15 +939,17 @@ async def insert(tx): async def _get_batch(app, batch_id, user): db: Database = app['db'] + # CPG-specific workaround until there's a permission model for sharing + # batches: don't check the user in this query. 
record = await db.select_and_fetchone(''' SELECT batches.*, SUM(`usage` * rate) AS cost FROM batches LEFT JOIN aggregated_batch_resources ON batches.id = aggregated_batch_resources.batch_id LEFT JOIN resources ON aggregated_batch_resources.resource = resources.resource -WHERE user = %s AND id = %s AND NOT deleted +WHERE id = %s AND NOT deleted GROUP BY batches.id; -''', (user, batch_id)) +''', (batch_id,)) if not record: raise web.HTTPNotFound() From 17442663d7318a6321e64870cb6537d1c91fd2e9 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Mon, 25 Jan 2021 19:55:08 +1100 Subject: [PATCH 080/501] Trigger CI on every push to main --- .github/workflows/main.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index bd51c15ec12..42614e1dde4 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -1,6 +1,8 @@ name: CI -on: [push, pull_request] - +on: + push: + branches: + - main jobs: build-test-publish: # For tag pushes, we want to assure only the tag event triggers CI, not the accompanying commit: @@ -29,7 +31,6 @@ jobs: run: conda build conda/$(basename $GITHUB_REPOSITORY | tr '[:upper:]' '[:lower:]') - name: Upload to anaconda package repository - if: "startsWith(github.ref, 'refs/tags/')" run: | anaconda -t ${{ secrets.ANACONDA_TOKEN }} \ upload ${CONDA_PREFIX}/conda-bld/**/*.tar.bz2 From 4e418f8de7c55088b79a8bc041b5b8343cc86907 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Mon, 25 Jan 2021 19:57:20 +1100 Subject: [PATCH 081/501] Push to anaconda with --force --- .github/workflows/main.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 42614e1dde4..2518d83074b 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -32,5 +32,5 @@ jobs: - name: Upload to anaconda package repository run: | - anaconda -t ${{ secrets.ANACONDA_TOKEN }} \ + anaconda -t ${{ secrets.ANACONDA_TOKEN }} --force \ upload ${CONDA_PREFIX}/conda-bld/**/*.tar.bz2 From f8d52f4c570913a42ec19372916b8e8d1ef108e0 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Mon, 25 Jan 2021 20:58:43 +1100 Subject: [PATCH 082/501] Add README --- conda/README.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 conda/README.md diff --git a/conda/README.md b/conda/README.md new file mode 100644 index 00000000000..63c366b8f7d --- /dev/null +++ b/conda/README.md @@ -0,0 +1,27 @@ +# Conda package + +This folder contains a Conda recipe to build [the `hail` package for the `cpg` Anaconda channel]((https://anaconda.org/cpg/hail)). + +Note that there is also [a `hail` package in the `bioconda` channel](https://github.com/bioconda/bioconda-recipes/tree/master/recipes/hail), which is synced with the [official PyPI release](https://pypi.org/project/hail). The idea of having a separate package in the `cpg` channel is to be able to build it from the most recent development codebase, that would reflect changes that were not yet released, or not to be propagated to the upstream repository at all. + +[GitHub Actions CI](../.github/workflows/main.yaml)) is set up to build the package using this recipe and push it to Anaconda on every push to the `main` branch in the [CPG hail fork](https://github.com/populationgenomics/hail). 
+ +To install the package, set up miniconda first: + +``` +if [[ "$OSTYPE" == "darwin"* ]]; then + wget https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh +else + wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh +fi +bash miniconda.sh +``` + +When installing, to prioritize the CPG package, list the `cpg` channel before `bioconda`: + +``` +conda create --name hail -c cpg -c bioconda -c conda-forge hail +conda activate hail +``` + +You can also install Hail into an existing enviornment; however note that Hail requires Python of versions 3.6 or 3.7, so Conda might downgrade Python in that environment, which may affect other installed packages. From a41dc3af50d364ce208ba1a6738ddb0d92b85531 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Mon, 25 Jan 2021 21:05:26 +1100 Subject: [PATCH 083/501] Conda: fix links to channels --- conda/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/conda/README.md b/conda/README.md index 63c366b8f7d..94db79bf763 100644 --- a/conda/README.md +++ b/conda/README.md @@ -1,10 +1,10 @@ # Conda package -This folder contains a Conda recipe to build [the `hail` package for the `cpg` Anaconda channel]((https://anaconda.org/cpg/hail)). +This folder contains a Conda recipe to build the `hail` package for the [`cpg` Anaconda channel](https://anaconda.org/cpg/hail). -Note that there is also [a `hail` package in the `bioconda` channel](https://github.com/bioconda/bioconda-recipes/tree/master/recipes/hail), which is synced with the [official PyPI release](https://pypi.org/project/hail). The idea of having a separate package in the `cpg` channel is to be able to build it from the most recent development codebase, that would reflect changes that were not yet released, or not to be propagated to the upstream repository at all. +Note that there is also a `hail` package in the [`bioconda` channel](https://github.com/bioconda/bioconda-recipes/tree/master/recipes/hail), which is synced with the [official PyPI release](https://pypi.org/project/hail). The idea of having a separate package in the `cpg` channel is to be able to build it from the most recent development codebase, that would reflect changes that were not yet released, or not to be propagated to the upstream repository at all. -[GitHub Actions CI](../.github/workflows/main.yaml)) is set up to build the package using this recipe and push it to Anaconda on every push to the `main` branch in the [CPG hail fork](https://github.com/populationgenomics/hail). +[GitHub Actions CI](../.github/workflows/main.yaml) is set up to build the package using this recipe and push it to Anaconda on every push to the `main` branch in the [CPG hail fork](https://github.com/populationgenomics/hail). 
To install the package, set up miniconda first: From 7a76b6ab5ac4567b42fa490a07ba9c5faffa8553 Mon Sep 17 00:00:00 2001 From: Vlad Savelyev Date: Tue, 26 Jan 2021 09:27:53 +1100 Subject: [PATCH 084/501] Update conda/README.md Co-authored-by: Leonhard Gruenschloss --- conda/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/README.md b/conda/README.md index 94db79bf763..8a5ed64d27f 100644 --- a/conda/README.md +++ b/conda/README.md @@ -24,4 +24,4 @@ conda create --name hail -c cpg -c bioconda -c conda-forge hail conda activate hail ``` -You can also install Hail into an existing enviornment; however note that Hail requires Python of versions 3.6 or 3.7, so Conda might downgrade Python in that environment, which may affect other installed packages. +You can also install Hail into an existing environment; however note that Hail requires Python of versions 3.6 or 3.7, so conda might downgrade Python in that environment, which may affect other installed packages. From c5228b322f3243687eec632461d572e71513a621 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Tue, 26 Jan 2021 12:16:25 +1100 Subject: [PATCH 085/501] Condarize: properly set up package version, include git hash --- .github/workflows/{main.yaml => condarize.yaml} | 11 ++++++++++- conda/hail/{meta.yaml => meta-template.yaml} | 13 ++++--------- 2 files changed, 14 insertions(+), 10 deletions(-) rename .github/workflows/{main.yaml => condarize.yaml} (70%) rename conda/hail/{meta.yaml => meta-template.yaml} (81%) diff --git a/.github/workflows/main.yaml b/.github/workflows/condarize.yaml similarity index 70% rename from .github/workflows/main.yaml rename to .github/workflows/condarize.yaml index 2518d83074b..5cace284fff 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/condarize.yaml @@ -27,8 +27,17 @@ jobs: - name: Setup build env run: conda install pip conda-build anaconda-client + - name: Fix version + run: | + MKFILE=hail/Makefile + MAJOR_MINOR=$(grep -Po 'HAIL_MAJOR_MINOR_VERSION := \K.*(?=)' ${MKFILE}) + PATCH=$(grep -Po 'HAIL_PATCH_VERSION := \K.*(?=)' ${MKFILE}) + VERSION=${MAJOR_MINOR}.${PATCH}-${GITHUB_SHA:0:7} + cat conda/hail/meta-template.yaml \ + | sed s/{version}/${VERSION}/ > conda/hail/meta.yaml + - name: Build package - run: conda build conda/$(basename $GITHUB_REPOSITORY | tr '[:upper:]' '[:lower:]') + run: conda build conda/hail - name: Upload to anaconda package repository run: | diff --git a/conda/hail/meta.yaml b/conda/hail/meta-template.yaml similarity index 81% rename from conda/hail/meta.yaml rename to conda/hail/meta-template.yaml index 0d270df332f..4bf74f09e07 100644 --- a/conda/hail/meta.yaml +++ b/conda/hail/meta-template.yaml @@ -1,17 +1,12 @@ -{% set name = 'hail' %} -{% set version = '0.2.61' %} - package: - name: {{ name|lower }} - version: {{ version }} + name: hail + version: {version} source: - git_url: https://github.com/hail-is/hail.git - git_rev: {{ version }} - sha256: unused + path: ../../ build: - number: 1 + number: 0 requirements: build: From aae1eb78acf768ecbaba47f28e1aec6d9d5d7c3e Mon Sep 17 00:00:00 2001 From: Vlad Savelyev Date: Tue, 26 Jan 2021 14:27:48 +1100 Subject: [PATCH 086/501] Update conda/README.md Co-authored-by: Peter Diakumis --- conda/README.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/conda/README.md b/conda/README.md index 8a5ed64d27f..92fce9b3c4b 100644 --- a/conda/README.md +++ b/conda/README.md @@ -2,7 +2,14 @@ This folder contains a Conda recipe to build the `hail` package for the [`cpg` 
Anaconda channel](https://anaconda.org/cpg/hail). -Note that there is also a `hail` package in the [`bioconda` channel](https://github.com/bioconda/bioconda-recipes/tree/master/recipes/hail), which is synced with the [official PyPI release](https://pypi.org/project/hail). The idea of having a separate package in the `cpg` channel is to be able to build it from the most recent development codebase, that would reflect changes that were not yet released, or not to be propagated to the upstream repository at all. +Note that there is also a `hail` package in the +[`bioconda` channel](https://github.com/bioconda/bioconda-recipes/tree/master/recipes/hail) +which is synced with the +[official PyPI release](https://pypi.org/project/hail). +Having a separate conda package in the `cpg` channel built from the +most recent development codebase allows us to test changes +that have not been released yet, or even use features that will not be +propagated to the upstream repository at all. [GitHub Actions CI](../.github/workflows/main.yaml) is set up to build the package using this recipe and push it to Anaconda on every push to the `main` branch in the [CPG hail fork](https://github.com/populationgenomics/hail). From 1a370386a6d1f89148d75e09b907091e5e22518d Mon Sep 17 00:00:00 2001 From: Vlad Savelyev Date: Tue, 26 Jan 2021 14:28:08 +1100 Subject: [PATCH 087/501] Update conda/README.md Co-authored-by: Peter Diakumis --- conda/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/README.md b/conda/README.md index 92fce9b3c4b..c0db52add0e 100644 --- a/conda/README.md +++ b/conda/README.md @@ -1,6 +1,6 @@ # Conda package -This folder contains a Conda recipe to build the `hail` package for the [`cpg` Anaconda channel](https://anaconda.org/cpg/hail). +This folder contains a conda recipe to build the `hail` package for the [`cpg` Anaconda channel](https://anaconda.org/cpg/hail). 
Note that there is also a `hail` package in the [`bioconda` channel](https://github.com/bioconda/bioconda-recipes/tree/master/recipes/hail) From 2fd837e7d6987a7b281d0406032531bafc930165 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 27 Jan 2021 11:40:52 +1100 Subject: [PATCH 088/501] Conda package version: follow pep-0440 for dev release tags (which conda respects) --- .github/workflows/condarize.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/condarize.yaml b/.github/workflows/condarize.yaml index 5cace284fff..400c59ed0a6 100644 --- a/.github/workflows/condarize.yaml +++ b/.github/workflows/condarize.yaml @@ -32,7 +32,7 @@ jobs: MKFILE=hail/Makefile MAJOR_MINOR=$(grep -Po 'HAIL_MAJOR_MINOR_VERSION := \K.*(?=)' ${MKFILE}) PATCH=$(grep -Po 'HAIL_PATCH_VERSION := \K.*(?=)' ${MKFILE}) - VERSION=${MAJOR_MINOR}.${PATCH}-${GITHUB_SHA:0:7} + VERSION=${MAJOR_MINOR}.${PATCH}.dev${GITHUB_SHA:0:7} cat conda/hail/meta-template.yaml \ | sed s/{version}/${VERSION}/ > conda/hail/meta.yaml From 07c3ddc695b6f2140aa27477bf43f5c1f478ba20 Mon Sep 17 00:00:00 2001 From: Vlad Savelyev Date: Wed, 27 Jan 2021 11:54:49 +1100 Subject: [PATCH 089/501] Conda package version: follow pep-0440 for dev release tags (which conda respects) (#16) --- .github/workflows/condarize.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/condarize.yaml b/.github/workflows/condarize.yaml index 5cace284fff..400c59ed0a6 100644 --- a/.github/workflows/condarize.yaml +++ b/.github/workflows/condarize.yaml @@ -32,7 +32,7 @@ jobs: MKFILE=hail/Makefile MAJOR_MINOR=$(grep -Po 'HAIL_MAJOR_MINOR_VERSION := \K.*(?=)' ${MKFILE}) PATCH=$(grep -Po 'HAIL_PATCH_VERSION := \K.*(?=)' ${MKFILE}) - VERSION=${MAJOR_MINOR}.${PATCH}-${GITHUB_SHA:0:7} + VERSION=${MAJOR_MINOR}.${PATCH}.dev${GITHUB_SHA:0:7} cat conda/hail/meta-template.yaml \ | sed s/{version}/${VERSION}/ > conda/hail/meta.yaml From 9eec9690ae268b4bb6a541c2851f2ed3488f798d Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 27 Jan 2021 12:06:44 +1100 Subject: [PATCH 090/501] Conda recipe: split fixing meta.yaml from build jobs, use ubuntu for that --- .github/workflows/condarize.yaml | 47 ++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/.github/workflows/condarize.yaml b/.github/workflows/condarize.yaml index 400c59ed0a6..6908a42dd08 100644 --- a/.github/workflows/condarize.yaml +++ b/.github/workflows/condarize.yaml @@ -1,12 +1,35 @@ name: CI -on: +on: push: branches: - main jobs: - build-test-publish: + set_conda_pkg_version: # For tag pushes, we want to assure only the tag event triggers CI, not the accompanying commit: if: "! startsWith(github.event.head_commit.message, 'Bump ') || startsWith(github.ref, 'refs/tags/')" + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@main + + - name: Fix meta YAML + run: | + MKFILE=hail/Makefile + MAJOR_MINOR=$(grep -Po 'HAIL_MAJOR_MINOR_VERSION := \K.*(?=)' ${MKFILE}) + PATCH=$(grep -Po 'HAIL_PATCH_VERSION := \K.*(?=)' ${MKFILE}) + VERSION=${MAJOR_MINOR}.${PATCH}.dev${GITHUB_SHA:0:7} + cat conda/hail/meta-template.yaml \ + | sed s/{version}/${VERSION}/ > conda/hail/meta.yaml + + - name: Upload meta YAML for build job + uses: actions/upload-artifact@v2 + with: + name: meta.yaml + path: conda/hail/meta.yaml + + condarize: + # For tag pushes, we want to assure only the tag event triggers CI, not the accompanying commit: + if: "! 
startsWith(github.event.head_commit.message, 'Bump ') || startsWith(github.ref, 'refs/tags/')" + needs: set_conda_pkg_version strategy: matrix: os: [ubuntu-latest, macos-latest] @@ -17,6 +40,15 @@ jobs: steps: - uses: actions/checkout@main + - name: Download meta YAML + uses: actions/download-artifact@v2 + with: + name: meta.yaml + path: conda/hail/ + + - name: Test artefacts + run: cat conda/hail/meta.yaml + - uses: conda-incubator/setup-miniconda@v2 with: activate-environment: buildenv @@ -27,19 +59,10 @@ jobs: - name: Setup build env run: conda install pip conda-build anaconda-client - - name: Fix version - run: | - MKFILE=hail/Makefile - MAJOR_MINOR=$(grep -Po 'HAIL_MAJOR_MINOR_VERSION := \K.*(?=)' ${MKFILE}) - PATCH=$(grep -Po 'HAIL_PATCH_VERSION := \K.*(?=)' ${MKFILE}) - VERSION=${MAJOR_MINOR}.${PATCH}.dev${GITHUB_SHA:0:7} - cat conda/hail/meta-template.yaml \ - | sed s/{version}/${VERSION}/ > conda/hail/meta.yaml - - name: Build package run: conda build conda/hail - name: Upload to anaconda package repository run: | - anaconda -t ${{ secrets.ANACONDA_TOKEN }} --force \ + anaconda -t ${{ secrets.ANACONDA_TOKEN }} \ upload ${CONDA_PREFIX}/conda-bld/**/*.tar.bz2 From 15db828cd5ad39ab3607168b663f1a2577a3e943 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 27 Jan 2021 14:16:13 +1100 Subject: [PATCH 091/501] Run CI on every event, but upload only on push to main --- .github/workflows/condarize.yaml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/condarize.yaml b/.github/workflows/condarize.yaml index 6908a42dd08..81743053ba3 100644 --- a/.github/workflows/condarize.yaml +++ b/.github/workflows/condarize.yaml @@ -1,8 +1,6 @@ name: CI -on: - push: - branches: - - main +on: [push, pull_request] + jobs: set_conda_pkg_version: # For tag pushes, we want to assure only the tag event triggers CI, not the accompanying commit: @@ -63,6 +61,7 @@ jobs: run: conda build conda/hail - name: Upload to anaconda package repository + if: github.ref == 'refs/heads/main' run: | anaconda -t ${{ secrets.ANACONDA_TOKEN }} \ upload ${CONDA_PREFIX}/conda-bld/**/*.tar.bz2 From db92fe37ff98808bcbea912a4f11ac2093c4842c Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 27 Jan 2021 14:19:33 +1100 Subject: [PATCH 092/501] Drop cat meta.yaml --- .github/workflows/condarize.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/condarize.yaml b/.github/workflows/condarize.yaml index 81743053ba3..ab6a7a8cf00 100644 --- a/.github/workflows/condarize.yaml +++ b/.github/workflows/condarize.yaml @@ -44,9 +44,6 @@ jobs: name: meta.yaml path: conda/hail/ - - name: Test artefacts - run: cat conda/hail/meta.yaml - - uses: conda-incubator/setup-miniconda@v2 with: activate-environment: buildenv From 8c697ca191b4055f54d517dcc917a9cf922409d5 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 27 Jan 2021 14:44:33 +1100 Subject: [PATCH 093/501] Rename CI steps --- .github/workflows/condarize.yaml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/condarize.yaml b/.github/workflows/condarize.yaml index ab6a7a8cf00..421e847fdda 100644 --- a/.github/workflows/condarize.yaml +++ b/.github/workflows/condarize.yaml @@ -1,8 +1,7 @@ -name: CI +name: Condarize on: [push, pull_request] - jobs: - set_conda_pkg_version: + set-conda-pkg-version: # For tag pushes, we want to assure only the tag event triggers CI, not the accompanying commit: if: "! 
startsWith(github.event.head_commit.message, 'Bump ') || startsWith(github.ref, 'refs/tags/')" runs-on: ubuntu-latest @@ -24,10 +23,10 @@ jobs: name: meta.yaml path: conda/hail/meta.yaml - condarize: + build-publish: # For tag pushes, we want to assure only the tag event triggers CI, not the accompanying commit: if: "! startsWith(github.event.head_commit.message, 'Bump ') || startsWith(github.ref, 'refs/tags/')" - needs: set_conda_pkg_version + needs: set-conda-pkg-version strategy: matrix: os: [ubuntu-latest, macos-latest] From cecf705674dbe20cdf89a6134a712d46795f4697 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Wed, 3 Feb 2021 14:21:41 +1100 Subject: [PATCH 094/501] Don't attempt to mount secrets that aren't necessary for "dev deploy" only. --- ci/ci/ci.py | 7 +++++-- ci/deployment.yaml | 46 ++++++++++++++++++++++++---------------------- 2 files changed, 29 insertions(+), 24 deletions(-) diff --git a/ci/ci/ci.py b/ci/ci/ci.py index 0251a13887e..7c99f8549fe 100644 --- a/ci/ci/ci.py +++ b/ci/ci/ci.py @@ -27,8 +27,11 @@ from .environment import BUCKET from .github import Repo, FQBranch, WatchedBranch, UnwatchedBranch, MergeFailureBatch -with open(os.environ.get('HAIL_CI_OAUTH_TOKEN', 'oauth-token/oauth-token'), 'r') as f: - oauth_token = f.read().strip() +oauth_token = None +HAIL_CI_OAUTH_TOKEN = os.getenv('HAIL_CI_OAUTH_TOKEN') +if HAIL_CI_OAUTH_TOKEN: + with open(HAIL_CI_OAUTH_TOKEN, 'r') as f: + oauth_token = f.read().strip() log = logging.getLogger('ci') diff --git a/ci/deployment.yaml b/ci/deployment.yaml index cd7dc7ed6e2..2ca76adab5c 100644 --- a/ci/deployment.yaml +++ b/ci/deployment.yaml @@ -41,11 +41,11 @@ spec: env: - name: HAIL_DEPLOY_CONFIG_FILE value: /deploy-config/deploy-config.json - - name: HAIL_CI_OAUTH_TOKEN - value: /secrets/oauth-token/oauth-token + #- name: HAIL_CI_OAUTH_TOKEN + # value: /secrets/oauth-token/oauth-token {% if deploy %} - name: HAIL_WATCHED_BRANCHES - value: '[["hail-is/hail:main",true]]' + value: '[]' {% else %} - name: HAIL_WATCHED_BRANCHES value: '[["hail-ci-test/ci-test-{{create_ci_test_repo.token}}:master", true]]' @@ -79,18 +79,20 @@ spec: - name: session-secret-key mountPath: /session-secret-key readOnly: true - - mountPath: /sql-config - name: sql-config - readOnly: true - - mountPath: /secrets/oauth-token - name: hail-ci-0-1-github-oauth-token - readOnly: true + # CPG-specific: don't attempt to mount secrets that aren't necessary + # for "dev deploy" only. 
+ #- mountPath: /sql-config + # name: sql-config + # readOnly: true + #- mountPath: /secrets/oauth-token + # name: hail-ci-0-1-github-oauth-token + # readOnly: true - mountPath: /user-tokens name: ci-tokens readOnly: true - - mountPath: /zulip-config - name: zulip-config - readOnly: true + #- mountPath: /zulip-config + # name: zulip-config + # readOnly: true - name: ssl-config-ci mountPath: /ssl-config readOnly: true @@ -112,19 +114,19 @@ spec: secret: optional: false secretName: session-secret-key - - name: sql-config - secret: - optional: false - secretName: "{{ ci_database.user_secret_name }}" - - name: hail-ci-0-1-github-oauth-token - secret: - secretName: hail-ci-0-1-github-oauth-token + #- name: sql-config + # secret: + # optional: false + # secretName: "{{ ci_database.user_secret_name }}" + #- name: hail-ci-0-1-github-oauth-token + # secret: + # secretName: hail-ci-0-1-github-oauth-token - name: ci-tokens secret: secretName: ci-tokens - - name: zulip-config - secret: - secretName: zulip-config + #- name: zulip-config + # secret: + # secretName: zulip-config - name: ssl-config-ci secret: optional: false From cdc0b08a7a49096080f2c357c711b7ced6464b98 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 3 Feb 2021 16:32:25 +1100 Subject: [PATCH 095/501] CI on tags only --- .github/workflows/{condarize.yaml => condarise.yaml} | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename .github/workflows/{condarize.yaml => condarise.yaml} (89%) diff --git a/.github/workflows/condarize.yaml b/.github/workflows/condarise.yaml similarity index 89% rename from .github/workflows/condarize.yaml rename to .github/workflows/condarise.yaml index 421e847fdda..ad1effd9ef1 100644 --- a/.github/workflows/condarize.yaml +++ b/.github/workflows/condarise.yaml @@ -1,9 +1,9 @@ -name: Condarize +name: Condarise on: [push, pull_request] jobs: set-conda-pkg-version: # For tag pushes, we want to assure only the tag event triggers CI, not the accompanying commit: - if: "! startsWith(github.event.head_commit.message, 'Bump ') || startsWith(github.ref, 'refs/tags/')" + if: "startsWith(github.ref, 'refs/tags/')" runs-on: ubuntu-latest steps: - uses: actions/checkout@main @@ -25,7 +25,7 @@ jobs: build-publish: # For tag pushes, we want to assure only the tag event triggers CI, not the accompanying commit: - if: "! startsWith(github.event.head_commit.message, 'Bump ') || startsWith(github.ref, 'refs/tags/')" + if: "startsWith(github.ref, 'refs/tags/')" needs: set-conda-pkg-version strategy: matrix: From f284bea95f8ac6e404b11d5e71cba5c0e10980d4 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Wed, 3 Feb 2021 16:37:45 +1100 Subject: [PATCH 096/501] Remove dbpool from ci. 
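The handlers removed below (the index page, retry_pr, and authorize_source_sha) were the consumers of the CI service's MySQL connection pool, so the pool setup in on_startup/on_cleanup can go as well. For context, a minimal sketch of the access pattern being dropped; the helper name and the aiomysql import are assumptions, and the real pool comes from gear.create_database_pool:

```python
# Illustrative sketch only; mirrors the dbpool usage deleted in this patch.
import aiomysql  # assumption: the pool behind gear.create_database_pool is aiomysql-compatible

async def mark_batch_invalidated(dbpool: aiomysql.Pool, batch_id: int) -> None:
    # Acquire a pooled connection, open a cursor, and run a parameterised INSERT,
    # as the retry_pr and authorize_source_sha handlers did before their removal.
    async with dbpool.acquire() as conn:
        async with conn.cursor() as cursor:
            await cursor.execute(
                'INSERT INTO invalidated_batches (batch_id) VALUES (%s);',
                (batch_id,))
```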
--- ci/ci/ci.py | 97 ---------------------------------------------- ci/deployment.yaml | 23 ----------- 2 files changed, 120 deletions(-) diff --git a/ci/ci/ci.py b/ci/ci/ci.py index 7c99f8549fe..14e3e06cbe5 100644 --- a/ci/ci/ci.py +++ b/ci/ci/ci.py @@ -47,52 +47,6 @@ routes = web.RouteTableDef() -@routes.get('') -@routes.get('/') -@web_authenticated_developers_only() -async def index(request, userdata): # pylint: disable=unused-argument - app = request.app - dbpool = app['dbpool'] - wb_configs = [] - for i, wb in enumerate(watched_branches): - if wb.prs: - pr_configs = [] - for pr in wb.prs.values(): - batch_id = pr.batch.id if pr.batch and hasattr(pr.batch, 'id') else None - build_state = pr.build_state if await pr.authorized(dbpool) else 'unauthorized' - if build_state is None and batch_id is not None: - build_state = 'building' - - pr_config = { - 'number': pr.number, - 'title': pr.title, - # FIXME generate links to the merge log - 'batch_id': pr.batch.id if pr.batch and hasattr(pr.batch, 'id') else None, - 'build_state': build_state, - 'review_state': pr.review_state, - 'author': pr.author, - 'out_of_date': pr.build_state in ['failure', 'success', None] and not pr.is_up_to_date(), - } - pr_configs.append(pr_config) - else: - pr_configs = None - # FIXME recent deploy history - wb_config = { - 'index': i, - 'branch': wb.branch.short_str(), - 'sha': wb.sha, - # FIXME generate links to the merge log - 'deploy_batch_id': wb.deploy_batch.id if wb.deploy_batch and hasattr(wb.deploy_batch, 'id') else None, - 'deploy_state': wb.deploy_state, - 'repo': wb.branch.repo.short_str(), - 'prs': pr_configs, - } - wb_configs.append(wb_config) - - page_context = {'watched_branches': wb_configs} - return await render_template('ci', request, userdata, 'index.html', page_context) - - def wb_and_pr_from_request(request): watched_branch_index = int(request.match_info['watched_branch_index']) pr_number = int(request.match_info['pr_number']) @@ -139,36 +93,6 @@ async def get_pr(request, userdata): # pylint: disable=unused-argument return await render_template('ci', request, userdata, 'pr.html', page_context) -async def retry_pr(wb, pr, request): - app = request.app - session = await aiohttp_session.get_session(request) - - if pr.batch is None: - log.info('retry cannot be requested for PR #{pr.number} because it has no batch') - set_message(session, f'Retry cannot be requested for PR #{pr.number} because it has no batch.', 'error') - return - - batch_id = pr.batch.id - dbpool = app['dbpool'] - async with dbpool.acquire() as conn: - async with conn.cursor() as cursor: - await cursor.execute('INSERT INTO invalidated_batches (batch_id) VALUES (%s);', batch_id) - await wb.notify_batch_changed(app) - - log.info(f'retry requested for PR: {pr.number}') - set_message(session, f'Retry requested for PR #{pr.number}.', 'info') - - -@routes.post('/watched_branches/{watched_branch_index}/pr/{pr_number}/retry') -@check_csrf_token -@web_authenticated_developers_only(redirect=False) -async def post_retry_pr(request, userdata): # pylint: disable=unused-argument - wb, pr = wb_and_pr_from_request(request) - - await asyncio.shield(retry_pr(wb, pr, request)) - return web.HTTPFound(deploy_config.external_url('ci', f'/watched_branches/{wb.index}/pr/{pr.number}')) - - @routes.get('/batches') @web_authenticated_developers_only() async def get_batches(request, userdata): @@ -210,23 +134,6 @@ async def get_job(request, userdata): return await render_template('ci', request, userdata, 'job.html', page_context) 
-@routes.post('/authorize_source_sha') -@check_csrf_token -@web_authenticated_developers_only(redirect=False) -async def post_authorized_source_sha(request, userdata): # pylint: disable=unused-argument - app = request.app - dbpool = app['dbpool'] - post = await request.post() - sha = post['sha'].strip() - async with dbpool.acquire() as conn: - async with conn.cursor() as cursor: - await cursor.execute('INSERT INTO authorized_shas (sha) VALUES (%s);', sha) - log.info(f'authorized sha: {sha}') - session = await aiohttp_session.get_session(request) - set_message(session, f'SHA {sha} authorized.', 'info') - return web.HTTPFound(deploy_config.external_url('ci', '/')) - - @routes.get('/healthcheck') async def healthcheck(request): # pylint: disable=unused-argument return web.Response(status=200) @@ -403,7 +310,6 @@ async def on_startup(app): app['gh_client_session'] = aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=5)) app['github_client'] = gh_aiohttp.GitHubAPI(app['gh_client_session'], 'ci', oauth_token=oauth_token) app['batch_client'] = BatchClient('ci') - app['dbpool'] = await create_database_pool() app['task_manager'] = aiotools.BackgroundTaskManager() app['task_manager'].ensure_future(update_loop(app)) @@ -411,9 +317,6 @@ async def on_startup(app): async def on_cleanup(app): try: - dbpool = app['dbpool'] - dbpool.close() - await dbpool.wait_closed() await app['gh_client_session'].close() await app['batch_client'].close() finally: diff --git a/ci/deployment.yaml b/ci/deployment.yaml index 2ca76adab5c..e200f9af00d 100644 --- a/ci/deployment.yaml +++ b/ci/deployment.yaml @@ -41,8 +41,6 @@ spec: env: - name: HAIL_DEPLOY_CONFIG_FILE value: /deploy-config/deploy-config.json - #- name: HAIL_CI_OAUTH_TOKEN - # value: /secrets/oauth-token/oauth-token {% if deploy %} - name: HAIL_WATCHED_BRANCHES value: '[]' @@ -79,20 +77,9 @@ spec: - name: session-secret-key mountPath: /session-secret-key readOnly: true - # CPG-specific: don't attempt to mount secrets that aren't necessary - # for "dev deploy" only. 
- #- mountPath: /sql-config - # name: sql-config - # readOnly: true - #- mountPath: /secrets/oauth-token - # name: hail-ci-0-1-github-oauth-token - # readOnly: true - mountPath: /user-tokens name: ci-tokens readOnly: true - #- mountPath: /zulip-config - # name: zulip-config - # readOnly: true - name: ssl-config-ci mountPath: /ssl-config readOnly: true @@ -114,19 +101,9 @@ spec: secret: optional: false secretName: session-secret-key - #- name: sql-config - # secret: - # optional: false - # secretName: "{{ ci_database.user_secret_name }}" - #- name: hail-ci-0-1-github-oauth-token - # secret: - # secretName: hail-ci-0-1-github-oauth-token - name: ci-tokens secret: secretName: ci-tokens - #- name: zulip-config - # secret: - # secretName: zulip-config - name: ssl-config-ci secret: optional: false From 11fd4ceb995c409daf692c0f82522edd38fe5482 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 3 Feb 2021 16:52:57 +1100 Subject: [PATCH 097/501] Adjust comments --- .github/workflows/condarise.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/condarise.yaml b/.github/workflows/condarise.yaml index ad1effd9ef1..bd09215f5a7 100644 --- a/.github/workflows/condarise.yaml +++ b/.github/workflows/condarise.yaml @@ -2,7 +2,7 @@ name: Condarise on: [push, pull_request] jobs: set-conda-pkg-version: - # For tag pushes, we want to assure only the tag event triggers CI, not the accompanying commit: + # Run for tags only if: "startsWith(github.ref, 'refs/tags/')" runs-on: ubuntu-latest steps: @@ -24,7 +24,7 @@ jobs: path: conda/hail/meta.yaml build-publish: - # For tag pushes, we want to assure only the tag event triggers CI, not the accompanying commit: + # Run for tags only if: "startsWith(github.ref, 'refs/tags/')" needs: set-conda-pkg-version strategy: From 2f8e03a790db0f2a71339d333a8baaaae503c187 Mon Sep 17 00:00:00 2001 From: Vlad Savelyev Date: Wed, 3 Feb 2021 20:21:21 +1100 Subject: [PATCH 098/501] Conda pkg: fix for hailctl (#22) * For building cpg conda package, skip uploading artefacts and use the official packages ones --- conda/hail/build.sh | 2 +- hail/Makefile | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/conda/hail/build.sh b/conda/hail/build.sh index 434df6b5a3e..2c3c348b87f 100755 --- a/conda/hail/build.sh +++ b/conda/hail/build.sh @@ -3,5 +3,5 @@ # Build instructions # https://hail.is/docs/0.2/getting_started_developing.html#requirements pushd $SRC_DIR/hail -make install HAIL_COMPILE_NATIVES='build' -j ${CPU_COUNT} +make install HAIL_COMPILE_NATIVES='build' -j ${CPU_COUNT} SKIP_UPLOAD_ARTIFACTS=1 popd diff --git a/hail/Makefile b/hail/Makefile index e6f7c863087..200ccea3145 100644 --- a/hail/Makefile +++ b/hail/Makefile @@ -214,6 +214,11 @@ CLOUD_SUB_FOLDER := $(HAIL_PIP_VERSION) UPLOAD_RETENTION = gsutil -m retention temp set "$(cloud_base)/*" endif +ifdef SKIP_UPLOAD_ARTIFACTS +DEV_CLARIFIER = +CLOUD_SUB_FOLDER := $(HAIL_PIP_VERSION) +endif + HAILCTL_BUCKET_BASE ?= gs://hail-common/hailctl/dataproc cloud_base := $(HAILCTL_BUCKET_BASE)/$(DEV_CLARIFIER)$(CLOUD_SUB_FOLDER) @@ -255,7 +260,11 @@ install-on-cluster: $(WHEEL) $(PIP) install $(WHEEL) --no-deps .PHONY: install-hailctl +ifdef SKIP_UPLOAD_ARTIFACTS +install-hailctl: install +else install-hailctl: install upload-artifacts +endif .PHONY: test-dataproc test-dataproc: install-hailctl From 6d369669360c9215e82d002b458d841cc994dbf3 Mon Sep 17 00:00:00 2001 From: Vlad Savelyev Date: Wed, 3 Feb 2021 22:07:59 +1100 Subject: [PATCH 099/501] Fix to make sure conda pkg is 
uploaded tot anaconda (#23) --- .github/workflows/condarise.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/condarise.yaml b/.github/workflows/condarise.yaml index bd09215f5a7..92656c159b0 100644 --- a/.github/workflows/condarise.yaml +++ b/.github/workflows/condarise.yaml @@ -57,7 +57,6 @@ jobs: run: conda build conda/hail - name: Upload to anaconda package repository - if: github.ref == 'refs/heads/main' run: | anaconda -t ${{ secrets.ANACONDA_TOKEN }} \ upload ${CONDA_PREFIX}/conda-bld/**/*.tar.bz2 From b6f287e891b39e61e8c76092855a15c9f1d5c864 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Thu, 4 Feb 2021 10:20:57 +1100 Subject: [PATCH 100/501] Remove us-central1 check. --- batch/batch/driver/pool.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/batch/batch/driver/pool.py b/batch/batch/driver/pool.py index 4bf14c9a089..b2378fdcabd 100644 --- a/batch/batch/driver/pool.py +++ b/batch/batch/driver/pool.py @@ -407,8 +407,7 @@ def get_instance(user, cores_mcpu): while i < len(self.pool.healthy_instances_by_free_cores): instance = self.pool.healthy_instances_by_free_cores[i] assert cores_mcpu <= instance.free_cores_mcpu - if user != 'ci' or (user == 'ci' and instance.zone.startswith('us-central1')): - return instance + return instance i += 1 histogram = collections.defaultdict(int) for instance in self.pool.healthy_instances_by_free_cores: From be9a1bbba5dbaa7516dde72ad97f0c6ff0de532a Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Thu, 4 Feb 2021 12:41:28 +1100 Subject: [PATCH 101/501] Remove non-existent secrets from detault_ns step. --- build.yaml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/build.yaml b/build.yaml index 62d1ce8167b..7340f8ab05f 100644 --- a/build.yaml +++ b/build.yaml @@ -9,18 +9,11 @@ steps: namespaceName: default public: true secrets: - - hail-vdc-sa-key - gcr-pull-key - gcr-push-service-account-key - - hail-ci-0-1-github-oauth-token - test-gsa-key - auth-oauth2-client-secret - - scorecard-github-access-token - - zulip-config - benchmark-gsa-key - - billing-monitor-gsa-key - - asana-access-token - - hail-ci-0-1-service-account-key - kind: buildImage name: echo_image dockerFile: echo/Dockerfile From 52e7091ac9efc99cca81fae4bf70ece070115587 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Thu, 4 Feb 2021 13:06:30 +1100 Subject: [PATCH 102/501] Fix CI bucket name. --- ci/deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/deployment.yaml b/ci/deployment.yaml index e200f9af00d..0891dddf95b 100644 --- a/ci/deployment.yaml +++ b/ci/deployment.yaml @@ -64,7 +64,7 @@ spec: value: "{{ global.k8s_server_url }}" - name: HAIL_CI_BUCKET_NAME {% if deploy %} - value: "hail-ci-bpk3h" + value: "cpg-hail-ci" {% else %} value: "hail-test-dmk9z" {% endif %} From 024a259caab971417a25ede308a7941ea3b72f71 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Fri, 5 Feb 2021 09:25:27 +1100 Subject: [PATCH 103/501] Add serviceName for create_test_gsa_keys step. 
--- build.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/build.yaml b/build.yaml index 7340f8ab05f..add4c856353 100644 --- a/build.yaml +++ b/build.yaml @@ -348,6 +348,10 @@ steps: kubectl -n {{ default_ns.name }} get -o json secret test-gsa-key | jq '{apiVersion, kind, type, data, metadata: {name: "batch-gsa-key"}}' | kubectl -n {{ default_ns.name }} apply -f - kubectl -n {{ default_ns.name }} get -o json secret test-gsa-key | jq '{apiVersion, kind, type, data, metadata: {name: "atgu-gsa-key"}}' | kubectl -n {{ default_ns.name }} apply -f - kubectl -n {{ default_ns.name }} get -o json secret test-gsa-key | jq '{apiVersion, kind, type, data, metadata: {name: "benchmark-gsa-key"}}' | kubectl -n {{ default_ns.name }} apply -f - + serviceAccount: + name: admin + namespace: + valueFrom: default_ns.name scopes: - test - dev From 2921af1eb79afbf4eb058daa6561875b63d66050 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Fri, 5 Feb 2021 17:03:49 +1100 Subject: [PATCH 104/501] auth_database depends on create_database_server_config --- build.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/build.yaml b/build.yaml index add4c856353..2750584ac8d 100644 --- a/build.yaml +++ b/build.yaml @@ -272,6 +272,7 @@ steps: dependsOn: - default_ns - copy_files + - create_database_server_config - delete_auth_tables - kind: runImage name: create_deploy_config From 2c842fa4cee8782e33b30be96fcc8e51305fd45f Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Fri, 5 Feb 2021 17:38:07 +1100 Subject: [PATCH 105/501] Add dependency logs. --- ci/ci/build.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ci/ci/build.py b/ci/ci/build.py index 6fd600c7f59..d400469b2ec 100644 --- a/ci/ci/build.py +++ b/ci/ci/build.py @@ -157,6 +157,9 @@ def __init__(self, params): self.deps = [params.name_step[d] for d in json['dependsOn'] if params.name_step[d]] else: self.deps = [] + + log.info(f'step {self.name} depends on: {", ".join(self.deps)}') + self.scopes = json.get('scopes') self.run_if_requested = json.get('runIfRequested', False) From 7c471c79c04a9e8e3e1825a34cfadca3751fc2ae Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Fri, 5 Feb 2021 17:47:47 +1100 Subject: [PATCH 106/501] Fix depends log. --- ci/ci/build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/ci/build.py b/ci/ci/build.py index d400469b2ec..4bc314e72ba 100644 --- a/ci/ci/build.py +++ b/ci/ci/build.py @@ -158,7 +158,7 @@ def __init__(self, params): else: self.deps = [] - log.info(f'step {self.name} depends on: {", ".join(self.deps)}') + log.info(f'step {self.name} depends on: {", ".join(step.name for step in self.deps)}') self.scopes = json.get('scopes') self.run_if_requested = json.get('runIfRequested', False) From b5f94d5d57fba4b87effedf207506ec1727d0d1f Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Fri, 5 Feb 2021 17:59:24 +1100 Subject: [PATCH 107/501] Add create_database_server_config to dev scope. 
--- build.yaml | 2 +- ci/ci/build.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/build.yaml b/build.yaml index 2750584ac8d..b35b2f16789 100644 --- a/build.yaml +++ b/build.yaml @@ -219,6 +219,7 @@ steps: valueFrom: default_ns.name scopes: - test + - dev dependsOn: - default_ns - base_image @@ -272,7 +273,6 @@ steps: dependsOn: - default_ns - copy_files - - create_database_server_config - delete_auth_tables - kind: runImage name: create_deploy_config diff --git a/ci/ci/build.py b/ci/ci/build.py index 4bc314e72ba..65adbba4822 100644 --- a/ci/ci/build.py +++ b/ci/ci/build.py @@ -158,8 +158,6 @@ def __init__(self, params): else: self.deps = [] - log.info(f'step {self.name} depends on: {", ".join(step.name for step in self.deps)}') - self.scopes = json.get('scopes') self.run_if_requested = json.get('runIfRequested', False) From 3c4a1fe66751e3ecd35427c3e90af0975fbc5ade Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Fri, 5 Feb 2021 18:01:24 +1100 Subject: [PATCH 108/501] Restore create_database_server_config dep. --- build.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/build.yaml b/build.yaml index b35b2f16789..04c3bb45fc1 100644 --- a/build.yaml +++ b/build.yaml @@ -273,6 +273,7 @@ steps: dependsOn: - default_ns - copy_files + - create_database_server_config - delete_auth_tables - kind: runImage name: create_deploy_config From f13196a8e23f3af957432dbbad4ba1331cfb31e4 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Fri, 5 Feb 2021 21:21:16 +1100 Subject: [PATCH 109/501] Revert. --- build.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/build.yaml b/build.yaml index 04c3bb45fc1..add4c856353 100644 --- a/build.yaml +++ b/build.yaml @@ -219,7 +219,6 @@ steps: valueFrom: default_ns.name scopes: - test - - dev dependsOn: - default_ns - base_image @@ -273,7 +272,6 @@ steps: dependsOn: - default_ns - copy_files - - create_database_server_config - delete_auth_tables - kind: runImage name: create_deploy_config From baf543cb2a475d3329d4ef635d56f4a13b3c73bd Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Fri, 5 Feb 2021 22:21:55 +1100 Subject: [PATCH 110/501] Don't mount auth-gsa-key in create_accounts step. --- build.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/build.yaml b/build.yaml index add4c856353..5b72d922d77 100644 --- a/build.yaml +++ b/build.yaml @@ -402,10 +402,6 @@ steps: namespace: valueFrom: default_ns.name mountPath: /deploy-config - - name: auth-gsa-key - namespace: - valueFrom: default_ns.name - mountPath: /gsa-key inputs: - from: /repo/ci/bootstrap_create_accounts.py to: /io/bootstrap_create_accounts.py From 6b771dd589112b6b3c1922cc75da474c4c5a47af Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Fri, 5 Feb 2021 22:47:24 +1100 Subject: [PATCH 111/501] Create ci and test-dev GSA keys in create_test_gsa_keys. 
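The step below clones the existing test-gsa-key secret under several new names by piping `kubectl get -o json` through jq and back into `kubectl apply`. As a rough equivalent for readers less familiar with that pipeline, here is a sketch using the official Kubernetes Python client; the namespace and names are placeholders, and unlike `kubectl apply` the create call fails if the target secret already exists:

```python
# Sketch of the secret cloning performed below, written against the kubernetes
# Python client instead of kubectl/jq. Purely illustrative.
from typing import List
from kubernetes import client, config

def clone_secret(namespace: str, source: str, targets: List[str]) -> None:
    config.load_kube_config()  # use load_incluster_config() when running in-cluster
    v1 = client.CoreV1Api()
    src = v1.read_namespaced_secret(source, namespace)
    for name in targets:
        body = client.V1Secret(
            api_version='v1',
            kind='Secret',
            type=src.type,
            data=src.data,  # reuse the same key material
            metadata=client.V1ObjectMeta(name=name, namespace=namespace),
        )
        v1.create_namespaced_secret(namespace, body)

# e.g. clone_secret('default', 'test-gsa-key', ['ci-gsa-key', 'test-dev-gsa-key'])
```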
--- build.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/build.yaml b/build.yaml index 5b72d922d77..afe78bba538 100644 --- a/build.yaml +++ b/build.yaml @@ -343,11 +343,13 @@ steps: image: valueFrom: service_base_image.image script: | - # batch, benchmark, auth gsa keys + # auth, atgu, batch, benchmark, ci, test-dev gsa keys kubectl -n {{ default_ns.name }} get -o json secret test-gsa-key | jq '{apiVersion, kind, type, data, metadata: {name: "auth-gsa-key"}}' | kubectl -n {{ default_ns.name }} apply -f - - kubectl -n {{ default_ns.name }} get -o json secret test-gsa-key | jq '{apiVersion, kind, type, data, metadata: {name: "batch-gsa-key"}}' | kubectl -n {{ default_ns.name }} apply -f - kubectl -n {{ default_ns.name }} get -o json secret test-gsa-key | jq '{apiVersion, kind, type, data, metadata: {name: "atgu-gsa-key"}}' | kubectl -n {{ default_ns.name }} apply -f - + kubectl -n {{ default_ns.name }} get -o json secret test-gsa-key | jq '{apiVersion, kind, type, data, metadata: {name: "batch-gsa-key"}}' | kubectl -n {{ default_ns.name }} apply -f - kubectl -n {{ default_ns.name }} get -o json secret test-gsa-key | jq '{apiVersion, kind, type, data, metadata: {name: "benchmark-gsa-key"}}' | kubectl -n {{ default_ns.name }} apply -f - + kubectl -n {{ default_ns.name }} get -o json secret test-gsa-key | jq '{apiVersion, kind, type, data, metadata: {name: "ci-gsa-key"}}' | kubectl -n {{ default_ns.name }} apply -f - + kubectl -n {{ default_ns.name }} get -o json secret test-gsa-key | jq '{apiVersion, kind, type, data, metadata: {name: "test-dev-gsa-key"}}' | kubectl -n {{ default_ns.name }} apply -f - serviceAccount: name: admin namespace: From cd7627af060852f1f2c257d63e1762d82ad007e7 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Fri, 5 Feb 2021 23:02:33 +1100 Subject: [PATCH 112/501] Strip down ci service to the minimum functionality for dev deploy. 
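After this change the CI service keeps only what `hailctl dev deploy` needs: the developer-facing batch pages, the healthcheck, and the dev_deploy_branch endpoint. A minimal sketch of that shape follows; the route paths match the handlers kept below, but the bodies are placeholders and authentication, GitHub lookups, and batch submission are elided:

```python
# Minimal sketch of the stripped-down service; illustrative only, not the real ci.py.
from aiohttp import web

routes = web.RouteTableDef()

@routes.get('/healthcheck')
async def healthcheck(request):
    # Liveness probe endpoint, kept unchanged by this patch.
    return web.Response(status=200)

@routes.post('/api/v1alpha/dev_deploy_branch')
async def dev_deploy_branch(request):
    params = await request.json()
    # The real handler resolves the branch to a SHA via the GitHub API and
    # submits a deploy batch; here we only echo the requested branch back.
    return web.json_response({'branch': params.get('branch'), 'batch_id': None})

def run():
    app = web.Application()
    app.add_routes(routes)
    web.run_app(app, port=5000)

if __name__ == '__main__':
    run()
```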
--- ci/ci/ci.py | 208 ++-------------------------------------------------- 1 file changed, 6 insertions(+), 202 deletions(-) diff --git a/ci/ci/ci.py b/ci/ci/ci.py index 14e3e06cbe5..f942cefbb1f 100644 --- a/ci/ci/ci.py +++ b/ci/ci/ci.py @@ -1,37 +1,23 @@ import traceback import json -import os import logging -import asyncio -import concurrent.futures import aiohttp from aiohttp import web -import aiohttp_session import uvloop -from gidgethub import aiohttp as gh_aiohttp, routing as gh_routing, sansio as gh_sansio +from gidgethub import aiohttp as gh_aiohttp from hailtop.utils import collect_agen, humanize_timedelta_msecs from hailtop.batch_client.aioclient import BatchClient from hailtop.config import get_deploy_config from hailtop.tls import internal_server_ssl_context from hailtop.hail_logging import AccessLogger -from hailtop import aiotools from gear import ( setup_aiohttp_session, rest_authenticated_developers_only, web_authenticated_developers_only, - check_csrf_token, - create_database_pool, ) -from web_common import setup_aiohttp_jinja2, setup_common_static_routes, render_template, set_message +from web_common import setup_aiohttp_jinja2, setup_common_static_routes, render_template -from .environment import BUCKET -from .github import Repo, FQBranch, WatchedBranch, UnwatchedBranch, MergeFailureBatch - -oauth_token = None -HAIL_CI_OAUTH_TOKEN = os.getenv('HAIL_CI_OAUTH_TOKEN') -if HAIL_CI_OAUTH_TOKEN: - with open(HAIL_CI_OAUTH_TOKEN, 'r') as f: - oauth_token = f.read().strip() +from .github import FQBranch, UnwatchedBranch log = logging.getLogger('ci') @@ -39,60 +25,9 @@ deploy_config = get_deploy_config() -watched_branches = [ - WatchedBranch(index, FQBranch.from_short_str(bss), deployable) - for (index, [bss, deployable]) in enumerate(json.loads(os.environ.get('HAIL_WATCHED_BRANCHES', '[]'))) -] - routes = web.RouteTableDef() -def wb_and_pr_from_request(request): - watched_branch_index = int(request.match_info['watched_branch_index']) - pr_number = int(request.match_info['pr_number']) - - if watched_branch_index < 0 or watched_branch_index >= len(watched_branches): - raise web.HTTPNotFound() - wb = watched_branches[watched_branch_index] - - if not wb.prs or pr_number not in wb.prs: - raise web.HTTPNotFound() - return wb, wb.prs[pr_number] - - -@routes.get('/watched_branches/{watched_branch_index}/pr/{pr_number}') -@web_authenticated_developers_only() -async def get_pr(request, userdata): # pylint: disable=unused-argument - wb, pr = wb_and_pr_from_request(request) - - page_context = {} - page_context['repo'] = wb.branch.repo.short_str() - page_context['wb'] = wb - page_context['pr'] = pr - # FIXME - batch = pr.batch - if batch: - if hasattr(batch, 'id'): - status = await batch.last_known_status() - jobs = await collect_agen(batch.jobs()) - for j in jobs: - j['duration'] = humanize_timedelta_msecs(j['duration']) - page_context['batch'] = status - page_context['jobs'] = jobs - page_context['artifacts'] = f'/{BUCKET}/build/{batch.attributes["token"]}' - else: - page_context['exception'] = '\n'.join( - traceback.format_exception(None, batch.exception, batch.exception.__traceback__) - ) - - batch_client = request.app['batch_client'] - batches = batch_client.list_batches(f'test=1 pr={pr.number}') - batches = sorted([b async for b in batches], key=lambda b: b.id, reverse=True) - page_context['history'] = [await b.last_known_status() for b in batches] - - return await render_template('ci', request, userdata, 'pr.html', page_context) - - @routes.get('/batches') 
@web_authenticated_developers_only() async def get_batches(request, userdata): @@ -139,112 +74,6 @@ async def healthcheck(request): # pylint: disable=unused-argument return web.Response(status=200) -gh_router = gh_routing.Router() - - -@gh_router.register('pull_request') -async def pull_request_callback(event): - gh_pr = event.data['pull_request'] - number = gh_pr['number'] - target_branch = FQBranch.from_gh_json(gh_pr['base']) - for wb in watched_branches: - if (wb.prs and number in wb.prs) or (wb.branch == target_branch): - await wb.notify_github_changed(event.app) - - -@gh_router.register('push') -async def push_callback(event): - data = event.data - ref = data['ref'] - if ref.startswith('refs/heads/'): - branch_name = ref[len('refs/heads/') :] - branch = FQBranch(Repo.from_gh_json(data['repository']), branch_name) - for wb in watched_branches: - if wb.branch == branch or any(pr.branch == branch for pr in wb.prs.values()): - await wb.notify_github_changed(event.app) - - -@gh_router.register('pull_request_review') -async def pull_request_review_callback(event): - gh_pr = event.data['pull_request'] - number = gh_pr['number'] - for wb in watched_branches: - if number in wb.prs: - await wb.notify_github_changed(event.app) - - -async def github_callback_handler(request): - event = gh_sansio.Event.from_http(request.headers, await request.read()) - event.app = request.app - await gh_router.dispatch(event) - - -@routes.post('/github_callback') -async def github_callback(request): - await asyncio.shield(github_callback_handler(request)) - return web.Response(status=200) - - -async def batch_callback_handler(request): - app = request.app - params = await request.json() - log.info(f'batch callback {params}') - attrs = params.get('attributes') - if attrs: - target_branch = attrs.get('target_branch') - if target_branch: - for wb in watched_branches: - if wb.branch.short_str() == target_branch: - log.info(f'watched_branch {wb.branch.short_str()} notify batch changed') - await wb.notify_batch_changed(app) - - -@routes.get('/api/v1alpha/deploy_status') -@rest_authenticated_developers_only -async def deploy_status(request, userdata): # pylint: disable=unused-argument - batch_client = request.app['batch_client'] - - async def get_failure_information(batch): - if isinstance(batch, MergeFailureBatch): - return batch.exception - jobs = await collect_agen(batch.jobs()) - - async def fetch_job_and_log(j): - full_job = await batch_client.get_job(j['batch_id'], j['job_id']) - log = await full_job.log() - return {**full_job._status, 'log': log} - - return await asyncio.gather(*[fetch_job_and_log(j) for j in jobs if j['state'] in ('Error', 'Failed')]) - - wb_configs = [ - { - 'branch': wb.branch.short_str(), - 'sha': wb.sha, - 'deploy_batch_id': wb.deploy_batch.id if wb.deploy_batch and hasattr(wb.deploy_batch, 'id') else None, - 'deploy_state': wb.deploy_state, - 'repo': wb.branch.repo.short_str(), - 'failure_information': None - if wb.deploy_state == 'success' - else await get_failure_information(wb.deploy_batch), - } - for wb in watched_branches - ] - return web.json_response(wb_configs) - - -@routes.post('/api/v1alpha/update') -@rest_authenticated_developers_only -async def post_update(request, userdata): # pylint: disable=unused-argument - log.info('developer triggered update') - - async def update_all(): - for wb in watched_branches: - await wb.update(request.app) - - request.app['task_manager'].ensure_future(update_all()) - return web.Response(status=200) - - @routes.post('/api/v1alpha/dev_deploy_branch') 
@rest_authenticated_developers_only async def dev_deploy_branch(request, userdata): @@ -287,40 +116,15 @@ async def dev_deploy_branch(request, userdata): return web.json_response({'sha': sha, 'batch_id': batch_id}) -@routes.post('/api/v1alpha/batch_callback') -async def batch_callback(request): - await asyncio.shield(batch_callback_handler(request)) - return web.Response(status=200) - - -async def update_loop(app): - while True: - try: - for wb in watched_branches: - log.info(f'updating {wb.branch.short_str()}') - await wb.update(app) - except concurrent.futures.CancelledError: - raise - except Exception: # pylint: disable=broad-except - log.exception(f'{wb.branch.short_str()} update failed due to exception') - await asyncio.sleep(300) - - async def on_startup(app): app['gh_client_session'] = aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=5)) - app['github_client'] = gh_aiohttp.GitHubAPI(app['gh_client_session'], 'ci', oauth_token=oauth_token) + app['github_client'] = gh_aiohttp.GitHubAPI(app['gh_client_session'], 'ci') app['batch_client'] = BatchClient('ci') - app['task_manager'] = aiotools.BackgroundTaskManager() - app['task_manager'].ensure_future(update_loop(app)) - async def on_cleanup(app): - try: - await app['gh_client_session'].close() - await app['batch_client'].close() - finally: - app['task_manager'].shutdown() + await app['gh_client_session'].close() + await app['batch_client'].close() def run(): From 68cc473140784a69e4758bb79d7286908d21438a Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Fri, 5 Feb 2021 23:40:08 +1100 Subject: [PATCH 113/501] Revert extra line. --- ci/ci/build.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/ci/build.py b/ci/ci/build.py index 65adbba4822..6fd600c7f59 100644 --- a/ci/ci/build.py +++ b/ci/ci/build.py @@ -157,7 +157,6 @@ def __init__(self, params): self.deps = [params.name_step[d] for d in json['dependsOn'] if params.name_step[d]] else: self.deps = [] - self.scopes = json.get('scopes') self.run_if_requested = json.get('runIfRequested', False) From 1cceeaa0dd01fde62d8fe445003cab31a3af21a4 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Sat, 6 Feb 2021 00:11:57 +1100 Subject: [PATCH 114/501] Fix server name in gateway. --- gateway/gateway.nginx.conf | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/gateway/gateway.nginx.conf b/gateway/gateway.nginx.conf index 74a84f6c76a..7c33cd847f5 100644 --- a/gateway/gateway.nginx.conf +++ b/gateway/gateway.nginx.conf @@ -23,7 +23,7 @@ map $maybe_router_scheme $router_scheme { } server { - server_name internal.hail.is; + server_name internal.hail.populationgenomics.org.au; client_max_body_size 8m; location = /auth { @@ -57,6 +57,12 @@ server { # include /ssl-config/ssl-config-proxy.conf } + error_page 401 = @error401; + + location @error401 { + return 302 https://auth.populationgenomics.org.au/login?next=https://$http_host$request_uri; + } + listen [::]:443 ssl; listen 443 ssl; ssl_certificate /etc/letsencrypt/fullchain.pem; @@ -66,7 +72,7 @@ server { } server { - server_name hail.is; + server_name hail.populationgenomics.org.au; client_max_body_size 8m; location / { From dd68d2d9c4b5b8d7c81269de450e0eb8ab6df92f Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Sat, 6 Feb 2021 12:39:43 +1100 Subject: [PATCH 115/501] Fix redirect. 
--- gateway/gateway.nginx.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gateway/gateway.nginx.conf b/gateway/gateway.nginx.conf index 7c33cd847f5..b847c81761b 100644 --- a/gateway/gateway.nginx.conf +++ b/gateway/gateway.nginx.conf @@ -60,7 +60,7 @@ server { error_page 401 = @error401; location @error401 { - return 302 https://auth.populationgenomics.org.au/login?next=https://$http_host$request_uri; + return 302 https://auth.hail.populationgenomics.org.au/login?next=https://$http_host$request_uri; } listen [::]:443 ssl; From 5c560692d847f9ce6f8e735384ab427693e16301 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Sun, 7 Feb 2021 13:59:38 +1100 Subject: [PATCH 116/501] Disable "Launch Jupyter" button, as this currently doesn't work. --- notebook/notebook/templates/notebook-form.html | 2 ++ 1 file changed, 2 insertions(+) diff --git a/notebook/notebook/templates/notebook-form.html b/notebook/notebook/templates/notebook-form.html index 0a057e82ebb..99e0332a853 100644 --- a/notebook/notebook/templates/notebook-form.html +++ b/notebook/notebook/templates/notebook-form.html @@ -1,4 +1,6 @@
+
From de79a15d2ed2c0fd0338dfd30526b6289a370165 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Tue, 9 Feb 2021 17:18:32 +1100 Subject: [PATCH 117/501] Fix test bucket name. --- atgu/deployment.yaml | 2 +- batch/deployment.yaml | 4 +- benchmark-service/deployment.yaml | 2 +- build.yaml | 62 +++++++++---------- ci/deployment.yaml | 2 +- .../is/hail/fs/gs/GoogleStorageFSSuite.scala | 2 +- query/deployment.yaml | 2 +- 7 files changed, 38 insertions(+), 38 deletions(-) diff --git a/atgu/deployment.yaml b/atgu/deployment.yaml index f5cf8eeaa68..d31d79e2ae7 100644 --- a/atgu/deployment.yaml +++ b/atgu/deployment.yaml @@ -63,7 +63,7 @@ spec: {% if deploy %} value: hail-atgu-data {% else %} - value: hail-test-dmk9z + value: cpg-hail-test {% endif %} volumeMounts: - name: deploy-config diff --git a/batch/deployment.yaml b/batch/deployment.yaml index e920b7e5fed..50b01ee941b 100644 --- a/batch/deployment.yaml +++ b/batch/deployment.yaml @@ -99,7 +99,7 @@ spec: value: "7200" # 2 hours {% else %} - name: HAIL_BATCH_BUCKET_NAME - value: hail-test-dmk9z + value: cpg-hail-test - name: STANDING_WORKER_MAX_IDLE_TIME_SECS value: "300" {% endif %} @@ -239,7 +239,7 @@ spec: value: "7200" # 2 hours {% else %} - name: HAIL_BATCH_BUCKET_NAME - value: hail-test-dmk9z + value: cpg-hail-test - name: STANDING_WORKER_MAX_IDLE_TIME_SECS value: "300" {% endif %} diff --git a/benchmark-service/deployment.yaml b/benchmark-service/deployment.yaml index f80ac822e18..48955c1ecd9 100644 --- a/benchmark-service/deployment.yaml +++ b/benchmark-service/deployment.yaml @@ -50,7 +50,7 @@ spec: value: "WetqnMQMoqq2" {% else %} - name: HAIL_BENCHMARK_BUCKET_NAME - value: hail-test-dmk9z + value: cpg-hail-test {% endif %} {% if scope == "dev" %} - name: INSTANCE_ID diff --git a/build.yaml b/build.yaml index afe78bba538..7e1c9c6f73f 100644 --- a/build.yaml +++ b/build.yaml @@ -743,9 +743,9 @@ steps: tar xzf resources.tar.gz -C src/test tar xzf splits.tar.gz gcloud -q auth activate-service-account --key-file=/test-gsa-key/key.json - gsutil -m cp -r src/test/resources/fs gs://hail-test-dmk9z/{{ token }}/test/resources/ + gsutil -m cp -r src/test/resources/fs gs://cpg-hail-test/{{ token }}/test/resources/ export HAIL_TEST_SKIP_R=1 - export HAIL_GS_FS_TEST_RESOURCES=gs://hail-test-dmk9z/{{ token }}/test/resources/fs + export HAIL_GS_FS_TEST_RESOURCES=gs://cpg-hail-test/{{ token }}/test/resources/fs java -cp hail-test.jar:$SPARK_HOME/jars/* org.testng.TestNG -listener is.hail.LogTestListener testng-splits-0.xml inputs: - from: /resources.tar.gz @@ -780,9 +780,9 @@ steps: tar xzf resources.tar.gz -C src/test tar xzf splits.tar.gz gcloud -q auth activate-service-account --key-file=/test-gsa-key/key.json - gsutil -m cp -r src/test/resources/fs gs://hail-test-dmk9z/{{ token }}/test/resources/ + gsutil -m cp -r src/test/resources/fs gs://cpg-hail-test/{{ token }}/test/resources/ export HAIL_TEST_SKIP_R=1 - export HAIL_GS_FS_TEST_RESOURCES=gs://hail-test-dmk9z/{{ token }}/test/resources/fs + export HAIL_GS_FS_TEST_RESOURCES=gs://cpg-hail-test/{{ token }}/test/resources/fs java -cp hail-test.jar:$SPARK_HOME/jars/* org.testng.TestNG -listener is.hail.LogTestListener testng-splits-1.xml inputs: - from: /resources.tar.gz @@ -817,9 +817,9 @@ steps: tar xzf resources.tar.gz -C src/test tar xzf splits.tar.gz gcloud -q auth activate-service-account --key-file=/test-gsa-key/key.json - gsutil -m cp -r src/test/resources/fs gs://hail-test-dmk9z/{{ token }}/test/resources/ + gsutil -m cp -r src/test/resources/fs gs://cpg-hail-test/{{ token 
}}/test/resources/ export HAIL_TEST_SKIP_R=1 - export HAIL_GS_FS_TEST_RESOURCES=gs://hail-test-dmk9z/{{ token }}/test/resources/fs + export HAIL_GS_FS_TEST_RESOURCES=gs://cpg-hail-test/{{ token }}/test/resources/fs java -cp hail-test.jar:$SPARK_HOME/jars/* org.testng.TestNG -listener is.hail.LogTestListener testng-splits-2.xml inputs: - from: /resources.tar.gz @@ -854,9 +854,9 @@ steps: tar xzf resources.tar.gz -C src/test tar xzf splits.tar.gz gcloud -q auth activate-service-account --key-file=/test-gsa-key/key.json - gsutil -m cp -r src/test/resources/fs gs://hail-test-dmk9z/{{ token }}/test/resources/ + gsutil -m cp -r src/test/resources/fs gs://cpg-hail-test/{{ token }}/test/resources/ export HAIL_TEST_SKIP_R=1 - export HAIL_GS_FS_TEST_RESOURCES=gs://hail-test-dmk9z/{{ token }}/test/resources/fs + export HAIL_GS_FS_TEST_RESOURCES=gs://cpg-hail-test/{{ token }}/test/resources/fs java -cp hail-test.jar:$SPARK_HOME/jars/* org.testng.TestNG -listener is.hail.LogTestListener testng-splits-3.xml inputs: - from: /resources.tar.gz @@ -891,9 +891,9 @@ steps: tar xzf resources.tar.gz -C src/test tar xzf splits.tar.gz gcloud -q auth activate-service-account --key-file=/test-gsa-key/key.json - gsutil -m cp -r src/test/resources/fs gs://hail-test-dmk9z/{{ token }}/test/resources/ + gsutil -m cp -r src/test/resources/fs gs://cpg-hail-test/{{ token }}/test/resources/ export HAIL_TEST_SKIP_R=1 - export HAIL_GS_FS_TEST_RESOURCES=gs://hail-test-dmk9z/{{ token }}/test/resources/fs + export HAIL_GS_FS_TEST_RESOURCES=gs://cpg-hail-test/{{ token }}/test/resources/fs java -cp hail-test.jar:$SPARK_HOME/jars/* org.testng.TestNG -listener is.hail.LogTestListener testng-splits-4.xml inputs: - from: /resources.tar.gz @@ -1018,7 +1018,7 @@ steps: python3 -m pip install --no-dependencies hail-*-py3-none-any.whl export HAIL_TEST_RESOURCES_DIR=./resources export HAIL_DOCTEST_DATA_DIR=./data - export HAIL_TEST_BUCKET=hail-test-dmk9z + export HAIL_TEST_BUCKET=cpg-hail-test export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export PYSPARK_SUBMIT_ARGS="--driver-memory 6g pyspark-shell" export PYTEST_SPLITS=5 @@ -1059,7 +1059,7 @@ steps: python3 -m pip install --no-dependencies hail-*-py3-none-any.whl export HAIL_TEST_RESOURCES_DIR=./resources export HAIL_DOCTEST_DATA_DIR=./data - export HAIL_TEST_BUCKET=hail-test-dmk9z + export HAIL_TEST_BUCKET=cpg-hail-test export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export PYSPARK_SUBMIT_ARGS="--driver-memory 6g pyspark-shell" export PYTEST_SPLITS=5 @@ -1100,7 +1100,7 @@ steps: python3 -m pip install --no-dependencies hail-*-py3-none-any.whl export HAIL_TEST_RESOURCES_DIR=./resources export HAIL_DOCTEST_DATA_DIR=./data - export HAIL_TEST_BUCKET=hail-test-dmk9z + export HAIL_TEST_BUCKET=cpg-hail-test export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export PYSPARK_SUBMIT_ARGS="--driver-memory 6g pyspark-shell" export PYTEST_SPLITS=5 @@ -1141,7 +1141,7 @@ steps: python3 -m pip install --no-dependencies hail-*-py3-none-any.whl export HAIL_TEST_RESOURCES_DIR=./resources export HAIL_DOCTEST_DATA_DIR=./data - export HAIL_TEST_BUCKET=hail-test-dmk9z + export HAIL_TEST_BUCKET=cpg-hail-test export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export PYSPARK_SUBMIT_ARGS="--driver-memory 6g pyspark-shell" export PYTEST_SPLITS=5 @@ -1182,7 +1182,7 @@ steps: python3 -m pip install --no-dependencies hail-*-py3-none-any.whl export HAIL_TEST_RESOURCES_DIR=./resources export HAIL_DOCTEST_DATA_DIR=./data - export HAIL_TEST_BUCKET=hail-test-dmk9z + export 
HAIL_TEST_BUCKET=cpg-hail-test export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export PYSPARK_SUBMIT_ARGS="--driver-memory 6g pyspark-shell" export PYTEST_SPLITS=5 @@ -1219,7 +1219,7 @@ steps: tar xzf test.tar.gz tar xvf debug-wheel-container.tar python3 -m pip install --no-dependencies hail-*-py3-none-any.whl - export HAIL_TEST_BUCKET=hail-test-dmk9z + export HAIL_TEST_BUCKET=cpg-hail-test export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json python3 -m pytest --log-cli-level=INFO -s -vv --instafail --durations=50 -n 10 test/hailtop/aiotools/test_copy.py inputs: @@ -1253,7 +1253,7 @@ steps: python3 -m pip install --no-dependencies hail-*-py3-none-any.whl export HAIL_TEST_RESOURCES_DIR=./resources export HAIL_DOCTEST_DATA_DIR=./data - export HAIL_TEST_BUCKET=hail-test-dmk9z + export HAIL_TEST_BUCKET=cpg-hail-test export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export PYSPARK_SUBMIT_ARGS="--driver-memory 6g pyspark-shell" python3 -m pytest -m unchecked_allocator --ignore=test/hailtop/batch/ --log-cli-level=INFO -s -vv --instafail --durations=50 test @@ -1370,7 +1370,7 @@ steps: python3 -m pip install --no-dependencies hail-*-py3-none-any.whl export HAIL_TEST_RESOURCES_DIR=./resources export HAIL_DOCTEST_DATA_DIR=./data - export HAIL_TEST_BUCKET=hail-test-dmk9z + export HAIL_TEST_BUCKET=cpg-hail-test export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=2 export HAIL_QUERY_BACKEND=local @@ -2155,7 +2155,7 @@ steps: cd /io tar xvf wheel-container.tar python3 -m pip install --no-dependencies hail-*-py3-none-any.whl - hailctl config set batch/bucket hail-test-dmk9z + hailctl config set batch/bucket cpg-hail-test HAIL_BILLING_PROJECT=test HAIL_DONT_RETRY_500=1 HAIL_QUERY_BACKEND=service python3 -m pytest --log-cli-level=INFO -s -vv --instafail --durations=50 /io/test/ timeout: 600 secrets: @@ -2222,7 +2222,7 @@ steps: script: | export HAIL_GSA_KEY_FILE=/test-gsa-key/key.json export PROJECT={{ global.project }} - hailctl config set batch/bucket hail-test-dmk9z + hailctl config set batch/bucket cpg-hail-test python3 -m pytest --log-cli-level=INFO -s -vv --instafail --durations=50 /io/test/ timeout: 600 secrets: @@ -2271,7 +2271,7 @@ steps: export HAIL_TEST_TOKEN_FILE=/user-tokens/tokens.json export HAIL_TEST_DEV_TOKEN_FILE=/dev-tokens/tokens.json export HAIL_TOKEN="{{ token }}" - hailctl config set batch/bucket hail-test-dmk9z + hailctl config set batch/bucket cpg-hail-test python3 -m pytest \ --log-date-format="%Y-%m-%dT%H:%M:%S" \ --log-format="%(asctime)s %(levelname)s %(name)s %(filename)s:%(lineno)d:%(funcName)s %(message)s" \ @@ -2339,7 +2339,7 @@ steps: export HAIL_TEST_TOKEN_FILE=/user-tokens/tokens.json export HAIL_TEST_DEV_TOKEN_FILE=/dev-tokens/tokens.json export HAIL_TOKEN="{{ token }}" - hailctl config set batch/bucket hail-test-dmk9z + hailctl config set batch/bucket cpg-hail-test python3 -m pytest \ --log-date-format="%Y-%m-%dT%H:%M:%S" \ --log-format="%(asctime)s %(levelname)s %(name)s %(filename)s:%(lineno)d:%(funcName)s %(message)s" \ @@ -2407,7 +2407,7 @@ steps: export HAIL_TEST_TOKEN_FILE=/user-tokens/tokens.json export HAIL_TEST_DEV_TOKEN_FILE=/dev-tokens/tokens.json export HAIL_TOKEN="{{ token }}" - hailctl config set batch/bucket hail-test-dmk9z + hailctl config set batch/bucket cpg-hail-test python3 -m pytest \ --log-date-format="%Y-%m-%dT%H:%M:%S" \ --log-format="%(asctime)s %(levelname)s %(name)s %(filename)s:%(lineno)d:%(funcName)s %(message)s" \ @@ -2475,7 +2475,7 @@ steps: export HAIL_TEST_TOKEN_FILE=/user-tokens/tokens.json export 
HAIL_TEST_DEV_TOKEN_FILE=/dev-tokens/tokens.json export HAIL_TOKEN="{{ token }}" - hailctl config set batch/bucket hail-test-dmk9z + hailctl config set batch/bucket cpg-hail-test python3 -m pytest \ --log-date-format="%Y-%m-%dT%H:%M:%S" \ --log-format="%(asctime)s %(levelname)s %(name)s %(filename)s:%(lineno)d:%(funcName)s %(message)s" \ @@ -2543,7 +2543,7 @@ steps: export HAIL_TEST_TOKEN_FILE=/user-tokens/tokens.json export HAIL_TEST_DEV_TOKEN_FILE=/dev-tokens/tokens.json export HAIL_TOKEN="{{ token }}" - hailctl config set batch/bucket hail-test-dmk9z + hailctl config set batch/bucket cpg-hail-test python3 -m pytest \ --log-date-format="%Y-%m-%dT%H:%M:%S" \ --log-format="%(asctime)s %(levelname)s %(name)s %(filename)s:%(lineno)d:%(funcName)s %(message)s" \ @@ -2718,7 +2718,7 @@ steps: export DOCKER_ROOT_IMAGE="gcr.io/{{ global.project }}/ubuntu:18.04" export PYTHON_DILL_IMAGE="gcr.io/{{ global.project }}/python-dill:3.7-slim" hailctl config set batch/billing_project test - hailctl config set batch/bucket hail-test-dmk9z + hailctl config set batch/bucket cpg-hail-test python3 -m pytest \ --durations=0 \ --log-cli-level=INFO \ @@ -2769,7 +2769,7 @@ steps: export DOCKER_ROOT_IMAGE="gcr.io/{{ global.project }}/ubuntu:18.04" export PYTHON_DILL_IMAGE="gcr.io/{{ global.project }}/python-dill:3.7-slim" hailctl config set batch/billing_project test - hailctl config set batch/bucket hail-test-dmk9z + hailctl config set batch/bucket cpg-hail-test python3 -m pytest \ --durations=0 \ --log-cli-level=INFO \ @@ -2820,7 +2820,7 @@ steps: export DOCKER_ROOT_IMAGE="gcr.io/{{ global.project }}/ubuntu:18.04" export PYTHON_DILL_IMAGE="gcr.io/{{ global.project }}/python-dill:3.7-slim" hailctl config set batch/billing_project test - hailctl config set batch/bucket hail-test-dmk9z + hailctl config set batch/bucket cpg-hail-test python3 -m pytest \ --durations=0 \ --log-cli-level=INFO \ @@ -2871,7 +2871,7 @@ steps: export DOCKER_ROOT_IMAGE="gcr.io/{{ global.project }}/ubuntu:18.04" export PYTHON_DILL_IMAGE="gcr.io/{{ global.project }}/python-dill:3.7-slim" hailctl config set batch/billing_project test - hailctl config set batch/bucket hail-test-dmk9z + hailctl config set batch/bucket cpg-hail-test python3 -m pytest \ --durations=0 \ --log-cli-level=INFO \ @@ -2922,7 +2922,7 @@ steps: export DOCKER_ROOT_IMAGE="gcr.io/{{ global.project }}/ubuntu:18.04" export PYTHON_DILL_IMAGE="gcr.io/{{ global.project }}/python-dill:3.7-slim" hailctl config set batch/billing_project test - hailctl config set batch/bucket hail-test-dmk9z + hailctl config set batch/bucket cpg-hail-test python3 -m pytest \ --durations=0 \ --log-cli-level=INFO \ @@ -2969,7 +2969,7 @@ steps: export HAIL_GSA_KEY_FILE=/test-gsa-key/key.json cd /io/hailtop/hailtop/batch hailctl config set batch/billing_project test - hailctl config set batch/bucket hail-test-dmk9z + hailctl config set batch/bucket cpg-hail-test python3 -m pytest --instafail \ --doctest-modules \ --doctest-glob='*.rst' \ diff --git a/ci/deployment.yaml b/ci/deployment.yaml index 0891dddf95b..21b00d95fbb 100644 --- a/ci/deployment.yaml +++ b/ci/deployment.yaml @@ -66,7 +66,7 @@ spec: {% if deploy %} value: "cpg-hail-ci" {% else %} - value: "hail-test-dmk9z" + value: "cpg-hail-test" {% endif %} ports: - containerPort: 5000 diff --git a/hail/src/test/scala/is/hail/fs/gs/GoogleStorageFSSuite.scala b/hail/src/test/scala/is/hail/fs/gs/GoogleStorageFSSuite.scala index b9b7ccf50a5..fc16867ab8b 100644 --- a/hail/src/test/scala/is/hail/fs/gs/GoogleStorageFSSuite.scala +++ 
b/hail/src/test/scala/is/hail/fs/gs/GoogleStorageFSSuite.scala @@ -9,7 +9,7 @@ import org.scalatest.testng.TestNGSuite import org.testng.annotations.Test class GoogleStorageFSSuite extends TestNGSuite with FSSuite { - val bucket: String = "hail-test-dmk9z" + val bucket: String = "cpg-hail-test" val root: String = s"gs://$bucket" diff --git a/query/deployment.yaml b/query/deployment.yaml index 848fb4cbc60..82d5b41cef5 100644 --- a/query/deployment.yaml +++ b/query/deployment.yaml @@ -46,7 +46,7 @@ spec: value: hail-query {% else %} - name: HAIL_QUERY_BUCKET - value: hail-test-dmk9z + value: cpg-hail-test {% endif %} ports: - containerPort: 5000 From 027bab00b398195d95643a81705290a1f8afd124 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Wed, 10 Feb 2021 11:27:19 +1100 Subject: [PATCH 118/501] Fix create_accounts for deploy. --- build.yaml | 4 ++++ ci/bootstrap_create_accounts.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/build.yaml b/build.yaml index 7e1c9c6f73f..cc11dc26bb4 100644 --- a/build.yaml +++ b/build.yaml @@ -404,6 +404,10 @@ steps: namespace: valueFrom: default_ns.name mountPath: /deploy-config + - name: auth-gsa-key + namespace: + valueFrom: default_ns.name + mountPath: /auth-gsa-key inputs: - from: /repo/ci/bootstrap_create_accounts.py to: /io/bootstrap_create_accounts.py diff --git a/ci/bootstrap_create_accounts.py b/ci/bootstrap_create_accounts.py index 343a4214324..706c52509b1 100644 --- a/ci/bootstrap_create_accounts.py +++ b/ci/bootstrap_create_accounts.py @@ -68,7 +68,7 @@ async def main(): app['k8s_client'] = k8s_client app['iam_client'] = aiogoogle.IAmClient( - PROJECT, credentials=aiogoogle.Credentials.from_file('/gsa-key/key.json')) + PROJECT, credentials=aiogoogle.Credentials.from_file('/auth-gsa-key/key.json')) for username, email, is_developer, is_service_account in users: user_id = await insert_user_if_not_exists(app, username, email, is_developer, is_service_account) From f1784286fa0f64ec92308737b18b76a1a4070368 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Wed, 17 Feb 2021 13:11:57 +1100 Subject: [PATCH 119/501] Add logging to dev deploy failures. 
--- ci/ci/ci.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/ci/ci.py b/ci/ci/ci.py index 1fc761dd600..2d86e7385c4 100644 --- a/ci/ci/ci.py +++ b/ci/ci/ci.py @@ -112,6 +112,7 @@ async def dev_deploy_branch(request, userdata): batch_id = await unwatched_branch.deploy(batch_client, steps) except Exception as e: # pylint: disable=broad-except message = traceback.format_exc() + log.info('dev deploy failed: ' + message, exc_info=True) raise web.HTTPBadRequest(text=f'starting the deploy failed due to\n{message}') from e return web.json_response({'sha': sha, 'batch_id': batch_id}) From b949001843a9d4d743cea6d7e9fced0c7cd0e102 Mon Sep 17 00:00:00 2001 From: Vlad Savelyev Date: Wed, 17 Feb 2021 16:09:15 +1100 Subject: [PATCH 120/501] [hailctl dev deploy] fix printing remote traceback (#37) --- hail/python/hailtop/hailctl/dev/deploy/cli.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hail/python/hailtop/hailctl/dev/deploy/cli.py b/hail/python/hailtop/hailctl/dev/deploy/cli.py index c85a4623b9a..ff1197970be 100644 --- a/hail/python/hailtop/hailctl/dev/deploy/cli.py +++ b/hail/python/hailtop/hailctl/dev/deploy/cli.py @@ -27,7 +27,9 @@ def __init__(self, deploy_config=None): async def __aenter__(self): headers = service_auth_headers(self._deploy_config, 'ci') - self._session = client_session(timeout=aiohttp.ClientTimeout(total=60), headers=headers) + self._session = client_session( + raise_for_status=False, + timeout=aiohttp.ClientTimeout(total=60), headers=headers) return self async def __aexit__(self, exc_type, exc, tb): From df73a188c84efc7ffd2312ca4261941d879b9cca Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 17 Feb 2021 17:01:39 +1100 Subject: [PATCH 121/501] Build conda package on push to main --- .github/workflows/condarise.yaml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/condarise.yaml b/.github/workflows/condarise.yaml index 92656c159b0..70c3f6a43a9 100644 --- a/.github/workflows/condarise.yaml +++ b/.github/workflows/condarise.yaml @@ -1,9 +1,10 @@ name: Condarise -on: [push, pull_request] +on: + push: + branches: + - main jobs: set-conda-pkg-version: - # Run for tags only - if: "startsWith(github.ref, 'refs/tags/')" runs-on: ubuntu-latest steps: - uses: actions/checkout@main @@ -24,8 +25,6 @@ jobs: path: conda/hail/meta.yaml build-publish: - # Run for tags only - if: "startsWith(github.ref, 'refs/tags/')" needs: set-conda-pkg-version strategy: matrix: From d9c939f08a216c1bbea79fbe59b53b655e02f45e Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 17 Feb 2021 18:15:48 +1100 Subject: [PATCH 122/501] Reformat README and add a bit on tag construction --- conda/README.md | 46 +++++++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/conda/README.md b/conda/README.md index c0db52add0e..b4487f39b20 100644 --- a/conda/README.md +++ b/conda/README.md @@ -1,19 +1,36 @@ # Conda package -This folder contains a conda recipe to build the `hail` package for the [`cpg` Anaconda channel](https://anaconda.org/cpg/hail). +This folder contains a conda recipe to build the `hail` package for +the [`cpg` Anaconda channel](https://anaconda.org/cpg/hail). -Note that there is also a `hail` package in the +Note that there is also a package in the [`bioconda` channel](https://github.com/bioconda/bioconda-recipes/tree/master/recipes/hail) -which is synced with the -[official PyPI release](https://pypi.org/project/hail). 
-Having a separate conda package in the `cpg` channel built from the -most recent development codebase allows us to test changes -that have not been released yet, or even use features that will not be -propagated to the upstream repository at all. +synced with the [official PyPI release](https://pypi.org/project/hail). However, having +a separate conda package in the `cpg` channel allows us to build it against the codebase +in our fork. -[GitHub Actions CI](../.github/workflows/main.yaml) is set up to build the package using this recipe and push it to Anaconda on every push to the `main` branch in the [CPG hail fork](https://github.com/populationgenomics/hail). +Because we don't control versioning of Hail project, the `cpg` conda package is +versioned specifically: we append the git commit hash to the official version tag, +e.g. `0.2.62.dev289c163`. -To install the package, set up miniconda first: +[GitHub Actions CI](../.github/workflows/condarise.yaml) is set up to build the package +using this recipe and push it to Anaconda on every push event to the `main` branch in +the +[CPG hail fork](https://github.com/populationgenomics/hail). + +When installing the package, list the `cpg` channel before `bioconda` to prioritize it +in the channel order: + +``` +conda create --name hail -c cpg -c bioconda -c conda-forge hail +conda activate hail +``` + +You can also install Hail into an existing environment. However, note that Hail requires +Python of versions 3.6 or 3.7, so conda might downgrade Python in that environment, +which may affect other installed packages. + +Note that if you don't have `conda` installed, here are handy commands to do that: ``` if [[ "$OSTYPE" == "darwin"* ]]; then @@ -23,12 +40,3 @@ else fi bash miniconda.sh ``` - -When installing, to prioritize the CPG package, list the `cpg` channel before `bioconda`: - -``` -conda create --name hail -c cpg -c bioconda -c conda-forge hail -conda activate hail -``` - -You can also install Hail into an existing environment; however note that Hail requires Python of versions 3.6 or 3.7, so conda might downgrade Python in that environment, which may affect other installed packages. From c1667cd065ed79fb6cae722cb1306eec7f7cfe13 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 17 Feb 2021 18:19:44 +1100 Subject: [PATCH 123/501] Fix --- conda/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/README.md b/conda/README.md index b4487f39b20..c45e68038f8 100644 --- a/conda/README.md +++ b/conda/README.md @@ -3,7 +3,7 @@ This folder contains a conda recipe to build the `hail` package for the [`cpg` Anaconda channel](https://anaconda.org/cpg/hail). -Note that there is also a package in the +Note that there is also a `hail` package in the [`bioconda` channel](https://github.com/bioconda/bioconda-recipes/tree/master/recipes/hail) synced with the [official PyPI release](https://pypi.org/project/hail). 
However, having a separate conda package in the `cpg` channel allows us to build it against the codebase From f5d6f8601bed8eba9c303ef0f9602792ff2d864c Mon Sep 17 00:00:00 2001 From: Vlad Savelyev Date: Wed, 17 Feb 2021 18:30:16 +1100 Subject: [PATCH 124/501] Switch to Artifact Registry (#29) * config.mk: use AR docker image * build.yaml: use AR for images * Gitignore artifacts * fetch-image.sh: gcloud auth for AR * image-fetcher dockerfile: base on AR * image fetcher make file: use AR * build-batch-worker-image-startup.sh: add gcloud auth configure-docker for AR, user AR for images * batch worker: ue AR for public_gcr_images * Worker dockerfile: use AR * ci build.py: add gcloud auth configure-docker for AR, use AR base image * Makefiles: use AR docker images * stat-permissions.sh: fix for macOS gnu-grep * third-party dockers: use AR * ci dockerFile create_namespace: use AR image * Docker hail-ubuntu: use AR image * ci build.py: add region into the jinja render config for Dockerfiles * CI deploy config: add region * Python-dill: use AR image * Auth deployment.yaml: add REGION * Use hail-dvc default ci image pointer * Add Artifact Registry in main.tf * Add AR in dev-docs * Add AR for ukbb-rg * Add AR in more makefiles * Fix AR usage for image-fetcher * Specify "region" in the "hail-ubuntu-stmp" target. * Fix zone assignment in startup script. * Allow ci to read from Artifact Registry. * Makefiles: parametrise `DOCKER_ROOT_IMAGE` to support both GCR and AR * Parametrise `DOCKER_PREFIX` to support both GCR and AR * Un-gitignore "vlad" * Fix * batch: pass DOCKER_PREFIX to worker * DOCKER_PREFIX in build-batch-worker-image-startup.sh * build.yaml: add ubuntu tags back * ci/ci/build.py: parametrise docker prefix * infra README: configure-docker AR Co-authored-by: Leonhard Gruenschloss --- .gitignore | 4 ++ address/Makefile | 4 +- admin-pod/Makefile | 2 +- amundsen/Makefile | 2 +- amundsen/deployment.yaml | 2 +- auth/Makefile | 4 +- batch/Dockerfile.worker | 4 +- batch/Makefile | 12 ++-- batch/batch/batch_configuration.py | 1 + batch/batch/driver/create_instance.py | 7 ++- batch/batch/public_gcr_images.py | 4 +- batch/batch/worker/worker.py | 6 +- batch/build-batch-worker-image-startup.sh | 10 ++-- batch/deployment.yaml | 4 ++ benchmark-service/Makefile | 4 +- benchmark/Dockerfile | 2 +- benchmark/Makefile | 5 +- bootstrap-gateway/Makefile | 4 +- build.yaml | 30 +++++----- ci/Makefile | 10 ++-- ci/ci/build.py | 11 ++-- ci/ci/environment.py | 5 +- ci/deployment.yaml | 2 + ci/test/resources/build.yaml | 2 +- config.mk | 7 ++- dev-docs/development_process.md | 2 + docker/Makefile | 24 ++++---- docker/hail-ubuntu/Dockerfile | 2 +- docker/python-dill/Makefile | 2 +- docker/python-dill/push.sh | 4 +- docker/stat-permissions.sh | 2 +- docker/third-party/Makefile | 2 +- docker/third-party/copy_images.sh | 6 +- gateway/Makefile | 4 +- image-fetcher/Dockerfile | 4 +- image-fetcher/Makefile | 10 ++-- image-fetcher/fetch-image.sh | 6 +- infra/README.md | 17 +++++- infra/main.tf | 70 ++++++++++++++++++++++- internal-gateway/Makefile | 4 +- letsencrypt/Makefile | 4 +- memory/Makefile | 6 +- memory/deployment.yaml | 2 +- monitoring/Makefile | 4 +- notebook/Makefile | 4 +- query/Makefile | 4 +- router-resolver/Makefile | 4 +- router/Makefile | 4 +- shuffler/Makefile | 4 +- site/Makefile | 2 +- ukbb-rg/Dockerfile.browser | 2 +- ukbb-rg/Dockerfile.static | 2 +- ukbb-rg/Makefile | 23 ++++---- ukbb-rg/deployment.yaml | 4 +- 54 files changed, 241 insertions(+), 135 deletions(-) diff --git a/.gitignore b/.gitignore 
index 18be6d90daa..286f17f3c29 100644 --- a/.gitignore +++ b/.gitignore @@ -24,3 +24,7 @@ hs_err_pid*.log GPATH GRTAGS GTAGS +hail/prebuilt/lib/darwin/libboot.dylib +hail/prebuilt/lib/darwin/libhail.dylib +query/hail.jar +infra/.terraform.lock.hcl diff --git a/address/Makefile b/address/Makefile index 6bc4a444e2c..956ed8e0dac 100644 --- a/address/Makefile +++ b/address/Makefile @@ -1,7 +1,7 @@ include ../config.mk -ADDRESS_LATEST = gcr.io/$(PROJECT)/address:latest -ADDRESS_IMAGE = gcr.io/$(PROJECT)/address:$(shell docker images -q --no-trunc address | sed -e 's,[^:]*:,,') +ADDRESS_LATEST = $(DOCKER_PREFIX)/address:latest +ADDRESS_IMAGE = $(DOCKER_PREFIX)/address:$(shell docker images -q --no-trunc address | sed -e 's,[^:]*:,,') PYTHONPATH := $${PYTHONPATH:+$${PYTHONPATH}:}../hail/python:../gear:../web_common PYTHON := PYTHONPATH=$(PYTHONPATH) python3 diff --git a/admin-pod/Makefile b/admin-pod/Makefile index 96797a16638..1efe07336e7 100644 --- a/admin-pod/Makefile +++ b/admin-pod/Makefile @@ -1,6 +1,6 @@ include ../config.mk -SERVICE_BASE_IMAGE = gcr.io/$(PROJECT)/service-base:$(shell docker images -q --no-trunc service-base:latest | sed -e 's,[^:]*:,,') +SERVICE_BASE_IMAGE = $(DOCKER_PREFIX)/service-base:$(shell docker images -q --no-trunc service-base:latest | sed -e 's,[^:]*:,,') .PHONY: deploy deploy: diff --git a/amundsen/Makefile b/amundsen/Makefile index a10671d6432..f1adad9f537 100644 --- a/amundsen/Makefile +++ b/amundsen/Makefile @@ -2,7 +2,7 @@ include ../config.mk PYTHON := PYTHONPATH=$${PYTHONPATH:+$${PYTHONPATH}:}$(EXTRA_PYTHONPATH) python3 -JINJA_ENVIRONMENT = '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"default_ns":{"name":"$(NAMESPACE)"},"global":{"project":"$(PROJECT)","domain":"$(DOMAIN)","k8s_server_url":"$(KUBERNETES_SERVER_URL)"},"scope":"$(SCOPE)"}' +JINJA_ENVIRONMENT = '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"default_ns":{"name":"$(NAMESPACE)"},"global":{"docker_prefix":"$(DOCKER_PREFIX)","domain":"$(DOMAIN)","k8s_server_url":"$(KUBERNETES_SERVER_URL)"},"scope":"$(SCOPE)"}' .PHONY: deploy deploy: diff --git a/amundsen/deployment.yaml b/amundsen/deployment.yaml index 348635bb9b6..1c824b0c961 100644 --- a/amundsen/deployment.yaml +++ b/amundsen/deployment.yaml @@ -30,7 +30,7 @@ spec: {% endif %} containers: - name: amundsen-frontend - image: gcr.io/{{ global.project }}/amundsendev/amundsen-frontend:2.3.0 + image: {{ global.docker_prefix }}/amundsendev/amundsen-frontend:2.3.0 imagePullPolicy: Always resources: requests: diff --git a/auth/Makefile b/auth/Makefile index 21e11abff90..1a423eda6af 100644 --- a/auth/Makefile +++ b/auth/Makefile @@ -1,7 +1,7 @@ include ../config.mk -AUTH_LATEST = gcr.io/$(PROJECT)/auth:latest -AUTH_IMAGE = gcr.io/$(PROJECT)/auth:$(shell docker images -q --no-trunc auth:latest | sed -e 's,[^:]*:,,') +AUTH_LATEST = $(DOCKER_PREFIX)/auth:latest +AUTH_IMAGE = $(DOCKER_PREFIX)/auth:$(shell docker images -q --no-trunc auth:latest | sed -e 's,[^:]*:,,') EXTRA_PYTHONPATH := ../hail/python:../gear:../web_common PYTHON := PYTHONPATH=$${PYTHONPATH:+$${PYTHONPATH}:}$(EXTRA_PYTHONPATH) python3 diff --git a/batch/Dockerfile.worker b/batch/Dockerfile.worker index 72b8acaece3..049680feaf8 100644 --- a/batch/Dockerfile.worker +++ b/batch/Dockerfile.worker @@ -1,4 +1,4 @@ -FROM gcr.io/{{ global.project }}/python:3.7-slim-stretch +FROM {{ global.docker_prefix }}/python:3.7-slim-stretch COPY docker/hail-ubuntu/hail-apt-get-install /bin/hail-apt-get-install RUN mkdir -p /usr/share/man/man1 
/usr/share/man/man2 @@ -43,4 +43,4 @@ COPY batch/setup.py batch/MANIFEST.in /batch/ COPY batch/batch /batch/batch/ RUN hail-pip-install --no-deps /batch && rm -rf /batch -COPY batch/hail.jar / \ No newline at end of file +COPY batch/hail.jar / diff --git a/batch/Makefile b/batch/Makefile index cfdb31ed29e..1375084ad96 100644 --- a/batch/Makefile +++ b/batch/Makefile @@ -1,10 +1,10 @@ include ../config.mk -BATCH_LATEST = gcr.io/$(PROJECT)/batch:latest -BATCH_IMAGE = gcr.io/$(PROJECT)/batch:$(shell docker images -q --no-trunc batch | sed -e 's,[^:]*:,,') +BATCH_LATEST = $(DOCKER_PREFIX)/batch:latest +BATCH_IMAGE = $(DOCKER_PREFIX)/batch:$(shell docker images -q --no-trunc batch | sed -e 's,[^:]*:,,') -BATCH_WORKER_LATEST = gcr.io/$(PROJECT)/batch-worker:latest -BATCH_WORKER_IMAGE = gcr.io/$(PROJECT)/batch-worker:$(shell docker images -q --no-trunc batch-worker | sed -e 's,[^:]*:,,') +BATCH_WORKER_LATEST = $(DOCKER_PREFIX)/batch-worker:latest +BATCH_WORKER_IMAGE = $(DOCKER_PREFIX)/batch-worker:$(shell docker images -q --no-trunc batch-worker | sed -e 's,[^:]*:,,') EXTRA_PYTHONPATH := ../hail/python:../gear:../web_common PYTHON := PYTHONPATH=$${PYTHONPATH:+$${PYTHONPATH}:}$(EXTRA_PYTHONPATH) python3 @@ -31,7 +31,7 @@ build-worker: build-prereqs # janky cp ../hail/build/libs/hail-all-spark.jar ./hail.jar -docker pull $(BATCH_WORKER_LATEST) - python3 ../ci/jinja2_render.py '{"global":{"project":"$(PROJECT)"}, "service_base_image":{"image":"service-base"}}' Dockerfile.worker Dockerfile.worker.out + python3 ../ci/jinja2_render.py '{"global":{"docker_prefix":"$(DOCKER_PREFIX)"}}}' Dockerfile.worker Dockerfile.worker.out docker build -t batch-worker -f Dockerfile.worker.out --cache-from batch-worker,$(BATCH_WORKER_LATEST),service-base .. .PHONY: build @@ -48,7 +48,7 @@ push: build docker tag batch-worker $(BATCH_WORKER_IMAGE) docker push $(BATCH_WORKER_IMAGE) -JINJA_ENVIRONMENT = '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"batch_image":{"image":"$(BATCH_IMAGE)"},"batch_worker_image":{"image":"$(BATCH_WORKER_IMAGE)"},"default_ns":{"name":"$(NAMESPACE)"},"batch_database":{"user_secret_name":"sql-batch-user-config"},"global":{"project":"$(PROJECT)","domain":"$(DOMAIN)","k8s_server_url":"$(KUBERNETES_SERVER_URL)"},"scope":"$(SCOPE)"}' +JINJA_ENVIRONMENT = '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"batch_image":{"image":"$(BATCH_IMAGE)"},"batch_worker_image":{"image":"$(BATCH_WORKER_IMAGE)"},"default_ns":{"name":"$(NAMESPACE)"},"batch_database":{"user_secret_name":"sql-batch-user-config"},"global":{"project":"$(PROJECT)","domain":"$(DOMAIN)","k8s_server_url":"$(KUBERNETES_SERVER_URL)","docker_prefix":"$(DOCKER_PREFIX)"},"scope":"$(SCOPE)"}' .PHONY: deploy deploy: push diff --git a/batch/batch/batch_configuration.py b/batch/batch/batch_configuration.py index 2553db26f03..0e80c242c51 100644 --- a/batch/batch/batch_configuration.py +++ b/batch/batch/batch_configuration.py @@ -8,6 +8,7 @@ GCP_REGION = os.environ['HAIL_GCP_REGION'] GCP_ZONE = os.environ['HAIL_GCP_ZONE'] +DOCKER_PREFIX = os.environ['HAIL_DOCKER_PREFIX'] BATCH_GCP_REGIONS = set(json.loads(os.environ['HAIL_BATCH_GCP_REGIONS'])) BATCH_GCP_REGIONS.add(GCP_REGION) diff --git a/batch/batch/driver/create_instance.py b/batch/batch/driver/create_instance.py index 065b22fb81b..88e723ae0b3 100644 --- a/batch/batch/driver/create_instance.py +++ b/batch/batch/driver/create_instance.py @@ -5,7 +5,7 @@ from hailtop import aiogoogle -from ..batch_configuration import PROJECT, 
DEFAULT_NAMESPACE +from ..batch_configuration import PROJECT, DOCKER_PREFIX, DEFAULT_NAMESPACE from ..worker_config import WorkerConfig from ..log_store import LogStore @@ -172,6 +172,7 @@ async def create_instance(app, zone, machine_name, machine_type, activation_toke ZONE=$(curl -s http://metadata.google.internal/computeMetadata/v1/instance/zone -H 'Metadata-Flavor: Google') BATCH_WORKER_IMAGE=$(curl -s -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/instance/attributes/batch_worker_image") +DOCKER_PREFIX=$(curl -s -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/instance/attributes/docker_prefix") # Setup fluentd touch /worker.log @@ -248,6 +249,7 @@ async def create_instance(app, zone, machine_name, machine_type, activation_toke -e BATCH_LOGS_BUCKET_NAME=$BATCH_LOGS_BUCKET_NAME \ -e INSTANCE_ID=$INSTANCE_ID \ -e PROJECT=$PROJECT \ +-e DOCKER_PREFIX=$DOCKER_PREFIX \ -e WORKER_CONFIG=$WORKER_CONFIG \ -e MAX_IDLE_TIME_MSECS=$MAX_IDLE_TIME_MSECS \ -e WORKER_DATA_DISK_MOUNT=/mnt/disks/$WORKER_DATA_DISK_NAME \ @@ -291,6 +293,9 @@ async def create_instance(app, zone, machine_name, machine_type, activation_toke }, { 'key': 'batch_worker_image', 'value': BATCH_WORKER_IMAGE + }, { + 'key': 'docker_prefix', + 'value': DOCKER_PREFIX }, { 'key': 'namespace', 'value': DEFAULT_NAMESPACE diff --git a/batch/batch/public_gcr_images.py b/batch/batch/public_gcr_images.py index 48b723c25de..a6020874e99 100644 --- a/batch/batch/public_gcr_images.py +++ b/batch/batch/public_gcr_images.py @@ -1,7 +1,7 @@ from typing import List -def public_gcr_images(project: str) -> List[str]: +def public_gcr_images(docker_prefix: str) -> List[str]: # the worker cannot import batch_configuration because it does not have all the environment # variables - return [f'gcr.io/{project}/{name}' for name in ('query', 'hail', 'python-dill')] + return [f'{docker_prefix}/{name}' for name in ('query', 'hail', 'python-dill')] diff --git a/batch/batch/worker/worker.py b/batch/batch/worker/worker.py index 45d8856d779..79e41e8db0b 100644 --- a/batch/batch/worker/worker.py +++ b/batch/batch/worker/worker.py @@ -62,7 +62,8 @@ BATCH_LOGS_BUCKET_NAME = os.environ['BATCH_LOGS_BUCKET_NAME'] INSTANCE_ID = os.environ['INSTANCE_ID'] PROJECT = os.environ['PROJECT'] -PUBLIC_GCR_IMAGES = public_gcr_images(PROJECT) +DOCKER_PREFIX = os.environ['DOCKER_PREFIX'] +PUBLIC_GCR_IMAGES = public_gcr_images(DOCKER_PREFIX) WORKER_CONFIG = json.loads(base64.b64decode(os.environ['WORKER_CONFIG']).decode()) MAX_IDLE_TIME_MSECS = int(os.environ['MAX_IDLE_TIME_MSECS']) WORKER_DATA_DISK_MOUNT = os.environ['WORKER_DATA_DISK_MOUNT'] @@ -75,6 +76,7 @@ log.info(f'BATCH_LOGS_BUCKET_NAME {BATCH_LOGS_BUCKET_NAME}') log.info(f'INSTANCE_ID {INSTANCE_ID}') log.info(f'PROJECT {PROJECT}') +log.info(f'DOCKER_PREFIX {DOCKER_PREFIX}') log.info(f'WORKER_CONFIG {WORKER_CONFIG}') log.info(f'MAX_IDLE_TIME_MSECS {MAX_IDLE_TIME_MSECS}') log.info(f'WORKER_DATA_DISK_MOUNT {WORKER_DATA_DISK_MOUNT}') @@ -247,7 +249,7 @@ def __init__(self, job, name, spec): if repository in HAIL_GENETICS_IMAGES: repository_name_without_prefix = repository[len(HAIL_GENETICS):] - repository = f'gcr.io/{PROJECT}/{repository_name_without_prefix}' + repository = f'{DOCKER_PREFIX}/{repository_name_without_prefix}' self.repository = repository self.tag = tag diff --git a/batch/build-batch-worker-image-startup.sh b/batch/build-batch-worker-image-startup.sh index 9d6df2b02be..623b7eb985d 100644 --- a/batch/build-batch-worker-image-startup.sh +++ 
b/batch/build-batch-worker-image-startup.sh @@ -29,7 +29,7 @@ rm -rf /var/lib/apt/lists/* [ -f /etc/docker/daemon.json ] || echo "{}" > /etc/docker/daemon.json -VERSION=1.5.0 +VERSION=2.0.4 OS=linux ARCH=amd64 @@ -39,10 +39,12 @@ curl -fsSL "https://github.com/GoogleCloudPlatform/docker-credential-gcr/release # avoid "unable to get current user home directory: os/user lookup failed" export HOME=/root -docker-credential-gcr configure-docker -GCP_PROJECT=$(curl -s -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/project/project-id") -docker pull gcr.io/$GCP_PROJECT/ubuntu:18.04 +DOCKER_PREFIX=$(curl -s -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/instance/attributes/docker_prefix") + +docker-credential-gcr configure-docker --include-artifact-registry + +docker pull ${DOCKER_PREFIX}/ubuntu:18.04 docker pull gcr.io/google.com/cloudsdktool/cloud-sdk:310.0.0-alpine # add docker daemon debug logging diff --git a/batch/deployment.yaml b/batch/deployment.yaml index bfaed09bad5..8d52eb1d7a3 100644 --- a/batch/deployment.yaml +++ b/batch/deployment.yaml @@ -62,6 +62,8 @@ spec: value: "{{ default_ns.name }}" - name: PROJECT value: "{{ global.project }}" + - name: HAIL_DOCKER_PREFIX + value: "{{ global.docker_prefix }}" - name: HAIL_GCP_REGION valueFrom: secretKeyRef: @@ -200,6 +202,8 @@ spec: value: "{{ default_ns.name }}" - name: PROJECT value: "{{ global.project }}" + - name: HAIL_DOCKER_PREFIX + value: "{{ global.docker_prefix }}" - name: HAIL_GCP_REGION valueFrom: secretKeyRef: diff --git a/benchmark-service/Makefile b/benchmark-service/Makefile index 1a9cfd075bf..e3694fd5d38 100644 --- a/benchmark-service/Makefile +++ b/benchmark-service/Makefile @@ -1,7 +1,7 @@ include ../config.mk -BENCHMARK_LATEST = gcr.io/$(PROJECT)/benchmark:latest -BENCHMARK_IMAGE = gcr.io/$(PROJECT)/benchmark:$(shell docker images -q --no-trunc benchmark:latest | sed -e 's,[^:]*:,,') +BENCHMARK_LATEST = $(DOCKER_PREFIX)/benchmark:latest +BENCHMARK_IMAGE = $(DOCKER_PREFIX)/benchmark:$(shell docker images -q --no-trunc benchmark:latest | sed -e 's,[^:]*:,,') EXTRA_PYTHONPATH := ../hail/python:../gear:../web_common PYTHON := PYTHONPATH=$${PYTHONPATH:+$${PYTHONPATH}:}$(EXTRA_PYTHONPATH) python3 diff --git a/benchmark/Dockerfile b/benchmark/Dockerfile index bc5a13edde4..1c4f1950339 100644 --- a/benchmark/Dockerfile +++ b/benchmark/Dockerfile @@ -1,4 +1,4 @@ -FROM gcr.io/{{ global.project }}/ubuntu:18.04 +FROM {{ global.docker_prefix }}/ubuntu:18.04 ENV LANG C.UTF-8 diff --git a/benchmark/Makefile b/benchmark/Makefile index 60630cbefa4..3113151bf5c 100644 --- a/benchmark/Makefile +++ b/benchmark/Makefile @@ -29,7 +29,7 @@ cleanup_image: rm -f pushed_image BENCHMARK_DOCKER_TAG := benchmark_$(shell whoami) -BENCHMARK_REPO_BASE = gcr.io/$(PROJECT)/$(BENCHMARK_DOCKER_TAG) +BENCHMARK_REPO_BASE = $(DOCKER_PREFIX)/$(BENCHMARK_DOCKER_TAG) ifndef HAIL_WHEEL image_sha: @@ -41,7 +41,7 @@ image_sha: wheel cleanup_image cp $(BENCHMARK_WHEEL) . # it's possible that the HAIL_WHEEL installs different dependencies, but this generally creates less work for docker cp ../hail/python/requirements.txt . - python3 ../ci/jinja2_render.py '{"global":{"project":"$(PROJECT)"}}' Dockerfile Dockerfile.out + python3 ../ci/jinja2_render.py '{"global":{"docker_prefix":"$(DOCKER_PREFIX)"}' Dockerfile Dockerfile.out docker build -f Dockerfile.out -t $(BENCHMARK_DOCKER_TAG) . 
--build-arg HAIL_WHEEL=$(notdir $(HAIL_WHEEL)) --build-arg BENCHMARK_WHEEL=$(notdir $(BENCHMARK_WHEEL)) @printf $$(docker images -q --no-trunc $(BENCHMARK_DOCKER_TAG) | sed -e 's,[^:]*:,,') > image_sha @echo Image sha is `cat image_sha` @@ -71,4 +71,3 @@ clean: rm -f $(notdir $(HAIL_WHEEL)) rm -f $(notdir $(BENCHMARK_WHEEL)) rm -f requirements.txt - diff --git a/bootstrap-gateway/Makefile b/bootstrap-gateway/Makefile index b5cffe53a91..dbb9616a73a 100644 --- a/bootstrap-gateway/Makefile +++ b/bootstrap-gateway/Makefile @@ -2,8 +2,8 @@ include ../config.mk .PHONY: build push deploy -GATEWAY_LATEST = gcr.io/$(PROJECT)/gateway:latest -GATEWAY_IMAGE = gcr.io/$(PROJECT)/gateway:$(shell docker images -q --no-trunc gateway | sed -e 's,[^:]*:,,') +GATEWAY_LATEST = $(DOCKER_PREFIX)/gateway:latest +GATEWAY_IMAGE = $(DOCKER_PREFIX)/gateway:$(shell docker images -q --no-trunc gateway | sed -e 's,[^:]*:,,') build: $(MAKE) -C ../docker hail-ubuntu diff --git a/build.yaml b/build.yaml index 1904390e016..5987a0a1d49 100644 --- a/build.yaml +++ b/build.yaml @@ -2289,7 +2289,7 @@ steps: export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} export HAIL_NETCAT_UBUNTU_IMAGE={{ netcat_ubuntu_image.image }} export HAIL_HAIL_BASE_IMAGE={{ hail_base_image.image }} - export DOCKER_ROOT_IMAGE="gcr.io/{{ global.project }}/ubuntu:18.04" + export DOCKER_ROOT_IMAGE="{{ global.docker_prefix }}/ubuntu:18.04" export HAIL_TEST_TOKEN_FILE=/user-tokens/tokens.json export HAIL_TEST_DEV_TOKEN_FILE=/dev-tokens/tokens.json export HAIL_TOKEN="{{ token }}" @@ -2357,7 +2357,7 @@ steps: export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} export HAIL_NETCAT_UBUNTU_IMAGE={{ netcat_ubuntu_image.image }} export HAIL_HAIL_BASE_IMAGE={{ hail_base_image.image }} - export DOCKER_ROOT_IMAGE="gcr.io/{{ global.project }}/ubuntu:18.04" + export DOCKER_ROOT_IMAGE="{{ global.docker_prefix }}/ubuntu:18.04" export HAIL_TEST_TOKEN_FILE=/user-tokens/tokens.json export HAIL_TEST_DEV_TOKEN_FILE=/dev-tokens/tokens.json export HAIL_TOKEN="{{ token }}" @@ -2425,7 +2425,7 @@ steps: export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} export HAIL_NETCAT_UBUNTU_IMAGE={{ netcat_ubuntu_image.image }} export HAIL_HAIL_BASE_IMAGE={{ hail_base_image.image }} - export DOCKER_ROOT_IMAGE="gcr.io/{{ global.project }}/ubuntu:18.04" + export DOCKER_ROOT_IMAGE="{{ global.docker_prefix }}/ubuntu:18.04" export HAIL_TEST_TOKEN_FILE=/user-tokens/tokens.json export HAIL_TEST_DEV_TOKEN_FILE=/dev-tokens/tokens.json export HAIL_TOKEN="{{ token }}" @@ -2493,7 +2493,7 @@ steps: export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} export HAIL_NETCAT_UBUNTU_IMAGE={{ netcat_ubuntu_image.image }} export HAIL_HAIL_BASE_IMAGE={{ hail_base_image.image }} - export DOCKER_ROOT_IMAGE="gcr.io/{{ global.project }}/ubuntu:18.04" + export DOCKER_ROOT_IMAGE="{{ global.docker_prefix }}/ubuntu:18.04" export HAIL_TEST_TOKEN_FILE=/user-tokens/tokens.json export HAIL_TEST_DEV_TOKEN_FILE=/dev-tokens/tokens.json export HAIL_TOKEN="{{ token }}" @@ -2561,7 +2561,7 @@ steps: export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} export HAIL_NETCAT_UBUNTU_IMAGE={{ netcat_ubuntu_image.image }} export HAIL_HAIL_BASE_IMAGE={{ hail_base_image.image }} - export DOCKER_ROOT_IMAGE="gcr.io/{{ global.project }}/ubuntu:18.04" + export DOCKER_ROOT_IMAGE="{{ global.docker_prefix }}/ubuntu:18.04" export HAIL_TEST_TOKEN_FILE=/user-tokens/tokens.json export HAIL_TEST_DEV_TOKEN_FILE=/dev-tokens/tokens.json export HAIL_TOKEN="{{ token }}" @@ -2737,8 +2737,8 @@ steps: export HAIL_GSA_KEY_FILE=/test-gsa-key/key.json export 
PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=0 - export DOCKER_ROOT_IMAGE="gcr.io/{{ global.project }}/ubuntu:18.04" - export PYTHON_DILL_IMAGE="gcr.io/{{ global.project }}/python-dill:3.7-slim" + export DOCKER_ROOT_IMAGE="{{ global.docker_prefix }}/ubuntu:18.04" + export PYTHON_DILL_IMAGE="{{ global.docker_prefix }}/python-dill:3.7-slim" hailctl config set batch/billing_project test hailctl config set batch/bucket cpg-hail-test python3 -m pytest \ @@ -2788,8 +2788,8 @@ steps: export HAIL_GSA_KEY_FILE=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=1 - export DOCKER_ROOT_IMAGE="gcr.io/{{ global.project }}/ubuntu:18.04" - export PYTHON_DILL_IMAGE="gcr.io/{{ global.project }}/python-dill:3.7-slim" + export DOCKER_ROOT_IMAGE="{{ global.docker_prefix }}/ubuntu:18.04" + export PYTHON_DILL_IMAGE="{{ global.docker_prefix }}/python-dill:3.7-slim" hailctl config set batch/billing_project test hailctl config set batch/bucket cpg-hail-test python3 -m pytest \ @@ -2839,8 +2839,8 @@ steps: export HAIL_GSA_KEY_FILE=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=2 - export DOCKER_ROOT_IMAGE="gcr.io/{{ global.project }}/ubuntu:18.04" - export PYTHON_DILL_IMAGE="gcr.io/{{ global.project }}/python-dill:3.7-slim" + export DOCKER_ROOT_IMAGE="{{ global.docker_prefix }}/ubuntu:18.04" + export PYTHON_DILL_IMAGE="{{ global.docker_prefix }}/python-dill:3.7-slim" hailctl config set batch/billing_project test hailctl config set batch/bucket cpg-hail-test python3 -m pytest \ @@ -2890,8 +2890,8 @@ steps: export HAIL_GSA_KEY_FILE=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=3 - export DOCKER_ROOT_IMAGE="gcr.io/{{ global.project }}/ubuntu:18.04" - export PYTHON_DILL_IMAGE="gcr.io/{{ global.project }}/python-dill:3.7-slim" + export DOCKER_ROOT_IMAGE="{{ global.docker_prefix }}/ubuntu:18.04" + export PYTHON_DILL_IMAGE="{{ global.docker_prefix }}/python-dill:3.7-slim" hailctl config set batch/billing_project test hailctl config set batch/bucket cpg-hail-test python3 -m pytest \ @@ -2941,8 +2941,8 @@ steps: export HAIL_GSA_KEY_FILE=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=4 - export DOCKER_ROOT_IMAGE="gcr.io/{{ global.project }}/ubuntu:18.04" - export PYTHON_DILL_IMAGE="gcr.io/{{ global.project }}/python-dill:3.7-slim" + export DOCKER_ROOT_IMAGE="{{ global.docker_prefix }}/ubuntu:18.04" + export PYTHON_DILL_IMAGE="{{ global.docker_prefix }}/python-dill:3.7-slim" hailctl config set batch/billing_project test hailctl config set batch/bucket cpg-hail-test python3 -m pytest \ diff --git a/ci/Makefile b/ci/Makefile index bff5fd12a15..b59abad6a38 100644 --- a/ci/Makefile +++ b/ci/Makefile @@ -1,10 +1,10 @@ include ../config.mk -CI_UTILS_LATEST = gcr.io/$(PROJECT)/ci-utils:latest -CI_UTILS_IMAGE = gcr.io/$(PROJECT)/ci-utils:$(shell docker images -q --no-trunc ci-utils | sed -e 's,[^:]*:,,') +CI_UTILS_LATEST = $(DOCKER_PREFIX)/ci-utils:latest +CI_UTILS_IMAGE = $(DOCKER_PREFIX)/ci-utils:$(shell docker images -q --no-trunc ci-utils | sed -e 's,[^:]*:,,') -CI_LATEST = gcr.io/$(PROJECT)/ci:latest -CI_IMAGE = gcr.io/$(PROJECT)/ci:$(shell docker images -q --no-trunc ci | sed -e 's,[^:]*:,,') +CI_LATEST = $(DOCKER_PREFIX)/ci:latest +CI_IMAGE = $(DOCKER_PREFIX)/ci:$(shell docker images -q --no-trunc ci | sed -e 's,[^:]*:,,') EXTRA_PYTHONPATH := ../batch:../hail/python:../gear:../web_common PYTHON := PYTHONPATH=$${PYTHONPATH:+$${PYTHONPATH}:}$(EXTRA_PYTHONPATH) python3 @@ -47,5 +47,5 @@ push: build .PHONY: deploy deploy: push 
push-ci-utils ! [ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default - python3 jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"default_ns":{"name":"$(NAMESPACE)"},"ci_image":{"image":"$(CI_IMAGE)"},"global":{"project":"$(PROJECT)","zone":"$(ZONE)","ip":"$(IP)","domain":"$(DOMAIN)", "k8s_server_url":"$(KUBERNETES_SERVER_URL)"},"ci_utils_image":{"image":"$(CI_UTILS_IMAGE)"},"ci_database":{"user_secret_name":"sql-ci-user-config"}}' deployment.yaml deployment.yaml.out + python3 jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"default_ns":{"name":"$(NAMESPACE)"},"ci_image":{"image":"$(CI_IMAGE)"},"global":{"project":"$(PROJECT)","zone":"$(ZONE)","docker_prefix":"$(DOCKER_PREFIX)","ip":"$(IP)","domain":"$(DOMAIN)", "k8s_server_url":"$(KUBERNETES_SERVER_URL)"},"ci_utils_image":{"image":"$(CI_UTILS_IMAGE)"},"ci_database":{"user_secret_name":"sql-ci-user-config"}}' deployment.yaml deployment.yaml.out kubectl -n $(NAMESPACE) apply -f deployment.yaml.out diff --git a/ci/ci/build.py b/ci/ci/build.py index 6fd600c7f59..6e4e9f08ec7 100644 --- a/ci/ci/build.py +++ b/ci/ci/build.py @@ -11,6 +11,7 @@ from .environment import ( GCP_PROJECT, GCP_ZONE, + DOCKER_PREFIX, DOMAIN, IP, CI_UTILS_IMAGE, @@ -167,6 +168,7 @@ def input_config(self, code, scope): config['global'] = { 'project': GCP_PROJECT, 'zone': GCP_ZONE, + 'docker_prefix': DOCKER_PREFIX, 'domain': DOMAIN, 'ip': IP, 'k8s_server_url': KUBERNETES_SERVER_URL, @@ -235,9 +237,9 @@ def __init__(self, params, dockerfile, context_path, publish_as, inputs): # pyl self.publish_as = publish_as self.inputs = inputs if params.scope == 'deploy' and publish_as and not is_test_deployment: - self.base_image = f'gcr.io/{GCP_PROJECT}/{self.publish_as}' + self.base_image = f'{DOCKER_PREFIX}/{self.publish_as}' else: - self.base_image = f'gcr.io/{GCP_PROJECT}/ci-intermediate' + self.base_image = f'{DOCKER_PREFIX}/ci-intermediate' self.image = f'{self.base_image}:{self.token}' self.job = None @@ -290,7 +292,7 @@ def build(self, batch, code, scope): ) if self.publish_as: - published_latest = shq(f'gcr.io/{GCP_PROJECT}/{self.publish_as}:latest') + published_latest = shq(f'{DOCKER_PREFIX}/{self.publish_as}:latest') pull_published_latest = f'time retry docker pull {shq(published_latest)} || true' cache_from_published_latest = f'--cache-from {shq(published_latest)}' else: @@ -321,6 +323,7 @@ def build(self, batch, code, scope): ''' ) + docker_registry = DOCKER_PREFIX.split('/')[0] script = f''' set -ex date @@ -338,7 +341,7 @@ def build(self, batch, code, scope): time gcloud -q auth activate-service-account \ --key-file=/secrets/gcr-push-service-account-key/gcr-push-service-account-key.json -time gcloud -q auth configure-docker +time gcloud -q auth configure-docker {docker_registry} time retry docker pull $FROM_IMAGE {pull_published_latest} diff --git a/ci/ci/environment.py b/ci/ci/environment.py index 3d9f432917e..fdba01268af 100644 --- a/ci/ci/environment.py +++ b/ci/ci/environment.py @@ -4,10 +4,13 @@ assert GCP_PROJECT != '' GCP_ZONE = os.environ['HAIL_GCP_ZONE'] assert GCP_ZONE != '' +GCP_REGION = '-'.join(GCP_ZONE.split('-')[:-1]) # us-west1-a -> us-west1 +DOCKER_PREFIX = os.environ.get('HAIL_DOCKER_PREFIX', f'gcr.io/{GCP_REGION}') +assert DOCKER_PREFIX != '' DOMAIN = os.environ['HAIL_DOMAIN'] assert DOMAIN != '' IP = os.environ.get('HAIL_IP') -CI_UTILS_IMAGE = os.environ.get('HAIL_CI_UTILS_IMAGE', 'gcr.io/hail-vdc/ci-utils:latest') +CI_UTILS_IMAGE = 
os.environ.get('HAIL_CI_UTILS_IMAGE', f'{DOCKER_PREFIX}/ci-utils:latest') DEFAULT_NAMESPACE = os.environ['HAIL_DEFAULT_NAMESPACE'] KUBERNETES_SERVER_URL = os.environ['KUBERNETES_SERVER_URL'] BUCKET = os.environ['HAIL_CI_BUCKET_NAME'] diff --git a/ci/deployment.yaml b/ci/deployment.yaml index f0d47675f6f..c6173689e48 100644 --- a/ci/deployment.yaml +++ b/ci/deployment.yaml @@ -52,6 +52,8 @@ spec: value: "{{ global.project }}" - name: HAIL_GCP_ZONE value: "{{ global.zone }}" + - name: HAIL_DOCKER_PREFIX + value: "{{ global.docker_prefix }}" - name: HAIL_CI_UTILS_IMAGE value: "{{ ci_utils_image.image }}" - name: HAIL_IP diff --git a/ci/test/resources/build.yaml b/ci/test/resources/build.yaml index 3b1df5214c5..0b78c54383f 100644 --- a/ci/test/resources/build.yaml +++ b/ci/test/resources/build.yaml @@ -7,7 +7,7 @@ steps: name: inline_image dockerFile: inline: | - FROM gcr.io/{{ global.project }}/ubuntu:18.04 + FROM {{ global.region }}-docker.pkg.dev/{{ global.project }}/hail/ubuntu:18.04 RUN apt-get update && apt-get install -y git contextPath: . - kind: runImage diff --git a/config.mk b/config.mk index 054983ada22..4812634eadf 100644 --- a/config.mk +++ b/config.mk @@ -1,11 +1,12 @@ PROJECT := hail-295901 -DOCKER_ROOT_IMAGE := gcr.io/$(PROJECT)/ubuntu:18.04 +REGION := australia-southeast1 +ZONE := australia-southeast1-b +DOCKER_PREFIX := $(REGION)-docker.pkg.dev/$(PROJECT)/hail +DOCKER_ROOT_IMAGE := $(DOCKER_PREFIX)/ubuntu:18.04 DOMAIN := hail.populationgenomics.org.au INTERNAL_IP := 10.152.0.2 IP := 35.201.29.236 KUBERNETES_SERVER_URL := https://34.87.199.41 -REGION := australia-southeast1 -ZONE := australia-southeast1-b ifeq ($(NAMESPACE),default) SCOPE = deploy DEPLOY = true diff --git a/dev-docs/development_process.md b/dev-docs/development_process.md index 113b34a5619..660c77b0b03 100644 --- a/dev-docs/development_process.md +++ b/dev-docs/development_process.md @@ -57,6 +57,8 @@ gcloud auth login gcloud config set project hail-vdc gcloud container clusters get-credentials vdc --zone=us-central1-a gcloud auth configure-docker +# If you are using the Artifact Registry: +# gcloud auth configure-docker $REGION-docker.pkg.dev ``` To use BuildKit with Docker for a much faster building experience, add diff --git a/docker/Makefile b/docker/Makefile index 1907254ae55..ddb5ec35761 100644 --- a/docker/Makefile +++ b/docker/Makefile @@ -3,28 +3,28 @@ include ../config.mk .PHONY: build build: base-stmp service-base hail-ubuntu -HAIL_UBUNTU_LATEST = gcr.io/$(PROJECT)/hail-ubuntu:latest -HAIL_UBUNTU_IMAGE = gcr.io/$(PROJECT)/hail-ubuntu:$(shell docker images -q --no-trunc hail-ubuntu:latest | sed -e 's,[^:]*:,,') +HAIL_UBUNTU_LATEST = $(DOCKER_PREFIX)/hail-ubuntu:latest +HAIL_UBUNTU_IMAGE = $(DOCKER_PREFIX)/hail-ubuntu:$(shell docker images -q --no-trunc hail-ubuntu:latest | sed -e 's,[^:]*:,,') -BASE_LATEST = gcr.io/$(PROJECT)/base:latest -BASE_IMAGE = gcr.io/$(PROJECT)/base:$(shell docker images -q --no-trunc base:latest | sed -e 's,[^:]*:,,') +BASE_LATEST = $(DOCKER_PREFIX)/base:latest +BASE_IMAGE = $(DOCKER_PREFIX)/base:$(shell docker images -q --no-trunc base:latest | sed -e 's,[^:]*:,,') -SERVICE_BASE_LATEST = gcr.io/$(PROJECT)/service-base:latest -SERVICE_BASE_IMAGE = gcr.io/$(PROJECT)/service-base:$(shell docker images -q --no-trunc service-base:latest | sed -e 's,[^:]*:,,') +SERVICE_BASE_LATEST = $(DOCKER_PREFIX)/service-base:latest +SERVICE_BASE_IMAGE = $(DOCKER_PREFIX)/service-base:$(shell docker images -q --no-trunc service-base:latest | sed -e 's,[^:]*:,,') -HAIL_PUBLIC_LATEST = 
gcr.io/$(PROJECT)/hail-public:latest -HAIL_PUBLIC_IMAGE = gcr.io/$(PROJECT)/hail-public:$(shell docker images -q --no-trunc hail-public:latest | sed -e 's,[^:]*:,,') +HAIL_PUBLIC_LATEST = $(DOCKER_PREFIX)/hail-public:latest +HAIL_PUBLIC_IMAGE = $(DOCKER_PREFIX)/hail-public:$(shell docker images -q --no-trunc hail-public:latest | sed -e 's,[^:]*:,,') -GENETICS_PUBLIC_LATEST = gcr.io/$(PROJECT)/genetics-public:latest -GENETICS_PUBLIC_IMAGE = gcr.io/$(PROJECT)/genetics-public:$(shell docker images -q --no-trunc genetics-public:latest | sed -e 's,[^:]*:,,') +GENETICS_PUBLIC_LATEST = $(DOCKER_PREFIX)/genetics-public:latest +GENETICS_PUBLIC_IMAGE = $(DOCKER_PREFIX)/genetics-public:$(shell docker images -q --no-trunc genetics-public:latest | sed -e 's,[^:]*:,,') .PHONY: hail-ubuntu hail-ubuntu: hail-ubuntu-stmp hail-ubuntu-stmp: hail-ubuntu/Dockerfile hail-ubuntu/hail-pip-install hail-ubuntu/pip.conf hail-ubuntu/hail-apt-get-install - -docker pull gcr.io/$(PROJECT)/ubuntu:bionic-20200921 + -docker pull $(DOCKER_PREFIX)/ubuntu:bionic-20200921 -docker pull $(HAIL_UBUNTU_LATEST) - python3 ../ci/jinja2_render.py '{"global":{"project":"$(PROJECT)"}}' hail-ubuntu/Dockerfile hail-ubuntu/Dockerfile.out + python3 ../ci/jinja2_render.py '{"global":{"docker_prefix":"$(DOCKER_PREFIX)"}}' hail-ubuntu/Dockerfile hail-ubuntu/Dockerfile.out docker build -t hail-ubuntu -f hail-ubuntu/Dockerfile.out --cache-from hail-ubuntu,$(HAIL_UBUNTU_LATEST),ubuntu:bionic-20200921 hail-ubuntu touch hail-ubuntu-stmp diff --git a/docker/hail-ubuntu/Dockerfile b/docker/hail-ubuntu/Dockerfile index 1cbc30eafc4..23b1cd2d4df 100644 --- a/docker/hail-ubuntu/Dockerfile +++ b/docker/hail-ubuntu/Dockerfile @@ -1,4 +1,4 @@ -FROM gcr.io/{{ global.project }}/ubuntu:focal-20201106 +FROM {{ global.docker_prefix }}/ubuntu:focal-20201106 ENV LANG C.UTF-8 COPY hail-apt-get-install /bin/hail-apt-get-install # We need software-properties-common for add-apt-repository; however, that diff --git a/docker/python-dill/Makefile b/docker/python-dill/Makefile index 75338d366f9..76f2a48f70c 100644 --- a/docker/python-dill/Makefile +++ b/docker/python-dill/Makefile @@ -1,4 +1,4 @@ include ../../config.mk push: - PROJECT=$(PROJECT) bash push.sh + DOCKER_PREFIX=$(DOCKER_PREFIX) bash push.sh diff --git a/docker/python-dill/push.sh b/docker/python-dill/push.sh index 4091017c21f..4f6bfbdaad9 100644 --- a/docker/python-dill/push.sh +++ b/docker/python-dill/push.sh @@ -5,6 +5,6 @@ do sed "s/@PYTHON_VERSION@/$version/g" Dockerfile > Dockerfile.out docker build --tag hailgenetics/python-dill:$version - gcloud-auth.log 2>&1; then @@ -29,7 +31,7 @@ echo "Namespace: $DEFAULT_NAMESPACE; Home: $HOME" while true; do if curl -sSL "https://notebook$NOTEBOOK_BASE_PATH/images" > image-fetch-output.log 2>&1; then - for image in "gcr.io/$PROJECT/base:latest" \ + for image in "$DOCKER_PREFIX/base:latest" \ gcr.io/google.com/cloudsdktool/cloud-sdk:310.0.0-alpine \ $(cat image-fetch-output.log); do docker pull "$image" || true diff --git a/infra/README.md b/infra/README.md index fdca61863a8..8e80d3e094d 100644 --- a/infra/README.md +++ b/infra/README.md @@ -16,7 +16,7 @@ Instructions: service account key and place it in `$HOME/.hail/terraform_sa_key.json`. -- Enable the the GCP services needed by Hail: +- Enable the GCP services needed by Hail: ``` gcloud services enable \ @@ -48,6 +48,9 @@ Instructions: gcp_region = "" gcp_zone = "" domain = "" + # If set to true, pull the base ubuntu image from Artifact Registry. + # Otherwise, assumes GCR. 
+ use_artifact_registry = true ``` - Run `terraform init`. @@ -88,6 +91,8 @@ You can now install Hail: sudo snap install --classic kubectl sudo usermod -a -G docker $USER gcloud -q auth configure-docker + # If you are using the Artifact Registry: + # gcloud -q auth configure-docker $REGION-docker.pkg.dev gcloud container clusters get-credentials --zone vdc python3 -m pip install -r $HOME/hail/docker/requirements.txt ``` @@ -152,13 +157,16 @@ You can now install Hail: - Bootstrap the cluster by running: ``` - HAIL_CI_UTILS_IMAGE=gcr.io//ci-utils:latest \ + DOCKER_PREFIX=gcr.io/ + + HAIL_CI_UTILS_IMAGE=$DOCKER_PREFIX/ci-utils:latest \ HAIL_CI_BUCKET_NAME=dummy \ KUBERNETES_SERVER_URL='' \ HAIL_DEFAULT_NAMESPACE='default' \ HAIL_DOMAIN= \ HAIL_GCP_ZONE= \ HAIL_GCP_PROJECT= \ + DOCKER_PREFIX=$DOCKER_PREFIX \ PYTHONPATH=$HOME/hail/ci:$HOME/hail/batch:$HOME/hail/hail/python \ python3 $HAIL/ci/bootstrap.py hail-is/hail:main $(git rev-parse HEAD) test_batch_0 ``` @@ -166,13 +174,16 @@ You can now install Hail: - Create the initial (developer) user: ``` - HAIL_CI_UTILS_IMAGE=gcr.io//ci-utils:latest \ + DOCKER_PREFIX=gcr.io/ + + HAIL_CI_UTILS_IMAGE=$DOCKER_PREFIX/ci-utils:latest \ HAIL_CI_BUCKET_NAME=dummy \ KUBERNETES_SERVER_URL='' \ HAIL_DEFAULT_NAMESPACE='default' \ HAIL_DOMAIN= \ HAIL_GCP_ZONE= \ HAIL_GCP_PROJECT= \ + DOCKER_PREFIX=$DOCKER_PREFIX \ PYTHONPATH=$HOME/hail/ci:$HOME/hail/batch:$HOME/hail/hail/python \ python3 $HAIL/ci/bootstrap.py --extra-code-config '{"username":"","email":""}' hail-is/hail:main $(git rev-parse HEAD) create_initial_user ``` diff --git a/infra/main.tf b/infra/main.tf index cf60b2d675b..f621e55f5bc 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -18,6 +18,19 @@ variable "gcp_location" {} variable "gcp_region" {} variable "gcp_zone" {} variable "domain" {} +variable "use_artifact_registry" { + type = bool + description = "pull the ubuntu image from Artifact Registry. Otherwise, GCR" +} + +locals { + docker_prefix = ( + var.use_artifact_registry ? + "${var.gcp_region}-docker.pkg.dev/${var.gcp_project}/hail" : + "gcr.io/${var.gcp_project}" + ) + docker_root_image = "${local.docker_prefix}/ubuntu:18.04" +} provider "google" { credentials = file("~/.hail/terraform_sa_key.json") @@ -27,6 +40,14 @@ provider "google" { zone = var.gcp_zone } +provider "google-beta" { + credentials = file("~/.hail/terraform_sa_key.json") + + project = var.gcp_project + region = var.gcp_region + zone = var.gcp_zone +} + data "google_client_config" "provider" {} resource "google_project_service" "service_networking" { @@ -141,7 +162,10 @@ resource "random_id" "db_name_suffix" { } # Without this, I get: -# Error: Error, failed to create instance because the network doesn't have at least 1 private services connection. Please see https://cloud.google.com/sql/docs/mysql/private-ip#network_requirements for how to create this connection. +# Error: Error, failed to create instance because the network doesn't have at least +# 1 private services connection. Please see +# https://cloud.google.com/sql/docs/mysql/private-ip#network_requirements +# for how to create this connection. 
resource "google_compute_global_address" "google_managed_services_default" { name = "google-managed-services-default" purpose = "VPC_PEERING" @@ -207,11 +231,12 @@ resource "kubernetes_secret" "global_config" { batch_gcp_regions = var.batch_gcp_regions batch_logs_bucket = google_storage_bucket.batch_logs.name default_namespace = "default" - docker_root_image = "gcr.io/${var.gcp_project}/ubuntu:18.04" + docker_root_image = local.docker_root_image domain = var.domain gcp_project = var.gcp_project gcp_region = var.gcp_region gcp_zone = var.gcp_zone + docker_prefix = local.docker_prefix gsuite_organization = var.gsuite_organization internal_ip = google_compute_address.internal_gateway.address ip = google_compute_address.gateway.address @@ -255,7 +280,7 @@ ssl-mode=VERIFY_CA END "sql-config.json" = < Date: Thu, 18 Feb 2021 08:35:55 +1100 Subject: [PATCH 125/501] Add initial version endpoint for deploy --- auth/auth/auth.py | 6 ++++++ docker/Dockerfile.service-base | 1 + 2 files changed, 7 insertions(+) diff --git a/auth/auth/auth.py b/auth/auth/auth.py index d3f42a87c1f..d1c50edc3d1 100644 --- a/auth/auth/auth.py +++ b/auth/auth/auth.py @@ -8,6 +8,7 @@ import google.auth.transport.requests import google.oauth2.id_token import google_auth_oauthlib.flow +from hailtop.hailctl import version from hailtop.config import get_deploy_config from hailtop.tls import internal_server_ssl_context from hailtop.hail_logging import AccessLogger @@ -558,6 +559,11 @@ async def userinfo(request): return web.json_response(user) +@routes.get('/api/v1alpha/version') +async def rest_get_version(request): # pylint: disable=W0613 + return version() + + async def on_startup(app): db = Database() await db.async_init(maxsize=50) diff --git a/docker/Dockerfile.service-base b/docker/Dockerfile.service-base index c34a143ff44..16c4948b9fe 100644 --- a/docker/Dockerfile.service-base +++ b/docker/Dockerfile.service-base @@ -7,6 +7,7 @@ RUN hail-pip-install -r service-base-requirements.txt COPY hail/python/setup-hailtop.py /hailtop/setup.py COPY hail/python/hailtop /hailtop/hailtop/ +RUN make -C hail python-version-info RUN hail-pip-install /hailtop && rm -rf /hailtop COPY gear/setup.py /gear/setup.py From 9b94a339e25ed0e3c4b53694535df72cdb724b8a Mon Sep 17 00:00:00 2001 From: Vlad Savelyev Date: Thu, 18 Feb 2021 10:35:18 +1100 Subject: [PATCH 126/501] Fix building worker image (#40) --- batch/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/batch/Makefile b/batch/Makefile index 1375084ad96..b557413539e 100644 --- a/batch/Makefile +++ b/batch/Makefile @@ -31,7 +31,7 @@ build-worker: build-prereqs # janky cp ../hail/build/libs/hail-all-spark.jar ./hail.jar -docker pull $(BATCH_WORKER_LATEST) - python3 ../ci/jinja2_render.py '{"global":{"docker_prefix":"$(DOCKER_PREFIX)"}}}' Dockerfile.worker Dockerfile.worker.out + python3 ../ci/jinja2_render.py '{"global":{"docker_prefix":"$(DOCKER_PREFIX)"}}' Dockerfile.worker Dockerfile.worker.out docker build -t batch-worker -f Dockerfile.worker.out --cache-from batch-worker,$(BATCH_WORKER_LATEST),service-base .. .PHONY: build From b8ad36f21ab88b6107d3fe23912911ca0ee73c58 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Thu, 18 Feb 2021 11:02:35 +1100 Subject: [PATCH 127/501] Add Artifact Registry permissions for the batch-agent service account. 
--- infra/main.tf | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/infra/main.tf b/infra/main.tf index f621e55f5bc..93814998ff4 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -338,6 +338,14 @@ resource "google_artifact_registry_repository_iam_member" "artifact_registry_pul member = "serviceAccount:${google_service_account.gcr_pull.email}" } +resource "google_artifact_registry_repository_iam_member" "artifact_registry_batch_agent_viewer" { + provider = google-beta + repository = google_artifact_registry_repository.repository.name + location = var.gcp_location + role = "roles/artifactregistry.reader" + member = "serviceAccount:${google_service_account.batch_agent.email}" +} + resource "google_artifact_registry_repository_iam_member" "artifact_registry_ci_viewer" { provider = google-beta repository = google_artifact_registry_repository.repository.name From 87f0ce14bea1d9f4d8f9efa1f28e55400d58f25d Mon Sep 17 00:00:00 2001 From: MIchael Franklin Date: Thu, 18 Feb 2021 16:05:32 +1100 Subject: [PATCH 128/501] Improve python-version-info install for service-base --- docker/Dockerfile.service-base | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docker/Dockerfile.service-base b/docker/Dockerfile.service-base index 16c4948b9fe..0f294df4c16 100644 --- a/docker/Dockerfile.service-base +++ b/docker/Dockerfile.service-base @@ -7,7 +7,10 @@ RUN hail-pip-install -r service-base-requirements.txt COPY hail/python/setup-hailtop.py /hailtop/setup.py COPY hail/python/hailtop /hailtop/hailtop/ -RUN make -C hail python-version-info +COPY hail/Makefile hail/env_var.mk hail/ +RUN mkdir -p hail/python/hail hail/python/hailtop/hailctl hail/python/hail/docs/_static && \ + make -C hail python-version-info && \ + rm hail/Makefile hail/env_var.mk RUN hail-pip-install /hailtop && rm -rf /hailtop COPY gear/setup.py /gear/setup.py From f11d09118ab83c1a491e46d337be6c7cc9cad7ef Mon Sep 17 00:00:00 2001 From: MIchael Franklin Date: Thu, 18 Feb 2021 17:15:04 +1100 Subject: [PATCH 129/501] Slight modifications, and ask for rebuild --- auth/auth/auth.py | 5 ++++- docker/Dockerfile.service-base | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/auth/auth/auth.py b/auth/auth/auth.py index d1c50edc3d1..4f2c5c4e12d 100644 --- a/auth/auth/auth.py +++ b/auth/auth/auth.py @@ -561,7 +561,10 @@ async def userinfo(request): @routes.get('/api/v1alpha/version') async def rest_get_version(request): # pylint: disable=W0613 - return version() + try: + return web.Response(text=version()) + except Exception as e: + return web.json_response({"error": str(e)}) async def on_startup(app): diff --git a/docker/Dockerfile.service-base b/docker/Dockerfile.service-base index 0f294df4c16..53a7fbe8f8d 100644 --- a/docker/Dockerfile.service-base +++ b/docker/Dockerfile.service-base @@ -1,4 +1,4 @@ -FROM {{ base_image.image }} +FROM australia-southeast1-docker.pkg.dev/hail-295901/hail/service-base:latest RUN hail-apt-get-install build-essential python3-dev @@ -9,7 +9,7 @@ COPY hail/python/setup-hailtop.py /hailtop/setup.py COPY hail/python/hailtop /hailtop/hailtop/ COPY hail/Makefile hail/env_var.mk hail/ RUN mkdir -p hail/python/hail hail/python/hailtop/hailctl hail/python/hail/docs/_static && \ - make -C hail python-version-info && \ + (cd hail && echo $(pwd) && make python-version-info) && \ rm hail/Makefile hail/env_var.mk RUN hail-pip-install /hailtop && rm -rf /hailtop From c837b0f3299dc2b478ab20816a3498addfc828c6 Mon Sep 17 00:00:00 2001 From: MIchael Franklin Date: Thu, 18 Feb 2021 18:48:23 +1100 
Subject: [PATCH 130/501] Revert accidental commit of base_image override --- docker/Dockerfile.service-base | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile.service-base b/docker/Dockerfile.service-base index 53a7fbe8f8d..046ae57d73f 100644 --- a/docker/Dockerfile.service-base +++ b/docker/Dockerfile.service-base @@ -1,4 +1,4 @@ -FROM australia-southeast1-docker.pkg.dev/hail-295901/hail/service-base:latest +FROM {{ base_image.image }} RUN hail-apt-get-install build-essential python3-dev From 04df70d9b973c21d07fe3bb965f05aa20265add7 Mon Sep 17 00:00:00 2001 From: MIchael Franklin Date: Fri, 19 Feb 2021 08:24:24 +1100 Subject: [PATCH 131/501] Add .git for version generation + copy hail_version --- .dockerignore | 2 +- docker/Dockerfile.service-base | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.dockerignore b/.dockerignore index 59bd597af0f..f74c7566b00 100644 --- a/.dockerignore +++ b/.dockerignore @@ -9,7 +9,7 @@ hail/.bloop/ hail/.gradle/ hail/.idea/ hail/.pytest_cache/ -.git/ +# .git/ hail/.ensime.cache.d/ hail/.ensime_cache.d/ hail/.ensime_cache/ diff --git a/docker/Dockerfile.service-base b/docker/Dockerfile.service-base index 046ae57d73f..dfd3e7bb779 100644 --- a/docker/Dockerfile.service-base +++ b/docker/Dockerfile.service-base @@ -7,10 +7,11 @@ RUN hail-pip-install -r service-base-requirements.txt COPY hail/python/setup-hailtop.py /hailtop/setup.py COPY hail/python/hailtop /hailtop/hailtop/ -COPY hail/Makefile hail/env_var.mk hail/ -RUN mkdir -p hail/python/hail hail/python/hailtop/hailctl hail/python/hail/docs/_static && \ - (cd hail && echo $(pwd) && make python-version-info) && \ - rm hail/Makefile hail/env_var.mk +COPY hail/Makefile hail/env_var.mk .git/ hailtop/ +RUN mkdir -p hailtop/python/hail hailtop/python/hailtop/hailctl hailtop/python/hail/docs/_static && \ + (cd hailtop && echo $(pwd) && make python-version-info) && \ + cp hailtop/python/hail/hail_*version hailtop/hailtop/hailctl && \ + rm -rf hailtop/Makefile hailtop/env_var.mk .git/ RUN hail-pip-install /hailtop && rm -rf /hailtop COPY gear/setup.py /gear/setup.py From 15f10993f2930a931f995ac1eda04d16f3b99bb8 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Fri, 19 Feb 2021 08:51:44 +1100 Subject: [PATCH 132/501] Revert CPG-specific batch visibility hack. https://github.com/hail-is/hail/pull/9954 fixes this in a better way. 
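The change below replaces those per-user ownership checks with billing-project membership: a user can see a batch exactly when they belong to the batch's billing project, and everything else returns a 404. Condensed from the handlers added below (where `db` is the service's gear `Database` handle), the core check is roughly:

```
# Condensed from the diff below: a user may access a batch iff they are a
# member of the batch's billing project; otherwise the route 404s.
async def user_can_access(db, batch_id: int, user: str) -> bool:
    record = await db.select_and_fetchone(
        '''
SELECT id
FROM batches
LEFT JOIN billing_project_users
  ON batches.billing_project = billing_project_users.billing_project
WHERE id = %s AND billing_project_users.`user` = %s;
''',
        (batch_id, user))
    return record is not None
```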
--- batch/batch/front_end/front_end.py | 289 ++++++++++++++++------------- 1 file changed, 162 insertions(+), 127 deletions(-) diff --git a/batch/batch/front_end/front_end.py b/batch/batch/front_end/front_end.py index eaace1a0462..f434d267995 100644 --- a/batch/batch/front_end/front_end.py +++ b/batch/batch/front_end/front_end.py @@ -37,22 +37,19 @@ # import uvloop -from ..utils import (adjust_cores_for_memory_request, worker_memory_per_core_mib, - cost_from_msec_mcpu, adjust_cores_for_packability, coalesce, - adjust_cores_for_storage_request, total_worker_storage_gib, - query_billing_projects) +from ..utils import cost_from_msec_mcpu, coalesce, query_billing_projects from ..batch import batch_record_to_dict, job_record_to_dict, cancel_batch_in_db from ..exceptions import (BatchUserError, NonExistentBillingProjectError, ClosedBillingProjectError, InvalidBillingLimitError, BatchOperationAlreadyCompletedError) +from ..inst_coll_config import InstanceCollectionConfigs from ..log_store import LogStore from ..database import CallError, check_call_procedure from ..batch_configuration import (BATCH_BUCKET_NAME, DEFAULT_NAMESPACE) -from ..globals import HTTP_CLIENT_MAX_SIZE, BATCH_FORMAT_VERSION +from ..globals import HTTP_CLIENT_MAX_SIZE, BATCH_FORMAT_VERSION, valid_machine_types from ..spec_writer import SpecWriter from ..batch_format_version import BatchFormatVersion -from .pool_selector import PoolSelector from .validate import ValidationError, validate_batch, validate_and_clean_jobs # uvloop.install() @@ -103,6 +100,7 @@ BATCH_JOB_DEFAULT_MEMORY = os.environ.get('HAIL_BATCH_JOB_DEFAULT_MEMORY', '3.75Gi') BATCH_JOB_DEFAULT_STORAGE = os.environ.get('HAIL_BATCH_JOB_DEFAULT_STORAGE', '10Gi') BATCH_JOB_DEFAULT_WORKER_TYPE = os.environ.get('HAIL_BATCH_JOB_DEFAULT_WORKER_TYPE', 'standard') +BATCH_JOB_DEFAULT_PREEMPTIBLE = True def rest_authenticated_developers_or_auth_only(fun): @@ -115,6 +113,49 @@ async def wrapped(request, userdata, *args, **kwargs): return wrapped +async def _user_can_access(db: Database, batch_id: int, user: str): + record = await db.select_and_fetchone( + ''' +SELECT id +FROM batches +LEFT JOIN billing_project_users ON batches.billing_project = billing_project_users.billing_project +WHERE id = %s AND billing_project_users.`user` = %s; +''', + (batch_id, user)) + + return record is not None + + +def rest_billing_project_users_only(fun): + @rest_authenticated_users_only + @wraps(fun) + async def wrapped(request, userdata, *args, **kwargs): + db = request.app['db'] + batch_id = int(request.match_info['batch_id']) + user = userdata['username'] + permitted_user = await _user_can_access(db, batch_id, user) + if not permitted_user: + raise web.HTTPNotFound() + return await fun(request, userdata, batch_id, *args, **kwargs) + return wrapped + + +def web_billing_project_users_only(redirect=True): + def wrap(fun): + @web_authenticated_users_only(redirect) + @wraps(fun) + async def wrapped(request, userdata, *args, **kwargs): + db = request.app['db'] + batch_id = int(request.match_info['batch_id']) + user = userdata['username'] + permitted_user = await _user_can_access(db, batch_id, user) + if not permitted_user: + raise web.HTTPNotFound() + return await fun(request, userdata, batch_id, *args, **kwargs) + return wrapped + return wrap + + @routes.get('/healthcheck') async def get_healthcheck(request): # pylint: disable=W0613 return web.Response() @@ -150,8 +191,9 @@ async def _query_batch_jobs(request, batch_id): state_query_values = { 'pending': ['Pending'], 'ready': ['Ready'], + 
'creating': ['Creating'], 'running': ['Running'], - 'live': ['Ready', 'Running'], + 'live': ['Ready', 'Creating', 'Running'], 'cancelled': ['Cancelled'], 'error': ['Error'], 'failed': ['Failed'], @@ -250,15 +292,9 @@ async def _query_batch_jobs(request, batch_id): @routes.get('/api/v1alpha/batches/{batch_id}/jobs') @prom_async_time(REQUEST_TIME_GET_JOBS) -@rest_authenticated_users_only -async def get_jobs(request, userdata): - batch_id = int(request.match_info['batch_id']) - user = userdata['username'] - +@rest_billing_project_users_only +async def get_jobs(request, userdata, batch_id): # pylint: disable=unused-argument db = request.app['db'] - - # CPG-specific workaround until there's a permission model for sharing - # batches: don't check the user in this query. record = await db.select_and_fetchone( ''' SELECT * FROM batches @@ -324,11 +360,9 @@ async def _read_log_from_gcs(task): return None -async def _get_job_log(app, batch_id, job_id, user): +async def _get_job_log(app, batch_id, job_id): db: Database = app['db'] - # CPG-specific workaround until there's a permission model for sharing - # batches: don't check the user in this query. record = await db.select_and_fetchone(''' SELECT jobs.state, jobs.spec, ip_address, format_version, jobs.attempt_id FROM jobs @@ -397,7 +431,7 @@ async def _get_full_job_status(app, record): state = record['state'] format_version = BatchFormatVersion(record['format_version']) - if state in ('Pending', 'Ready', 'Cancelled'): + if state in ('Pending', 'Creating', 'Ready', 'Cancelled'): return None if state in ('Error', 'Failed', 'Success'): @@ -432,22 +466,18 @@ async def _get_full_job_status(app, record): @routes.get('/api/v1alpha/batches/{batch_id}/jobs/{job_id}/log') @prom_async_time(REQUEST_TIME_GET_JOB_LOG) -@rest_authenticated_users_only -async def get_job_log(request, userdata): # pylint: disable=R1710 - batch_id = int(request.match_info['batch_id']) +@rest_billing_project_users_only +async def get_job_log(request, userdata, batch_id): # pylint: disable=unused-argument job_id = int(request.match_info['job_id']) - user = userdata['username'] - job_log = await _get_job_log(request.app, batch_id, job_id, user) + job_log = await _get_job_log(request.app, batch_id, job_id) return web.json_response(job_log) -async def _query_batches(request, user): +async def _query_batches(request, user, q): db = request.app['db'] - # CPG-specific workaround until there's a permission model for sharing - # batches: don't check the user in this query. 
- where_conditions = ['NOT deleted'] - where_args = [] + where_conditions = ['EXISTS (SELECT * FROM billing_project_users WHERE billing_project_users.`user` = %s AND billing_project_users.billing_project = batches.billing_project)', 'NOT deleted'] + where_args = [user] last_batch_id = request.query.get('last_batch_id') if last_batch_id is not None: @@ -455,7 +485,6 @@ async def _query_batches(request, user): where_conditions.append('(id < %s)') where_args.append(last_batch_id) - q = request.query.get('q', '') terms = q.split() for t in terms: if t[0] == '!': @@ -478,6 +507,18 @@ async def _query_batches(request, user): ((batches.id) IN (SELECT batch_id FROM batch_attributes WHERE `key` = %s)) +''' + args = [k] + elif t.startswith('user:'): + k = t[5:] + condition = ''' +(batches.`user` = %s) +''' + args = [k] + elif t.startswith('billing_project:'): + k = t[16:] + condition = ''' +(batches.`billing_project` = %s) ''' args = [k] elif t == 'open': @@ -542,9 +583,10 @@ async def _query_batches(request, user): @routes.get('/api/v1alpha/batches') @prom_async_time(REQUEST_TIME_GET_BATCHES) @rest_authenticated_users_only -async def get_batches(request, userdata): +async def get_batches(request, userdata): # pylint: disable=unused-argument user = userdata['username'] - batches, last_batch_id = await _query_batches(request, user) + q = request.query.get('q', f'user:{user}') + batches, last_batch_id = await _query_batches(request, user, q) body = { 'batches': batches } @@ -653,14 +695,13 @@ async def create_jobs(request, userdata): if not resources: resources = {} spec['resources'] = resources + if 'cpu' not in resources: resources['cpu'] = BATCH_JOB_DEFAULT_CPU if 'memory' not in resources: resources['memory'] = BATCH_JOB_DEFAULT_MEMORY if 'storage' not in resources: resources['storage'] = BATCH_JOB_DEFAULT_STORAGE - if 'worker_type' not in resources: - resources['worker_type'] = BATCH_JOB_DEFAULT_WORKER_TYPE req_cores_mcpu = parse_cpu_in_mcpu(resources['cpu']) req_memory_bytes = parse_memory_in_bytes(resources['memory']) @@ -671,30 +712,52 @@ async def create_jobs(request, userdata): reason=f'bad resource request for job {id}: ' f'cpu cannot be 0') - worker_type = resources['worker_type'] + worker_type = resources.get('worker_type') + machine_type = resources.get('machine_type') + preemptible = resources.get('preemptible') - pool_selector: PoolSelector = app['pool_selector'] - pool = pool_selector.select_pool(worker_type=worker_type) + if worker_type and machine_type: + raise web.HTTPBadRequest( + reason='cannot specify both worker_type and machine_type') - if not pool: - raise web.HTTPBadRequest(reason=f'unsupported worker type {worker_type}') + if not machine_type and not worker_type: + worker_type = BATCH_JOB_DEFAULT_WORKER_TYPE + resources['worker_type'] = worker_type + + inst_coll_configs: InstanceCollectionConfigs = app['inst_coll_configs'] + + inst_coll_name = None + cores_mcpu = None + + if worker_type: + if preemptible is not None: + raise web.HTTPBadRequest( + reason='cannot have preemptible specified with a worker_type') + + result = inst_coll_configs.select_pool( + worker_type=worker_type, + cores_mcpu=req_cores_mcpu, + memory_bytes=req_memory_bytes, + storage_bytes=req_storage_bytes) + if result: + inst_coll_name, cores_mcpu = result + else: + assert machine_type and machine_type in valid_machine_types - worker_cores = pool.worker_cores - worker_local_ssd_data_disk = pool.worker_local_ssd_data_disk - worker_pd_ssd_data_disk_size_gb = pool.worker_pd_ssd_data_disk_size_gb + if 
'preemptible' not in resources: + resources['preemptible'] = BATCH_JOB_DEFAULT_PREEMPTIBLE - cores_mcpu = adjust_cores_for_memory_request(req_cores_mcpu, req_memory_bytes, worker_type) - cores_mcpu = adjust_cores_for_storage_request(cores_mcpu, req_storage_bytes, worker_cores, - worker_local_ssd_data_disk, worker_pd_ssd_data_disk_size_gb) - cores_mcpu = adjust_cores_for_packability(cores_mcpu) + result = inst_coll_configs.select_job_private( + machine_type=machine_type, + storage_bytes=req_storage_bytes) + if result: + inst_coll_name, cores_mcpu, storage_gib = result + resources['storage_gib'] = storage_gib - if cores_mcpu > worker_cores * 1000: - total_memory_available = worker_memory_per_core_mib(worker_type) * worker_cores - total_storage_available = total_worker_storage_gib(worker_local_ssd_data_disk, worker_pd_ssd_data_disk_size_gb) + if inst_coll_name is None: raise web.HTTPBadRequest( - reason=f'resource requests for job {id} with worker_type {worker_type} are unsatisfiable: ' - f'requested: cpu={resources["cpu"]}, memory={resources["memory"]} storage={resources["storage"]}' - f'maximum: cpu={worker_cores}, memory={total_memory_available}Mi, storage={total_storage_available}G') + reason=f'resource requests for job {id} are unsatisfiable: ' + f'requested: cpu={resources["cpu"]}, memory={resources["memory"]}, storage={resources["storage"]}') secrets = spec.get('secrets') if not secrets: @@ -749,7 +812,7 @@ async def create_jobs(request, userdata): sa = spec.get('service_account') check_service_account_permissions(user, sa) - icr = inst_coll_resources[pool.name] + icr = inst_coll_resources[inst_coll_name] icr['n_jobs'] += 1 if len(parent_ids) == 0: state = 'Ready' @@ -770,7 +833,7 @@ async def create_jobs(request, userdata): jobs_args.append( (batch_id, job_id, state, json.dumps(db_spec), - always_run, cores_mcpu, len(parent_ids), pool.name)) + always_run, cores_mcpu, len(parent_ids), inst_coll_name)) for parent_id in parent_ids: job_parents_args.append( @@ -945,11 +1008,9 @@ async def insert(tx): return web.json_response({'id': id}) -async def _get_batch(app, batch_id, user): +async def _get_batch(app, batch_id): db: Database = app['db'] - # CPG-specific workaround until there's a permission model for sharing - # batches: don't check the user in this query. 
record = await db.select_and_fetchone(''' SELECT batches.*, SUM(`usage` * rate) AS cost FROM batches LEFT JOIN aggregated_batch_resources @@ -958,30 +1019,28 @@ async def _get_batch(app, batch_id, user): ON aggregated_batch_resources.resource = resources.resource WHERE id = %s AND NOT deleted GROUP BY batches.id; -''', (batch_id,)) +''', (batch_id)) if not record: raise web.HTTPNotFound() return batch_record_to_dict(record) -async def _cancel_batch(app, batch_id, user): - await cancel_batch_in_db(app['db'], batch_id, user) - +async def _cancel_batch(app, batch_id): + await cancel_batch_in_db(app['db'], batch_id) app['cancel_batch_state_changed'].set() - return web.Response() -async def _delete_batch(app, batch_id, user): +async def _delete_batch(app, batch_id): db: Database = app['db'] record = await db.select_and_fetchone( ''' SELECT `state` FROM batches -WHERE user = %s AND id = %s AND NOT deleted; +WHERE id = %s AND NOT deleted; ''', - (user, batch_id)) + (batch_id,)) if not record: raise web.HTTPNotFound() @@ -996,20 +1055,16 @@ async def _delete_batch(app, batch_id, user): @routes.get('/api/v1alpha/batches/{batch_id}') @prom_async_time(REQUEST_TIME_POST_GET_BATCH) -@rest_authenticated_users_only -async def get_batch(request, userdata): - batch_id = int(request.match_info['batch_id']) - user = userdata['username'] - return web.json_response(await _get_batch(request.app, batch_id, user)) +@rest_billing_project_users_only +async def get_batch(request, userdata, batch_id): # pylint: disable=unused-argument + return web.json_response(await _get_batch(request.app, batch_id)) @routes.patch('/api/v1alpha/batches/{batch_id}/cancel') @prom_async_time(REQUEST_TIME_PATCH_CANCEL_BATCH) -@rest_authenticated_users_only -async def cancel_batch(request, userdata): - batch_id = int(request.match_info['batch_id']) - user = userdata['username'] - await _handle_api_error(_cancel_batch, request.app, batch_id, user) +@rest_billing_project_users_only +async def cancel_batch(request, userdata, batch_id): # pylint: disable=unused-argument + await _handle_api_error(_cancel_batch, request.app, batch_id) return web.Response() @@ -1056,23 +1111,18 @@ async def close_batch(request, userdata): @routes.delete('/api/v1alpha/batches/{batch_id}') @prom_async_time(REQUEST_TIME_DELETE_BATCH) -@rest_authenticated_users_only -async def delete_batch(request, userdata): - batch_id = int(request.match_info['batch_id']) - user = userdata['username'] - await _delete_batch(request.app, batch_id, user) +@rest_billing_project_users_only +async def delete_batch(request, userdata, batch_id): # pylint: disable=unused-argument + await _delete_batch(request.app, batch_id) return web.Response() @routes.get('/batches/{batch_id}') @prom_async_time(REQUEST_TIME_GET_BATCH_UI) -@web_authenticated_users_only() -async def ui_batch(request, userdata): +@web_billing_project_users_only() +async def ui_batch(request, userdata, batch_id): app = request.app - batch_id = int(request.match_info['batch_id']) - user = userdata['username'] - - batch = await _get_batch(app, batch_id, user) + batch = await _get_batch(app, batch_id) jobs, last_job_id = await _query_batch_jobs(request, batch_id) for j in jobs: @@ -1093,12 +1143,10 @@ async def ui_batch(request, userdata): @routes.post('/batches/{batch_id}/cancel') @prom_async_time(REQUEST_TIME_POST_CANCEL_BATCH_UI) @check_csrf_token -@web_authenticated_users_only(redirect=False) -async def ui_cancel_batch(request, userdata): - batch_id = int(request.match_info['batch_id']) - user = userdata['username'] 
+@web_billing_project_users_only(redirect=False) +async def ui_cancel_batch(request, userdata, batch_id): # pylint: disable=unused-argument session = await aiohttp_session.get_session(request) - errored = await _handle_ui_error(session, _cancel_batch, request.app, batch_id, user) + errored = await _handle_ui_error(session, _cancel_batch, request.app, batch_id) if not errored: set_message(session, f'Batch {batch_id} cancelled.', 'info') location = request.app.router['batches'].url_for() @@ -1108,11 +1156,9 @@ async def ui_cancel_batch(request, userdata): @routes.post('/batches/{batch_id}/delete') @prom_async_time(REQUEST_TIME_POST_DELETE_BATCH_UI) @check_csrf_token -@web_authenticated_users_only(redirect=False) -async def ui_delete_batch(request, userdata): - batch_id = int(request.match_info['batch_id']) - user = userdata['username'] - await _delete_batch(request.app, batch_id, user) +@web_billing_project_users_only(redirect=False) +async def ui_delete_batch(request, userdata, batch_id): # pylint: disable=unused-argument + await _delete_batch(request.app, batch_id) session = await aiohttp_session.get_session(request) set_message(session, f'Batch {batch_id} deleted.', 'info') location = request.app.router['batches'].url_for() @@ -1124,22 +1170,21 @@ async def ui_delete_batch(request, userdata): @web_authenticated_users_only() async def ui_batches(request, userdata): user = userdata['username'] - batches, last_batch_id = await _query_batches(request, user) + q = request.query.get('q', f'user:{user}') + batches, last_batch_id = await _query_batches(request, user, q) for batch in batches: batch['cost'] = cost_str(batch['cost']) page_context = { 'batches': batches, - 'q': request.query.get('q'), + 'q': q, 'last_batch_id': last_batch_id } return await render_template('batch', request, userdata, 'batches.html', page_context) -async def _get_job(app, batch_id, job_id, user): +async def _get_job(app, batch_id, job_id): db: Database = app['db'] - # CPG-specific workaround until there's a permission model for sharing - # batches: don't check the user in this query. record = await db.select_and_fetchone(''' SELECT jobs.*, ip_address, format_version, SUM(`usage` * rate) AS cost FROM jobs @@ -1175,11 +1220,9 @@ async def _get_job(app, batch_id, job_id, user): return job -async def _get_attempts(app, batch_id, job_id, user): +async def _get_attempts(app, batch_id, job_id): db: Database = app['db'] - # CPG-specific workaround until there's a permission model for sharing - # batches: don't check the user in this query. 
attempts = db.select_and_fetchall(''' SELECT attempts.* FROM jobs @@ -1222,40 +1265,32 @@ async def _get_attempts(app, batch_id, job_id, user): @routes.get('/api/v1alpha/batches/{batch_id}/jobs/{job_id}/attempts') @prom_async_time(REQUEST_TIME_GET_ATTEMPTS) -@rest_authenticated_users_only -async def get_attempts(request, userdata): - batch_id = int(request.match_info['batch_id']) +@rest_billing_project_users_only +async def get_attempts(request, userdata, batch_id): # pylint: disable=unused-argument job_id = int(request.match_info['job_id']) - user = userdata['username'] - - attempts = await _get_attempts(request.app, batch_id, job_id, user) + attempts = await _get_attempts(request.app, batch_id, job_id) return web.json_response(attempts) @routes.get('/api/v1alpha/batches/{batch_id}/jobs/{job_id}') @prom_async_time(REQUEST_TIME_GET_JOB) -@rest_authenticated_users_only -async def get_job(request, userdata): - batch_id = int(request.match_info['batch_id']) +@rest_billing_project_users_only +async def get_job(request, userdata, batch_id): # pylint: disable=unused-argument job_id = int(request.match_info['job_id']) - user = userdata['username'] - - status = await _get_job(request.app, batch_id, job_id, user) + status = await _get_job(request.app, batch_id, job_id) return web.json_response(status) @routes.get('/batches/{batch_id}/jobs/{job_id}') @prom_async_time(REQUEST_TIME_GET_JOB_UI) -@web_authenticated_users_only() -async def ui_get_job(request, userdata): +@web_billing_project_users_only() +async def ui_get_job(request, userdata, batch_id): app = request.app - batch_id = int(request.match_info['batch_id']) job_id = int(request.match_info['job_id']) - user = userdata['username'] - job_status, attempts, job_log = await asyncio.gather(_get_job(app, batch_id, job_id, user), - _get_attempts(app, batch_id, job_id, user), - _get_job_log(app, batch_id, job_id, user)) + job_status, attempts, job_log = await asyncio.gather(_get_job(app, batch_id, job_id), + _get_attempts(app, batch_id, job_id), + _get_job_log(app, batch_id, job_id)) job_status_status = job_status['status'] container_status_spec = dictfix.NoneOr({ @@ -1905,6 +1940,10 @@ async def on_startup(app): '/gsa-key/key.json') app['log_store'] = LogStore(BATCH_BUCKET_NAME, instance_id, pool, credentials=credentials) + inst_coll_configs = InstanceCollectionConfigs(app) + app['inst_coll_configs'] = inst_coll_configs + await inst_coll_configs.async_init() + cancel_batch_state_changed = asyncio.Event() app['cancel_batch_state_changed'] = cancel_batch_state_changed @@ -1919,10 +1958,6 @@ async def on_startup(app): 'delete_batch_loop', run_if_changed, delete_batch_state_changed, delete_batch_loop_body, app)) - pool_selector = PoolSelector(app) - app['pool_selector'] = pool_selector - await pool_selector.async_init() - async def on_cleanup(app): try: From ee0a85e76d3762a657beca6dfbab137e04cbd856 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Fri, 19 Feb 2021 08:36:46 +1100 Subject: [PATCH 133/501] Gitignore */hail.jar and *.dylib --- .gitignore | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 286f17f3c29..d12b6a4c27d 100644 --- a/.gitignore +++ b/.gitignore @@ -24,7 +24,6 @@ hs_err_pid*.log GPATH GRTAGS GTAGS -hail/prebuilt/lib/darwin/libboot.dylib -hail/prebuilt/lib/darwin/libhail.dylib -query/hail.jar +*.dylib +*/hail.jar infra/.terraform.lock.hcl From 305fc85a2ec2269e4e5b254b42c2a357a67f867d Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Fri, 19 Feb 2021 12:22:40 +1100 Subject: 
[PATCH 134/501] Add a prod_deploy endpoint for deploying to production (#44) * Add a prod_deploy endpoint for deploying to production. * Revert front_end. * Actually revert front_end. --- ci/ci/ci.py | 33 ++++++++++++++++++++++++++++++++- ci/ci/github.py | 7 +++---- 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/ci/ci/ci.py b/ci/ci/ci.py index a41bd2187db..5426288efcb 100644 --- a/ci/ci/ci.py +++ b/ci/ci/ci.py @@ -17,7 +17,7 @@ ) from web_common import setup_aiohttp_jinja2, setup_common_static_routes, render_template -from .github import FQBranch, UnwatchedBranch +from .github import FQBranch, WatchedBranch, UnwatchedBranch log = logging.getLogger('ci') @@ -123,6 +123,37 @@ async def dev_deploy_branch(request, userdata): return web.json_response({'sha': sha, 'batch_id': batch_id}) +# This is CPG-specific, as the Hail team redeploys by watching the main branch. +@routes.post('/api/v1alpha/prod_deploy') +@rest_authenticated_developers_only +async def prod_deploy(request, unused_userdata): + """Deploys the main branch to the production namespace ("default").""" + + app = request.app + try: + params = await request.json() + except Exception as e: + message = 'could not read body as JSON' + log.info('prod deploy failed: ' + message, exc_info=True) + raise web.HTTPBadRequest(text=message) from e + + try: + steps = params['steps'] + except Exception as e: + message = f'parameters are wrong; check the steps syntax.\n\n{params}' + log.info('prod deploy failed: ' + message, exc_info=True) + raise web.HTTPBadRequest(text=message) from e + + watched_branch = WatchedBranch( + 0, FQBranch.from_short_str('populationgenomics/hail:main'), True + ) + watched_branch.sha = 'HEAD' + await watched_branch._start_deploy(request.app['batch_client'], steps) + + url = deploy_config.external_url('ci', '/batches') + return web.Response(text=f'{url}\n') + + async def on_startup(app): app['gh_client_session'] = aiohttp.ClientSession( timeout=aiohttp.ClientTimeout(total=5) diff --git a/ci/ci/github.py b/ci/ci/github.py index 92613d8c611..08dc1176bb5 100644 --- a/ci/ci/github.py +++ b/ci/ci/github.py @@ -6,7 +6,6 @@ import concurrent.futures import aiohttp import gidgethub -import zulip from hailtop.config import get_deploy_config from hailtop.batch_client.aioclient import Batch @@ -24,7 +23,7 @@ CALLBACK_URL = deploy_config.url('ci', '/api/v1alpha/batch_callback') -zulip_client = None # zulip.Client(config_file="/zulip-config/.zuliprc") +zulip_client = None # zulip.Client(config_file="/zulip-config/.zuliprc") class Repo: @@ -761,7 +760,7 @@ async def _heal(self, batch_client, dbpool, gh): log.info(f'cancel batch {batch.id} for {attrs["pr"]} {attrs["source_sha"]} => {attrs["target_sha"]}') await batch.cancel() - async def _start_deploy(self, batch_client): + async def _start_deploy(self, batch_client, steps=()): # not deploying assert not self.deploy_batch or self.deploy_state @@ -778,7 +777,7 @@ async def _start_deploy(self, batch_client): ''' ) with open(f'{repo_dir}/build.yaml', 'r') as f: - config = BuildConfiguration(self, f.read(), scope='deploy') + config = BuildConfiguration(self, f.read(), scope='deploy', requested_step_names=steps) log.info(f'creating deploy batch for {self.branch.short_str()}') deploy_batch = batch_client.create_batch( From 7e41da21bc39b4d67980b6bad2d600dda4b97861 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Fri, 19 Feb 2021 17:07:52 +1100 Subject: [PATCH 135/501] Use GCS as the backend for the Terraform state. 
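The `gcs` backend added below keeps the Terraform state in the `cpg-hail-terraform` bucket rather than in a local `terraform.tfstate` file, so the state is shared between operators. Terraform does not create the backend bucket itself, so it has to exist before `terraform init` runs against this configuration; a minimal sketch of creating it with the google-cloud-storage client, assuming application-default credentials and reusing the project and location values from `global.tfvars`:

```
# Sketch: create the bucket that backs the Terraform state before `terraform init`.
# Assumes `pip install google-cloud-storage` and application-default credentials.
from google.cloud import storage

def ensure_state_bucket(project: str = 'hail-295901',
                        name: str = 'cpg-hail-terraform',
                        location: str = 'australia-southeast1') -> None:
    client = storage.Client(project=project)
    bucket = client.bucket(name)
    if not bucket.exists():
        # Object versioning lets a corrupted state file be rolled back.
        bucket.versioning_enabled = True
        client.create_bucket(bucket, location=location)
```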
--- infra/global.tfvars | 8 ++++++++ infra/main.tf | 3 +++ 2 files changed, 11 insertions(+) create mode 100644 infra/global.tfvars diff --git a/infra/global.tfvars b/infra/global.tfvars new file mode 100644 index 00000000000..391608593aa --- /dev/null +++ b/infra/global.tfvars @@ -0,0 +1,8 @@ +gsuite_organization = "populationgenomics.org.au" +batch_gcp_regions = "[\"australia-southeast1\"]" +gcp_project = "hail-295901" +gcp_location = "australia-southeast1" +gcp_region = "australia-southeast1" +gcp_zone = "australia-southeast1-b" +domain = "hail.populationgenomics.org.au" +use_artifact_registry = true diff --git a/infra/main.tf b/infra/main.tf index 93814998ff4..b001ad988d5 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -9,6 +9,9 @@ terraform { version = "1.13.3" } } + backend "gcs" { + bucket = "cpg-hail-terraform" + } } variable "gsuite_organization" {} From b41ae4750fbfb057285d22188657a43408b15be2 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Mon, 22 Feb 2021 09:17:29 +1100 Subject: [PATCH 136/501] Remove .git from dockerignore --- .dockerignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.dockerignore b/.dockerignore index f74c7566b00..90c65c26b86 100644 --- a/.dockerignore +++ b/.dockerignore @@ -9,7 +9,6 @@ hail/.bloop/ hail/.gradle/ hail/.idea/ hail/.pytest_cache/ -# .git/ hail/.ensime.cache.d/ hail/.ensime_cache.d/ hail/.ensime_cache/ From 5de68b045dc235c27e4e717d34a95e52a23c5e23 Mon Sep 17 00:00:00 2001 From: Vlad Savelyev Date: Mon, 22 Feb 2021 09:18:06 +1100 Subject: [PATCH 137/501] Merge pull request #43 from populationgenomics/add-version-endpoint Add version endpoint to auth API --- .dockerignore | 1 - auth/auth/auth.py | 9 +++++++++ docker/Dockerfile.service-base | 5 +++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/.dockerignore b/.dockerignore index 59bd597af0f..90c65c26b86 100644 --- a/.dockerignore +++ b/.dockerignore @@ -9,7 +9,6 @@ hail/.bloop/ hail/.gradle/ hail/.idea/ hail/.pytest_cache/ -.git/ hail/.ensime.cache.d/ hail/.ensime_cache.d/ hail/.ensime_cache/ diff --git a/auth/auth/auth.py b/auth/auth/auth.py index d3f42a87c1f..4f2c5c4e12d 100644 --- a/auth/auth/auth.py +++ b/auth/auth/auth.py @@ -8,6 +8,7 @@ import google.auth.transport.requests import google.oauth2.id_token import google_auth_oauthlib.flow +from hailtop.hailctl import version from hailtop.config import get_deploy_config from hailtop.tls import internal_server_ssl_context from hailtop.hail_logging import AccessLogger @@ -558,6 +559,14 @@ async def userinfo(request): return web.json_response(user) +@routes.get('/api/v1alpha/version') +async def rest_get_version(request): # pylint: disable=W0613 + try: + return web.Response(text=version()) + except Exception as e: + return web.json_response({"error": str(e)}) + + async def on_startup(app): db = Database() await db.async_init(maxsize=50) diff --git a/docker/Dockerfile.service-base b/docker/Dockerfile.service-base index c34a143ff44..dfd3e7bb779 100644 --- a/docker/Dockerfile.service-base +++ b/docker/Dockerfile.service-base @@ -7,6 +7,11 @@ RUN hail-pip-install -r service-base-requirements.txt COPY hail/python/setup-hailtop.py /hailtop/setup.py COPY hail/python/hailtop /hailtop/hailtop/ +COPY hail/Makefile hail/env_var.mk .git/ hailtop/ +RUN mkdir -p hailtop/python/hail hailtop/python/hailtop/hailctl hailtop/python/hail/docs/_static && \ + (cd hailtop && echo $(pwd) && make python-version-info) && \ + cp hailtop/python/hail/hail_*version hailtop/hailtop/hailctl && \ + rm -rf hailtop/Makefile hailtop/env_var.mk .git/ RUN 
hail-pip-install /hailtop && rm -rf /hailtop COPY gear/setup.py /gear/setup.py From f42f31b3c0165e4ee184863a31c9499f3feffa12 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Mon, 22 Feb 2021 19:51:52 +1100 Subject: [PATCH 138/501] Sync `hailctl version` and conda version --- .github/workflows/condarise.yaml | 32 ++++++-------------------------- 1 file changed, 6 insertions(+), 26 deletions(-) diff --git a/.github/workflows/condarise.yaml b/.github/workflows/condarise.yaml index 70c3f6a43a9..005004adbbb 100644 --- a/.github/workflows/condarise.yaml +++ b/.github/workflows/condarise.yaml @@ -4,28 +4,7 @@ on: branches: - main jobs: - set-conda-pkg-version: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@main - - - name: Fix meta YAML - run: | - MKFILE=hail/Makefile - MAJOR_MINOR=$(grep -Po 'HAIL_MAJOR_MINOR_VERSION := \K.*(?=)' ${MKFILE}) - PATCH=$(grep -Po 'HAIL_PATCH_VERSION := \K.*(?=)' ${MKFILE}) - VERSION=${MAJOR_MINOR}.${PATCH}.dev${GITHUB_SHA:0:7} - cat conda/hail/meta-template.yaml \ - | sed s/{version}/${VERSION}/ > conda/hail/meta.yaml - - - name: Upload meta YAML for build job - uses: actions/upload-artifact@v2 - with: - name: meta.yaml - path: conda/hail/meta.yaml - build-publish: - needs: set-conda-pkg-version strategy: matrix: os: [ubuntu-latest, macos-latest] @@ -36,11 +15,12 @@ jobs: steps: - uses: actions/checkout@main - - name: Download meta YAML - uses: actions/download-artifact@v2 - with: - name: meta.yaml - path: conda/hail/ + - name: Fix meta YAML + run: | + make -C hail python-version-info + VERSION=$(cat hail/python/hail/hail_version) + cat conda/hail/meta-template.yaml \ + | sed s/{version}/${VERSION}/ > conda/hail/meta.yaml - uses: conda-incubator/setup-miniconda@v2 with: From 50b74ec0608565c52e4ca1086ae4e255bccd5feb Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Tue, 23 Feb 2021 14:43:32 +1100 Subject: [PATCH 139/501] Fix readme --- conda/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/README.md b/conda/README.md index c0db52add0e..0d41342aa48 100644 --- a/conda/README.md +++ b/conda/README.md @@ -27,7 +27,7 @@ bash miniconda.sh When installing, to prioritize the CPG package, list the `cpg` channel before `bioconda`: ``` -conda create --name hail -c cpg -c bioconda -c conda-forge hail +conda create --name hail -c cpg -c conda-forge hail conda activate hail ``` From ca8a7a82ab2dde51af588b57b31eee9bebe2980b Mon Sep 17 00:00:00 2001 From: MIchael Franklin Date: Tue, 23 Feb 2021 15:10:43 +1100 Subject: [PATCH 140/501] Move version from auth -> query service --- auth/auth/auth.py | 9 --------- query/query/query.py | 11 +++++++++++ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/auth/auth/auth.py b/auth/auth/auth.py index 4f2c5c4e12d..d3f42a87c1f 100644 --- a/auth/auth/auth.py +++ b/auth/auth/auth.py @@ -8,7 +8,6 @@ import google.auth.transport.requests import google.oauth2.id_token import google_auth_oauthlib.flow -from hailtop.hailctl import version from hailtop.config import get_deploy_config from hailtop.tls import internal_server_ssl_context from hailtop.hail_logging import AccessLogger @@ -559,14 +558,6 @@ async def userinfo(request): return web.json_response(user) -@routes.get('/api/v1alpha/version') -async def rest_get_version(request): # pylint: disable=W0613 - try: - return web.Response(text=version()) - except Exception as e: - return web.json_response({"error": str(e)}) - - async def on_startup(app): db = Database() await db.async_init(maxsize=50) diff --git a/query/query/query.py 
b/query/query/query.py index da37bf846d1..2e277193d2d 100644 --- a/query/query/query.py +++ b/query/query/query.py @@ -12,6 +12,7 @@ from hailtop.config import get_deploy_config from hailtop.tls import internal_server_ssl_context from hailtop.hail_logging import AccessLogger +from hailtop.hailctl import version from gear import setup_aiohttp_session, rest_authenticated_users_only, rest_authenticated_developers_only uvloop.install() @@ -19,6 +20,8 @@ DEFAULT_NAMESPACE = os.environ['HAIL_DEFAULT_NAMESPACE'] log = logging.getLogger('batch') routes = web.RouteTableDef() +# Store this value once so we don't hit the desk +HAIL_VERSION = version() def java_to_web_response(jresp): @@ -196,6 +199,14 @@ async def set_flag(request, userdata): # pylint: disable=unused-argument return java_to_web_response(jresp) +@routes.get('/api/v1alpha/version') +async def rest_get_version(request): # pylint: disable=W0613 + try: + return web.Response(text=HAIL_VERSION) + except Exception as e: + return web.json_response({"error": str(e)}) + + async def on_startup(app): thread_pool = concurrent.futures.ThreadPoolExecutor(max_workers=16) app['thread_pool'] = thread_pool From 0fc9d1cb189b2353acc6d816449af348d559342b Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Tue, 23 Feb 2021 15:13:19 +1100 Subject: [PATCH 141/501] Fix version tag (no longer exact same as for hailctl :() --- .github/workflows/condarise.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/condarise.yaml b/.github/workflows/condarise.yaml index 005004adbbb..b62c0cdb412 100644 --- a/.github/workflows/condarise.yaml +++ b/.github/workflows/condarise.yaml @@ -19,6 +19,7 @@ jobs: run: | make -C hail python-version-info VERSION=$(cat hail/python/hail/hail_version) + VERSION=${VERSION/-/.dev} cat conda/hail/meta-template.yaml \ | sed s/{version}/${VERSION}/ > conda/hail/meta.yaml From 0aee766e51ccdbec0e860406f7afa5e4076ddc7a Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Tue, 23 Feb 2021 15:15:22 +1100 Subject: [PATCH 142/501] To test the PR --- .github/workflows/condarise.yaml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/condarise.yaml b/.github/workflows/condarise.yaml index b62c0cdb412..43619562b45 100644 --- a/.github/workflows/condarise.yaml +++ b/.github/workflows/condarise.yaml @@ -1,8 +1,5 @@ name: Condarise -on: - push: - branches: - - main +on: [push, pull_request] jobs: build-publish: strategy: From 8225daae44e08cf5e1346a38cd53668ba50dbee8 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Tue, 23 Feb 2021 15:21:48 +1100 Subject: [PATCH 143/501] Revert "To test the PR" This reverts commit 0aee766e51ccdbec0e860406f7afa5e4076ddc7a. 
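The two Condarise changes above derive the conda package version from the generated `hail/python/hail/hail_version` file and then rewrite the first `-` to `.dev`, since conda version strings cannot contain a hyphen. The bash substitution `${VERSION/-/.dev}` is roughly equivalent to the following; the example string only illustrates the assumed `hail_version` format.

```
# Rough Python equivalent of the workflow's VERSION=${VERSION/-/.dev} step.
def conda_version(hail_version: str) -> str:
    # replace only the first '-', e.g. '0.2.62-abc1234' -> '0.2.62.devabc1234'
    return hail_version.replace('-', '.dev', 1)

assert conda_version('0.2.62-abc1234') == '0.2.62.devabc1234'
```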
--- .github/workflows/condarise.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/condarise.yaml b/.github/workflows/condarise.yaml index 43619562b45..b62c0cdb412 100644 --- a/.github/workflows/condarise.yaml +++ b/.github/workflows/condarise.yaml @@ -1,5 +1,8 @@ name: Condarise -on: [push, pull_request] +on: + push: + branches: + - main jobs: build-publish: strategy: From 732ed67826e85c0cbbf5d63812b04dd45f24dd6c Mon Sep 17 00:00:00 2001 From: MIchael Franklin Date: Tue, 23 Feb 2021 15:10:43 +1100 Subject: [PATCH 144/501] Move version from auth -> query service --- auth/auth/auth.py | 9 --------- query/query/query.py | 11 +++++++++++ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/auth/auth/auth.py b/auth/auth/auth.py index 4f2c5c4e12d..d3f42a87c1f 100644 --- a/auth/auth/auth.py +++ b/auth/auth/auth.py @@ -8,7 +8,6 @@ import google.auth.transport.requests import google.oauth2.id_token import google_auth_oauthlib.flow -from hailtop.hailctl import version from hailtop.config import get_deploy_config from hailtop.tls import internal_server_ssl_context from hailtop.hail_logging import AccessLogger @@ -559,14 +558,6 @@ async def userinfo(request): return web.json_response(user) -@routes.get('/api/v1alpha/version') -async def rest_get_version(request): # pylint: disable=W0613 - try: - return web.Response(text=version()) - except Exception as e: - return web.json_response({"error": str(e)}) - - async def on_startup(app): db = Database() await db.async_init(maxsize=50) diff --git a/query/query/query.py b/query/query/query.py index da37bf846d1..2e277193d2d 100644 --- a/query/query/query.py +++ b/query/query/query.py @@ -12,6 +12,7 @@ from hailtop.config import get_deploy_config from hailtop.tls import internal_server_ssl_context from hailtop.hail_logging import AccessLogger +from hailtop.hailctl import version from gear import setup_aiohttp_session, rest_authenticated_users_only, rest_authenticated_developers_only uvloop.install() @@ -19,6 +20,8 @@ DEFAULT_NAMESPACE = os.environ['HAIL_DEFAULT_NAMESPACE'] log = logging.getLogger('batch') routes = web.RouteTableDef() +# Store this value once so we don't hit the desk +HAIL_VERSION = version() def java_to_web_response(jresp): @@ -196,6 +199,14 @@ async def set_flag(request, userdata): # pylint: disable=unused-argument return java_to_web_response(jresp) +@routes.get('/api/v1alpha/version') +async def rest_get_version(request): # pylint: disable=W0613 + try: + return web.Response(text=HAIL_VERSION) + except Exception as e: + return web.json_response({"error": str(e)}) + + async def on_startup(app): thread_pool = concurrent.futures.ThreadPoolExecutor(max_workers=16) app['thread_pool'] = thread_pool From 453e206f0d5733f5fa8d0b81683a6b9227db7c5d Mon Sep 17 00:00:00 2001 From: MIchael Franklin Date: Wed, 24 Feb 2021 09:54:39 +1100 Subject: [PATCH 145/501] Add terminationGracePeriodSeconds to query deployment --- query/deployment.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/query/deployment.yaml b/query/deployment.yaml index 82d5b41cef5..e9abee964f3 100644 --- a/query/deployment.yaml +++ b/query/deployment.yaml @@ -16,6 +16,7 @@ spec: app: query hail.is/sha: "{{ code.sha }}" spec: + terminationGracePeriodSeconds: 28800 # 8 hours serviceAccountName: query {% if deploy %} priorityClassName: production From 2210349cd4f3132572a42c8d76a63f7d76fcfc17 Mon Sep 17 00:00:00 2001 From: MIchael Franklin Date: Wed, 24 Feb 2021 09:55:42 +1100 Subject: [PATCH 146/501] Add app.on_shutdown to wait for all 
asyncio tasks --- query/query/query.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/query/query/query.py b/query/query/query.py index da37bf846d1..858a58df4d7 100644 --- a/query/query/query.py +++ b/query/query/query.py @@ -229,6 +229,10 @@ async def on_cleanup(app): await asyncio.gather(*(t for t in asyncio.all_tasks() if t is not asyncio.current_task())) +async def on_shutdown(app): + await asyncio.gather(*(t for t in asyncio.all_tasks() if t is not asyncio.current_task())) + + def run(): app = web.Application() @@ -238,6 +242,7 @@ def run(): app.on_startup.append(on_startup) app.on_cleanup.append(on_cleanup) + app.on_shutdown.append(on_shutdown) deploy_config = get_deploy_config() web.run_app( From 55c2e09c5501a0817111dce7c850ed1ab1aef561 Mon Sep 17 00:00:00 2001 From: MIchael Franklin Date: Wed, 24 Feb 2021 12:05:55 +1100 Subject: [PATCH 147/501] Add test function to be part of dev-deploy --- query/query/query.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/query/query/query.py b/query/query/query.py index 858a58df4d7..d4ed17714c9 100644 --- a/query/query/query.py +++ b/query/query/query.py @@ -196,6 +196,25 @@ async def set_flag(request, userdata): # pylint: disable=unused-argument return java_to_web_response(jresp) +@routes.get('/api/v1alpha/wait') +async def wait_seconds(request): + """ + Wait query.duration seconds before returning the request. + """ + duration = request.query.get('duration') + try: + duration = int(duration) + except Exception as e: + return web.json_response({ + 'error': f'Invalid parameter duration "{duration}": {e}', + }, status=422) + + await asyncio.sleep(int(duration)) + e = os.getenv("TEST_VALUE", "None") + # remove_request_from_weakref_set(request) + return web.json_response({"d": f"You waited '{duration}' seconds!!", "env": e}) + + async def on_startup(app): thread_pool = concurrent.futures.ThreadPoolExecutor(max_workers=16) app['thread_pool'] = thread_pool From c3fe1b6231b09f919bac92e8bfde530225214a77 Mon Sep 17 00:00:00 2001 From: MIchael Franklin Date: Wed, 24 Feb 2021 13:30:51 +1100 Subject: [PATCH 148/501] Add logging to query shutdown --- query/query/query.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/query/query/query.py b/query/query/query.py index d4ed17714c9..3c6970b7dac 100644 --- a/query/query/query.py +++ b/query/query/query.py @@ -249,7 +249,10 @@ async def on_cleanup(app): async def on_shutdown(app): - await asyncio.gather(*(t for t in asyncio.all_tasks() if t is not asyncio.current_task())) + remaining_tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()] + log.info(f"On shutdown request received, with {len(remaining_tasks)} tasks left") + await asyncio.gather(*remaining_tasks) + log.info("Tasks have all completed.") def run(): From cc2cafc35a986971254a537369964eed04a06ba1 Mon Sep 17 00:00:00 2001 From: MIchael Franklin Date: Thu, 25 Feb 2021 09:57:19 +1100 Subject: [PATCH 149/501] Bump aiohttp version to avoid pre-cancelled tasks --- docker/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/requirements.txt b/docker/requirements.txt index ff6bc2e98fc..160bdd43442 100644 --- a/docker/requirements.txt +++ b/docker/requirements.txt @@ -2,7 +2,7 @@ aiodns==2.0.0 aiodocker==0.17.0 aiohttp-jinja2==1.1.1 aiohttp-session==2.7.0 -aiohttp==3.6.0 +aiohttp==3.7.3 aiomysql==0.0.20 aioredis==1.3.1 async-timeout==3.0.1 From af2f47012dfc9bc04c7ab04fabd19aa80e55b36e Mon Sep 17 00:00:00 2001 From: MIchael Franklin Date: Thu, 25 Feb 
2021 10:35:47 +1100 Subject: [PATCH 150/501] Cleanup test code --- query/query/query.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/query/query/query.py b/query/query/query.py index 3c6970b7dac..5038f3eab0c 100644 --- a/query/query/query.py +++ b/query/query/query.py @@ -196,25 +196,6 @@ async def set_flag(request, userdata): # pylint: disable=unused-argument return java_to_web_response(jresp) -@routes.get('/api/v1alpha/wait') -async def wait_seconds(request): - """ - Wait query.duration seconds before returning the request. - """ - duration = request.query.get('duration') - try: - duration = int(duration) - except Exception as e: - return web.json_response({ - 'error': f'Invalid parameter duration "{duration}": {e}', - }, status=422) - - await asyncio.sleep(int(duration)) - e = os.getenv("TEST_VALUE", "None") - # remove_request_from_weakref_set(request) - return web.json_response({"d": f"You waited '{duration}' seconds!!", "env": e}) - - async def on_startup(app): thread_pool = concurrent.futures.ThreadPoolExecutor(max_workers=16) app['thread_pool'] = thread_pool From ed51351e4754e763a1febb8a2c33a684c60e5a76 Mon Sep 17 00:00:00 2001 From: MIchael Franklin Date: Thu, 25 Feb 2021 10:46:56 +1100 Subject: [PATCH 151/501] Update on_shutdown logging phrasing --- query/query/query.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/query/query/query.py b/query/query/query.py index 5038f3eab0c..3ed7809e6ef 100644 --- a/query/query/query.py +++ b/query/query/query.py @@ -231,9 +231,9 @@ async def on_cleanup(app): async def on_shutdown(app): remaining_tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()] - log.info(f"On shutdown request received, with {len(remaining_tasks)} tasks left") + log.info(f"On shutdown request received, with {len(remaining_tasks)} remaining tasks") await asyncio.gather(*remaining_tasks) - log.info("Tasks have all completed.") + log.info("All tasks on shutdown have completed") def run(): From 68c3beb060743dd4e2e4371199033b07f0559720 Mon Sep 17 00:00:00 2001 From: MIchael Franklin Date: Thu, 25 Feb 2021 12:14:28 +1100 Subject: [PATCH 152/501] Use asyncio.wait instead of gather # Because we don't need to be strict on the order of the # returned tasks, because we only care that they finish. --- query/query/query.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/query/query/query.py b/query/query/query.py index 3ed7809e6ef..521a0e2a2a7 100644 --- a/query/query/query.py +++ b/query/query/query.py @@ -226,13 +226,15 @@ async def on_startup(app): async def on_cleanup(app): del app['k8s_client'] - await asyncio.gather(*(t for t in asyncio.all_tasks() if t is not asyncio.current_task())) + await asyncio.wait(*(t for t in asyncio.all_tasks() if t is not asyncio.current_task())) async def on_shutdown(app): + # Filter the asyncio.current_task(), because if we await + # the current task we'll end up in a deadlock remaining_tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()] log.info(f"On shutdown request received, with {len(remaining_tasks)} remaining tasks") - await asyncio.gather(*remaining_tasks) + await asyncio.wait(*remaining_tasks) log.info("All tasks on shutdown have completed") From dd4ba6e066bec2ff7061e6d819ebff05dd0929a7 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Thu, 25 Feb 2021 14:27:10 +1100 Subject: [PATCH 153/501] Fix build.yaml. 
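The query-service shutdown changes above are meant to drain in-flight requests before the pod goes away (hence the eight-hour `terminationGracePeriodSeconds` added earlier). One caveat worth noting: `asyncio.wait` takes a single iterable of tasks rather than unpacked arguments, and it raises if that iterable is empty, so the intended draining pattern looks roughly like the sketch below (a sketch of the pattern, not the patched code itself).

```
import asyncio

async def on_shutdown(app):
    # Exclude the current task, otherwise we would deadlock waiting on ourselves.
    remaining = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()]
    if remaining:
        # wait() rather than gather(): completion order does not matter here.
        await asyncio.wait(remaining)
```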
--- build.yaml | 39 +-------------------------------------- 1 file changed, 1 insertion(+), 38 deletions(-) diff --git a/build.yaml b/build.yaml index b66dc5aff98..6d87f2c12f2 100644 --- a/build.yaml +++ b/build.yaml @@ -123,7 +123,7 @@ steps: - from: /io/repo/atgu/sql to: /repo/atgu/ - from: /io/repo/hail/python/hailtop - to: /repo/ + to: /repo/hailtop/ - from: /io/repo/hail/python/test to: /repo/test/ - from: /io/repo/hail/python/cluster-tests @@ -136,10 +136,6 @@ steps: to: /git_version - from: /io/repo/address/test to: /repo/address/ - - from: /io/repo/ci/bootstrap_create_accounts.py - to: /repo/ci/ - - from: /io/repo/ci/create_initial_account.py - to: /repo/ci/ - from: /io/repo/lsm to: /repo/lsm - from: /io/repo/ci/bootstrap_create_accounts.py @@ -353,42 +349,9 @@ steps: kubectl -n {{ default_ns.name }} get -o json secret test-gsa-key | jq '{apiVersion, kind, type, data, metadata: {name: "benchmark-gsa-key"}}' | kubectl -n {{ default_ns.name }} apply -f - kubectl -n {{ default_ns.name }} get -o json secret test-gsa-key | jq '{apiVersion, kind, type, data, metadata: {name: "ci-gsa-key"}}' | kubectl -n {{ default_ns.name }} apply -f - kubectl -n {{ default_ns.name }} get -o json secret test-gsa-key | jq '{apiVersion, kind, type, data, metadata: {name: "test-dev-gsa-key"}}' | kubectl -n {{ default_ns.name }} apply -f - - serviceAccount: - name: admin - namespace: - valueFrom: default_ns.name scopes: - test - dev - dependsOn: - - default_ns - - service_base_image - - kind: buildImage - name: auth_image - dockerFile: auth/Dockerfile - contextPath: . - publishAs: auth - dependsOn: - - service_base_image - - kind: deploy - name: deploy_auth_driver_service_account - namespace: - valueFrom: default_ns.name - config: auth/auth-driver-service-account.yaml - dependsOn: - - default_ns - - kind: runImage - name: create_accounts - image: - valueFrom: auth_image.image - script: | - set -ex - export PROJECT={{ global.project }} - export ZONE={{ global.zone }} - export HAIL_PROJECT={{ global.project }} - export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} - export HAIL_SCOPE={{ scope }} - python3 /io/bootstrap_create_accounts.py serviceAccount: name: admin namespace: From fa97cd1a63d3ba6a91452403a7c67f161978f836 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Fri, 26 Feb 2021 09:20:17 +1100 Subject: [PATCH 154/501] One place with docker_prefix missed --- ci/test/resources/build.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/test/resources/build.yaml b/ci/test/resources/build.yaml index 0b78c54383f..2bd4ef77426 100644 --- a/ci/test/resources/build.yaml +++ b/ci/test/resources/build.yaml @@ -7,7 +7,7 @@ steps: name: inline_image dockerFile: inline: | - FROM {{ global.region }}-docker.pkg.dev/{{ global.project }}/hail/ubuntu:18.04 + FROM {{ global.docker_prefix }}/ubuntu:18.04 RUN apt-get update && apt-get install -y git contextPath: . 
- kind: runImage From ef9f131a7f22c76bd51c2ff5f2d3e2023f9edb39 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Fri, 26 Feb 2021 10:10:40 +1100 Subject: [PATCH 155/501] Use DOCKER_ROOT_IMAGE to pass to batch worker image --- batch/batch/batch_configuration.py | 2 +- batch/batch/driver/create_instance.py | 10 +++++----- batch/build-batch-worker-image-startup.sh | 6 ++---- batch/deployment.yaml | 8 ++++---- ci/Makefile | 2 +- ci/bootstrap.py | 5 +++-- ci/ci/build.py | 2 ++ ci/ci/environment.py | 1 + ci/deployment.yaml | 2 ++ 9 files changed, 21 insertions(+), 17 deletions(-) diff --git a/batch/batch/batch_configuration.py b/batch/batch/batch_configuration.py index efb9b969175..dae316a2e4f 100644 --- a/batch/batch/batch_configuration.py +++ b/batch/batch/batch_configuration.py @@ -8,7 +8,7 @@ GCP_REGION = os.environ['HAIL_GCP_REGION'] GCP_ZONE = os.environ['HAIL_GCP_ZONE'] -DOCKER_PREFIX = os.environ['HAIL_DOCKER_PREFIX'] +DOCKER_ROOT_IMAGE = os.environ['HAIL_DOCKER_ROOT_IMAGE'] BATCH_GCP_REGIONS = set(json.loads(os.environ['HAIL_BATCH_GCP_REGIONS'])) BATCH_GCP_REGIONS.add(GCP_REGION) diff --git a/batch/batch/driver/create_instance.py b/batch/batch/driver/create_instance.py index 969309675ca..a7bcf516cf1 100644 --- a/batch/batch/driver/create_instance.py +++ b/batch/batch/driver/create_instance.py @@ -5,7 +5,7 @@ from hailtop import aiogoogle -from ..batch_configuration import PROJECT, DOCKER_PREFIX, DEFAULT_NAMESPACE +from ..batch_configuration import PROJECT, DOCKER_ROOT_IMAGE, DEFAULT_NAMESPACE from ..worker_config import WorkerConfig from ..log_store import LogStore @@ -173,7 +173,7 @@ async def create_instance(app, zone, machine_name, machine_type, activation_toke ZONE=$(curl -s http://metadata.google.internal/computeMetadata/v1/instance/zone -H 'Metadata-Flavor: Google') BATCH_WORKER_IMAGE=$(curl -s -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/instance/attributes/batch_worker_image") -DOCKER_PREFIX=$(curl -s -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/instance/attributes/docker_prefix") +DOCKER_ROOT_IMAGE=$(curl -s -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/instance/attributes/docker_root_image") # Setup fluentd touch /worker.log @@ -250,7 +250,7 @@ async def create_instance(app, zone, machine_name, machine_type, activation_toke -e BATCH_LOGS_BUCKET_NAME=$BATCH_LOGS_BUCKET_NAME \ -e INSTANCE_ID=$INSTANCE_ID \ -e PROJECT=$PROJECT \ --e DOCKER_PREFIX=$DOCKER_PREFIX \ +-e DOCKER_ROOT_IMAGE=$DOCKER_ROOT_IMAGE \ -e WORKER_CONFIG=$WORKER_CONFIG \ -e MAX_IDLE_TIME_MSECS=$MAX_IDLE_TIME_MSECS \ -e WORKER_DATA_DISK_MOUNT=/mnt/disks/$WORKER_DATA_DISK_NAME \ @@ -295,8 +295,8 @@ async def create_instance(app, zone, machine_name, machine_type, activation_toke 'key': 'batch_worker_image', 'value': BATCH_WORKER_IMAGE }, { - 'key': 'docker_prefix', - 'value': DOCKER_PREFIX + 'key': 'docker_root_image', + 'value': DOCKER_ROOT_IMAGE }, { 'key': 'namespace', 'value': DEFAULT_NAMESPACE diff --git a/batch/build-batch-worker-image-startup.sh b/batch/build-batch-worker-image-startup.sh index 623b7eb985d..8401b8dfb72 100644 --- a/batch/build-batch-worker-image-startup.sh +++ b/batch/build-batch-worker-image-startup.sh @@ -40,11 +40,9 @@ curl -fsSL "https://github.com/GoogleCloudPlatform/docker-credential-gcr/release # avoid "unable to get current user home directory: os/user lookup failed" export HOME=/root -DOCKER_PREFIX=$(curl -s -H "Metadata-Flavor: Google" 
"http://metadata.google.internal/computeMetadata/v1/instance/attributes/docker_prefix") - docker-credential-gcr configure-docker --include-artifact-registry - -docker pull ${DOCKER_PREFIX}/ubuntu:18.04 +DOCKER_ROOT_IMAGE=$(curl -s -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/instance/attributes/docker_root_image") +docker pull ${DOCKER_ROOT_IMAGE} docker pull gcr.io/google.com/cloudsdktool/cloud-sdk:310.0.0-alpine # add docker daemon debug logging diff --git a/batch/deployment.yaml b/batch/deployment.yaml index 26b2018e2c3..818289fb9e3 100644 --- a/batch/deployment.yaml +++ b/batch/deployment.yaml @@ -62,8 +62,8 @@ spec: value: "{{ default_ns.name }}" - name: PROJECT value: "{{ global.project }}" - - name: HAIL_DOCKER_PREFIX - value: "{{ global.docker_prefix }}" + - name: HAIL_DOCKER_ROOT_IMAGE + value: "{{ global.docker_root_image }}" - name: HAIL_GCP_REGION valueFrom: secretKeyRef: @@ -200,8 +200,8 @@ spec: value: "{{ default_ns.name }}" - name: PROJECT value: "{{ global.project }}" - - name: HAIL_DOCKER_PREFIX - value: "{{ global.docker_prefix }}" + - name: HAIL_DOCKER_ROOT_IMAGE + value: "{{ global.docker_root_image }}" - name: HAIL_GCP_REGION valueFrom: secretKeyRef: diff --git a/ci/Makefile b/ci/Makefile index 4b40f21aa61..7b650a26c20 100644 --- a/ci/Makefile +++ b/ci/Makefile @@ -47,5 +47,5 @@ push: build .PHONY: deploy deploy: push push-ci-utils ! [ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default - python3 jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"default_ns":{"name":"$(NAMESPACE)"},"ci_image":{"image":"$(CI_IMAGE)"},"global":{"project":"$(PROJECT)","zone":"$(ZONE)","docker_prefix":"$(DOCKER_PREFIX)","ip":"$(IP)","domain":"$(DOMAIN)", "k8s_server_url":"$(KUBERNETES_SERVER_URL)"},"ci_utils_image":{"image":"$(CI_UTILS_IMAGE)"},"ci_database":{"user_secret_name":"sql-ci-user-config"}}' deployment.yaml deployment.yaml.out + python3 jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"default_ns":{"name":"$(NAMESPACE)"},"ci_image":{"image":"$(CI_IMAGE)"},"global":{"project":"$(PROJECT)","zone":"$(ZONE)","docker_prefix":"$(DOCKER_PREFIX)","docker_root_image":"$(DOCKER_ROOT_IMAGE)","ip":"$(IP)","domain":"$(DOMAIN)", "k8s_server_url":"$(KUBERNETES_SERVER_URL)"},"ci_utils_image":{"image":"$(CI_UTILS_IMAGE)"},"ci_database":{"user_secret_name":"sql-ci-user-config"}}' deployment.yaml deployment.yaml.out kubectl -n $(NAMESPACE) apply -f deployment.yaml.out diff --git a/ci/bootstrap.py b/ci/bootstrap.py index 3d1f82c2d4e..f0eb20b4373 100644 --- a/ci/bootstrap.py +++ b/ci/bootstrap.py @@ -17,6 +17,7 @@ from batch.driver.k8s_cache import K8sCache KUBERNETES_SERVER_URL = os.environ['KUBERNETES_SERVER_URL'] +DOCKER_PREFIX = os.environ['DOCKER_PREFIX'] def populate_secret_host_path(host_path: str, secret_data: Union[str, bytes]): @@ -144,7 +145,7 @@ async def run(self): f'{root}/shared:/shared', '-v', f'{job_root}/io:/io', - 'ubuntu:18.04', + f'{DOCKER_PREFIX}/ubuntu:18.04', '/bin/bash', '-c', copy_script, @@ -282,7 +283,7 @@ async def run(self): f'{root}/shared:/shared', '-v', f'{job_root}/io:/io', - 'ubuntu:18.04', + f'{DOCKER_PREFIX}/ubuntu:18.04', '/bin/bash', '-c', copy_script, diff --git a/ci/ci/build.py b/ci/ci/build.py index 6e4e9f08ec7..6cce1774c7d 100644 --- a/ci/ci/build.py +++ b/ci/ci/build.py @@ -12,6 +12,7 @@ GCP_PROJECT, GCP_ZONE, DOCKER_PREFIX, + DOCKER_ROOT_IMAGE, DOMAIN, IP, CI_UTILS_IMAGE, @@ -169,6 +170,7 @@ def input_config(self, 
code, scope): 'project': GCP_PROJECT, 'zone': GCP_ZONE, 'docker_prefix': DOCKER_PREFIX, + 'docker_root_image': DOCKER_ROOT_IMAGE, 'domain': DOMAIN, 'ip': IP, 'k8s_server_url': KUBERNETES_SERVER_URL, diff --git a/ci/ci/environment.py b/ci/ci/environment.py index fdba01268af..8dd4a782fdf 100644 --- a/ci/ci/environment.py +++ b/ci/ci/environment.py @@ -7,6 +7,7 @@ GCP_REGION = '-'.join(GCP_ZONE.split('-')[:-1]) # us-west1-a -> us-west1 DOCKER_PREFIX = os.environ.get('HAIL_DOCKER_PREFIX', f'gcr.io/{GCP_REGION}') assert DOCKER_PREFIX != '' +DOCKER_ROOT_IMAGE = os.environ['HAIL_DOCKER_ROOT_IMAGE'] DOMAIN = os.environ['HAIL_DOMAIN'] assert DOMAIN != '' IP = os.environ.get('HAIL_IP') diff --git a/ci/deployment.yaml b/ci/deployment.yaml index 5da6fe42d7a..4f3ab152a4a 100644 --- a/ci/deployment.yaml +++ b/ci/deployment.yaml @@ -54,6 +54,8 @@ spec: value: "{{ global.zone }}" - name: HAIL_DOCKER_PREFIX value: "{{ global.docker_prefix }}" + - name: HAIL_DOCKER_ROOT_IMAGE + value: "{{ global.docker_root_image }}" - name: HAIL_CI_UTILS_IMAGE value: "{{ ci_utils_image.image }}" - name: HAIL_IP From 2f86105b2197c2343c8a61aef9aad0c2c36f93fd Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Fri, 26 Feb 2021 10:21:50 +1100 Subject: [PATCH 156/501] Use DOCKER_ROOT_IMAGE wherever makes sense --- benchmark/Dockerfile | 2 +- benchmark/Makefile | 2 +- build.yaml | 20 ++++++++++---------- ci/bootstrap.py | 5 +++-- ci/test/resources/build.yaml | 2 +- 5 files changed, 16 insertions(+), 15 deletions(-) diff --git a/benchmark/Dockerfile b/benchmark/Dockerfile index 1c4f1950339..692d7eb7148 100644 --- a/benchmark/Dockerfile +++ b/benchmark/Dockerfile @@ -1,4 +1,4 @@ -FROM {{ global.docker_prefix }}/ubuntu:18.04 +FROM {{ global.docker_root_image }} ENV LANG C.UTF-8 diff --git a/benchmark/Makefile b/benchmark/Makefile index 3113151bf5c..6b5d22aa715 100644 --- a/benchmark/Makefile +++ b/benchmark/Makefile @@ -41,7 +41,7 @@ image_sha: wheel cleanup_image cp $(BENCHMARK_WHEEL) . # it's possible that the HAIL_WHEEL installs different dependencies, but this generally creates less work for docker cp ../hail/python/requirements.txt . - python3 ../ci/jinja2_render.py '{"global":{"docker_prefix":"$(DOCKER_PREFIX)"}' Dockerfile Dockerfile.out + python3 ../ci/jinja2_render.py '{"global":{"docker_root_image":"$(DOCKER_ROOT_IMAGE)"}' Dockerfile Dockerfile.out docker build -f Dockerfile.out -t $(BENCHMARK_DOCKER_TAG) . 
--build-arg HAIL_WHEEL=$(notdir $(HAIL_WHEEL)) --build-arg BENCHMARK_WHEEL=$(notdir $(BENCHMARK_WHEEL)) @printf $$(docker images -q --no-trunc $(BENCHMARK_DOCKER_TAG) | sed -e 's,[^:]*:,,') > image_sha @echo Image sha is `cat image_sha` diff --git a/build.yaml b/build.yaml index 6d87f2c12f2..65c14c37db7 100644 --- a/build.yaml +++ b/build.yaml @@ -2296,7 +2296,7 @@ steps: export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} export HAIL_NETCAT_UBUNTU_IMAGE={{ netcat_ubuntu_image.image }} export HAIL_HAIL_BASE_IMAGE={{ hail_base_image.image }} - export DOCKER_ROOT_IMAGE="{{ global.docker_prefix }}/ubuntu:18.04" + export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" export HAIL_TEST_TOKEN_FILE=/user-tokens/tokens.json export HAIL_TEST_DEV_TOKEN_FILE=/dev-tokens/tokens.json export HAIL_TOKEN="{{ token }}" @@ -2364,7 +2364,7 @@ steps: export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} export HAIL_NETCAT_UBUNTU_IMAGE={{ netcat_ubuntu_image.image }} export HAIL_HAIL_BASE_IMAGE={{ hail_base_image.image }} - export DOCKER_ROOT_IMAGE="{{ global.docker_prefix }}/ubuntu:18.04" + export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" export HAIL_TEST_TOKEN_FILE=/user-tokens/tokens.json export HAIL_TEST_DEV_TOKEN_FILE=/dev-tokens/tokens.json export HAIL_TOKEN="{{ token }}" @@ -2432,7 +2432,7 @@ steps: export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} export HAIL_NETCAT_UBUNTU_IMAGE={{ netcat_ubuntu_image.image }} export HAIL_HAIL_BASE_IMAGE={{ hail_base_image.image }} - export DOCKER_ROOT_IMAGE="{{ global.docker_prefix }}/ubuntu:18.04" + export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" export HAIL_TEST_TOKEN_FILE=/user-tokens/tokens.json export HAIL_TEST_DEV_TOKEN_FILE=/dev-tokens/tokens.json export HAIL_TOKEN="{{ token }}" @@ -2500,7 +2500,7 @@ steps: export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} export HAIL_NETCAT_UBUNTU_IMAGE={{ netcat_ubuntu_image.image }} export HAIL_HAIL_BASE_IMAGE={{ hail_base_image.image }} - export DOCKER_ROOT_IMAGE="{{ global.docker_prefix }}/ubuntu:18.04" + export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" export HAIL_TEST_TOKEN_FILE=/user-tokens/tokens.json export HAIL_TEST_DEV_TOKEN_FILE=/dev-tokens/tokens.json export HAIL_TOKEN="{{ token }}" @@ -2568,7 +2568,7 @@ steps: export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} export HAIL_NETCAT_UBUNTU_IMAGE={{ netcat_ubuntu_image.image }} export HAIL_HAIL_BASE_IMAGE={{ hail_base_image.image }} - export DOCKER_ROOT_IMAGE="{{ global.docker_prefix }}/ubuntu:18.04" + export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" export HAIL_TEST_TOKEN_FILE=/user-tokens/tokens.json export HAIL_TEST_DEV_TOKEN_FILE=/dev-tokens/tokens.json export HAIL_TOKEN="{{ token }}" @@ -2785,7 +2785,7 @@ steps: export HAIL_GSA_KEY_FILE=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=0 - export DOCKER_ROOT_IMAGE="{{ global.docker_prefix }}/ubuntu:18.04" + export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" export PYTHON_DILL_IMAGE="{{ global.docker_prefix }}/python-dill:3.7-slim" hailctl config set batch/billing_project test hailctl config set batch/bucket cpg-hail-test @@ -2836,7 +2836,7 @@ steps: export HAIL_GSA_KEY_FILE=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=1 - export DOCKER_ROOT_IMAGE="{{ global.docker_prefix }}/ubuntu:18.04" + export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" export PYTHON_DILL_IMAGE="{{ global.docker_prefix }}/python-dill:3.7-slim" hailctl config set batch/billing_project test hailctl config set batch/bucket cpg-hail-test @@ 
-2887,7 +2887,7 @@ steps: export HAIL_GSA_KEY_FILE=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=2 - export DOCKER_ROOT_IMAGE="{{ global.docker_prefix }}/ubuntu:18.04" + export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" export PYTHON_DILL_IMAGE="{{ global.docker_prefix }}/python-dill:3.7-slim" hailctl config set batch/billing_project test hailctl config set batch/bucket cpg-hail-test @@ -2938,7 +2938,7 @@ steps: export HAIL_GSA_KEY_FILE=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=3 - export DOCKER_ROOT_IMAGE="{{ global.docker_prefix }}/ubuntu:18.04" + export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" export PYTHON_DILL_IMAGE="{{ global.docker_prefix }}/python-dill:3.7-slim" hailctl config set batch/billing_project test hailctl config set batch/bucket cpg-hail-test @@ -2989,7 +2989,7 @@ steps: export HAIL_GSA_KEY_FILE=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=4 - export DOCKER_ROOT_IMAGE="{{ global.docker_prefix }}/ubuntu:18.04" + export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" export PYTHON_DILL_IMAGE="{{ global.docker_prefix }}/python-dill:3.7-slim" hailctl config set batch/billing_project test hailctl config set batch/bucket cpg-hail-test diff --git a/ci/bootstrap.py b/ci/bootstrap.py index f0eb20b4373..3cbe72fc61d 100644 --- a/ci/bootstrap.py +++ b/ci/bootstrap.py @@ -18,6 +18,7 @@ KUBERNETES_SERVER_URL = os.environ['KUBERNETES_SERVER_URL'] DOCKER_PREFIX = os.environ['DOCKER_PREFIX'] +DOCKER_ROOT_IMAGE = f'{DOCKER_PREFIX}/ubuntu:18.04' def populate_secret_host_path(host_path: str, secret_data: Union[str, bytes]): @@ -145,7 +146,7 @@ async def run(self): f'{root}/shared:/shared', '-v', f'{job_root}/io:/io', - f'{DOCKER_PREFIX}/ubuntu:18.04', + DOCKER_ROOT_IMAGE, '/bin/bash', '-c', copy_script, @@ -283,7 +284,7 @@ async def run(self): f'{root}/shared:/shared', '-v', f'{job_root}/io:/io', - f'{DOCKER_PREFIX}/ubuntu:18.04', + DOCKER_ROOT_IMAGE, '/bin/bash', '-c', copy_script, diff --git a/ci/test/resources/build.yaml b/ci/test/resources/build.yaml index 2bd4ef77426..67801cf15af 100644 --- a/ci/test/resources/build.yaml +++ b/ci/test/resources/build.yaml @@ -7,7 +7,7 @@ steps: name: inline_image dockerFile: inline: | - FROM {{ global.docker_prefix }}/ubuntu:18.04 + FROM {{ global.docker_root_image }} RUN apt-get update && apt-get install -y git contextPath: . 
- kind: runImage From 669746512bd9c441d99ebf486791dbdf7f1f7cf9 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Fri, 26 Feb 2021 10:49:27 +1100 Subject: [PATCH 157/501] Render build-batch-worker-image-startup.sh with Jinja to set up docker root image --- batch/Makefile | 5 +++-- batch/build-batch-worker-image-startup.sh | 3 +-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/batch/Makefile b/batch/Makefile index b557413539e..f5da4d578d3 100644 --- a/batch/Makefile +++ b/batch/Makefile @@ -48,7 +48,7 @@ push: build docker tag batch-worker $(BATCH_WORKER_IMAGE) docker push $(BATCH_WORKER_IMAGE) -JINJA_ENVIRONMENT = '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"batch_image":{"image":"$(BATCH_IMAGE)"},"batch_worker_image":{"image":"$(BATCH_WORKER_IMAGE)"},"default_ns":{"name":"$(NAMESPACE)"},"batch_database":{"user_secret_name":"sql-batch-user-config"},"global":{"project":"$(PROJECT)","domain":"$(DOMAIN)","k8s_server_url":"$(KUBERNETES_SERVER_URL)","docker_prefix":"$(DOCKER_PREFIX)"},"scope":"$(SCOPE)"}' +JINJA_ENVIRONMENT = '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"batch_image":{"image":"$(BATCH_IMAGE)"},"batch_worker_image":{"image":"$(BATCH_WORKER_IMAGE)"},"default_ns":{"name":"$(NAMESPACE)"},"batch_database":{"user_secret_name":"sql-batch-user-config"},"global":{"project":"$(PROJECT)","domain":"$(DOMAIN)","k8s_server_url":"$(KUBERNETES_SERVER_URL)","docker_prefix":"$(DOCKER_PREFIX)"}},"scope":"$(SCOPE)"}' .PHONY: deploy deploy: push @@ -62,7 +62,8 @@ deploy: push .PHONY: create-build-worker-image-instance create-build-worker-image-instance: -gcloud -q compute --project $(PROJECT) instances delete --zone=$(ZONE) build-batch-worker-image - gcloud -q compute --project $(PROJECT) instances create --zone=$(ZONE) build-batch-worker-image --machine-type=n1-standard-1 --network=default --network-tier=PREMIUM --metadata-from-file startup-script=build-batch-worker-image-startup.sh --no-restart-on-failure --maintenance-policy=MIGRATE --scopes=https://www.googleapis.com/auth/cloud-platform --image=$$(gcloud compute images list --standard-images --filter 'family="ubuntu-minimal-2004-lts"' --format='value(name)') --image-project=ubuntu-os-cloud --boot-disk-size=10GB --boot-disk-type=pd-ssd + python3 ../ci/jinja2_render.py '{"global":{"docker_root_image":"$(DOCKER_ROOT_IMAGE)"}}' build-batch-worker-image-startup.sh build-batch-worker-image-startup.sh.out + gcloud -q compute --project $(PROJECT) instances create --zone=$(ZONE) build-batch-worker-image --machine-type=n1-standard-1 --network=default --network-tier=PREMIUM --metadata-from-file startup-script=build-batch-worker-image-startup.sh.out --no-restart-on-failure --maintenance-policy=MIGRATE --scopes=https://www.googleapis.com/auth/cloud-platform --image=$$(gcloud compute images list --standard-images --filter 'family="ubuntu-minimal-2004-lts"' --format='value(name)') --image-project=ubuntu-os-cloud --boot-disk-size=10GB --boot-disk-type=pd-ssd .PHONY: create-worker-image create-worker-image: diff --git a/batch/build-batch-worker-image-startup.sh b/batch/build-batch-worker-image-startup.sh index 8401b8dfb72..4fdcd8ca732 100644 --- a/batch/build-batch-worker-image-startup.sh +++ b/batch/build-batch-worker-image-startup.sh @@ -41,8 +41,7 @@ curl -fsSL "https://github.com/GoogleCloudPlatform/docker-credential-gcr/release export HOME=/root docker-credential-gcr configure-docker --include-artifact-registry -DOCKER_ROOT_IMAGE=$(curl -s -H "Metadata-Flavor: Google" 
"http://metadata.google.internal/computeMetadata/v1/instance/attributes/docker_root_image") -docker pull ${DOCKER_ROOT_IMAGE} +docker pull {{ global.docker_root_image }} docker pull gcr.io/google.com/cloudsdktool/cloud-sdk:310.0.0-alpine # add docker daemon debug logging From ad3b96ecb1ae5016028137bd53f7fe68bc54cd61 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Fri, 26 Feb 2021 12:26:01 +1100 Subject: [PATCH 158/501] prod_deploy: print complete batch URL --- ci/ci/ci.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/ci/ci.py b/ci/ci/ci.py index 5426288efcb..7e10d9c5f89 100644 --- a/ci/ci/ci.py +++ b/ci/ci/ci.py @@ -150,7 +150,8 @@ async def prod_deploy(request, unused_userdata): watched_branch.sha = 'HEAD' await watched_branch._start_deploy(request.app['batch_client'], steps) - url = deploy_config.external_url('ci', '/batches') + batch_id = watched_branch.deploy_batch.id + url = deploy_config.external_url('ci', f'/batches/{batch_id}') return web.Response(text=f'{url}\n') From 127237c8e27276fbeec3100fe2792fe81c484200 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Fri, 26 Feb 2021 12:56:03 +1100 Subject: [PATCH 159/501] prod_deploy fix unused variable --- ci/ci/ci.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/ci/ci.py b/ci/ci/ci.py index 7e10d9c5f89..175b52a3548 100644 --- a/ci/ci/ci.py +++ b/ci/ci/ci.py @@ -148,7 +148,7 @@ async def prod_deploy(request, unused_userdata): 0, FQBranch.from_short_str('populationgenomics/hail:main'), True ) watched_branch.sha = 'HEAD' - await watched_branch._start_deploy(request.app['batch_client'], steps) + await watched_branch._start_deploy(app['batch_client'], steps) batch_id = watched_branch.deploy_batch.id url = deploy_config.external_url('ci', f'/batches/{batch_id}') From 8372680a2c703a47614cd106e9a6f58740a37e93 Mon Sep 17 00:00:00 2001 From: Vlad Savelyev Date: Fri, 26 Feb 2021 16:33:57 +1100 Subject: [PATCH 160/501] Update conda/README.md Co-authored-by: Michael Franklin --- conda/README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/conda/README.md b/conda/README.md index c45e68038f8..deedb5ca4aa 100644 --- a/conda/README.md +++ b/conda/README.md @@ -9,9 +9,8 @@ synced with the [official PyPI release](https://pypi.org/project/hail). However, a separate conda package in the `cpg` channel allows us to build it against the codebase in our fork. -Because we don't control versioning of Hail project, the `cpg` conda package is -versioned specifically: we append the git commit hash to the official version tag, -e.g. `0.2.62.dev289c163`. +We don't control versioning of original Hail project, so our `cpg` conda release name +is the official version tag appended with the git commit has, e.g. `0.2.62.dev289c163`. [GitHub Actions CI](../.github/workflows/condarise.yaml) is set up to build the package using this recipe and push it to Anaconda on every push event to the `main` branch in From 86f507027608e41b576112d2724616a301c19732 Mon Sep 17 00:00:00 2001 From: Vlad Savelyev Date: Fri, 26 Feb 2021 16:34:15 +1100 Subject: [PATCH 161/501] Update conda/README.md Co-authored-by: Michael Franklin --- conda/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/README.md b/conda/README.md index deedb5ca4aa..9e0ab3b9d48 100644 --- a/conda/README.md +++ b/conda/README.md @@ -17,8 +17,8 @@ using this recipe and push it to Anaconda on every push event to the `main` bran the [CPG hail fork](https://github.com/populationgenomics/hail). 
-When installing the package, list the `cpg` channel before `bioconda` to prioritize it -in the channel order: +When installing the package, list the `cpg` channel before `bioconda` to prioritize +the channel order: ``` conda create --name hail -c cpg -c bioconda -c conda-forge hail From b537d2114a494d0cd9a34bf468a1239a43e8533d Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Fri, 26 Feb 2021 17:22:21 +1100 Subject: [PATCH 162/501] Fix jinja params --- batch/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/batch/Makefile b/batch/Makefile index f5da4d578d3..fa12c0888e5 100644 --- a/batch/Makefile +++ b/batch/Makefile @@ -48,7 +48,7 @@ push: build docker tag batch-worker $(BATCH_WORKER_IMAGE) docker push $(BATCH_WORKER_IMAGE) -JINJA_ENVIRONMENT = '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"batch_image":{"image":"$(BATCH_IMAGE)"},"batch_worker_image":{"image":"$(BATCH_WORKER_IMAGE)"},"default_ns":{"name":"$(NAMESPACE)"},"batch_database":{"user_secret_name":"sql-batch-user-config"},"global":{"project":"$(PROJECT)","domain":"$(DOMAIN)","k8s_server_url":"$(KUBERNETES_SERVER_URL)","docker_prefix":"$(DOCKER_PREFIX)"}},"scope":"$(SCOPE)"}' +JINJA_ENVIRONMENT = '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"batch_image":{"image":"$(BATCH_IMAGE)"},"batch_worker_image":{"image":"$(BATCH_WORKER_IMAGE)"},"default_ns":{"name":"$(NAMESPACE)"},"batch_database":{"user_secret_name":"sql-batch-user-config"},"global":{"project":"$(PROJECT)","domain":"$(DOMAIN)","k8s_server_url":"$(KUBERNETES_SERVER_URL)","docker_prefix":"$(DOCKER_PREFIX)"},"scope":"$(SCOPE)"}' .PHONY: deploy deploy: push From 9a286d867ee76c6072fe512994ac8cfd54476408 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Sun, 28 Feb 2021 23:20:04 +1100 Subject: [PATCH 163/501] Add missing docker_root_image --- batch/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/batch/Makefile b/batch/Makefile index fa12c0888e5..6273c03defd 100644 --- a/batch/Makefile +++ b/batch/Makefile @@ -48,7 +48,7 @@ push: build docker tag batch-worker $(BATCH_WORKER_IMAGE) docker push $(BATCH_WORKER_IMAGE) -JINJA_ENVIRONMENT = '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"batch_image":{"image":"$(BATCH_IMAGE)"},"batch_worker_image":{"image":"$(BATCH_WORKER_IMAGE)"},"default_ns":{"name":"$(NAMESPACE)"},"batch_database":{"user_secret_name":"sql-batch-user-config"},"global":{"project":"$(PROJECT)","domain":"$(DOMAIN)","k8s_server_url":"$(KUBERNETES_SERVER_URL)","docker_prefix":"$(DOCKER_PREFIX)"},"scope":"$(SCOPE)"}' +JINJA_ENVIRONMENT = '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"batch_image":{"image":"$(BATCH_IMAGE)"},"batch_worker_image":{"image":"$(BATCH_WORKER_IMAGE)"},"default_ns":{"name":"$(NAMESPACE)"},"batch_database":{"user_secret_name":"sql-batch-user-config"},"global":{"project":"$(PROJECT)","domain":"$(DOMAIN)","k8s_server_url":"$(KUBERNETES_SERVER_URL)","docker_prefix":"$(DOCKER_PREFIX)","docker_root_image":"$(DOCKER_ROOT_IMAGE)"},"scope":"$(SCOPE)"}' .PHONY: deploy deploy: push From fe8bed12409af19b7c7f317c3ef1624ef05ef466 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Mon, 1 Mar 2021 13:08:20 +1100 Subject: [PATCH 164/501] Don't hardcode hail.is domain in letsencrypt --- letsencrypt/letsencrypt.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/letsencrypt/letsencrypt.sh b/letsencrypt/letsencrypt.sh index 9fc6b7a8455..2c4a37023f5 100755 --- 
a/letsencrypt/letsencrypt.sh +++ b/letsencrypt/letsencrypt.sh @@ -30,8 +30,8 @@ metadata: namespace: default type: Opaque data: - fullchain.pem: $(cat /etc/letsencrypt/live/hail.is/fullchain.pem | base64 | tr -d \\n) - privkey.pem: $(cat /etc/letsencrypt/live/hail.is/privkey.pem | base64 | tr -d \\n) + fullchain.pem: $(cat /etc/letsencrypt/live/$DOMAIN/fullchain.pem | base64 | tr -d \\n) + privkey.pem: $(cat /etc/letsencrypt/live/$DOMAIN/privkey.pem | base64 | tr -d \\n) options-ssl-nginx.conf: $(cat /options-ssl-nginx.conf | base64 | tr -d \\n) ssl-dhparams.pem: $(cat /opt/certbot/src/certbot/certbot/ssl-dhparams.pem | base64 | tr -d \\n) EOF From b10d8e18bf95b6099314b0c83f39ef87c8898b26 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Tue, 2 Mar 2021 00:09:51 +1100 Subject: [PATCH 165/501] [prod deploy] show batch URL: check for failed batch --- ci/ci/ci.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/ci/ci/ci.py b/ci/ci/ci.py index 175b52a3548..86276304a26 100644 --- a/ci/ci/ci.py +++ b/ci/ci/ci.py @@ -17,7 +17,7 @@ ) from web_common import setup_aiohttp_jinja2, setup_common_static_routes, render_template -from .github import FQBranch, WatchedBranch, UnwatchedBranch +from .github import FQBranch, WatchedBranch, UnwatchedBranch, MergeFailureBatch log = logging.getLogger('ci') @@ -150,8 +150,12 @@ async def prod_deploy(request, unused_userdata): watched_branch.sha = 'HEAD' await watched_branch._start_deploy(app['batch_client'], steps) - batch_id = watched_branch.deploy_batch.id - url = deploy_config.external_url('ci', f'/batches/{batch_id}') + batch = watched_branch.deploy_batch + if isinstance(batch, MergeFailureBatch): + path = f'/batches' + else: + path = f'/batches/{batch.id}' + url = deploy_config.external_url('ci', path) return web.Response(text=f'{url}\n') From f8b32ad7032335ca73d69a10c066516cec533bcb Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Tue, 2 Mar 2021 12:08:03 +1100 Subject: [PATCH 166/501] Push to production on merges with main. 
--- .github/workflows/prod_deploy.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 .github/workflows/prod_deploy.yaml diff --git a/.github/workflows/prod_deploy.yaml b/.github/workflows/prod_deploy.yaml new file mode 100644 index 00000000000..db879bd4e63 --- /dev/null +++ b/.github/workflows/prod_deploy.yaml @@ -0,0 +1,10 @@ +name: prod-deploy +on: + push: + branches: + - main +jobs: + invoke-prod-deploy: + runs-on: ubuntu-latest + steps: + - run: curl -X POST -H "Authorization: Bearer ${{ secrets.CI_TOKEN }}" -H "Content-Type:application/json" -d '{"steps": ["deploy_auth", "deploy_batch", "deploy_ci", "deploy_notebook", "deploy_query", "deploy_router"]}' https://ci.hail.populationgenomics.org.au/api/v1alpha/prod_deploy \ No newline at end of file From dda456a252251fdaf59d511e1b412d3620d5f1a2 Mon Sep 17 00:00:00 2001 From: Michael Franklin Date: Tue, 2 Mar 2021 12:26:16 +1100 Subject: [PATCH 167/501] Fix yaml syntax error in deploy.yaml --- .github/workflows/prod_deploy.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/prod_deploy.yaml b/.github/workflows/prod_deploy.yaml index db879bd4e63..95e2d461d6b 100644 --- a/.github/workflows/prod_deploy.yaml +++ b/.github/workflows/prod_deploy.yaml @@ -7,4 +7,6 @@ jobs: invoke-prod-deploy: runs-on: ubuntu-latest steps: - - run: curl -X POST -H "Authorization: Bearer ${{ secrets.CI_TOKEN }}" -H "Content-Type:application/json" -d '{"steps": ["deploy_auth", "deploy_batch", "deploy_ci", "deploy_notebook", "deploy_query", "deploy_router"]}' https://ci.hail.populationgenomics.org.au/api/v1alpha/prod_deploy \ No newline at end of file + - name: "deploy" + run: | + curl -X POST -H "Authorization: Bearer ${{ secrets.CI_TOKEN }}" -H "Content-Type:application/json" -d '{"steps": ["deploy_auth", "deploy_batch", "deploy_ci", "deploy_notebook", "deploy_query", "deploy_router"]}' https://ci.hail.populationgenomics.org.au/api/v1alpha/prod_deploy From 171554b5a42b9053475c0f3a16dc4918cd2881b9 Mon Sep 17 00:00:00 2001 From: Michael Franklin Date: Tue, 2 Mar 2021 12:44:55 +1100 Subject: [PATCH 168/501] Ensure deploy fails if curl requests fails --- .github/workflows/prod_deploy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/prod_deploy.yaml b/.github/workflows/prod_deploy.yaml index 95e2d461d6b..9cc2971141f 100644 --- a/.github/workflows/prod_deploy.yaml +++ b/.github/workflows/prod_deploy.yaml @@ -9,4 +9,4 @@ jobs: steps: - name: "deploy" run: | - curl -X POST -H "Authorization: Bearer ${{ secrets.CI_TOKEN }}" -H "Content-Type:application/json" -d '{"steps": ["deploy_auth", "deploy_batch", "deploy_ci", "deploy_notebook", "deploy_query", "deploy_router"]}' https://ci.hail.populationgenomics.org.au/api/v1alpha/prod_deploy + curl --fail -X POST -H "Authorization: Bearer ${{ secrets.CI_TOKEN }}" -H "Content-Type:application/json" -d '{"steps": ["deploy_auth", "deploy_batch", "deploy_ci", "deploy_notebook", "deploy_query", "deploy_router"]}' https://ci.hail.populationgenomics.org.au/api/v1alpha/prod_deploy From 58b038e0ea68a81c0ea6836a907f19478c32003b Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Tue, 2 Mar 2021 12:56:31 +1100 Subject: [PATCH 169/501] Allow prod deploy by "ci". 
--- ci/ci/ci.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ci/ci/ci.py b/ci/ci/ci.py index 175b52a3548..802dc7c54fb 100644 --- a/ci/ci/ci.py +++ b/ci/ci/ci.py @@ -125,10 +125,14 @@ async def dev_deploy_branch(request, userdata): # This is CPG-specific, as the Hail team redeploys by watching the main branch. @routes.post('/api/v1alpha/prod_deploy') -@rest_authenticated_developers_only -async def prod_deploy(request, unused_userdata): +@rest_authenticated_users_only +async def prod_deploy(request, userdata): """Deploys the main branch to the production namespace ("default").""" + # Only allow access by "ci" or dev accounts. + if not (userdata['username'] == 'ci' or userdata['is_developer'] == 1): + raise web.HTTPUnauthorized() + app = request.app try: params = await request.json() From 4c11ad064af8f9b6a3c1929ac431572061777c74 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Tue, 2 Mar 2021 14:32:35 +1100 Subject: [PATCH 170/501] Make sure the env variable HAIL_DOCKER_PREFIX available to the batch worker instance --- batch/batch/batch_configuration.py | 1 + batch/batch/driver/create_instance.py | 7 ++++++- batch/deployment.yaml | 4 ++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/batch/batch/batch_configuration.py b/batch/batch/batch_configuration.py index dae316a2e4f..bf0b036eb59 100644 --- a/batch/batch/batch_configuration.py +++ b/batch/batch/batch_configuration.py @@ -9,6 +9,7 @@ GCP_REGION = os.environ['HAIL_GCP_REGION'] GCP_ZONE = os.environ['HAIL_GCP_ZONE'] DOCKER_ROOT_IMAGE = os.environ['HAIL_DOCKER_ROOT_IMAGE'] +DOCKER_PREFIX = os.environ['HAIL_DOCKER_PREFIX'] BATCH_GCP_REGIONS = set(json.loads(os.environ['HAIL_BATCH_GCP_REGIONS'])) BATCH_GCP_REGIONS.add(GCP_REGION) diff --git a/batch/batch/driver/create_instance.py b/batch/batch/driver/create_instance.py index a7bcf516cf1..ceebecd1e37 100644 --- a/batch/batch/driver/create_instance.py +++ b/batch/batch/driver/create_instance.py @@ -5,7 +5,7 @@ from hailtop import aiogoogle -from ..batch_configuration import PROJECT, DOCKER_ROOT_IMAGE, DEFAULT_NAMESPACE +from ..batch_configuration import PROJECT, DOCKER_ROOT_IMAGE, DOCKER_PREFIX, DEFAULT_NAMESPACE from ..worker_config import WorkerConfig from ..log_store import LogStore @@ -174,6 +174,7 @@ async def create_instance(app, zone, machine_name, machine_type, activation_toke BATCH_WORKER_IMAGE=$(curl -s -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/instance/attributes/batch_worker_image") DOCKER_ROOT_IMAGE=$(curl -s -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/instance/attributes/docker_root_image") +DOCKER_PREFIX=$(curl -s -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/instance/attributes/docker_prefix") # Setup fluentd touch /worker.log @@ -251,6 +252,7 @@ async def create_instance(app, zone, machine_name, machine_type, activation_toke -e INSTANCE_ID=$INSTANCE_ID \ -e PROJECT=$PROJECT \ -e DOCKER_ROOT_IMAGE=$DOCKER_ROOT_IMAGE \ +-e DOCKER_PREFIX=$DOCKER_PREFIX \ -e WORKER_CONFIG=$WORKER_CONFIG \ -e MAX_IDLE_TIME_MSECS=$MAX_IDLE_TIME_MSECS \ -e WORKER_DATA_DISK_MOUNT=/mnt/disks/$WORKER_DATA_DISK_NAME \ @@ -297,6 +299,9 @@ async def create_instance(app, zone, machine_name, machine_type, activation_toke }, { 'key': 'docker_root_image', 'value': DOCKER_ROOT_IMAGE + }, { + 'key': 'docker_prefix', + 'value': DOCKER_PREFIX }, { 'key': 'namespace', 'value': DEFAULT_NAMESPACE diff --git a/batch/deployment.yaml b/batch/deployment.yaml index 
818289fb9e3..33dd9828f4a 100644 --- a/batch/deployment.yaml +++ b/batch/deployment.yaml @@ -64,6 +64,8 @@ spec: value: "{{ global.project }}" - name: HAIL_DOCKER_ROOT_IMAGE value: "{{ global.docker_root_image }}" + - name: HAIL_DOCKER_PREFIX + value: "{{ global.docker_prefix }}" - name: HAIL_GCP_REGION valueFrom: secretKeyRef: @@ -202,6 +204,8 @@ spec: value: "{{ global.project }}" - name: HAIL_DOCKER_ROOT_IMAGE value: "{{ global.docker_root_image }}" + - name: HAIL_DOCKER_PREFIX + value: "{{ global.docker_prefix }}" - name: HAIL_GCP_REGION valueFrom: secretKeyRef: From 9ba167f04192d393e32a43be8f9249433eedd1ce Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Tue, 2 Mar 2021 15:56:34 +1100 Subject: [PATCH 171/501] Fix Terraform. --- infra/global.tfvars | 2 ++ infra/main.tf | 2 ++ 2 files changed, 4 insertions(+) diff --git a/infra/global.tfvars b/infra/global.tfvars index 391608593aa..f3b6cdbb38d 100644 --- a/infra/global.tfvars +++ b/infra/global.tfvars @@ -6,3 +6,5 @@ gcp_region = "australia-southeast1" gcp_zone = "australia-southeast1-b" domain = "hail.populationgenomics.org.au" use_artifact_registry = true +batch_logs_bucket_location = "australia-southeast1" +batch_logs_bucket_storage_class = "STANDARD" diff --git a/infra/main.tf b/infra/main.tf index 73b4d52ae7c..a3e15e17361 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -21,6 +21,8 @@ variable "gcp_location" {} variable "gcp_region" {} variable "gcp_zone" {} variable "domain" {} +variable "batch_logs_bucket_location" {} +variable "batch_logs_bucket_storage_class" {} variable "use_artifact_registry" { type = bool description = "pull the ubuntu image from Artifact Registry. Otherwise, GCR" From 5432431090e8ed92ab42f90b7d8f99bbf2f01dae Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Tue, 2 Mar 2021 15:57:46 +1100 Subject: [PATCH 172/501] Reorder. 
--- infra/global.tfvars | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infra/global.tfvars b/infra/global.tfvars index f3b6cdbb38d..532bb083507 100644 --- a/infra/global.tfvars +++ b/infra/global.tfvars @@ -5,6 +5,6 @@ gcp_location = "australia-southeast1" gcp_region = "australia-southeast1" gcp_zone = "australia-southeast1-b" domain = "hail.populationgenomics.org.au" -use_artifact_registry = true batch_logs_bucket_location = "australia-southeast1" batch_logs_bucket_storage_class = "STANDARD" +use_artifact_registry = true From 79c07a76cbbfc1bbc0d2e8d312b4047c3b2524b5 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Tue, 2 Mar 2021 17:32:18 +1100 Subject: [PATCH 173/501] Prod deploy: raise on failure --- ci/ci/ci.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/ci/ci/ci.py b/ci/ci/ci.py index 86276304a26..2bc38a7a59b 100644 --- a/ci/ci/ci.py +++ b/ci/ci/ci.py @@ -151,12 +151,15 @@ async def prod_deploy(request, unused_userdata): await watched_branch._start_deploy(app['batch_client'], steps) batch = watched_branch.deploy_batch - if isinstance(batch, MergeFailureBatch): - path = f'/batches' + if not isinstance(batch, MergeFailureBatch): + url = deploy_config.external_url('ci', f'/batches/{batch.id}') + return web.Response(text=f'{url}\n') else: - path = f'/batches/{batch.id}' - url = deploy_config.external_url('ci', path) - return web.Response(text=f'{url}\n') + message = traceback.format_exc() + log.info('prod deploy failed: ' + message, exc_info=True) + raise web.HTTPBadRequest( + text=f'starting prod deploy failed due to\n{message}' + ) from batch.exception async def on_startup(app): From e779830db71aaa0969afa4069a6738cdc5ceba37 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Tue, 2 Mar 2021 21:19:10 +1100 Subject: [PATCH 174/501] Add docker_root_prefix into build.yaml --- build.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/build.yaml b/build.yaml index 58bcadefcd1..510ae75266b 100644 --- a/build.yaml +++ b/build.yaml @@ -2258,6 +2258,7 @@ steps: export HAIL_NETCAT_UBUNTU_IMAGE={{ netcat_ubuntu_image.image }} export HAIL_HAIL_BASE_IMAGE={{ hail_base_image.image }} export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" + export DOCKER_PREFIX="{{ global.docker_prefix }}" export HAIL_TEST_TOKEN_FILE=/user-tokens/tokens.json export HAIL_TEST_DEV_TOKEN_FILE=/dev-tokens/tokens.json export HAIL_TOKEN="{{ token }}" @@ -2326,6 +2327,7 @@ steps: export HAIL_NETCAT_UBUNTU_IMAGE={{ netcat_ubuntu_image.image }} export HAIL_HAIL_BASE_IMAGE={{ hail_base_image.image }} export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" + export DOCKER_PREFIX="{{ global.docker_prefix }}" export HAIL_TEST_TOKEN_FILE=/user-tokens/tokens.json export HAIL_TEST_DEV_TOKEN_FILE=/dev-tokens/tokens.json export HAIL_TOKEN="{{ token }}" @@ -2394,6 +2396,7 @@ steps: export HAIL_NETCAT_UBUNTU_IMAGE={{ netcat_ubuntu_image.image }} export HAIL_HAIL_BASE_IMAGE={{ hail_base_image.image }} export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" + export DOCKER_PREFIX="{{ global.docker_prefix }}" export HAIL_TEST_TOKEN_FILE=/user-tokens/tokens.json export HAIL_TEST_DEV_TOKEN_FILE=/dev-tokens/tokens.json export HAIL_TOKEN="{{ token }}" @@ -2462,6 +2465,7 @@ steps: export HAIL_NETCAT_UBUNTU_IMAGE={{ netcat_ubuntu_image.image }} export HAIL_HAIL_BASE_IMAGE={{ hail_base_image.image }} export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" + export DOCKER_PREFIX="{{ global.docker_prefix }}" export HAIL_TEST_TOKEN_FILE=/user-tokens/tokens.json export 
HAIL_TEST_DEV_TOKEN_FILE=/dev-tokens/tokens.json export HAIL_TOKEN="{{ token }}" @@ -2530,6 +2534,7 @@ steps: export HAIL_NETCAT_UBUNTU_IMAGE={{ netcat_ubuntu_image.image }} export HAIL_HAIL_BASE_IMAGE={{ hail_base_image.image }} export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" + export DOCKER_PREFIX="{{ global.docker_prefix }}" export HAIL_TEST_TOKEN_FILE=/user-tokens/tokens.json export HAIL_TEST_DEV_TOKEN_FILE=/dev-tokens/tokens.json export HAIL_TOKEN="{{ token }}" @@ -2747,6 +2752,7 @@ steps: export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=0 export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" + export DOCKER_PREFIX="{{ global.docker_prefix }}" export PYTHON_DILL_IMAGE="{{ global.docker_prefix }}/python-dill:3.7-slim" hailctl config set batch/billing_project test hailctl config set batch/bucket cpg-hail-test @@ -2798,6 +2804,7 @@ steps: export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=1 export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" + export DOCKER_PREFIX="{{ global.docker_prefix }}" export PYTHON_DILL_IMAGE="{{ global.docker_prefix }}/python-dill:3.7-slim" hailctl config set batch/billing_project test hailctl config set batch/bucket cpg-hail-test @@ -2849,6 +2856,7 @@ steps: export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=2 export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" + export DOCKER_PREFIX="{{ global.docker_prefix }}" export PYTHON_DILL_IMAGE="{{ global.docker_prefix }}/python-dill:3.7-slim" hailctl config set batch/billing_project test hailctl config set batch/bucket cpg-hail-test @@ -2900,6 +2908,7 @@ steps: export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=3 export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" + export DOCKER_PREFIX="{{ global.docker_prefix }}" export PYTHON_DILL_IMAGE="{{ global.docker_prefix }}/python-dill:3.7-slim" hailctl config set batch/billing_project test hailctl config set batch/bucket cpg-hail-test @@ -2951,6 +2960,7 @@ steps: export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=4 export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" + export DOCKER_PREFIX="{{ global.docker_prefix }}" export PYTHON_DILL_IMAGE="{{ global.docker_prefix }}/python-dill:3.7-slim" hailctl config set batch/billing_project test hailctl config set batch/bucket cpg-hail-test From 120d493c33488fe7f1e939ce1adf5df00fb5eba1 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Tue, 2 Mar 2021 17:19:01 -0500 Subject: [PATCH 175/501] get version from copy files --- .dockerignore | 1 + batch/Dockerfile.worker | 3 +- build.yaml | 56 +++++++++++++++---------- docker/Dockerfile.service-base | 6 +-- docker/Makefile | 2 + hail/Dockerfile.hailtop | 5 --- hail/Makefile | 4 +- hail/python/hailtop/__init__.py | 8 ++++ hail/python/hailtop/hailctl/__init__.py | 8 ---- hail/python/hailtop/hailctl/__main__.py | 5 ++- query/query/query.py | 6 +-- 11 files changed, 54 insertions(+), 50 deletions(-) delete mode 100644 hail/Dockerfile.hailtop diff --git a/.dockerignore b/.dockerignore index 90c65c26b86..59bd597af0f 100644 --- a/.dockerignore +++ b/.dockerignore @@ -9,6 +9,7 @@ hail/.bloop/ hail/.gradle/ hail/.idea/ hail/.pytest_cache/ +.git/ hail/.ensime.cache.d/ hail/.ensime_cache.d/ hail/.ensime_cache/ diff --git a/batch/Dockerfile.worker b/batch/Dockerfile.worker index 72b8acaece3..8f68e77a415 100644 --- a/batch/Dockerfile.worker +++ b/batch/Dockerfile.worker @@ -33,6 +33,7 @@ RUN python3 -m pip install --upgrade --no-cache-dir pip COPY hail/python/setup-hailtop.py /hailtop/setup.py COPY hail/python/hailtop /hailtop/hailtop/ +COPY /hail_version 
/hailtop/hailtop/hail_version RUN hail-pip-install --no-deps /hailtop && rm -rf /hailtop COPY gear/setup.py /gear/setup.py @@ -43,4 +44,4 @@ COPY batch/setup.py batch/MANIFEST.in /batch/ COPY batch/batch /batch/batch/ RUN hail-pip-install --no-deps /batch && rm -rf /batch -COPY batch/hail.jar / \ No newline at end of file +COPY batch/hail.jar / diff --git a/build.yaml b/build.yaml index 0360979aa59..7a100adec85 100644 --- a/build.yaml +++ b/build.yaml @@ -64,13 +64,6 @@ steps: publishAs: base dependsOn: - hail_ubuntu_image - - kind: buildImage - name: service_base_image - dockerFile: docker/Dockerfile.service-base - contextPath: . - publishAs: service-base - dependsOn: - - base_image - kind: runImage name: check_hail image: @@ -83,21 +76,6 @@ steps: make check-hail dependsOn: - base_image - - kind: runImage - name: check_services - image: - valueFrom: service_base_image.image - script: | - set -ex - mkdir repo - cd repo - {{ code.checkout_script }} - {% if 'target_sha' in code %} - export HAIL_TARGET_SHA={{ code.target_sha }} - {% endif %} - make -k check-services - dependsOn: - - service_base_image - kind: runImage name: copy_files image: @@ -147,6 +125,32 @@ steps: to: /repo/lsm dependsOn: - base_image + - kind: buildImage + name: service_base_image + dockerFile: docker/Dockerfile.service-base + contextPath: . + publishAs: service-base + dependsOn: + - base_image + - copy_files + inputs: + - from: /hail_version + to: /hail_version + - kind: runImage + name: check_services + image: + valueFrom: service_base_image.image + script: | + set -ex + mkdir repo + cd repo + {{ code.checkout_script }} + {% if 'target_sha' in code %} + export HAIL_TARGET_SHA={{ code.target_sha }} + {% endif %} + make -k check-services + dependsOn: + - service_base_image - kind: buildImage name: create_certs_image dockerFile: tls/Dockerfile @@ -520,8 +524,12 @@ steps: scopes: - test - dev + inputs: + - from: /hail_version + to: /hail_version dependsOn: - base_image + - copy_files - kind: buildImage name: test_monitoring_image dockerFile: monitoring/Dockerfile.test @@ -669,9 +677,11 @@ steps: inputs: - from: /just-jar/hail.jar to: /batch/hail.jar + - from: /hail_version + to: /hail_version dependsOn: - - service_base_image - build_hail_jar_only + - copy_files - kind: buildImage name: query_image dockerFile: query/Dockerfile diff --git a/docker/Dockerfile.service-base b/docker/Dockerfile.service-base index dfd3e7bb779..b360166a77e 100644 --- a/docker/Dockerfile.service-base +++ b/docker/Dockerfile.service-base @@ -7,11 +7,7 @@ RUN hail-pip-install -r service-base-requirements.txt COPY hail/python/setup-hailtop.py /hailtop/setup.py COPY hail/python/hailtop /hailtop/hailtop/ -COPY hail/Makefile hail/env_var.mk .git/ hailtop/ -RUN mkdir -p hailtop/python/hail hailtop/python/hailtop/hailctl hailtop/python/hail/docs/_static && \ - (cd hailtop && echo $(pwd) && make python-version-info) && \ - cp hailtop/python/hail/hail_*version hailtop/hailtop/hailctl && \ - rm -rf hailtop/Makefile hailtop/env_var.mk .git/ +COPY /hail_version /hailtop/hailtop/hail_version RUN hail-pip-install /hailtop && rm -rf /hailtop COPY gear/setup.py /gear/setup.py diff --git a/docker/Makefile b/docker/Makefile index 1907254ae55..7ed7f61393f 100644 --- a/docker/Makefile +++ b/docker/Makefile @@ -45,6 +45,8 @@ base-stmp: hail-ubuntu-stmp Dockerfile.base core-site.xml requirements.txt ../py .PHONY: service-base service-base: base-stmp -docker pull $(SERVICE_BASE_LATEST) + $(MAKE) -C ../hail python/hailtop/hail_version + cp 
../hail/python/hailtop/hail_version ../hail_version python3 ../ci/jinja2_render.py '{"base_image":{"image":"base"}}' Dockerfile.service-base Dockerfile.service-base.out [ "$(shell bash stat-permissions.sh Dockerfile.service-base.out)" = "644" ] [ "$(shell bash stat-permissions.sh service-base-requirements.txt)" = "644" ] diff --git a/hail/Dockerfile.hailtop b/hail/Dockerfile.hailtop deleted file mode 100644 index 6e39fbe3a3d..00000000000 --- a/hail/Dockerfile.hailtop +++ /dev/null @@ -1,5 +0,0 @@ -FROM {{ base_image.image }} - -COPY python/setup-hailtop.py /hailtop/setup.py -COPY python/hailtop /hailtop/hailtop/ -RUN hail-pip-install /hailtop && rm -rf /hailtop diff --git a/hail/Makefile b/hail/Makefile index fc3eeab8125..c32e32f1062 100644 --- a/hail/Makefile +++ b/hail/Makefile @@ -50,7 +50,7 @@ PY_FILES := $(shell git ls-files python) INIT_SCRIPTS := python/hailtop/hailctl/deploy.yaml PYTHON_VERSION_INFO := python/hail/hail_version PYTHON_VERSION_INFO += python/hail/hail_pip_version -PYTHON_VERSION_INFO += python/hailtop/hailctl/hail_version +PYTHON_VERSION_INFO += python/hailtop/hail_version PYTHON_VERSION_INFO += python/hail/docs/_static/hail_version.js SCALA_BUILD_INFO := src/main/resources/build-info.properties @@ -128,7 +128,7 @@ python/hail/docs/_static/hail_version.js: python/hail/hail_version python/hail/h printf 'hail_version="$(shell cat python/hail/hail_version)";' > $@ printf 'hail_pip_version="$(shell cat python/hail/hail_pip_version)"' >> $@ -python/hailtop/hailctl/hail_version: python/hail/hail_version +python/hailtop/hail_version: python/hail/hail_version cp -f $< $@ python/README.md: ../README.md diff --git a/hail/python/hailtop/__init__.py b/hail/python/hailtop/__init__.py index e69de29bb2d..603455b7801 100644 --- a/hail/python/hailtop/__init__.py +++ b/hail/python/hailtop/__init__.py @@ -0,0 +1,8 @@ +import pkg_resources + + +_VERSION = pkg_resources.resource_string(__name__, 'hail_version').decode().strip() + + +def version() -> str: + return _VERSION diff --git a/hail/python/hailtop/hailctl/__init__.py b/hail/python/hailtop/hailctl/__init__.py index 96598044c3e..e69de29bb2d 100644 --- a/hail/python/hailtop/hailctl/__init__.py +++ b/hail/python/hailtop/hailctl/__init__.py @@ -1,8 +0,0 @@ -def version() -> str: - import pkg_resources # pylint: disable=import-outside-toplevel - return pkg_resources.resource_string(__name__, 'hail_version').decode().strip() - - -__all__ = [ - 'version' -] diff --git a/hail/python/hailtop/hailctl/__main__.py b/hail/python/hailtop/hailctl/__main__.py index 43738fe4bb0..36d7b0b19b1 100644 --- a/hail/python/hailtop/hailctl/__main__.py +++ b/hail/python/hailtop/hailctl/__main__.py @@ -6,6 +6,7 @@ import time from hailtop import hailctl +from hailtop import version def print_help(): @@ -61,7 +62,7 @@ def check_for_update(): pip_out = sp.check_output(['pip3', 'search', 'hail'], stderr=sp.STDOUT) latest = re.search(r'hail \((\d+)\.(\d+)\.(\d+).*', pip_out.decode()).groups() - installed = re.search(r'(\d+)\.(\d+)\.(\d+).*', hailctl.version()).groups() + installed = re.search(r'(\d+)\.(\d+)\.(\d+).*', version()).groups() def int_version(version): return tuple(map(int, version)) @@ -81,7 +82,7 @@ def fmt_version(version): def print_version(): - print(hailctl.version()) + print(version()) def main(): diff --git a/query/query/query.py b/query/query/query.py index 2e277193d2d..18259d32f5c 100644 --- a/query/query/query.py +++ b/query/query/query.py @@ -12,7 +12,7 @@ from hailtop.config import get_deploy_config from hailtop.tls import 
internal_server_ssl_context from hailtop.hail_logging import AccessLogger -from hailtop.hailctl import version +from hailtop import version from gear import setup_aiohttp_session, rest_authenticated_users_only, rest_authenticated_developers_only uvloop.install() @@ -20,8 +20,6 @@ DEFAULT_NAMESPACE = os.environ['HAIL_DEFAULT_NAMESPACE'] log = logging.getLogger('batch') routes = web.RouteTableDef() -# Store this value once so we don't hit the desk -HAIL_VERSION = version() def java_to_web_response(jresp): @@ -202,7 +200,7 @@ async def set_flag(request, userdata): # pylint: disable=unused-argument @routes.get('/api/v1alpha/version') async def rest_get_version(request): # pylint: disable=W0613 try: - return web.Response(text=HAIL_VERSION) + return web.Response(text=version()) except Exception as e: return web.json_response({"error": str(e)}) From a1c41c4e7b271212cc168a6c8a81d93e688a4a30 Mon Sep 17 00:00:00 2001 From: MIchael Franklin Date: Wed, 3 Mar 2021 10:55:33 +1100 Subject: [PATCH 176/501] Remove try-catch from query endpoint --- query/query/query.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/query/query/query.py b/query/query/query.py index 18259d32f5c..471026d4911 100644 --- a/query/query/query.py +++ b/query/query/query.py @@ -199,10 +199,7 @@ async def set_flag(request, userdata): # pylint: disable=unused-argument @routes.get('/api/v1alpha/version') async def rest_get_version(request): # pylint: disable=W0613 - try: - return web.Response(text=version()) - except Exception as e: - return web.json_response({"error": str(e)}) + return web.Response(text=version()) async def on_startup(app): From 9a3cc98627eea831a24f78598df978e232ffd4d9 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Wed, 3 Mar 2021 10:59:04 +1100 Subject: [PATCH 177/501] Import rest_authenticated_users_only to fix ci crash-loop. 
--- ci/ci/ci.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/ci/ci.py b/ci/ci/ci.py index 0d7b8c4ead5..c78e8cd8e13 100644 --- a/ci/ci/ci.py +++ b/ci/ci/ci.py @@ -13,6 +13,7 @@ from gear import ( setup_aiohttp_session, rest_authenticated_developers_only, + rest_authenticated_users_only, web_authenticated_developers_only, ) from web_common import setup_aiohttp_jinja2, setup_common_static_routes, render_template From 778aaa641b75ae468a05aec2cd4859de9209a8ec Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 3 Mar 2021 11:53:27 +1100 Subject: [PATCH 178/501] Add HAIL_ prefix --- build.yaml | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/build.yaml b/build.yaml index 510ae75266b..2d687dfddec 100644 --- a/build.yaml +++ b/build.yaml @@ -2257,8 +2257,8 @@ steps: export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} export HAIL_NETCAT_UBUNTU_IMAGE={{ netcat_ubuntu_image.image }} export HAIL_HAIL_BASE_IMAGE={{ hail_base_image.image }} - export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" - export DOCKER_PREFIX="{{ global.docker_prefix }}" + export HAIL_DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" + export HAIL_DOCKER_PREFIX="{{ global.docker_prefix }}" export HAIL_TEST_TOKEN_FILE=/user-tokens/tokens.json export HAIL_TEST_DEV_TOKEN_FILE=/dev-tokens/tokens.json export HAIL_TOKEN="{{ token }}" @@ -2326,8 +2326,8 @@ steps: export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} export HAIL_NETCAT_UBUNTU_IMAGE={{ netcat_ubuntu_image.image }} export HAIL_HAIL_BASE_IMAGE={{ hail_base_image.image }} - export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" - export DOCKER_PREFIX="{{ global.docker_prefix }}" + export HAIL_DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" + export HAIL_DOCKER_PREFIX="{{ global.docker_prefix }}" export HAIL_TEST_TOKEN_FILE=/user-tokens/tokens.json export HAIL_TEST_DEV_TOKEN_FILE=/dev-tokens/tokens.json export HAIL_TOKEN="{{ token }}" @@ -2395,8 +2395,8 @@ steps: export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} export HAIL_NETCAT_UBUNTU_IMAGE={{ netcat_ubuntu_image.image }} export HAIL_HAIL_BASE_IMAGE={{ hail_base_image.image }} - export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" - export DOCKER_PREFIX="{{ global.docker_prefix }}" + export HAIL_DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" + export HAIL_DOCKER_PREFIX="{{ global.docker_prefix }}" export HAIL_TEST_TOKEN_FILE=/user-tokens/tokens.json export HAIL_TEST_DEV_TOKEN_FILE=/dev-tokens/tokens.json export HAIL_TOKEN="{{ token }}" @@ -2464,8 +2464,8 @@ steps: export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} export HAIL_NETCAT_UBUNTU_IMAGE={{ netcat_ubuntu_image.image }} export HAIL_HAIL_BASE_IMAGE={{ hail_base_image.image }} - export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" - export DOCKER_PREFIX="{{ global.docker_prefix }}" + export HAIL_DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" + export HAIL_DOCKER_PREFIX="{{ global.docker_prefix }}" export HAIL_TEST_TOKEN_FILE=/user-tokens/tokens.json export HAIL_TEST_DEV_TOKEN_FILE=/dev-tokens/tokens.json export HAIL_TOKEN="{{ token }}" @@ -2533,8 +2533,8 @@ steps: export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} export HAIL_NETCAT_UBUNTU_IMAGE={{ netcat_ubuntu_image.image }} export HAIL_HAIL_BASE_IMAGE={{ hail_base_image.image }} - export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" - export DOCKER_PREFIX="{{ global.docker_prefix }}" + export HAIL_DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" + export HAIL_DOCKER_PREFIX="{{ global.docker_prefix }}" 
export HAIL_TEST_TOKEN_FILE=/user-tokens/tokens.json export HAIL_TEST_DEV_TOKEN_FILE=/dev-tokens/tokens.json export HAIL_TOKEN="{{ token }}" @@ -2751,8 +2751,8 @@ steps: export HAIL_GSA_KEY_FILE=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=0 - export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" - export DOCKER_PREFIX="{{ global.docker_prefix }}" + export HAIL_DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" + export HAIL_DOCKER_PREFIX="{{ global.docker_prefix }}" export PYTHON_DILL_IMAGE="{{ global.docker_prefix }}/python-dill:3.7-slim" hailctl config set batch/billing_project test hailctl config set batch/bucket cpg-hail-test @@ -2803,8 +2803,8 @@ steps: export HAIL_GSA_KEY_FILE=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=1 - export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" - export DOCKER_PREFIX="{{ global.docker_prefix }}" + export HAIL_DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" + export HAIL_DOCKER_PREFIX="{{ global.docker_prefix }}" export PYTHON_DILL_IMAGE="{{ global.docker_prefix }}/python-dill:3.7-slim" hailctl config set batch/billing_project test hailctl config set batch/bucket cpg-hail-test @@ -2855,8 +2855,8 @@ steps: export HAIL_GSA_KEY_FILE=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=2 - export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" - export DOCKER_PREFIX="{{ global.docker_prefix }}" + export HAIL_DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" + export HAIL_DOCKER_PREFIX="{{ global.docker_prefix }}" export PYTHON_DILL_IMAGE="{{ global.docker_prefix }}/python-dill:3.7-slim" hailctl config set batch/billing_project test hailctl config set batch/bucket cpg-hail-test @@ -2907,8 +2907,8 @@ steps: export HAIL_GSA_KEY_FILE=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=3 - export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" - export DOCKER_PREFIX="{{ global.docker_prefix }}" + export HAIL_DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" + export HAIL_DOCKER_PREFIX="{{ global.docker_prefix }}" export PYTHON_DILL_IMAGE="{{ global.docker_prefix }}/python-dill:3.7-slim" hailctl config set batch/billing_project test hailctl config set batch/bucket cpg-hail-test @@ -2959,8 +2959,8 @@ steps: export HAIL_GSA_KEY_FILE=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=4 - export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" - export DOCKER_PREFIX="{{ global.docker_prefix }}" + export HAIL_DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" + export HAIL_DOCKER_PREFIX="{{ global.docker_prefix }}" export PYTHON_DILL_IMAGE="{{ global.docker_prefix }}/python-dill:3.7-slim" hailctl config set batch/billing_project test hailctl config set batch/bucket cpg-hail-test From 149cdd8cd587a7e706096ece196cd18db9d12c5f Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Wed, 3 Mar 2021 17:35:38 +1100 Subject: [PATCH 179/501] Fix indent. 
--- amundsen/deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/amundsen/deployment.yaml b/amundsen/deployment.yaml index 6a57781a51a..8ee78b4e7e1 100644 --- a/amundsen/deployment.yaml +++ b/amundsen/deployment.yaml @@ -30,7 +30,7 @@ spec: {% endif %} containers: - name: amundsen-frontend - image: {{ global.docker_prefix }}/amundsendev/amundsen-frontend:2.3.0 + image: {{ global.docker_prefix }}/amundsendev/amundsen-frontend:2.3.0 imagePullPolicy: Always resources: requests: From daf3dae7dd7100ed55c857ccf405bf114d9354b5 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Thu, 25 Feb 2021 12:25:37 +1100 Subject: [PATCH 180/501] Dataproc: allow upload of deps in zip archive --- .../python/hailtop/hailctl/dataproc/submit.py | 22 +------------------ 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/hail/python/hailtop/hailctl/dataproc/submit.py b/hail/python/hailtop/hailctl/dataproc/submit.py index 6c95397267e..cbbd034e395 100644 --- a/hail/python/hailtop/hailctl/dataproc/submit.py +++ b/hail/python/hailtop/hailctl/dataproc/submit.py @@ -22,27 +22,7 @@ def main(args, pass_through_args): # pylint: disable=unused-argument files = '' if args.files: files = args.files - pyfiles = [] - if args.pyfiles: - pyfiles.extend(args.pyfiles.split(',')) - pyfiles.extend(os.environ.get('HAIL_SCRIPTS', '').split(':')) - if pyfiles: - tfile = tempfile.mkstemp(suffix='.zip', prefix='pyscripts_')[1] - zipf = zipfile.ZipFile(tfile, 'w', zipfile.ZIP_DEFLATED) - for hail_script_entry in pyfiles: - if hail_script_entry.endswith('.py'): - zipf.write(hail_script_entry, arcname=os.path.basename(hail_script_entry)) - else: - for root, _, pyfiles_walk in os.walk(hail_script_entry): - for pyfile in pyfiles_walk: - if pyfile.endswith('.py'): - zipf.write(os.path.join(root, pyfile), - os.path.relpath(os.path.join(root, pyfile), - os.path.join(hail_script_entry, '..'))) - zipf.close() - pyfiles = tfile - else: - pyfiles = '' + pyfiles = args.pyfiles # create properties argument properties = '' From f4cfc8e0aa8231885620d1605e129331c3e19d9f Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Sun, 28 Feb 2021 23:07:06 +1100 Subject: [PATCH 181/501] hailctl dataproc submit: add --region option --- hail/python/hailtop/hailctl/dataproc/submit.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hail/python/hailtop/hailctl/dataproc/submit.py b/hail/python/hailtop/hailctl/dataproc/submit.py index cbbd034e395..9d6595531f4 100644 --- a/hail/python/hailtop/hailctl/dataproc/submit.py +++ b/hail/python/hailtop/hailctl/dataproc/submit.py @@ -12,6 +12,7 @@ def init_parser(parser): parser.add_argument('--pyfiles', required=False, type=str, help='Comma-separated list of files (or directories with python files) to add to the PYTHONPATH.') parser.add_argument('--properties', '-p', required=False, type=str, help='Extra Spark properties to set.') parser.add_argument('--gcloud_configuration', help='Google Cloud configuration to submit job (defaults to currently set configuration).') + parser.add_argument('--region', help='Region.') parser.add_argument('--dry-run', action='store_true', help="Print gcloud dataproc command, but don't run it.") @@ -39,7 +40,8 @@ def main(args, pass_through_args): # pylint: disable=unused-argument '--cluster={}'.format(args.name), '--files={}'.format(files), '--py-files={}'.format(pyfiles), - '--properties={}'.format(properties) + '--properties={}'.format(properties), + '--region={}'.format(args.region) ] if args.gcloud_configuration: 
cmd.append('--configuration={}'.format(args.gcloud_configuration)) From ad59ec5142c71be41f83826df7bbf352e0c7ada8 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 3 Mar 2021 22:54:10 +1100 Subject: [PATCH 182/501] Dataproc stop: add --region --- hail/python/hailtop/hailctl/dataproc/stop.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/hail/python/hailtop/hailctl/dataproc/stop.py b/hail/python/hailtop/hailctl/dataproc/stop.py index 5c4cb83f8df..1cc87555fbc 100644 --- a/hail/python/hailtop/hailctl/dataproc/stop.py +++ b/hail/python/hailtop/hailctl/dataproc/stop.py @@ -5,6 +5,7 @@ def init_parser(parser): parser.add_argument('name', type=str, help='Cluster name.') parser.add_argument('--async', action='store_true', dest='asink', help="Do not wait for cluster deletion.") + parser.add_argument('--region', help='Region.') parser.add_argument('--dry-run', action='store_true', help="Print gcloud dataproc command, but don't run it.") @@ -12,7 +13,14 @@ def init_parser(parser): def main(args, pass_through_args): print("Stopping cluster '{}'...".format(args.name)) - cmd = ['dataproc', 'clusters', 'delete', '--quiet', args.name] + cmd = [ + 'dataproc', + 'clusters', + 'delete', + '--region={}'.format(args.region), + '--quiet', + args.name + ] if args.asink: cmd.append('--async') From 42be78fb1ec7de4fa85ff23ebaf82b958d91d5a7 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 3 Mar 2021 23:00:53 +1100 Subject: [PATCH 183/501] Allow both ways to provide pyfiles --- .../python/hailtop/hailctl/dataproc/submit.py | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/hail/python/hailtop/hailctl/dataproc/submit.py b/hail/python/hailtop/hailctl/dataproc/submit.py index 9d6595531f4..077a51db367 100644 --- a/hail/python/hailtop/hailctl/dataproc/submit.py +++ b/hail/python/hailtop/hailctl/dataproc/submit.py @@ -23,7 +23,32 @@ def main(args, pass_through_args): # pylint: disable=unused-argument files = '' if args.files: files = args.files - pyfiles = args.pyfiles + + if args.pyfiles and args.pyfiles.endswith('.zip') and not ',' in args.pyfiles: + # Adding the zip archive directly + pyfiles = args.pyfiles + else: + pyfiles = [] + elif args.pyfiles: + pyfiles.extend(args.pyfiles.split(',')) + pyfiles.extend(os.environ.get('HAIL_SCRIPTS', '').split(':')) + if pyfiles: + tfile = tempfile.mkstemp(suffix='.zip', prefix='pyscripts_')[1] + zipf = zipfile.ZipFile(tfile, 'w', zipfile.ZIP_DEFLATED) + for hail_script_entry in pyfiles: + if hail_script_entry.endswith('.py'): + zipf.write(hail_script_entry, arcname=os.path.basename(hail_script_entry)) + else: + for root, _, pyfiles_walk in os.walk(hail_script_entry): + for pyfile in pyfiles_walk: + if pyfile.endswith('.py'): + zipf.write(os.path.join(root, pyfile), + os.path.relpath(os.path.join(root, pyfile), + os.path.join(hail_script_entry, '..'))) + zipf.close() + pyfiles = tfile + else: + pyfiles = '' # create properties argument properties = '' From b26bd402f422754d3ed05bf1c87d6ab49c132f9d Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 3 Mar 2021 23:04:42 +1100 Subject: [PATCH 184/501] Fix --- hail/python/hailtop/hailctl/dataproc/submit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hail/python/hailtop/hailctl/dataproc/submit.py b/hail/python/hailtop/hailctl/dataproc/submit.py index 077a51db367..e88e63fee71 100644 --- a/hail/python/hailtop/hailctl/dataproc/submit.py +++ b/hail/python/hailtop/hailctl/dataproc/submit.py @@ -29,7 +29,7 @@ def main(args, pass_through_args): # 
pylint: disable=unused-argument pyfiles = args.pyfiles else: pyfiles = [] - elif args.pyfiles: + if args.pyfiles: pyfiles.extend(args.pyfiles.split(',')) pyfiles.extend(os.environ.get('HAIL_SCRIPTS', '').split(':')) if pyfiles: From 53588b3b0dafb24854f80169bbdfe556e9e6e16a Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Thu, 4 Mar 2021 10:00:49 +1100 Subject: [PATCH 185/501] Make --region param required --- hail/python/hailtop/hailctl/dataproc/stop.py | 2 +- hail/python/hailtop/hailctl/dataproc/submit.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hail/python/hailtop/hailctl/dataproc/stop.py b/hail/python/hailtop/hailctl/dataproc/stop.py index 1cc87555fbc..9817918290e 100644 --- a/hail/python/hailtop/hailctl/dataproc/stop.py +++ b/hail/python/hailtop/hailctl/dataproc/stop.py @@ -5,7 +5,7 @@ def init_parser(parser): parser.add_argument('name', type=str, help='Cluster name.') parser.add_argument('--async', action='store_true', dest='asink', help="Do not wait for cluster deletion.") - parser.add_argument('--region', help='Region.') + parser.add_argument('--region', help='Region.', required=True) parser.add_argument('--dry-run', action='store_true', help="Print gcloud dataproc command, but don't run it.") diff --git a/hail/python/hailtop/hailctl/dataproc/submit.py b/hail/python/hailtop/hailctl/dataproc/submit.py index e88e63fee71..53caad1b7ef 100644 --- a/hail/python/hailtop/hailctl/dataproc/submit.py +++ b/hail/python/hailtop/hailctl/dataproc/submit.py @@ -12,7 +12,7 @@ def init_parser(parser): parser.add_argument('--pyfiles', required=False, type=str, help='Comma-separated list of files (or directories with python files) to add to the PYTHONPATH.') parser.add_argument('--properties', '-p', required=False, type=str, help='Extra Spark properties to set.') parser.add_argument('--gcloud_configuration', help='Google Cloud configuration to submit job (defaults to currently set configuration).') - parser.add_argument('--region', help='Region.') + parser.add_argument('--region', help='Region.', required=True) parser.add_argument('--dry-run', action='store_true', help="Print gcloud dataproc command, but don't run it.") From 2b1010214554b67ebbdd4f10273e7d10472cb264 Mon Sep 17 00:00:00 2001 From: Vlad Savelyev Date: Thu, 4 Mar 2021 11:13:50 +1100 Subject: [PATCH 186/501] Update hail/python/hailtop/hailctl/dataproc/submit.py Co-authored-by: Michael Franklin --- hail/python/hailtop/hailctl/dataproc/submit.py | 1 + 1 file changed, 1 insertion(+) diff --git a/hail/python/hailtop/hailctl/dataproc/submit.py b/hail/python/hailtop/hailctl/dataproc/submit.py index 53caad1b7ef..c0c4ee05b54 100644 --- a/hail/python/hailtop/hailctl/dataproc/submit.py +++ b/hail/python/hailtop/hailctl/dataproc/submit.py @@ -24,6 +24,7 @@ def main(args, pass_through_args): # pylint: disable=unused-argument if args.files: files = args.files + # If you only provide one (comma-sep) argument, and it's a zip file, use that file directly if args.pyfiles and args.pyfiles.endswith('.zip') and not ',' in args.pyfiles: # Adding the zip archive directly pyfiles = args.pyfiles From 606c94bc39ddeae873502a0f7e827356a62b4100 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Thu, 4 Mar 2021 17:49:37 +1100 Subject: [PATCH 187/501] Redeploy analysis-server after building a new conda package. 
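The step added below drives GitHub's workflow_dispatch REST endpoint for the analysis-runner repository. As a rough sketch of the same call from Python (the `requests` library, the function name, and the way the token is passed in are assumptions for illustration; the endpoint, headers, and payload are taken from the curl command in the diff):

import requests

def redeploy_analysis_server(github_token: str, hail_version: str) -> None:
    # Trigger the analysis-runner deploy workflow (id 6364059, as in the step
    # below) on its main branch, passing the freshly published Hail version.
    resp = requests.post(
        'https://api.github.com/repos/populationgenomics/analysis-runner'
        '/actions/workflows/6364059/dispatches',
        headers={
            'Authorization': f'token {github_token}',
            'Accept': 'application/vnd.github.v3+json',
        },
        json={'ref': 'main', 'inputs': {'hail_version': hail_version}},
    )
    # A successful dispatch returns 204 No Content; raising on anything else
    # plays the same role as the --fail flag added to curl in a later patch.
    resp.raise_for_status()
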
--- .github/workflows/condarise.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/condarise.yaml b/.github/workflows/condarise.yaml index b62c0cdb412..268bb4117ce 100644 --- a/.github/workflows/condarise.yaml +++ b/.github/workflows/condarise.yaml @@ -22,6 +22,7 @@ jobs: VERSION=${VERSION/-/.dev} cat conda/hail/meta-template.yaml \ | sed s/{version}/${VERSION}/ > conda/hail/meta.yaml + echo "version=$VERSION" >> $GITHUB_ENV - uses: conda-incubator/setup-miniconda@v2 with: @@ -40,3 +41,12 @@ jobs: run: | anaconda -t ${{ secrets.ANACONDA_TOKEN }} \ upload ${CONDA_PREFIX}/conda-bld/**/*.tar.bz2 + + - name: Redeploy the analysis-server + run: | + curl \ + -X POST \ + -H "Authorization: token ${{ secrets.ANALYSIS_SERVER_GITHUB_TOKEN }}" \ + -H "Accept: application/vnd.github.v3+json" \ + https://api.github.com/repos/populationgenomics/analysis-runner/actions/workflows/6364059/dispatches \ + -d "{\"ref\": \"main\", \"inputs\": {\"hail_version\": \"${{ env.version }}\"}}" \ No newline at end of file From 3653b3ff9ae02353fe97db5733eaa146e662ba1f Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Thu, 4 Mar 2021 18:39:09 +1100 Subject: [PATCH 188/501] Add --fail. --- .github/workflows/condarise.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/condarise.yaml b/.github/workflows/condarise.yaml index 268bb4117ce..7eabd316cda 100644 --- a/.github/workflows/condarise.yaml +++ b/.github/workflows/condarise.yaml @@ -44,7 +44,7 @@ jobs: - name: Redeploy the analysis-server run: | - curl \ + curl --fail \ -X POST \ -H "Authorization: token ${{ secrets.ANALYSIS_SERVER_GITHUB_TOKEN }}" \ -H "Accept: application/vnd.github.v3+json" \ From 602101971b2bb2d5d58eca97121731008a4b9874 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Fri, 5 Mar 2021 09:06:30 +1100 Subject: [PATCH 189/501] Update .github/workflows/condarise.yaml Co-authored-by: Michael Franklin --- .github/workflows/condarise.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/condarise.yaml b/.github/workflows/condarise.yaml index 7eabd316cda..c63d457c021 100644 --- a/.github/workflows/condarise.yaml +++ b/.github/workflows/condarise.yaml @@ -49,4 +49,4 @@ jobs: -H "Authorization: token ${{ secrets.ANALYSIS_SERVER_GITHUB_TOKEN }}" \ -H "Accept: application/vnd.github.v3+json" \ https://api.github.com/repos/populationgenomics/analysis-runner/actions/workflows/6364059/dispatches \ - -d "{\"ref\": \"main\", \"inputs\": {\"hail_version\": \"${{ env.version }}\"}}" \ No newline at end of file + -d "{\"ref\": \"main\", \"inputs\": {\"hail_version\": \"${{ env.version }}\"}}" From 19fbee182707832f66c3265203109a0b6f194bd2 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Fri, 5 Mar 2021 10:09:46 +1100 Subject: [PATCH 190/501] Only run the step to redeploy the analysis-server once. (#74) --- .github/workflows/condarise.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/condarise.yaml b/.github/workflows/condarise.yaml index c63d457c021..d6dd62ba93d 100644 --- a/.github/workflows/condarise.yaml +++ b/.github/workflows/condarise.yaml @@ -43,6 +43,8 @@ jobs: upload ${CONDA_PREFIX}/conda-bld/**/*.tar.bz2 - name: Redeploy the analysis-server + # Only run this step once. 
+ if: ${{ matrix.os == 'ubuntu-latest' }} run: | curl --fail \ -X POST \ From ede2f4b408dc33c1d5d8eff03461da51db3148b5 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Fri, 5 Mar 2021 10:10:53 +1100 Subject: [PATCH 191/501] Merge upstream changes (#73) --- batch/Dockerfile.worker | 1 + batch/batch/globals.py | 2 +- batch/batch/worker/worker.py | 25 +- batch/deployment.yaml | 10 - batch/sql/increase-test-and-dev-pool-sizes.py | 31 + build.yaml | 208 ++++- ci/ci/templates/index.html | 4 +- ci/ci/templates/pr-table.html | 6 +- ci/ci/templates/user.html | 4 +- devbin/functions.sh | 28 +- docker/Dockerfile.service-java-run-base | 35 + docker/Makefile | 14 + docker/requirements.txt | 2 +- hail/build.gradle | 2 + hail/build.sbt | 1 + hail/python/hail/__init__.py | 5 +- hail/python/hail/backend/backend.py | 3 +- hail/python/hail/backend/service_backend.py | 49 +- hail/python/hail/context.py | 118 ++- hail/python/hail/fs/fs.py | 21 + hail/python/hail/fs/google_fs.py | 36 +- hail/python/hail/fs/hadoop_fs.py | 6 + hail/python/hail/fs/local_fs.py | 11 +- hail/python/hail/methods/relatedness/king.py | 11 +- hail/python/hailtop/aiotools/fs.py | 64 +- hail/python/requirements.txt | 2 +- .../hail/experimental/test_annotation_db.py | 9 +- .../test/hail/experimental/test_dnd_array.py | 18 +- .../hail/experimental/test_experimental.py | 57 +- .../hail/experimental/test_vcf_combiner.py | 9 +- hail/python/test/hail/expr/test_expr.py | 56 +- hail/python/test/hail/expr/test_ndarrays.py | 6 +- hail/python/test/hail/expr/test_show.py | 3 +- hail/python/test/hail/expr/test_types.py | 2 + .../test/hail/genetics/test_pedigree.py | 1 + .../hail/genetics/test_reference_genome.py | 5 + hail/python/test/hail/helpers.py | 27 +- hail/python/test/hail/linalg/test_linalg.py | 268 +++--- .../hail/matrixtable/test_file_formats.py | 1 + .../matrixtable/test_grouped_matrix_table.py | 6 + .../hail/matrixtable/test_matrix_table.py | 70 ++ .../test/hail/methods/test_family_methods.py | 6 + hail/python/test/hail/methods/test_impex.py | 92 ++ hail/python/test/hail/methods/test_king.py | 15 +- hail/python/test/hail/methods/test_misc.py | 3 + hail/python/test/hail/methods/test_pca.py | 7 +- hail/python/test/hail/methods/test_qc.py | 9 +- hail/python/test/hail/methods/test_statgen.py | 35 +- .../test/hail/table/test_grouped_table.py | 3 + hail/python/test/hail/table/test_table.py | 80 +- hail/python/test/hail/test_context.py | 3 +- hail/python/test/hail/utils/test_utils.py | 3 + .../test/hailtop/aiotools/copy_test_specs.py | 830 +++++++++--------- .../aiotools/generate_copy_test_specs.py | 4 +- .../python/test/hailtop/aiotools/test_copy.py | 44 +- .../is/hail/annotations/CodeOrdering.scala | 708 --------------- .../is/hail/annotations/RegionPool.scala | 8 +- .../scala/is/hail/asm4s/ClassBuilder.scala | 8 +- .../main/scala/is/hail/backend/Backend.scala | 2 +- .../is/hail/backend/local/LocalBackend.scala | 2 +- .../hail/backend/service/ServiceBackend.scala | 612 +++++++++---- .../is/hail/backend/spark/SparkBackend.scala | 2 +- .../scala/is/hail/expr/ir/BinarySearch.scala | 11 +- .../scala/is/hail/expr/ir/ComparisonOp.scala | 7 +- .../src/main/scala/is/hail/expr/ir/Emit.scala | 8 +- .../is/hail/expr/ir/EmitClassBuilder.scala | 147 +--- .../is/hail/expr/ir/EmitCodeBuilder.scala | 2 + .../scala/is/hail/expr/ir/EmitStream.scala | 68 +- .../main/scala/is/hail/expr/ir/Parser.scala | 7 + .../main/scala/is/hail/expr/ir/TableIR.scala | 8 +- .../expr/ir/agg/CollectAsSetAggregator.scala | 5 +- .../expr/ir/agg/DownsampleAggregator.scala | 5 
+- .../hail/expr/ir/agg/GroupedAggregator.scala | 7 +- .../hail/expr/ir/agg/TakeByAggregator.scala | 32 +- .../expr/ir/functions/IntervalFunctions.scala | 6 +- .../hail/expr/ir/lowering/LowerTableIR.scala | 26 +- .../expr/ir/orderings/BinaryOrdering.scala | 39 + .../hail/expr/ir/orderings/CallOrdering.scala | 24 + .../hail/expr/ir/orderings/CodeOrdering.scala | 308 +++++++ .../expr/ir/orderings/IntervalOrdering.scala | 194 ++++ .../expr/ir/orderings/IterableOrdering.scala | 185 ++++ .../expr/ir/orderings/LocusOrdering.scala | 48 + .../expr/ir/orderings/PrimitiveOrdering.scala | 112 +++ .../expr/ir/orderings/ShuffleOrdering.scala | 27 + .../expr/ir/orderings/StringOrdering.scala | 27 + .../expr/ir/orderings/StructOrdering.scala | 161 ++++ .../main/scala/is/hail/io/fs/HadoopFS.scala | 8 + hail/src/main/scala/is/hail/lir/Emit.scala | 7 +- hail/src/main/scala/is/hail/lir/package.scala | 4 +- .../is/hail/services/JSONLogLayout.scala | 79 ++ .../main/scala/is/hail/services/Tokens.scala | 5 +- .../main/scala/is/hail/services/package.scala | 20 +- .../scala/is/hail/services/shuffler/LSM.scala | 2 +- .../services/shuffler/ShuffleClient.scala | 23 +- .../services/shuffler/ShuffleCodecSpec.scala | 13 +- .../shuffler/server/ShuffleServer.scala | 8 +- hail/src/main/scala/is/hail/types/Box.scala | 20 +- .../scala/is/hail/types/physical/PArray.scala | 8 +- .../is/hail/types/physical/PBaseStruct.scala | 12 +- .../is/hail/types/physical/PBinary.scala | 29 +- .../is/hail/types/physical/PBoolean.scala | 11 +- .../types/physical/PCanonicalBaseStruct.scala | 4 +- .../hail/types/physical/PCanonicalCall.scala | 8 +- .../hail/types/physical/PCanonicalLocus.scala | 4 +- .../types/physical/PCanonicalShuffle.scala | 6 +- .../scala/is/hail/types/physical/PDict.scala | 6 - .../is/hail/types/physical/PFloat32.scala | 19 +- .../is/hail/types/physical/PFloat64.scala | 19 +- .../scala/is/hail/types/physical/PInt32.scala | 19 +- .../scala/is/hail/types/physical/PInt64.scala | 19 +- .../is/hail/types/physical/PInterval.scala | 8 +- .../is/hail/types/physical/PNDArray.scala | 5 +- .../scala/is/hail/types/physical/PSet.scala | 6 - .../is/hail/types/physical/PString.scala | 13 +- .../is/hail/types/physical/PStruct.scala | 12 +- .../hail/types/physical/PSubsetStruct.scala | 32 +- .../scala/is/hail/types/physical/PTuple.scala | 10 - .../scala/is/hail/types/physical/PType.scala | 14 - .../hail/types/physical/PUnrealizable.scala | 9 +- .../is/hail/types/physical/stypes/SType.scala | 11 +- .../stypes/concrete/SBaseStructPointer.scala | 11 +- .../stypes/concrete/SBinaryPointer.scala | 5 +- .../stypes/concrete/SCanonicalCall.scala | 7 +- .../concrete/SCanonicalLocusPointer.scala | 7 +- .../concrete/SCanonicalShufflePointer.scala | 7 +- .../stypes/concrete/SIndexablePointer.scala | 5 +- .../stypes/concrete/SIntervalPointer.scala | 10 +- .../stypes/concrete/SNDArrayPointer.scala | 17 +- .../stypes/concrete/SStringPointer.scala | 7 +- .../stypes/concrete/SSubsetStruct.scala | 16 +- .../{SStruct.scala => SBaseStruct.scala} | 10 +- .../physical/stypes/interfaces/SCall.scala | 2 + .../stypes/interfaces/SInterval.scala | 8 +- .../physical/stypes/interfaces/SLocus.scala | 7 +- .../physical/stypes/interfaces/SStream.scala | 5 +- .../physical/stypes/interfaces/SVoid.scala | 5 +- .../physical/stypes/primitives/SBoolean.scala | 5 +- .../physical/stypes/primitives/SFloat32.scala | 5 +- .../physical/stypes/primitives/SFloat64.scala | 5 +- .../physical/stypes/primitives/SInt32.scala | 5 +- .../physical/stypes/primitives/SInt64.scala | 10 +- 
.../main/scala/is/hail/utils/package.scala | 2 +- .../scala/is/hail/expr/ir/OrderingSuite.scala | 7 +- .../is/hail/expr/ir/StagedBTreeSuite.scala | 6 +- .../scala/is/hail/expr/ir/TableIRSuite.scala | 5 +- monitoring/monitoring/templates/billing.html | 55 +- query/Dockerfile | 6 +- query/Makefile | 4 +- query/deployment.yaml | 90 +- query/log4j.properties | 3 + query/query/query.py | 245 +++--- query/query/sockets.py | 266 ++++++ query/test/test_query.py | 17 - tls/config.yaml | 3 - 154 files changed, 4244 insertions(+), 2274 deletions(-) create mode 100644 batch/sql/increase-test-and-dev-pool-sizes.py create mode 100644 docker/Dockerfile.service-java-run-base delete mode 100644 hail/src/main/scala/is/hail/annotations/CodeOrdering.scala create mode 100644 hail/src/main/scala/is/hail/expr/ir/orderings/BinaryOrdering.scala create mode 100644 hail/src/main/scala/is/hail/expr/ir/orderings/CallOrdering.scala create mode 100644 hail/src/main/scala/is/hail/expr/ir/orderings/CodeOrdering.scala create mode 100644 hail/src/main/scala/is/hail/expr/ir/orderings/IntervalOrdering.scala create mode 100644 hail/src/main/scala/is/hail/expr/ir/orderings/IterableOrdering.scala create mode 100644 hail/src/main/scala/is/hail/expr/ir/orderings/LocusOrdering.scala create mode 100644 hail/src/main/scala/is/hail/expr/ir/orderings/PrimitiveOrdering.scala create mode 100644 hail/src/main/scala/is/hail/expr/ir/orderings/ShuffleOrdering.scala create mode 100644 hail/src/main/scala/is/hail/expr/ir/orderings/StringOrdering.scala create mode 100644 hail/src/main/scala/is/hail/expr/ir/orderings/StructOrdering.scala create mode 100644 hail/src/main/scala/is/hail/services/JSONLogLayout.scala rename hail/src/main/scala/is/hail/types/physical/stypes/interfaces/{SStruct.scala => SBaseStruct.scala} (92%) create mode 100644 query/log4j.properties create mode 100644 query/query/sockets.py delete mode 100644 query/test/test_query.py diff --git a/batch/Dockerfile.worker b/batch/Dockerfile.worker index e6a3aa1f776..9af5ad90937 100644 --- a/batch/Dockerfile.worker +++ b/batch/Dockerfile.worker @@ -45,3 +45,4 @@ COPY batch/batch /batch/batch/ RUN hail-pip-install --no-deps /batch && rm -rf /batch COPY batch/hail.jar / +COPY query/log4j.properties / diff --git a/batch/batch/globals.py b/batch/batch/globals.py index 12ef144d072..bd18e1499ed 100644 --- a/batch/batch/globals.py +++ b/batch/batch/globals.py @@ -28,7 +28,7 @@ BATCH_FORMAT_VERSION = 5 STATUS_FORMAT_VERSION = 3 -INSTANCE_VERSION = 15 +INSTANCE_VERSION = 16 WORKER_CONFIG_VERSION = 3 MAX_PERSISTENT_SSD_SIZE_BYTES = 65536 * (1024**3) diff --git a/batch/batch/worker/worker.py b/batch/batch/worker/worker.py index fae418239a1..87ce568bac2 100644 --- a/batch/batch/worker/worker.py +++ b/batch/batch/worker/worker.py @@ -525,7 +525,7 @@ def __str__(self): class JVMProcess: - classpath = f'{find_spark_home()}/jars/*:/hail.jar' + classpath = f'{find_spark_home()}/jars/*:/hail.jar:/log4j.properties' stack_size = 512 * 1024 thread_pool = None @@ -540,13 +540,13 @@ def __init__(self, job, main_spec): self.java_args = main_spec['command'] self.proc = None - self.timing = {} + self.timing = {'running': dict()} self.state = 'pending' self.log = '' async def run(self, worker): log.info(f'running {self}') - self.timing['start_time'] = time_msecs() + self.timing['running']['start_time'] = time_msecs() self.proc = await asyncio.create_subprocess_exec( 'java', *self.flags, @@ -557,9 +557,9 @@ async def run(self, worker): out, err = await self.proc.communicate() finish_time = time_msecs() - 
self.timing['finish_time'] = finish_time - start_time = self.timing['start_time'] - self.timing['duration'] = finish_time - start_time + self.timing['running']['finish_time'] = finish_time + start_time = self.timing['running']['start_time'] + self.timing['running']['duration'] = finish_time - start_time self.log += 'STDOUT:\n' self.log += out.decode() @@ -1063,6 +1063,19 @@ def __init__(self, if input_files or output_files: raise Exception("i/o not supported") + for envvar in self.env: + assert envvar['name'] not in {'HAIL_DEPLOY_CONFIG_FILE', 'HAIL_TOKENS_FILE', + 'HAIL_SSL_CONFIG_FILE', 'HAIL_GSA_KEY_FILE', + 'HAIL_WORKER_SCRATCH_DIR'}, envvar + + self.env.append({'name': 'HAIL_DEPLOY_CONFIG_FILE', + 'value': f'{self.scratch}/secrets/deploy-config/deploy-config.json'}) + self.env.append({'name': 'HAIL_TOKENS_FILE', + 'value': f'{self.scratch}/secrets/user-tokens/tokens.json'}) + self.env.append({'name': 'HAIL_SSL_CONFIG_FILE', + 'value': f'{self.scratch}/secrets/ssl-config/ssl-config.json'}) + self.env.append({'name': 'HAIL_GSA_KEY_FILE', + 'value': f'{self.scratch}/secrets/gsa-key/key.json'}) self.env.append({'name': 'HAIL_WORKER_SCRATCH_DIR', 'value': self.scratch}) # main container diff --git a/batch/deployment.yaml b/batch/deployment.yaml index 622f562c7b2..f4c67223d71 100644 --- a/batch/deployment.yaml +++ b/batch/deployment.yaml @@ -222,16 +222,6 @@ spec: value: "{{ global.k8s_server_url }}" - name: HAIL_SHA value: "{{ code.sha }}" -{% if not deploy %} - - name: HAIL_BATCH_JOB_DEFAULT_CPU - value: "0.1" - - name: HAIL_BATCH_JOB_DEFAULT_MEMORY - value: "375M" - - name: HAIL_BATCH_JOB_DEFAULT_STORAGE - value: "1G" - - name: HAIL_BATCH_JOB_DEFAULT_WORKER_TYPE - value: "standard" -{% endif %} {% if deploy %} - name: HAIL_BATCH_BUCKET_NAME valueFrom: diff --git a/batch/sql/increase-test-and-dev-pool-sizes.py b/batch/sql/increase-test-and-dev-pool-sizes.py new file mode 100644 index 00000000000..fd6cea6d0a4 --- /dev/null +++ b/batch/sql/increase-test-and-dev-pool-sizes.py @@ -0,0 +1,31 @@ +import os +import asyncio +from gear import Database + + +async def main(): + if os.environ['HAIL_SCOPE'] == 'deploy': + return + + worker_cores = 16 + max_instances = 8 + max_live_instances = 8 + + db = Database() + await db.async_init() + + await db.execute_update( + ''' +UPDATE pools +SET worker_cores = %s +''', (worker_cores)) + + await db.execute_update( + ''' +UPDATE inst_colls +SET max_instances = %s, max_live_instances = %s +''', (max_instances, max_live_instances)) + + +loop = asyncio.get_event_loop() +loop.run_until_complete(main()) diff --git a/build.yaml b/build.yaml index 2d687dfddec..acc644b126f 100644 --- a/build.yaml +++ b/build.yaml @@ -658,8 +658,14 @@ steps: - from: /just-jar/hail.jar to: /batch/hail.jar dependsOn: - - service_base_image - build_hail_jar_only + - kind: buildImage + name: service_java_run_base_image + dockerFile: docker/Dockerfile.service-java-run-base + contextPath: . 
+ publishAs: service-java-run-base + dependsOn: + - hail_ubuntu_image - kind: buildImage name: query_image dockerFile: query/Dockerfile @@ -669,7 +675,7 @@ steps: - from: /just-jar/hail.jar to: /query/hail.jar dependsOn: - - service_base_image + - service_java_run_base_image - build_hail_jar_only - kind: deploy name: deploy_query_sa @@ -738,6 +744,27 @@ steps: dependsOn: - default_ns - hail_repl_image + - kind: runImage + name: upload_test_resources_to_gcs + image: + valueFrom: base_image.image + script: | + set -ex + rm -rf repo + mkdir repo + cd repo + {{ code.checkout_script }} + gcloud -q auth activate-service-account --key-file=/test-gsa-key/key.json + gsutil -m cp -r hail/src/test/resources/* gs://cpg-hail-test/{{ token }}/test/resources/ + gsutil -m cp -r hail/python/hail/docs/data/* gs://cpg-hail-test/{{ token }}/doctest/data/ + secrets: + - name: test-gsa-key + namespace: + valueFrom: default_ns.name + mountPath: /test-gsa-key + dependsOn: + - default_ns + - base_image - kind: runImage name: test_hail_java_0 image: @@ -751,10 +778,8 @@ steps: mkdir -p src/test tar xzf resources.tar.gz -C src/test tar xzf splits.tar.gz - gcloud -q auth activate-service-account --key-file=/test-gsa-key/key.json - gsutil -m cp -r src/test/resources/fs gs://cpg-hail-test/{{ token }}/test/resources/ export HAIL_TEST_SKIP_R=1 - export HAIL_GS_FS_TEST_RESOURCES=gs://cpg-hail-test/{{ token }}/test/resources/fs + export HAIL_GS_FS_TEST_RESOURCES=gs://cpg-hail-test/{{ upload_test_resources_to_gcs.token }}/test/resources/fs java -cp hail-test.jar:$SPARK_HOME/jars/* org.testng.TestNG -listener is.hail.LogTestListener testng-splits-0.xml inputs: - from: /resources.tar.gz @@ -773,6 +798,7 @@ steps: mountPath: /test-gsa-key dependsOn: - default_ns + - upload_test_resources_to_gcs - hail_run_image - build_hail - kind: runImage @@ -788,10 +814,8 @@ steps: mkdir -p src/test tar xzf resources.tar.gz -C src/test tar xzf splits.tar.gz - gcloud -q auth activate-service-account --key-file=/test-gsa-key/key.json - gsutil -m cp -r src/test/resources/fs gs://cpg-hail-test/{{ token }}/test/resources/ export HAIL_TEST_SKIP_R=1 - export HAIL_GS_FS_TEST_RESOURCES=gs://cpg-hail-test/{{ token }}/test/resources/fs + export HAIL_GS_FS_TEST_RESOURCES=gs://cpg-hail-test/{{ upload_test_resources_to_gcs.token }}/test/resources/fs java -cp hail-test.jar:$SPARK_HOME/jars/* org.testng.TestNG -listener is.hail.LogTestListener testng-splits-1.xml inputs: - from: /resources.tar.gz @@ -810,6 +834,7 @@ steps: mountPath: /test-gsa-key dependsOn: - default_ns + - upload_test_resources_to_gcs - hail_run_image - build_hail - kind: runImage @@ -825,10 +850,8 @@ steps: mkdir -p src/test tar xzf resources.tar.gz -C src/test tar xzf splits.tar.gz - gcloud -q auth activate-service-account --key-file=/test-gsa-key/key.json - gsutil -m cp -r src/test/resources/fs gs://cpg-hail-test/{{ token }}/test/resources/ export HAIL_TEST_SKIP_R=1 - export HAIL_GS_FS_TEST_RESOURCES=gs://cpg-hail-test/{{ token }}/test/resources/fs + export HAIL_GS_FS_TEST_RESOURCES=gs://cpg-hail-test/{{ upload_test_resources_to_gcs.token }}/test/resources/fs java -cp hail-test.jar:$SPARK_HOME/jars/* org.testng.TestNG -listener is.hail.LogTestListener testng-splits-2.xml inputs: - from: /resources.tar.gz @@ -847,6 +870,7 @@ steps: mountPath: /test-gsa-key dependsOn: - default_ns + - upload_test_resources_to_gcs - hail_run_image - build_hail - kind: runImage @@ -862,10 +886,8 @@ steps: mkdir -p src/test tar xzf resources.tar.gz -C src/test tar xzf splits.tar.gz - gcloud -q auth 
activate-service-account --key-file=/test-gsa-key/key.json - gsutil -m cp -r src/test/resources/fs gs://cpg-hail-test/{{ token }}/test/resources/ export HAIL_TEST_SKIP_R=1 - export HAIL_GS_FS_TEST_RESOURCES=gs://cpg-hail-test/{{ token }}/test/resources/fs + export HAIL_GS_FS_TEST_RESOURCES=gs://cpg-hail-test/{{ upload_test_resources_to_gcs.token }}/test/resources/fs java -cp hail-test.jar:$SPARK_HOME/jars/* org.testng.TestNG -listener is.hail.LogTestListener testng-splits-3.xml inputs: - from: /resources.tar.gz @@ -884,6 +906,7 @@ steps: mountPath: /test-gsa-key dependsOn: - default_ns + - upload_test_resources_to_gcs - hail_run_image - build_hail - kind: runImage @@ -899,10 +922,8 @@ steps: mkdir -p src/test tar xzf resources.tar.gz -C src/test tar xzf splits.tar.gz - gcloud -q auth activate-service-account --key-file=/test-gsa-key/key.json - gsutil -m cp -r src/test/resources/fs gs://cpg-hail-test/{{ token }}/test/resources/ export HAIL_TEST_SKIP_R=1 - export HAIL_GS_FS_TEST_RESOURCES=gs://cpg-hail-test/{{ token }}/test/resources/fs + export HAIL_GS_FS_TEST_RESOURCES=gs://cpg-hail-test/{{ upload_test_resources_to_gcs.token }}/test/resources/fs java -cp hail-test.jar:$SPARK_HOME/jars/* org.testng.TestNG -listener is.hail.LogTestListener testng-splits-4.xml inputs: - from: /resources.tar.gz @@ -921,6 +942,7 @@ steps: mountPath: /test-gsa-key dependsOn: - default_ns + - upload_test_resources_to_gcs - hail_run_image - build_hail - kind: buildImage @@ -1954,6 +1976,8 @@ steps: script: /io/sql/insert_nonpreemptible_resources.py - name: fix-schedule-job script: /io/sql/fix-schedule-job.sql + - name: increase-test-and-dev-pool-sizes + script: /io/sql/increase-test-and-dev-pool-sizes.py inputs: - from: /repo/batch/sql to: /io/ @@ -2172,6 +2196,156 @@ steps: - memory_image - deploy_memory_sa - create_certs + - kind: runImage + name: test_hail_python_service_backend_0 + image: + valueFrom: hail_run_image.image + script: | + set -ex + cd /io + tar xzf test.tar.gz + tar xvf wheel-container.tar + python3 -m pip install --no-dependencies hail-*-py3-none-any.whl + export PYTEST_SPLITS=3 + export PYTEST_SPLIT_INDEX=0 + export HAIL_TEST_RESOURCES_DIR=gs://cpg-hail-test/{{ upload_test_resources_to_gcs.token }}/test/resources + export HAIL_DOCTEST_DATA_DIR=gs://cpg-hail-test/{{ upload_test_resources_to_gcs.token }}/doctest/data + export HAIL_QUERY_BACKEND=service + export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json + hailctl config set batch/billing_project test + hailctl config set batch/bucket cpg-hail-test + python3 -m pytest -n 8 --ignore=test/hailtop/ --log-cli-level=INFO -s -vv --instafail --durations=50 test + inputs: + - from: /wheel-container.tar + to: /io/wheel-container.tar + - from: /test.tar.gz + to: /io/test.tar.gz + secrets: + - name: gce-deploy-config + namespace: + valueFrom: default_ns.name + mountPath: /deploy-config + - name: test-tokens + namespace: + valueFrom: default_ns.name + mountPath: /user-tokens + - name: ssl-config-query-tests + namespace: + valueFrom: default_ns.name + mountPath: /ssl-config + - name: test-gsa-key + namespace: + valueFrom: default_ns.name + mountPath: /test-gsa-key + timeout: 3600 + dependsOn: + - default_ns + - upload_test_resources_to_gcs + - deploy_query + - deploy_memory + - deploy_shuffler + - hail_run_image + - build_hail + - kind: runImage + name: test_hail_python_service_backend_1 + image: + valueFrom: hail_run_image.image + script: | + set -ex + cd /io + tar xzf test.tar.gz + tar xvf wheel-container.tar + python3 -m pip install 
--no-dependencies hail-*-py3-none-any.whl + export PYTEST_SPLITS=3 + export PYTEST_SPLIT_INDEX=1 + export HAIL_TEST_RESOURCES_DIR=gs://cpg-hail-test/{{ upload_test_resources_to_gcs.token }}/test/resources + export HAIL_DOCTEST_DATA_DIR=gs://cpg-hail-test/{{ upload_test_resources_to_gcs.token }}/doctest/data + export HAIL_QUERY_BACKEND=service + export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json + hailctl config set batch/billing_project test + hailctl config set batch/bucket cpg-hail-test + python3 -m pytest -n 8 --ignore=test/hailtop/ --log-cli-level=INFO -s -vv --instafail --durations=50 test + inputs: + - from: /wheel-container.tar + to: /io/wheel-container.tar + - from: /test.tar.gz + to: /io/test.tar.gz + secrets: + - name: gce-deploy-config + namespace: + valueFrom: default_ns.name + mountPath: /deploy-config + - name: test-tokens + namespace: + valueFrom: default_ns.name + mountPath: /user-tokens + - name: ssl-config-query-tests + namespace: + valueFrom: default_ns.name + mountPath: /ssl-config + - name: test-gsa-key + namespace: + valueFrom: default_ns.name + mountPath: /test-gsa-key + timeout: 3600 + dependsOn: + - default_ns + - upload_test_resources_to_gcs + - deploy_query + - deploy_memory + - deploy_shuffler + - hail_run_image + - build_hail + - kind: runImage + name: test_hail_python_service_backend_2 + image: + valueFrom: hail_run_image.image + script: | + set -ex + cd /io + tar xzf test.tar.gz + tar xvf wheel-container.tar + python3 -m pip install --no-dependencies hail-*-py3-none-any.whl + export PYTEST_SPLITS=3 + export PYTEST_SPLIT_INDEX=2 + export HAIL_TEST_RESOURCES_DIR=gs://cpg-hail-test/{{ upload_test_resources_to_gcs.token }}/test/resources + export HAIL_DOCTEST_DATA_DIR=gs://cpg-hail-test/{{ upload_test_resources_to_gcs.token }}/doctest/data + export HAIL_QUERY_BACKEND=service + export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json + hailctl config set batch/billing_project test + hailctl config set batch/bucket cpg-hail-test + python3 -m pytest -n 8 --ignore=test/hailtop/ --log-cli-level=INFO -s -vv --instafail --durations=50 test + inputs: + - from: /wheel-container.tar + to: /io/wheel-container.tar + - from: /test.tar.gz + to: /io/test.tar.gz + secrets: + - name: gce-deploy-config + namespace: + valueFrom: default_ns.name + mountPath: /deploy-config + - name: test-tokens + namespace: + valueFrom: default_ns.name + mountPath: /user-tokens + - name: ssl-config-query-tests + namespace: + valueFrom: default_ns.name + mountPath: /ssl-config + - name: test-gsa-key + namespace: + valueFrom: default_ns.name + mountPath: /test-gsa-key + timeout: 3600 + dependsOn: + - default_ns + - upload_test_resources_to_gcs + - deploy_query + - deploy_memory + - deploy_shuffler + - hail_run_image + - build_hail - kind: runImage name: test_lsm image: diff --git a/ci/ci/templates/index.html b/ci/ci/templates/index.html index 67c64a42162..aaf83e5516c 100644 --- a/ci/ci/templates/index.html +++ b/ci/ci/templates/index.html @@ -32,7 +32,7 @@

{{ wb.branch }}

PRs

{% if wb.prs is not none %} {% if wb.prs|length > 0 %} - {{ pr_table(wb) }} + {{ pr_table(wb, "prs", "prsSearchBar") }} {% else %} No PRs. {% endif %} @@ -49,6 +49,6 @@

Authorize SHA

{% endblock %} diff --git a/ci/ci/templates/pr-table.html b/ci/ci/templates/pr-table.html index 16d28be3739..405392a45db 100644 --- a/ci/ci/templates/pr-table.html +++ b/ci/ci/templates/pr-table.html @@ -1,7 +1,7 @@ -{% macro pr_table(wb, id) %} +{% macro pr_table(wb, id, searchBarId) %}
- - + +
diff --git a/ci/ci/templates/user.html b/ci/ci/templates/user.html index 836b1c76b38..2b3996d3bb8 100644 --- a/ci/ci/templates/user.html +++ b/ci/ci/templates/user.html @@ -39,7 +39,7 @@

Welcome, {{ username }}!

{% for wb in pr_wbs %} {% if wb.prs is not none %}

{{ wb.branch }} PRs

- {{ pr_table(wb, "myprs") }} + {{ pr_table(wb, "myprs", "myprsSearchBar") }} {% endif %} {% endfor %} @@ -48,7 +48,7 @@

{{ wb.branch }} PRs

{% for wb in review_wbs %} {% if wb.prs is not none %}

{{ wb.branch }} Assigned Reviews

- {{ pr_table(wb, "reviews") }} + {{ pr_table(wb, "reviews", "reviewsSearchBar") }} {% endif %} {% endfor %} diff --git a/devbin/functions.sh b/devbin/functions.sh index 830625aa493..64682cb9a97 100644 --- a/devbin/functions.sh +++ b/devbin/functions.sh @@ -31,7 +31,7 @@ kssh() { # # # kssh admin-pod # root@admin-pod-5d77d69445-86m2h:/# - kubectl -n ${2:-default} exec -it "$(kfind1 "$1" "$2")" -- /bin/bash + kubectl -n ${2:-default} exec -it "$(kfind1 "$1" "$2")" ${3:+--container="$3"} -- /bin/bash } klog() { @@ -48,7 +48,7 @@ klog() { mkdir -p $dir for x in $(kubectl get pods -l app="${1}" -n "${2}" | tail -n +2 | awk '{print $1}') do - kubectl logs --tail=999999999999999 $x -n "${2}" \ + kubectl logs --tail=999999999999999 $x -n "${2}" --all-containers \ | grep -Ev 'healthcheck|metrics' \ > $dir/$x & done @@ -66,22 +66,22 @@ kjlog() { # # # kjlog batch dking # ... - # {"asctime":"2021-02-12 16:58:49,540","container":"batch-8c6c74ffd-d498g","x_real_ip":"35.187.114.193","connection_id":null,"exc_info":null,"message":"https GET / done in 0.0015127049991860986s: 302"} - # {"asctime":"2021-02-12 16:58:57,850","container":"batch-8c6c74ffd-lzfdh","x_real_ip":"104.197.30.241","connection_id":null,"exc_info":null,"message":"https GET / done in 0.0010090250007124268s: 302"} - # {"asctime":"2021-02-12 16:59:11,747","container":"batch-8c6c74ffd-lzfdh","x_real_ip":"18.31.31.24","connection_id":null,"exc_info":null,"message":"https GET /api/v1alpha/batches/182764 done in 0.04281349099983345s: 200"} - # {"asctime":"2021-02-12 16:59:12,907","container":"batch-8c6c74ffd-jmhmq","x_real_ip":"35.198.194.122","connection_id":null,"exc_info":null,"message":"https GET / done in 0.0008628759969724342s: 302"} + # {"asctime":"2021-02-12 16:58:49,540","pod":"batch-8c6c74ffd-d498g","x_real_ip":"35.187.114.193","connection_id":null,"exc_info":null,"message":"https GET / done in 0.0015127049991860986s: 302"} + # {"asctime":"2021-02-12 16:58:57,850","pod":"batch-8c6c74ffd-lzfdh","x_real_ip":"104.197.30.241","connection_id":null,"exc_info":null,"message":"https GET / done in 0.0010090250007124268s: 302"} + # {"asctime":"2021-02-12 16:59:11,747","pod":"batch-8c6c74ffd-lzfdh","x_real_ip":"18.31.31.24","connection_id":null,"exc_info":null,"message":"https GET /api/v1alpha/batches/182764 done in 0.04281349099983345s: 200"} + # {"asctime":"2021-02-12 16:59:12,907","pod":"batch-8c6c74ffd-jmhmq","x_real_ip":"35.198.194.122","connection_id":null,"exc_info":null,"message":"https GET / done in 0.0008628759969724342s: 302"} dir=$(mktemp -d) mkdir -p $dir for x in $(kubectl get pods -l app="${1}" -n "${2}" | tail -n +2 | awk '{print $1}') do - kubectl logs --tail=999999999999999 $x -n "${2}" \ + kubectl logs --tail=999999999999999 $x -n "${2}" --all-containers \ | grep -Ev 'healthcheck|metrics' \ | grep '^{' \ - | jq -c '{asctime, container: "'$x'", x_real_ip, connection_id, exc_info, message}' \ + | jq -c '{time: (if .asctime then .asctime else .["@timestamp"] end), pod: "'$x'", x_real_ip, connection_id, exc_info, message}' \ > $dir/$x & done wait - cat $dir/* | sort | jq -c '{asctime, container, x_real_ip, connection_id, exc_info, message}' + cat $dir/* | sort | jq -c '{time, pod, x_real_ip, connection_id, exc_info, message}' } kjlogs() { @@ -94,8 +94,8 @@ kjlogs() { # # # kjlogs default batch batch-driver # ... 
- # {"asctime":"2021-02-12 17:01:53,832","container":"batch-8c6c74ffd-lzfdh","x_real_ip":null,"connection_id":null,"exc_info":null,"message":"https GET /api/v1alpha/batches/182767 done in 0.0401439390006999s: 200"} - # {"asctime":"2021-02-12 17:01:55,553","container":"batch-driver-6748cd87f9-kdjv8","x_real_ip":null,"connection_id":null,"exc_info":null,"message":"marking job (182768, 140) complete new_state Success"} + # {"asctime":"2021-02-12 17:01:53,832","pod":"batch-8c6c74ffd-lzfdh","x_real_ip":null,"connection_id":null,"exc_info":null,"message":"https GET /api/v1alpha/batches/182767 done in 0.0401439390006999s: 200"} + # {"asctime":"2021-02-12 17:01:55,553","pod":"batch-driver-6748cd87f9-kdjv8","x_real_ip":null,"connection_id":null,"exc_info":null,"message":"marking job (182768, 140) complete new_state Success"} dir=$(mktemp -d) mkdir -p $dir namespace="${1}" @@ -104,15 +104,15 @@ kjlogs() { do for x in $(kubectl get pods -l app="${app}" -n "${namespace}" | tail -n +2 | awk '{print $1}') do - kubectl logs --tail=999999999999999 $x -n "${namespace}" \ + kubectl logs --tail=999999999999999 $x -n "${namespace}" --all-containers \ | grep -Ev 'healthcheck|metrics' \ | grep '^{' \ - | jq -c '{asctime, x_real_ip, connection_id, exc_info, message, container: "'$x'"}' \ + | jq -c '{time: (if .asctime then .asctime else .["@timestamp"] end), pod: "'$x'", x_real_ip, connection_id, exc_info, message}' \ > $dir/$x & done done wait - cat $dir/* | sort | jq -c '{asctime, container, x_real_ip, connection_id, exc_info, message}' + cat $dir/* | sort | jq -c '{time, pod, x_real_ip, connection_id, exc_info, message}' } knodes() { diff --git a/docker/Dockerfile.service-java-run-base b/docker/Dockerfile.service-java-run-base new file mode 100644 index 00000000000..a9131df63c1 --- /dev/null +++ b/docker/Dockerfile.service-java-run-base @@ -0,0 +1,35 @@ +FROM {{ hail_ubuntu_image.image }} + +RUN hail-apt-get-install \ + htop \ + curl \ + rsync \ + openjdk-8-jdk-headless \ + liblapack3 + +COPY docker/requirements.txt . +RUN hail-pip-install -r requirements.txt pyspark==2.4.0 + +ENV SPARK_HOME /usr/local/lib/python3.7/dist-packages/pyspark +ENV PYSPARK_PYTHON python3 + +# Regarding explicitly selecting 2.0.1: https://github.com/hail-is/hail/issues/8343 +RUN curl --silent --show-error --location \ + https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop2-2.0.1.jar \ + > ${SPARK_HOME}/jars/gcs-connector-hadoop2-2.0.1.jar +COPY docker/core-site.xml ${SPARK_HOME}/conf/core-site.xml + +COPY docker/service-base-requirements.txt . 
+RUN hail-pip-install -r service-base-requirements.txt + +COPY hail/python/setup-hailtop.py /hailtop/setup.py +COPY hail/python/hailtop /hailtop/hailtop/ +RUN hail-pip-install /hailtop && rm -rf /hailtop + +COPY gear/setup.py /gear/setup.py +COPY gear/gear /gear/gear/ +RUN hail-pip-install /gear && rm -rf /gear + +COPY web_common/setup.py web_common/MANIFEST.in /web_common/ +COPY web_common/web_common /web_common/web_common/ +RUN hail-pip-install /web_common && rm -rf /web_common diff --git a/docker/Makefile b/docker/Makefile index ddb5ec35761..e796702023c 100644 --- a/docker/Makefile +++ b/docker/Makefile @@ -12,6 +12,9 @@ BASE_IMAGE = $(DOCKER_PREFIX)/base:$(shell docker images -q --no-trunc base:late SERVICE_BASE_LATEST = $(DOCKER_PREFIX)/service-base:latest SERVICE_BASE_IMAGE = $(DOCKER_PREFIX)/service-base:$(shell docker images -q --no-trunc service-base:latest | sed -e 's,[^:]*:,,') +SERVICE_JAVA_RUN_BASE_LATEST = $(DOCKER_PREFIX)/service-java-run-base:latest +SERVICE_JAVA_RUN_BASE_IMAGE = $(DOCKER_PREFIX)/service-java-run-base:$(shell docker images -q --no-trunc service-java-run-base:latest | sed -e 's,[^:]*:,,') + HAIL_PUBLIC_LATEST = $(DOCKER_PREFIX)/hail-public:latest HAIL_PUBLIC_IMAGE = $(DOCKER_PREFIX)/hail-public:$(shell docker images -q --no-trunc hail-public:latest | sed -e 's,[^:]*:,,') @@ -50,6 +53,13 @@ service-base: base-stmp [ "$(shell bash stat-permissions.sh service-base-requirements.txt)" = "644" ] docker build -t service-base -f Dockerfile.service-base.out --cache-from service-base,$(SERVICE_BASE_LATEST),base,hail-ubuntu .. +.PHONY: service-java-run-base +service-java-run-base: hail-ubuntu-stmp + -docker pull $(SERVICE_JAVA_RUN_BASE_LATEST) + python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"hail-ubuntu"}}' Dockerfile.service-java-run-base Dockerfile.service-java-run-base.out + [ "$(shell bash stat-permissions.sh Dockerfile.service-java-run-base.out)" = "644" ] + docker build -t service-java-run-base -f Dockerfile.service-java-run-base.out --cache-from service-java-run-base,$(SERVICE_JAVA_RUN_BASE_LATEST),base,hail-ubuntu .. + .PHONY: hail-public-image hail-public-image: $(MAKE) -C ../hail wheel @@ -83,6 +93,10 @@ push: build docker push $(SERVICE_BASE_LATEST) docker tag service-base $(SERVICE_BASE_IMAGE) docker push $(SERVICE_BASE_IMAGE) + docker tag service-java-run-base $(SERVICE_JAVA_RUN_BASE_LATEST) + docker push $(SERVICE_JAVA_RUN_BASE_LATEST) + docker tag service-java-run-base $(SERVICE_JAVA_RUN_BASE_IMAGE) + docker push $(SERVICE_JAVA_RUN_BASE_IMAGE) .PHONY: deploy deploy: push diff --git a/docker/requirements.txt b/docker/requirements.txt index 2bfdefb6b1b..c8f83816227 100644 --- a/docker/requirements.txt +++ b/docker/requirements.txt @@ -16,7 +16,7 @@ flake8==3.8.3 Flask-Cors==3.0.8 Flask-Sockets==0.2.1 Flask==1.0.3 -gcsfs==0.2.2 +gcsfs==0.7.2 gidgethub==4.1.0 google-api-python-client==1.7.10 google-cloud-logging==1.12.1 diff --git a/hail/build.gradle b/hail/build.gradle index 0a1eb9b0add..91202a23138 100644 --- a/hail/build.gradle +++ b/hail/build.gradle @@ -210,6 +210,8 @@ dependencies { bundled 'com.indeed:util-serialization:1.0.31' bundled 'com.indeed:util-mmap:1.0.31' bundled group: 'org.freemarker', name: 'freemarker', version: '2.3.14' + + bundled 'com.kohlschutter.junixsocket:junixsocket-core:2.3.2' } task(checkSettings) doLast { diff --git a/hail/build.sbt b/hail/build.sbt index 91ac96e46ad..c73e7f55ea6 100644 --- a/hail/build.sbt +++ b/hail/build.sbt @@ -51,6 +51,7 @@ lazy val root = (project in file(".")). 
, "org.testng" % "testng" % "6.8.21" % Test , "com.indeed" % "lsmtree-core" % "1.0.7" , "com.indeed" % "util-serialization" % "1.0.30" + , "com.kohlschutter.junixsocket" % "junixsocket-core" % "2.3.2" ), unmanagedClasspath in Test += baseDirectory.value / "prebuilt" / "lib" ) diff --git a/hail/python/hail/__init__.py b/hail/python/hail/__init__.py index e9130bdac38..de930a82804 100644 --- a/hail/python/hail/__init__.py +++ b/hail/python/hail/__init__.py @@ -50,7 +50,8 @@ from .context import (init, init_local, stop, spark_context, tmp_dir, default_reference, # noqa: E402 get_reference, set_global_seed, _set_flags, _get_flags, current_backend, - debug_info, citation, cite_hail, cite_hail_bibtex, version) + debug_info, citation, cite_hail, cite_hail_bibtex, version, TemporaryFilename, + TemporaryDirectory) scan = agg.aggregators.ScanFunctions({name: getattr(agg, name) for name in agg.__all__}) @@ -60,6 +61,8 @@ 'stop', 'spark_context', 'tmp_dir', + 'TemporaryFilename', + 'TemporaryDirectory', 'default_reference', 'get_reference', 'set_global_seed', diff --git a/hail/python/hail/backend/backend.py b/hail/python/hail/backend/backend.py index 34d5eecb216..91c462d306a 100644 --- a/hail/python/hail/backend/backend.py +++ b/hail/python/hail/backend/backend.py @@ -1,4 +1,5 @@ import abc +from ..fs.fs import FS class Backend(abc.ABC): @@ -69,7 +70,7 @@ def logger(self): @property @abc.abstractmethod - def fs(self): + def fs(self) -> FS: pass @abc.abstractmethod diff --git a/hail/python/hail/backend/service_backend.py b/hail/python/hail/backend/service_backend.py index 5f6372b7ad3..bfe5d7be5e0 100644 --- a/hail/python/hail/backend/service_backend.py +++ b/hail/python/hail/backend/service_backend.py @@ -1,32 +1,43 @@ +from typing import Optional import os import aiohttp import json +import warnings from hail.utils import FatalError -from hail.expr.types import dtype +from hail.expr.types import dtype, tvoid from hail.expr.table_type import ttable from hail.expr.matrix_type import tmatrix from hail.expr.blockmatrix_type import tblockmatrix -from hailtop.config import get_deploy_config, get_user_config +from hailtop.config import get_deploy_config, get_user_config, DeployConfig from hailtop.auth import service_auth_headers -from hailtop.utils import async_to_blocking, retry_transient_errors +from hailtop.utils import async_to_blocking, retry_transient_errors, secret_alnum_string, TransientError from hail.ir.renderer import CSERenderer from .backend import Backend from ..hail_logging import PythonOnlyLogger +from ..fs.google_fs import GoogleCloudStorageFS class ServiceSocket: - def __init__(self, *, deploy_config=None): + def __init__(self, *, deploy_config: Optional[DeployConfig] = None): if not deploy_config: deploy_config = get_deploy_config() + self.deploy_config = deploy_config self.url = deploy_config.base_url('query') - self.session = aiohttp.ClientSession(headers=service_auth_headers(deploy_config, 'query')) + self._session: Optional[aiohttp.ClientSession] = None + + async def session(self) -> aiohttp.ClientSession: + if self._session is None: + self._session = aiohttp.ClientSession( + headers=service_auth_headers(self.deploy_config, 'query')) + return self._session def close(self): - async_to_blocking(self.session.close()) - self.session = None + if self._session is not None: + async_to_blocking(self._session.close()) + self._session = None def handle_response(self, resp): if resp.type == aiohttp.WSMsgType.CLOSE: @@ -37,12 +48,23 @@ def handle_response(self, resp): return resp.data async def 
async_request(self, endpoint, **data): - async with self.session.ws_connect(f'{self.url}/api/v1alpha/{endpoint}') as socket: + data['token'] = secret_alnum_string() + session = await self.session() + async with session.ws_connect(f'{self.url}/api/v1alpha/{endpoint}') as socket: await socket.send_str(json.dumps(data)) - result = json.loads(self.handle_response(await socket.receive())) + response = await socket.receive() + await socket.send_str('bye') + if response.type == aiohttp.WSMsgType.ERROR: + raise ValueError(f'bad response: {endpoint}; {data}; {response}') + if response.type in (aiohttp.WSMsgType.CLOSE, + aiohttp.WSMsgType.CLOSED): + warnings.warn(f'retrying after losing connection {endpoint}; {data}; {response}') + raise TransientError() + assert response.type == aiohttp.WSMsgType.TEXT + result = json.loads(response.data) if result['status'] != 200: raise FatalError(f'Error from server: {result["value"]}') - return json.loads(result['value']) + return result['value'] def request(self, endpoint, **data): return async_to_blocking(retry_transient_errors(self.async_request, endpoint, **data)) @@ -82,7 +104,7 @@ def logger(self): return self._logger @property - def fs(self): + def fs(self) -> GoogleCloudStorageFS: if self._fs is None: from hail.fs.google_fs import GoogleCloudStorageFS self._fs = GoogleCloudStorageFS() @@ -102,7 +124,10 @@ def execute(self, ir, timed=False): billing_project=self._billing_project, bucket=self._bucket) typ = dtype(resp['type']) - value = typ._convert_from_json_na(resp['value']) + if typ == tvoid: + value = None + else: + value = typ._convert_from_json_na(resp['value']) # FIXME put back timings return (value, None) if timed else value diff --git a/hail/python/hail/context.py b/hail/python/hail/context.py index fbbd5918f8b..43aefbf34db 100644 --- a/hail/python/hail/context.py +++ b/hail/python/hail/context.py @@ -1,3 +1,4 @@ +from typing import Optional import sys import os from urllib.parse import urlparse, urlunparse @@ -11,6 +12,8 @@ from hail.utils import get_env_or_default from hail.utils.java import Env, FatalError, warning from hail.backend import Backend +from hailtop.utils import secret_alnum_string +from .fs.fs import FS def _get_tmpdir(tmpdir): @@ -230,8 +233,11 @@ def init(sc=None, app_name='Hail', master=None, local='local[*]', quiet, append, min_block_size, branching_factor, tmpdir, local_tmpdir, skip_logging_configuration, optimizer_iterations) + if not backend.fs.exists(tmpdir): + backend.fs.mkdir(tmpdir) + HailContext( - log, quiet, append, tmp_dir, local_tmpdir, default_reference, + log, quiet, append, tmpdir, local_tmpdir, default_reference, global_seed, backend) @@ -261,7 +267,9 @@ def init_service( backend = ServiceBackend(billing_project, bucket, skip_logging_configuration=skip_logging_configuration) log = _get_log(log) - tmpdir = _get_tmpdir(tmpdir) + if tmpdir is None: + tmpdir = 'gs://' + backend._bucket + '/tmp/hail/' + secret_alnum_string() + assert tmpdir.startswith('gs://') local_tmpdir = _get_local_tmpdir(local_tmpdir) HailContext( @@ -299,6 +307,9 @@ def init_local( tmpdir, log, quiet, append, branching_factor, skip_logging_configuration, optimizer_iterations) + if not backend.fs.exists(tmpdir): + backend.fs.mkdir(tmpdir) + HailContext( log, quiet, append, tmpdir, tmpdir, default_reference, global_seed, backend) @@ -371,7 +382,7 @@ def spark_context(): return Env.spark_backend('spark_context').sc -def tmp_dir(): +def tmp_dir() -> str: """Returns the Hail shared temporary directory. 
Returns @@ -381,7 +392,106 @@ def tmp_dir(): return Env.hc()._tmpdir -def current_backend(): +class _TemporaryFilenameManager: + def __init__(self, fs: FS, name: str): + self.fs = fs + self.name = name + + def __enter__(self): + return self.name + + def __exit__(self, type, value, traceback): + return self.fs.remove(self.name) + + +def TemporaryFilename(*, + prefix: str = '', + suffix: str = '', + dir: Optional[str] = None + ) -> _TemporaryFilenameManager: + """A context manager which produces a temporary filename that is deleted when the context manager exits. + + Warning + ------- + + The filename is generated randomly and is extraordinarly unlikely to already exist, but this + function does not satisfy the strict requirements of Python's :class:`.TemporaryFilename`. + + Examples + -------- + + >>> with TemporaryFilename() as f: # doctest: +SKIP + ... open(f, 'w').write('hello hail') + ... print(open(f).read()) + hello hail + + Returns + ------- + :class:`.DeletingFile` or :class:`.DeletingDirectory` + + """ + if dir is None: + dir = tmp_dir() + if not dir.endswith('/'): + dir = dir + '/' + return _TemporaryFilenameManager( + current_backend().fs, + dir + prefix + secret_alnum_string(10) + suffix) + + +class _TemporaryDirectoryManager: + def __init__(self, fs: FS, name: str): + self.fs = fs + self.name = name + + def __enter__(self): + return self.name + + def __exit__(self, type, value, traceback): + return self.fs.rmtree(self.name) + + +def TemporaryDirectory(*, + prefix: str = '', + suffix: str = '', + dir: Optional[str] = None, + ensure_exists: bool = True + ) -> _TemporaryDirectoryManager: + """A context manager which produces a temporary directory name that is recursively deleted when the context manager exits. + + If the filesystem has a notion of directories, then we ensure the directory exists. + + Warning + ------- + + The directory name is generated randomly and is extraordinarly unlikely to already exist, but + this function does not satisfy the strict requirements of Python's :class:`.TemporaryDirectory`. + + Examples + -------- + + >>> with TemporaryDirectory() as dir: # doctest: +SKIP + ... open(f'{dir}/hello', 'w').write('hello hail') + ... print(open(f'{dir}/hello').read()) + hello hail + + Returns + ------- + :class:`.DeletingFile` or :class:`.DeletingDirectory` + + """ + if dir is None: + dir = tmp_dir() + if not dir.endswith('/'): + dir = dir + '/' + dirname = dir + prefix + secret_alnum_string(10) + suffix + fs = current_backend().fs + if ensure_exists: + fs.mkdir(dirname) + return _TemporaryDirectoryManager(fs, dirname) + + +def current_backend() -> Backend: return Env.hc()._backend diff --git a/hail/python/hail/fs/fs.py b/hail/python/hail/fs/fs.py index d8e48eae936..4c4efd7abc2 100644 --- a/hail/python/hail/fs/fs.py +++ b/hail/python/hail/fs/fs.py @@ -36,6 +36,27 @@ def stat(self, path: str) -> Dict: def ls(self, path: str) -> List[Dict]: pass + @abc.abstractmethod + def mkdir(self, path: str): + """Ensure files can be created whose dirname is `path`. + + Warning + ------- + + On file systems without a notion of directories, this function will do nothing. For example, + on Google Cloud Storage, this operation does nothing. 
+ + """ + pass + + @abc.abstractmethod + def remove(self, path: str): + pass + + @abc.abstractmethod + def rmtree(self, path: str): + pass + def copy_log(self, path: str) -> None: log = Env.hc()._log try: diff --git a/hail/python/hail/fs/google_fs.py b/hail/python/hail/fs/google_fs.py index 55c39802c5a..efa8f3bafaa 100644 --- a/hail/python/hail/fs/google_fs.py +++ b/hail/python/hail/fs/google_fs.py @@ -3,7 +3,7 @@ from typing import Dict, List import gcsfs from hurry.filesize import size -from shutil import copy2 +from shutil import copy2, rmtree from .fs import FS @@ -13,7 +13,7 @@ def __init__(self): self.client = gcsfs.core.GCSFileSystem(secure_serialize=True) def _is_local(self, path: str): - if(path.startswith("gs://")): + if path.startswith("gs://"): return False return True @@ -21,7 +21,7 @@ def _add_gs_path_prefix(self, path: str) -> str: first_idx = 0 for char in path: - if(char != "/"): + if char != "/": break first_idx += 1 @@ -66,7 +66,7 @@ def exists(self, path: str) -> bool: def is_file(self, path: str) -> bool: try: - if(self._is_local(path)): + if self._is_local(path): return S_ISREG(os.stat(path).st_mode) return not self._stat_is_gs_dir(self.client.info(path)) except FileNotFoundError: @@ -74,24 +74,29 @@ def is_file(self, path: str) -> bool: def is_dir(self, path: str) -> bool: try: - if(self._is_local(path)): + if self._is_local(path): return self._stat_is_local_dir(os.stat(path)) return self._stat_is_gs_dir(self.client.info(path)) except FileNotFoundError: return False def stat(self, path: str) -> Dict: - if(self._is_local(path)): + if self._is_local(path): return self._format_stat_local_file(os.stat(path), path) - return self._format_stat_gs_file(self.client.info(path)) + return self._format_stat_gs_file(self.client.info(path), path) - def _format_stat_gs_file(self, stats: Dict) -> Dict: + def _format_stat_gs_file(self, stats: Dict, path: str) -> Dict: + path_from_stats = stats.get('path') + if path_from_stats is not None: + path_from_stats = self._add_gs_path_prefix(path_from_stats) + else: + path_from_stats = path return { 'is_dir': self._stat_is_gs_dir(stats), 'size_bytes': stats['size'], 'size': size(stats['size']), - 'path': self._add_gs_path_prefix(stats['path']), + 'path': path_from_stats, 'owner': stats['bucket'], 'modification_time': stats.get('updated') } @@ -119,3 +124,16 @@ def ls(self, path: str) -> List[Dict]: return [self._format_stat_local_file(os.stat(file), file) for file in os.listdir(path)] return [self._format_stat_gs_file(file) for file in self.client.ls(path, detail=True)] + + def mkdir(self, path: str): + pass + + def remove(self, path: str): + if self._is_local(path): + os.remove(path) + self.client.rm(path) + + def rmtree(self, path: str): + if self._is_local(path): + rmtree(path) + self.client.rm(path, recursive=True) diff --git a/hail/python/hail/fs/hadoop_fs.py b/hail/python/hail/fs/hadoop_fs.py index ea8cf80121a..114c0a3fe1d 100644 --- a/hail/python/hail/fs/hadoop_fs.py +++ b/hail/python/hail/fs/hadoop_fs.py @@ -44,6 +44,12 @@ def ls(self, path: str) -> List[Dict]: def mkdir(self, path: str) -> None: return self._jfs.mkDir(path) + def remove(self, path: str): + return self._jfs.remove(path) + + def rmtree(self, path: str): + return self._jfs.rmtree(path) + class HadoopReader(io.RawIOBase): def __init__(self, hfs, path, buffer_size): diff --git a/hail/python/hail/fs/local_fs.py b/hail/python/hail/fs/local_fs.py index eb7c6b25aa6..e000cd6cb07 100644 --- a/hail/python/hail/fs/local_fs.py +++ b/hail/python/hail/fs/local_fs.py @@ -2,7 +2,7 @@ 
from stat import S_ISREG, S_ISDIR from typing import Dict, List from hurry.filesize import size -from shutil import copy2 +from shutil import copy2, rmtree from .fs import FS @@ -57,3 +57,12 @@ def _stat_is_local_dir(self, stats: os.stat_result) -> bool: def ls(self, path: str) -> List[Dict]: return [self._format_stat_local_file(os.stat(file), file) for file in os.listdir(path)] + + def mkdir(self, path: str): + os.mkdir(path) + + def remove(self, path: str): + os.remove(path) + + def rmtree(self, path: str): + rmtree(path) diff --git a/hail/python/hail/methods/relatedness/king.py b/hail/python/hail/methods/relatedness/king.py index 191d9da94f4..111d0fadf8e 100644 --- a/hail/python/hail/methods/relatedness/king.py +++ b/hail/python/hail/methods/relatedness/king.py @@ -220,16 +220,19 @@ def king(call_expr, *, block_size=None): `call-expr`'s column keys. It has one entry field, `phi`. """ mt = matrix_table_source('king/call_expr', call_expr) + call = Env.get_uid() + mt = mt.annotate_entries(**{call: call_expr}) is_hom_ref = Env.get_uid() is_het = Env.get_uid() is_hom_var = Env.get_uid() is_defined = Env.get_uid() + mt = mt.unfilter_entries() mt = mt.select_entries(**{ - is_hom_ref: hl.float(hl.or_else(call_expr.is_hom_ref(), 0)), - is_het: hl.float(hl.or_else(call_expr.is_het(), 0)), - is_hom_var: hl.float(hl.or_else(call_expr.is_hom_var(), 0)), - is_defined: hl.float(hl.is_defined(call_expr)) + is_hom_ref: hl.float(hl.or_else(mt[call].is_hom_ref(), 0)), + is_het: hl.float(hl.or_else(mt[call].is_het(), 0)), + is_hom_var: hl.float(hl.or_else(mt[call].is_hom_var(), 0)), + is_defined: hl.float(hl.is_defined(mt[call])) }) ref = hl.linalg.BlockMatrix.from_entry_expr(mt[is_hom_ref], block_size=block_size) het = hl.linalg.BlockMatrix.from_entry_expr(mt[is_het], block_size=block_size) diff --git a/hail/python/hailtop/aiotools/fs.py b/hail/python/hailtop/aiotools/fs.py index 30e0e1f272f..0085524da2f 100644 --- a/hail/python/hailtop/aiotools/fs.py +++ b/hail/python/hailtop/aiotools/fs.py @@ -339,16 +339,19 @@ class FileAndDirectoryError(Exception): class Transfer: - TARGET_DIR = 'target_dir' - TARGET_FILE = 'target_file' - INFER_TARGET = 'infer_target' + DEST_DIR = 'dest_dir' + DEST_IS_TARGET = 'dest_is_target' + INFER_DEST = 'infer_dest' - def __init__(self, src: Union[str, List[str]], dest: str, *, treat_dest_as: str = INFER_TARGET): - if treat_dest_as not in (Transfer.TARGET_DIR, Transfer.TARGET_FILE, Transfer.INFER_TARGET): + def __init__(self, src: Union[str, List[str]], dest: str, *, treat_dest_as: str = INFER_DEST): + if treat_dest_as not in (Transfer.DEST_DIR, Transfer.DEST_IS_TARGET, Transfer.INFER_DEST): raise ValueError(f'treat_dest_as invalid: {treat_dest_as}') - if treat_dest_as == Transfer.TARGET_FILE and isinstance(src, list): + if treat_dest_as == Transfer.DEST_IS_TARGET and isinstance(src, list): raise NotADirectoryError(dest) + if (treat_dest_as == Transfer.INFER_DEST + and dest.endswith('/')): + treat_dest_as = Transfer.DEST_DIR self.src = src self.dest = dest @@ -523,22 +526,22 @@ async def _copy_file_multi_part( source_report._errors += 1 async def _full_dest(self): - dest_type = await self.dest_type_task + if self.dest_type_task: + dest_type = await self.dest_type_task + else: + dest_type = None - if (self.treat_dest_as == Transfer.TARGET_DIR - or self.dest.endswith('/') - or (self.treat_dest_as == Transfer.INFER_TARGET + if (self.treat_dest_as == Transfer.DEST_DIR + or (self.treat_dest_as == Transfer.INFER_DEST and dest_type == AsyncFS.DIR)): - if dest_type is None: - raise 
FileNotFoundError(self.dest) - if dest_type == AsyncFS.FILE: - raise NotADirectoryError(self.dest) - assert dest_type == AsyncFS.DIR # We know dest is a dir, but we're copying to # dest/basename(src), and we don't know its type. return url_join(self.dest, url_basename(self.src.rstrip('/'))), None - assert not self.dest.endswith('/') + if (self.treat_dest_as == Transfer.DEST_IS_TARGET + and self.dest.endswith('/')): + dest_type = AsyncFS.DIR + return self.dest, dest_type async def copy_as_file(self, @@ -653,14 +656,13 @@ async def _dest_type(self, transfer: Transfer): than the real type. A return value of `None` mean `dest` does not exist. ''' - if (transfer.treat_dest_as == Transfer.TARGET_DIR + assert transfer.treat_dest_as != Transfer.DEST_IS_TARGET + + if (transfer.treat_dest_as == Transfer.DEST_DIR or isinstance(transfer.src, list) or transfer.dest.endswith('/')): return AsyncFS.DIR - if transfer.treat_dest_as == Transfer.TARGET_FILE: - return AsyncFS.FILE - assert not transfer.dest.endswith('/') try: dest_type = await self.router_fs.staturl(transfer.dest) @@ -675,15 +677,17 @@ async def copy_source(self, sema: asyncio.Semaphore, transfer: Transfer, source_ async def _copy_one_transfer(self, sema: asyncio.Semaphore, transfer_report: TransferReport, transfer: Transfer, return_exceptions: bool): try: - dest_type_task = asyncio.create_task(self._dest_type(transfer)) - dest_type_task_awaited = False + if transfer.treat_dest_as == Transfer.INFER_DEST: + dest_type_task = asyncio.create_task(self._dest_type(transfer)) + else: + dest_type_task = None try: src = transfer.src if isinstance(src, str): await self.copy_source(sema, transfer, transfer_report._source_report, src, dest_type_task, return_exceptions) else: - if transfer.treat_dest_as == Transfer.TARGET_FILE: + if transfer.treat_dest_as == Transfer.DEST_IS_TARGET: raise NotADirectoryError(transfer.dest) await bounded_gather2(sema, *[ @@ -692,17 +696,11 @@ async def _copy_one_transfer(self, sema: asyncio.Semaphore, transfer_report: Tra ], cancel_on_error=True) # raise potential exception - dest_type_task_awaited = True - await dest_type_task + if dest_type_task: + await dest_type_task finally: - if not dest_type_task_awaited: - # retrieve dest_type_task exception to avoid - # "Task exception was never retrieved" errors - try: - dest_type_task_awaited = True - await dest_type_task - except: - pass + if dest_type_task: + await asyncio.wait([dest_type_task]) except Exception as e: if return_exceptions: transfer_report.set_exception(e) diff --git a/hail/python/requirements.txt b/hail/python/requirements.txt index 7d3aec286ff..c3909bbd60f 100644 --- a/hail/python/requirements.txt +++ b/hail/python/requirements.txt @@ -5,7 +5,7 @@ bokeh>1.3,<2.0 decorator<5 Deprecated>=1.2.10,<1.3 dill>=0.3.1.1,<0.4 -gcsfs==0.2.2 +gcsfs==0.7.2 humanize==1.0.0 hurry.filesize==0.9 nest_asyncio diff --git a/hail/python/test/hail/experimental/test_annotation_db.py b/hail/python/test/hail/experimental/test_annotation_db.py index e601451b0a2..ebb1da3ddd0 100644 --- a/hail/python/test/hail/experimental/test_annotation_db.py +++ b/hail/python/test/hail/experimental/test_annotation_db.py @@ -1,5 +1,4 @@ import unittest -import tempfile import hail as hl from ..helpers import startTestHailContext, stopTestHailContext @@ -12,10 +11,10 @@ def setupAnnotationDBTests(cls): t = hl.utils.range_table(10) t = t.annotate(locus=hl.locus('1', t.idx + 1)) t = t.annotate(annotation=hl.str(t.idx)) - d = tempfile.TemporaryDirectory() - fname = d.name + '/f.mt' + cls.tempdir_manager = 
hl.TemporaryDirectory() + d = cls.tempdir_manager.__enter__() + fname = d + '/f.mt' t.write(fname) - cls.temp_dir = d cls.db_json = { 'unique_dataset': { 'description': 'now with unique rows!', @@ -44,7 +43,7 @@ def setupAnnotationDBTests(cls): @classmethod def tearDownAnnotationDBTests(cls): stopTestHailContext() - AnnotationDBTests.temp_dir.cleanup() + cls.tempdir_manager.__exit__(None, None, None) setUpClass = setupAnnotationDBTests tearDownClass = tearDownAnnotationDBTests diff --git a/hail/python/test/hail/experimental/test_dnd_array.py b/hail/python/test/hail/experimental/test_dnd_array.py index 73918c7c2d8..f3899227102 100644 --- a/hail/python/test/hail/experimental/test_dnd_array.py +++ b/hail/python/test/hail/experimental/test_dnd_array.py @@ -2,12 +2,13 @@ import hail as hl from hail.utils import new_temp_file -from ..helpers import startTestHailContext, stopTestHailContext, fails_local_backend +from ..helpers import startTestHailContext, stopTestHailContext, fails_local_backend, fails_service_backend setUpModule = startTestHailContext tearDownModule = stopTestHailContext +@fails_service_backend() def test_range_collect(): n_variants = 10 n_samples = 10 @@ -21,6 +22,7 @@ def test_range_collect(): assert np.array_equal(da.collect(), a) +@fails_service_backend() @fails_local_backend() def test_range_matmul(): n_variants = 10 @@ -41,6 +43,7 @@ def test_range_matmul(): assert np.array_equal(da_result, a_result) +@fails_service_backend() @fails_local_backend() def test_small_collect(): n_variants = 10 @@ -57,6 +60,7 @@ def test_small_collect(): assert np.array_equal(da.collect(), a) +@fails_service_backend() @fails_local_backend() def test_medium_collect(): n_variants = 100 @@ -73,6 +77,7 @@ def test_medium_collect(): assert np.array_equal(da.collect(), a) +@fails_service_backend() @fails_local_backend() def test_small_matmul(): n_variants = 10 @@ -95,6 +100,7 @@ def test_small_matmul(): assert np.array_equal(da_result, a_result) +@fails_service_backend() @fails_local_backend() def test_medium_matmul(): n_variants = 100 @@ -117,6 +123,7 @@ def test_medium_matmul(): assert np.array_equal(da_result, a_result) +@fails_service_backend() @fails_local_backend() def test_matmul_via_inner_product(): n_variants = 10 @@ -141,6 +148,7 @@ def test_matmul_via_inner_product(): assert np.array_equal(prod_result, ip_result) +@fails_service_backend() @fails_local_backend() def test_king_homo_estimator(): hl.set_global_seed(1) @@ -166,6 +174,7 @@ def sqr(x): [4., 6., 0., 6., 0.]])) +@fails_service_backend() @fails_local_backend() def test_dndarray_sum(): n_variants = 10 @@ -194,6 +203,7 @@ def test_dndarray_sum(): assert np.array_equal(da_result, a_result) +@fails_service_backend() @fails_local_backend() def test_dndarray_sum_scalar(): n_variants = 10 @@ -216,6 +226,7 @@ def test_dndarray_sum_scalar(): assert np.array_equal(da_result, a_result) +@fails_service_backend() @fails_local_backend() def test_dndarray_rsum_scalar(): n_variants = 10 @@ -238,6 +249,7 @@ def test_dndarray_rsum_scalar(): assert np.array_equal(da_result, a_result) +@fails_service_backend() @fails_local_backend() def test_dndarray_mul_scalar(): n_variants = 10 @@ -260,6 +272,7 @@ def test_dndarray_mul_scalar(): assert np.array_equal(da_result, a_result) +@fails_service_backend() @fails_local_backend() def test_dndarray_rmul_scalar(): n_variants = 10 @@ -282,6 +295,7 @@ def test_dndarray_rmul_scalar(): assert np.array_equal(da_result, a_result) +@fails_service_backend() @fails_local_backend() def test_dndarray_sub_scalar(): 
n_variants = 10 @@ -304,6 +318,7 @@ def test_dndarray_sub_scalar(): assert np.array_equal(da_result, a_result) +@fails_service_backend() @fails_local_backend() def test_dndarray_rsub_scalar(): n_variants = 10 @@ -341,6 +356,7 @@ def test_dndarray_errors_on_unsorted_columns(): assert False +@fails_service_backend() @fails_local_backend() def test_dndarray_sort_columns(): n_variants = 10 diff --git a/hail/python/test/hail/experimental/test_experimental.py b/hail/python/test/hail/experimental/test_experimental.py index e3c830a70cb..9c587792d97 100644 --- a/hail/python/test/hail/experimental/test_experimental.py +++ b/hail/python/test/hail/experimental/test_experimental.py @@ -2,7 +2,7 @@ import hail as hl import unittest from ..helpers import * -from hail.utils import new_temp_file, new_local_temp_dir +from hail.utils import new_temp_file setUpModule = startTestHailContext tearDownModule = stopTestHailContext @@ -93,6 +93,7 @@ def test_plot_roc_curve(self): _, aucs = hl.experimental.plot_roc_curve(ht, ['score1', 'score2', 'score3']) @pytest.mark.unchecked_allocator + @fails_service_backend() @fails_local_backend() def test_ld_score_regression(self): @@ -262,6 +263,7 @@ def test_ld_score_regression(self): results[1]['snp_heritability_standard_error'], 0.0416, places=4) + @fails_service_backend() def test_sparse(self): expected_split_mt = hl.import_vcf(resource('sparse_split_test_b.vcf')) unsplit_mt = hl.import_vcf(resource('sparse_split_test.vcf'), call_fields=['LGT', 'LPGT']) @@ -269,6 +271,7 @@ def test_sparse(self): .drop('a_index', 'was_split').select_entries(*expected_split_mt.entry.keys())) assert mt._same(expected_split_mt) + @fails_service_backend() def test_define_function(self): f1 = hl.experimental.define_function( lambda a, b: (a + 7) * b, hl.tint32, hl.tint32) @@ -277,7 +280,8 @@ def test_define_function(self): lambda a, b: (a + 7) * b, hl.tint32, hl.tint32) self.assertEqual(hl.eval(f1(1, 3)), 24) # idempotent self.assertEqual(hl.eval(f2(1, 3)), 24) # idempotent - + + @fails_service_backend() @fails_local_backend() def test_pc_project(self): mt = hl.balding_nichols_model(3, 100, 50) @@ -288,6 +292,7 @@ def test_pc_project(self): ht = hl.experimental.pc_project(mt_to_project.GT, loadings_ht.loadings, loadings_ht.af) assert ht._force_count() == 100 + @fails_service_backend() def test_mt_full_outer_join(self): mt1 = hl.utils.range_matrix_table(10, 10) mt1 = mt1.annotate_cols(c1=hl.rand_unif(0, 1)) @@ -309,6 +314,7 @@ def test_mt_full_outer_join(self): assert(mtj.count() == (15, 15)) + @fails_service_backend() def test_mt_full_outer_join_self(self): mt = hl.import_vcf(resource('sample.vcf')) jmt = hl.experimental.full_outer_join_mt(mt, mt) @@ -316,6 +322,7 @@ def test_mt_full_outer_join_self(self): assert jmt.filter_rows(hl.is_defined(jmt.left_row) & hl.is_defined(jmt.right_row)).count_rows() == mt.count_rows() assert jmt.filter_entries(hl.is_defined(jmt.left_entry) & hl.is_defined(jmt.right_entry)).entries().count() == mt.entries().count() + @fails_service_backend() @fails_local_backend() def test_block_matrices_tofiles(self): data = [ @@ -330,13 +337,15 @@ def test_block_matrices_tofiles(self): hl.linalg.BlockMatrix._create(11, 12, data[0].tolist(), block_size=4), hl.linalg.BlockMatrix._create(5, 17, data[1].tolist(), block_size=8) ] - prefix = new_local_temp_dir() - hl.experimental.block_matrices_tofiles(bms, f'{prefix}/files') - for i in range(len(bms)): - a = data[i] - a2 = np.fromfile(f'{prefix}/files/{i}') - self.assertTrue(np.array_equal(a, a2)) - + with 
hl.TemporaryDirectory() as prefix: + hl.experimental.block_matrices_tofiles(bms, f'{prefix}/files') + for i in range(len(bms)): + a = data[i] + a2 = np.frombuffer( + hl.current_backend().fs.open(f'{prefix}/files/{i}', mode='rb').read()) + self.assertTrue(np.array_equal(a, a2)) + + @fails_service_backend() @fails_local_backend() def test_export_block_matrices(self): data = [ @@ -351,20 +360,22 @@ def test_export_block_matrices(self): hl.linalg.BlockMatrix._create(11, 12, data[0].tolist(), block_size=4), hl.linalg.BlockMatrix._create(5, 17, data[1].tolist(), block_size=8) ] - prefix = new_local_temp_dir() - hl.experimental.export_block_matrices(bms, f'{prefix}/files') - for i in range(len(bms)): - a = arrs[i] - a2 = np.loadtxt(f'{prefix}/files/{i}.tsv') - self.assertTrue(np.array_equal(a, a2)) - - prefix2 = new_local_temp_dir() - custom_names = ["nameA", "inner/nameB.tsv"] - hl.experimental.export_block_matrices(bms, f'{prefix2}/files', custom_filenames=custom_names) - for i in range(len(bms)): - a = arrs[i] - a2 = np.loadtxt(f'{prefix2}/files/{custom_names[i]}') - self.assertTrue(np.array_equal(a, a2)) + with hl.TemporaryDirectory() as prefix: + hl.experimental.export_block_matrices(bms, f'{prefix}/files') + for i in range(len(bms)): + a = arrs[i] + a2 = np.loadtxt( + hl.current_backend().fs.open(f'{prefix}/files/{i}.tsv')) + self.assertTrue(np.array_equal(a, a2)) + + with hl.TemporaryDirectory() as prefix2: + custom_names = ["nameA", "inner/nameB.tsv"] + hl.experimental.export_block_matrices(bms, f'{prefix2}/files', custom_filenames=custom_names) + for i in range(len(bms)): + a = arrs[i] + a2 = np.loadtxt( + hl.current_backend().fs.open(f'{prefix2}/files/{custom_names[i]}')) + self.assertTrue(np.array_equal(a, a2)) def test_loop(self): def triangle_with_ints(n): diff --git a/hail/python/test/hail/experimental/test_vcf_combiner.py b/hail/python/test/hail/experimental/test_vcf_combiner.py index be80ecae055..cafc5fa9d7b 100644 --- a/hail/python/test/hail/experimental/test_vcf_combiner.py +++ b/hail/python/test/hail/experimental/test_vcf_combiner.py @@ -4,7 +4,7 @@ from hail.experimental.vcf_combiner import vcf_combiner as vc from hail.utils.java import Env from hail.utils.misc import new_temp_file -from ..helpers import resource, startTestHailContext, stopTestHailContext, fails_local_backend +from ..helpers import resource, startTestHailContext, stopTestHailContext, fails_local_backend, fails_service_backend setUpModule = startTestHailContext tearDownModule = stopTestHailContext @@ -22,6 +22,7 @@ 'NA20796', 'HG00323', 'HG01384', 'NA18613', 'NA20802'] +@fails_service_backend() @fails_local_backend() def test_1kg_chr22(): out_file = new_temp_file(extension='mt') @@ -57,18 +58,21 @@ def test_1kg_chr22(): def default_exome_intervals(rg): return vc.calculate_even_genome_partitioning(rg, 2 ** 32) # 4 billion, larger than any contig +@fails_service_backend() @fails_local_backend() def test_gvcf_1k_same_as_import_vcf(): path = os.path.join(resource('gvcfs'), '1kg_chr22', f'HG00308.hg38.g.vcf.gz') [mt] = hl.import_gvcfs([path], default_exome_intervals('GRCh38'), reference_genome='GRCh38') assert mt._same(hl.import_vcf(path, force_bgz=True, reference_genome='GRCh38').key_rows_by('locus')) +@fails_service_backend() @fails_local_backend() def test_gvcf_subset_same_as_import_vcf(): path = os.path.join(resource('gvcfs'), 'subset', f'HG00187.hg38.g.vcf.gz') [mt] = hl.import_gvcfs([path], default_exome_intervals('GRCh38'), reference_genome='GRCh38') assert mt._same(hl.import_vcf(path, force_bgz=True, 
reference_genome='GRCh38').key_rows_by('locus')) +@fails_service_backend() @fails_local_backend() def test_key_by_locus_alleles(): out_file = new_temp_file(extension='mt') @@ -87,6 +91,7 @@ def test_key_by_locus_alleles(): mt._force_count_rows() +@fails_service_backend() @fails_local_backend() def test_non_ref_alleles_set_to_missing(): path = os.path.join(resource('gvcfs'), 'non_ref_call.g.vcf.gz') @@ -105,6 +110,7 @@ def test_non_ref_alleles_set_to_missing(): assert mt.aggregate_entries( hl.agg.all(gt_idx < (n_alleles * (n_alleles + 1)) / 2)) +@fails_service_backend() @fails_local_backend() def test_contig_recoding(): path1 = os.path.join(resource('gvcfs'), 'recoding', 'HG00187.hg38.g.vcf.gz') @@ -129,6 +135,7 @@ def test_contig_recoding(): assert mt1.count() == mt2.count() assert mt1._same(mt2) +@fails_service_backend() @fails_local_backend() def test_sample_override(): out_file = new_temp_file(extension='mt') diff --git a/hail/python/test/hail/expr/test_expr.py b/hail/python/test/hail/expr/test_expr.py index 7b56b206f36..91ac96b7eaf 100644 --- a/hail/python/test/hail/expr/test_expr.py +++ b/hail/python/test/hail/expr/test_expr.py @@ -3,7 +3,6 @@ import random from scipy.stats import pearsonr import numpy as np -import tempfile import hail as hl import hail.expr.aggregators as agg @@ -19,6 +18,7 @@ class Tests(unittest.TestCase): def collect_unindexed_expression(self): self.assertEqual(hl.array([4,1,2,3]).collect(), [4,1,2,3]) + @fails_service_backend() def test_key_by_random(self): ht = hl.utils.range_table(10, 4) ht = ht.annotate(new_key=hl.rand_unif(0, 1)) @@ -64,6 +64,7 @@ def test_zeros(self): evaled = hl.eval(hl.zeros(size)) assert evaled == [0 for i in range(size)] + @fails_service_backend() @fails_local_backend() def test_seeded_sampling(self): sampled1 = hl.utils.range_table(50, 6).filter(hl.rand_bool(0.5)) @@ -79,6 +80,7 @@ def test_seeded_sampling(self): self.assertEqual(set(s1.idx.collect()), expected) self.assertEqual(set(s2.idx.collect()), expected) + @fails_service_backend() def test_order_by_head_optimization_with_randomness(self): ht = hl.utils.range_table(10, 6).annotate(x=hl.rand_unif(0, 1)) expected = sorted(ht.collect(), key=lambda x: x['x'])[:5] @@ -243,6 +245,7 @@ def test_numeric_conversion(self): for f, t in kt.row.dtype.items(): self.assertEqual(expected_schema[f], t) + @fails_service_backend() def test_genetics_constructors(self): rg = hl.ReferenceGenome("foo", ["1"], {"1": 100}) @@ -395,6 +398,7 @@ def test_approx_cdf_all_missing(self): table = hl.utils.range_table(10).annotate(foo=hl.missing(tint)) table.aggregate(hl.agg.approx_quantiles(table.foo, qs=[0.5])) + @fails_service_backend() def test_approx_cdf_col_aggregate(self): mt = hl.utils.range_matrix_table(10, 10) mt = mt.annotate_entries(foo=mt.row_idx + mt.col_idx) @@ -469,6 +473,7 @@ def test_agg_filter(self): for aggregation, expected in tests: self.assertEqual(t.aggregate(aggregation), expected) + @fails_service_backend() @fails_local_backend() def test_agg_densify(self): mt = hl.utils.range_matrix_table(5, 5, 3) @@ -824,6 +829,7 @@ def test_aggregator_bindings(self): assert t.annotate(x=hl.bind(lambda i: hl.scan.sum(t.idx + i), 1, _ctx='scan')).x.collect() == [0, 1, 3, 6, 10] assert t.aggregate(hl.bind(lambda i: hl.agg.collect(i), t.idx * t.idx, _ctx='agg')) == [0, 1, 4, 9, 16] + @fails_service_backend() def test_scan(self): table = hl.utils.range_table(10) @@ -871,6 +877,7 @@ def test_scan_filter(self): for aggregation, expected in tests: self.assertEqual(aggregation.collect(), expected) + 
@fails_service_backend() def test_scan_explode(self): t = hl.utils.range_table(5) tests = [ @@ -902,6 +909,7 @@ def test_scan_explode(self): for aggregation, expected in tests: self.assertEqual(aggregation.collect(), expected) + @fails_service_backend() def test_scan_group_by(self): t = hl.utils.range_table(5) tests = [ @@ -960,11 +968,13 @@ def test_aggregators_sum_product(self): self.assertTrue(r.sum_x == -15 and r.sum_y == 10 and r.sum_empty == 0 and r.prod_x == -120 and r.prod_y == 0 and r.prod_empty == 1) + @fails_service_backend def test_aggregators_hist(self): table = hl.utils.range_table(11) r = table.aggregate(hl.agg.hist(table.idx - 1, 0, 8, 4)) self.assertTrue(r.bin_edges == [0, 2, 4, 6, 8] and r.bin_freq == [2, 2, 2, 3] and r.n_smaller == 1 and r.n_larger == 1) + @fails_service_backend() def test_aggregators_hist_neg0(self): table = hl.utils.range_table(32) table = table.annotate(d=hl.if_else(table.idx == 11, -0.0, table.idx / 3)) @@ -974,6 +984,7 @@ def test_aggregators_hist_neg0(self): self.assertEqual(r.n_smaller, 0) self.assertEqual(r.n_larger, 1) + @fails_service_backend() def test_aggregators_hist_nan(self): ht = hl.utils.range_table(3).annotate(x=hl.float('nan')) r = ht.aggregate(hl.agg.hist(ht.x, 0, 10, 2)) @@ -981,6 +992,7 @@ def test_aggregators_hist_nan(self): assert r.n_smaller == 0 assert r.n_larger == 0 + @fails_service_backend() def test_aggregator_cse(self): ht = hl.utils.range_table(10) x = hl.agg.count() @@ -1009,6 +1021,7 @@ def test_aggregator_cse(self): # r2adj = sumfit$adj.r.squared # f = sumfit$fstatistic # p = pf(f[1],f[2],f[3],lower.tail=F) + @fails_service_backend() def test_aggregators_linreg(self): t = hl.Table.parallelize([ {"y": None, "x": 1.0}, @@ -1066,6 +1079,7 @@ def test_aggregators_linreg(self): self.assertAlmostEqual(r.multiple_p_value, 0.56671386) self.assertAlmostEqual(r.n, 5) + @fails_service_backend() def test_linreg_no_data(self): ht = hl.utils.range_table(1).filter(False) r = ht.aggregate(hl.agg.linreg(ht.idx, 0)) @@ -1099,6 +1113,7 @@ def test_downsample_aggregator_on_empty_table(self): r = ht.aggregate(hl.agg.downsample(ht.idx, ht.y, n_divisions=10)) self.assertTrue(len(r) == 0) + @fails_service_backend() def test_downsample_in_array_agg(self): mt = hl.utils.range_matrix_table(50, 50) mt = mt.annotate_rows(y = hl.rand_unif(0, 1)) @@ -1112,6 +1127,7 @@ def test_downsample_in_array_agg(self): ) mt.cols()._force_count() + @fails_service_backend() @fails_local_backend() def test_aggregator_info_score(self): gen_file = resource('infoScoreTest.gen') @@ -1139,6 +1155,7 @@ def test_aggregator_info_score(self): violations.show() self.fail("disagreement between computed info score and truth") + @fails_service_backend() @fails_local_backend() def test_aggregator_info_score_works_with_bgen_import(self): sample_file = resource('random.sample') @@ -1181,6 +1198,7 @@ def test_joins_inside_aggregators(self): table2 = hl.utils.range_table(10) self.assertEqual(table.aggregate(hl.agg.count_where(hl.is_defined(table2[table.idx]))), 10) + @fails_service_backend() def test_switch(self): x = hl.literal('1') na = hl.missing(tint32) @@ -1225,6 +1243,7 @@ def test_switch(self): hl.eval(hl.switch(x).when('0', 0).or_error("foo")) assert '.or_error("foo")' in str(exc.value) + @fails_service_backend() def test_case(self): def make_case(x): x = hl.literal(x) @@ -2081,6 +2100,7 @@ def test_int_typecheck(self): self.assertIsNone(hl.eval(hl.literal(None, dtype='int32'))) self.assertIsNone(hl.eval(hl.literal(None, dtype='int64'))) + @fails_service_backend() def 
test_is_transition(self): self.assertTrue(hl.eval(hl.is_transition("A", "G"))) self.assertTrue(hl.eval(hl.is_transition("C", "T"))) @@ -2089,6 +2109,7 @@ def test_is_transition(self): self.assertFalse(hl.eval(hl.is_transition("ACA", "AGA"))) self.assertFalse(hl.eval(hl.is_transition("A", "T"))) + @fails_service_backend() def test_is_transversion(self): self.assertTrue(hl.eval(hl.is_transversion("A", "T"))) self.assertFalse(hl.eval(hl.is_transversion("A", "G"))) @@ -2096,6 +2117,7 @@ def test_is_transversion(self): self.assertFalse(hl.eval(hl.is_transversion("AA", "T"))) self.assertFalse(hl.eval(hl.is_transversion("ACCC", "ACCT"))) + @fails_service_backend() def test_is_snp(self): self.assertTrue(hl.eval(hl.is_snp("A", "T"))) self.assertTrue(hl.eval(hl.is_snp("A", "G"))) @@ -2104,28 +2126,34 @@ def test_is_snp(self): self.assertTrue(hl.eval(hl.is_snp("AT", "AG"))) self.assertTrue(hl.eval(hl.is_snp("ATCCC", "AGCCC"))) + @fails_service_backend() def test_is_mnp(self): self.assertTrue(hl.eval(hl.is_mnp("ACTGAC", "ATTGTT"))) self.assertTrue(hl.eval(hl.is_mnp("CA", "TT"))) + @fails_service_backend() def test_is_insertion(self): self.assertTrue(hl.eval(hl.is_insertion("A", "ATGC"))) self.assertTrue(hl.eval(hl.is_insertion("ATT", "ATGCTT"))) + @fails_service_backend() def test_is_deletion(self): self.assertTrue(hl.eval(hl.is_deletion("ATGC", "A"))) self.assertTrue(hl.eval(hl.is_deletion("GTGTA", "GTA"))) + @fails_service_backend() def test_is_indel(self): self.assertTrue(hl.eval(hl.is_indel("A", "ATGC"))) self.assertTrue(hl.eval(hl.is_indel("ATT", "ATGCTT"))) self.assertTrue(hl.eval(hl.is_indel("ATGC", "A"))) self.assertTrue(hl.eval(hl.is_indel("GTGTA", "GTA"))) + @fails_service_backend() def test_is_complex(self): self.assertTrue(hl.eval(hl.is_complex("CTA", "ATTT"))) self.assertTrue(hl.eval(hl.is_complex("A", "TATGC"))) + @fails_service_backend() def test_is_star(self): self.assertTrue(hl.eval(hl.is_star("ATC", "*"))) self.assertTrue(hl.eval(hl.is_star("A", "*"))) @@ -2134,6 +2162,7 @@ def test_is_strand_ambiguous(self): self.assertTrue(hl.eval(hl.is_strand_ambiguous("A", "T"))) self.assertFalse(hl.eval(hl.is_strand_ambiguous("G", "T"))) + @fails_service_backend() def test_allele_type(self): self.assertEqual( hl.eval(hl.tuple(( @@ -2613,6 +2642,7 @@ def test_show_row_key_regression(self): ds = hl.utils.range_matrix_table(3, 3) ds.col_idx.show(3) + @fails_service_backend() @fails_local_backend() def test_export(self): for delimiter in ['\t', ',', '@']: @@ -2625,19 +2655,19 @@ def _test_export_entry(self, delimiter, missing, header): mt = mt.key_cols_by(col_idx = mt.col_idx + 1) mt = mt.annotate_entries(x = mt.row_idx * mt.col_idx) mt = mt.annotate_entries(x = hl.or_missing(mt.x != 4, mt.x)) - with tempfile.NamedTemporaryFile() as f: - mt.x.export(f.name, + with hl.TemporaryFilename() as f: + mt.x.export(f, delimiter=delimiter, header=header, missing=missing) if header: - actual = hl.import_matrix_table(f.name, + actual = hl.import_matrix_table(f, row_fields={'row_idx': hl.tint32}, row_key=['row_idx'], sep=delimiter, missing=missing) else: - actual = hl.import_matrix_table(f.name, + actual = hl.import_matrix_table(f, row_fields={'f0': hl.tint32}, row_key=['f0'], sep=delimiter, @@ -2657,13 +2687,14 @@ def _test_export_entry(self, delimiter, missing, header): 2, None, 6] assert expected_collect == actual.x.collect() + @fails_service_backend() @fails_local_backend() def test_export_genetic_data(self): mt = hl.balding_nichols_model(1, 3, 3) mt = mt.key_cols_by(s = 's' + hl.str(mt.sample_idx)) - with 
tempfile.NamedTemporaryFile() as f: - mt.GT.export(f.name) - actual = hl.import_matrix_table(f.name, + with hl.TemporaryFilename() as f: + mt.GT.export(f) + actual = hl.import_matrix_table(f, row_fields={'locus': hl.tstr, 'alleles': hl.tstr}, row_key=['locus', 'alleles'], @@ -2805,6 +2836,7 @@ def test_reference_genome_fns(self): hl.eval(hl.contig_length('chr5', 'GRCh37')) + @fails_service_backend() def test_initop(self): t = (hl.utils.range_table(5, 3) .annotate(GT=hl.call(0, 1)) @@ -3046,6 +3078,7 @@ def test_set_numeric_functions(self): self.assert_evals_to(hl.mean(s), 3) self.assert_evals_to(hl.median(s), 3) + @fails_service_backend() def test_uniroot(self): tol = 1.220703e-4 @@ -3379,9 +3412,10 @@ def test_bit_shift_edge_cases(self): assert hl.eval(hl.bit_rshift(hl.int64(-1), 64)) == -1 assert hl.eval(hl.bit_rshift(hl.int64(-11), 64, logical=True)) == 0 + @fails_service_backend() def test_bit_shift_errors(self): with pytest.raises(hl.utils.HailUserError): - hl.eval(hl.bit_lshift(1, -1)) + hl.eval(hl.bit_lshift(1, -1)) with pytest.raises(hl.utils.HailUserError): hl.eval(hl.bit_rshift(1, -1)) @@ -3521,10 +3555,12 @@ def test_parse_json(self): ] assert hl.eval(hl._compare(hl.tuple(values), hl.tuple(hl.parse_json(hl.json(v), v.dtype) for v in values)) == 0) + @fails_service_backend() def test_expr_persist(self): # need to test laziness, so we will overwrite a file ht2 = hl.utils.range_table(100) - with tempfile.TemporaryDirectory() as f: + + with hl.TemporaryDirectory(ensure_exists=False) as f: hl.utils.range_table(10).write(f, overwrite=True) ht = hl.read_table(f) count1 = ht.aggregate(hl.agg.count(), _localize=False)._persist() diff --git a/hail/python/test/hail/expr/test_ndarrays.py b/hail/python/test/hail/expr/test_ndarrays.py index 7b6a04a4b91..eeaddc0ded7 100644 --- a/hail/python/test/hail/expr/test_ndarrays.py +++ b/hail/python/test/hail/expr/test_ndarrays.py @@ -1,6 +1,5 @@ import numpy as np from ..helpers import * -import tempfile import pytest from hail.utils.java import FatalError, HailUserError @@ -27,6 +26,7 @@ def assert_ndarrays_almost_eq(*expr_and_expected): assert_ndarrays(np.allclose, expr_and_expected) +@fails_service_backend() def test_ndarray_ref(): scalar = 5.0 @@ -62,6 +62,7 @@ def test_ndarray_ref(): assert "Index 4 is out of bounds for axis 0 with size 3" in str(exc) +@fails_service_backend() def test_ndarray_slice(): np_rect_prism = np.arange(24).reshape((2, 3, 4)) rect_prism = hl.nd.array(np_rect_prism) @@ -161,6 +162,7 @@ def test_ndarray_transposed_slice(): ) +@fails_service_backend() def test_ndarray_eval(): data_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] mishapen_data_list1 = [[4], [1, 2, 3]] @@ -469,7 +471,7 @@ def test_ndarray_save(): ] for expected in arrs: - with tempfile.NamedTemporaryFile(suffix='.npy') as f: + with hl.TemporaryFilename(suffix='.npy') as f: hl.nd.array(expected).save(f.name) actual = np.load(f.name) diff --git a/hail/python/test/hail/expr/test_show.py b/hail/python/test/hail/expr/test_show.py index dfb851bce02..f2a83e7c369 100644 --- a/hail/python/test/hail/expr/test_show.py +++ b/hail/python/test/hail/expr/test_show.py @@ -1,4 +1,4 @@ -from ..helpers import startTestHailContext, stopTestHailContext +from ..helpers import startTestHailContext, stopTestHailContext, fails_service_backend import unittest import hail as hl @@ -8,6 +8,7 @@ class Tests(unittest.TestCase): + @fails_service_backend() def test(self): mt = hl.balding_nichols_model(3, 10, 10) t = mt.rows() diff --git a/hail/python/test/hail/expr/test_types.py 
b/hail/python/test/hail/expr/test_types.py index e8beaadd5c9..910c80710ce 100644 --- a/hail/python/test/hail/expr/test_types.py +++ b/hail/python/test/hail/expr/test_types.py @@ -64,6 +64,7 @@ def test_equality(self): else: self.assertNotEqual(ts[i], ts2[j]) + @fails_service_backend() def test_type_jvm_roundtrip(self): ts = self.types_to_test() for t in ts: @@ -86,6 +87,7 @@ def test_coercers_can_coerce(self): self.assertTrue(c.can_coerce(t)) self.assertFalse(c.requires_conversion(t)) + @fails_service_backend() @fails_local_backend() def test_nested_type_to_spark(self): ht = hl.utils.range_table(10) diff --git a/hail/python/test/hail/genetics/test_pedigree.py b/hail/python/test/hail/genetics/test_pedigree.py index 93cd1c0e650..ca592a8ac9a 100644 --- a/hail/python/test/hail/genetics/test_pedigree.py +++ b/hail/python/test/hail/genetics/test_pedigree.py @@ -36,6 +36,7 @@ def test_trios(self): self.assertEqual(t6.is_complete(), True) + @skip_when_service_backend('gcsfs does not support crc32 hashes and sample.fam intermittent only has a crc32 hash') def test_pedigree(self): ped = Pedigree.read(resource('sample.fam')) ped.write('/tmp/sample_out.fam') diff --git a/hail/python/test/hail/genetics/test_reference_genome.py b/hail/python/test/hail/genetics/test_reference_genome.py index e2ac16c176d..c79438a6f15 100644 --- a/hail/python/test/hail/genetics/test_reference_genome.py +++ b/hail/python/test/hail/genetics/test_reference_genome.py @@ -11,6 +11,7 @@ class Tests(unittest.TestCase): + @fails_service_backend() def test_reference_genome(self): rg = hl.get_reference('GRCh37') self.assertEqual(rg.name, "GRCh37") @@ -40,6 +41,7 @@ def test_reference_genome(self): self.assertDictEqual(gr2.lengths, lengths) gr2.write("/tmp/my_gr.json") + @fails_service_backend() def test_reference_genome_sequence(self): gr3 = ReferenceGenome.read(resource("fake_ref_genome.json")) self.assertEqual(gr3.name, "my_reference_genome") @@ -66,6 +68,7 @@ def test_reference_genome_sequence(self): assert gr4.has_sequence() self.assertEqual(gr4._sequence_files, (resource("fake_reference.fasta"), resource("fake_reference.fasta.fai"))) + @fails_service_backend() def test_reference_genome_liftover(self): grch37 = hl.get_reference('GRCh37') grch38 = hl.get_reference('GRCh38') @@ -124,6 +127,7 @@ def test_reference_genome_liftover(self): grch37.remove_liftover("GRCh38") grch38.remove_liftover("GRCh37") + @fails_service_backend() def test_liftover_strand(self): grch37 = hl.get_reference('GRCh37') grch37.add_liftover(resource('grch37_to_grch38_chr20.over.chain.gz'), 'GRCh38') @@ -141,6 +145,7 @@ def test_liftover_strand(self): grch37.remove_liftover("GRCh38") + @fails_service_backend() def test_read_custom_reference_genome(self): # this test doesn't behave properly if these reference genomes are already defined in scope. 
available_rgs = set(hl.ReferenceGenome._references.keys()) diff --git a/hail/python/test/hail/helpers.py b/hail/python/test/hail/helpers.py index 27ea1b8e5e4..1b921902114 100644 --- a/hail/python/test/hail/helpers.py +++ b/hail/python/test/hail/helpers.py @@ -1,3 +1,4 @@ +import asyncio import os from timeit import default_timer as timer import unittest @@ -12,6 +13,11 @@ def startTestHailContext(): + try: + asyncio.get_event_loop() + except RuntimeError as err: + if 'There is no current event loop in thread' in err.args[0]: + asyncio.set_event_loop(asyncio.new_event_loop()) global _initialized if not _initialized: backend_name = os.environ.get('HAIL_QUERY_BACKEND', 'spark') @@ -130,11 +136,30 @@ def wrapper(func, *args, **kwargs): return wrapper +def skip_when_service_backend(message='does not work on ServiceBackend'): + from hail.backend.service_backend import ServiceBackend + @decorator + def wrapper(func, *args, **kwargs): + if isinstance(hl.utils.java.Env.backend(), ServiceBackend): + raise unittest.SkipTest(message) + else: + return func(*args, **kwargs) + + return wrapper + + fails_local_backend = pytest.mark.xfail( os.environ.get('HAIL_QUERY_BACKEND') == 'local', reason="doesn't yet work on local backend", strict=True) + +fails_service_backend = pytest.mark.xfail( + os.environ.get('HAIL_QUERY_BACKEND') == 'service', + reason="doesn't yet work on service backend", + strict=True) + + def run_with_cxx_compile(): @decorator def wrapper(func, *args, **kwargs): @@ -167,4 +192,4 @@ def wrapper(func, *args, **kwargs): return func(*args, **kwargs) finally: hl._set_flags(lower=prev_lower, lower_only=prev_lower_only) - return wrapper \ No newline at end of file + return wrapper diff --git a/hail/python/test/hail/linalg/test_linalg.py b/hail/python/test/hail/linalg/test_linalg.py index 740e7652d5f..02d56855b30 100644 --- a/hail/python/test/hail/linalg/test_linalg.py +++ b/hail/python/test/hail/linalg/test_linalg.py @@ -1,10 +1,10 @@ import pytest +import hail as hl from hail.linalg import BlockMatrix -from hail.utils import new_temp_file, new_local_temp_dir, local_path_uri, FatalError, HailUserError +from hail.utils import local_path_uri, FatalError, HailUserError from ..helpers import * import numpy as np -import tempfile import math from hail.expr.expressions import ExpressionException @@ -53,16 +53,23 @@ def _assert_close(self, a, b): def _assert_rectangles_eq(self, expected, rect_path, export_rects, binary=False): for (i, r) in enumerate(export_rects): - file = rect_path + '/rect-' + str(i) + '_' + '-'.join(map(str, r)) - expected_rect = expected[r[0]:r[1], r[2]:r[3]] - actual_rect = np.reshape(np.fromfile(file), (r[1] - r[0], r[3] - r[2])) if binary else np.loadtxt(file, ndmin=2) - self._assert_eq(expected_rect, actual_rect) + piece_path = rect_path + '/rect-' + str(i) + '_' + '-'.join(map(str, r)) + with hl.current_backend().fs.open(piece_path, mode='rb' if binary else 'r') as file: + expected_rect = expected[r[0]:r[1], r[2]:r[3]] + if binary: + actual_rect = np.reshape( + np.frombuffer(file.read()), + (r[1] - r[0], r[3] - r[2])) + else: + actual_rect = np.loadtxt(file, ndmin=2) + self._assert_eq(expected_rect, actual_rect) def assert_sums_agree(self, bm, nd): self.assertAlmostEqual(bm.sum(), np.sum(nd)) self._assert_close(bm.sum(axis=0), np.sum(nd, axis=0, keepdims=True)) self._assert_close(bm.sum(axis=1), np.sum(nd, axis=1, keepdims=True)) + @fails_service_backend() @fails_local_backend() def test_from_entry_expr(self): mt = get_dataset() @@ -75,11 +82,12 @@ def 
test_from_entry_expr(self): self._assert_eq(a1, a2) self._assert_eq(a1, a3) - path = new_temp_file() - BlockMatrix.write_from_entry_expr(mt.x, path, block_size=32) - a4 = BlockMatrix.read(path).to_numpy() - self._assert_eq(a1, a4) + with hl.TemporaryDirectory(ensure_exists=False) as path: + BlockMatrix.write_from_entry_expr(mt.x, path, block_size=32) + a4 = BlockMatrix.read(path).to_numpy() + self._assert_eq(a1, a4) + @fails_service_backend() @fails_local_backend() def test_from_entry_expr_options(self): def build_mt(a): @@ -115,27 +123,29 @@ def check(expr, mean_impute, center, normalize, expected): with self.assertRaises(Exception): BlockMatrix.from_entry_expr(mt.x) + @fails_service_backend() @fails_local_backend() def test_write_from_entry_expr_overwrite(self): mt = hl.balding_nichols_model(1, 1, 1) mt = mt.select_entries(x=mt.GT.n_alt_alleles()) bm = BlockMatrix.from_entry_expr(mt.x) - path = new_temp_file() - BlockMatrix.write_from_entry_expr(mt.x, path) - self.assertRaises(FatalError, lambda: BlockMatrix.write_from_entry_expr(mt.x, path)) + with hl.TemporaryDirectory(ensure_exists=False) as path: + BlockMatrix.write_from_entry_expr(mt.x, path) + self.assertRaises(FatalError, lambda: BlockMatrix.write_from_entry_expr(mt.x, path)) - BlockMatrix.write_from_entry_expr(mt.x, path, overwrite=True) - self._assert_eq(BlockMatrix.read(path), bm) + BlockMatrix.write_from_entry_expr(mt.x, path, overwrite=True) + self._assert_eq(BlockMatrix.read(path), bm) - # non-field expressions currently take a separate code path - path2 = new_temp_file() - BlockMatrix.write_from_entry_expr(mt.x + 1, path2) - self.assertRaises(FatalError, lambda: BlockMatrix.write_from_entry_expr(mt.x + 1, path2)) + with hl.TemporaryDirectory(ensure_exists=False) as path: + # non-field expressions currently take a separate code path + BlockMatrix.write_from_entry_expr(mt.x + 1, path) + self.assertRaises(FatalError, lambda: BlockMatrix.write_from_entry_expr(mt.x + 1, path)) - BlockMatrix.write_from_entry_expr(mt.x + 2, path2, overwrite=True) - self._assert_eq(BlockMatrix.read(path2), bm + 2) + BlockMatrix.write_from_entry_expr(mt.x + 2, path, overwrite=True) + self._assert_eq(BlockMatrix.read(path), bm + 2) + @fails_service_backend() @fails_local_backend() def test_random_uniform(self): uniform = BlockMatrix.random(10, 10, gaussian=False) @@ -145,6 +155,7 @@ def test_random_uniform(self): for entry in row: assert entry > 0 + @fails_service_backend() @fails_local_backend() def test_to_from_numpy(self): n_rows = 10 @@ -154,45 +165,48 @@ def test_to_from_numpy(self): bm = BlockMatrix._create(n_rows, n_cols, data.tolist(), block_size=4) a = data.reshape((n_rows, n_cols)) - with tempfile.NamedTemporaryFile() as bm_f: - with tempfile.NamedTemporaryFile() as a_f: - bm.tofile(bm_f.name) - a.tofile(a_f.name) + with hl.TemporaryFilename() as bm_f, hl.TemporaryFilename() as a_f: + bm.tofile(bm_f) + a.tofile(a_f) - a1 = bm.to_numpy() - a2 = BlockMatrix.from_numpy(a, block_size=5).to_numpy() - a3 = np.fromfile(bm_f.name).reshape((n_rows, n_cols)) - a4 = BlockMatrix.fromfile(a_f.name, n_rows, n_cols, block_size=3).to_numpy() - a5 = BlockMatrix.fromfile(bm_f.name, n_rows, n_cols).to_numpy() + a1 = bm.to_numpy() + a2 = BlockMatrix.from_numpy(a, block_size=5).to_numpy() + a3 = np.frombuffer( + hl.current_backend().fs.open(bm_f, mode='rb').read() + ).reshape((n_rows, n_cols)) + a4 = BlockMatrix.fromfile(a_f, n_rows, n_cols, block_size=3).to_numpy() + a5 = BlockMatrix.fromfile(bm_f, n_rows, n_cols).to_numpy() - self._assert_eq(a1, a) - 
self._assert_eq(a2, a) - self._assert_eq(a3, a) - self._assert_eq(a4, a) - self._assert_eq(a5, a) + self._assert_eq(a1, a) + self._assert_eq(a2, a) + self._assert_eq(a3, a) + self._assert_eq(a4, a) + self._assert_eq(a5, a) bmt = bm.T at = a.T - with tempfile.NamedTemporaryFile() as bmt_f: - with tempfile.NamedTemporaryFile() as at_f: - bmt.tofile(bmt_f.name) - at.tofile(at_f.name) + with hl.TemporaryFilename() as bmt_f, hl.TemporaryFilename() as at_f: + bmt.tofile(bmt_f) + at.tofile(at_f) - at1 = bmt.to_numpy() - at2 = BlockMatrix.from_numpy(at).to_numpy() - at3 = np.fromfile(bmt_f.name).reshape((n_cols, n_rows)) - at4 = BlockMatrix.fromfile(at_f.name, n_cols, n_rows).to_numpy() - at5 = BlockMatrix.fromfile(bmt_f.name, n_cols, n_rows).to_numpy() + at1 = bmt.to_numpy() + at2 = BlockMatrix.from_numpy(at).to_numpy() + at3 = np.frombuffer( + hl.current_backend().fs.open(bmt_f, mode='rb').read() + ).reshape((n_cols, n_rows)) + at4 = BlockMatrix.fromfile(at_f, n_cols, n_rows).to_numpy() + at5 = BlockMatrix.fromfile(bmt_f, n_cols, n_rows).to_numpy() - self._assert_eq(at1, at) - self._assert_eq(at2, at) - self._assert_eq(at3, at) - self._assert_eq(at4, at) - self._assert_eq(at5, at) + self._assert_eq(at1, at) + self._assert_eq(at2, at) + self._assert_eq(at3, at) + self._assert_eq(at4, at) + self._assert_eq(at5, at) self._assert_eq(bm.to_numpy(_force_blocking=True), a) + @fails_service_backend() @fails_local_backend() def test_to_table(self): schema = hl.tstruct(row_idx=hl.tint64, entries=hl.tarray(hl.tfloat64)) @@ -209,6 +223,7 @@ def test_to_table(self): actual = bm.to_table_row_major(n_partitions) self.assertTrue(expected._same(actual)) + @fails_service_backend() @fails_local_backend() def test_to_table_maximum_cache_memory_in_bytes_limits(self): bm = BlockMatrix._create(5, 2, [float(i) for i in range(10)], 2) @@ -222,6 +237,7 @@ def test_to_table_maximum_cache_memory_in_bytes_limits(self): bm = BlockMatrix._create(5, 2, [float(i) for i in range(10)], 2) bm.to_table_row_major(2, maximum_cache_memory_in_bytes=16)._force_count() + @fails_service_backend() @fails_local_backend() def test_to_matrix_table(self): n_partitions = 2 @@ -240,6 +256,7 @@ def test_to_matrix_table(self): mt_round_trip = BlockMatrix.from_entry_expr(mt.element).to_matrix_table_row_major() assert mt._same(mt_round_trip) + @fails_service_backend() @fails_local_backend() def test_elementwise_ops(self): nx = np.array([[2.0]]) @@ -427,6 +444,7 @@ def test_elementwise_ops(self): self._assert_close(m / nr, m / r) self._assert_close(m / nm, m / m) + @fails_service_backend() @fails_local_backend() def test_special_elementwise_ops(self): nm = np.array([[1.0, 2.0, 3.0, 3.14], [4.0, 5.0, 6.0, 12.12]]) @@ -439,6 +457,7 @@ def test_special_elementwise_ops(self): self._assert_close(m.log(), np.log(nm)) self._assert_close((m - 4).abs(), np.abs(nm - 4)) + @fails_service_backend() @fails_local_backend() def test_matrix_ops(self): nm = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) @@ -480,6 +499,7 @@ def test_matrix_ops(self): [14.0, 16.0, 18.0]])) self._assert_eq(square.sum(axis=0).T + square.sum(axis=1), np.array([[18.0], [30.0], [42.0]])) + @fails_service_backend() @fails_local_backend() def test_tree_matmul(self): nm = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) @@ -510,6 +530,7 @@ def test_tree_matmul(self): self._assert_eq(bm_fifty_by_sixty.tree_matmul(bm_sixty_by_twenty_five, splits=split_size), fifty_by_sixty @ sixty_by_twenty_five) + @fails_service_backend() @fails_local_backend() def test_fill(self): nd = np.ones((3, 5)) @@ -521,6 
+542,7 @@ def test_fill(self): self._assert_eq(bm, nd) self._assert_eq(bm2, nd) + @fails_service_backend() @fails_local_backend() def test_sum(self): nd = np.random.normal(size=(11, 13)) @@ -549,6 +571,7 @@ def test_sum_with_sparsify(self): self.assert_sums_agree(bm3, nd) self.assert_sums_agree(bm4, nd4) + @fails_service_backend() @fails_local_backend() def test_slicing(self): nd = np.array(np.arange(0, 80, dtype=float)).reshape(8, 10) @@ -749,6 +772,7 @@ def test_sparsify_rectangles(self): self._assert_eq(bm.sparsify_rectangles([]), np.zeros(shape=(4, 4))) + @fails_service_backend() @fails_local_backend() def test_export_rectangles(self): nd = np.arange(0, 80, dtype=float).reshape(8, 10) @@ -763,70 +787,64 @@ def test_export_rectangles(self): for rects in [rects1, rects2, rects3]: for block_size in [3, 4, 10]: - rect_path = new_local_temp_dir() - rect_uri = local_path_uri(rect_path) + with hl.TemporaryDirectory() as rect_uri, hl.TemporaryDirectory() as rect_uri_bytes: + bm = BlockMatrix.from_numpy(nd, block_size=block_size) - bm = BlockMatrix.from_numpy(nd, block_size=block_size) - bm.export_rectangles(rect_uri, rects) + bm.export_rectangles(rect_uri, rects) + self._assert_rectangles_eq(nd, rect_uri, rects) - self._assert_rectangles_eq(nd, rect_path, rects) - - rect_path_bytes = new_local_temp_dir() - rect_uri_bytes = local_path_uri(rect_path_bytes) - - bm.export_rectangles(rect_uri_bytes, rects, binary=True) - self._assert_rectangles_eq(nd, rect_path_bytes, rects, binary=True) + bm.export_rectangles(rect_uri_bytes, rects, binary=True) + self._assert_rectangles_eq(nd, rect_uri_bytes, rects, binary=True) @skip_unless_spark_backend() def test_export_rectangles_sparse(self): - rect_path = new_local_temp_dir() - rect_uri = local_path_uri(rect_path) - nd = np.array([[1.0, 2.0, 3.0, 4.0], - [5.0, 6.0, 7.0, 8.0], - [9.0, 10.0, 11.0, 12.0], - [13.0, 14.0, 15.0, 16.0]]) - bm = BlockMatrix.from_numpy(nd, block_size=2) - sparsify_rects = [[0, 1, 0, 1], [0, 3, 0, 2], [1, 2, 0, 4]] - export_rects = [[0, 1, 0, 1], [0, 3, 0, 2], [1, 2, 0, 4], [2, 4, 2, 4]] - bm.sparsify_rectangles(sparsify_rects).export_rectangles(rect_uri, export_rects) - - expected = np.array([[1.0, 2.0, 3.0, 4.0], - [5.0, 6.0, 7.0, 8.0], - [9.0, 10.0, 0.0, 0.0], - [13.0, 14.0, 0.0, 0.0]]) - - self._assert_rectangles_eq(expected, rect_path, export_rects) - + with hl.TemporaryDirectory() as rect_uri: + nd = np.array([[1.0, 2.0, 3.0, 4.0], + [5.0, 6.0, 7.0, 8.0], + [9.0, 10.0, 11.0, 12.0], + [13.0, 14.0, 15.0, 16.0]]) + bm = BlockMatrix.from_numpy(nd, block_size=2) + sparsify_rects = [[0, 1, 0, 1], [0, 3, 0, 2], [1, 2, 0, 4]] + export_rects = [[0, 1, 0, 1], [0, 3, 0, 2], [1, 2, 0, 4], [2, 4, 2, 4]] + bm.sparsify_rectangles(sparsify_rects).export_rectangles(rect_uri, export_rects) + + expected = np.array([[1.0, 2.0, 3.0, 4.0], + [5.0, 6.0, 7.0, 8.0], + [9.0, 10.0, 0.0, 0.0], + [13.0, 14.0, 0.0, 0.0]]) + + self._assert_rectangles_eq(expected, rect_uri, export_rects) + + @fails_service_backend() @fails_local_backend() def test_export_rectangles_filtered(self): - rect_path = new_local_temp_dir() - rect_uri = local_path_uri(rect_path) - nd = np.array([[1.0, 2.0, 3.0, 4.0], - [5.0, 6.0, 7.0, 8.0], - [9.0, 10.0, 11.0, 12.0], - [13.0, 14.0, 15.0, 16.0]]) - bm = BlockMatrix.from_numpy(nd) - bm = bm[1:3, 1:3] - export_rects = [[0, 1, 0, 2], [1, 2, 0, 2]] - bm.export_rectangles(rect_uri, export_rects) - - expected = np.array([[6.0, 7.0], - [10.0, 11.0]]) - - self._assert_rectangles_eq(expected, rect_path, export_rects) - + with 
hl.TemporaryDirectory() as rect_uri: + nd = np.array([[1.0, 2.0, 3.0, 4.0], + [5.0, 6.0, 7.0, 8.0], + [9.0, 10.0, 11.0, 12.0], + [13.0, 14.0, 15.0, 16.0]]) + bm = BlockMatrix.from_numpy(nd) + bm = bm[1:3, 1:3] + export_rects = [[0, 1, 0, 2], [1, 2, 0, 2]] + bm.export_rectangles(rect_uri, export_rects) + + expected = np.array([[6.0, 7.0], + [10.0, 11.0]]) + + self._assert_rectangles_eq(expected, rect_uri, export_rects) + + @fails_service_backend() @fails_local_backend() def test_export_blocks(self): nd = np.ones(shape=(8, 10)) bm = BlockMatrix.from_numpy(nd, block_size=20) - bm_path = new_local_temp_dir() - bm_uri = local_path_uri(bm_path) - bm.export_blocks(bm_uri, binary=True) - actual = BlockMatrix.rectangles_to_numpy(bm_path, binary=True) - - self._assert_eq(nd, actual) + with hl.TemporaryDirectory() as bm_uri: + bm.export_blocks(bm_uri, binary=True) + actual = BlockMatrix.rectangles_to_numpy(bm_uri, binary=True) + self._assert_eq(nd, actual) + @fails_service_backend() @fails_local_backend() def test_rectangles_to_numpy(self): nd = np.array([[1.0, 2.0, 3.0], @@ -835,20 +853,17 @@ def test_rectangles_to_numpy(self): rects = [[0, 3, 0, 1], [1, 2, 0, 2]] - rect_path = new_local_temp_dir() - rect_uri = local_path_uri(rect_path) - BlockMatrix.from_numpy(nd).export_rectangles(rect_uri, rects) + with hl.TemporaryDirectory() as rect_uri, hl.TemporaryDirectory() as rect_bytes_uri: + BlockMatrix.from_numpy(nd).export_rectangles(rect_uri, rects) + BlockMatrix.from_numpy(nd).export_rectangles(rect_bytes_uri, rects, binary=True) - rect_bytes_path = new_local_temp_dir() - rect_bytes_uri = local_path_uri(rect_bytes_path) - BlockMatrix.from_numpy(nd).export_rectangles(rect_bytes_uri, rects, binary=True) - - expected = np.array([[1.0, 0.0], - [4.0, 5.0], - [7.0, 0.0]]) - self._assert_eq(expected, BlockMatrix.rectangles_to_numpy(rect_path)) - self._assert_eq(expected, BlockMatrix.rectangles_to_numpy(rect_bytes_path, binary=True)) + expected = np.array([[1.0, 0.0], + [4.0, 5.0], + [7.0, 0.0]]) + self._assert_eq(expected, BlockMatrix.rectangles_to_numpy(rect_uri)) + self._assert_eq(expected, BlockMatrix.rectangles_to_numpy(rect_bytes_uri, binary=True)) + @fails_service_backend() @fails_local_backend() def test_block_matrix_entries(self): n_rows, n_cols = 5, 3 @@ -867,6 +882,7 @@ def test_block_matrix_entries(self): self.assertEqual(len(entries_table.row), 3) self.assertTrue(table._same(entries_table)) + @fails_service_backend() @fails_local_backend() def test_from_entry_expr_filtered(self): mt = hl.utils.range_matrix_table(1, 1).filter_entries(False) @@ -909,6 +925,7 @@ def test_locus_windows_per_contig(self): f = hl._locus_windows_per_contig([[1.0, 3.0, 4.0], [2.0, 2.0], [5.0]], 1.0) assert hl.eval(f) == ([0, 1, 1, 3, 3, 5], [1, 3, 3, 5, 5, 6]) + @fails_service_backend() def test_locus_windows(self): def assert_eq(a, b): assert np.array_equal(a, np.array(b)), f"a={a}, b={b}" @@ -988,26 +1005,27 @@ def assert_eq(a, b): hl.linalg.utils.locus_windows(ht.locus, 1.0, coord_expr=ht.cm) assert "missing value for 'coord_expr'" in str(cm.exception) + @fails_service_backend() @fails_local_backend() def test_write_overwrite(self): - path = new_temp_file() - - bm = BlockMatrix.from_numpy(np.array([[0]])) - bm.write(path) - self.assertRaises(FatalError, lambda: bm.write(path)) + with hl.TemporaryDirectory(ensure_exists=False) as path: + bm = BlockMatrix.from_numpy(np.array([[0]])) + bm.write(path) + self.assertRaises(FatalError, lambda: bm.write(path)) - bm2 = BlockMatrix.from_numpy(np.array([[1]])) - bm2.write(path, 
overwrite=True) - self._assert_eq(BlockMatrix.read(path), bm2) + bm2 = BlockMatrix.from_numpy(np.array([[1]])) + bm2.write(path, overwrite=True) + self._assert_eq(BlockMatrix.read(path), bm2) + @fails_service_backend() @fails_local_backend() def test_stage_locally(self): nd = np.arange(0, 80, dtype=float).reshape(8, 10) - bm_uri = new_temp_file() - BlockMatrix.from_numpy(nd, block_size=3).write(bm_uri, stage_locally=True) + with hl.TemporaryDirectory(ensure_exists=False) as bm_uri: + BlockMatrix.from_numpy(nd, block_size=3).write(bm_uri, stage_locally=True) - bm = BlockMatrix.read(bm_uri) - self._assert_eq(nd, bm) + bm = BlockMatrix.read(bm_uri) + self._assert_eq(nd, bm) @skip_unless_spark_backend() def test_svd(self): @@ -1146,7 +1164,7 @@ def test_sparse_transposition(self): @skip_unless_spark_backend() def test_row_blockmatrix_sum(self): - + row = BlockMatrix.from_numpy(np.arange(10)) col = row.T diff --git a/hail/python/test/hail/matrixtable/test_file_formats.py b/hail/python/test/hail/matrixtable/test_file_formats.py index 09604025710..df1ca854197 100644 --- a/hail/python/test/hail/matrixtable/test_file_formats.py +++ b/hail/python/test/hail/matrixtable/test_file_formats.py @@ -37,6 +37,7 @@ class Tests(unittest.TestCase): def test_write(self): create_backward_compatibility_files() + @fails_service_backend() @fails_local_backend() def test_backward_compatability(self): import os diff --git a/hail/python/test/hail/matrixtable/test_grouped_matrix_table.py b/hail/python/test/hail/matrixtable/test_grouped_matrix_table.py index 9c1a3d9e8b0..294d6860b57 100644 --- a/hail/python/test/hail/matrixtable/test_grouped_matrix_table.py +++ b/hail/python/test/hail/matrixtable/test_grouped_matrix_table.py @@ -90,6 +90,7 @@ def test_errors_caught_correctly(self): d = mt.group_cols_by(group5=(mt['group4']['a'] + 1)).aggregate_cols(x=hl.agg.count()) self.assertRaises(ExpressionException, d.aggregate_cols, x=hl.agg.count()) # duplicate field + @fails_service_backend() def test_fields_work_correctly(self): mt = self.get_groupable_matrix() a = mt.group_rows_by(mt['group1']).aggregate(c=hl.agg.sum(mt['c'])) @@ -100,6 +101,7 @@ def test_fields_work_correctly(self): self.assertEqual(b.count_cols(), 6) self.assertTrue('group3' in b.col_key) + @fails_service_backend() def test_nested_fields_work_correctly(self): mt = self.get_groupable_matrix() a = mt.group_rows_by(mt['group2']['a']).aggregate(c=hl.agg.sum(mt['c'])) @@ -110,6 +112,7 @@ def test_nested_fields_work_correctly(self): self.assertEqual(b.count_cols(), 6) self.assertTrue('a' in b.col_key) + @fails_service_backend() def test_named_fields_work_correctly(self): mt = self.get_groupable_matrix() a = mt.group_rows_by(group5=(mt['group2']['a'] + 1)).aggregate(c=hl.agg.sum(mt['c'])) @@ -120,6 +123,7 @@ def test_named_fields_work_correctly(self): self.assertEqual(b.count_cols(), 6) self.assertTrue('group5' in b.col_key) + @fails_service_backend() def test_joins_work_correctly(self): mt, mt2 = self.get_groupable_matrix2() @@ -163,6 +167,7 @@ def test_joins_work_correctly(self): self.assertTrue(row_result.entries()._same(row_expected)) + @fails_service_backend() def test_group_rows_by_aggregate(self): mt, mt2 = self.get_groupable_matrix2() @@ -192,6 +197,7 @@ def test_group_rows_by_aggregate(self): self.assertTrue(row_result.entries()._same(row_expected)) + @fails_service_backend() def test_group_cols_by_aggregate(self): mt, mt2 = self.get_groupable_matrix2() diff --git a/hail/python/test/hail/matrixtable/test_matrix_table.py 
b/hail/python/test/hail/matrixtable/test_matrix_table.py index 501d42fd58b..b7d610bfad9 100644 --- a/hail/python/test/hail/matrixtable/test_matrix_table.py +++ b/hail/python/test/hail/matrixtable/test_matrix_table.py @@ -73,6 +73,7 @@ def test_annotate_globals(self): self.assertTrue(f(hl.eval(mt.annotate_globals(foo=hl.literal(x, t)).foo), x), f"{x}, {t}") self.assertTrue(f(hl.eval(ht.annotate_globals(foo=hl.literal(x, t)).foo), x), f"{x}, {t}") + @fails_service_backend() def test_head(self): # no empty partitions mt1 = hl.utils.range_matrix_table(10, 10) @@ -99,6 +100,7 @@ def test_head_cols(self): assert mt1.head(1, None).count() == (1, 10) assert mt1.head(None, 1).count() == (10, 1) + @fails_service_backend() def test_tail(self): # no empty partitions mt1 = hl.utils.range_matrix_table(10, 10) @@ -125,6 +127,7 @@ def test_tail_cols(self): assert mt1.tail(1, None).count() == (1, 10) assert mt1.tail(None, 1).count() == (10, 1) + @fails_service_backend() def test_tail_entries(self): mt = hl.utils.range_matrix_table(100, 30) mt = mt.filter_cols(mt.col_idx != 29) @@ -140,6 +143,7 @@ def expected(n, m): assert tail(30, None) == expected(30, 29) assert tail(30, 10) == expected(30, 10) + @fails_service_backend() @fails_local_backend() def test_tail_scan(self): mt = hl.utils.range_matrix_table(30, 40) @@ -163,6 +167,7 @@ def test_filter(self): mt = mt.filter_entries((mt.z1 < 5) & (mt.y1 == 3) & (mt.x1 == 5) & (mt.foo == 2)) mt.count_rows() + @fails_service_backend() @fails_local_backend() def test_aggregate(self): mt = self.get_mt() @@ -271,6 +276,7 @@ def test_drop(self): self.assertTrue('GT' not in mt2.entry) mt2._force_count_rows() + @fails_service_backend() def test_explode_rows(self): mt = hl.utils.range_matrix_table(4, 4) mt = mt.annotate_entries(e=mt.row_idx * 10 + mt.col_idx) @@ -283,6 +289,7 @@ def test_explode_rows(self): mt = mt.annotate_rows(x=hl.struct(y=hl.range(0, mt.row_idx))) self.assertEqual(mt.explode_rows(mt.x.y).count_rows(), 6) + @fails_service_backend() def test_explode_cols(self): mt = hl.utils.range_matrix_table(4, 4) mt = mt.annotate_entries(e=mt.row_idx * 10 + mt.col_idx) @@ -300,6 +307,7 @@ def test_explode_key_errors(self): with self.assertRaises(ValueError): mt.explode_rows('b') + @fails_service_backend() def test_group_by_field_lifetimes(self): mt = hl.utils.range_matrix_table(3, 3) mt2 = (mt.group_rows_by(row_idx='100') @@ -310,6 +318,7 @@ def test_group_by_field_lifetimes(self): .aggregate(x=hl.agg.collect_as_set(mt.col_idx + 5))) assert mt3.aggregate_entries(hl.agg.all(mt3.x == hl.set({5, 6, 7}))) + @fails_service_backend() def test_aggregate_cols_by(self): mt = hl.utils.range_matrix_table(2, 4) mt = (mt.annotate_cols(group=mt.col_idx < 2) @@ -339,6 +348,7 @@ def test_aggregate_cols_scope_violation(self): mt.aggregate_cols(hl.agg.filter(False, hl.agg.sum(mt.GT.is_non_ref()))) assert "scope violation" in str(exc.value) + @fails_service_backend() def test_aggregate_rows_by(self): mt = hl.utils.range_matrix_table(4, 2) mt = (mt.annotate_rows(group=mt.row_idx < 2) @@ -424,6 +434,7 @@ def test_semi_anti_join_cols(self): assert mt.semi_join_cols(ht).count() == (3, 3) assert mt.anti_join_cols(ht).count() == (3, 7) + @fails_service_backend() @fails_local_backend() def test_joins(self): mt = self.get_mt().select_rows(x1=1, y1=1) @@ -443,6 +454,7 @@ def test_joins(self): self.assertTrue(rt.all(rt.y2 == 2)) self.assertTrue(ct.all(ct.c2 == 2)) + @fails_service_backend() @fails_local_backend() def test_joins_with_key_structs(self): mt = self.get_mt() @@ -464,12 +476,14 @@ def 
test_index_keyless(self): with self.assertRaisesRegex(hl.expr.ExpressionException, "MatrixTable col key: *<<>>"): mt.key_cols_by().index_cols(mt.col_idx) + @fails_service_backend() def test_table_join(self): ds = self.get_mt() # test different row schemas self.assertTrue(ds.union_cols(ds.drop(ds.info)) .count_rows(), 346) + @fails_service_backend() def test_table_product_join(self): left = hl.utils.range_matrix_table(5, 1) right = hl.utils.range_table(5) @@ -478,6 +492,7 @@ def test_table_product_join(self): rows = left.rows() self.assertTrue(rows.all(rows.matches.map(lambda x: x.idx) == hl.range(0, rows.row_idx))) + @fails_service_backend() @fails_local_backend() def test_naive_coalesce(self): mt = self.get_mt(min_partitions=8) @@ -494,6 +509,7 @@ def test_literals_rebuild(self): mt = mt.annotate_rows(x=hl.if_else(hl.literal([1,2,3])[mt.row_idx] < hl.rand_unif(10, 11), mt.globals, hl.struct())) mt._force_count_rows() + @fails_service_backend() @fails_local_backend() def test_globals_lowering(self): mt = hl.utils.range_matrix_table(1, 1).annotate_globals(x=1) @@ -513,6 +529,7 @@ def test_globals_lowering(self): .aggregate(bar=hl.agg.collect(mt.globals == lit)) ._force_count_rows()) + @fails_service_backend() def test_unions(self): dataset = hl.import_vcf(resource('sample2.vcf')) @@ -534,6 +551,7 @@ def test_unions(self): for s, count in ds.aggregate_cols(agg.counter(ds.s)).items(): self.assertEqual(count, 3) + @fails_service_backend() def test_union_cols_example(self): joined = hl.import_vcf(resource('joined.vcf')) @@ -542,11 +560,13 @@ def test_union_cols_example(self): self.assertTrue(left.union_cols(right)._same(joined)) + @fails_service_backend() def test_union_cols_distinct(self): mt = hl.utils.range_matrix_table(10, 10) mt = mt.key_rows_by(x = mt.row_idx // 2) assert mt.union_cols(mt).count_rows() == 5 + @fails_service_backend() def test_union_cols_outer(self): r, c = 10, 10 mt = hl.utils.range_matrix_table(2*r, c) @@ -594,6 +614,7 @@ def test_choose_cols(self): self.assertEqual(ds.choose_cols(list(range(10))).s.collect(), old_order[:10]) + @fails_service_backend() def test_choose_cols_vs_explode(self): ds = self.get_mt() @@ -601,6 +622,7 @@ def test_choose_cols_vs_explode(self): self.assertTrue(ds.choose_cols(sorted(list(range(ds.count_cols())) * 2))._same(ds2)) + @fails_service_backend() def test_distinct_by_row(self): orig_mt = hl.utils.range_matrix_table(10, 10) mt = orig_mt.key_rows_by(row_idx=orig_mt.row_idx // 2) @@ -608,6 +630,7 @@ def test_distinct_by_row(self): self.assertTrue(orig_mt.union_rows(orig_mt).distinct_by_row()._same(orig_mt)) + @fails_service_backend() def test_distinct_by_col(self): orig_mt = hl.utils.range_matrix_table(10, 10) mt = orig_mt.key_cols_by(col_idx=orig_mt.col_idx // 2) @@ -620,6 +643,7 @@ def test_aggregation_with_no_aggregators(self): self.assertEqual(mt.group_rows_by(mt.row_idx).aggregate().count_rows(), 3) self.assertEqual(mt.group_cols_by(mt.col_idx).aggregate().count_cols(), 3) + @fails_service_backend() def test_computed_key_join_1(self): ds = self.get_mt() kt = hl.Table.parallelize( @@ -633,6 +657,7 @@ def test_computed_key_join_1(self): self.assertTrue( rt.all(((rt.locus.position % 2) == 0) == rt['value'])) + @fails_service_backend() def test_computed_key_join_2(self): # multiple keys ds = self.get_mt() @@ -649,6 +674,7 @@ def test_computed_key_join_2(self): self.assertTrue( rt.all((rt.locus.position % 2) - 2 * (rt.info.DP % 2) == rt['value'])) + @fails_service_backend() def test_computed_key_join_3(self): # duplicate row keys ds = 
self.get_mt() @@ -668,6 +694,7 @@ def test_computed_key_join_3(self): rt['value'] == "IB", hl.is_missing(rt['value'])))) + @fails_service_backend() @fails_local_backend() def test_interval_join(self): left = hl.utils.range_matrix_table(50, 1, n_partitions=10) @@ -679,6 +706,7 @@ def test_interval_join(self): .when(rows.row_idx % 10 < 5, rows.interval_matches.idx == rows.row_idx // 10) .default(hl.is_missing(rows.interval_matches)))) + @fails_service_backend() @fails_local_backend() def test_interval_product_join(self): left = hl.utils.range_matrix_table(50, 1, n_partitions=8) @@ -703,6 +731,7 @@ def test_entry_join_self(self): self.assertTrue(mt_join_entries.all(mt_join_entries.x == mt_join_entries.x2)) + @fails_service_backend() def test_entry_join_const(self): mt1 = hl.utils.range_matrix_table(10, 10, n_partitions=4) mt1 = mt1.annotate_entries(x=mt1.row_idx + mt1.col_idx) @@ -752,6 +781,7 @@ def test_entries_table_no_keys(self): assert mt.key_rows_by().key_cols_by().entries().collect() == original_order assert mt.key_rows_by().entries().collect() == sorted(original_order, key=lambda x: x.col_idx) + @fails_service_backend() def test_entries_table_with_out_of_order_row_key_fields(self): mt = hl.utils.range_matrix_table(10, 10, 1) mt = mt.select_rows(key2=0, key1=mt.row_idx) @@ -777,6 +807,7 @@ def test_vcf_regression(self): self.assertEqual( ds.filter_rows(ds.alleles.length() == 2).count_rows(), 0) + @fails_service_backend() def test_field_groups(self): ds = self.get_mt() @@ -793,6 +824,7 @@ def test_field_groups(self): (df.GT == df.entry_struct.GT)) & (df.AD == df.entry_struct.AD)))) + @fails_service_backend() @fails_local_backend() def test_filter_partitions(self): ds = self.get_mt(min_partitions=8) @@ -805,6 +837,7 @@ def test_filter_partitions(self): ds._filter_partitions([0, 3, 7]), ds._filter_partitions([0, 3, 7], keep=False)))) + @fails_service_backend() def test_from_rows_table(self): mt = hl.import_vcf(resource('sample.vcf')) mt = mt.annotate_globals(foo='bar') @@ -817,6 +850,7 @@ def test_sample_rows(self): ds_small = ds.sample_rows(0.01) self.assertTrue(ds_small.count_rows() < ds.count_rows()) + @fails_service_backend() @fails_local_backend() def test_read_stored_cols(self): ds = self.get_mt() @@ -826,6 +860,7 @@ def test_read_stored_cols(self): t = hl.read_table(f + '/cols') self.assertTrue(ds.cols()._same(t)) + @fails_service_backend() def test_read_stored_rows(self): ds = self.get_mt() ds = ds.annotate_globals(x='foo') @@ -834,6 +869,7 @@ def test_read_stored_rows(self): t = hl.read_table(f + '/rows') self.assertTrue(ds.rows()._same(t)) + @fails_service_backend() def test_read_stored_globals(self): ds = self.get_mt() ds = ds.annotate_globals(x=5, baz='foo') @@ -842,6 +878,7 @@ def test_read_stored_globals(self): t = hl.read_table(f + '/globals') self.assertTrue(ds.globals_table()._same(t)) + @fails_service_backend() @fails_local_backend() def test_indexed_read(self): mt = hl.utils.range_matrix_table(2000, 100, 10) @@ -861,6 +898,7 @@ def test_indexed_read(self): self.assertEqual(mt2.n_partitions(), 3) self.assertTrue(mt.filter_rows((mt.row_idx >= 150) & (mt.row_idx < 500))._same(mt2)) + @fails_service_backend() @fails_local_backend() def test_indexed_read_vcf(self): vcf = self.get_mt(10) @@ -876,6 +914,7 @@ def test_indexed_read_vcf(self): q = (vcf.locus >= l3) & (vcf.locus < l4) self.assertTrue(vcf.filter_rows(p | q)._same(mt)) + @fails_service_backend() def test_codecs_matrix(self): from hail.utils.java import scala_object supported_codecs = scala_object(Env.hail().io, 
'BufferSpec').specs() @@ -886,6 +925,7 @@ def test_codecs_matrix(self): ds2 = hl.read_matrix_table(temp) self.assertTrue(ds._same(ds2)) + @fails_service_backend() def test_codecs_table(self): from hail.utils.java import scala_object supported_codecs = scala_object(Env.hail().io, 'BufferSpec').specs() @@ -896,6 +936,7 @@ def test_codecs_table(self): rt2 = hl.read_table(temp) self.assertTrue(rt._same(rt2)) + @fails_service_backend() def test_fix3307_read_mt_wrong(self): mt = hl.import_vcf(resource('sample2.vcf')) mt = hl.split_multi_hts(mt) @@ -951,6 +992,7 @@ def test_filter_na(self): self.assertEqual(mt.filter_cols(hl.missing(hl.tbool)).count_cols(), 0) self.assertEqual(mt.filter_entries(hl.missing(hl.tbool)).entries().count(), 0) + @fails_service_backend() def test_to_table_on_various_fields(self): mt = hl.utils.range_matrix_table(3, 4) @@ -999,16 +1041,19 @@ def test_to_table_on_various_fields(self): self.assertEqual(mt.rows().r.collect(), sorted_rows) self.assertEqual(mt.rows().r.take(1), [sorted_rows[0]]) + @fails_service_backend() def test_order_by(self): ht = hl.utils.range_table(10) self.assertEqual(ht.order_by('idx').idx.collect(), list(range(10))) self.assertEqual(ht.order_by(hl.asc('idx')).idx.collect(), list(range(10))) self.assertEqual(ht.order_by(hl.desc('idx')).idx.collect(), list(range(10))[::-1]) + @fails_service_backend() def test_order_by_complex_exprs(self): ht = hl.utils.range_table(10) assert ht.order_by(-ht.idx).idx.collect() == list(range(10))[::-1] + @fails_service_backend() def test_order_by_intervals(self): intervals = {0: hl.Interval(0, 3, includes_start=True, includes_end=False), 1: hl.Interval(0, 4, includes_start=True, includes_end=True), @@ -1046,6 +1091,7 @@ def test_range_matrix_table_0_rows_0_cols(self): mt = mt.annotate_entries(x=mt.row_idx * mt.col_idx) self.assertEqual(mt.x.collect(), []) + @fails_service_backend() def test_make_table(self): mt = hl.utils.range_matrix_table(3, 2) mt = mt.select_entries(x=mt.row_idx * mt.col_idx) @@ -1081,6 +1127,7 @@ def test_make_table_sep(self): t = mt.make_table(separator='__') assert list(t.row) == ['row_idx', '0__x', '1__x'] + @fails_service_backend() def test_make_table_row_equivalence(self): mt = hl.utils.range_matrix_table(3, 3) mt = mt.annotate_rows(r1 = hl.rand_norm(), r2 = hl.rand_norm()) @@ -1140,6 +1187,7 @@ def test_agg_call_stats(self): self.assertTrue(hl.Table.parallelize([actual]), hl.Table.parallelize([expected])) + @fails_service_backend() @fails_local_backend() def test_hardy_weinberg_test(self): mt = hl.import_vcf(resource('HWE_test.vcf')) @@ -1176,6 +1224,7 @@ def test_hw_func_and_agg_agree(self): rt = mt.rows() self.assertTrue(rt.all(rt.hw == rt.hw2)) + @fails_service_backend() @fails_local_backend() def test_write_stage_locally(self): mt = self.get_mt() @@ -1185,6 +1234,7 @@ def test_write_stage_locally(self): mt2 = hl.read_matrix_table(f) self.assertTrue(mt._same(mt2)) + @fails_service_backend() def test_nulls_in_distinct_joins(self): # MatrixAnnotateRowsTable uses left distinct join @@ -1223,6 +1273,7 @@ def row(new_key, idx1, idx2): self.assertTrue(matrix1.union_cols(matrix2)._same(expected)) + @fails_service_backend() @fails_local_backend() def test_row_joins_into_table(self): rt = hl.utils.range_matrix_table(9, 13, 3) @@ -1388,6 +1439,7 @@ def localize_entries_creates_arrays_of_entries_and_array_of_cols(self): assert [[x * y for x in range(0, 10)] for y in range(0, 10)] == localized.entries.collect() assert range(0, 10) == localized.cols.collect() + @fails_service_backend() 
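# The @fails_service_backend() / @fails_local_backend() markers added throughout this
# patch are imported from ..helpers (see the test_king.py import hunk later in this
# patch); their implementation is not part of this diff. A minimal sketch, assuming the
# helpers are thin wrappers over pytest.mark.xfail keyed on the HAIL_QUERY_BACKEND
# environment variable that test_statgen.py already consults; the backend names
# 'service' and 'local' and the non-strict xfail behavior are assumptions, and the real
# helpers may differ.
import os
import pytest

def _fails_on_backend(backend, reason):
    # Expect failure only when the suite is running against the named backend;
    # on every other backend the mark is inert and the test runs normally.
    return pytest.mark.xfail(
        os.environ.get('HAIL_QUERY_BACKEND', 'spark') == backend,
        reason=reason,
        strict=False)

def fails_service_backend(reason='known failure on the service backend'):
    return _fails_on_backend('service', reason)

def fails_local_backend(reason='known failure on the local backend'):
    return _fails_on_backend('local', reason)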
@fails_local_backend() def test_multi_write(self): mt = self.get_mt() @@ -1442,6 +1494,7 @@ def test_entry_filter_stats(self): fraction_filtered=hl.float32(0.0))}) assert mt.aggregate_cols(hl.agg.all(mt.entry_stats_col == col_expected[mt.col_idx % 4 == 0])) + @fails_service_backend() def test_annotate_col_agg_lowering(self): mt = hl.utils.range_matrix_table(10, 10, 2) mt = mt.annotate_cols(c1=[mt.col_idx, mt.col_idx * 2]) @@ -1453,6 +1506,7 @@ def test_annotate_col_agg_lowering(self): grouped=hl.agg.group_by(mt.e1 % 5, hl.agg.sum(mt.e1) + common_ref)) mt.cols()._force_count() + @fails_service_backend() def test_annotate_rows_scan_lowering(self): mt = hl.utils.range_matrix_table(10, 10, 2) mt = mt.annotate_rows(r1=[mt.row_idx, mt.row_idx * 2]) @@ -1487,6 +1541,7 @@ def assert_res(x): mt.show(handler=assert_res) + @fails_service_backend() def test_partitioned_write(self): mt = hl.utils.range_matrix_table(40, 3, 5) @@ -1526,6 +1581,7 @@ def test_parts(parts, expected=mt): ], mt.filter_rows((mt.row_idx >= 5) & (mt.row_idx < 35))) + @fails_service_backend() def test_partitioned_write_coerce(self): mt = hl.import_vcf(resource('sample.vcf')) parts = [ @@ -1552,16 +1608,19 @@ def test_invalid_metadata(self): with pytest.raises(hl.utils.FatalError, match='metadata does not contain file version'): hl.read_matrix_table(resource('0.1-1fd5cc7.vds')) + @fails_service_backend() def test_legacy_files_with_required_globals(self): hl.read_table(resource('required_globals.ht'))._force_count() hl.read_matrix_table(resource('required_globals.mt'))._force_count_rows() + @fails_service_backend() def test_matrix_native_write_range(self): mt = hl.utils.range_matrix_table(11, 3, n_partitions=3) f = new_temp_file() mt.write(f) assert hl.read_matrix_table(f)._same(mt) + @fails_service_backend() @fails_local_backend() def test_matrix_multi_write_range(self): mts = [ @@ -1579,6 +1638,7 @@ def test_key_cols_by_extract_issue(self): mt = mt.add_col_index() mt.show() + @fails_service_backend() def test_filtered_entries_group_rows_by(self): mt = hl.utils.range_matrix_table(1, 1) mt = mt.filter_entries(False) @@ -1604,12 +1664,22 @@ def test_invalid_field_ref_annotate(self): mt.annotate_entries(x = mt2.af) +@fails_service_backend() def test_read_write_all_types(): mt = create_all_values_matrix_table() tmp_file = new_temp_file() mt.write(tmp_file) assert hl.read_matrix_table(tmp_file)._same(mt) +@fails_service_backend() +@fails_local_backend() +def test_read_write_balding_nichols_model(): + mt = hl.balding_nichols_model(3, 10, 10) + tmp_file = new_temp_file() + mt.write(tmp_file) + assert hl.read_matrix_table(tmp_file)._same(mt) + +@fails_service_backend() @fails_local_backend() def test_read_partitions(): ht = hl.utils.range_matrix_table(n_rows=100, n_cols=10, n_partitions=3) diff --git a/hail/python/test/hail/methods/test_family_methods.py b/hail/python/test/hail/methods/test_family_methods.py index 5e13e7e01c1..395bad42aed 100644 --- a/hail/python/test/hail/methods/test_family_methods.py +++ b/hail/python/test/hail/methods/test_family_methods.py @@ -8,6 +8,7 @@ class Tests(unittest.TestCase): + @fails_service_backend() def test_trio_matrix(self): """ This test depends on certain properties of the trio matrix VCF and @@ -80,6 +81,7 @@ def test_trio_matrix(self): self.assertEqual(e_cols.row.dtype, t_cols.row.dtype) self.assertTrue(e_cols._same(t_cols)) + @fails_service_backend() def test_trio_matrix_null_keys(self): ped = hl.Pedigree.read(resource('triomatrix.fam')) ht = hl.import_fam(resource('triomatrix.fam')) @@ -93,12 
+95,14 @@ def test_trio_matrix_null_keys(self): tt = hl.trio_matrix(mt, ped, complete_trios=True) self.assertEqual(tt.count_cols(), 0) + @fails_service_backend() def test_trio_matrix_incomplete_trios(self): ped = hl.Pedigree.read(resource('triomatrix.fam')) mt = hl.import_vcf(resource('triomatrix.vcf')) hl.trio_matrix(mt, ped, complete_trios=False) + @fails_service_backend() def test_mendel_errors(self): mt = hl.import_vcf(resource('mendel.vcf')) ped = hl.Pedigree.read(resource('mendel.fam')) @@ -181,6 +185,7 @@ def test_mendel_errors(self): self.assertTrue(men2.filter(men2.s == 'Dtr1')._same(men.filter(men.s == 'Dtr1'))) + @fails_service_backend() @fails_local_backend() def test_tdt(self): pedigree = hl.Pedigree.read(resource('tdt.fam')) @@ -214,6 +219,7 @@ def test_tdt(self): bad.order_by(hl.asc(bad.v)).show() self.fail('Found rows in violation of the predicate (see show output)') + @fails_service_backend() def test_de_novo(self): mt = hl.import_vcf(resource('denovo.vcf')) mt = mt.filter_rows(mt.locus.in_y_par(), keep=False) # de_novo_finder doesn't know about y PAR diff --git a/hail/python/test/hail/methods/test_impex.py b/hail/python/test/hail/methods/test_impex.py index 1adaa411501..2d9b1ad01a8 100644 --- a/hail/python/test/hail/methods/test_impex.py +++ b/hail/python/test/hail/methods/test_impex.py @@ -35,6 +35,7 @@ class VCFTests(unittest.TestCase): def test_info_char(self): self.assertEqual(hl.import_vcf(resource('infochar.vcf')).count_rows(), 1) + @fails_service_backend() @fails_local_backend() def test_import_export_same(self): for i in range(10): @@ -57,6 +58,7 @@ def test_info_float64(self): for f in _FLOAT_ARRAY_INFO_FIELDS: self.assertEqual(mt['info'][f].dtype, hl.tarray(hl.tfloat64)) + @fails_service_backend() def test_glob(self): full = hl.import_vcf(resource('sample.vcf')) parts = hl.import_vcf(resource('samplepart*.vcf')) @@ -73,11 +75,13 @@ def test_undeclared_info(self): self.assertFalse('undeclared' in info_type) self.assertFalse('undeclaredFlag' in info_type) + @fails_service_backend() def test_malformed(self): with self.assertRaisesRegex(FatalError, "invalid character"): mt = hl.import_vcf(resource('malformed.vcf')) mt._force_count_rows() + @fails_service_backend() @fails_local_backend() def test_not_identical_headers(self): t = new_temp_file(extension='vcf') @@ -100,6 +104,7 @@ def test_find_replace(self): mt.rows().show() assert mt.aggregate_rows(hl.agg.all(hl.is_missing(mt.rsid))) + @fails_service_backend() def test_haploid(self): expected = hl.Table.parallelize( [hl.struct(locus = hl.locus("X", 16050036), s = "C1046::HG02024", @@ -118,6 +123,7 @@ def test_haploid(self): entries = entries.select('GT', 'AD', 'GQ') self.assertTrue(entries._same(expected)) + @fails_service_backend() def test_call_fields(self): expected = hl.Table.parallelize( [hl.struct(locus = hl.locus("X", 16050036), s = "C1046::HG02024", @@ -136,6 +142,7 @@ def test_call_fields(self): entries = entries.select('GT', 'GTA', 'GTZ') self.assertTrue(entries._same(expected)) + @fails_service_backend() def test_import_vcf(self): vcf = hl.split_multi_hts( hl.import_vcf(resource('sample2.vcf'), @@ -181,6 +188,7 @@ def test_import_vcf_can_import_negative_numbers(self): hl.agg.all(mt.negative_int_array == [-1, -2]) & hl.agg.all(mt.negative_float_array == [-0.5, -1.5]))) + @fails_service_backend() def test_import_vcf_missing_info_field_elements(self): mt = hl.import_vcf(resource('missingInfoArray.vcf'), reference_genome='GRCh37', array_elements_required=False) mt = mt.select_rows(FOO=mt.info.FOO, 
BAR=mt.info.BAR) @@ -193,6 +201,7 @@ def test_import_vcf_missing_info_field_elements(self): key=['locus', 'alleles']) self.assertTrue(mt.rows()._same(expected)) + @fails_service_backend() def test_import_vcf_missing_format_field_elements(self): mt = hl.import_vcf(resource('missingFormatArray.vcf'), reference_genome='GRCh37', array_elements_required=False) mt = mt.select_rows().select_entries('AD', 'PL') @@ -211,6 +220,7 @@ def test_import_vcf_missing_format_field_elements(self): self.assertTrue(mt.entries()._same(expected)) + @fails_service_backend() def test_import_vcf_skip_invalid_loci(self): mt = hl.import_vcf(resource('skip_invalid_loci.vcf'), reference_genome='GRCh37', skip_invalid_loci=True) @@ -223,6 +233,7 @@ def test_import_vcf_set_field_missing(self): mt = hl.import_vcf(resource('test_set_field_missing.vcf')) mt.aggregate_entries(hl.agg.sum(mt.DP)) + @fails_service_backend() def test_import_vcf_dosages_as_doubles_or_floats(self): mt = hl.import_vcf(resource('small-ds.vcf')) self.assertEqual(hl.expr.expressions.typed_expressions.Float64Expression, type(mt.entry.DS)) @@ -244,6 +255,7 @@ def test_import_vcf_invalid_float_type(self): with self.assertRaises(TypeError): mt = hl.import_vcf(resource('small-ds.vcf'), entry_float_type=hl.tint64) + @fails_service_backend() @fails_local_backend() def test_export_vcf(self): dataset = hl.import_vcf(resource('sample.vcf.bgz')) @@ -261,6 +273,7 @@ def test_export_vcf(self): # are py4 JavaMaps, not dicts, so can't use assertDictEqual self.assertEqual(vcf_metadata, metadata_imported) + @fails_service_backend() @fails_local_backend() def test_export_vcf_empty_format(self): mt = hl.import_vcf(resource('sample.vcf.bgz')).select_entries() @@ -269,6 +282,7 @@ def test_export_vcf_empty_format(self): assert hl.import_vcf(tmp)._same(mt) + @fails_service_backend() @fails_local_backend() def test_export_vcf_no_gt(self): mt = hl.import_vcf(resource('sample.vcf.bgz')).drop('GT') @@ -277,6 +291,7 @@ def test_export_vcf_no_gt(self): assert hl.import_vcf(tmp)._same(mt) + @fails_service_backend() @fails_local_backend() def test_export_vcf_no_alt_alleles(self): mt = hl.import_vcf(resource('gvcfs/HG0096_excerpt.g.vcf'), reference_genome='GRCh38') @@ -287,6 +302,7 @@ def test_export_vcf_no_alt_alleles(self): mt2 = hl.import_vcf(tmp, reference_genome='GRCh38') self.assertTrue(mt._same(mt2)) + @fails_service_backend() @fails_local_backend() def test_export_sites_only_from_table(self): mt = hl.import_vcf(resource('sample.vcf.bgz'))\ @@ -370,12 +386,15 @@ def test_import_gvcfs_long_line(self): ref_str = ref.read().decode('utf-8') self.assertEqual(ref_str, data) + @fails_service_backend() def test_vcf_parser_golden_master__ex_GRCh37(self): self._test_vcf_parser_golden_master(resource('ex.vcf'), 'GRCh37') + @fails_service_backend() def test_vcf_parser_golden_master__sample_GRCh37(self): self._test_vcf_parser_golden_master(resource('sample.vcf'), 'GRCh37') + @fails_service_backend() def test_vcf_parser_golden_master__gvcf_GRCh37(self): self._test_vcf_parser_golden_master(resource('gvcfs/HG00096.g.vcf.gz'), 'GRCh38') @@ -488,6 +507,7 @@ def test_missing_float_entries(self): assert gl_gp == [hl.Struct(GL=[None, None, None], GP=[0.22, 0.5, 0.27]), hl.Struct(GL=[None, None, None], GP=[None, None, None])] + @fails_service_backend() @fails_local_backend() def test_same_bgzip(self): mt = hl.import_vcf(resource('sample.vcf'), min_partitions=4) @@ -495,6 +515,7 @@ def test_same_bgzip(self): hl.export_vcf(mt, f) assert hl.import_vcf(f)._same(mt) + @fails_service_backend() 
@fails_local_backend() def test_vcf_parallel_export(self): import glob @@ -522,6 +543,7 @@ def concat_files(outpath, inpaths): assert hl.import_vcf(nf)._same(mt) + @fails_service_backend() @fails_local_backend() def test_sorted(self): mt = hl.utils.range_matrix_table(10, 10, n_partitions=4).filter_cols(False) @@ -541,6 +563,7 @@ def test_sorted(self): assert pos >= last last = pos + @fails_service_backend() @fails_local_backend() def test_empty_read_write(self): mt = hl.import_vcf(resource('sample.vcf'), min_partitions=4).filter_rows(False) @@ -557,6 +580,7 @@ def test_empty_read_write(self): assert hl.import_vcf(out1)._same(mt) assert hl.import_vcf(out2)._same(mt) + @fails_service_backend() @fails_local_backend() def test_format_header(self): mt = hl.import_vcf(resource('sample2.vcf')) @@ -578,6 +602,7 @@ def test_format_header(self): '##FORMAT=', } + @fails_service_backend() @fails_local_backend() def test_format_genotypes(self): mt = hl.import_vcf(resource('sample.vcf')) @@ -596,6 +621,7 @@ def test_format_genotypes(self): else: assert False, 'expected pattern not found' + @fails_service_backend() @fails_local_backend() def test_contigs_header(self): mt = hl.import_vcf(resource('sample.vcf')).filter_cols(False) @@ -609,6 +635,7 @@ def test_contigs_header(self): else: assert False, 'expected pattern not found' + @fails_service_backend() @fails_local_backend() def test_metadata_argument(self): mt = hl.import_vcf(resource('multipleChromosomes.vcf')) @@ -636,6 +663,7 @@ def test_metadata_argument(self): assert saw_gt assert saw_lq + @fails_service_backend() @fails_local_backend() def test_invalid_info_fields(self): t = new_temp_file(extension='vcf') @@ -652,6 +680,7 @@ def test_invalid_info_fields(self): assert warning.call_count == 1 class PLINKTests(unittest.TestCase): + @fails_service_backend() def test_import_fam(self): fam_file = resource('sample.fam') nfam = hl.import_fam(fam_file).count() @@ -662,6 +691,7 @@ def test_import_fam(self): i += 1 self.assertEqual(nfam, i) + @fails_service_backend() @fails_local_backend() def test_export_import_plink_same(self): mt = get_dataset() @@ -679,6 +709,7 @@ def test_export_import_plink_same(self): self.assertTrue(mt._same(mt_imported)) self.assertTrue(mt.aggregate_rows(hl.agg.all(mt.cm_position == 15.0))) + @fails_service_backend() @fails_local_backend() def test_import_plink_empty_fam(self): mt = get_dataset().filter_cols(False) @@ -687,6 +718,7 @@ def test_import_plink_empty_fam(self): with self.assertRaisesRegex(FatalError, "Empty FAM file"): hl.import_plink(bfile + '.bed', bfile + '.bim', bfile + '.fam') + @fails_service_backend() @fails_local_backend() def test_import_plink_empty_bim(self): mt = get_dataset().filter_rows(False) @@ -695,6 +727,7 @@ def test_import_plink_empty_bim(self): with self.assertRaisesRegex(FatalError, "BIM file does not contain any variants"): hl.import_plink(bfile + '.bed', bfile + '.bim', bfile + '.fam') + @fails_service_backend() @fails_local_backend() def test_import_plink_a1_major(self): mt = get_dataset() @@ -721,6 +754,7 @@ def get_data(a2_reference): (j.a1_vqc.homozygote_count[0] == j.a2_vqc.homozygote_count[1]) & (j.a1_vqc.homozygote_count[1] == j.a2_vqc.homozygote_count[0]))) + @fails_service_backend() @fails_local_backend() def test_import_plink_contig_recoding_w_reference(self): vcf = hl.split_multi_hts( @@ -747,6 +781,7 @@ def test_import_plink_no_reference_specified(self): self.assertEqual(plink.locus.dtype, hl.tstruct(contig=hl.tstr, position=hl.tint32)) + @fails_service_backend() @fails_local_backend() 
def test_import_plink_skip_invalid_loci(self): mt = hl.import_plink(resource('skip_invalid_loci.bed'), @@ -764,6 +799,7 @@ def test_import_plink_skip_invalid_loci(self): ._force_count_rows()) @unittest.skipIf('HAIL_TEST_SKIP_PLINK' in os.environ, 'Skipping tests requiring plink') + @fails_service_backend() @fails_local_backend() def test_export_plink(self): vcf_file = resource('sample.vcf') @@ -811,6 +847,7 @@ def test_export_plink(self): self.assertTrue(same) + @fails_service_backend() @fails_local_backend() def test_export_plink_exprs(self): ds = get_dataset() @@ -878,6 +915,7 @@ def test_export_plink_exprs(self): with self.assertRaisesRegex(FatalError, "no white space allowed:"): hl.export_plink(ds, new_temp_file(), varid="hello world") + @fails_service_backend() def test_contig_recoding_defaults(self): hl.import_plink(resource('sex_mt_contigs.bed'), resource('sex_mt_contigs.bim'), @@ -945,6 +983,7 @@ def setUp(self) -> None: contig_recoding={'01': '1'}, reference_genome='GRCh37') + @fails_service_backend() @fails_local_backend() def test_import_bgen_dosage_entry(self): bgen = hl.import_bgen(resource('example.8bits.bgen'), @@ -952,6 +991,7 @@ def test_import_bgen_dosage_entry(self): self.assertEqual(bgen.entry.dtype, hl.tstruct(dosage=hl.tfloat64)) self.assertEqual(bgen.count_rows(), 199) + @fails_service_backend() @fails_local_backend() def test_import_bgen_GT_GP_entries(self): bgen = hl.import_bgen(resource('example.8bits.bgen'), @@ -959,6 +999,7 @@ def test_import_bgen_GT_GP_entries(self): sample_file=resource('example.sample')) self.assertEqual(bgen.entry.dtype, hl.tstruct(GT=hl.tcall, GP=hl.tarray(hl.tfloat64))) + @fails_service_backend() @fails_local_backend() def test_import_bgen_no_entries(self): bgen = hl.import_bgen(resource('example.8bits.bgen'), @@ -966,6 +1007,7 @@ def test_import_bgen_no_entries(self): sample_file=resource('example.sample')) self.assertEqual(bgen.entry.dtype, hl.tstruct()) + @fails_service_backend() @fails_local_backend() def test_import_bgen_no_reference(self): hl.index_bgen(resource('example.8bits.bgen'), @@ -977,6 +1019,7 @@ def test_import_bgen_no_reference(self): self.assertEqual(bgen.locus.dtype, hl.tstruct(contig=hl.tstr, position=hl.tint32)) self.assertEqual(bgen.count_rows(), 199) + @fails_service_backend() @fails_local_backend() def test_import_bgen_skip_invalid_loci(self): # Note: the skip_invalid_loci.bgen has 16-bit probabilities, and Hail @@ -998,6 +1041,7 @@ def test_import_bgen_skip_invalid_loci(self): sample_file=resource('skip_invalid_loci.sample')) mt.rows().count() + @fails_service_backend() @fails_local_backend() def test_import_bgen_gavin_example(self): recoding = {'0{}'.format(i): str(i) for i in range(1, 10)} @@ -1012,6 +1056,7 @@ def test_import_bgen_gavin_example(self): self.assertTrue( bgenmt._same(genmt, tolerance=1.0 / 255, absolute=True)) + @fails_service_backend() @fails_local_backend() def test_import_bgen_random(self): sample_file = resource('random.sample') @@ -1023,6 +1068,7 @@ def test_import_bgen_random(self): self.assertTrue( bgenmt._same(genmt, tolerance=1.0 / 255, absolute=True)) + @fails_service_backend() @fails_local_backend() def test_parallel_import(self): bgen_file = resource('parallelBgenExport.bgen') @@ -1032,6 +1078,7 @@ def test_parallel_import(self): resource('parallelBgenExport.sample')) self.assertEqual(mt.count(), (16, 10)) + @fails_service_backend() @fails_local_backend() def test_import_bgen_dosage_and_gp_dosage_function_agree(self): recoding = {'0{}'.format(i): str(i) for i in range(1, 10)} @@ -1046,6 
+1093,7 @@ def test_import_bgen_dosage_and_gp_dosage_function_agree(self): (hl.is_missing(et.dosage) & hl.is_missing(et.gp_dosage)) | (hl.abs(et.dosage - et.gp_dosage) < 1e-6))) + @fails_service_backend() @fails_local_backend() def test_import_bgen_row_fields(self): default_row_fields = hl.import_bgen(resource('example.8bits.bgen'), @@ -1081,6 +1129,7 @@ def test_import_bgen_row_fields(self): self.assertTrue( default_row_fields.drop('varid', 'rsid')._same(no_row_fields)) + @fails_service_backend() @fails_local_backend() def test_import_bgen_variant_filtering_from_literals(self): bgen_file = resource('example.8bits.bgen') @@ -1123,6 +1172,7 @@ def test_import_bgen_variant_filtering_from_literals(self): self.assertTrue(expected._same(part_1)) + @fails_service_backend() @fails_local_backend() def test_import_bgen_locus_filtering_from_literals(self): bgen_file = resource('example.8bits.bgen') @@ -1150,6 +1200,7 @@ def test_import_bgen_locus_filtering_from_literals(self): self.assertEqual(locus_object.rows().key_by('locus', 'alleles').select().collect(), expected_result) + @fails_service_backend() @fails_local_backend() def test_import_bgen_variant_filtering_from_exprs(self): bgen_file = resource('example.8bits.bgen') @@ -1166,6 +1217,7 @@ def test_import_bgen_variant_filtering_from_exprs(self): self.assertTrue(everything._same(actual)) + @fails_service_backend() @fails_local_backend() def test_import_bgen_locus_filtering_from_exprs(self): bgen_file = resource('example.8bits.bgen') @@ -1185,6 +1237,7 @@ def test_import_bgen_locus_filtering_from_exprs(self): self.assertTrue(everything._same(actual_locus)) + @fails_service_backend() @fails_local_backend() def test_import_bgen_variant_filtering_from_table(self): bgen_file = resource('example.8bits.bgen') @@ -1201,6 +1254,7 @@ def test_import_bgen_variant_filtering_from_table(self): self.assertTrue(everything._same(actual)) + @fails_service_backend() @fails_local_backend() def test_import_bgen_locus_filtering_from_table(self): bgen_file = resource('example.8bits.bgen') @@ -1221,6 +1275,7 @@ def test_import_bgen_locus_filtering_from_table(self): self.assertEqual(result.rows().key_by('locus', 'alleles').select().collect(), expected_result) + @fails_service_backend() @fails_local_backend() def test_import_bgen_empty_variant_filter(self): bgen_file = resource('example.8bits.bgen') @@ -1243,6 +1298,7 @@ def test_import_bgen_empty_variant_filter(self): self.assertEqual(actual.count_rows(), 0) # FIXME testing block_size (in MB) requires large BGEN + @fails_service_backend() @fails_local_backend() def test_n_partitions(self): bgen = hl.import_bgen(resource('example.8bits.bgen'), @@ -1250,6 +1306,7 @@ def test_n_partitions(self): n_partitions=210) self.assertEqual(bgen.n_partitions(), 199) # only 199 variants in the file + @fails_service_backend() @fails_local_backend() def test_drop(self): bgen = hl.import_bgen(resource('example.8bits.bgen'), @@ -1263,6 +1320,7 @@ def test_drop(self): self.assertEqual(dc._force_count_rows(), 199) self.assertEqual(dc._force_count_cols(), 0) + @fails_service_backend() @fails_local_backend() def test_multiple_files(self): sample_file = resource('random.sample') @@ -1274,6 +1332,7 @@ def test_multiple_files(self): self.assertTrue( bgenmt._same(genmt, tolerance=1.0 / 255, absolute=True)) + @fails_service_backend() @fails_local_backend() def test_multiple_files_variant_filtering(self): bgen_file = [resource('random-b.bgen'), resource('random-c.bgen'), resource('random-a.bgen')] @@ -1304,6 +1363,7 @@ def 
test_multiple_files_variant_filtering(self): self.assertTrue(expected._same(actual)) + @fails_service_backend() @fails_local_backend() def test_multiple_files_disjoint(self): sample_file = resource('random.sample') @@ -1312,6 +1372,7 @@ def test_multiple_files_disjoint(self): with self.assertRaisesRegex(FatalError, 'Each BGEN file must contain a region of the genome disjoint from other files'): hl.import_bgen(bgen_file, ['GT', 'GP'], sample_file, n_partitions=3) + @fails_service_backend() @fails_local_backend() def test_multiple_references_throws_error(self): sample_file = resource('random.sample') @@ -1323,6 +1384,7 @@ def test_multiple_references_throws_error(self): with self.assertRaisesRegex(FatalError, 'Found multiple reference genomes were specified in the BGEN index files'): hl.import_bgen([bgen_file1, bgen_file2], ['GT'], sample_file=sample_file) + @fails_service_backend() def test_old_index_file_throws_error(self): sample_file = resource('random.sample') bgen_file = resource('random.bgen') @@ -1339,6 +1401,7 @@ def test_old_index_file_throws_error(self): hl.import_bgen(bgen_file, ['GT', 'GP'], sample_file) run_command(['rm', bgen_file + '.idx']) + @fails_service_backend() @fails_local_backend() def test_specify_different_index_file(self): sample_file = resource('random.sample') @@ -1354,6 +1417,7 @@ def test_specify_different_index_file(self): index_file_map = {bgen_file: index_file} hl.index_bgen(bgen_file, index_file_map=index_file_map) + @fails_service_backend() @fails_local_backend() def test_export_bgen(self): bgen = hl.import_bgen(resource('example.8bits.bgen'), @@ -1367,6 +1431,7 @@ def test_export_bgen(self): sample_file=tmp + '.sample') assert bgen._same(bgen2) + @fails_service_backend() @fails_local_backend() def test_export_bgen_parallel(self): bgen = hl.import_bgen(resource('example.8bits.bgen'), @@ -1382,6 +1447,7 @@ def test_export_bgen_parallel(self): sample_file=tmp + '.sample') assert bgen._same(bgen2) + @fails_service_backend() @fails_local_backend() def test_export_bgen_from_vcf(self): mt = hl.import_vcf(resource('sample.vcf')) @@ -1402,6 +1468,7 @@ def test_export_bgen_from_vcf(self): class GENTests(unittest.TestCase): + @fails_service_backend() @fails_local_backend() def test_import_gen(self): gen = hl.import_gen(resource('example.gen'), @@ -1412,6 +1479,7 @@ def test_import_gen(self): self.assertEqual(gen.count(), 199) self.assertEqual(gen.locus.dtype, hl.tlocus('GRCh37')) + @fails_service_backend() @fails_local_backend() def test_import_gen_no_reference_specified(self): gen = hl.import_gen(resource('example.gen'), @@ -1422,6 +1490,7 @@ def test_import_gen_no_reference_specified(self): hl.tstruct(contig=hl.tstr, position=hl.tint32)) self.assertEqual(gen.count_rows(), 199) + @fails_service_backend() @fails_local_backend() def test_import_gen_skip_invalid_loci(self): mt = hl.import_gen(resource('skip_invalid_loci.gen'), @@ -1435,6 +1504,7 @@ def test_import_gen_skip_invalid_loci(self): hl.import_gen(resource('skip_invalid_loci.gen'), resource('skip_invalid_loci.sample')) + @fails_service_backend() @fails_local_backend() def test_export_gen(self): gen = hl.import_gen(resource('example.gen'), @@ -1458,6 +1528,7 @@ def test_export_gen(self): self.assertTrue(gen._same(gen2, tolerance=3E-4, absolute=True)) + @fails_service_backend() @fails_local_backend() def test_export_gen_exprs(self): gen = hl.import_gen(resource('example.gen'), @@ -1480,6 +1551,7 @@ def test_export_gen_exprs(self): class LocusIntervalTests(unittest.TestCase): + @fails_service_backend() 
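# The marker is applied both to unittest.TestCase methods (as in the classes above) and
# to module-level test functions (e.g. test_read_write_all_types earlier in this patch);
# pytest generally honors xfail-style marks in both positions. A small hypothetical
# illustration, reusing the fails_service_backend helper that the suite imports from
# ..helpers (that relative import only resolves inside the test package):
import unittest
from ..helpers import fails_service_backend

@fails_service_backend()
def test_module_level_example():
    # Trivial body: the mark, not the assertion, is the point of the illustration.
    assert 1 + 1 == 2

class MarkerUsageExample(unittest.TestCase):
    @fails_service_backend()
    def test_method_example(self):
        self.assertEqual(2 * 2, 4)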
@fails_local_backend() def test_import_locus_intervals(self): interval_file = resource('annotinterall.interval_list') @@ -1512,6 +1584,7 @@ def test_import_locus_intervals_no_reference_specified(self): self.assertEqual(t.count(), 2) self.assertEqual(t.interval.dtype.point_type, hl.tstruct(contig=hl.tstr, position=hl.tint32)) + @fails_service_backend() def test_import_locus_intervals_recoding(self): interval_file = resource('annotinterall.grch38.no.chr.interval_list') t = hl.import_locus_intervals(interval_file, @@ -1528,6 +1601,7 @@ def test_import_locus_intervals_badly_defined_intervals(self): t = hl.import_locus_intervals(interval_file, reference_genome=None, skip_invalid_intervals=True) self.assertEqual(t.count(), 22) + @fails_service_backend() def test_import_bed(self): bed_file = resource('example1.bed') bed = hl.import_bed(bed_file, reference_genome='GRCh37') @@ -1560,6 +1634,7 @@ def test_import_bed(self): self.assertEqual(t.interval.collect(), hl.eval(expected)) + @fails_service_backend() def test_import_bed_recoding(self): bed_file = resource('some-missing-chr-grch38.bed') bed = hl.import_bed(bed_file, @@ -1645,6 +1720,7 @@ def test_import_matrix_comment(self): comment=['#', '%']) assert mt1._same(mt2) + @fails_service_backend() @fails_local_backend() def test_headers_not_identical(self): self.assertRaisesRegex( @@ -1655,6 +1731,7 @@ def test_headers_not_identical(self): row_fields={'f0': hl.tstr}, row_key=['f0']) + @fails_service_backend() @fails_local_backend() def test_too_few_entries(self): def boom(): @@ -1667,6 +1744,7 @@ def boom(): "unexpected end of line while reading entry 3", boom) + @fails_service_backend() @fails_local_backend() def test_round_trip(self): for missing in ['.', '9']: @@ -1733,6 +1811,7 @@ def _test_round_trip(self, missing, delimiter, header, entry_type, entry_fun): mt = mt.key_rows_by(*row_key) assert mt._same(actual) + @fails_service_backend() @fails_local_backend() def test_key_by_after_empty_key_import(self): fields = {'Chromosome':hl.tstr, @@ -1746,6 +1825,7 @@ def test_key_by_after_empty_key_import(self): mt = mt.key_rows_by('Chromosome', 'Position') assert 0.001 < abs(0.50965 - mt.aggregate_entries(hl.agg.mean(mt.x))) + @fails_service_backend() @fails_local_backend() def test_key_by_after_empty_key_import(self): fields = {'Chromosome':hl.tstr, @@ -1759,6 +1839,7 @@ def test_key_by_after_empty_key_import(self): mt = mt.key_rows_by('Chromosome', 'Position') mt._force_count_rows() + @fails_service_backend() @fails_local_backend() def test_devlish_nine_separated_eight_missing_file(self): fields = {'chr': hl.tstr, @@ -1789,6 +1870,7 @@ def test_devlish_nine_separated_eight_missing_file(self): actual = mt.alt.collect() assert actual == ['T', 'TGG', 'A', None] + @fails_service_backend() @fails_local_backend() def test_empty_import_matrix_table(self): path = new_temp_file(extension='tsv.bgz') @@ -1800,6 +1882,7 @@ def test_empty_import_matrix_table(self): mt.x.export(path, header=False) assert hl.import_matrix_table(path, no_header=True)._force_count_rows() == 0 + @fails_service_backend() @fails_local_backend() def test_import_row_id_multiple_partitions(self): path = new_temp_file(extension='txt') @@ -1817,6 +1900,7 @@ def test_import_row_id_multiple_partitions(self): min_partitions=10) assert mt.row_id.collect() == list(range(50)) + @fails_service_backend() @fails_local_backend() def test_long_parsing(self): path = resource('text_matrix_longs.tsv') @@ -1829,6 +1913,7 @@ def test_long_parsing(self): class ImportTableTests(unittest.TestCase): + 
@fails_service_backend() @fails_local_backend() def test_import_table_force_bgz(self): f = new_temp_file(extension="bgz") @@ -1868,6 +1953,7 @@ def test_type_imputation(self): assert ht.row.dtype == hl.dtype( 'struct{A:int64, B:int32}') + @fails_service_backend() @fails_local_backend() def test_import_export_identity(self): ht = hl.import_table(resource('sampleAnnotations.tsv')) @@ -1890,23 +1976,27 @@ def small_dataset_1(self): ] return hl.Table.parallelize(data, key='Sample') + @fails_service_backend() def test_read_write_identity(self): ht = self.small_dataset_1() f = new_temp_file(extension='ht') ht.write(f) assert ht._same(hl.read_table(f)) + @fails_service_backend() def test_read_write_identity_keyed(self): ht = self.small_dataset_1().key_by() f = new_temp_file(extension='ht') ht.write(f) assert ht._same(hl.read_table(f)) + @fails_service_backend() def test_import_same(self): ht = hl.import_table(resource('sampleAnnotations.tsv')) ht2 = hl.import_table(resource('sampleAnnotations.tsv')) assert ht._same(ht2) + @fails_service_backend() def test_error_with_context(self): with pytest.raises(FatalError, match='offending line'): ht = hl.import_table(resource('tsv_errors.tsv'), types={'col1': 'int32'}) @@ -1917,6 +2007,7 @@ def test_error_with_context(self): class GrepTests(unittest.TestCase): + @fails_service_backend() @fails_local_backend() def test_grep_show_false(self): expected = {'sampleAnnotations.tsv': ['HG00120\tCASE\t19599', 'HG00121\tCASE\t4832'], @@ -1931,6 +2022,7 @@ def test_grep_show_false(self): assert hl.grep('HG0012[0-1]', resource('*.tsv'), show=False) == expected +@fails_service_backend() @fails_local_backend() def test_matrix_and_table_read_intervals_with_hidden_key(): f1 = new_temp_file() diff --git a/hail/python/test/hail/methods/test_king.py b/hail/python/test/hail/methods/test_king.py index 92044fabe34..7da1e19b4cb 100644 --- a/hail/python/test/hail/methods/test_king.py +++ b/hail/python/test/hail/methods/test_king.py @@ -1,7 +1,7 @@ import pytest import hail as hl -from ..helpers import resource, startTestHailContext, stopTestHailContext, fails_local_backend +from ..helpers import resource, startTestHailContext, stopTestHailContext, fails_local_backend, fails_service_backend setUpModule = startTestHailContext tearDownModule = stopTestHailContext @@ -30,6 +30,7 @@ def assert_c_king_same_as_hail_king(c_king_path, hail_king_mt): assert expected.count() == 0, expected.collect() +@fails_service_backend() @fails_local_backend() def test_king_small(): plink_path = resource('balding-nichols-1024-variants-4-samples-3-populations') @@ -42,6 +43,7 @@ def test_king_small(): kinship) @pytest.mark.unchecked_allocator +@fails_service_backend() @fails_local_backend() def test_king_large(): plink_path = resource('fastlmmTest') @@ -51,3 +53,14 @@ def test_king_large(): reference_genome=None) kinship = hl.king(mt.GT) assert_c_king_same_as_hail_king(resource('fastlmmTest.kin0.bgz'), kinship) + + +@fails_service_backend() +@fails_local_backend() +def test_king_filtered_entries_no_error(): + plink_path = resource('balding-nichols-1024-variants-4-samples-3-populations') + mt = hl.import_plink(bed=f'{plink_path}.bed', + bim=f'{plink_path}.bim', + fam=f'{plink_path}.fam') + mt = mt.filter_entries(hl.rand_bool(0.5)) + hl.king(mt.GT)._force_count_rows() diff --git a/hail/python/test/hail/methods/test_misc.py b/hail/python/test/hail/methods/test_misc.py index 42f07bd9d6b..54a8102763d 100644 --- a/hail/python/test/hail/methods/test_misc.py +++ b/hail/python/test/hail/methods/test_misc.py @@ 
-28,6 +28,7 @@ def test_rename_duplicates(self): 'foo' )['foo'].dtype == hl.tstr + @fails_service_backend() @fails_local_backend() def test_annotate_intervals(self): ds = get_dataset() @@ -185,6 +186,7 @@ def test_filter_intervals_compound_key(self): hl.Struct(locus=hl.Locus('20', 10644700), alleles=['A', 'T']))] self.assertEqual(hl.filter_intervals(ds, intervals).count_rows(), 3) + @fails_service_backend() def test_summarize_variants(self): mt = hl.utils.range_matrix_table(3, 3) variants = hl.literal({0: hl.Struct(locus=hl.Locus('1', 1), alleles=['A', 'T', 'C']), @@ -197,6 +199,7 @@ def test_summarize_variants(self): self.assertEqual(r.allele_types, {'SNP': 2, 'MNP': 1, 'Unknown': 1, 'Insertion': 1}) self.assertEqual(r.allele_counts, {2: 1, 3: 2}) + @fails_service_backend() def test_verify_biallelic(self): mt = hl.import_vcf(resource('sample2.vcf')) # has multiallelics with self.assertRaises(hl.utils.HailUserError): diff --git a/hail/python/test/hail/methods/test_pca.py b/hail/python/test/hail/methods/test_pca.py index 78d3c780833..57a119c9681 100644 --- a/hail/python/test/hail/methods/test_pca.py +++ b/hail/python/test/hail/methods/test_pca.py @@ -4,12 +4,13 @@ import numpy as np import hail as hl -from ..helpers import resource, startTestHailContext, stopTestHailContext, fails_local_backend +from ..helpers import resource, startTestHailContext, stopTestHailContext, fails_local_backend, fails_service_backend setUpModule = startTestHailContext tearDownModule = stopTestHailContext +@fails_service_backend() @fails_local_backend() def test_hwe_normalized_pca(): mt = hl.balding_nichols_model(3, 100, 50) @@ -24,6 +25,7 @@ def test_hwe_normalized_pca(): assert loadings is None +@fails_service_backend() @fails_local_backend() def test_pca_against_numpy(): mt = hl.import_vcf(resource('tiny_m.vcf')) @@ -66,6 +68,7 @@ def normalize(a): np.testing.assert_allclose(hail_scores, np_scores, rtol=1e-5) np.testing.assert_allclose(hail_loadings, np_loadings, rtol=1e-5) +@fails_service_backend() @fails_local_backend() def test_blanczos_against_numpy(): @@ -126,6 +129,7 @@ def bound(vs, us): # equation 12 from https://www.ncbi.nlm.nih.gov/pmc/articles assert bound(np_loadings, loadings) > 0.9 +@fails_service_backend() @fails_local_backend() def test_blanczos_against_hail(): k = 10 @@ -161,6 +165,7 @@ def bound(vs, us): assert MEV > 0.9 +@fails_service_backend() @fails_local_backend() def test_spectra(): def make_spectral_matrix(index_func, k, m, n): diff --git a/hail/python/test/hail/methods/test_qc.py b/hail/python/test/hail/methods/test_qc.py index 16226d99312..508b20bf93e 100644 --- a/hail/python/test/hail/methods/test_qc.py +++ b/hail/python/test/hail/methods/test_qc.py @@ -9,6 +9,7 @@ class Tests(unittest.TestCase): + @fails_service_backend() def test_sample_qc(self): data = [ {'v': '1:1:A:T', 's': '1', 'GT': hl.Call([0, 0]), 'GQ': 10, 'DP': 0}, @@ -50,6 +51,7 @@ def test_sample_qc(self): self.assertAlmostEqual(r[0].sqc.r_het_hom_var, 0.3333333333) self.assertAlmostEqual(r[0].sqc.r_insertion_deletion, None) + @fails_service_backend() def test_variant_qc(self): data = [ {'v': '1:1:A:T', 's': '1', 'GT': hl.Call([0, 0]), 'GQ': 10, 'DP': 0}, @@ -108,6 +110,7 @@ def test_variant_qc(self): self.assertEqual(r[1].vqc.gq_stats.mean, 10) self.assertEqual(r[1].vqc.gq_stats.stdev, 0) + @fails_service_backend() @fails_local_backend() def test_concordance(self): dataset = get_dataset() @@ -136,6 +139,7 @@ def test_concordance(self): cols_conc.write('/tmp/foo.kt', overwrite=True) rows_conc.write('/tmp/foo.kt', 
overwrite=True) + @fails_service_backend() @fails_local_backend() def test_concordance_n_discordant(self): dataset = get_dataset() @@ -214,6 +218,7 @@ def make_mt(rows): n_discordant=0), ] + @fails_service_backend() @fails_local_backend() def test_concordance_no_values_doesnt_error(self): dataset = get_dataset().filter_rows(False) @@ -221,7 +226,6 @@ def test_concordance_no_values_doesnt_error(self): cols_conc._force_count() rows_conc._force_count() - def test_filter_alleles(self): # poor man's Gen paths = [resource('sample.vcf'), @@ -233,6 +237,7 @@ def test_filter_alleles(self): hl.filter_alleles(ds, lambda a, i: False).count_rows(), 0) self.assertEqual(hl.filter_alleles(ds, lambda a, i: True).count_rows(), ds.count_rows()) + @fails_service_backend() @fails_local_backend() def test_filter_alleles_hts(self): # 1 variant: A:T,G @@ -261,6 +266,7 @@ def test_filter_alleles_hts(self): ._same(hl.import_vcf(resource('filter_alleles/keep_allele2_downcode.vcf'))) ) + @fails_service_backend() def test_sample_and_variant_qc_call_rate(self): mt = hl.import_vcf(resource('sample.vcf')) @@ -271,6 +277,7 @@ def test_sample_and_variant_qc_call_rate(self): assert mt.aggregate_cols(hl.agg.all(hl.approx_equal(mt.sample_qc.call_rate, mt.sample_qc.n_called / n_rows))) assert mt.aggregate_rows(hl.agg.all(hl.approx_equal(mt.variant_qc.call_rate, mt.variant_qc.n_called / n_cols))) + @fails_service_backend() def test_summarize_variants_ti_tv(self): mt = hl.import_vcf(resource('sample.vcf')) # check that summarize can run with the print control flow diff --git a/hail/python/test/hail/methods/test_statgen.py b/hail/python/test/hail/methods/test_statgen.py index 200c97ef7cd..481667a4991 100644 --- a/hail/python/test/hail/methods/test_statgen.py +++ b/hail/python/test/hail/methods/test_statgen.py @@ -9,7 +9,7 @@ from hail.linalg import BlockMatrix from hail.utils import FatalError from ..helpers import (startTestHailContext, stopTestHailContext, resource, - skip_unless_spark_backend, fails_local_backend) + skip_unless_spark_backend, fails_local_backend, fails_service_backend) setUpModule = startTestHailContext tearDownModule = stopTestHailContext @@ -17,6 +17,7 @@ class Tests(unittest.TestCase): @unittest.skipIf('HAIL_TEST_SKIP_PLINK' in os.environ, 'Skipping tests requiring plink') + @fails_service_backend() @fails_local_backend() def test_impute_sex_same_as_plink(self): ds = hl.import_vcf(resource('x-chromosome.vcf')) @@ -56,6 +57,7 @@ def test_impute_sex_same_as_plink(self): backend_name = os.environ.get('HAIL_QUERY_BACKEND', 'spark') linreg_functions = [hl.linear_regression_rows, hl._linear_regression_rows_nd] if backend_name == "spark" else [hl._linear_regression_rows_nd] + @fails_service_backend() def test_linreg_basic(self): phenos = hl.import_table(resource('regressionLinear.pheno'), types={'Pheno': hl.tfloat64}, @@ -91,6 +93,7 @@ def test_linreg_basic(self): self.assertTrue(t1._same(t4a)) self.assertTrue(t1._same(t4b)) + @fails_service_backend() def test_linreg_pass_through(self): phenos = hl.import_table(resource('regressionLinear.pheno'), types={'Pheno': hl.tfloat64}, @@ -134,6 +137,7 @@ def test_linreg_pass_through(self): linreg_function([[phenos[mt.s].Pheno]], mt.GT.n_alt_alleles(), [1.0], pass_through=[mt.filters.length()]) + @fails_service_backend() def test_linreg_chained(self): phenos = hl.import_table(resource('regressionLinear.pheno'), types={'Pheno': hl.tfloat64}, @@ -209,7 +213,6 @@ def all_eq(*args): t5 = t5.annotate(**{x: t5[x][0] for x in ['n', 'sum_x', 'y_transpose_x', 'beta', 
'standard_error', 't_stat', 'p_value']}) assert t4._same(t5) - def test_linear_regression_without_intercept(self): for linreg_function in self.linreg_functions: pheno = hl.import_table(resource('regressionLinear.pheno'), @@ -313,6 +316,7 @@ def test_linear_regression_pl(self): self.assertAlmostEqual(results[3].t_stat, 1.5872510, places=6) self.assertAlmostEqual(results[3].p_value, 0.2533675, places=6) + @fails_service_backend() @fails_local_backend() # Because of import_gen def test_linear_regression_with_dosage(self): @@ -348,6 +352,7 @@ def test_linear_regression_with_dosage(self): self.assertAlmostEqual(results[3].p_value, 0.2533675, places=6) self.assertTrue(np.isnan(results[6].standard_error)) + @fails_service_backend() def test_linear_regression_equivalence_between_ds_and_gt(self): """Test that linear regressions on data converted from dosage to genotype returns the same results""" ds_mt = hl.import_vcf(resource('small-ds.vcf')) @@ -364,6 +369,7 @@ def test_linear_regression_equivalence_between_ds_and_gt(self): results_t = ds_results_t.annotate(**gt_results_t[ds_results_t.locus, ds_results_t.alleles]) self.assertTrue(all(hl.approx_equal(results_t.ds_p_value, results_t.gt_p_value, nan_same=True).collect())) + @fails_service_backend() def test_linear_regression_with_import_fam_boolean(self): covariates = hl.import_table(resource('regressionLinear.cov'), key='Sample', @@ -394,6 +400,7 @@ def test_linear_regression_with_import_fam_boolean(self): self.assertTrue(np.isnan(results[9].standard_error)) self.assertTrue(np.isnan(results[10].standard_error)) + @fails_service_backend() def test_linear_regression_with_import_fam_quant(self): covariates = hl.import_table(resource('regressionLinear.cov'), key='Sample', @@ -426,6 +433,7 @@ def test_linear_regression_with_import_fam_quant(self): self.assertTrue(np.isnan(results[9].standard_error)) self.assertTrue(np.isnan(results[10].standard_error)) + @fails_service_backend() def test_linear_regression_multi_pheno_same(self): covariates = hl.import_table(resource('regressionLinear.cov'), key='Sample', @@ -464,6 +472,7 @@ def eq(x1, x2): # se <- waldtest["x", "Std. 
Error"] # zstat <- waldtest["x", "z value"] # pval <- waldtest["x", "Pr(>|z|)"] + @fails_service_backend() @fails_local_backend() def test_logistic_regression_wald_test(self): covariates = hl.import_table(resource('regressionLogistic.cov'), @@ -501,6 +510,7 @@ def is_constant(r): self.assertTrue(is_constant(results[9])) self.assertTrue(is_constant(results[10])) + @fails_service_backend() @fails_local_backend() def test_logistic_regression_wald_test_apply_multi_pheno(self): covariates = hl.import_table(resource('regressionLogistic.cov'), @@ -541,6 +551,7 @@ def is_constant(r): self.assertTrue(is_constant(results[9])) self.assertTrue(is_constant(results[10])) + @fails_service_backend() @fails_local_backend() def test_logistic_regression_wald_test_multi_pheno_bgen_dosage(self): covariates = hl.import_table(resource('regressionLogisticMultiPheno.cov'), @@ -573,6 +584,7 @@ def test_logistic_regression_wald_test_multi_pheno_bgen_dosage(self): #TODO test handling of missingness + @fails_service_backend() @fails_local_backend() def test_logistic_regression_wald_test_pl(self): covariates = hl.import_table(resource('regressionLogistic.cov'), @@ -611,6 +623,7 @@ def is_constant(r): self.assertTrue(is_constant(results[9])) self.assertTrue(is_constant(results[10])) + @fails_service_backend() @fails_local_backend() def test_logistic_regression_wald_dosage(self): covariates = hl.import_table(resource('regressionLogistic.cov'), @@ -661,6 +674,7 @@ def is_constant(r): # lrtest <- anova(logfitnull, logfit, test="LRT") # chi2 <- lrtest[["Deviance"]][2] # pval <- lrtest[["Pr(>Chi)"]][2] + @fails_service_backend() @fails_local_backend() def test_logistic_regression_lrt(self): covariates = hl.import_table(resource('regressionLogistic.cov'), @@ -707,6 +721,7 @@ def is_constant(r): # scoretest <- anova(logfitnull, logfit, test="Rao") # chi2 <- scoretest[["Rao"]][2] # pval <- scoretest[["Pr(>Chi)"]][2] + @fails_service_backend() @fails_local_backend() def test_logistic_regression_score(self): covariates = hl.import_table(resource('regressionLogistic.cov'), @@ -743,6 +758,7 @@ def is_constant(r): self.assertTrue(is_constant(results[9])) self.assertTrue(is_constant(results[10])) + @fails_service_backend() @fails_local_backend() def test_logistic_regression_epacts(self): covariates = hl.import_table(resource('regressionLogisticEpacts.cov'), @@ -829,6 +845,7 @@ def get_results(table): self.assertAlmostEqual(firth[16117953].beta, 0.5258, places=4) self.assertAlmostEqual(firth[16117953].p_value, 0.22562, places=4) + @fails_service_backend() @fails_local_backend() def test_logreg_pass_through(self): covariates = hl.import_table(resource('regressionLogistic.cov'), @@ -860,6 +877,7 @@ def test_logreg_pass_through(self): # se <- waldtest["x", "Std. 
Error"] # zstat <- waldtest["x", "z value"] # pval <- waldtest["x", "Pr(>|z|)"] + @fails_service_backend() @fails_local_backend() def test_poission_regression_wald_test(self): covariates = hl.import_table(resource('regressionLogistic.cov'), @@ -908,6 +926,7 @@ def is_constant(r): # lrtest <- anova(poisfitnull, poisfit, test="LRT") # chi2 <- lrtest[["Deviance"]][2] # pval <- lrtest[["Pr(>Chi)"]][2] + @fails_service_backend() @fails_local_backend() def test_poission_regression_lrt(self): covariates = hl.import_table(resource('regressionLogistic.cov'), @@ -953,6 +972,7 @@ def is_constant(r): # scoretest <- anova(poisfitnull, poisfit, test="Rao") # chi2 <- scoretest[["Rao"]][2] # pval <- scoretest[["Pr(>Chi)"]][2] + @fails_service_backend() @fails_local_backend() def test_poission_regression_score_test(self): covariates = hl.import_table(resource('regressionLogistic.cov'), @@ -989,6 +1009,7 @@ def is_constant(r): self.assertTrue(is_constant(results[9])) self.assertTrue(is_constant(results[10])) + @fails_service_backend() @fails_local_backend() def test_poisson_pass_through(self): covariates = hl.import_table(resource('regressionLogistic.cov'), @@ -1133,6 +1154,7 @@ def test_ld_matrix(self): hl.ld_matrix(mt.GT.n_alt_alleles(), mt.locus, radius=1.0, coord_expr=mt.cm).to_numpy(), [[1., -0.85280287, 0.], [-0.85280287, 1., 0.], [0., 0., 1.]])) + @fails_service_backend() def test_split_multi_hts(self): ds1 = hl.import_vcf(resource('split_test.vcf')) ds1 = hl.split_multi_hts(ds1) @@ -1142,6 +1164,7 @@ def test_split_multi_hts(self): ds1 = ds1.drop('was_split', 'a_index') self.assertTrue(ds1._same(ds2)) + @fails_service_backend() @fails_local_backend() def test_split_multi_table(self): ds1 = hl.import_vcf(resource('split_test.vcf')).rows() @@ -1158,6 +1181,7 @@ def test_split_multi_table(self): ds1 = ds1.drop('was_split', 'a_index') self.assertTrue(ds1._same(ds2)) + @fails_service_backend() def test_split_multi_shuffle(self): ht = hl.utils.range_table(1) ht = ht.annotate(keys=[hl.struct(locus=hl.locus('1', 1180), alleles=['A', 'C', 'T']), @@ -1174,6 +1198,7 @@ def test_split_multi_shuffle(self): mt._force_count_rows() assert mt.alleles.collect() == [['A', 'C'], ['A', 'G'], ['A', 'T']] + @fails_service_backend() def test_issue_4527(self): mt = hl.utils.range_matrix_table(1, 1) mt = mt.key_rows_by(locus=hl.locus(hl.str(mt.row_idx+1), mt.row_idx+1), alleles=['A', 'T']) @@ -1273,6 +1298,7 @@ def test_ld_prune_with_duplicate_row_keys(self): pruned_table = hl.ld_prune(ds_duplicate.GT) self.assertEqual(pruned_table.count(), 1) + @fails_service_backend() def test_balding_nichols_model(self): hl.set_global_seed(1) ds = hl.balding_nichols_model(2, 20, 25, 3, @@ -1293,6 +1319,7 @@ def test_balding_nichols_model(self): self.assertEqual(hl.eval(glob.bn.pop_dist), [1, 2]) self.assertEqual(hl.eval(glob.bn.fst), [.02, .06]) + @fails_service_backend() def test_balding_nichols_model_same_results(self): for mixture in [True, False]: hl.set_global_seed(1) @@ -1359,6 +1386,7 @@ def variance(expr): test_stat(10, 100, 100, 0) test_stat(40, 400, 20, 12) + @fails_service_backend() @fails_local_backend() def test_skat(self): ds2 = hl.import_vcf(resource('sample2.vcf')) @@ -1417,6 +1445,7 @@ def test_skat(self): covariates=[1.0, ds.cov.Cov1, ds.cov.Cov2], logistic=True)._force_count() + @fails_service_backend() def test_de_novo(self): mt = hl.import_vcf(resource('denovo.vcf')) mt = mt.filter_rows(mt.locus.in_y_par(), keep=False) # de_novo_finder doesn't know about y PAR @@ -1443,6 +1472,7 @@ def test_de_novo(self): j = 
r.join(truth, how='outer') self.assertTrue(j.all((j.confidence == j.confidence_1) & (hl.abs(j.p_de_novo - j.p_de_novo_1) < 1e-4))) + @fails_service_backend() def test_de_novo_error(self): mt = hl.import_vcf(resource('denovo.vcf')) ped = hl.Pedigree.read(resource('denovo.fam')) @@ -1450,6 +1480,7 @@ def test_de_novo_error(self): with pytest.raises(Exception, match='pop_frequency_prior'): hl.de_novo(mt, ped, pop_frequency_prior=2.0).count() + @fails_service_backend() def test_de_novo_ignore_computed_af_runs(self): mt = hl.import_vcf(resource('denovo.vcf')) ped = hl.Pedigree.read(resource('denovo.fam')) diff --git a/hail/python/test/hail/table/test_grouped_table.py b/hail/python/test/hail/table/test_grouped_table.py index 74c0c99bffb..cec89801f92 100644 --- a/hail/python/test/hail/table/test_grouped_table.py +++ b/hail/python/test/hail/table/test_grouped_table.py @@ -8,6 +8,7 @@ class GroupedTableTests(unittest.TestCase): + @fails_service_backend() def test_aggregate_by(self): ht = hl.utils.range_table(4) ht = ht.annotate(foo=0, group=ht.idx < 2, bar='hello').annotate_globals(glob=5) @@ -27,6 +28,7 @@ def test_aggregate_by(self): with self.assertRaises(ValueError): grouped.aggregate(group=hl.agg.sum(ht.idx)) + @fails_service_backend() def test_aggregate_by_with_joins(self): ht = hl.utils.range_table(4) ht2 = hl.utils.range_table(4) @@ -47,6 +49,7 @@ def test_aggregate_by_with_joins(self): self.assertTrue(result._same(expected)) + @fails_service_backend() def test_issue_2446_takeby(self): t = hl.utils.range_table(10) result = t.group_by(foo=5).aggregate(x=hl.agg.take(t.idx, 3, ordering=t.idx)) diff --git a/hail/python/test/hail/table/test_table.py b/hail/python/test/hail/table/test_table.py index d1edbeb87fa..f2a17f3b737 100644 --- a/hail/python/test/hail/table/test_table.py +++ b/hail/python/test/hail/table/test_table.py @@ -17,6 +17,7 @@ class Tests(unittest.TestCase): + @fails_service_backend() def test_annotate(self): schema = hl.tstruct(a=hl.tint32, b=hl.tint32, c=hl.tint32, d=hl.tint32, e=hl.tstr, f=hl.tarray(hl.tint32)) @@ -129,6 +130,7 @@ def test_aggregate1(self): self.assertEqual(set(results.q4), {"hello", "cat"}) self.assertAlmostEqual(results.q5, 4) + @fails_service_backend() def test_aggregate2(self): schema = hl.tstruct(status=hl.tint32, GT=hl.tcall, qPheno=hl.tint32) @@ -193,6 +195,7 @@ def test_aggregate_ir(self): r = kt.aggregate(agg.filter(kt.idx % 2 != 0, agg.sum(kt.idx + 2)) + kt.g1) self.assertEqual(r, 40) + @fails_service_backend() def test_to_matrix_table(self): N, M = 50, 50 mt = hl.utils.range_matrix_table(N, M) @@ -207,6 +210,7 @@ def test_to_matrix_table(self): assert re_mt.choose_cols(mapping).drop('col_idx')._same(mt.drop('col_idx')) + @fails_service_backend() def test_to_matrix_table_row_major(self): t = hl.utils.range_table(10) t = t.annotate(foo=t.idx, bar=2 * t.idx, baz=3 * t.idx) @@ -232,12 +236,14 @@ def test_to_matrix_table_row_major(self): self.assertRaises(ValueError, lambda: t.to_matrix_table_row_major(['d'], entry_field_name='c')) self.assertRaises(ValueError, lambda: t.to_matrix_table_row_major([])) + @fails_service_backend() def test_group_by_field_lifetimes(self): ht = hl.utils.range_table(3) ht2 = (ht.group_by(idx='100') .aggregate(x=hl.agg.collect_as_set(ht.idx + 5))) assert (ht2.all(ht2.x == hl.set({5, 6, 7}))) + @fails_service_backend() def test_group_aggregate_by_key(self): ht = hl.utils.range_table(100, n_partitions=10) @@ -246,6 +252,7 @@ def test_group_aggregate_by_key(self): assert r1.all(r1.n == 20) assert r2.all(r2.n == 20) + 
@fails_service_backend() def test_aggregate_by_key_partitioning(self): ht1 = hl.Table.parallelize([ {'k': 'foo', 'b': 1}, @@ -361,6 +368,7 @@ def test_semi_anti_join(self): assert ht.semi_join(ht2).count() == 3 assert ht.anti_join(ht2).count() == 7 + @fails_service_backend() def test_indirected_joins(self): kt = hl.utils.range_table(1) kt = kt.annotate(a='foo') @@ -379,6 +387,7 @@ def test_indirected_joins(self): assert kt.aggregate(agg.collect(kt4[kt3[kt2[kt1[kt.a].b].c].d].e)) == ['quam'] + @fails_service_backend() def test_table_matrix_join_combinations(self): m = hl.import_vcf(resource('sample.vcf')) vkt = m.rows() @@ -403,6 +412,7 @@ def test_interval_filter_loci(self): ht = hl.import_vcf(resource('sample.vcf')).rows() assert ht.filter(ht.locus > hl.locus('20', 17434581)).count() == 100 + @fails_service_backend() @fails_local_backend() def test_interval_join(self): left = hl.utils.range_table(50, n_partitions=10) @@ -413,6 +423,7 @@ def test_interval_join(self): .when(left.idx % 10 < 5, left.interval_matches.idx == left.idx // 10) .default(hl.is_missing(left.interval_matches)))) + @fails_service_backend() @fails_local_backend() def test_interval_product_join(self): left = hl.utils.range_table(50, n_partitions=8) @@ -425,6 +436,7 @@ def test_interval_product_join(self): self.assertTrue(left.all(hl.sorted(left.interval_matches.map(lambda x: x.i)) == hl.range(0, hl.min(left.idx % 10, 10 - left.idx % 10)))) + @fails_service_backend() @fails_local_backend() def test_interval_product_join_long_key(self): left = hl.utils.range_table(50, n_partitions=8) @@ -451,6 +463,7 @@ def test_join_with_key(self): ht1 = ht.annotate(foo=5) self.assertTrue(ht.all(ht1[ht.key].foo == 5)) + @fails_service_backend() def test_product_join(self): left = hl.utils.range_table(5) right = hl.utils.range_table(5) @@ -466,6 +479,7 @@ def test_multiple_entry_joins(self): mt.select_entries(a=mt2[mt.row_idx, mt.col_idx].x, b=mt2[mt.row_idx, mt.col_idx].x) + @fails_service_backend() @fails_local_backend() def test_multi_way_zip_join(self): d1 = [{"id": 0, "name": "a", "data": 0.0}, @@ -519,6 +533,7 @@ def test_multi_way_zip_join_globals(self): joined = hl.Table.multi_way_zip_join([t1, t2, t3], '__data', '__globals') self.assertEqual(hl.eval(joined.globals), hl.eval(expected)) + @fails_service_backend() def test_multi_way_zip_join_key_downcast(self): mt = hl.import_vcf(resource('sample.vcf.bgz')) mt = mt.key_rows_by('locus') @@ -526,6 +541,7 @@ def test_multi_way_zip_join_key_downcast(self): j = hl.Table.multi_way_zip_join([ht, ht], 'd', 'g') j._force_count() + @fails_service_backend() def test_multi_way_zip_join_key_downcast2(self): vcf2 = hl.import_vcf(resource('gvcfs/HG00268.g.vcf.gz'), force_bgz=True, reference_genome='GRCh38') vcf1 = hl.import_vcf(resource('gvcfs/HG00096.g.vcf.gz'), force_bgz=True, reference_genome='GRCh38') @@ -558,10 +574,12 @@ def test_index_keyless_table(self): with self.assertRaisesRegex(hl.expr.ExpressionException, "Table key: *<<>>"): t[t.idx] + @fails_service_backend() def test_aggregation_with_no_aggregators(self): ht = hl.utils.range_table(3) self.assertEqual(ht.group_by(ht.idx).aggregate().count(), 3) + @fails_service_backend() def test_drop(self): kt = hl.utils.range_table(10) kt = kt.annotate(sq=kt.idx ** 2, foo='foo', bar='bar').key_by('foo') @@ -651,6 +669,7 @@ def test_rename(self): with self.assertRaises(LookupError): kt.rename({'hello': 'a'}) + @fails_service_backend() def test_distinct(self): t1 = hl.Table.parallelize([ {'a': 'foo', 'b': 1}, @@ -673,6 +692,7 @@ def 
test_distinct(self): self.assertTrue(dist.all(hl.len(dist.values) == 1)) self.assertEqual(dist.count(), len(t1.aggregate(hl.agg.collect_as_set(t1.a)))) + @fails_service_backend() def test_group_by_key(self): t1 = hl.Table.parallelize([ {'a': 'foo', 'b': 1}, @@ -746,6 +766,7 @@ def test_value_error(self): with pytest.raises(ValueError): t.explode(t.foo.bar, name='baz') + @fails_service_backend() @fails_local_backend() def test_export(self): t = hl.utils.range_table(1).annotate(foo=3) @@ -755,6 +776,7 @@ def test_export(self): with hl.hadoop_open(tmp_file, 'r') as f_in: assert f_in.read() == 'idx\tfoo\n0\t3\n' + @fails_service_backend() @fails_local_backend() def test_export_delim(self): t = hl.utils.range_table(1).annotate(foo = 3) @@ -764,6 +786,7 @@ def test_export_delim(self): with hl.hadoop_open(tmp_file, 'r') as f_in: assert f_in.read() == 'idx,foo\n0,3\n' + @fails_service_backend() @fails_local_backend() def test_write_stage_locally(self): t = hl.utils.range_table(5) @@ -775,6 +798,7 @@ def test_write_stage_locally(self): def test_min_partitions(self): assert hl.import_table(resource('variantAnnotations.tsv'), min_partitions=50).n_partitions() == 50 + @fails_service_backend() @fails_local_backend() def test_read_back_same_as_exported(self): t, _ = create_all_values_datasets() @@ -783,6 +807,7 @@ def test_read_back_same_as_exported(self): t_read_back = hl.import_table(tmp_file, types=dict(t.row.dtype)).key_by('idx') self.assertTrue(t.select_globals()._same(t_read_back, tolerance=1e-4, absolute=True)) + @fails_service_backend() def test_indexed_read(self): t = hl.utils.range_table(2000, 10) f = new_temp_file(extension='ht') @@ -802,14 +827,17 @@ def test_indexed_read(self): self.assertEqual(t2.n_partitions(), 3) self.assertTrue(t.filter((t.idx >= 150) & (t.idx < 500))._same(t2)) + @fails_service_backend() def test_order_by_parsing(self): hl.utils.range_table(1).annotate(**{'a b c' : 5}).order_by('a b c')._force_count() + @fails_service_backend() def test_take_order(self): t = hl.utils.range_table(20, n_partitions=2) t = t.key_by(rev_idx=-t.idx) assert t.take(10) == [hl.Struct(idx=idx, rev_idx=-idx) for idx in range(19, 9, -1)] + @fails_service_backend() @fails_local_backend() def test_filter_partitions(self): ht = hl.utils.range_table(23, n_partitions=8) @@ -826,6 +854,7 @@ def test_filter_partitions(self): ht._filter_partitions([0, 7]).idx.collect(), [0, 1, 2, 21, 22]) + @fails_service_backend() def test_localize_entries(self): ref_schema = hl.tstruct(row_idx=hl.tint32, __entries=hl.tarray(hl.tstruct(v=hl.tint32))) @@ -838,6 +867,7 @@ def test_localize_entries(self): t = mt._localize_entries('__entries', '__cols') self.assertTrue(t._same(ref_tab)) + @fails_service_backend() def test_localize_self_join(self): ref_schema = hl.tstruct(row_idx=hl.tint32, __entries=hl.tarray(hl.tstruct(v=hl.tint32))) @@ -851,6 +881,7 @@ def test_localize_self_join(self): t = t.join(t, how='outer') self.assertTrue(t._same(ref_tab)) + @fails_service_backend() def test_union(self): t1 = hl.utils.range_table(5) @@ -864,6 +895,7 @@ def test_union(self): self.assertTrue(t1.key_by().union(t2.key_by(), t3.key_by()) ._same(hl.utils.range_table(15).key_by())) + @fails_service_backend() @fails_local_backend() def test_nested_union(self): N = 10 @@ -914,6 +946,7 @@ def test_table_head_returns_right_number(self): self.assertEqual(table.head(0).count(), 0) self.assertEqual(table.head(0)._force_count(), 0) + @fails_service_backend() def test_table_order_by_head_rewrite(self): rt = hl.utils.range_table(10, 2) rt = 
rt.annotate(x = 10 - rt.idx) @@ -921,6 +954,7 @@ def test_table_order_by_head_rewrite(self): self.assertEqual(rt.order_by('x').idx.take(10), expected) self.assertEqual(rt.order_by('x').idx.collect(), expected) + @fails_service_backend() def test_order_by_expr(self): ht = hl.utils.range_table(10, 3) ht = ht.annotate(xs = hl.range(0, 1).map(lambda x: hl.int(hl.rand_unif(0, 100)))) @@ -937,6 +971,7 @@ def test_order_by_expr(self): assert desc.xs[0].collect() == res_desc assert [s['xs'][0] for s in desc.take(5)] == res_desc[:5] + @fails_service_backend() def test_null_joins(self): tr = hl.utils.range_table(7, 1) table1 = tr.key_by(new_key=hl.if_else((tr.idx == 3) | (tr.idx == 5), @@ -977,6 +1012,7 @@ def row(new_key, idx1, idx2): self.assertEqual(inner_join.collect(), inner_join_expected) self.assertEqual(outer_join.collect(), outer_join_expected) + @fails_service_backend() def test_null_joins_2(self): tr = hl.utils.range_table(7, 1) table1 = tr.key_by(new_key=hl.if_else((tr.idx == 3) | (tr.idx == 5), @@ -1017,6 +1053,7 @@ def row(new_key, key2, idx1, idx2): self.assertEqual(inner_join.collect(), inner_join_expected) self.assertEqual(outer_join.collect(), outer_join_expected) + @fails_service_backend() def test_joins_one_null(self): tr = hl.utils.range_table(7, 1) table1 = tr.key_by(new_key=tr.idx) @@ -1051,6 +1088,7 @@ def row(new_key, idx1, idx2): self.assertEqual(inner_join.collect(), inner_join_expected) self.assertEqual(outer_join.collect(), outer_join_expected) + @fails_service_backend() @fails_local_backend() def test_partitioning_rewrite(self): ht = hl.utils.range_table(10, 3) @@ -1058,6 +1096,7 @@ def test_partitioning_rewrite(self): self.assertEqual(ht1.x.collect()[:5], ht1.head(5).x.collect()) self.assertEqual(ht1.x.collect()[-5:], ht1.tail(5).x.collect()) + @fails_service_backend() def test_flatten(self): t1 = hl.utils.range_table(10) t1 = t1.key_by(x = hl.struct(a=t1.idx, b=0)).flatten() @@ -1065,6 +1104,7 @@ def test_flatten(self): t2 = t2.annotate(**{'x.a': t2.idx, 'x.b': 0}) self.assertTrue(t1._same(t2)) + @fails_service_backend() def test_expand_types(self): t1 = hl.utils.range_table(10) t1 = t1.key_by(x = hl.locus('1', t1.idx+1)).expand_types() @@ -1088,6 +1128,7 @@ def test_join_mangling(self): assert j.globals.dtype == hl.tstruct(glob1=hl.tint32, glob1_1=hl.tint32) j._force_count() + @fails_service_backend() def test_join_with_filter_intervals(self): ht = hl.utils.range_table(100, 5) ht = ht.key_by(idx2=ht.idx // 2) @@ -1104,6 +1145,7 @@ def test_join_with_filter_intervals(self): ht3 = ht1.join(ht2) assert ht3.filter(ht3.idx2 == 10).count() == 4 + @fails_service_backend() def test_key_by_aggregate_rewriting(self): ht = hl.utils.range_table(10) ht = ht.group_by(x=ht.idx % 5).aggregate(aggr = hl.agg.count()) @@ -1140,6 +1182,7 @@ def test_expr_collect_localize_false(self): ht = hl.utils.range_table(10) assert hl.eval(ht.idx.collect(_localize=False)) == ht.idx.collect() + @fails_service_backend() def test_expr_collect(self): t = hl.utils.range_table(3) @@ -1177,10 +1220,12 @@ def test_empty_show(self): def test_no_row_fields_show(self): hl.utils.range_table(5).key_by().select().show() + @fails_service_backend() def test_same_equal(self): t1 = hl.utils.range_table(1) self.assertTrue(t1._same(t1)) + @fails_service_backend() def test_same_within_tolerance(self): t = hl.utils.range_table(1) t1 = t.annotate(x = 1.0) @@ -1205,6 +1250,7 @@ def test_same_different_global(self): t2 = t1.annotate_globals(x = 8) self.assertFalse(t1._same(t2)) + @fails_service_backend() def 
test_same_different_rows(self): t1 = (hl.utils.range_table(2) .annotate(x = 7)) @@ -1215,14 +1261,15 @@ def test_same_different_rows(self): t3 = t1.filter(t1.idx == 0) self.assertFalse(t1._same(t3)) + @fails_service_backend() def test_rvd_key_write(self): - tempfile = new_temp_file(extension='ht') - ht1 = hl.utils.range_table(1).key_by(foo='a', bar='b') - ht1.write(tempfile) # write ensures that table is written with both key fields + with hl.TemporaryDirectory(suffix='.ht', ensure_exists=False) as tempfile: + ht1 = hl.utils.range_table(1).key_by(foo='a', bar='b') + ht1.write(tempfile) # write ensures that table is written with both key fields - ht1 = hl.read_table(tempfile) - ht2 = hl.utils.range_table(1).annotate(foo='a') - assert ht2.annotate(x = ht1.key_by('foo')[ht2.foo])._force_count() == 1 + ht1 = hl.read_table(tempfile) + ht2 = hl.utils.range_table(1).annotate(foo='a') + assert ht2.annotate(x = ht1.key_by('foo')[ht2.foo])._force_count() == 1 def test_show_long_field_names(self): hl.utils.range_table(1).annotate(**{'a' * 256: 5}).show() @@ -1266,11 +1313,13 @@ def test_unicode_ordering(self): ht = ht.annotate(fd=hl.sorted(a)) assert ht.fd.collect()[0] == ["e", "é"] + @fails_service_backend() def test_physical_key_truncation(self): path = new_temp_file(extension='ht') hl.import_vcf(resource('sample.vcf')).rows().key_by('locus').write(path) hl.read_table(path).select()._force_count() + @fails_service_backend() @fails_local_backend() def test_repartition_empty_key(self): data = [{'x': i} for i in range(1000)] @@ -1278,6 +1327,7 @@ def test_repartition_empty_key(self): assert ht.naive_coalesce(4)._same(ht) assert ht.repartition(3, shuffle=False)._same(ht) + @fails_service_backend() def test_path_collision_error(self): path = new_temp_file(extension='ht') ht = hl.utils.range_table(10) @@ -1287,6 +1337,7 @@ def test_path_collision_error(self): ht.write(path) assert "both an input and output source" in str(exc.value) +@fails_service_backend() def test_large_number_of_fields(tmpdir): ht = hl.utils.range_table(100) ht = ht.annotate(**{ @@ -1301,6 +1352,7 @@ def test_large_number_of_fields(tmpdir): def test_import_many_fields(): assert_time(lambda: hl.import_table(resource('many_cols.txt')), 5) +@fails_service_backend() def test_segfault(): t = hl.utils.range_table(1) t2 = hl.utils.range_table(3) @@ -1311,6 +1363,7 @@ def test_segfault(): assert joined.collect() == [] +@fails_service_backend() def test_maybe_flexindex_table_by_expr_direct_match(): t1 = hl.utils.range_table(1) t2 = hl.utils.range_table(1) @@ -1330,6 +1383,7 @@ def test_maybe_flexindex_table_by_expr_direct_match(): assert t1._maybe_flexindex_table_by_expr(hl.str(mt1.row_key)) is None +@fails_service_backend() def test_maybe_flexindex_table_by_expr_prefix_match(): t1 = hl.utils.range_table(1) t2 = hl.utils.range_table(1) @@ -1349,6 +1403,7 @@ def test_maybe_flexindex_table_by_expr_prefix_match(): assert t1._maybe_flexindex_table_by_expr((hl.str(mt1.row_idx), mt1.row_idx)) is None +@fails_service_backend() @fails_local_backend() def test_maybe_flexindex_table_by_expr_direct_interval_match(): t1 = hl.utils.range_table(1) @@ -1370,6 +1425,7 @@ def test_maybe_flexindex_table_by_expr_direct_interval_match(): assert t1._maybe_flexindex_table_by_expr(hl.str(mt1.row_key)) is None +@fails_service_backend() @fails_local_backend() def test_maybe_flexindex_table_by_expr_prefix_interval_match(): t1 = hl.utils.range_table(1) @@ -1394,6 +1450,7 @@ def test_maybe_flexindex_table_by_expr_prefix_interval_match(): widths = [256, 512, 1024, 2048, 
4096] +@fails_service_backend() def test_can_process_wide_tables(): for w in widths: print(f'working on width {w}') @@ -1436,6 +1493,7 @@ def write_file(n, n_rows=5): write_file(w) +@fails_service_backend() def test_join_distinct_preserves_count(): left_pos = [1, 2, 4, 4, 5, 5, 9, 13, 13, 14, 15] right_pos = [1, 1, 1, 3, 4, 4, 6, 6, 8, 9, 13, 15] @@ -1452,6 +1510,7 @@ def test_join_distinct_preserves_count(): assert n_defined_2 == 0 assert keys_2 == left_pos +@fails_service_backend() def test_write_table_containing_ndarray(): t = hl.utils.range_table(5) t = t.annotate(n = hl.nd.arange(t.idx)) @@ -1510,6 +1569,7 @@ def test_range_annotate_range(): ht2 = hl.utils.range_table(5).annotate(x = 1) ht1.annotate(x = ht2[ht1.idx].x)._force_count() +@fails_service_backend() def test_read_write_all_types(): ht = create_all_values_table() tmp_file = new_temp_file() @@ -1530,6 +1590,7 @@ def test_map_partitions_errors(): with pytest.raises(ValueError, match='must preserve key fields'): ht._map_partitions(lambda rows: rows.map(lambda r: r.drop('idx'))) +@fails_service_backend() def test_map_partitions_indexed(): tmp_file = new_temp_file() hl.utils.range_table(100, 8).write(tmp_file) @@ -1538,6 +1599,8 @@ def test_map_partitions_indexed(): assert [inner.idx for outer in ht.foo.collect() for inner in outer] == list(range(11, 55)) + +@fails_service_backend() @lower_only() def test_lowered_persist(): ht = hl.utils.range_table(100, 10).persist() @@ -1545,15 +1608,18 @@ def test_lowered_persist(): assert ht.filter(ht.idx == 55).count() == 1 + +@fails_service_backend() @lower_only() def test_lowered_shuffle(): ht = hl.utils.range_table(100, 10) ht = ht.order_by(-ht.idx) assert ht.aggregate(hl.agg.take(ht.idx, 3)) == [99, 98, 97] +@fails_service_backend() @fails_local_backend() def test_read_partitions(): ht = hl.utils.range_table(100, 3) path = new_temp_file() ht.write(path) - assert hl.read_table(path, _n_partitions=10).n_partitions() == 10 \ No newline at end of file + assert hl.read_table(path, _n_partitions=10).n_partitions() == 10 diff --git a/hail/python/test/hail/test_context.py b/hail/python/test/hail/test_context.py index cabb3cd72d5..f333af5da82 100644 --- a/hail/python/test/hail/test_context.py +++ b/hail/python/test/hail/test_context.py @@ -1,7 +1,7 @@ import unittest import hail as hl -from .helpers import startTestHailContext, stopTestHailContext, skip_unless_spark_backend, fails_local_backend +from .helpers import startTestHailContext, stopTestHailContext, skip_unless_spark_backend, fails_local_backend, fails_service_backend setUpModule = startTestHailContext tearDownModule = stopTestHailContext @@ -15,6 +15,7 @@ def test_init_hail_context_twice(self): hl.init(idempotent=True) # Should be no error hl.init(hl.spark_context(), idempotent=True) # Should be no error + @fails_service_backend() @fails_local_backend() def test_top_level_functions_are_do_not_error(self): hl.current_backend() diff --git a/hail/python/test/hail/utils/test_utils.py b/hail/python/test/hail/utils/test_utils.py index 965057c9a25..f8fb8207679 100644 --- a/hail/python/test/hail/utils/test_utils.py +++ b/hail/python/test/hail/utils/test_utils.py @@ -13,6 +13,7 @@ class Tests(unittest.TestCase): + @fails_service_backend() @fails_local_backend() def test_hadoop_methods(self): data = ['foo', 'bar', 'baz'] @@ -82,6 +83,7 @@ def test_hadoop_mkdir_p(self): self.assertFalse(hl.hadoop_exists(resource('./some2'))) + @fails_service_backend() @fails_local_backend() def test_hadoop_copy_log(self): with with_local_temp_file('log') as r: @@ 
-112,6 +114,7 @@ def test_hadoop_stat(self): self.assertTrue('owner' in stat2) self.assertTrue('modification_time' in stat2) + @fails_service_backend() @fails_local_backend() def test_hadoop_ls(self): path1 = resource('ls_test/f_50') diff --git a/hail/python/test/hailtop/aiotools/copy_test_specs.py b/hail/python/test/hailtop/aiotools/copy_test_specs.py index 3f9f57eb2b0..4606cb095df 100644 --- a/hail/python/test/hailtop/aiotools/copy_test_specs.py +++ b/hail/python/test/hailtop/aiotools/copy_test_specs.py @@ -4,350 +4,350 @@ 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'files': {'/a': 'src/a', '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'files': {'/a': 'src/a', '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'file', - 'result': {'files': {'/a': 'src/a', '/keep': ''}}, + 'result': {'exception': 'IsADirectoryError'}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'IsADirectoryError'}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'files': {'/a': 'src/a', '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'files': {'/a': 'src/a', '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 
'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'NotADirectoryError'}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'NotADirectoryError'}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'file', - 'result': {'exception': 'NotADirectoryError'}, + 'result': {'exception': 'IsADirectoryError'}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'files': {'/a': 'src/a', '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'NotADirectoryError'}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'files': {'/a': 'src/a', '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'files': {'/a': 'dest/a', '/keep': '', '/x/a': 'src/a'}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'file', 
'result': {'files': {'/a': 'dest/a', '/keep': '', '/x/a': 'src/a'}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'file', - 'result': {'files': {'/a': 'dest/a', '/keep': '', '/x/a': 'src/a'}}, + 'result': {'exception': 'IsADirectoryError'}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'files': {'/a': 'dest/a', '/keep': '', '/x': 'src/a'}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'files': {'/a': 'dest/a', '/keep': '', '/x/a': 'src/a'}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'files': {'/a': 'dest/a', '/keep': '', '/x': 'src/a'}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'dir', 'result': {'exception': 'IsADirectoryError'}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'dir', 'result': {'exception': 'IsADirectoryError'}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 
'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'dir', 'result': {'exception': 'IsADirectoryError'}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'dir', 'result': {'exception': 'IsADirectoryError'}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'dir', 'result': {'exception': 'IsADirectoryError'}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'dir', 'result': {'exception': 'IsADirectoryError'}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'dir', @@ -357,7 +357,7 @@ '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'dir', @@ -367,52 +367,49 @@ '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'dir', - 'result': {'files': {'/a/a': 'src/a', - '/a/file3': 'dest/a/file3', - '/a/subdir/file2': 'dest/a/subdir/file2', - '/keep': ''}}, + 'result': {'exception': 'IsADirectoryError'}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'dir', 'result': {'exception': 'IsADirectoryError'}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 
'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'dir', @@ -422,7 +419,7 @@ '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'dir', @@ -432,21 +429,21 @@ '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'dir', @@ -456,7 +453,7 @@ '/x/a': 'src/a'}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'dir', @@ -466,31 +463,28 @@ '/x/a': 'src/a'}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'dir', - 'result': {'files': {'/a/file3': 'dest/a/file3', - '/a/subdir/file2': 'dest/a/subdir/file2', - '/keep': '', - '/x/a': 'src/a'}}, + 'result': {'exception': 'IsADirectoryError'}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'dir', @@ -500,21 +494,21 @@ '/x': 'src/a'}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'dir', @@ -524,7 +518,7 @@ '/x/a': 'src/a'}}, 
'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'dir', @@ -534,427 +528,439 @@ '/x': 'src/a'}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'files': {'/a': 'src/a', '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'files': {'/a': 'src/a', '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'noexist', - 'result': {'files': {'/a': 'src/a', '/keep': ''}}, + 'result': {'exception': 'IsADirectoryError'}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'exception': 'IsADirectoryError'}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'files': {'/a': 'src/a', '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'files': {'/a': 'src/a', '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 
'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'files': {'/a/a': 'src/a', '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'files': {'/a/a': 'src/a', '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'noexist', - 'result': {'files': {'/a/a': 'src/a', '/keep': ''}}, + 'result': {'exception': 'IsADirectoryError'}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'files': {'/a': 'src/a', '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'files': {'/a/a': 'src/a', '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'files': {'/a': 'src/a', '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'files': {'/keep': '', '/x/a': 'src/a'}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'noexist', 
'result': {'files': {'/keep': '', '/x/a': 'src/a'}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'noexist', - 'result': {'files': {'/keep': '', '/x/a': 'src/a'}}, + 'result': {'exception': 'IsADirectoryError'}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'files': {'/keep': '', '/x': 'src/a'}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'files': {'/keep': '', '/x/a': 'src/a'}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'files': {'/keep': '', '/x': 'src/a'}}, 'src_trailing_slash': False, 'src_type': 'file', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'NotADirectoryError'}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'NotADirectoryError'}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'NotADirectoryError'}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'NotADirectoryError'}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'file', - 'result': {'exception': 'NotADirectoryError'}, + 'result': {'files': {'/a': 'dest/a', + '/file1': 'src/a/file1', + '/keep': '', + '/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': False, 
'dest_type': 'file', - 'result': {'exception': 'NotADirectoryError'}, + 'result': {'files': {'/a': 'dest/a', + '/file1': 'src/a/file1', + '/keep': '', + '/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'file', - 'result': {'exception': 'NotADirectoryError'}, + 'result': {'files': {'/a': 'dest/a', + '/file1': 'src/a/file1', + '/keep': '', + '/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'file', - 'result': {'exception': 'NotADirectoryError'}, + 'result': {'files': {'/a': 'dest/a', + '/file1': 'src/a/file1', + '/keep': '', + '/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'NotADirectoryError'}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'NotADirectoryError'}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'NotADirectoryError'}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'NotADirectoryError'}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'NotADirectoryError'}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'NotADirectoryError'}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'NotADirectoryError'}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'NotADirectoryError'}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'NotADirectoryError'}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'NotADirectoryError'}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'NotADirectoryError'}, 
'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'NotADirectoryError'}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'NotADirectoryError'}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'NotADirectoryError'}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'NotADirectoryError'}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'NotADirectoryError'}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'file', @@ -964,7 +970,7 @@ '/x/a/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'file', @@ -974,7 +980,7 @@ '/x/a/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'file', @@ -984,7 +990,7 @@ '/x/a/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'file', @@ -994,41 +1000,47 @@ '/x/a/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'files': {'/a': 'dest/a', '/keep': '', - '/x/a/file1': 'src/a/file1', - '/x/a/subdir/file2': 'src/a/subdir/file2'}}, + '/x/file1': 'src/a/file1', + '/x/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'file', - 'result': {'exception': 'NotADirectoryError'}, + 'result': {'files': {'/a': 'dest/a', + '/keep': '', + '/x/file1': 'src/a/file1', + '/x/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'files': {'/a': 'dest/a', '/keep': '', - '/x/a/file1': 'src/a/file1', - '/x/a/subdir/file2': 'src/a/subdir/file2'}}, + '/x/file1': 'src/a/file1', + '/x/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 
'file', - 'result': {'exception': 'NotADirectoryError'}, + 'result': {'files': {'/a': 'dest/a', + '/keep': '', + '/x/file1': 'src/a/file1', + '/x/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'file', @@ -1038,7 +1050,7 @@ '/x/a/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'file', @@ -1048,7 +1060,7 @@ '/x/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'file', @@ -1058,7 +1070,7 @@ '/x/a/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'file', @@ -1068,7 +1080,7 @@ '/x/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'dir', @@ -1078,7 +1090,7 @@ '/keep': ''}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'dir', @@ -1088,7 +1100,7 @@ '/keep': ''}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'dir', @@ -1098,7 +1110,7 @@ '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'dir', @@ -1108,41 +1120,51 @@ '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'dir', - 'result': {'files': {'/a/file1': 'src/a/file1', - '/a/file3': 'dest/a/file3', - '/a/subdir/file2': 'src/a/subdir/file2', - '/keep': ''}}, + 'result': {'files': {'/a/file3': 'dest/a/file3', + '/a/subdir/file2': 'dest/a/subdir/file2', + '/file1': 'src/a/file1', + '/keep': '', + '/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'dir', - 'result': {'exception': 'NotADirectoryError'}, + 'result': {'files': {'/a/file3': 'dest/a/file3', + '/a/subdir/file2': 'dest/a/subdir/file2', + '/file1': 'src/a/file1', + '/keep': '', + '/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'dir', - 'result': {'files': {'/a/file1': 'src/a/file1', - '/a/file3': 'dest/a/file3', - '/a/subdir/file2': 'src/a/subdir/file2', - '/keep': ''}}, + 'result': {'files': {'/a/file3': 'dest/a/file3', + '/a/subdir/file2': 'dest/a/subdir/file2', + '/file1': 'src/a/file1', + '/keep': '', + '/subdir/file2': 
'src/a/subdir/file2'}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'dir', - 'result': {'exception': 'NotADirectoryError'}, + 'result': {'files': {'/a/file3': 'dest/a/file3', + '/a/subdir/file2': 'dest/a/subdir/file2', + '/file1': 'src/a/file1', + '/keep': '', + '/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'dir', @@ -1152,7 +1174,7 @@ '/keep': ''}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'dir', @@ -1162,7 +1184,7 @@ '/keep': ''}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'dir', @@ -1172,7 +1194,7 @@ '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'dir', @@ -1182,7 +1204,7 @@ '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'dir', @@ -1193,7 +1215,7 @@ '/keep': ''}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'dir', @@ -1204,7 +1226,7 @@ '/keep': ''}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'dir', @@ -1215,7 +1237,7 @@ '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'dir', @@ -1226,43 +1248,47 @@ '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'dir', - 'result': {'files': {'/a/a/file1': 'src/a/file1', - '/a/a/subdir/file2': 'src/a/subdir/file2', + 'result': {'files': {'/a/file1': 'src/a/file1', '/a/file3': 'dest/a/file3', - '/a/subdir/file2': 'dest/a/subdir/file2', + '/a/subdir/file2': 'src/a/subdir/file2', '/keep': ''}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'dir', - 'result': {'exception': 'NotADirectoryError'}, + 'result': {'files': {'/a/file1': 'src/a/file1', + '/a/file3': 'dest/a/file3', + '/a/subdir/file2': 'src/a/subdir/file2', + '/keep': ''}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'dir', - 'result': {'files': {'/a/a/file1': 'src/a/file1', - '/a/a/subdir/file2': 'src/a/subdir/file2', + 'result': {'files': {'/a/file1': 'src/a/file1', '/a/file3': 'dest/a/file3', - '/a/subdir/file2': 'dest/a/subdir/file2', + '/a/subdir/file2': 
'src/a/subdir/file2', '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'dir', - 'result': {'exception': 'NotADirectoryError'}, + 'result': {'files': {'/a/file1': 'src/a/file1', + '/a/file3': 'dest/a/file3', + '/a/subdir/file2': 'src/a/subdir/file2', + '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'dir', @@ -1273,7 +1299,7 @@ '/keep': ''}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'dir', @@ -1284,7 +1310,7 @@ '/keep': ''}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'dir', @@ -1295,7 +1321,7 @@ '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'dir', @@ -1306,7 +1332,7 @@ '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'dir', @@ -1317,7 +1343,7 @@ '/x/a/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'dir', @@ -1328,7 +1354,7 @@ '/x/a/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'dir', @@ -1339,7 +1365,7 @@ '/x/a/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'dir', @@ -1350,43 +1376,51 @@ '/x/a/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'dir', 'result': {'files': {'/a/file3': 'dest/a/file3', '/a/subdir/file2': 'dest/a/subdir/file2', '/keep': '', - '/x/a/file1': 'src/a/file1', - '/x/a/subdir/file2': 'src/a/subdir/file2'}}, + '/x/file1': 'src/a/file1', + '/x/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'dir', - 'result': {'exception': 'NotADirectoryError'}, + 'result': {'files': {'/a/file3': 'dest/a/file3', + '/a/subdir/file2': 'dest/a/subdir/file2', + '/keep': '', + '/x/file1': 'src/a/file1', + '/x/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'dir', 'result': {'files': {'/a/file3': 'dest/a/file3', '/a/subdir/file2': 'dest/a/subdir/file2', '/keep': '', - '/x/a/file1': 'src/a/file1', - 
'/x/a/subdir/file2': 'src/a/subdir/file2'}}, + '/x/file1': 'src/a/file1', + '/x/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'dir', - 'result': {'exception': 'NotADirectoryError'}, + 'result': {'files': {'/a/file3': 'dest/a/file3', + '/a/subdir/file2': 'dest/a/subdir/file2', + '/keep': '', + '/x/file1': 'src/a/file1', + '/x/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'dir', @@ -1397,7 +1431,7 @@ '/x/a/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'dir', @@ -1408,7 +1442,7 @@ '/x/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'dir', @@ -1419,7 +1453,7 @@ '/x/a/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'dir', @@ -1430,7 +1464,7 @@ '/x/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'noexist', @@ -1439,7 +1473,7 @@ '/keep': ''}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'noexist', @@ -1448,7 +1482,7 @@ '/keep': ''}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'noexist', @@ -1457,7 +1491,7 @@ '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'noexist', @@ -1466,39 +1500,43 @@ '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'noexist', - 'result': {'files': {'/a/file1': 'src/a/file1', - '/a/subdir/file2': 'src/a/subdir/file2', - '/keep': ''}}, + 'result': {'files': {'/file1': 'src/a/file1', + '/keep': '', + '/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'noexist', - 'result': {'exception': 'NotADirectoryError'}, + 'result': {'files': {'/file1': 'src/a/file1', + '/keep': '', + '/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'noexist', - 'result': {'files': {'/a/file1': 'src/a/file1', - '/a/subdir/file2': 'src/a/subdir/file2', - '/keep': 
''}}, + 'result': {'files': {'/file1': 'src/a/file1', + '/keep': '', + '/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'noexist', - 'result': {'exception': 'NotADirectoryError'}, + 'result': {'files': {'/file1': 'src/a/file1', + '/keep': '', + '/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'noexist', @@ -1507,7 +1545,7 @@ '/keep': ''}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'noexist', @@ -1516,7 +1554,7 @@ '/keep': ''}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'noexist', @@ -1525,7 +1563,7 @@ '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'noexist', @@ -1534,7 +1572,7 @@ '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'noexist', @@ -1543,7 +1581,7 @@ '/keep': ''}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'noexist', @@ -1552,7 +1590,7 @@ '/keep': ''}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'noexist', @@ -1561,7 +1599,7 @@ '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'noexist', @@ -1570,39 +1608,43 @@ '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'noexist', - 'result': {'files': {'/a/a/file1': 'src/a/file1', - '/a/a/subdir/file2': 'src/a/subdir/file2', + 'result': {'files': {'/a/file1': 'src/a/file1', + '/a/subdir/file2': 'src/a/subdir/file2', '/keep': ''}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'noexist', - 'result': {'exception': 'NotADirectoryError'}, + 'result': {'files': {'/a/file1': 'src/a/file1', + '/a/subdir/file2': 'src/a/subdir/file2', + '/keep': ''}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'noexist', - 'result': {'files': {'/a/a/file1': 'src/a/file1', - '/a/a/subdir/file2': 'src/a/subdir/file2', + 'result': {'files': {'/a/file1': 'src/a/file1', + '/a/subdir/file2': 'src/a/subdir/file2', '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 
'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'noexist', - 'result': {'exception': 'NotADirectoryError'}, + 'result': {'files': {'/a/file1': 'src/a/file1', + '/a/subdir/file2': 'src/a/subdir/file2', + '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'noexist', @@ -1611,7 +1653,7 @@ '/keep': ''}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'noexist', @@ -1620,7 +1662,7 @@ '/keep': ''}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'noexist', @@ -1629,7 +1671,7 @@ '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'noexist', @@ -1638,7 +1680,7 @@ '/keep': ''}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'noexist', @@ -1647,7 +1689,7 @@ '/x/a/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'noexist', @@ -1656,7 +1698,7 @@ '/x/a/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'noexist', @@ -1665,7 +1707,7 @@ '/x/a/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'noexist', @@ -1674,39 +1716,43 @@ '/x/a/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'files': {'/keep': '', - '/x/a/file1': 'src/a/file1', - '/x/a/subdir/file2': 'src/a/subdir/file2'}}, + '/x/file1': 'src/a/file1', + '/x/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'noexist', - 'result': {'exception': 'NotADirectoryError'}, + 'result': {'files': {'/keep': '', + '/x/file1': 'src/a/file1', + '/x/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'files': {'/keep': '', - '/x/a/file1': 'src/a/file1', - '/x/a/subdir/file2': 'src/a/subdir/file2'}}, + '/x/file1': 'src/a/file1', + '/x/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'noexist', - 
'result': {'exception': 'NotADirectoryError'}, + 'result': {'files': {'/keep': '', + '/x/file1': 'src/a/file1', + '/x/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'noexist', @@ -1715,7 +1761,7 @@ '/x/a/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'noexist', @@ -1724,7 +1770,7 @@ '/x/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': True, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'noexist', @@ -1733,7 +1779,7 @@ '/x/a/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'noexist', @@ -1742,760 +1788,760 @@ '/x/subdir/file2': 'src/a/subdir/file2'}}, 'src_trailing_slash': False, 'src_type': 'dir', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': False, 
'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 
'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'file', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 
'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 
'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 
'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'dir', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 
'treat_dest_as': 'infer_dest'}, {'dest_basename': None, 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'a', 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'exception': 
'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_dir'}, + 'treat_dest_as': 'dest_dir'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'target_file'}, + 'treat_dest_as': 'dest_is_target'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': True, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': True, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}, + 'treat_dest_as': 'infer_dest'}, {'dest_basename': 'x', 'dest_trailing_slash': False, 'dest_type': 'noexist', 'result': {'exception': 'FileNotFoundError'}, 'src_trailing_slash': False, 'src_type': 'noexist', - 'treat_dest_as': 'infer_target'}] + 'treat_dest_as': 'infer_dest'}] diff --git a/hail/python/test/hailtop/aiotools/generate_copy_test_specs.py b/hail/python/test/hailtop/aiotools/generate_copy_test_specs.py index ad881d11a9b..b356bd4e718 100644 --- a/hail/python/test/hailtop/aiotools/generate_copy_test_specs.py +++ b/hail/python/test/hailtop/aiotools/generate_copy_test_specs.py @@ -60,7 +60,7 @@ def copy_test_configurations(): for src_type in ['file', 'dir', 'noexist']: for dest_type in ['file', 'dir', 'noexist']: for dest_basename in [None, 'a', 'x']: - for treat_dest_as in [Transfer.TARGET_DIR, Transfer.TARGET_FILE, Transfer.INFER_TARGET]: + for treat_dest_as in [Transfer.DEST_DIR, Transfer.DEST_IS_TARGET, Transfer.INFER_DEST]: for src_trailing_slash in [True, False]: for dest_trailing_slash in [True, False]: yield { @@ -137,7 +137,7 @@ async def copy_test_specs(): pass sema = asyncio.Semaphore(50) - with sema: + async with sema: result = await run_test_spec(sema, fs, config, src_base, dest_base) config['result'] = 
result diff --git a/hail/python/test/hailtop/aiotools/test_copy.py b/hail/python/test/hailtop/aiotools/test_copy.py index 2ed7a1ffc71..e7a8bea3646 100644 --- a/hail/python/test/hailtop/aiotools/test_copy.py +++ b/hail/python/test/hailtop/aiotools/test_copy.py @@ -189,7 +189,7 @@ async def test_copy_rename_file_dest_target_file(copy_test_context): await create_test_file(fs, 'src', src_base, 'a') - await fs.copy(sema, Transfer(f'{src_base}a', f'{dest_base}x', treat_dest_as=Transfer.TARGET_FILE)) + await fs.copy(sema, Transfer(f'{src_base}a', f'{dest_base}x', treat_dest_as=Transfer.DEST_IS_TARGET)) await expect_file(fs, f'{dest_base}x', 'src/a') @@ -201,7 +201,7 @@ async def test_copy_file_dest_target_directory_doesnt_exist(copy_test_context): await create_test_file(fs, 'src', src_base, 'a') # SourceCopier._copy_file creates destination directories as needed - await fs.copy(sema, Transfer(f'{src_base}a', f'{dest_base}x', treat_dest_as=Transfer.TARGET_DIR)) + await fs.copy(sema, Transfer(f'{src_base}a', f'{dest_base}x', treat_dest_as=Transfer.DEST_DIR)) await expect_file(fs, f'{dest_base}x/a', 'src/a') @@ -229,13 +229,39 @@ async def test_copy_rename_dir(copy_test_context): await expect_file(fs, f'{dest_base}x/subdir/file2', 'src/a/subdir/file2') +@pytest.mark.asyncio +async def test_copy_rename_dir_dest_is_target(copy_test_context): + sema, fs, src_base, dest_base = copy_test_context + + await create_test_dir(fs, 'src', src_base, 'a/') + + await fs.copy(sema, Transfer(f'{src_base}a', f'{dest_base}x', treat_dest_as=Transfer.DEST_IS_TARGET)) + + await expect_file(fs, f'{dest_base}x/file1', 'src/a/file1') + await expect_file(fs, f'{dest_base}x/subdir/file2', 'src/a/subdir/file2') + + +@pytest.mark.asyncio +async def test_overwrite_rename_dir(copy_test_context): + sema, fs, src_base, dest_base = copy_test_context + + await create_test_dir(fs, 'src', src_base, 'a/') + await create_test_dir(fs, 'dest', dest_base, 'x/') + + await fs.copy(sema, Transfer(f'{src_base}a', f'{dest_base}x', treat_dest_as=Transfer.DEST_IS_TARGET)) + + await expect_file(fs, f'{dest_base}x/file1', 'src/a/file1') + await expect_file(fs, f'{dest_base}x/subdir/file2', 'src/a/subdir/file2') + await expect_file(fs, f'{dest_base}x/file3', 'dest/x/file3') + + @pytest.mark.asyncio async def test_copy_file_dest_trailing_slash_target_dir(copy_test_context): sema, fs, src_base, dest_base = copy_test_context await create_test_file(fs, 'src', src_base, 'a') - await fs.copy(sema, Transfer(f'{src_base}a', dest_base, treat_dest_as=Transfer.TARGET_DIR)) + await fs.copy(sema, Transfer(f'{src_base}a', dest_base, treat_dest_as=Transfer.DEST_DIR)) await expect_file(fs, f'{dest_base}a', 'src/a') @@ -246,7 +272,7 @@ async def test_copy_file_dest_target_dir(copy_test_context): await create_test_file(fs, 'src', src_base, 'a') - await fs.copy(sema, Transfer(f'{src_base}a', dest_base.rstrip('/'), treat_dest_as=Transfer.TARGET_DIR)) + await fs.copy(sema, Transfer(f'{src_base}a', dest_base.rstrip('/'), treat_dest_as=Transfer.DEST_DIR)) await expect_file(fs, f'{dest_base}a', 'src/a') @@ -257,7 +283,7 @@ async def test_copy_file_dest_target_file(copy_test_context): await create_test_file(fs, 'src', src_base, 'a') - await fs.copy(sema, Transfer(f'{src_base}a', f'{dest_base}a', treat_dest_as=Transfer.TARGET_FILE)) + await fs.copy(sema, Transfer(f'{src_base}a', f'{dest_base}a', treat_dest_as=Transfer.DEST_IS_TARGET)) await expect_file(fs, f'{dest_base}a', 'src/a') @@ -269,7 +295,7 @@ async def test_copy_dest_target_file_is_dir(copy_test_context): await 
create_test_file(fs, 'src', src_base, 'a') with RaisesOrGS(dest_base, IsADirectoryError): - await fs.copy(sema, Transfer(f'{src_base}a', dest_base.rstrip('/'), treat_dest_as=Transfer.TARGET_FILE)) + await fs.copy(sema, Transfer(f'{src_base}a', dest_base.rstrip('/'), treat_dest_as=Transfer.DEST_IS_TARGET)) @pytest.mark.asyncio @@ -341,7 +367,7 @@ async def test_copy_multiple_dest_target_file(copy_test_context): await create_test_file(fs, 'src', src_base, 'b') with RaisesOrGS(dest_base, NotADirectoryError): - await fs.copy(sema, Transfer([f'{src_base}a', f'{src_base}b'], dest_base.rstrip('/'), treat_dest_as=Transfer.TARGET_FILE)) + await fs.copy(sema, Transfer([f'{src_base}a', f'{src_base}b'], dest_base.rstrip('/'), treat_dest_as=Transfer.DEST_IS_TARGET)) @pytest.mark.asyncio @@ -363,7 +389,7 @@ async def test_file_overwrite_dir(copy_test_context): await create_test_file(fs, 'src', src_base, 'a') with RaisesOrGS(dest_base, IsADirectoryError): - await fs.copy(sema, Transfer(f'{src_base}a', dest_base.rstrip('/'), treat_dest_as=Transfer.TARGET_FILE)) + await fs.copy(sema, Transfer(f'{src_base}a', dest_base.rstrip('/'), treat_dest_as=Transfer.DEST_IS_TARGET)) @pytest.mark.asyncio @@ -386,7 +412,7 @@ async def test_copy_src_parts(copy_test_context): await create_test_dir(fs, 'src', src_base, 'a/') - await fs.copy(sema, Transfer([f'{src_base}a/file1', f'{src_base}a/subdir'], dest_base.rstrip('/'), treat_dest_as=Transfer.TARGET_DIR)) + await fs.copy(sema, Transfer([f'{src_base}a/file1', f'{src_base}a/subdir'], dest_base.rstrip('/'), treat_dest_as=Transfer.DEST_DIR)) await expect_file(fs, f'{dest_base}file1', 'src/a/file1') await expect_file(fs, f'{dest_base}subdir/file2', 'src/a/subdir/file2') diff --git a/hail/src/main/scala/is/hail/annotations/CodeOrdering.scala b/hail/src/main/scala/is/hail/annotations/CodeOrdering.scala deleted file mode 100644 index e6cf401d215..00000000000 --- a/hail/src/main/scala/is/hail/annotations/CodeOrdering.scala +++ /dev/null @@ -1,708 +0,0 @@ -package is.hail.annotations - -import is.hail.asm4s._ -import is.hail.expr.ir.{Ascending, Descending, EmitMethodBuilder, EmitCode, EmitCodeBuilder, SortField, SortOrder} -import is.hail.types._ -import is.hail.asm4s.coerce -import is.hail.types.physical._ -import is.hail.types.physical.stypes.interfaces._ -import is.hail.utils._ - -object CodeOrdering { - - sealed trait Op { - type ReturnType - val rtti: TypeInfo[ReturnType] - val missingEqual: Boolean - } - final case class Compare(missingEqual: Boolean = true) extends Op { - type ReturnType = Int - val rtti = typeInfo[Int] - } - sealed trait BooleanOp extends Op { - type ReturnType = Boolean - val rtti = typeInfo[Boolean] - } - final case class Equiv(missingEqual: Boolean = true) extends BooleanOp - final case class Lt(missingEqual: Boolean = true) extends BooleanOp - final case class Lteq(missingEqual: Boolean = true) extends BooleanOp - final case class Gt(missingEqual: Boolean = true) extends BooleanOp - final case class Gteq(missingEqual: Boolean = true) extends BooleanOp - final case class Neq(missingEqual: Boolean = true) extends BooleanOp - - type F[R] = (EmitCodeBuilder, EmitCode, EmitCode) => Code[R] - - def rowOrdering( - t1: PBaseStruct, - t2: PBaseStruct, - mb: EmitMethodBuilder[_], - sortOrders: Array[SortOrder] = null, - missingFieldsEqual: Boolean = true - ): CodeOrdering = new CodeOrdering { - require(sortOrders == null || sortOrders.size == t1.size) - - def setup(cb: EmitCodeBuilder, lhs: PCode, rhs: PCode): (PBaseStructValue, PBaseStructValue) = { - 
lhs.asBaseStruct.memoize(cb, "structord_lhs") -> rhs.asBaseStruct.memoize(cb, "structord_rhs") - } - - private[this] def fieldOrdering(i: Int, op: CodeOrdering.Op): CodeOrdering.F[op.ReturnType] = - mb.getCodeOrdering( - t1.types(i), - t2.types(i), - if (sortOrders == null) Ascending else sortOrders(i), - op) - - override def compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = { - val (lhs, rhs) = setup(cb, x, y) - val Lout = CodeLabel() - val cmp = cb.newLocal("cmp", 0) - - var i = 0 - while (i < t1.size) { - val fldCmp = fieldOrdering(i, CodeOrdering.Compare(missingFieldsEqual)) - val l = EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i).typecast } - val r = EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i).typecast } - cb.assign(cmp, fldCmp(cb, l, r)) - cb.ifx(cmp.cne(0), cb.goto(Lout)) - i += 1 - } - - cb.define(Lout) - cmp - } - - override def ltNonnull(cb:EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { - val (lhs, rhs) = setup(cb, x, y) - val Lout = CodeLabel() - val lt = cb.newLocal("lt", true) - val eq = cb.newLocal("eq", true) - - var i = 0 - while (i < t1.size) { - val fldLt = fieldOrdering(i, CodeOrdering.Lt(missingFieldsEqual)) - val fldEq = fieldOrdering(i, CodeOrdering.Equiv(missingFieldsEqual)) - - val l = cb.memoize(EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i).typecast }, s"struct_lt_lhs_fld$i") - val r = cb.memoize(EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i).typecast }, s"struct_lt_rhs_fld$i") - cb.assign(lt, fldLt(cb, l, r)) - cb.assign(eq, !lt && fldEq(cb, l, r)) - cb.ifx(!eq, cb.goto(Lout)) - i += 1 - } - - cb.define(Lout) - lt - } - - override def lteqNonnull(cb:EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { - val (lhs, rhs) = setup(cb, x, y) - val Lout = CodeLabel() - val lteq = cb.newLocal("lteq", true) - val eq = cb.newLocal("eq", true) - - var i = 0 - while (i < t1.size) { - val fldLtEq = fieldOrdering(i, CodeOrdering.Lteq(missingFieldsEqual)) - val fldEq = fieldOrdering(i, CodeOrdering.Equiv(missingFieldsEqual)) - - val l = cb.memoize(EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i).typecast }, s"struct_lteq_lhs_fld$i") - val r = cb.memoize(EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i).typecast }, s"struct_lteq_rhs_fld$i") - cb.assign(lteq, fldLtEq(cb, l, r)) - cb.assign(eq, fldEq(cb, l, r)) - cb.ifx(!eq, cb.goto(Lout)) - i += 1 - } - - cb.define(Lout) - lteq - } - - override def gtNonnull(cb:EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { - val (lhs, rhs) = setup(cb, x, y) - val Lout = CodeLabel() - val gt = cb.newLocal("gt", false) - val eq = cb.newLocal("eq", true) - - var i = 0 - while (i < t1.size) { - val fldGt = fieldOrdering(i, CodeOrdering.Gt(missingFieldsEqual)) - val fldEq = fieldOrdering(i, CodeOrdering.Equiv(missingFieldsEqual)) - - val l = cb.memoize(EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i).typecast }, s"struct_gt_lhs_fld$i") - val r = cb.memoize(EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i).typecast }, s"struct_gt_rhs_fld$i") - cb.assign(gt, fldGt(cb, l, r)) - cb.assign(eq, !gt && fldEq(cb, l, r)) - cb.ifx(!eq, cb.goto(Lout)) - i += 1 - } - - cb.define(Lout) - gt - } - - override def gteqNonnull(cb:EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { - val (lhs, rhs) = setup(cb, x, y) - val Lout = CodeLabel() - val gteq = cb.newLocal("gteq", true) - val eq = cb.newLocal("eq", true) - - var i = 0 - while (i < t1.size) { - val fldGtEq = fieldOrdering(i, CodeOrdering.Gteq(missingFieldsEqual)) - val fldEq = fieldOrdering(i, CodeOrdering.Equiv(missingFieldsEqual)) - 
- val l = cb.memoize(EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i).typecast }, s"struct_gteq_lhs_fld$i") - val r = cb.memoize(EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i).typecast }, s"struct_gteq_rhs_fld$i") - cb.assign(gteq, fldGtEq(cb, l, r)) - cb.assign(eq, fldEq(cb, l, r)) - cb.ifx(!eq, cb.goto(Lout)) - i += 1 - } - - cb.define(Lout) - gteq - } - - override def equivNonnull(cb:EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { - val (lhs, rhs) = setup(cb, x, y) - val Lout = CodeLabel() - val eq = cb.newLocal("cmp", true) - - var i = 0 - while (i < t1.size) { - val fldEq = fieldOrdering(i, CodeOrdering.Equiv(missingFieldsEqual)) - val l = EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i).typecast } - val r = EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i).typecast } - cb.assign(eq, fldEq(cb, l, r)) - cb.ifx(!eq, cb.goto(Lout)) - i += 1 - } - - cb.define(Lout) - eq - } - } - - def iterableOrdering(t1: PArray, t2: PArray, mb: EmitMethodBuilder[_]): CodeOrdering = new CodeOrdering { - private[this] def setup(cb: EmitCodeBuilder, lhs: PCode, rhs: PCode): (PIndexableValue, PIndexableValue) = { - val lhsv = lhs.asIndexable.memoize(cb, "container_ord_lhs") - val rhsv = rhs.asIndexable.memoize(cb, "container_ord_rhs") - lhsv -> rhsv - } - - private[this] def loop(cb: EmitCodeBuilder, lhs: PIndexableValue, rhs: PIndexableValue)( - f: (EmitCode, EmitCode) => Unit - ): Unit = { - val i = cb.newLocal[Int]("i") - val lim = cb.newLocal("lim", lhs.loadLength().min(rhs.loadLength())) - cb.forLoop(cb.assign(i, 0), i < lim, cb.assign(i, i + 1), { - val left = EmitCode.fromI(cb.emb)(lhs.loadElement(_, i).typecast) - val right = EmitCode.fromI(cb.emb)(rhs.loadElement(_, i).typecast) - f(left, right) - }) - } - - override def compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = { - val elemCmp = mb.getCodeOrdering(t1.elementType, t2.elementType, CodeOrdering.Compare()) - - val Lout = CodeLabel() - val cmp = cb.newLocal[Int]("iterable_cmp", 0) - - val (lhs, rhs) = setup(cb, x, y) - loop(cb, lhs, rhs) { (lhs, rhs) => - cb.assign(cmp, elemCmp(cb, lhs, rhs)) - cb.ifx(cmp.cne(0), cb.goto(Lout)) - } - - // if we get here, cmp is 0 - cb.assign(cmp, - Code.invokeStatic2[java.lang.Integer, Int, Int, Int]( - "compare", lhs.loadLength(), rhs.loadLength())) - cb.define(Lout) - cmp - } - - override def ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { - val elemLt = mb.getCodeOrdering(t1.elementType, t2.elementType, CodeOrdering.Lt()) - val elemEq = mb.getCodeOrdering(t1.elementType, t2.elementType, CodeOrdering.Equiv()) - - val ret = cb.newLocal[Boolean]("iterable_lt") - val Lout = CodeLabel() - - val (lhs, rhs) = setup(cb, x, y) - val lt = cb.newLocal("lt", false) - val eq = cb.newLocal("eq", true) - - loop(cb, lhs, rhs) { (lhsEC, rhsEC) => - val lhs = cb.memoize(lhsEC, "lhs_item") - val rhs = cb.memoize(rhsEC, "rhs_item") - cb.assign(lt, elemLt(cb, lhs, rhs)) - cb.assign(eq, !lt && elemEq(cb, lhs, rhs)) - - cb.ifx(!eq, { - cb.assign(ret, lt) - cb.goto(Lout) - }) - } - - cb.assign(ret, lhs.loadLength() < rhs.loadLength()) - cb.define(Lout) - ret - } - - override def lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { - val elemLtEq = mb.getCodeOrdering(t1.elementType, t2.elementType, CodeOrdering.Lteq()) - val elemEq = mb.getCodeOrdering(t1.elementType, t2.elementType, CodeOrdering.Equiv()) - - val ret = cb.newLocal[Boolean]("iterable_lteq") - val Lout = CodeLabel() - - val (lhs, rhs) = setup(cb, x, y) - val lteq = cb.newLocal("lteq", 
false) - val eq = cb.newLocal("eq", true) - - loop(cb, lhs, rhs) { (lhsEC, rhsEC) => - val lhs = cb.memoize(lhsEC, "lhs_item") - val rhs = cb.memoize(rhsEC, "rhs_item") - cb.assign(lteq, elemLtEq(cb, lhs, rhs)) - cb.assign(eq, elemEq(cb, lhs, rhs)) - - cb.ifx(!eq, { - cb.assign(ret, lteq) - cb.goto(Lout) - }) - } - - cb.assign(ret, lhs.loadLength() <= rhs.loadLength) - cb.define(Lout) - ret - } - - override def gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { - val elemGt = mb.getCodeOrdering(t1.elementType, t2.elementType, CodeOrdering.Gt()) - val elemEq = mb.getCodeOrdering(t1.elementType, t2.elementType, CodeOrdering.Equiv()) - - val ret = cb.newLocal[Boolean]("iterable_gt") - val Lout = CodeLabel() - - val (lhs, rhs) = setup(cb, x, y) - val gt = cb.newLocal("gt", false) - val eq = cb.newLocal("eq", true) - - loop(cb, lhs, rhs) { (lhsEC, rhsEC) => - val lhs = cb.memoize(lhsEC, "lhs_item") - val rhs = cb.memoize(rhsEC, "rhs_item") - cb.assign(gt, elemGt(cb, lhs, rhs)) - cb.assign(eq, !gt && elemEq(cb, lhs, rhs)) - - cb.ifx(!eq, { - cb.assign(ret, gt) - cb.goto(Lout) - }) - } - - cb.assign(ret, lhs.loadLength() > rhs.loadLength()) - cb.define(Lout) - ret - } - - override def gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { - val elemGtEq = mb.getCodeOrdering(t1.elementType, t2.elementType, CodeOrdering.Gteq()) - val elemEq = mb.getCodeOrdering(t1.elementType, t2.elementType, CodeOrdering.Equiv()) - - val ret = cb.newLocal[Boolean]("iterable_gteq") - val Lout = CodeLabel() - - val (lhs, rhs) = setup(cb, x, y) - val gteq = cb.newLocal("gteq", true) - val eq = cb.newLocal("eq", true) - - loop(cb, lhs, rhs) { (lhsEC, rhsEC) => - val lhs = cb.memoize(lhsEC, "lhs_item") - val rhs = cb.memoize(rhsEC, "rhs_item") - cb.assign(gteq, elemGtEq(cb, lhs, rhs)) - cb.assign(eq, elemEq(cb, lhs, rhs)) - - cb.ifx(!eq, { - cb.assign(ret, gteq) - cb.goto(Lout) - }) - } - - cb.assign(ret, lhs.loadLength() >= rhs.loadLength) - cb.define(Lout) - ret - } - - override def equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { - val elemEq = mb.getCodeOrdering(t1.elementType, t2.elementType, CodeOrdering.Equiv()) - val ret = cb.newLocal[Boolean]("iterable_eq", true) - val Lout = CodeLabel() - val exitWith = (value: Code[Boolean]) => { - cb.assign(ret, value) - cb.goto(Lout) - } - - val (lhs, rhs) = setup(cb, x, y) - cb.ifx(lhs.loadLength().cne(rhs.loadLength()), exitWith(false)) - loop(cb, lhs, rhs) { (lhs, rhs) => - cb.assign(ret, elemEq(cb, lhs, rhs)) - cb.ifx(!ret, cb.goto(Lout)) - } - - cb.define(Lout) - ret - } - } - - def intervalOrdering(t1: PInterval, t2: PInterval, mb: EmitMethodBuilder[_]): CodeOrdering = new CodeOrdering { - private val setup: (EmitCodeBuilder, PCode, PCode) => (PIntervalValue, PIntervalValue) = { - case (cb, lhs: PIntervalCode, rhs: PIntervalCode) => - lhs.memoize(cb, "intervalord_lhs") -> rhs.memoize(cb, "intervalord_rhs") - } - - override def compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = { - val pointCompare = mb.getCodeOrdering(t1.pointType, t2.pointType, CodeOrdering.Compare()) - val cmp = cb.newLocal[Int]("intervalord_cmp", 0) - - val (lhs, rhs) = setup(cb, x, y) - val lstart = EmitCode.fromI(cb.emb)(lhs.loadStart(_).typecast) - val rstart = EmitCode.fromI(cb.emb)(rhs.loadStart(_).typecast) - cb.assign(cmp, pointCompare(cb, lstart, rstart)) - cb.ifx(cmp.ceq(0), { - cb.ifx(lhs.includesStart().cne(rhs.includesStart()), { - cb.assign(cmp, lhs.includesStart().mux(-1, 1)) - }, { - val lend = 
EmitCode.fromI(cb.emb)(lhs.loadEnd(_).typecast) - val rend = EmitCode.fromI(cb.emb)(rhs.loadEnd(_).typecast) - cb.assign(cmp, pointCompare(cb, lend, rend)) - cb.ifx(cmp.ceq(0), { - cb.ifx(lhs.includesEnd().cne(rhs.includesEnd()), { - cb.assign(cmp, lhs.includesEnd().mux(1, -1)) - }) - }) - }) - }) - - cmp - } - - override def equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { - val pointEq = mb.getCodeOrdering(t1.pointType, t2.pointType, CodeOrdering.Equiv()) - - val Lout = CodeLabel() - val ret = cb.newLocal[Boolean]("interval_eq", true) - val exitWith = (value: Code[Boolean]) => { - cb.assign(ret, value) - cb.goto(Lout) - } - - val (lhs, rhs) = setup(cb, x, y) - - cb.ifx(lhs.includesStart().cne(rhs.includesStart()) || - lhs.includesEnd().cne(rhs.includesEnd()), { - exitWith(false) - }) - - val lstart = EmitCode.fromI(cb.emb)(lhs.loadStart(_).typecast) - val rstart = EmitCode.fromI(cb.emb)(rhs.loadStart(_).typecast) - cb.ifx(!pointEq(cb, lstart, rstart), exitWith(false)) - - val lend = EmitCode.fromI(cb.emb)(lhs.loadEnd(_).typecast) - val rend = EmitCode.fromI(cb.emb)(rhs.loadEnd(_).typecast) - cb.ifx(!pointEq(cb, lend, rend), exitWith(false)) - - cb.define(Lout) - ret - } - - override def ltNonnull(cb:EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { - val pointLt = mb.getCodeOrdering(t1.pointType, t2.pointType, CodeOrdering.Lt()) - val pointEq = mb.getCodeOrdering(t1.pointType, t2.pointType, CodeOrdering.Equiv()) - - val Lout = CodeLabel() - val ret = cb.newLocal[Boolean]("interval_lt") - val exitWith = (value: Code[Boolean]) => { - cb.assign(ret, value) - cb.goto(Lout) - } - - val (lhs, rhs) = setup(cb, x, y) - val lstart = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadStart(_).typecast), "linterval_start") - val rstart = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadStart(_).typecast), "rinterval_start") - - cb.ifx(pointLt(cb, lstart, rstart), exitWith(true)) - cb.ifx(!pointEq(cb, lstart, rstart), exitWith(false)) - cb.ifx(lhs.includesStart() && !rhs.includesStart(), exitWith(true)) - cb.ifx(lhs.includesStart().cne(rhs.includesStart()), exitWith(false)) - - val lend = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadEnd(_).typecast), "linterval_end") - val rend = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadEnd(_).typecast), "rinterval_end") - - cb.ifx(pointLt(cb, lend, rend), exitWith(true)) - cb.assign(ret, pointEq(cb, lend, rend) && !lhs.includesEnd() && rhs.includesEnd()) - - cb.define(Lout) - ret - } - - override def lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { - val pointLtEq = mb.getCodeOrdering(t1.pointType, t2.pointType, CodeOrdering.Lteq()) - val pointEq = mb.getCodeOrdering(t1.pointType, t2.pointType, CodeOrdering.Equiv()) - - val Lout = CodeLabel() - val ret = cb.newLocal[Boolean]("interval_lteq") - val exitWith = (value: Code[Boolean]) => { - cb.assign(ret, value) - cb.goto(Lout) - } - - val (lhs, rhs) = setup(cb, x, y) - val lstart = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadStart(_).typecast), "linterval_start") - val rstart = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadStart(_).typecast), "rinterval_start") - - cb.ifx(!pointLtEq(cb, lstart, rstart), exitWith(false)) - cb.ifx(!pointEq(cb, lstart, rstart), exitWith(true)) - cb.ifx(lhs.includesStart() && !rhs.includesStart(), exitWith(true)) - cb.ifx(lhs.includesStart().cne(rhs.includesStart()), exitWith(false)) - - val lend = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadEnd(_).typecast), "linterval_end") - val rend = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadEnd(_).typecast), 
"rinterval_end") - cb.ifx(!pointLtEq(cb, lend, rend), exitWith(false)) - cb.assign(ret, !pointEq(cb, lend, rend) || !lhs.includesEnd() || rhs.includesEnd()) - - cb.define(Lout) - ret - } - - override def gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { - val pointGt = mb.getCodeOrdering(t1.pointType, t2.pointType, CodeOrdering.Gt()) - val pointEq = mb.getCodeOrdering(t1.pointType, t2.pointType, CodeOrdering.Equiv()) - - val Lout = CodeLabel() - val ret = cb.newLocal[Boolean]("interval_gt") - val exitWith = (value: Code[Boolean]) => { - cb.assign(ret, value) - cb.goto(Lout) - } - - val (lhs, rhs) = setup(cb, x, y) - val lstart = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadStart(_).typecast), "linterval_start") - val rstart = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadStart(_).typecast), "rinterval_start") - - cb.ifx(pointGt(cb, lstart, rstart), exitWith(true)) - cb.ifx(!pointEq(cb, lstart, rstart), exitWith(false)) - cb.ifx(!lhs.includesStart() && rhs.includesStart(), exitWith(true)) - cb.ifx(lhs.includesStart().cne(rhs.includesStart()), exitWith(false)) - - val lend = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadEnd(_).typecast), "linterval_end") - val rend = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadEnd(_).typecast), "rinterval_end") - - cb.ifx(pointGt(cb, lend, rend), exitWith(true)) - cb.assign(ret, pointEq(cb, lend, rend) && lhs.includesEnd() && !rhs.includesEnd()) - - cb.define(Lout) - ret - } - - override def gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { - val pointGtEq = mb.getCodeOrdering(t1.pointType, t2.pointType, CodeOrdering.Gteq()) - val pointEq = mb.getCodeOrdering(t1.pointType, t2.pointType, CodeOrdering.Equiv()) - - val Lout = CodeLabel() - val ret = cb.newLocal[Boolean]("interval_gteq") - val exitWith = (value: Code[Boolean]) => { - cb.assign(ret, value) - cb.goto(Lout) - } - - val (lhs, rhs) = setup(cb, x, y) - val lstart = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadStart(_).typecast), "linterval_start") - val rstart = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadStart(_).typecast), "rinterval_start") - - cb.ifx(!pointGtEq(cb, lstart, rstart), exitWith(false)) - cb.ifx(!pointEq(cb, lstart, rstart), exitWith(true)) - cb.ifx(!lhs.includesStart() && rhs.includesStart(), exitWith(true)) - cb.ifx(lhs.includesStart().cne(rhs.includesStart()), exitWith(false)) - - val lend = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadEnd(_).typecast), "linterval_end") - val rend = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadEnd(_).typecast), "rinterval_end") - cb.ifx(!pointGtEq(cb, lend, rend), exitWith(false)) - cb.assign(ret, !pointEq(cb, lend, rend) || lhs.includesEnd() || !rhs.includesEnd()) - - cb.define(Lout) - ret - } - } - - def locusOrdering(t1: PLocus, t2: PLocus, mb: EmitMethodBuilder[_]): CodeOrdering = - new CodeOrderingCompareConsistentWithOthers { - require(t1.rg == t2.rg) - - def compareNonnull(cb: EmitCodeBuilder, lhsc: PCode, rhsc: PCode): Code[Int] = { - val codeRG = mb.getReferenceGenome(t1.rg) - val lhs: PLocusValue = lhsc.asLocus.memoize(cb, "locus_cmp_lhs") - val rhs: PLocusValue = rhsc.asLocus.memoize(cb, "locus_cmp_rhs") - val lhsContig = lhs.contig(cb).memoize(cb, "locus_cmp_lcontig").asInstanceOf[SStringValue] - val rhsContig = rhs.contig(cb).memoize(cb, "locus_cmp_rcontig").asInstanceOf[SStringValue] - - // ugh - val lhsContigBinType = lhsContig.get.asBytes().st.pType.asInstanceOf[PBinary] - val rhsContigBinType = rhsContig.get.asBytes().st.pType.asInstanceOf[PBinary] - val bincmp = lhsContigBinType.codeOrdering(mb, rhsContigBinType) 
- - val ret = cb.newLocal[Int]("locus_cmp_ret", 0) - cb.ifx(bincmp.compareNonnull(cb, - lhsContig.get.asBytes().asPCode, - rhsContig.get.asBytes().asPCode).ceq(0), { - cb.assign(ret, Code.invokeStatic2[java.lang.Integer, Int, Int, Int]( - "compare", lhs.position(cb), rhs.position(cb))) - }, { - cb.assign(ret, codeRG.invoke[String, String, Int]( - "compare", lhsContig.get.loadString(), rhsContig.get.loadString())) - }) - ret - } - } - - def mapOrdering(t1: PDict, t2: PDict, mb: EmitMethodBuilder[_]): CodeOrdering = - iterableOrdering(PCanonicalArray(t1.elementType, t1.required), PCanonicalArray(t2.elementType, t2.required), mb) - - def setOrdering(t1: PSet, t2: PSet, mb: EmitMethodBuilder[_]): CodeOrdering = - iterableOrdering(PCanonicalArray(t1.elementType, t1.required), PCanonicalArray(t2.elementType, t2.required), mb) - -} - -abstract class CodeOrdering { - outer => - - def compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] - - def ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] - - def lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] - - def gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] - - def gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] - - def equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] - - def compare(cb: EmitCodeBuilder, x: EmitCode, y: EmitCode, missingEqual: Boolean = true): Code[Int] = { - cb += x.setup - cb += y.setup - val xm = cb.newLocal("cord_compare_xm", x.m) - val ym = cb.newLocal("cord_compare_ym", y.m) - val cmp = cb.newLocal[Int]("cmp") - cb.ifx(xm, - cb.ifx(ym, cb.assign(cmp, if (missingEqual) 0 else -1), cb.assign(cmp, 1)), - cb.ifx(ym, cb.assign(cmp, -1), cb.assign(cmp, compareNonnull(cb, x.pv, y.pv)))) - cmp - } - - def lt(cb: EmitCodeBuilder, x:EmitCode, y: EmitCode, missingEqual: Boolean): Code[Boolean] = { - val ret = cb.newLocal[Boolean]("lt") - cb += x.setup - cb += y.setup - if (missingEqual) { - cb.ifx(x.m, - cb.assign(ret, false), - cb.ifx(y.m, - cb.assign(ret, true), - cb.assign(ret, ltNonnull(cb, x.pv, y.pv)))) - } else { - cb.ifx(y.m, - cb.assign(ret, true), - cb.ifx(x.m, - cb.assign(ret, false), - cb.assign(ret, ltNonnull(cb, x.pv, y.pv)))) - } - ret - } - - def lteq(cb: EmitCodeBuilder, x: EmitCode, y: EmitCode, missingEqual: Boolean): Code[Boolean] = { - val ret = cb.newLocal[Boolean]("lteq") - cb += x.setup - cb += y.setup - cb.ifx(y.m, - cb.assign(ret, true), - cb.ifx(x.m, - cb.assign(ret, false), - cb.assign(ret, lteqNonnull(cb, x.pv, y.pv)))) - ret - } - - def gt(cb: EmitCodeBuilder, x: EmitCode, y: EmitCode, missingEqual: Boolean): Code[Boolean] = { - val ret = cb.newLocal[Boolean]("gt") - cb += x.setup - cb += y.setup - cb.ifx(y.m, - cb.assign(ret, false), - cb.ifx(x.m, - cb.assign(ret, true), - cb.assign(ret, gtNonnull(cb, x.pv, y.pv)))) - ret - } - - def gteq(cb: EmitCodeBuilder, x: EmitCode, y: EmitCode, missingEqual: Boolean): Code[Boolean] = { - val ret = cb.newLocal[Boolean]("gteq") - cb += x.setup - cb += y.setup - if (missingEqual) { - cb.ifx(x.m, - cb.assign(ret, true), - cb.ifx(y.m, - cb.assign(ret, false), - cb.assign(ret, gteqNonnull(cb, x.pv, y.pv)))) - } else { - cb.ifx(y.m, - cb.assign(ret, false), - cb.ifx(x.m, - cb.assign(ret, true), - cb.assign(ret, gteqNonnull(cb, x.pv, y.pv)))) - } - ret - } - - def equiv(cb: EmitCodeBuilder, x: EmitCode, y: EmitCode, missingEqual: Boolean): Code[Boolean] = { - val ret = cb.newLocal[Boolean]("eq") - cb += x.setup - cb += y.setup - if (missingEqual) { - val xm = 
cb.newLocal("cord_equiv_xm", x.m) - val ym = cb.newLocal("cord_equiv_ym", y.m) - cb.ifx(xm && ym, - cb.assign(ret, true), - cb.ifx(!xm && !ym, - cb.assign(ret, equivNonnull(cb, x.pv, y.pv)), - cb.assign(ret, false))) - } else { - cb.ifx(!x.m && !y.m, cb.assign(ret, equivNonnull(cb, x.pv, y.pv)), cb.assign(ret, false)) - } - ret - } - - // reverses the sense of the non-null comparison only - def reverse: CodeOrdering = new CodeOrdering () { - override def reverse: CodeOrdering = CodeOrdering.this - - override def compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode) = CodeOrdering.this.compareNonnull(cb, y, x) - override def ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode) = CodeOrdering.this.ltNonnull(cb, y, x) - override def lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode) = CodeOrdering.this.lteqNonnull(cb, y, x) - override def gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode) = CodeOrdering.this.gtNonnull(cb, y, x) - override def gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode) = CodeOrdering.this.gteqNonnull(cb, y, x) - override def equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode) = CodeOrdering.this.equivNonnull(cb, y, x) - } -} - -abstract class CodeOrderingCompareConsistentWithOthers extends CodeOrdering { - def ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = compareNonnull(cb, x, y) < 0 - - def lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = compareNonnull(cb, x, y) <= 0 - - def gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = compareNonnull(cb, x, y) > 0 - - def gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = compareNonnull(cb, x, y) >= 0 - - def equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = compareNonnull(cb, x, y).ceq(0) -} diff --git a/hail/src/main/scala/is/hail/annotations/RegionPool.scala b/hail/src/main/scala/is/hail/annotations/RegionPool.scala index c1702169edb..a2f92170358 100644 --- a/hail/src/main/scala/is/hail/annotations/RegionPool.scala +++ b/hail/src/main/scala/is/hail/annotations/RegionPool.scala @@ -138,7 +138,13 @@ final class RegionPool private(strictMemoryCheck: Boolean, threadName: String, t override def finalize(): Unit = close() + private[this] var closed: Boolean = false + def close(): Unit = { + if (closed) + return + closed = true + report("FREE") var i = 0 @@ -167,4 +173,4 @@ final class RegionPool private(strictMemoryCheck: Boolean, threadName: String, t warn(msg) } } -} \ No newline at end of file +} diff --git a/hail/src/main/scala/is/hail/asm4s/ClassBuilder.scala b/hail/src/main/scala/is/hail/asm4s/ClassBuilder.scala index 3099c2bdff6..8bde2e6a2a8 100644 --- a/hail/src/main/scala/is/hail/asm4s/ClassBuilder.scala +++ b/hail/src/main/scala/is/hail/asm4s/ClassBuilder.scala @@ -13,6 +13,7 @@ import org.objectweb.asm.ClassReader import scala.collection.mutable import scala.reflect.ClassTag +import java.nio.charset.StandardCharsets class Field[T: TypeInfo](classBuilder: ClassBuilder[_], val name: String) { val ti: TypeInfo[T] = implicitly @@ -60,7 +61,10 @@ class ClassesBytes(classesBytes: Array[(String, Array[Byte])]) extends Serializa HailClassLoader.loadOrDefineClass(n, bytes) } catch { case e: Exception => - FunctionBuilder.bytesToBytecodeString(bytes, FunctionBuilder.stderrAndLoggerErrorOS) + val buffer = new ByteArrayOutputStream() + FunctionBuilder.bytesToBytecodeString(bytes, buffer) + val classJVMByteCodeAsEscapedStr = buffer.toString(StandardCharsets.UTF_8.name()) + log.error(s"Failed to load bytecode ${e}:\n" + 
classJVMByteCodeAsEscapedStr) throw e } } @@ -404,8 +408,6 @@ class ClassBuilder[C]( } object FunctionBuilder { - val stderrAndLoggerErrorOS = getStderrAndLogOutputStream[FunctionBuilder[_]] - def bytesToBytecodeString(bytes: Array[Byte], out: OutputStream) { val tcv = new TraceClassVisitor(null, new Textifier, new PrintWriter(out)) new ClassReader(bytes).accept(tcv, 0) diff --git a/hail/src/main/scala/is/hail/backend/Backend.scala b/hail/src/main/scala/is/hail/backend/Backend.scala index f954eea539d..08867cac027 100644 --- a/hail/src/main/scala/is/hail/backend/Backend.scala +++ b/hail/src/main/scala/is/hail/backend/Backend.scala @@ -48,6 +48,6 @@ abstract class Backend { stage: TableStage, sortFields: IndexedSeq[SortField], relationalLetsAbove: Map[String, IR], - tableTypeRequiredness: RTable + rowTypeRequiredness: RStruct ): TableStage } diff --git a/hail/src/main/scala/is/hail/backend/local/LocalBackend.scala b/hail/src/main/scala/is/hail/backend/local/LocalBackend.scala index b46af5a09a6..9d90359f2e7 100644 --- a/hail/src/main/scala/is/hail/backend/local/LocalBackend.scala +++ b/hail/src/main/scala/is/hail/backend/local/LocalBackend.scala @@ -270,7 +270,7 @@ class LocalBackend( stage: TableStage, sortFields: IndexedSeq[SortField], relationalLetsAbove: Map[String, IR], - tableTypeRequiredness: RTable + rowTypeRequiredness: RStruct ): TableStage = { // Use a local sort for the moment to enable larger pipelines to run LowerDistributedSort.localSort(ctx, stage, sortFields, relationalLetsAbove) diff --git a/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala b/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala index 86e35a56f57..15fffb0d966 100644 --- a/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala +++ b/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala @@ -1,9 +1,12 @@ package is.hail.backend.service import java.io._ +import java.net._ +import java.nio.charset.StandardCharsets +import java.util.concurrent._ import is.hail.HailContext -import is.hail.annotations.{Annotation, Region, SafeRow, UnsafeRow} +import is.hail.annotations._ import is.hail.asm4s._ import is.hail.backend.{Backend, BackendContext, BroadcastValue, HailTaskContext} import is.hail.expr.JSONAnnotationImpex @@ -12,24 +15,27 @@ import is.hail.expr.ir.{Compile, ExecuteContext, IR, IRParser, Literal, MakeArra import is.hail.io.fs.GoogleStorageFS import is.hail.linalg.BlockMatrix import is.hail.rvd.RVDPartitioner -import is.hail.services.{DeployConfig, Tokens} +import is.hail.services._ import is.hail.services.batch_client.BatchClient import is.hail.services.shuffler.ShuffleClient import is.hail.types._ -import is.hail.types.encoded.{EBaseStruct, EType} -import is.hail.types.physical.{PBaseStruct, PType} -import is.hail.types.virtual.{TArray, TInt64, TInterval, TShuffle, TStruct, Type} -import is.hail.utils._ +import is.hail.types.encoded._ +import is.hail.types.physical._ +import is.hail.types.virtual._ +import is.hail.utils.{log => donotuseme, _} import is.hail.variant.ReferenceGenome import org.apache.commons.io.IOUtils -import org.apache.log4j.LogManager +import org.apache.log4j.Logger import org.apache.spark.sql.Row import org.json4s.JsonAST._ import org.json4s.jackson.JsonMethods import org.json4s.{DefaultFormats, Formats} +import org.newsclub.net.unix.{AFUNIXSocket, AFUNIXSocketAddress, AFUNIXServerSocket} + import scala.collection.mutable import scala.reflect.ClassTag +import scala.annotation.switch class ServiceTaskContext(val partitionId: Int) extends 
HailTaskContext { override type BackendType = ServiceBackend @@ -39,7 +45,13 @@ class ServiceTaskContext(val partitionId: Int) extends HailTaskContext { override def attemptNumber(): Int = 0 } +object WorkerTimer { + private val log = Logger.getLogger(getClass.getName()) +} + class WorkerTimer() { + import WorkerTimer._ + var startTimes: mutable.Map[String, Long] = mutable.Map() def start(label: String): Unit = { startTimes.put(label, System.nanoTime()) @@ -56,6 +68,8 @@ class WorkerTimer() { } object Worker { + private val log = Logger.getLogger(getClass.getName()) + def main(args: Array[String]): Unit = { if (args.length != 2) { throw new IllegalArgumentException(s"expected two arguments, not: ${ args.length }") @@ -73,32 +87,42 @@ object Worker { timer.start(s"Job $i") timer.start("readInputs") - val fs = using(new FileInputStream(s"$scratchDir/gsa-key/key.json")) { is => - new GoogleStorageFS(IOUtils.toString(is)) + val fs = retryTransientErrors { + using(new FileInputStream(s"$scratchDir/gsa-key/key.json")) { is => + new GoogleStorageFS(IOUtils.toString(is)) + } } - val f = using(new ObjectInputStream(fs.openNoCompression(s"$root/f"))) { is => - is.readObject().asInstanceOf[(Array[Byte], HailTaskContext) => Array[Byte]] + val f = retryTransientErrors { + using(new ObjectInputStream(fs.openNoCompression(s"$root/f"))) { is => + is.readObject().asInstanceOf[(Array[Byte], HailTaskContext) => Array[Byte]] + } } var offset = 0L var length = 0 - using(fs.openNoCompression(s"$root/context.offsets")) { is => - is.seek(i * 12) - offset = is.readLong() - length = is.readInt() + retryTransientErrors { + using(fs.openNoCompression(s"$root/context.offsets")) { is => + is.seek(i * 12) + offset = is.readLong() + length = is.readInt() + } } - val context = using(fs.openNoCompression(s"$root/contexts")) { is => - is.seek(offset) - val context = new Array[Byte](length) - is.readFully(context) - context + val context = retryTransientErrors { + using(fs.openNoCompression(s"$root/contexts")) { is => + is.seek(offset) + val context = new Array[Byte](length) + is.readFully(context) + context + } } timer.end("readInputs") timer.start("executeFunction") + val hailContext = HailContext( + ServiceBackend(), skipLoggingConfiguration = true, quiet = true) val htc = new ServiceTaskContext(i) HailTaskContext.setTaskContext(htc) val result = f(context, htc) @@ -127,7 +151,7 @@ class ServiceBackendContext( } object ServiceBackend { - lazy val log = LogManager.getLogger("is.hail.backend.service.ServiceBackend") + private val log = Logger.getLogger(getClass.getName()) def apply(): ServiceBackend = { new ServiceBackend() @@ -139,25 +163,19 @@ class User( val tmpdir: String, val fs: GoogleStorageFS) -final class Response(val status: Int, val value: String) - class ServiceBackend() extends Backend { import ServiceBackend.log - private[this] val users = mutable.Map[String, User]() + private[this] val users = new ConcurrentHashMap[String, User]() - def addUser(username: String, key: String): Unit = { - assert(!users.contains(username)) - users += username -> new User(username, "/tmp", new GoogleStorageFS(key)) - } - - def removeUser(username: String): Unit = { - assert(users.contains(username)) - users -= username + def addUser(username: String, key: String): Unit = synchronized { + val previous = users.put(username, new User(username, "/tmp", new GoogleStorageFS(key))) + assert(previous == null) } def userContext[T](username: String, timer: ExecutionTimer)(f: (ExecuteContext) => T): T = { - val user = users(username) + val 
user = users.get(username) + assert(user != null, username) ExecuteContext.scoped(user.tmpdir, "file:///tmp", this, user.fs, timer, null)(f) } @@ -170,7 +188,8 @@ class ServiceBackend() extends Backend { def parallelizeAndComputeWithIndex(_backendContext: BackendContext, collection: Array[Array[Byte]], dependency: Option[TableStageDependency] = None)(f: (Array[Byte], HailTaskContext) => Array[Byte]): Array[Array[Byte]] = { val backendContext = _backendContext.asInstanceOf[ServiceBackendContext] - val user = users(backendContext.username) + val user = users.get(backendContext.username) + assert(user != null, backendContext.username) val fs = user.fs val n = collection.length @@ -256,151 +275,128 @@ class ServiceBackend() extends Backend { def stop(): Unit = () - def formatException(e: Exception): String = { - using(new StringWriter()) { sw => - using(new PrintWriter(sw)) { pw => - e.printStackTrace(pw) - sw.toString + def valueType(username: String, s: String): String = { + ExecutionTimer.logTime("ServiceBackend.valueType") { timer => + userContext(username, timer) { ctx => + val x = IRParser.parse_value_ir(ctx, s) + x.typ.toString } } } - def statusForException(f: => String): Response = { - try { - new Response(200, f) - } catch { - case e: HailException => - new Response(400, formatException(e)) - case e: Exception => - new Response(500, formatException(e)) - } - } - - def valueType(username: String, s: String): Response = { - statusForException { - ExecutionTimer.logTime("ServiceBackend.valueType") { timer => - userContext(username, timer) { ctx => - val x = IRParser.parse_value_ir(ctx, s) - x.typ.toString - } + def tableType(username: String, s: String): String = { + ExecutionTimer.logTime("ServiceBackend.tableType") { timer => + userContext(username, timer) { ctx => + val x = IRParser.parse_table_ir(ctx, s) + val t = x.typ + val jv = JObject("global" -> JString(t.globalType.toString), + "row" -> JString(t.rowType.toString), + "row_key" -> JArray(t.key.map(f => JString(f)).toList)) + JsonMethods.compact(jv) } } } - def tableType(username: String, s: String): Response = { - statusForException { - ExecutionTimer.logTime("ServiceBackend.tableType") { timer => - userContext(username, timer) { ctx => - val x = IRParser.parse_table_ir(ctx, s) - val t = x.typ - val jv = JObject("global" -> JString(t.globalType.toString), - "row" -> JString(t.rowType.toString), - "row_key" -> JArray(t.key.map(f => JString(f)).toList)) - JsonMethods.compact(jv) - } + def matrixTableType(username: String, s: String): String = { + ExecutionTimer.logTime("ServiceBackend.matrixTableType") { timer => + userContext(username, timer) { ctx => + val x = IRParser.parse_matrix_ir(ctx, s) + val t = x.typ + val jv = JObject("global" -> JString(t.globalType.toString), + "col" -> JString(t.colType.toString), + "col_key" -> JArray(t.colKey.map(f => JString(f)).toList), + "row" -> JString(t.rowType.toString), + "row_key" -> JArray(t.rowKey.map(f => JString(f)).toList), + "entry" -> JString(t.entryType.toString)) + JsonMethods.compact(jv) } } } - def matrixTableType(username: String, s: String): Response = { - statusForException { - ExecutionTimer.logTime("ServiceBackend.matrixTableType") { timer => - userContext(username, timer) { ctx => - val x = IRParser.parse_matrix_ir(ctx, s) - val t = x.typ - val jv = JObject("global" -> JString(t.globalType.toString), - "col" -> JString(t.colType.toString), - "col_key" -> JArray(t.colKey.map(f => JString(f)).toList), - "row" -> JString(t.rowType.toString), - "row_key" -> 
JArray(t.rowKey.map(f => JString(f)).toList), - "entry" -> JString(t.entryType.toString)) - JsonMethods.compact(jv) - } + def blockMatrixType(username: String, s: String): String = { + ExecutionTimer.logTime("ServiceBackend.blockMatrixType") { timer => + userContext(username, timer) { ctx => + val x = IRParser.parse_blockmatrix_ir(ctx, s) + val t = x.typ + val jv = JObject("element_type" -> JString(t.elementType.toString), + "shape" -> JArray(t.shape.map(s => JInt(s)).toList), + "is_row_vector" -> JBool(t.isRowVector), + "block_size" -> JInt(t.blockSize)) + JsonMethods.compact(jv) } } } - def blockMatrixType(username: String, s: String): Response = { - statusForException { - ExecutionTimer.logTime("ServiceBackend.blockMatrixType") { timer => - userContext(username, timer) { ctx => - val x = IRParser.parse_blockmatrix_ir(ctx, s) - val t = x.typ - val jv = JObject("element_type" -> JString(t.elementType.toString), - "shape" -> JArray(t.shape.map(s => JInt(s)).toList), - "is_row_vector" -> JBool(t.isRowVector), - "block_size" -> JInt(t.blockSize)) - JsonMethods.compact(jv) - } - } - } + def referenceGenome(username: String, name: String): String = { + ReferenceGenome.getReference(name).toJSONString } - def referenceGenome(username: String, name: String): Response = { - statusForException { - ReferenceGenome.getReference(name).toJSONString + private[this] def execute(ctx: ExecuteContext, _x: IR): Option[(Annotation, PType)] = { + val x = LoweringPipeline.darrayLowerer(true)(DArrayLowering.All).apply(ctx, _x) + .asInstanceOf[IR] + if (x.typ == TVoid) { + val (_, f) = Compile[AsmFunction1RegionUnit](ctx, + FastIndexedSeq[(String, PType)](), + FastIndexedSeq[TypeInfo[_]](classInfo[Region]), UnitInfo, + x, + optimize = true) + + f(0, ctx.r)(ctx.r) + None + } else { + val (pt, f) = Compile[AsmFunction1RegionLong](ctx, + FastIndexedSeq[(String, PType)](), + FastIndexedSeq[TypeInfo[_]](classInfo[Region]), LongInfo, + MakeTuple.ordered(FastIndexedSeq(x)), + optimize = true) + + val a = f(0, ctx.r)(ctx.r) + val retPType = pt.asInstanceOf[PBaseStruct] + Some((new UnsafeRow(retPType, ctx.r, a).get(0), retPType.types(0))) } } - private[this] def execute(ctx: ExecuteContext, _x: IR): (Annotation, PType) = { - val x = LoweringPipeline.darrayLowerer(true)(DArrayLowering.All).apply(ctx, _x) - .asInstanceOf[IR] - val (pt, f) = Compile[AsmFunction1RegionLong](ctx, - FastIndexedSeq[(String, PType)](), - FastIndexedSeq[TypeInfo[_]](classInfo[Region]), LongInfo, - MakeTuple.ordered(FastIndexedSeq(x)), - optimize = true) - - val a = f(0, ctx.r)(ctx.r) - val retPType = pt.asInstanceOf[PBaseStruct] - (new UnsafeRow(retPType, ctx.r, a).get(0), retPType.types(0)) - } - def execute(username: String, sessionID: String, billingProject: String, bucket: String, code: String): Response = { - statusForException { - ExecutionTimer.logTime("ServiceBackend.execute") { timer => - userContext(username, timer) { ctx => - ctx.backendContext = new ServiceBackendContext(username, sessionID, billingProject, bucket) - - val (v, t) = execute(ctx, IRParser.parse_value_ir(ctx, code)) - - JsonMethods.compact( - JObject(List("value" -> JSONAnnotationImpex.exportAnnotation(v, t.virtualType), - "type" -> JString(t.virtualType.toString)))) + def execute(username: String, sessionID: String, billingProject: String, bucket: String, code: String, token: String): String = { + ExecutionTimer.logTime("ServiceBackend.execute") { timer => + userContext(username, timer) { ctx => + log.info(s"executing: ${token}") + ctx.backendContext = new 
ServiceBackendContext(username, sessionID, billingProject, bucket) + + execute(ctx, IRParser.parse_value_ir(ctx, code)) match { + case Some((v, t)) => + JsonMethods.compact( + JObject(List("value" -> JSONAnnotationImpex.exportAnnotation(v, t.virtualType), + "type" -> JString(t.virtualType.toString)))) + case None => + JsonMethods.compact( + JObject(List("value" -> null, "type" -> JString(TVoid.toString)))) } } } } - def flags(): Response = { - statusForException { - JsonMethods.compact(JObject(HailContext.get.flags.available.toArray().map { case f: String => - val v = HailContext.getFlag(f) - f -> (if (v == null) JNull else JString(v)) - }: _*)) - } + def flags(): String = { + JsonMethods.compact(JObject(HailContext.get.flags.available.toArray().map { case f: String => + val v = HailContext.getFlag(f) + f -> (if (v == null) JNull else JString(v)) + }: _*)) } - def getFlag(name: String): Response = { - statusForException { - val v = HailContext.getFlag(name) - JsonMethods.compact(if (v == null) JNull else JString(v)) - } + def getFlag(name: String): String = { + val v = HailContext.getFlag(name) + JsonMethods.compact(if (v == null) JNull else JString(v)) } - def setFlag(name: String, value: String): Response = { - statusForException { - val v = HailContext.getFlag(name) - HailContext.setFlag(name, value) - JsonMethods.compact(if (v == null) JNull else JString(v)) - } + def setFlag(name: String, value: String): String = { + val v = HailContext.getFlag(name) + HailContext.setFlag(name, value) + JsonMethods.compact(if (v == null) JNull else JString(v)) } - def unsetFlag(name: String): Response = { - statusForException { - val v = HailContext.getFlag(name) - HailContext.setFlag(name, null) - JsonMethods.compact(if (v == null) JNull else JString(v)) - } + def unsetFlag(name: String): String = { + val v = HailContext.getFlag(name) + HailContext.setFlag(name, null) + JsonMethods.compact(if (v == null) JNull else JString(v)) } def lowerDistributedSort( @@ -408,14 +404,14 @@ class ServiceBackend() extends Backend { stage: TableStage, sortFields: IndexedSeq[SortField], relationalLetsAbove: Map[String, IR], - tableTypeRequiredness: RTable + rowTypeRequiredness: RStruct ): TableStage = { val region = ctx.r val rowType = stage.rowType val keyFields = sortFields.map(_.field).toArray val keyType = rowType.typeAfterSelectNames(keyFields) - val rowEType = EType.fromTypeAndAnalysis(rowType, tableTypeRequiredness.rowType).asInstanceOf[EBaseStruct] - val keyEType = EType.fromTypeAndAnalysis(keyType, tableTypeRequiredness.rowType.select(keyFields)).asInstanceOf[EBaseStruct] + val rowEType = EType.fromTypeAndAnalysis(rowType, rowTypeRequiredness).asInstanceOf[EBaseStruct] + val keyEType = EType.fromTypeAndAnalysis(keyType, rowTypeRequiredness.select(keyFields)).asInstanceOf[EBaseStruct] val shuffleType = TShuffle(sortFields, rowType, rowEType, keyEType) val shuffleClient = new ShuffleClient(shuffleType, ctx) assert(keyType == shuffleClient.codecs.keyType) @@ -430,10 +426,13 @@ class ServiceBackend() extends Backend { }) try { - val successfulPartitionIds = execute( + val Some((successfulPartitionIdsAndGlobals, pType)) = execute( ctx, - stage.mapCollect(relationalLetsAbove)( - ShuffleWrite(Literal(shuffleType, uuid), _))) + stage.mapCollectWithGlobals + (relationalLetsAbove) + { partition => ShuffleWrite(Literal(shuffleType, uuid), partition) } + { (rows, globals) => MakeTuple.ordered(Seq(rows, globals)) }) + val globals = successfulPartitionIdsAndGlobals.asInstanceOf[UnsafeRow].get(1) val partitionBoundsPointers 
= shuffleClient.partitionBounds(region, stage.numPartitions) val partitionIntervals = partitionBoundsPointers.zip(partitionBoundsPointers.drop(1)).map { case (l, r) => @@ -446,7 +445,7 @@ class ServiceBackend() extends Backend { val partitioner = new RVDPartitioner(keyType, partitionIntervals.toFastIndexedSeq) TableStage( - globals = Literal(TStruct(), Row()), + globals = Literal(stage.globalType, globals), partitioner = partitioner, TableStageDependency.none, contexts = ToStream(MakeArray( @@ -472,13 +471,308 @@ class ServiceBackend() extends Backend { billingProject: String, bucket: String, path: String - ): Response = { - statusForException { - ExecutionTimer.logTime("ServiceBackend.loadReferencesFromDataset") { timer => - userContext(username, timer) { ctx => - ReferenceGenome.fromHailDataset(ctx.fs, path) + ): String = { + ExecutionTimer.logTime("ServiceBackend.loadReferencesFromDataset") { timer => + userContext(username, timer) { ctx => + ReferenceGenome.fromHailDataset(ctx.fs, path) + } + } + } +} + +class EndOfInputException extends RuntimeException + +object ServiceBackendSocketAPI { + private val log = Logger.getLogger(getClass.getName()) +} + +class ServiceBackendSocketAPI(backend: ServiceBackend, socket: Socket) extends Thread { + import ServiceBackendSocketAPI._ + + private[this] val LOAD_REFERENCES_FROM_DATASET = 1 + private[this] val VALUE_TYPE = 2 + private[this] val TABLE_TYPE = 3 + private[this] val MATRIX_TABLE_TYPE = 4 + private[this] val BLOCK_MATRIX_TYPE = 5 + private[this] val REFERENCE_GENOME = 6 + private[this] val EXECUTE = 7 + private[this] val FLAGS = 8 + private[this] val GET_FLAG = 9 + private[this] val UNSET_FLAG = 10 + private[this] val SET_FLAG = 11 + private[this] val ADD_USER = 12 + private[this] val GOODBYE = 254 + + private[this] val in = socket.getInputStream + private[this] val out = socket.getOutputStream + + private[this] val dummy = new Array[Byte](8) + + def read(bytes: Array[Byte], off: Int, n: Int): Unit = { + assert(off + n <= bytes.length) + var read = 0 + while (read < n) { + val r = in.read(bytes, off + read, n - read) + if (r < 0) { + throw new EndOfInputException + } else { + read += r + } + } + } + + def readInt(): Int = { + read(dummy, 0, 4) + Memory.loadInt(dummy, 0) + } + + def readLong(): Long = { + read(dummy, 0, 8) + Memory.loadLong(dummy, 0) + } + + def readBytes(): Array[Byte] = { + val n = readInt() + val bytes = new Array[Byte](n) + read(bytes, 0, n) + bytes + } + + def readString(): String = new String(readBytes(), StandardCharsets.UTF_8) + + def writeBool(b: Boolean): Unit = { + out.write(if (b) 1 else 0) + } + + def writeInt(v: Int): Unit = { + Memory.storeInt(dummy, 0, v) + out.write(dummy, 0, 4) + } + + def writeLong(v: Long): Unit = { + Memory.storeLong(dummy, 0, v) + out.write(dummy) + } + + def writeBytes(bytes: Array[Byte]): Unit = { + writeInt(bytes.length) + out.write(bytes) + } + + def writeString(s: String): Unit = writeBytes(s.getBytes(StandardCharsets.UTF_8)) + + def eventLoop(): Unit = { + var continue = true + while (continue) { + val cmd = readInt() + + (cmd: @switch) match { + case LOAD_REFERENCES_FROM_DATASET => + val username = readString() + val sessionId = readString() + val billingProject = readString() + val bucket = readString() + val path = readString() + try { + val result = backend.loadReferencesFromDataset(username, sessionId, billingProject, bucket, path) + writeBool(true) + writeString(result) + } catch { + case t: Throwable => + writeBool(false) + writeString(formatException(t)) + } + + case 
VALUE_TYPE => + val username = readString() + val s = readString() + try { + val result = backend.valueType(username, s) + writeBool(true) + writeString(result) + } catch { + case t: Throwable => + writeBool(false) + writeString(formatException(t)) + } + + case TABLE_TYPE => + val username = readString() + val s = readString() + try { + val result = backend.tableType(username, s) + writeBool(true) + writeString(result) + } catch { + case t: Throwable => + writeBool(false) + writeString(formatException(t)) + } + + case MATRIX_TABLE_TYPE => + val username = readString() + val s = readString() + try { + val result = backend.matrixTableType(username, s) + writeBool(true) + writeString(result) + } catch { + case t: Throwable => + writeBool(false) + writeString(formatException(t)) + } + + case BLOCK_MATRIX_TYPE => + val username = readString() + val s = readString() + try { + val result = backend.blockMatrixType(username, s) + writeBool(true) + writeString(result) + } catch { + case t: Throwable => + writeBool(false) + writeString(formatException(t)) + } + + case REFERENCE_GENOME => + val username = readString() + val name = readString() + try { + val result = backend.referenceGenome(username, name) + writeBool(true) + writeString(result) + } catch { + case t: Throwable => + writeBool(false) + writeString(formatException(t)) + } + + case EXECUTE => + val username = readString() + val sessionId = readString() + val billingProject = readString() + val bucket = readString() + val code = readString() + val token = readString() + try { + val result = backend.execute(username, sessionId, billingProject, bucket, code, token) + writeBool(true) + writeString(result) + } catch { + case t: Throwable => + writeBool(false) + writeString(formatException(t)) + } + + case FLAGS => + try { + val result = backend.flags() + writeBool(true) + writeString(result) + } catch { + case t: Throwable => + writeBool(false) + writeString(formatException(t)) + } + + case GET_FLAG => + val name = readString() + try { + val result = backend.getFlag(name) + writeBool(true) + writeString(result) + } catch { + case t: Throwable => + writeBool(false) + writeString(formatException(t)) + } + + case SET_FLAG => + val name = readString() + val value = readString() + try { + val result = backend.setFlag(name, value) + writeBool(true) + writeString(result) + } catch { + case t: Throwable => + writeBool(false) + writeString(formatException(t)) + } + + case UNSET_FLAG => + val name = readString() + try { + val result = backend.unsetFlag(name) + writeBool(true) + writeString(result) + } catch { + case t: Throwable => + writeBool(false) + writeString(formatException(t)) + } + + case ADD_USER => + val name = readString() + val gsaKey = readString() + try { + val result = backend.addUser(name, gsaKey) + writeBool(true) + } catch { + case t: Throwable => + writeBool(false) + writeString(formatException(t)) + } + + case GOODBYE => + continue = false + writeInt(GOODBYE) + } + } + } + + override def run(): Unit = { + try { + eventLoop() + } catch { + case t: Throwable => + log.info("ServiceBackendSocketAPI caught exception", t) + } finally { + socket.close() + } + } +} + +object ServiceBackendMain { + private val log = Logger.getLogger(getClass.getName()) + + def main(argv: Array[String]): Unit = { + assert(argv.length == 1, argv.toFastIndexedSeq) + val udsAddress = argv(0) + val executor = Executors.newCachedThreadPool() + val backend = new ServiceBackend() + HailContext(backend, "hail.log", false, false, 50, skipLoggingConfiguration = true, 3) 
+ + val ss = AFUNIXServerSocket.newInstance() + ss.bind(new AFUNIXSocketAddress(new File(udsAddress))) + try { + log.info(s"serving on ${udsAddress}") + while (true) { + val sock = ss.accept() + try { + log.info(s"accepted") + executor.execute(new ServiceBackendSocketAPI(backend, sock)) + } catch { + case e: SocketException => { + log.info(s"exception while handing socket to thread", e) + sock.close() + } } } + } catch { + case se: SocketException => + fatal("unexpected closed server socket", se) } } } diff --git a/hail/src/main/scala/is/hail/backend/spark/SparkBackend.scala b/hail/src/main/scala/is/hail/backend/spark/SparkBackend.scala index 8c53f70d862..d345e0c502d 100644 --- a/hail/src/main/scala/is/hail/backend/spark/SparkBackend.scala +++ b/hail/src/main/scala/is/hail/backend/spark/SparkBackend.scala @@ -629,7 +629,7 @@ class SparkBackend( stage: TableStage, sortFields: IndexedSeq[SortField], relationalLetsAbove: Map[String, IR], - tableTypeRequiredness: RTable + rowTypeRequiredness: RStruct ): TableStage = { val (globals, rvd) = TableStageToRVD(ctx, stage, relationalLetsAbove) diff --git a/hail/src/main/scala/is/hail/expr/ir/BinarySearch.scala b/hail/src/main/scala/is/hail/expr/ir/BinarySearch.scala index 34c03326688..b7934e543d0 100644 --- a/hail/src/main/scala/is/hail/expr/ir/BinarySearch.scala +++ b/hail/src/main/scala/is/hail/expr/ir/BinarySearch.scala @@ -1,7 +1,8 @@ package is.hail.expr.ir -import is.hail.annotations.{CodeOrdering, Region} +import is.hail.annotations.Region import is.hail.asm4s._ +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.types.physical._ import is.hail.types.physical.stypes._ import is.hail.utils.FastIndexedSeq @@ -34,7 +35,7 @@ class BinarySearch[C](mb: EmitMethodBuilder[C], typ: PContainer, eltType: PType, v2.memoize(cb, "bs_comp_v2").loadStart(cb) }.map(cb)(_.asPCode) } - findMB.getCodeOrdering(eltType, kt, CodeOrdering.Compare())(cb, ec1, ec2) + findMB.ecb.getOrderingFunction(eltType.sType, kt.sType, CodeOrdering.Compare())(cb, ec1, ec2) } val ceq: CodeOrdering.F[Boolean] = { (cb: EmitCodeBuilder, ec1: EmitCode, _ec2: EmitCode) => @@ -47,12 +48,12 @@ class BinarySearch[C](mb: EmitMethodBuilder[C], typ: PContainer, eltType: PType, v2.memoize(cb, "bs_comp_v2").loadStart(cb) }.map(cb)(_.asPCode) } - findMB.getCodeOrdering(eltType, kt, CodeOrdering.Equiv())(cb, ec1, ec2) + findMB.ecb.getOrderingFunction(eltType.sType, kt.sType, CodeOrdering.Equiv())(cb, ec1, ec2) } (comp, ceq, findMB) } else - (mb.getCodeOrdering(eltType, elt, CodeOrdering.Compare()), - mb.getCodeOrdering(eltType, elt, CodeOrdering.Equiv()), + (mb.ecb.getOrderingFunction(eltType.sType, elt.sType, CodeOrdering.Compare()), + mb.ecb.getOrderingFunction(eltType.sType, elt.sType, CodeOrdering.Equiv()), mb.genEmitMethod("findElt", FastIndexedSeq[ParamType](typeInfo[Long], typeInfo[Boolean], elt.ti), typeInfo[Int])) private[this] val array = findElt.getCodeParam[Long](1) diff --git a/hail/src/main/scala/is/hail/expr/ir/ComparisonOp.scala b/hail/src/main/scala/is/hail/expr/ir/ComparisonOp.scala index c2ed7acc92f..2a248a2d3f7 100644 --- a/hail/src/main/scala/is/hail/expr/ir/ComparisonOp.scala +++ b/hail/src/main/scala/is/hail/expr/ir/ComparisonOp.scala @@ -1,7 +1,8 @@ package is.hail.expr.ir -import is.hail.annotations.CodeOrdering +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.types.physical.PType +import is.hail.types.physical.stypes.SType import is.hail.types.virtual.{TStruct, Type} object ComparisonOp { @@ -54,9 +55,9 @@ sealed trait ComparisonOp[ReturnType] { def 
t2: Type val op: CodeOrdering.Op val strict: Boolean = true - def codeOrdering(mb: EmitMethodBuilder[_], t1p: PType, t2p: PType): CodeOrdering.F[op.ReturnType] = { + def codeOrdering(ecb: EmitClassBuilder[_], t1p: SType, t2p: SType): CodeOrdering.F[op.ReturnType] = { ComparisonOp.checkCompatible(t1p.virtualType, t2p.virtualType) - mb.getCodeOrdering(t1p, t2p, op) + ecb.getOrderingFunction(t1p, t2p, op).asInstanceOf[CodeOrdering.F[op.ReturnType]] } def render(): is.hail.utils.prettyPrint.Doc = Pretty.prettyClass(this) diff --git a/hail/src/main/scala/is/hail/expr/ir/Emit.scala b/hail/src/main/scala/is/hail/expr/ir/Emit.scala index eca325d54d7..6f31217657c 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Emit.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Emit.scala @@ -14,7 +14,7 @@ import is.hail.io.{BufferSpec, InputBuffer, OutputBuffer} import is.hail.linalg.{BLAS, LAPACK, LinalgCodeUtils} import is.hail.services.shuffler._ import is.hail.types.physical._ -import is.hail.types.physical.stypes.SCode +import is.hail.types.physical.stypes.{SCode, SType} import is.hail.types.physical.stypes.concrete.{SBaseStructPointer, SBaseStructPointerCode, SCanonicalShufflePointerCode, SCanonicalShufflePointerSettable} import is.hail.types.physical.stypes.interfaces.{SBaseStructCode, SNDArray, SNDArrayCode} import is.hail.types.physical.stypes.primitives.{SFloat32, SFloat64, SInt32, SInt64, SInt64Code} @@ -394,6 +394,8 @@ object EmitCode { } class EmitCode(private val start: CodeLabel, private val iec: IEmitCode) { + def st: SType = iec.value.st + def pv: PCode = iec.value def setup: Code[Unit] = Code._empty @@ -883,14 +885,14 @@ class Emit[C]( if (op.strict) { emitI(l).flatMap(cb) { l => emitI(r).map(cb) { r => - val f = op.codeOrdering(mb, l.pt, r.pt) + val f = op.codeOrdering(cb.emb.ecb, l.st, r.st) PCode(pt, f(cb, EmitCode.present(cb.emb, l), EmitCode.present(cb.emb, r))) } } } else { val lc = emitI(l).memoize(cb, "l") val rc = emitI(r).memoize(cb, "r") - val f = op.codeOrdering(mb, lc.pt, rc.pt) + val f = op.codeOrdering(cb.emb.ecb, lc.st, rc.st) presentC(f(cb, lc, rc)) } diff --git a/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala b/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala index 6fff79c8731..424fbc007be 100644 --- a/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala +++ b/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala @@ -4,11 +4,12 @@ import java.io._ import java.util.Base64 import is.hail.{HailContext, lir} -import is.hail.annotations.{CodeOrdering, Region, RegionPool, RegionValueBuilder, SafeRow} +import is.hail.annotations.{Region, RegionPool, RegionValueBuilder, SafeRow} import is.hail.asm4s._ import is.hail.asm4s.joinpoint.Ctrl import is.hail.backend.BackendUtils import is.hail.expr.ir.functions.IRRandomness +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.io.fs.FS import is.hail.io.{BufferSpec, InputBuffer, TypedCodecSpec} import is.hail.lir @@ -174,11 +175,6 @@ trait WrappedEmitClassBuilder[C] extends WrappedEmitModuleBuilder { def genStaticEmitMethod(baseName: String, argsInfo: IndexedSeq[ParamType], returnInfo: ParamType): EmitMethodBuilder[C] = ecb.genStaticEmitMethod(baseName, argsInfo, returnInfo) - def getCodeOrdering( - t1: PType, t2: PType, sortOrder: SortOrder, op: CodeOrdering.Op, ignoreMissingness: Boolean - ): CodeOrdering.F[op.ReturnType] = - ecb.getCodeOrdering(t1, t2, sortOrder, op, ignoreMissingness) - def addAggStates(aggSigs: Array[agg.AggStateSig]): agg.TupleAggregatorState = ecb.addAggStates(aggSigs) def 
genDependentFunction[F](baseName: String, @@ -194,24 +190,6 @@ trait WrappedEmitClassBuilder[C] extends WrappedEmitModuleBuilder { baseName: String, key: Any, argsInfo: IndexedSeq[ParamType], returnInfo: ParamType )(body: EmitMethodBuilder[C] => Unit): EmitMethodBuilder[C] = ecb.getOrGenEmitMethod(baseName, key, argsInfo, returnInfo)(body) - // derived functions - def getCodeOrdering(t: PType, op: CodeOrdering.Op): CodeOrdering.F[op.ReturnType] = - getCodeOrdering(t, t, sortOrder = Ascending, op, ignoreMissingness = false) - - def getCodeOrdering(t: PType, op: CodeOrdering.Op, ignoreMissingness: Boolean): CodeOrdering.F[op.ReturnType] = - getCodeOrdering(t, t, sortOrder = Ascending, op, ignoreMissingness) - - def getCodeOrdering(t1: PType, t2: PType, op: CodeOrdering.Op): CodeOrdering.F[op.ReturnType] = - getCodeOrdering(t1, t2, sortOrder = Ascending, op, ignoreMissingness = false) - - def getCodeOrdering(t1: PType, t2: PType, op: CodeOrdering.Op, ignoreMissingness: Boolean): CodeOrdering.F[op.ReturnType] = - getCodeOrdering(t1, t2, sortOrder = Ascending, op, ignoreMissingness) - - def getCodeOrdering( - t1: PType, t2: PType, sortOrder: SortOrder, op: CodeOrdering.Op - ): CodeOrdering.F[op.ReturnType] = - getCodeOrdering(t1, t2, sortOrder, op, ignoreMissingness = false) - def genEmitMethod[R: TypeInfo](baseName: String): EmitMethodBuilder[C] = ecb.genEmitMethod[R](baseName) @@ -353,9 +331,9 @@ class EmitClassBuilder[C]( private[this] val pTypeMap: mutable.Map[PType, Value[_ <: PType]] = mutable.Map() - private[this] type CompareMapKey = (PType, PType, CodeOrdering.Op, SortOrder, Boolean) - private[this] val compareMap: mutable.Map[CompareMapKey, CodeOrdering.F[_]] = - mutable.Map[CompareMapKey, CodeOrdering.F[_]]() + private[this] type CompareMapKey = (SType, SType) + private[this] val memoizedComparisons: mutable.Map[CompareMapKey, CodeOrdering] = + mutable.Map[CompareMapKey, CodeOrdering]() def numTypes: Int = typMap.size @@ -585,68 +563,55 @@ class EmitClassBuilder[C]( genLazyFieldThisRef[T](setup)).get.asInstanceOf[Code[T]] } - def getCodeOrdering( - t1: PType, - t2: PType, + def getOrdering(t1: SType, + t2: SType, + sortOrder: SortOrder = Ascending + ): CodeOrdering = { + val baseOrd = memoizedComparisons.getOrElseUpdate((t1, t2), { + CodeOrdering.makeOrdering(t1, t2, this) + }) + sortOrder match { + case Ascending => baseOrd + case Descending => baseOrd.reverse + } + } + + def getOrderingFunction( + t1: SType, + t2: SType, sortOrder: SortOrder, - op: CodeOrdering.Op, - ignoreMissingness: Boolean + op: CodeOrdering.Op ): CodeOrdering.F[op.ReturnType] = { - val f = compareMap.getOrElseUpdate((t1, t2, op, sortOrder, ignoreMissingness), { - val rt = op.rtti - val newMB = if (ignoreMissingness) { - val newMB = genEmitMethod("cord", FastIndexedSeq[ParamType](t1.asEmitParam, t2.asEmitParam), rt) - lazy val ord = t1.codeOrdering(newMB, t2, sortOrder) - val v1 = newMB.getEmitParam(1).pv - val v2 = newMB.getEmitParam(2).pv - newMB.emitWithBuilder { cb => - op match { - case CodeOrdering.Compare(_) => ord.compareNonnull(cb, v1, v2) - case CodeOrdering.Equiv(_) => ord.equivNonnull(cb, v1, v2) - case CodeOrdering.Lt(_) => ord.ltNonnull(cb, v1, v2) - case CodeOrdering.Lteq(_) => ord.lteqNonnull(cb, v1, v2) - case CodeOrdering.Gt(_) => ord.gtNonnull(cb, v1, v2) - case CodeOrdering.Gteq(_) => ord.gteqNonnull(cb, v1, v2) - case CodeOrdering.Neq(_) => !ord.equivNonnull(cb, v1, v2) - } - } - newMB - } else { - val newMB = genEmitMethod("cord", FastIndexedSeq[ParamType](t1.asEmitParam, t2.asEmitParam), 
rt) - lazy val ord = t1.codeOrdering(newMB, t2, sortOrder) - val v1 = newMB.getEmitParam(1) - val v2 = newMB.getEmitParam(2) - newMB.emitWithBuilder { cb => - op match { - case CodeOrdering.Compare(missingEqual) => ord.compare(cb, v1, v2, missingEqual) - case CodeOrdering.Equiv(missingEqual) => ord.equiv(cb, v1, v2, missingEqual) - case CodeOrdering.Lt(missingEqual) => ord.lt(cb, v1, v2, missingEqual) - case CodeOrdering.Lteq(missingEqual) => ord.lteq(cb, v1, v2, missingEqual) - case CodeOrdering.Gt(missingEqual) => ord.gt(cb, v1, v2, missingEqual) - case CodeOrdering.Gteq(missingEqual) => ord.gteq(cb, v1, v2, missingEqual) - case CodeOrdering.Neq(missingEqual) => !ord.equiv(cb, v1, v2, missingEqual) - } - } - newMB + val ord = getOrdering(t1, t2, sortOrder); + + { (cb: EmitCodeBuilder, v1: EmitCode, v2: EmitCode) => + + val r = op match { + case CodeOrdering.Compare(missingEqual) => ord.compare(cb, v1, v2, missingEqual) + case CodeOrdering.Equiv(missingEqual) => ord.equiv(cb, v1, v2, missingEqual) + case CodeOrdering.Lt(missingEqual) => ord.lt(cb, v1, v2, missingEqual) + case CodeOrdering.Lteq(missingEqual) => ord.lteq(cb, v1, v2, missingEqual) + case CodeOrdering.Gt(missingEqual) => ord.gt(cb, v1, v2, missingEqual) + case CodeOrdering.Gteq(missingEqual) => ord.gteq(cb, v1, v2, missingEqual) + case CodeOrdering.Neq(missingEqual) => !ord.equiv(cb, v1, v2, missingEqual) } - { (cb: EmitCodeBuilder, elhs: EmitCode, erhs: EmitCode) => - if (t1 != elhs.pt) - fatal(s"ordering types do not match (lhs), requested type=$t1, code type=${elhs.pt}") - if (t2 != erhs.pt) - fatal(s"ordering types do not match (rhs), requested type=$t2, code type=${erhs.pt}") - cb.invokeCode(newMB, elhs, erhs) - } - }) - ((cb: EmitCodeBuilder, elhs: EmitCode, erhs: EmitCode) => coerce[op.ReturnType](f(cb, elhs, erhs))) + coerce[op.ReturnType](r) + } } - def getCodeOrdering( - t: PType, + // derived functions + def getOrderingFunction(t: SType, op: CodeOrdering.Op): CodeOrdering.F[op.ReturnType] = + getOrderingFunction(t, t, sortOrder = Ascending, op) + + def getOrderingFunction(t1: SType, t2: SType, op: CodeOrdering.Op): CodeOrdering.F[op.ReturnType] = + getOrderingFunction(t1, t2, sortOrder = Ascending, op) + + def getOrderingFunction( + t: SType, op: CodeOrdering.Op, - sortOrder: SortOrder, - ignoreMissingness: Boolean + sortOrder: SortOrder ): CodeOrdering.F[op.ReturnType] = - getCodeOrdering(t, t, sortOrder, op, ignoreMissingness) + getOrderingFunction(t, t, sortOrder, op) private def getCodeArgsInfo(argsInfo: IndexedSeq[ParamType], returnInfo: ParamType): (IndexedSeq[TypeInfo[_]], TypeInfo[_]) = { val codeArgsInfo = argsInfo.flatMap { @@ -817,24 +782,6 @@ class EmitClassBuilder[C]( }) } - // derived functions - def getCodeOrdering(t: PType, op: CodeOrdering.Op): CodeOrdering.F[op.ReturnType] = - getCodeOrdering(t, t, sortOrder = Ascending, op, ignoreMissingness = false) - - def getCodeOrdering(t: PType, op: CodeOrdering.Op, ignoreMissingness: Boolean): CodeOrdering.F[op.ReturnType] = - getCodeOrdering(t, t, sortOrder = Ascending, op, ignoreMissingness) - - def getCodeOrdering(t1: PType, t2: PType, op: CodeOrdering.Op): CodeOrdering.F[op.ReturnType] = - getCodeOrdering(t1, t2, sortOrder = Ascending, op, ignoreMissingness = false) - - def getCodeOrdering(t1: PType, t2: PType, op: CodeOrdering.Op, ignoreMissingness: Boolean): CodeOrdering.F[op.ReturnType] = - getCodeOrdering(t1, t2, sortOrder = Ascending, op, ignoreMissingness) - - def getCodeOrdering( - t1: PType, t2: PType, sortOrder: SortOrder, op: CodeOrdering.Op - 
): CodeOrdering.F[op.ReturnType] = - getCodeOrdering(t1, t2, sortOrder, op, ignoreMissingness = false) - def genEmitMethod(baseName: String, argsInfo: IndexedSeq[ParamType], returnInfo: ParamType): EmitMethodBuilder[C] = newEmitMethod(genName("m", baseName), argsInfo, returnInfo) diff --git a/hail/src/main/scala/is/hail/expr/ir/EmitCodeBuilder.scala b/hail/src/main/scala/is/hail/expr/ir/EmitCodeBuilder.scala index 9581e9f652c..5542f48a68d 100644 --- a/hail/src/main/scala/is/hail/expr/ir/EmitCodeBuilder.scala +++ b/hail/src/main/scala/is/hail/expr/ir/EmitCodeBuilder.scala @@ -200,6 +200,8 @@ class EmitCodeBuilder(val emb: EmitMethodBuilder[_], var code: Code[Unit]) exten StringFunctions.boxArg(EmitRegion(emb, r), t)(code).invoke[String]("toString") } + def strValue(r: Value[Region], x: PCode): Code[String] = strValue(r, x.pt, x.code) + // for debugging def println(cString: Code[String]*) = this += Code._printlns(cString:_*) } diff --git a/hail/src/main/scala/is/hail/expr/ir/EmitStream.scala b/hail/src/main/scala/is/hail/expr/ir/EmitStream.scala index c43a2b49664..b04c961a373 100644 --- a/hail/src/main/scala/is/hail/expr/ir/EmitStream.scala +++ b/hail/src/main/scala/is/hail/expr/ir/EmitStream.scala @@ -3,11 +3,12 @@ package is.hail.expr.ir import is.hail.annotations._ import is.hail.asm4s._ import is.hail.asm4s.joinpoint.Ctrl +import is.hail.expr.ir.orderings.StructOrdering import is.hail.services.shuffler._ import is.hail.types.physical._ import is.hail.types.physical.stypes.concrete.{SBinaryPointer, SBinaryPointerSettable, SCanonicalShufflePointerCode, SCanonicalShufflePointerSettable, SIntervalPointer, SIntervalPointerSettable, SSubsetStruct, SSubsetStructCode} import is.hail.types.physical.stypes.{interfaces, _} -import is.hail.types.physical.stypes.interfaces.{SStream, SStreamCode, SStruct} +import is.hail.types.physical.stypes.interfaces.{SBaseStruct, SStream, SStreamCode} import is.hail.types.physical.stypes.primitives.SInt32Code import is.hail.types.virtual._ import is.hail.utils._ @@ -1075,14 +1076,6 @@ object EmitStream { val curKey = ctx.mb.newPField("st_grpby_curkey", keyType) val eltRegions = destRegion.createSiblingRegionArray(mb, k) - val keyViewType = PSubsetStruct(eltType, key: _*) - val lt: (EmitCodeBuilder, PCode, PCode) => Code[Boolean] = keyViewType - .codeOrdering(mb, keyViewType, missingFieldsEqual = false) - .lteqNonnull - val hasKey: (EmitCodeBuilder, PCode, PCode) => Code[Boolean] = keyViewType - .codeOrdering(mb, keyType, missingFieldsEqual = false) - .equivNonnull - val runMatch = CodeLabel() val LpullChild = CodeLabel() val LloopEnd = CodeLabel() @@ -1105,7 +1098,7 @@ object EmitStream { Code(LstartNewKey, Code.forLoop(i := 0, i < k, i := i + 1, result(i) = 0L), EmitCodeBuilder.scopedVoid(mb) { cb => - cb.assign(curKey, eltRegions(winner).copyTo(cb, PCode(keyViewType, heads(winner)), destRegion, keyType)) + cb.assign(curKey, eltRegions(winner).copyTo(cb, eltType.loadCheapPCode(cb, heads(winner)).subset(key: _*), destRegion, keyType)) }, LaddToResult.goto) @@ -1126,9 +1119,13 @@ object EmitStream { challenger := bracket(matchIdx), (matchIdx.ceq(0) || challenger.ceq(-1)).orEmpty(LloopEnd.goto), (challenger.cne(k) && (winner.ceq(k) - || EmitCodeBuilder.scopedCode(mb)( - lt(_, PCode(keyViewType, heads(challenger)), PCode(keyViewType, heads(winner))))) - ).orEmpty(Code( + || EmitCodeBuilder.scopedCode(mb) { cb => + val left = eltType.loadCheapPCode(cb, heads(challenger)).subset(key: _*) + val right = eltType.loadCheapPCode(cb, heads(winner)).subset(key: _*) + val ord 
= StructOrdering.make(left.st, right.st, cb.emb.ecb, missingFieldsEqual = false) + ord.lteqNonnull(cb, left, right) + }) + ).orEmpty(Code( bracket(matchIdx) = winner, winner := challenger)), matchIdx := matchIdx >>> 1, @@ -1143,9 +1140,13 @@ object EmitStream { Leos.goto, Code(result := Code.newArray[Long](k), LstartNewKey.goto)), (winner.cne(k) - && EmitCodeBuilder.scopedCode(mb)( - hasKey(_, PCode(keyViewType, heads(winner)), curKey)) - ).mux( + && EmitCodeBuilder.scopedCode(mb) { cb => + val left = eltType.loadCheapPCode(cb, heads(winner)).subset(key: _*) + val right = curKey + val ord = StructOrdering.make(left.st, right.st.asInstanceOf[SBaseStruct], + cb.emb.ecb, missingFieldsEqual = false) + ord.equivNonnull(cb, left, right) + }).mux( LaddToResult.goto, Lpush.goto)), // We're still in the setup phase @@ -1195,8 +1196,6 @@ object EmitStream { def apply(outerEos: Code[Ctrl], outerPush: (ChildStagedRegion => Stream[PCode]) => Code[Ctrl])(implicit ctx: EmitStreamContext): Source[ChildStagedRegion => Stream[PCode]] = { val eltType = coerce[PStruct](innerStreamType.elementType) val keyType = eltType.selectFields(key) - val keyViewType = SSubsetStruct(eltType.sType.asInstanceOf[SStruct], key) - val ordering = keyType.codeOrdering(mb, keyViewType.pType, missingFieldsEqual = false) val xCurKey = ctx.mb.newPField("st_grpby_curkey", keyType) val xCurElt = ctx.mb.newPField("st_grpby_curelt", eltType) @@ -1228,8 +1227,12 @@ object EmitStream { // !xInOuter iff this element was requested by an inner stream. // Else we are stepping to the beginning of the next group. (xCurKey.tcode[Long].cne(0L) && - EmitCodeBuilder.scopedCode(mb)(ordering.equivNonnull(_, xCurKey, xCurElt.asBaseStruct.subset(key: _*).asPCode)) - ).mux( + EmitCodeBuilder.scopedCode(mb) { cb => + val right = xCurElt.asBaseStruct.subset(key: _*).asPCode + StructOrdering.make(xCurKey.st.asInstanceOf[SBaseStruct], right.st.asInstanceOf[SBaseStruct], + cb.emb.ecb, missingFieldsEqual = false) + .equivNonnull(cb, xCurKey, right) + }).mux( xInOuter.mux( Code(holdingRegion.clear(), LchildPull.goto), LinnerPush.goto), @@ -1644,16 +1647,16 @@ object EmitStream { val rElemType = coerce[PStruct](coerce[PStream](rightIR.pType).elementType) val outElemType = coerce[PStream](x.pType).elementType - val lKeyViewType = PSubsetStruct(lElemType, key: _*) - val rKeyViewType = PSubsetStruct(rElemType, key: _*) - val ordering = lKeyViewType.codeOrdering(mb, rKeyViewType, missingFieldsEqual = false) def compare(lelt: EmitValue, relt: EmitValue): Code[Int] = EmitCodeBuilder.scopedCode(mb) { cb => assert(lelt.pt == lElemType) assert(relt.pt == rElemType) val lhs = lelt.map(_.asBaseStruct.subset(key: _*).asPCode) val rhs = relt.map(_.asBaseStruct.subset(key: _*).asPCode) - ordering.compare(cb, lhs, rhs) + + StructOrdering.make(lhs.st.asInstanceOf[SBaseStruct], rhs.st.asInstanceOf[SBaseStruct], + cb.emb.ecb, missingFieldsEqual = false) + .compare(cb, lhs, rhs, missingEqual = true) } emitStream(leftIR).flatMap(cb) { leftPC => @@ -1830,11 +1833,11 @@ object EmitStream { val eltType = x.pType.elementType.asInstanceOf[PStruct] val keyViewType = PSubsetStruct(eltType, key: _*) - val ord = keyViewType.codeOrdering(mb, keyViewType) def comp(li: Code[Int], lv: Code[Long], ri: Code[Int], rv: Code[Long]): Code[Boolean] = EmitCodeBuilder.scopedCode(mb) { cb => val l = PCode(keyViewType, lv) val r = PCode(keyViewType, rv) + val ord = cb.emb.ecb.getOrdering(l.st, r.st) val c = cb.newLocal("stream_merge_comp", ord.compareNonnull(cb, l, r)) c < 0 || (c.ceq(0) && li < ri) } @@ 
-2102,16 +2105,17 @@ object EmitStream { val xRElt = mb.newEmitField("join_relt", rEltType.setRequired(false)) val newEnv = env.bind(leftName -> xLElt, rightName -> xRElt) - val lKeyViewType = PSubsetStruct(lEltType, lKey: _*) - val rKeyViewType = PSubsetStruct(rEltType, rKey: _*) - val ordering = lKeyViewType.codeOrdering(mb, rKeyViewType, missingFieldsEqual = false) - def compare(lelt: EmitValue, relt: EmitValue): Code[Int] = { assert(lelt.pt == lEltType) assert(relt.pt == rEltType) - val lhs = lelt.map(_.asBaseStruct.subset(lKey: _*).asPCode) - val rhs = relt.map(_.asBaseStruct.subset(rKey: _*).asPCode) - EmitCodeBuilder.scopedCode(mb) { cb => ordering.compare(cb, lhs, rhs) } + + EmitCodeBuilder.scopedCode(mb) { cb => + val lhs = lelt.map(_.asBaseStruct.subset(lKey: _*).asPCode) + val rhs = relt.map(_.asBaseStruct.subset(rKey: _*).asPCode) + StructOrdering.make(lhs.st.asInstanceOf[SBaseStruct], rhs.st.asInstanceOf[SBaseStruct], + cb.emb.ecb, missingFieldsEqual = false) + .compare(cb, lhs, rhs, missingEqual = false) + } } def joinF: ((EmitCode, EmitCode)) => EmitCode = { case (lelt, relt) => diff --git a/hail/src/main/scala/is/hail/expr/ir/Parser.scala b/hail/src/main/scala/is/hail/expr/ir/Parser.scala index 15012941fcf..17c6fda9644 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Parser.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Parser.scala @@ -438,6 +438,13 @@ object IRParser { punctuation(it, "}") val fields = args.zipWithIndex.map { case ((id, t), i) => PField(id, t, i) } PCanonicalStruct(fields, req) + case "PSubsetStruct" => + punctuation(it, "{") + val parent = ptype_expr(env)(it).asInstanceOf[PStruct] + punctuation(it, "{") + val args = repsepUntil(it, identifier, PunctuationToken(","), PunctuationToken("}")) + punctuation(it, "}") + PSubsetStruct(parent, args) } assert(typ.required == req) typ diff --git a/hail/src/main/scala/is/hail/expr/ir/TableIR.scala b/hail/src/main/scala/is/hail/expr/ir/TableIR.scala index cd7790c17a5..0db805154ad 100644 --- a/hail/src/main/scala/is/hail/expr/ir/TableIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/TableIR.scala @@ -389,15 +389,13 @@ object LoweredTableReader { ToStream(Literal(TArray(contextType), partOrigIndex.map(i => contexts(i)))), body) - val tableType = TableType(tableStage.rowType, tableStage.key, tableStage.globalType) - - val rTable = BaseTypeWithRequiredness(tableType).asInstanceOf[RTable] + val rowRType = TypeWithRequiredness(tableStage.rowType).asInstanceOf[RStruct] ctx.backend.lowerDistributedSort(ctx, tableStage, keyType.fieldNames.map(f => SortField(f, Ascending)), Map.empty, - rTable + rowRType ) } } @@ -764,7 +762,7 @@ case class PartitionZippedNativeReader(specLeft: AbstractTypedCodecSpec, specRig .asString .loadString() Code.checkcast[IndexReader]( - makeIndex.invoke[AnyRef, AnyRef, AnyRef, AnyRef]("apply", mb.getFS, indexPath, Code.boxInt(8))) + makeIndex.invoke[AnyRef, AnyRef, AnyRef, AnyRef, AnyRef]("apply", mb.getFS, indexPath, Code.boxInt(8), cb.emb.ecb.pool())) case None => Code._null[IndexReader] } diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/CollectAsSetAggregator.scala b/hail/src/main/scala/is/hail/expr/ir/agg/CollectAsSetAggregator.scala index fd09f1b0141..a3439e2f1d9 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/CollectAsSetAggregator.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/CollectAsSetAggregator.scala @@ -1,7 +1,8 @@ package is.hail.expr.ir.agg -import is.hail.annotations.{CodeOrdering, Region} +import is.hail.annotations.Region import is.hail.asm4s._ +import 
is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitClassBuilder, EmitCode, EmitCodeBuilder, EmitRegion, IEmitCode} import is.hail.io._ import is.hail.types.VirtualTypeWithReq @@ -48,7 +49,7 @@ class TypedKey(typ: PType, kb: EmitClassBuilder[_], region: Value[Region]) exten } def compKeys(cb: EmitCodeBuilder, k1: EmitCode, k2: EmitCode): Code[Int] = { - kb.getCodeOrdering(k1.pt, k2.pt, CodeOrdering.Compare(), ignoreMissingness = false)(cb, k1, k2) + kb.getOrderingFunction(k1.st, k2.st, CodeOrdering.Compare())(cb, k1, k2) } def loadCompKey(cb: EmitCodeBuilder, off: Value[Long]): EmitCode = diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/DownsampleAggregator.scala b/hail/src/main/scala/is/hail/expr/ir/agg/DownsampleAggregator.scala index e6f1d75a2a8..0e1a05f2a8e 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/DownsampleAggregator.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/DownsampleAggregator.scala @@ -1,7 +1,8 @@ package is.hail.expr.ir.agg -import is.hail.annotations.{CodeOrdering, Region} +import is.hail.annotations.Region import is.hail.asm4s._ +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitClassBuilder, EmitCode, EmitCodeBuilder, EmitParamType, EmitRegion, IEmitCode, ParamType} import is.hail.io.{BufferSpec, InputBuffer, OutputBuffer} import is.hail.types.VirtualTypeWithReq @@ -19,7 +20,7 @@ class DownsampleBTreeKey(binType: PBaseStruct, pointType: PBaseStruct, kb: EmitC "empty" -> PBooleanRequired) val compType: PType = binType - private val kcomp = kb.getCodeOrdering(binType, CodeOrdering.Compare(), ignoreMissingness = false) + private val kcomp = kb.getOrderingFunction(binType.sType, CodeOrdering.Compare()) def isEmpty(cb: EmitCodeBuilder, off: Code[Long]): Code[Boolean] = PBooleanRequired.loadCheapPCode(cb, storageType.loadField(off, "empty")).boolCode(cb) diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/GroupedAggregator.scala b/hail/src/main/scala/is/hail/expr/ir/agg/GroupedAggregator.scala index ee989e8bb81..a480f6381f3 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/GroupedAggregator.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/GroupedAggregator.scala @@ -1,7 +1,8 @@ package is.hail.expr.ir.agg -import is.hail.annotations.{CodeOrdering, Region} +import is.hail.annotations.Region import is.hail.asm4s._ +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitClassBuilder, EmitCode, EmitCodeBuilder, EmitMethodBuilder, EmitRegion, IEmitCode, ParamType} import is.hail.io._ import is.hail.types.VirtualTypeWithReq @@ -23,7 +24,7 @@ class GroupedBTreeKey(kt: PType, kb: EmitClassBuilder[_], region: Value[Region], FastIndexedSeq[ParamType](typeInfo[Long], k.pt.asEmitParam), typeInfo[Int] ) { mb => - val comp = kb.getCodeOrdering(compType, k.pt, CodeOrdering.Compare(), ignoreMissingness = false) + val comp = kb.getOrderingFunction(compType.sType, k.st, CodeOrdering.Compare()) val off = mb.getCodeParam[Long](1) val ev1 = loadCompKey(cb, off) val ev2 = mb.getEmitParam(2) @@ -90,7 +91,7 @@ class GroupedBTreeKey(kt: PType, kb: EmitClassBuilder[_], region: Value[Region], } def compKeys(cb: EmitCodeBuilder, k1: EmitCode, k2: EmitCode): Code[Int] = { - kb.getCodeOrdering(k1.pt, k2.pt, CodeOrdering.Compare(), ignoreMissingness = false)(cb, k1, k2) + kb.getOrderingFunction(k1.st, k2.st, CodeOrdering.Compare())(cb, k1, k2) } def loadCompKey(cb: EmitCodeBuilder, off: Value[Long]): EmitCode = diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/TakeByAggregator.scala 
b/hail/src/main/scala/is/hail/expr/ir/agg/TakeByAggregator.scala index c5a76fb4c1c..a0299faceed 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/TakeByAggregator.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/TakeByAggregator.scala @@ -2,11 +2,13 @@ package is.hail.expr.ir.agg import is.hail.annotations.Region import is.hail.asm4s.{Code, _} +import is.hail.expr.ir.orderings.StructOrdering import is.hail.expr.ir.{Ascending, EmitClassBuilder, EmitCode, EmitCodeBuilder, ParamType, SortOrder} import is.hail.io.{BufferSpec, InputBuffer, OutputBuffer} import is.hail.types.VirtualTypeWithReq import is.hail.types.physical._ import is.hail.types.physical.stypes.concrete.{SBaseStructPointerCode, SIndexablePointerCode} +import is.hail.types.physical.stypes.interfaces.SBaseStruct import is.hail.types.virtual.{TInt32, Type} import is.hail.utils._ @@ -17,7 +19,7 @@ object TakeByRVAS { class TakeByRVAS(val valueVType: VirtualTypeWithReq, val keyVType: VirtualTypeWithReq, val kb: EmitClassBuilder[_], so: SortOrder = Ascending) extends AggregatorState { private val r: Settable[Region] = kb.genFieldThisRef[Region]("takeby_region") - val valueType: PType = valueVType.canonicalPType + val valueType: PType = valueVType.canonicalPType val keyType: PType = keyVType.canonicalPType val region: Value[Region] = r @@ -50,27 +52,13 @@ class TakeByRVAS(val valueVType: VirtualTypeWithReq, val keyVType: VirtualTypeWi ) def compareKey(cb: EmitCodeBuilder, k1: EmitCode, k2: EmitCode): Code[Int] = { - val cmp = kb.genEmitMethod("compare", FastIndexedSeq[ParamType](k1.pv.st.pType.asEmitParam, k2.pv.st.pType.asEmitParam), IntInfo) - val ord = k1.pv.st.pType.codeOrdering(cmp, k2.pv.st.pType, so) - - cmp.emitWithBuilder { - val k1 = cmp.getEmitParam(1) - val k2 = cmp.getEmitParam(2) - cb => ord.compare(cb, k1, k2) - } - cb.invokeCode(cmp, k1, k2) + val ord = cb.emb.ecb.getOrdering(k1.st, k2.st, so) + ord.compare(cb, k1, k2, true) } - private val compareIndexedKey: (Code[Long], Code[Long]) => Code[Int] = { - val indexedkeyTypeTypeInfo = typeToTypeInfo(indexedKeyType) - val cmp = kb.genEmitMethod("take_by_compare", FastIndexedSeq[ParamType](indexedkeyTypeTypeInfo, indexedkeyTypeTypeInfo), IntInfo) - val ord = indexedKeyType.codeOrdering(cmp, indexedKeyType, Array(so, Ascending), true) - val k1 = cmp.getCodeParam(1)(indexedkeyTypeTypeInfo) - val k2 = cmp.getCodeParam(2)(indexedkeyTypeTypeInfo) - - cmp.emitWithBuilder(cb => ord.compare(cb , EmitCode.present(cb.emb, PCode(indexedKeyType, k1)), EmitCode.present(cb.emb, PCode(indexedKeyType, k2)))) - - cmp.invokeCode(_, _) + private def compareIndexedKey(cb: EmitCodeBuilder, k1: PCode, k2: PCode): Code[Int] = { + val ord = StructOrdering.make(k1.st.asInstanceOf[SBaseStruct], k2.st.asInstanceOf[SBaseStruct], cb.emb.ecb, Array(so, Ascending), true) + ord.compareNonnull(cb, k1, k2) } private def maybeGCCode(cb: EmitCodeBuilder, alwaysRun: EmitCodeBuilder => Unit)(runIfGarbage: EmitCodeBuilder => Unit, runBefore: Boolean = false): Unit = { @@ -220,7 +208,9 @@ class TakeByRVAS(val valueVType: VirtualTypeWithReq, val keyVType: VirtualTypeWi val i = mb.getCodeParam[Long](1) val j = mb.getCodeParam[Long](2) - mb.emit(compareIndexedKey(eltTuple.fieldOffset(i, 0), eltTuple.fieldOffset(j, 0))) + mb.emitWithBuilder(cb => compareIndexedKey(cb, + indexedKeyType.loadCheapPCode(cb, eltTuple.fieldOffset(i, 0)), + indexedKeyType.loadCheapPCode(cb, eltTuple.fieldOffset(j, 0)))) mb.invokeCode(_, _) } diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/IntervalFunctions.scala 
b/hail/src/main/scala/is/hail/expr/ir/functions/IntervalFunctions.scala index ebcde24d95f..a2ce9bf5d36 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/IntervalFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/IntervalFunctions.scala @@ -1,8 +1,8 @@ package is.hail.expr.ir.functions -import is.hail.annotations.CodeOrdering import is.hail.asm4s.{Code, _} import is.hail.expr.ir._ +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.types.physical._ import is.hail.types.virtual._ @@ -68,7 +68,7 @@ object IntervalFunctions extends RegistryFunctions { int.toI(cb).map(cb) { case (intc: PIntervalCode) => val interval: PIntervalValue = intc.memoize(cb, "interval") val pointv = cb.memoize(point.toI(cb), "point") - val compare = cb.emb.getCodeOrdering(pointv.pt, interval.pt.pointType, CodeOrdering.Compare()) + val compare = cb.emb.ecb.getOrderingFunction(pointv.st, interval.st.pointType, CodeOrdering.Compare()) val start = EmitCode.fromI(cb.emb)(cb => interval.loadStart(cb).typecast[PCode]) val cmp = cb.newLocal("cmp", compare(cb, pointv, start)) @@ -97,7 +97,7 @@ object IntervalFunctions extends RegistryFunctions { val overlap = EmitCodeBuilder.scopedCode(r.mb) { cb => val interval1 = int1.memoize(cb, "interval1") val interval2 = int2.memoize(cb, "interval2") - val compare = cb.emb.getCodeOrdering(int1.pt.pointType, int2.pt.pointType, CodeOrdering.Compare()) + val compare = cb.emb.ecb.getOrderingFunction(int1.st.pointType, int2.st.pointType, CodeOrdering.Compare()) def isAboveOnNonempty(cb: EmitCodeBuilder, lhs: PIntervalValue, rhs: PIntervalValue): Code[Boolean] = { val start = EmitCode.fromI(cb.emb)(cb => lhs.loadStart(cb).typecast[PCode]) diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala index 860ebea385c..24ec443107e 100644 --- a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala @@ -4,7 +4,7 @@ import is.hail.expr.ir._ import is.hail.methods.{ForceCountTable, NPartitionsTable} import is.hail.rvd.{PartitionBoundOrdering, RVDPartitioner} import is.hail.types.virtual._ -import is.hail.types.{RTable, TableType} +import is.hail.types.{RTable, TableType, RStruct, RField} import is.hail.utils._ import org.apache.spark.sql.Row @@ -494,7 +494,7 @@ object LowerTableIR { } // TODO: This ignores nPartitions and bufferSize - case self@TableKeyByAndAggregate(child, expr, newKey, nPartitions, bufferSize) => + case TableKeyByAndAggregate(child, expr, newKey, nPartitions, bufferSize) => val loweredChild = lower(child) val newKeyType = newKey.typ.asInstanceOf[TStruct] val oldRowType = child.typ.rowType @@ -511,8 +511,12 @@ object LowerTableIR { val shuffledRowType = withNewKeyFields.rowType val sortFields = newKeyType.fieldNames.map(fieldName => SortField(fieldName, Ascending)).toIndexedSeq + val childRowRType = r.lookup(child).asInstanceOf[RTable].rowType + val newKeyRType = r.lookup(newKey).asInstanceOf[RStruct] + val withNewKeyRType = RStruct( + newKeyRType.fields ++ Seq(RField(fullRowUID, childRowRType, newKeyRType.fields.length))) val shuffled = ctx.backend.lowerDistributedSort( - ctx, withNewKeyFields, sortFields, relationalLetsAbove, r.lookup(self).asInstanceOf[RTable]) + ctx, withNewKeyFields, sortFields, relationalLetsAbove, withNewKeyRType) val repartitioned = shuffled.repartitionNoShuffle(shuffled.partitioner.strictify) repartitioned.mapPartition(None) { partition => @@ -898,7 +902,7 @@ 
object LowerTableIR { ) } - case self@TableKeyBy(child, newKey, isSorted: Boolean) => + case TableKeyBy(child, newKey, isSorted: Boolean) => val loweredChild = lower(child) val nPreservedFields = loweredChild.kType.fieldNames @@ -913,8 +917,9 @@ object LowerTableIR { loweredChild.changePartitionerNoRepartition(loweredChild.partitioner.coarsen(nPreservedFields)) .extendKeyPreservesPartitioning(newKey) else { + val rowRType = r.lookup(child).asInstanceOf[RTable].rowType val sorted = ctx.backend.lowerDistributedSort( - ctx, loweredChild, newKey.map(k => SortField(k, Ascending)), relationalLetsAbove, r.lookup(self).asInstanceOf[RTable]) + ctx, loweredChild, newKey.map(k => SortField(k, Ascending)), relationalLetsAbove, rowRType) assert(sorted.kType.fieldNames.sameElements(newKey)) sorted } @@ -1030,12 +1035,15 @@ object LowerTableIR { InsertFields(keyRef, FastSeq(fieldName -> projectedVals))) ) - case self@TableOrderBy(child, sortFields) => + case TableOrderBy(child, sortFields) => val loweredChild = lower(child) - if (TableOrderBy.isAlreadyOrdered(sortFields, loweredChild.partitioner.kType.fieldNames)) + if (TableOrderBy.isAlreadyOrdered(sortFields, loweredChild.partitioner.kType.fieldNames)) { loweredChild.changePartitionerNoRepartition(RVDPartitioner.unkeyed(loweredChild.partitioner.numPartitions)) - else - ctx.backend.lowerDistributedSort(ctx, loweredChild, sortFields, relationalLetsAbove, r.lookup(self).asInstanceOf[RTable]) + } else { + val rowRType = r.lookup(child).asInstanceOf[RTable].rowType + ctx.backend.lowerDistributedSort( + ctx, loweredChild, sortFields, relationalLetsAbove, rowRType) + } case TableExplode(child, path) => lower(child).mapPartition(Some(child.typ.key.takeWhile(k => k != path(0)))) { rows => diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/BinaryOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/BinaryOrdering.scala new file mode 100644 index 00000000000..85152faa92c --- /dev/null +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/BinaryOrdering.scala @@ -0,0 +1,39 @@ +package is.hail.expr.ir.orderings + +import is.hail.asm4s._ +import is.hail.expr.ir.{EmitClassBuilder, EmitCodeBuilder} +import is.hail.types.physical.PCode +import is.hail.types.physical.stypes.interfaces.{SBinary, SBinaryValue} + +object BinaryOrdering { + def make(t1: SBinary, t2: SBinary, ecb: EmitClassBuilder[_]): CodeOrdering = { + + new CodeOrderingCompareConsistentWithOthers { + + val type1: SBinary = t1 + val type2: SBinary = t2 + + def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = { + val xv: SBinaryValue = x.asBinary.memoize(cb, "xv") + val yv: SBinaryValue = y.asBinary.memoize(cb, "yv") + val xlen = cb.newLocal[Int]("xlen", xv.loadLength()) + val ylen = cb.newLocal[Int]("ylen", yv.loadLength()) + val lim = cb.newLocal[Int]("lim", (xlen < ylen).mux(xlen, ylen)) + val i = cb.newLocal[Int]("i", 0) + val cmp = cb.newLocal[Int]("cmp", 0) + val Lbreak = CodeLabel() + + cb.forLoop({}, i < lim, cb.assign(i, i + 1), { + val compval = Code.invokeStatic2[java.lang.Integer, Int, Int, Int]("compare", + Code.invokeStatic1[java.lang.Byte, Byte, Int]("toUnsignedInt", xv.loadByte(i)), + Code.invokeStatic1[java.lang.Byte, Byte, Int]("toUnsignedInt", yv.loadByte(i))) + cb.assign(cmp, compval) + cb.ifx(cmp.cne(0), cb.goto(Lbreak)) + }) + + cb.define(Lbreak) + cmp.ceq(0).mux(Code.invokeStatic2[java.lang.Integer, Int, Int, Int]("compare", xlen, ylen), cmp) + } + } + } +} diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/CallOrdering.scala 
b/hail/src/main/scala/is/hail/expr/ir/orderings/CallOrdering.scala new file mode 100644 index 00000000000..813b807059b --- /dev/null +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/CallOrdering.scala @@ -0,0 +1,24 @@ +package is.hail.expr.ir.orderings + +import is.hail.asm4s.Code +import is.hail.expr.ir.{EmitClassBuilder, EmitCodeBuilder} +import is.hail.types.physical.PCode +import is.hail.types.physical.stypes.SType +import is.hail.types.physical.stypes.interfaces.SCall + +object CallOrdering { + def make(t1: SCall, t2: SCall, ecb: EmitClassBuilder[_]): CodeOrdering = { + // ugh ugh ugh + // mistakes were made + // we made our bed now we lie in it + new CodeOrderingCompareConsistentWithOthers { + override val type1: SType = t1 + override val type2: SType = t2 + + override def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = { + Code.invokeStatic2[java.lang.Integer, Int, Int, Int]("compare", + x.asCall.loadCanonicalRepresentation(cb), y.asCall.loadCanonicalRepresentation(cb)) + } + } + } +} diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/CodeOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/CodeOrdering.scala new file mode 100644 index 00000000000..5f5319f4b5a --- /dev/null +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/CodeOrdering.scala @@ -0,0 +1,308 @@ +package is.hail.expr.ir.orderings + +import is.hail.asm4s._ +import is.hail.expr.ir.{EmitClassBuilder, EmitCode, EmitCodeBuilder} +import is.hail.types.physical._ +import is.hail.types.physical.stypes.SType +import is.hail.types.physical.stypes.interfaces._ +import is.hail.types.physical.stypes.primitives._ +import is.hail.types.virtual._ +import is.hail.utils.FastIndexedSeq + +object CodeOrdering { + + sealed trait Op { + type ReturnType + val rtti: TypeInfo[ReturnType] + val missingEqual: Boolean + } + + final case class Compare(missingEqual: Boolean = true) extends Op { + type ReturnType = Int + val rtti = typeInfo[Int] + } + + sealed trait BooleanOp extends Op { + type ReturnType = Boolean + val rtti = typeInfo[Boolean] + } + + final case class Equiv(missingEqual: Boolean = true) extends BooleanOp + + final case class Lt(missingEqual: Boolean = true) extends BooleanOp + + final case class Lteq(missingEqual: Boolean = true) extends BooleanOp + + final case class Gt(missingEqual: Boolean = true) extends BooleanOp + + final case class Gteq(missingEqual: Boolean = true) extends BooleanOp + + final case class Neq(missingEqual: Boolean = true) extends BooleanOp + + type F[R] = (EmitCodeBuilder, EmitCode, EmitCode) => Code[R] + + def makeOrdering(t1: SType, t2: SType, ecb: EmitClassBuilder[_]): CodeOrdering = { + val canCompare = (t1.virtualType, t2.virtualType) match { + case (t1: TStruct, t2: TStruct) => t1.isIsomorphicTo(t2) + case (t1, t2) if t1 == t2 => t1 == t2 + } + if (!canCompare) { + throw new RuntimeException(s"ordering: type mismatch:\n left: ${ t1.virtualType }\n right: ${ t2.virtualType }") + } + + t1.virtualType match { + case TInt32 => Int32Ordering.make(t1.asInstanceOf[SInt32], t2.asInstanceOf[SInt32], ecb) + case TInt64 => Int64Ordering.make(t1.asInstanceOf[SInt64], t2.asInstanceOf[SInt64], ecb) + case TFloat32 => Float32Ordering.make(t1.asInstanceOf[SFloat32], t2.asInstanceOf[SFloat32], ecb) + case TFloat64 => Float64Ordering.make(t1.asInstanceOf[SFloat64], t2.asInstanceOf[SFloat64], ecb) + case TBoolean => BooleanOrdering.make(t1.asInstanceOf[SBoolean], t2.asInstanceOf[SBoolean], ecb) + case TCall => CallOrdering.make(t1.asInstanceOf[SCall], t2.asInstanceOf[SCall], 
ecb) + case TString => StringOrdering.make(t1.asInstanceOf[SString], t2.asInstanceOf[SString], ecb) + case TBinary => BinaryOrdering.make(t1.asInstanceOf[SBinary], t2.asInstanceOf[SBinary], ecb) + case _: TBaseStruct => StructOrdering.make(t1.asInstanceOf[SBaseStruct], t2.asInstanceOf[SBaseStruct], ecb) + case _: TShuffle => ShuffleOrdering.make(t1.asInstanceOf[SShuffle], t2.asInstanceOf[SShuffle], ecb) + case _: TLocus => LocusOrdering.make(t1.asInstanceOf[SLocus], t2.asInstanceOf[SLocus], ecb) + case _: TInterval => IntervalOrdering.make(t1.asInstanceOf[SInterval], t2.asInstanceOf[SInterval], ecb) + case _: TSet | _: TArray | _: TDict => + IterableOrdering.make(t1.asInstanceOf[SContainer], t2.asInstanceOf[SContainer], ecb) + } + } +} + +abstract class CodeOrdering { + outer => + + val type1: SType + val type2: SType + + def reversed: Boolean = false + + final def checkedPCode[T](cb: EmitCodeBuilder, arg1: PCode, arg2: PCode, context: String, + f: (EmitCodeBuilder, PCode, PCode) => Code[T])(implicit ti: TypeInfo[T]): Code[T] = { + if (!arg1.st.equalsExceptTopLevelRequiredness(type1)) + throw new RuntimeException(s"CodeOrdering: $context: type mismatch (left)\n generated: $type1\n argument: ${ arg1.st }") + if (!arg2.st.equalsExceptTopLevelRequiredness(type2)) + throw new RuntimeException(s"CodeOrdering: $context: type mismatch (right)\n generated: $type2\n argument: ${ arg2.st }") + + val cacheKey = ("ordering", reversed, type1, type2, context) + val mb = cb.emb.ecb.getOrGenEmitMethod(s"ord_$context", cacheKey, + FastIndexedSeq(arg1.st.paramType, arg2.st.paramType), ti) { mb => + + mb.emitWithBuilder[T] { cb => + val arg1 = mb.getPCodeParam(1) + val arg2 = mb.getPCodeParam(2) + f(cb, arg1, arg2) + } + } + cb.invokeCode[T](mb, arg1, arg2) + } + + final def checkedEmitCode[T](cb: EmitCodeBuilder, arg1: EmitCode, arg2: EmitCode, missingEqual: Boolean, context: String, + f: (EmitCodeBuilder, EmitCode, EmitCode, Boolean) => Code[T])(implicit ti: TypeInfo[T]): Code[T] = { + if (!arg1.st.equalsExceptTopLevelRequiredness(type1)) + throw new RuntimeException(s"CodeOrdering: $context: type mismatch (left)\n generated: $type1\n argument: ${ arg1.st }") + if (!arg2.st.equalsExceptTopLevelRequiredness(type2)) + throw new RuntimeException(s"CodeOrdering: $context: type mismatch (right)\n generated: $type2\n argument: ${ arg2.st }") + + val cacheKey = ("ordering", reversed, type1, type2, context, missingEqual) + val mb = cb.emb.ecb.getOrGenEmitMethod(s"ord_$context", cacheKey, + FastIndexedSeq(arg1.st.asEmitParam, arg2.st.asEmitParam), ti) { mb => + + mb.emitWithBuilder[T] { cb => + val arg1 = mb.getEmitParam(1) + val arg2 = mb.getEmitParam(2) + f(cb, arg1, arg2, missingEqual) + } + } + cb.invokeCode[T](mb, arg1, arg2) + } + + + final def compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = { + checkedPCode(cb, x, y, "compareNonnull", _compareNonnull) + } + + final def ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + checkedPCode(cb, x, y, "ltNonnull", _ltNonnull) + } + + final def lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + checkedPCode(cb, x, y, "lteqNonnull", _lteqNonnull) + } + + final def gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + checkedPCode(cb, x, y, "gtNonnull", _gtNonnull) + } + + final def gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + checkedPCode(cb, x, y, "gteqNonnull", _gteqNonnull) + } + + final def equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): 
Code[Boolean] = { + checkedPCode(cb, x, y, "equivNonnull", _equivNonnull) + } + + final def lt(cb: EmitCodeBuilder, x: EmitCode, y: EmitCode, missingEqual: Boolean): Code[Boolean] = { + checkedEmitCode(cb, x, y, missingEqual, "lt", _lt) + } + + final def lteq(cb: EmitCodeBuilder, x: EmitCode, y: EmitCode, missingEqual: Boolean): Code[Boolean] = { + checkedEmitCode(cb, x, y, missingEqual, "lteq", _lteq) + } + + final def gt(cb: EmitCodeBuilder, x: EmitCode, y: EmitCode, missingEqual: Boolean): Code[Boolean] = { + checkedEmitCode(cb, x, y, missingEqual, "gt", _gt) + } + + final def gteq(cb: EmitCodeBuilder, x: EmitCode, y: EmitCode, missingEqual: Boolean): Code[Boolean] = { + checkedEmitCode(cb, x, y, missingEqual, "gteq", _gteq) + } + + final def equiv(cb: EmitCodeBuilder, x: EmitCode, y: EmitCode, missingEqual: Boolean): Code[Boolean] = { + checkedEmitCode(cb, x, y, missingEqual, "equiv", _equiv) + } + + final def compare(cb: EmitCodeBuilder, x: EmitCode, y: EmitCode, missingEqual: Boolean): Code[Int] = { + checkedEmitCode(cb, x, y, missingEqual, "compare", _compare) + } + + def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] + + def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] + + def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] + + def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] + + def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] + + def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] + + def _compare(cb: EmitCodeBuilder, x: EmitCode, y: EmitCode, missingEqual: Boolean = true): Code[Int] = { + cb += x.setup + cb += y.setup + val xm = cb.newLocal("cord_compare_xm", x.m) + val ym = cb.newLocal("cord_compare_ym", y.m) + val cmp = cb.newLocal[Int]("cmp") + cb.ifx(xm, + cb.ifx(ym, cb.assign(cmp, if (missingEqual) 0 else -1), cb.assign(cmp, 1)), + cb.ifx(ym, cb.assign(cmp, -1), cb.assign(cmp, compareNonnull(cb, x.pv, y.pv)))) + cmp + } + + def _lt(cb: EmitCodeBuilder, x: EmitCode, y: EmitCode, missingEqual: Boolean): Code[Boolean] = { + val ret = cb.newLocal[Boolean]("lt") + cb += x.setup + cb += y.setup + if (missingEqual) { + cb.ifx(x.m, + cb.assign(ret, false), + cb.ifx(y.m, + cb.assign(ret, true), + cb.assign(ret, ltNonnull(cb, x.pv, y.pv)))) + } else { + cb.ifx(y.m, + cb.assign(ret, true), + cb.ifx(x.m, + cb.assign(ret, false), + cb.assign(ret, ltNonnull(cb, x.pv, y.pv)))) + } + ret + } + + def _lteq(cb: EmitCodeBuilder, x: EmitCode, y: EmitCode, missingEqual: Boolean): Code[Boolean] = { + val ret = cb.newLocal[Boolean]("lteq") + cb += x.setup + cb += y.setup + cb.ifx(y.m, + cb.assign(ret, true), + cb.ifx(x.m, + cb.assign(ret, false), + cb.assign(ret, lteqNonnull(cb, x.pv, y.pv)))) + ret + } + + def _gt(cb: EmitCodeBuilder, x: EmitCode, y: EmitCode, missingEqual: Boolean): Code[Boolean] = { + val ret = cb.newLocal[Boolean]("gt") + cb += x.setup + cb += y.setup + cb.ifx(y.m, + cb.assign(ret, false), + cb.ifx(x.m, + cb.assign(ret, true), + cb.assign(ret, gtNonnull(cb, x.pv, y.pv)))) + ret + } + + def _gteq(cb: EmitCodeBuilder, x: EmitCode, y: EmitCode, missingEqual: Boolean): Code[Boolean] = { + val ret = cb.newLocal[Boolean]("gteq") + cb += x.setup + cb += y.setup + if (missingEqual) { + cb.ifx(x.m, + cb.assign(ret, true), + cb.ifx(y.m, + cb.assign(ret, false), + cb.assign(ret, gteqNonnull(cb, x.pv, y.pv)))) + } else { + cb.ifx(y.m, + cb.assign(ret, false), + cb.ifx(x.m, + cb.assign(ret, true), + cb.assign(ret, gteqNonnull(cb, x.pv, y.pv)))) + } + ret + } 
+ + def _equiv(cb: EmitCodeBuilder, x: EmitCode, y: EmitCode, missingEqual: Boolean): Code[Boolean] = { + val ret = cb.newLocal[Boolean]("eq") + cb += x.setup + cb += y.setup + if (missingEqual) { + val xm = cb.newLocal("cord_equiv_xm", x.m) + val ym = cb.newLocal("cord_equiv_ym", y.m) + cb.ifx(xm && ym, + cb.assign(ret, true), + cb.ifx(!xm && !ym, + cb.assign(ret, equivNonnull(cb, x.pv, y.pv)), + cb.assign(ret, false))) + } else { + cb.ifx(!x.m && !y.m, cb.assign(ret, equivNonnull(cb, x.pv, y.pv)), cb.assign(ret, false)) + } + ret + } + + // reverses the sense of the non-null comparison only + def reverse: CodeOrdering = new CodeOrdering() { + override def reverse: CodeOrdering = outer + + val type1: SType = outer.type1 + val type2: SType = outer.type2 + + override def reversed: Boolean = true + + override def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = outer._compareNonnull(cb, y, x) + + override def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = outer._ltNonnull(cb, y, x) + + override def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = outer._lteqNonnull(cb, y, x) + + override def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = outer._gtNonnull(cb, y, x) + + override def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = outer._gteqNonnull(cb, y, x) + + override def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = outer._equivNonnull(cb, y, x) + } +} + +abstract class CodeOrderingCompareConsistentWithOthers extends CodeOrdering { + def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = compareNonnull(cb, x, y) < 0 + + def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = compareNonnull(cb, x, y) <= 0 + + def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = compareNonnull(cb, x, y) > 0 + + def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = compareNonnull(cb, x, y) >= 0 + + def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = compareNonnull(cb, x, y).ceq(0) +} diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/IntervalOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/IntervalOrdering.scala new file mode 100644 index 00000000000..06152f3280b --- /dev/null +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/IntervalOrdering.scala @@ -0,0 +1,194 @@ +package is.hail.expr.ir.orderings + +import is.hail.asm4s.{Code, CodeLabel} +import is.hail.expr.ir.{EmitClassBuilder, EmitCode, EmitCodeBuilder} +import is.hail.types.physical.stypes.interfaces.SInterval +import is.hail.types.physical.{PCode, PIntervalCode, PIntervalValue} + +object IntervalOrdering { + + def make(t1: SInterval, t2: SInterval, ecb: EmitClassBuilder[_]): CodeOrdering = new CodeOrdering { + + val type1: SInterval = t1 + val type2: SInterval = t2 + + private val setup: (EmitCodeBuilder, PCode, PCode) => (PIntervalValue, PIntervalValue) = { + case (cb, lhs: PIntervalCode, rhs: PIntervalCode) => + lhs.memoize(cb, "intervalord_lhs") -> rhs.memoize(cb, "intervalord_rhs") + } + + override def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = { + val pointCompare = ecb.getOrderingFunction(t1.pointType, t2.pointType, CodeOrdering.Compare()) + val cmp = cb.newLocal[Int]("intervalord_cmp", 0) + + val (lhs, rhs) = setup(cb, x, y) + val lstart = EmitCode.fromI(cb.emb)(lhs.loadStart(_).typecast) + val rstart = EmitCode.fromI(cb.emb)(rhs.loadStart(_).typecast) + 
cb.assign(cmp, pointCompare(cb, lstart, rstart)) + cb.ifx(cmp.ceq(0), { + cb.ifx(lhs.includesStart().cne(rhs.includesStart()), { + cb.assign(cmp, lhs.includesStart().mux(-1, 1)) + }, { + val lend = EmitCode.fromI(cb.emb)(lhs.loadEnd(_).typecast) + val rend = EmitCode.fromI(cb.emb)(rhs.loadEnd(_).typecast) + cb.assign(cmp, pointCompare(cb, lend, rend)) + cb.ifx(cmp.ceq(0), { + cb.ifx(lhs.includesEnd().cne(rhs.includesEnd()), { + cb.assign(cmp, lhs.includesEnd().mux(1, -1)) + }) + }) + }) + }) + + cmp + } + + override def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + val pointEq = ecb.getOrderingFunction(t1.pointType, t2.pointType, CodeOrdering.Equiv()) + + val Lout = CodeLabel() + val ret = cb.newLocal[Boolean]("interval_eq", true) + val exitWith = (value: Code[Boolean]) => { + cb.assign(ret, value) + cb.goto(Lout) + } + + val (lhs, rhs) = setup(cb, x, y) + + cb.ifx(lhs.includesStart().cne(rhs.includesStart()) || + lhs.includesEnd().cne(rhs.includesEnd()), { + exitWith(false) + }) + + val lstart = EmitCode.fromI(cb.emb)(lhs.loadStart(_).typecast) + val rstart = EmitCode.fromI(cb.emb)(rhs.loadStart(_).typecast) + cb.ifx(!pointEq(cb, lstart, rstart), exitWith(false)) + + val lend = EmitCode.fromI(cb.emb)(lhs.loadEnd(_).typecast) + val rend = EmitCode.fromI(cb.emb)(rhs.loadEnd(_).typecast) + cb.ifx(!pointEq(cb, lend, rend), exitWith(false)) + + cb.define(Lout) + ret + } + + override def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + val pointLt = ecb.getOrderingFunction(t1.pointType, t2.pointType, CodeOrdering.Lt()) + val pointEq = ecb.getOrderingFunction(t1.pointType, t2.pointType, CodeOrdering.Equiv()) + + val Lout = CodeLabel() + val ret = cb.newLocal[Boolean]("interval_lt") + val exitWith = (value: Code[Boolean]) => { + cb.assign(ret, value) + cb.goto(Lout) + } + + val (lhs, rhs) = setup(cb, x, y) + val lstart = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadStart(_).typecast), "linterval_start") + val rstart = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadStart(_).typecast), "rinterval_start") + + cb.ifx(pointLt(cb, lstart, rstart), exitWith(true)) + cb.ifx(!pointEq(cb, lstart, rstart), exitWith(false)) + cb.ifx(lhs.includesStart() && !rhs.includesStart(), exitWith(true)) + cb.ifx(lhs.includesStart().cne(rhs.includesStart()), exitWith(false)) + + val lend = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadEnd(_).typecast), "linterval_end") + val rend = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadEnd(_).typecast), "rinterval_end") + + cb.ifx(pointLt(cb, lend, rend), exitWith(true)) + cb.assign(ret, pointEq(cb, lend, rend) && !lhs.includesEnd() && rhs.includesEnd()) + + cb.define(Lout) + ret + } + + override def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + val pointLtEq = ecb.getOrderingFunction(t1.pointType, t2.pointType, CodeOrdering.Lteq()) + val pointEq = ecb.getOrderingFunction(t1.pointType, t2.pointType, CodeOrdering.Equiv()) + + val Lout = CodeLabel() + val ret = cb.newLocal[Boolean]("interval_lteq") + val exitWith = (value: Code[Boolean]) => { + cb.assign(ret, value) + cb.goto(Lout) + } + + val (lhs, rhs) = setup(cb, x, y) + val lstart = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadStart(_).typecast), "linterval_start") + val rstart = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadStart(_).typecast), "rinterval_start") + + cb.ifx(!pointLtEq(cb, lstart, rstart), exitWith(false)) + cb.ifx(!pointEq(cb, lstart, rstart), exitWith(true)) + cb.ifx(lhs.includesStart() && !rhs.includesStart(), exitWith(true)) + 
cb.ifx(lhs.includesStart().cne(rhs.includesStart()), exitWith(false)) + + val lend = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadEnd(_).typecast), "linterval_end") + val rend = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadEnd(_).typecast), "rinterval_end") + cb.ifx(!pointLtEq(cb, lend, rend), exitWith(false)) + cb.assign(ret, !pointEq(cb, lend, rend) || !lhs.includesEnd() || rhs.includesEnd()) + + cb.define(Lout) + ret + } + + override def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + val pointGt = ecb.getOrderingFunction(t1.pointType, t2.pointType, CodeOrdering.Gt()) + val pointEq = ecb.getOrderingFunction(t1.pointType, t2.pointType, CodeOrdering.Equiv()) + + val Lout = CodeLabel() + val ret = cb.newLocal[Boolean]("interval_gt") + val exitWith = (value: Code[Boolean]) => { + cb.assign(ret, value) + cb.goto(Lout) + } + + val (lhs, rhs) = setup(cb, x, y) + val lstart = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadStart(_).typecast), "linterval_start") + val rstart = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadStart(_).typecast), "rinterval_start") + + cb.ifx(pointGt(cb, lstart, rstart), exitWith(true)) + cb.ifx(!pointEq(cb, lstart, rstart), exitWith(false)) + cb.ifx(!lhs.includesStart() && rhs.includesStart(), exitWith(true)) + cb.ifx(lhs.includesStart().cne(rhs.includesStart()), exitWith(false)) + + val lend = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadEnd(_).typecast), "linterval_end") + val rend = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadEnd(_).typecast), "rinterval_end") + + cb.ifx(pointGt(cb, lend, rend), exitWith(true)) + cb.assign(ret, pointEq(cb, lend, rend) && lhs.includesEnd() && !rhs.includesEnd()) + + cb.define(Lout) + ret + } + + override def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + val pointGtEq = ecb.getOrderingFunction(t1.pointType, t2.pointType, CodeOrdering.Gteq()) + val pointEq = ecb.getOrderingFunction(t1.pointType, t2.pointType, CodeOrdering.Equiv()) + + val Lout = CodeLabel() + val ret = cb.newLocal[Boolean]("interval_gteq") + val exitWith = (value: Code[Boolean]) => { + cb.assign(ret, value) + cb.goto(Lout) + } + + val (lhs, rhs) = setup(cb, x, y) + val lstart = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadStart(_).typecast), "linterval_start") + val rstart = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadStart(_).typecast), "rinterval_start") + + cb.ifx(!pointGtEq(cb, lstart, rstart), exitWith(false)) + cb.ifx(!pointEq(cb, lstart, rstart), exitWith(true)) + cb.ifx(!lhs.includesStart() && rhs.includesStart(), exitWith(true)) + cb.ifx(lhs.includesStart().cne(rhs.includesStart()), exitWith(false)) + + val lend = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadEnd(_).typecast), "linterval_end") + val rend = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadEnd(_).typecast), "rinterval_end") + cb.ifx(!pointGtEq(cb, lend, rend), exitWith(false)) + cb.assign(ret, !pointEq(cb, lend, rend) || lhs.includesEnd() || !rhs.includesEnd()) + + cb.define(Lout) + ret + } + } + +} diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/IterableOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/IterableOrdering.scala new file mode 100644 index 00000000000..447d04d1bfe --- /dev/null +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/IterableOrdering.scala @@ -0,0 +1,185 @@ +package is.hail.expr.ir.orderings + +import is.hail.asm4s._ +import is.hail.expr.ir.{EmitClassBuilder, EmitCode, EmitCodeBuilder} +import is.hail.types.physical.stypes.interfaces.SContainer +import is.hail.types.physical.{PCode, PIndexableValue} + +object IterableOrdering { 
+ + def make(t1: SContainer, t2: SContainer, ecb: EmitClassBuilder[_]): CodeOrdering = new CodeOrdering { + + val type1: SContainer = t1 + val type2: SContainer = t2 + + private[this] def setup(cb: EmitCodeBuilder, lhs: PCode, rhs: PCode): (PIndexableValue, PIndexableValue) = { + val lhsv = lhs.asIndexable.memoize(cb, "container_ord_lhs") + val rhsv = rhs.asIndexable.memoize(cb, "container_ord_rhs") + lhsv -> rhsv + } + + private[this] def loop(cb: EmitCodeBuilder, lhs: PIndexableValue, rhs: PIndexableValue)( + f: (EmitCode, EmitCode) => Unit + ): Unit = { + val i = cb.newLocal[Int]("i") + val lim = cb.newLocal("lim", lhs.loadLength().min(rhs.loadLength())) + cb.forLoop(cb.assign(i, 0), i < lim, cb.assign(i, i + 1), { + val left = EmitCode.fromI(cb.emb)(lhs.loadElement(_, i).typecast) + val right = EmitCode.fromI(cb.emb)(rhs.loadElement(_, i).typecast) + f(left, right) + }) + } + + override def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = { + val elemCmp = ecb.getOrderingFunction(t1.elementType, t2.elementType, CodeOrdering.Compare()) + + val Lout = CodeLabel() + val cmp = cb.newLocal[Int]("iterable_cmp", 0) + + val (lhs, rhs) = setup(cb, x, y) + loop(cb, lhs, rhs) { (lhs, rhs) => + cb.assign(cmp, elemCmp(cb, lhs, rhs)) + cb.ifx(cmp.cne(0), cb.goto(Lout)) + } + + // if we get here, cmp is 0 + cb.assign(cmp, + Code.invokeStatic2[java.lang.Integer, Int, Int, Int]( + "compare", lhs.loadLength(), rhs.loadLength())) + cb.define(Lout) + cmp + } + + override def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + val elemLt = ecb.getOrderingFunction(t1.elementType, t2.elementType, CodeOrdering.Lt()) + val elemEq = ecb.getOrderingFunction(t1.elementType, t2.elementType, CodeOrdering.Equiv()) + + val ret = cb.newLocal[Boolean]("iterable_lt") + val Lout = CodeLabel() + + val (lhs, rhs) = setup(cb, x, y) + val lt = cb.newLocal("lt", false) + val eq = cb.newLocal("eq", true) + + loop(cb, lhs, rhs) { (lhsEC, rhsEC) => + val lhs = cb.memoize(lhsEC, "lhs_item") + val rhs = cb.memoize(rhsEC, "rhs_item") + cb.assign(lt, elemLt(cb, lhs, rhs)) + cb.assign(eq, !lt && elemEq(cb, lhs, rhs)) + + cb.ifx(!eq, { + cb.assign(ret, lt) + cb.goto(Lout) + }) + } + + cb.assign(ret, lhs.loadLength() < rhs.loadLength()) + cb.define(Lout) + ret + } + + override def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + val elemLtEq = ecb.getOrderingFunction(t1.elementType, t2.elementType, CodeOrdering.Lteq()) + val elemEq = ecb.getOrderingFunction(t1.elementType, t2.elementType, CodeOrdering.Equiv()) + + val ret = cb.newLocal[Boolean]("iterable_lteq") + val Lout = CodeLabel() + + val (lhs, rhs) = setup(cb, x, y) + val lteq = cb.newLocal("lteq", false) + val eq = cb.newLocal("eq", true) + + loop(cb, lhs, rhs) { (lhsEC, rhsEC) => + val lhs = cb.memoize(lhsEC, "lhs_item") + val rhs = cb.memoize(rhsEC, "rhs_item") + cb.assign(lteq, elemLtEq(cb, lhs, rhs)) + cb.assign(eq, elemEq(cb, lhs, rhs)) + + cb.ifx(!eq, { + cb.assign(ret, lteq) + cb.goto(Lout) + }) + } + + cb.assign(ret, lhs.loadLength() <= rhs.loadLength) + cb.define(Lout) + ret + } + + override def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + val elemGt = ecb.getOrderingFunction(t1.elementType, t2.elementType, CodeOrdering.Gt()) + val elemEq = ecb.getOrderingFunction(t1.elementType, t2.elementType, CodeOrdering.Equiv()) + + val ret = cb.newLocal[Boolean]("iterable_gt") + val Lout = CodeLabel() + + val (lhs, rhs) = setup(cb, x, y) + val gt = cb.newLocal("gt", false) + val eq = 
cb.newLocal("eq", true) + + loop(cb, lhs, rhs) { (lhsEC, rhsEC) => + val lhs = cb.memoize(lhsEC, "lhs_item") + val rhs = cb.memoize(rhsEC, "rhs_item") + cb.assign(gt, elemGt(cb, lhs, rhs)) + cb.assign(eq, !gt && elemEq(cb, lhs, rhs)) + + cb.ifx(!eq, { + cb.assign(ret, gt) + cb.goto(Lout) + }) + } + + cb.assign(ret, lhs.loadLength() > rhs.loadLength()) + cb.define(Lout) + ret + } + + override def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + val elemGtEq = ecb.getOrderingFunction(t1.elementType, t2.elementType, CodeOrdering.Gteq()) + val elemEq = ecb.getOrderingFunction(t1.elementType, t2.elementType, CodeOrdering.Equiv()) + + val ret = cb.newLocal[Boolean]("iterable_gteq") + val Lout = CodeLabel() + + val (lhs, rhs) = setup(cb, x, y) + val gteq = cb.newLocal("gteq", true) + val eq = cb.newLocal("eq", true) + + loop(cb, lhs, rhs) { (lhsEC, rhsEC) => + val lhs = cb.memoize(lhsEC, "lhs_item") + val rhs = cb.memoize(rhsEC, "rhs_item") + cb.assign(gteq, elemGtEq(cb, lhs, rhs)) + cb.assign(eq, elemEq(cb, lhs, rhs)) + + cb.ifx(!eq, { + cb.assign(ret, gteq) + cb.goto(Lout) + }) + } + + cb.assign(ret, lhs.loadLength() >= rhs.loadLength) + cb.define(Lout) + ret + } + + override def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + val elemEq = ecb.getOrderingFunction(t1.elementType, t2.elementType, CodeOrdering.Equiv()) + val ret = cb.newLocal[Boolean]("iterable_eq", true) + val Lout = CodeLabel() + val exitWith = (value: Code[Boolean]) => { + cb.assign(ret, value) + cb.goto(Lout) + } + + val (lhs, rhs) = setup(cb, x, y) + cb.ifx(lhs.loadLength().cne(rhs.loadLength()), exitWith(false)) + loop(cb, lhs, rhs) { (lhs, rhs) => + cb.assign(ret, elemEq(cb, lhs, rhs)) + cb.ifx(!ret, cb.goto(Lout)) + } + + cb.define(Lout) + ret + } + } +} diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/LocusOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/LocusOrdering.scala new file mode 100644 index 00000000000..24d2574e203 --- /dev/null +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/LocusOrdering.scala @@ -0,0 +1,48 @@ +package is.hail.expr.ir.orderings + +import is.hail.asm4s.Code +import is.hail.expr.ir.{EmitClassBuilder, EmitCodeBuilder, EmitMethodBuilder} +import is.hail.types.physical.stypes.concrete.SCanonicalLocusPointer +import is.hail.types.physical.stypes.interfaces.{SLocus, SStringValue} +import is.hail.types.physical.{PBinary, PCode, PLocusValue} + +object LocusOrdering { + def make(t1: SLocus, t2: SLocus, ecb: EmitClassBuilder[_]): CodeOrdering = { + + (t1, t2) match { + case (SCanonicalLocusPointer(_), SCanonicalLocusPointer(_)) => + new CodeOrderingCompareConsistentWithOthers { + val type1: SLocus = t1 + val type2: SLocus = t2 + + require(t1.rg == t2.rg) + + def _compareNonnull(cb: EmitCodeBuilder, lhsc: PCode, rhsc: PCode): Code[Int] = { + val codeRG = cb.emb.getReferenceGenome(t1.rg) + val lhs: PLocusValue = lhsc.asLocus.memoize(cb, "locus_cmp_lhs") + val rhs: PLocusValue = rhsc.asLocus.memoize(cb, "locus_cmp_rhs") + val lhsContig = lhs.contig(cb).memoize(cb, "locus_cmp_lcontig").asInstanceOf[SStringValue] + val rhsContig = rhs.contig(cb).memoize(cb, "locus_cmp_rcontig").asInstanceOf[SStringValue] + + // ugh + val lhsContigBinType = lhsContig.get.asBytes().st + val rhsContigBinType = rhsContig.get.asBytes().st + val bincmp = CodeOrdering.makeOrdering(lhsContigBinType, rhsContigBinType, ecb) + + val ret = cb.newLocal[Int]("locus_cmp_ret", 0) + cb.ifx(bincmp.compareNonnull(cb, + lhsContig.get.asBytes().asPCode, + 
rhsContig.get.asBytes().asPCode).ceq(0), { + cb.assign(ret, Code.invokeStatic2[java.lang.Integer, Int, Int, Int]( + "compare", lhs.position(cb), rhs.position(cb))) + }, { + cb.assign(ret, codeRG.invoke[String, String, Int]( + "compare", lhsContig.get.loadString(), rhsContig.get.loadString())) + }) + ret + } + } + } + } + +} diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/PrimitiveOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/PrimitiveOrdering.scala new file mode 100644 index 00000000000..b2f43354fd1 --- /dev/null +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/PrimitiveOrdering.scala @@ -0,0 +1,112 @@ +package is.hail.expr.ir.orderings + +import is.hail.asm4s.Code +import is.hail.expr.ir.{EmitClassBuilder, EmitCodeBuilder} +import is.hail.types.physical.PCode +import is.hail.types.physical.stypes.primitives._ + +object Int32Ordering { + def make(t1: SInt32, t2: SInt32, ecb: EmitClassBuilder[_]): CodeOrdering = { + new CodeOrdering { + + val type1: SInt32 = t1 + val type2: SInt32 = t2 + + def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = + Code.invokeStatic2[java.lang.Integer, Int, Int, Int]("compare", x.tcode[Int], y.tcode[Int]) + + def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Int] < y.tcode[Int] + + def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Int] <= y.tcode[Int] + + def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Int] > y.tcode[Int] + + def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Int] >= y.tcode[Int] + + def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Int].ceq(y.tcode[Int]) + } + } +} + + +object Int64Ordering { + def make(t1: SInt64, t2: SInt64, ecb: EmitClassBuilder[_]): CodeOrdering = { + new CodeOrdering { + + val type1: SInt64 = t1 + val type2: SInt64 = t2 + + def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = + Code.invokeStatic2[java.lang.Long, Long, Long, Int]("compare", x.tcode[Long], y.tcode[Long]) + + def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Long] < y.tcode[Long] + + def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Long] <= y.tcode[Long] + + def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Long] > y.tcode[Long] + + def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Long] >= y.tcode[Long] + + def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Long].ceq(y.tcode[Long]) + } + } +} + +object Float32Ordering { + def make(t1: SFloat32, t2: SFloat32, ecb: EmitClassBuilder[_]): CodeOrdering = { + new CodeOrdering { + + val type1: SFloat32 = t1 + val type2: SFloat32 = t2 + + def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = + Code.invokeStatic2[java.lang.Float, Float, Float, Int]("compare", x.tcode[Float], y.tcode[Float]) + + def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Float] < y.tcode[Float] + + def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Float] <= y.tcode[Float] + + def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Float] > y.tcode[Float] + + def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Float] >= y.tcode[Float] + + def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = 
x.tcode[Float].ceq(y.tcode[Float]) + } + } +} + +object Float64Ordering { + def make(t1: SFloat64, t2: SFloat64, ecb: EmitClassBuilder[_]): CodeOrdering = { + new CodeOrdering { + + val type1: SFloat64 = t1 + val type2: SFloat64 = t2 + + def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = + Code.invokeStatic2[java.lang.Double, Double, Double, Int]("compare", x.tcode[Double], y.tcode[Double]) + + def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Double] < y.tcode[Double] + + def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Double] <= y.tcode[Double] + + def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Double] > y.tcode[Double] + + def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Double] >= y.tcode[Double] + + def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Double].ceq(y.tcode[Double]) + } + } +} + +object BooleanOrdering { + def make(t1: SBoolean, t2: SBoolean, ecb: EmitClassBuilder[_]): CodeOrdering = { + + new CodeOrderingCompareConsistentWithOthers { + val type1: SBoolean = t1 + val type2: SBoolean = t2 + + def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = + Code.invokeStatic2[java.lang.Boolean, Boolean, Boolean, Int]("compare", x.tcode[Boolean], y.tcode[Boolean]) + } + } +} diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/ShuffleOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/ShuffleOrdering.scala new file mode 100644 index 00000000000..cb44907593a --- /dev/null +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/ShuffleOrdering.scala @@ -0,0 +1,27 @@ +package is.hail.expr.ir.orderings + +import is.hail.asm4s.Code +import is.hail.expr.ir.{EmitClassBuilder, EmitCodeBuilder} +import is.hail.types.physical.PCode +import is.hail.types.physical.stypes.concrete.{SCanonicalShufflePointer, SCanonicalShufflePointerCode} +import is.hail.types.physical.stypes.interfaces.SShuffle + +object ShuffleOrdering { + def make(t1: SShuffle, t2: SShuffle, ecb: EmitClassBuilder[_]): CodeOrdering = { + (t1, t2) match { + case (SCanonicalShufflePointer(_), SCanonicalShufflePointer(_)) => + new CodeOrderingCompareConsistentWithOthers { + + val type1: SShuffle = t1 + val type2: SShuffle = t2 + + def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = { + val bcode1 = x.asInstanceOf[SCanonicalShufflePointerCode].binaryRepr + val bcode2 = y.asInstanceOf[SCanonicalShufflePointerCode].binaryRepr + val ord = BinaryOrdering.make(bcode1.st, bcode2.st, ecb) + ord.compareNonnull(cb, x.asString.asBytes(), y.asString.asBytes()) + } + } + } + } +} diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/StringOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/StringOrdering.scala new file mode 100644 index 00000000000..26e7609c0af --- /dev/null +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/StringOrdering.scala @@ -0,0 +1,27 @@ +package is.hail.expr.ir.orderings + +import is.hail.asm4s.Code +import is.hail.expr.ir.{EmitClassBuilder, EmitCodeBuilder} +import is.hail.types.physical.PCode +import is.hail.types.physical.stypes.concrete.{SStringPointer, SStringPointerCode} +import is.hail.types.physical.stypes.interfaces.SString + +object StringOrdering { + def make(t1: SString, t2: SString, ecb: EmitClassBuilder[_]): CodeOrdering = { + (t1, t2) match { + case (SStringPointer(_), SStringPointer(_)) => + new CodeOrderingCompareConsistentWithOthers { + + val 
type1: SString = t1 + val type2: SString = t2 + + def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = { + val bcode1 = x.asInstanceOf[SStringPointerCode].binaryRepr + val bcode2 = y.asInstanceOf[SStringPointerCode].binaryRepr + val ord = BinaryOrdering.make(bcode1.st, bcode2.st, ecb) + ord.compareNonnull(cb, x.asString.asBytes(), y.asString.asBytes()) + } + } + } + } +} diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/StructOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/StructOrdering.scala new file mode 100644 index 00000000000..1f81620e316 --- /dev/null +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/StructOrdering.scala @@ -0,0 +1,161 @@ +package is.hail.expr.ir.orderings + +import is.hail.asm4s.{Code, CodeLabel} +import is.hail.expr.ir.{Ascending, EmitClassBuilder, EmitCode, EmitCodeBuilder, SortOrder} +import is.hail.types.physical.stypes.interfaces.SBaseStruct +import is.hail.types.physical.{PBaseStructValue, PCode} + +object StructOrdering { + def make( + t1: SBaseStruct, + t2: SBaseStruct, + ecb: EmitClassBuilder[_], + sortOrders: Array[SortOrder] = null, + missingFieldsEqual: Boolean = true + ): CodeOrdering = new CodeOrdering { + + val type1: SBaseStruct = t1 + val type2: SBaseStruct = t2 + + require(sortOrders == null || sortOrders.size == t1.size) + + def setup(cb: EmitCodeBuilder, lhs: PCode, rhs: PCode): (PBaseStructValue, PBaseStructValue) = { + lhs.asBaseStruct.memoize(cb, "structord_lhs") -> rhs.asBaseStruct.memoize(cb, "structord_rhs") + } + + private[this] def fieldOrdering(i: Int, op: CodeOrdering.Op): CodeOrdering.F[op.ReturnType] = + ecb.getOrderingFunction(t1.fieldTypes(i), t2.fieldTypes(i), + if (sortOrders == null) Ascending else sortOrders(i), + op) + + override def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = { + val (lhs, rhs) = setup(cb, x, y) + val Lout = CodeLabel() + val cmp = cb.newLocal("cmp", 0) + + var i = 0 + while (i < t1.size) { + val fldCmp = fieldOrdering(i, CodeOrdering.Compare(missingFieldsEqual)) + val l = EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i).typecast } + val r = EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i).typecast } + cb.assign(cmp, fldCmp(cb, l, r)) + cb.ifx(cmp.cne(0), cb.goto(Lout)) + i += 1 + } + + cb.define(Lout) + cmp + } + + override def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + val (lhs, rhs) = setup(cb, x, y) + val Lout = CodeLabel() + val lt = cb.newLocal("lt", true) + val eq = cb.newLocal("eq", true) + + var i = 0 + while (i < t1.size) { + val fldLt = fieldOrdering(i, CodeOrdering.Lt(missingFieldsEqual)) + val fldEq = fieldOrdering(i, CodeOrdering.Equiv(missingFieldsEqual)) + + val l = cb.memoize(EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i).typecast }, s"struct_lt_lhs_fld$i") + val r = cb.memoize(EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i).typecast }, s"struct_lt_rhs_fld$i") + cb.assign(lt, fldLt(cb, l, r)) + cb.assign(eq, !lt && fldEq(cb, l, r)) + cb.ifx(!eq, cb.goto(Lout)) + i += 1 + } + + cb.define(Lout) + lt + } + + override def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + val (lhs, rhs) = setup(cb, x, y) + val Lout = CodeLabel() + val lteq = cb.newLocal("lteq", true) + val eq = cb.newLocal("eq", true) + + var i = 0 + while (i < t1.size) { + val fldLtEq = fieldOrdering(i, CodeOrdering.Lteq(missingFieldsEqual)) + val fldEq = fieldOrdering(i, CodeOrdering.Equiv(missingFieldsEqual)) + + val l = cb.memoize(EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, 
i).typecast }, s"struct_lteq_lhs_fld$i") + val r = cb.memoize(EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i).typecast }, s"struct_lteq_rhs_fld$i") + cb.assign(lteq, fldLtEq(cb, l, r)) + cb.assign(eq, fldEq(cb, l, r)) + cb.ifx(!eq, cb.goto(Lout)) + i += 1 + } + + cb.define(Lout) + lteq + } + + override def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + val (lhs, rhs) = setup(cb, x, y) + val Lout = CodeLabel() + val gt = cb.newLocal("gt", false) + val eq = cb.newLocal("eq", true) + + var i = 0 + while (i < t1.size) { + val fldGt = fieldOrdering(i, CodeOrdering.Gt(missingFieldsEqual)) + val fldEq = fieldOrdering(i, CodeOrdering.Equiv(missingFieldsEqual)) + + val l = cb.memoize(EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i).typecast }, s"struct_gt_lhs_fld$i") + val r = cb.memoize(EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i).typecast }, s"struct_gt_rhs_fld$i") + cb.assign(gt, fldGt(cb, l, r)) + cb.assign(eq, !gt && fldEq(cb, l, r)) + cb.ifx(!eq, cb.goto(Lout)) + i += 1 + } + + cb.define(Lout) + gt + } + + override def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + val (lhs, rhs) = setup(cb, x, y) + val Lout = CodeLabel() + val gteq = cb.newLocal("gteq", true) + val eq = cb.newLocal("eq", true) + + var i = 0 + while (i < t1.size) { + val fldGtEq = fieldOrdering(i, CodeOrdering.Gteq(missingFieldsEqual)) + val fldEq = fieldOrdering(i, CodeOrdering.Equiv(missingFieldsEqual)) + + val l = cb.memoize(EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i).typecast }, s"struct_gteq_lhs_fld$i") + val r = cb.memoize(EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i).typecast }, s"struct_gteq_rhs_fld$i") + cb.assign(gteq, fldGtEq(cb, l, r)) + cb.assign(eq, fldEq(cb, l, r)) + cb.ifx(!eq, cb.goto(Lout)) + i += 1 + } + + cb.define(Lout) + gteq + } + + override def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + val (lhs, rhs) = setup(cb, x, y) + val Lout = CodeLabel() + val eq = cb.newLocal("cmp", true) + + var i = 0 + while (i < t1.size) { + val fldEq = fieldOrdering(i, CodeOrdering.Equiv(missingFieldsEqual)) + val l = EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i).typecast } + val r = EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i).typecast } + cb.assign(eq, fldEq(cb, l, r)) + cb.ifx(!eq, cb.goto(Lout)) + i += 1 + } + + cb.define(Lout) + eq + } + } +} diff --git a/hail/src/main/scala/is/hail/io/fs/HadoopFS.scala b/hail/src/main/scala/is/hail/io/fs/HadoopFS.scala index a0b120ce8ad..95db2fc7484 100644 --- a/hail/src/main/scala/is/hail/io/fs/HadoopFS.scala +++ b/hail/src/main/scala/is/hail/io/fs/HadoopFS.scala @@ -115,6 +115,14 @@ class HadoopFS(val conf: SerializableHadoopConfiguration) extends FS { getFileSystem(dirname).mkdirs(new hadoop.fs.Path(dirname)) } + def remove(fname: String): Unit = { + getFileSystem(fname).delete(new hadoop.fs.Path(fname), false) + } + + def rmtree(dirname: String): Unit = { + getFileSystem(dirname).delete(new hadoop.fs.Path(dirname), true) + } + def delete(filename: String, recursive: Boolean) { getFileSystem(filename).delete(new hadoop.fs.Path(filename), recursive) } diff --git a/hail/src/main/scala/is/hail/lir/Emit.scala b/hail/src/main/scala/is/hail/lir/Emit.scala index 1051d234f32..a555eb6b30c 100644 --- a/hail/src/main/scala/is/hail/lir/Emit.scala +++ b/hail/src/main/scala/is/hail/lir/Emit.scala @@ -8,6 +8,8 @@ import org.objectweb.asm.Opcodes._ import org.objectweb.asm.util.{CheckClassAdapter, Textifier, TraceClassVisitor} import scala.collection.mutable +import 
java.io.ByteArrayOutputStream +import java.nio.charset.StandardCharsets object Emit { def emitMethod(cv: ClassVisitor, m: Method, debugInformation: Boolean): Int = { @@ -221,8 +223,11 @@ object Emit { b } catch { case e: Exception => - val trace = new TraceClassVisitor(new PrintWriter(System.err)) + val buffer = new ByteArrayOutputStream() + val trace = new TraceClassVisitor(new PrintWriter(buffer)) val check = new CheckClassAdapter(trace) + val classJVMByteCodeAsEscapedStr = buffer.toString(StandardCharsets.UTF_8.name()) + log.error(s"lir exception ${e}:\n" + classJVMByteCodeAsEscapedStr) emitClass(c, check, logMethodSizes = false) throw e } diff --git a/hail/src/main/scala/is/hail/lir/package.scala b/hail/src/main/scala/is/hail/lir/package.scala index 1809c5c9fef..4ed17cf071d 100644 --- a/hail/src/main/scala/is/hail/lir/package.scala +++ b/hail/src/main/scala/is/hail/lir/package.scala @@ -5,9 +5,9 @@ import is.hail.utils.FastIndexedSeq import org.objectweb.asm.Opcodes._ package object lir { - var counter: Long = 0 + private[this] var counter: Long = 0 - def genName(tag: String, baseName: String): String = { + def genName(tag: String, baseName: String): String = synchronized { counter += 1 if (baseName != null) s"__$tag$counter$baseName" diff --git a/hail/src/main/scala/is/hail/services/JSONLogLayout.scala b/hail/src/main/scala/is/hail/services/JSONLogLayout.scala new file mode 100644 index 00000000000..3afdbea8582 --- /dev/null +++ b/hail/src/main/scala/is/hail/services/JSONLogLayout.scala @@ -0,0 +1,79 @@ +package is.hail.services + +import java.text._ +import java.util.function._ +import java.nio.charset.StandardCharsets +import java.io.StringWriter + +import org.json4s._ +import org.apache.log4j._ +import org.apache.log4j.spi._ +import org.json4s.jackson.JsonMethods +import scala.collection.mutable.ArrayBuffer + +class DateFormatter { + private[this] val fmt = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS") + private[this] val buffer = new StringBuffer() + private[this] val fp = new FieldPosition(0) + + def format(milliseconds: Long): String = { + buffer.setLength(0) + fmt.format(milliseconds, buffer, fp) + buffer.toString + } +} + +object JSONLogLayout { + private val datefmt = ThreadLocal.withInitial(new Supplier[DateFormatter]() { + def get() = new DateFormatter() + }) +} + +class JSONLogLayout extends Layout { + import JSONLogLayout._ + + def ignoresThrowable(): Boolean = false + + def activateOptions(): Unit = () + + def format(event: LoggingEvent): String = { + val threadName = event.getThreadName(); + val timestamp = event.getTimeStamp(); + val mdc = event.getProperties(); + val ndc = event.getNDC(); + val throwableInfo = event.getThrowableInformation() + val locationInfo = event.getLocationInformation() + val fields = new ArrayBuffer[JField]() + fields += JField("@version", JInt(1)) + fields += JField("@timestamp", JString(datefmt.get.format(timestamp))) + fields += JField("message", JString(event.getRenderedMessage())) + fields += JField("filename", JString(locationInfo.getFileName())) + fields += JField("line_number", JString(locationInfo.getLineNumber())) + fields += JField("class", JString(locationInfo.getClassName())) + fields += JField("method", JString(locationInfo.getMethodName())) + fields += JField("logger_name", JString(event.getLoggerName())) + + val mdcFields = new ArrayBuffer[JField]() + mdc.forEach(new BiConsumer[Any, Any]() { def accept(key: Any, value: Any): Unit = { + mdcFields += JField(key.toString, JString(value.toString)) + } }) + fields += JField("mdc", 
JObject(mdcFields:_*)) + + fields += JField("ndc", JString(ndc)) + fields += JField("severity", JString(event.getLevel().toString())) + fields += JField("thread_name", JString(threadName)) + + if (throwableInfo != null) { + fields += JField("exception_class", JString(throwableInfo.getThrowable().getClass().getCanonicalName())) + fields += JField("exception_message", JString(throwableInfo.getThrowable().getMessage())) + fields += JField("exception_stacktrace", JString(formatException(throwableInfo.getThrowable()))) + } + val jsonEvent = JObject(fields:_*) + + + val sw = new StringWriter() + JsonMethods.mapper.writeValue(sw, jsonEvent) + sw.append('\n') + sw.toString() + } +} diff --git a/hail/src/main/scala/is/hail/services/Tokens.scala b/hail/src/main/scala/is/hail/services/Tokens.scala index fd2820a4ac5..4ac57f97812 100644 --- a/hail/src/main/scala/is/hail/services/Tokens.scala +++ b/hail/src/main/scala/is/hail/services/Tokens.scala @@ -25,7 +25,10 @@ object Tokens { } def getTokensFile(): String = { - if (DeployConfig.get.location == "external") + val file = System.getenv("HAIL_TOKENS_FILE") + if (file != null) + file + else if (DeployConfig.get.location == "external") s"${ System.getenv("HOME") }/.hail/tokens.json" else "/user-tokens/tokens.json" diff --git a/hail/src/main/scala/is/hail/services/package.scala b/hail/src/main/scala/is/hail/services/package.scala index 01f1b970b65..997b11eedf8 100644 --- a/hail/src/main/scala/is/hail/services/package.scala +++ b/hail/src/main/scala/is/hail/services/package.scala @@ -3,11 +3,14 @@ package is.hail import javax.net.ssl.SSLException import java.net.SocketException import java.io.EOFException +import is.hail.utils._ +import org.apache.http.NoHttpResponseException import org.apache.http.conn.HttpHostConnectException import org.apache.log4j.{LogManager, Logger} import scala.util.Random +import java.io._ package object services { lazy val log: Logger = LogManager.getLogger("is.hail.services") @@ -28,14 +31,18 @@ package object services { def isTransientError(e: Throwable): Boolean = { e match { + case e: NoHttpResponseException => + true case e: ClientResponseException => RETRYABLE_HTTP_STATUS_CODES.contains(e.status) case e: HttpHostConnectException => true case e: SocketException => - e.getMessage.contains("Connection reset") || e.getMessage.contains("Broken pipe") + e.getMessage != null && ( + e.getMessage.contains("Connection reset") || e.getMessage.contains("Broken pipe")) case e: EOFException => - e.getMessage.contains("SSL peer shut down incorrectly") + e.getMessage != null && ( + e.getMessage.contains("SSL peer shut down incorrectly")) case e: SSLException => val cause = e.getCause cause != null && isTransientError(cause) @@ -63,4 +70,13 @@ package object services { throw new AssertionError("unreachable") } + + def formatException(e: Throwable): String = { + using(new StringWriter()) { sw => + using(new PrintWriter(sw)) { pw => + e.printStackTrace(pw) + sw.toString + } + } + } } diff --git a/hail/src/main/scala/is/hail/services/shuffler/LSM.scala b/hail/src/main/scala/is/hail/services/shuffler/LSM.scala index 999dea697c6..1bc54415c50 100644 --- a/hail/src/main/scala/is/hail/services/shuffler/LSM.scala +++ b/hail/src/main/scala/is/hail/services/shuffler/LSM.scala @@ -162,7 +162,7 @@ class LSM ( else if (processed == 1) assert(least != -1 && greatest == -1) else if (processed >= 2) - assert(keyOrd.compare(least, greatest) < 0) + assert(keyOrd.compare(least, greatest) <= 0) assert(samplesEnd <= samples.length) if (processed == 0) { diff 
--git a/hail/src/main/scala/is/hail/services/shuffler/ShuffleClient.scala b/hail/src/main/scala/is/hail/services/shuffler/ShuffleClient.scala index 35824b09347..6c729d5f805 100644 --- a/hail/src/main/scala/is/hail/services/shuffler/ShuffleClient.scala +++ b/hail/src/main/scala/is/hail/services/shuffler/ShuffleClient.scala @@ -152,14 +152,15 @@ class ShuffleClient ( def this(shuffleType: TShuffle, uuid: Array[Byte]) = this(shuffleType, uuid, None, None, None) - val codecs = { - ExecutionTimer.logTime("ShuffleClient.codecs") { timer => - RegionPool.scoped(rp => - using(new ExecuteContext("/tmp", "file:///tmp", null, null, Region(pool=rp), timer, null)) { ctx => + val codecs = ctx match { + case None => + ExecutionTimer.logTime("ShuffleClient.codecs") { timer => + ExecuteContext.scoped("/tmp", "file:///tmp", null, null, timer, null) { ctx => new ShuffleCodecSpec(ctx, shuffleType, rowEncodingPType, keyEncodingPType) } - ) - } + } + case Some(ctx) => + new ShuffleCodecSpec(ctx, shuffleType, rowEncodingPType, keyEncodingPType) } private[this] val s = ShuffleClient.socket(ctx) @@ -292,7 +293,9 @@ class ShuffleClient ( } def partitionBoundsValueFinished(): Boolean = { - keyDecoder.readByte() == 0.toByte + val b = keyDecoder.readByte() + assert(b == 0.toByte || b == 1.toByte, b) + b == 0.toByte } def endPartitionBounds(): Unit = { @@ -303,14 +306,16 @@ class ShuffleClient ( startOperation(Wire.STOP) out.flush() log.info(s"stop") - assert(in.readByte() == 0.toByte) + val byte = in.readByte() + assert(byte == 0.toByte, byte) log.info(s"stop done") } def close(): Unit = { out.writeByte(Wire.EOS) out.flush() - assert(in.readByte() == Wire.EOS) + val byte = in.readByte() + assert(byte == Wire.EOS, byte) s.close() } } diff --git a/hail/src/main/scala/is/hail/services/shuffler/ShuffleCodecSpec.scala b/hail/src/main/scala/is/hail/services/shuffler/ShuffleCodecSpec.scala index fd4ad21fdea..07fd7afa806 100644 --- a/hail/src/main/scala/is/hail/services/shuffler/ShuffleCodecSpec.scala +++ b/hail/src/main/scala/is/hail/services/shuffler/ShuffleCodecSpec.scala @@ -4,6 +4,7 @@ import is.hail.expr.ir._ import is.hail.types.virtual._ import is.hail.types.physical._ import is.hail.annotations.Region +import org.apache.log4j.Logger class ShuffleCodecSpec( ctx: ExecuteContext, @@ -11,6 +12,8 @@ class ShuffleCodecSpec( _rowEncodingPType: Option[PType] = None, _keyEncodingPType: Option[PType] = None ) { + private[this] val log = Logger.getLogger(getClass.getName()) + val (rowDecodedPType, makeRowDecoder) = shuffleType.rowEType.buildStructDecoder(ctx, shuffleType.rowType) assert(rowDecodedPType == shuffleType.rowDecodedPType) val rowEncodingPType = _rowEncodingPType.getOrElse(rowDecodedPType) @@ -26,9 +29,17 @@ class ShuffleCodecSpec( if (keyDecodedPType == rowDecodedPType) { rowDecodedPType } else { - new PSubsetStruct(rowDecodedPType, shuffleType.keyFields.map(_.field).toArray) + new PSubsetStruct(rowDecodedPType, shuffleType.keyFields.map(_.field)) } } def constructKeyFromDecodedRow(r: Region, row: Long): Long = keyDecodedPType.copyFromAddress(r, keyPSubsetStruct, row, false) + + log.info(s"shuffleType.rowEType: ${shuffleType.rowEType}") + log.info(s"shuffleType.keyEType: ${shuffleType.keyEType}") + + log.info(s"rowDecodedPType: ${rowDecodedPType}") + log.info(s"rowEncodingPType: ${rowEncodingPType}") + log.info(s"keyDecodedPType: ${keyDecodedPType}") + log.info(s"keyEncodingPType: ${keyEncodingPType}") } diff --git a/hail/src/main/scala/is/hail/services/shuffler/server/ShuffleServer.scala 
b/hail/src/main/scala/is/hail/services/shuffler/server/ShuffleServer.scala index a7653505a3e..453be46367a 100644 --- a/hail/src/main/scala/is/hail/services/shuffler/server/ShuffleServer.scala +++ b/hail/src/main/scala/is/hail/services/shuffler/server/ShuffleServer.scala @@ -17,6 +17,8 @@ import javax.net.ssl._ import org.apache.log4j.Logger import scala.annotation.switch +import is.hail.backend.local.LocalBackend +import is.hail.HailContext class Handler ( private[this] val server: ShuffleServer, @@ -142,10 +144,6 @@ class Shuffle ( } } } - log.info(s"rowDecodedPType: ${codecs.rowDecodedPType}") - log.info(s"rowEncodingPType: ${codecs.rowEncodingPType}") - log.info(s"keyDecodedPType: ${codecs.keyDecodedPType}") - log.info(s"keyEncodingPType: ${codecs.keyEncodingPType}") private[this] val store = new LSM(s"/tmp/${uuidToString(uuid)}", codecs, pool) @@ -237,6 +235,8 @@ class ShuffleServer() extends AutoCloseable { val executor = Executors.newCachedThreadPool() var stopped = false + val context = HailContext(LocalBackend("/tmp"), skipLoggingConfiguration = true) + def serveInBackground(): Future[_] = executor.submit(new Runnable() { def run(): Unit = serve() }) diff --git a/hail/src/main/scala/is/hail/types/Box.scala b/hail/src/main/scala/is/hail/types/Box.scala index 2ae70062251..a338bdf1f6f 100644 --- a/hail/src/main/scala/is/hail/types/Box.scala +++ b/hail/src/main/scala/is/hail/types/Box.scala @@ -1,19 +1,25 @@ package is.hail.types -final case class Box[T](var b: Option[T] = None, matchCond: (T, T) => Boolean = { (a: T, b: T) => a == b }) { - def unify(t: T): Boolean = b match { +import java.util.function._ + +final case class Box[T]( + b: ThreadLocal[Option[T]] = ThreadLocal.withInitial( + new Supplier[Option[T]] { def get = None }), + matchCond: (T, T) => Boolean = { (a: T, b: T) => a == b } +) { + def unify(t: T): Boolean = b.get match { case Some(bt) => matchCond(t, bt) case None => - b = Some(t) + b.set(Some(t)) true } def clear() { - b = None + b.set(None) } - def get: T = b.get + def get: T = b.get.get - def isEmpty: Boolean = b.isEmpty - def isDefined: Boolean = b.isDefined + def isEmpty: Boolean = b.get.isEmpty + def isDefined: Boolean = b.get.isDefined } diff --git a/hail/src/main/scala/is/hail/types/physical/PArray.scala b/hail/src/main/scala/is/hail/types/physical/PArray.scala index 0e7f87c5bc8..e4450a49c7c 100644 --- a/hail/src/main/scala/is/hail/types/physical/PArray.scala +++ b/hail/src/main/scala/is/hail/types/physical/PArray.scala @@ -1,8 +1,9 @@ package is.hail.types.physical -import is.hail.annotations.{Annotation, CodeOrdering} +import is.hail.annotations.Annotation import is.hail.check.Gen import is.hail.expr.ir.EmitMethodBuilder +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.types.virtual.TArray trait PArrayIterator { @@ -16,11 +17,6 @@ abstract class PArray extends PContainer { lazy val virtualType: TArray = TArray(elementType.virtualType) protected[physical] final val elementRequired = elementType.required - def codeOrdering(mb: EmitMethodBuilder[_], other: PType): CodeOrdering = { - assert(this isOfType other) - CodeOrdering.iterableOrdering(this, other.asInstanceOf[PArray], mb) - } - def elementIterator(aoff: Long, length: Int): PArrayIterator override def genNonmissingValue: Gen[IndexedSeq[Annotation]] = diff --git a/hail/src/main/scala/is/hail/types/physical/PBaseStruct.scala b/hail/src/main/scala/is/hail/types/physical/PBaseStruct.scala index bcf550490b9..6f3a6a36daa 100644 --- a/hail/src/main/scala/is/hail/types/physical/PBaseStruct.scala +++ 
b/hail/src/main/scala/is/hail/types/physical/PBaseStruct.scala @@ -3,6 +3,7 @@ package is.hail.types.physical import is.hail.annotations._ import is.hail.asm4s.{Code, _} import is.hail.check.Gen +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, IEmitCode, SortOrder} import is.hail.types.physical.stypes.interfaces.{SBaseStructCode, SBaseStructValue} import is.hail.utils._ @@ -94,17 +95,6 @@ abstract class PBaseStruct extends PType { sb.result() } - final def codeOrdering(mb: EmitMethodBuilder[_], other: PType): CodeOrdering = - codeOrdering(mb, other, null, true) - - final def codeOrdering(mb: EmitMethodBuilder[_], other: PType, missingFieldsEqual: Boolean): CodeOrdering = - codeOrdering(mb, other, null, missingFieldsEqual) - - final def codeOrdering(mb: EmitMethodBuilder[_], other: PType, so: Array[SortOrder]): CodeOrdering = - codeOrdering(mb, other, so, true) - - def codeOrdering(mb: EmitMethodBuilder[_], other: PType, so: Array[SortOrder], missingFieldsEqual: Boolean): CodeOrdering - def isPrefixOf(other: PBaseStruct): Boolean = size <= other.size && isCompatibleWith(other) diff --git a/hail/src/main/scala/is/hail/types/physical/PBinary.scala b/hail/src/main/scala/is/hail/types/physical/PBinary.scala index 0474a4dee09..9b0c679b41e 100644 --- a/hail/src/main/scala/is/hail/types/physical/PBinary.scala +++ b/hail/src/main/scala/is/hail/types/physical/PBinary.scala @@ -1,10 +1,10 @@ package is.hail.types.physical -import is.hail.annotations.CodeOrdering import is.hail.annotations.{Region, UnsafeOrdering, _} import is.hail.asm4s._ import is.hail.check.Arbitrary._ import is.hail.check.Gen +import is.hail.expr.ir.orderings.{CodeOrdering, CodeOrderingCompareConsistentWithOthers} import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder} import is.hail.types.physical.stypes.interfaces.{SBinaryCode, SBinaryValue} import is.hail.types.virtual.TBinary @@ -35,33 +35,6 @@ abstract class PBinary extends PType { } } - def codeOrdering(mb: EmitMethodBuilder[_], other: PType): CodeOrdering = { - assert(other isOfType this) - new CodeOrderingCompareConsistentWithOthers { - def compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = { - val xv: SBinaryValue = x.asBinary.memoize(cb, "xv") - val yv: SBinaryValue = y.asBinary.memoize(cb, "yv") - val xlen = cb.newLocal[Int]("xlen", xv.loadLength()) - val ylen = cb.newLocal[Int]("ylen", yv.loadLength()) - val lim = cb.newLocal[Int]("lim", (xlen < ylen).mux(xlen, ylen)) - val i = cb.newLocal[Int]("i", 0) - val cmp = cb.newLocal[Int]("cmp", 0) - val Lbreak = CodeLabel() - - cb.forLoop({}, i < lim, cb.assign(i, i + 1), { - val compval = Code.invokeStatic2[java.lang.Integer, Int, Int, Int]("compare", - Code.invokeStatic1[java.lang.Byte, Byte, Int]("toUnsignedInt", xv.loadByte(i)), - Code.invokeStatic1[java.lang.Byte, Byte, Int]("toUnsignedInt", yv.loadByte(i))) - cb.assign(cmp, compval) - cb.ifx(cmp.cne(0), cb.goto(Lbreak)) - }) - - cb.define(Lbreak) - cmp.ceq(0).mux(Code.invokeStatic2[java.lang.Integer, Int, Int, Int]("compare", xlen, ylen), cmp) - } - } - } - def contentAlignment: Long def lengthHeaderBytes: Long diff --git a/hail/src/main/scala/is/hail/types/physical/PBoolean.scala b/hail/src/main/scala/is/hail/types/physical/PBoolean.scala index acadfc46038..48fcfb68f16 100644 --- a/hail/src/main/scala/is/hail/types/physical/PBoolean.scala +++ b/hail/src/main/scala/is/hail/types/physical/PBoolean.scala @@ -2,6 +2,7 @@ package is.hail.types.physical import is.hail.annotations.{Region, 
UnsafeOrdering, _} import is.hail.asm4s.Code +import is.hail.expr.ir.orderings.{CodeOrdering, CodeOrderingCompareConsistentWithOthers} import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder} import is.hail.types.physical.stypes.SCode import is.hail.types.physical.stypes.primitives.{SBoolean, SBooleanCode} @@ -24,16 +25,6 @@ class PBoolean(override val required: Boolean) extends PType with PPrimitive { } } - def codeOrdering(mb: EmitMethodBuilder[_], other: PType): CodeOrdering = { - assert(other isOfType this) - new CodeOrderingCompareConsistentWithOthers { - type T = Boolean - - def compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = - Code.invokeStatic2[java.lang.Boolean, Boolean, Boolean, Int]("compare", x.tcode[Boolean], y.tcode[Boolean]) - } - } - override def byteSize: Long = 1 def sType: SBoolean = SBoolean(required) diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalBaseStruct.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalBaseStruct.scala index 6ba407cd887..c7d994b3069 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalBaseStruct.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalBaseStruct.scala @@ -6,7 +6,7 @@ import is.hail.expr.ir.{EmitCode, EmitCodeBuilder} import is.hail.types.BaseStruct import is.hail.types.physical.stypes.SCode import is.hail.types.physical.stypes.concrete.{SBaseStructPointer, SBaseStructPointerCode, SBaseStructPointerSettable} -import is.hail.types.physical.stypes.interfaces.SStruct +import is.hail.types.physical.stypes.interfaces.SBaseStruct import is.hail.utils._ import org.apache.spark.sql.Row @@ -154,7 +154,7 @@ abstract class PCanonicalBaseStruct(val types: Array[PType]) extends PBaseStruct } } - def sType: SStruct = SBaseStructPointer(this) + def sType: SBaseStruct = SBaseStructPointer(this) def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): SBaseStructPointerCode = new SBaseStructPointerCode(SBaseStructPointer(this), addr) diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalCall.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalCall.scala index 72bbfba5133..264bdafb4ef 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalCall.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalCall.scala @@ -1,7 +1,8 @@ package is.hail.types.physical -import is.hail.annotations.{Annotation, CodeOrdering, Region, UnsafeOrdering} +import is.hail.annotations.{Annotation, Region, UnsafeOrdering} import is.hail.asm4s._ +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder} import is.hail.types.physical.stypes.SCode import is.hail.types.physical.stypes.concrete.{SCanonicalCall, SCanonicalCallCode} @@ -20,11 +21,6 @@ final case class PCanonicalCall(required: Boolean = false) extends PCall { override def unsafeOrdering(): UnsafeOrdering = representation.unsafeOrdering() // this was a terrible idea - def codeOrdering(mb: EmitMethodBuilder[_], other: PType): CodeOrdering = { - assert(other isOfType this) - PInt32().codeOrdering(mb) - } - def setRequired(required: Boolean) = if (required == this.required) this else PCanonicalCall(required) override def unstagedStoreAtAddress(addr: Long, region: Region, srcPType: PType, srcAddress: Long, deepCopy: Boolean): Unit = { diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalLocus.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalLocus.scala index 55379c7247a..3104abb1559 100644 --- 
a/hail/src/main/scala/is/hail/types/physical/PCanonicalLocus.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalLocus.scala @@ -2,6 +2,7 @@ package is.hail.types.physical import is.hail.annotations._ import is.hail.asm4s._ +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCode, EmitCodeBuilder, EmitMethodBuilder} import is.hail.types.physical.stypes.SCode import is.hail.types.physical.stypes.concrete.{SCanonicalLocusPointer, SCanonicalLocusPointerCode, SStringPointer} @@ -76,9 +77,6 @@ final case class PCanonicalLocus(rgBc: BroadcastRG, required: Boolean = false) e } } - def codeOrdering(mb: EmitMethodBuilder[_], other: PType): CodeOrdering = - CodeOrdering.locusOrdering(this, other.asInstanceOf[PLocus], mb) - override def unstagedStoreAtAddress(addr: Long, region: Region, srcPType: PType, srcAddress: Long, deepCopy: Boolean): Unit = { srcPType match { case pt: PCanonicalLocus => representation.unstagedStoreAtAddress(addr, region, pt.representation, srcAddress, deepCopy) diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalShuffle.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalShuffle.scala index 5f284cefda6..b9b19fe1c2b 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalShuffle.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalShuffle.scala @@ -3,6 +3,7 @@ package is.hail.types.physical import is.hail.annotations._ import is.hail.asm4s._ import is.hail.expr.ir._ +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.types.physical.stypes.SCode import is.hail.types.physical.stypes.concrete.{SBinaryPointerCode, SCanonicalShufflePointer, SCanonicalShufflePointerCode} import is.hail.types.virtual._ @@ -17,11 +18,6 @@ final case class PCanonicalShuffle( val representation: PCanonicalBinary = PCanonicalBinary(required) - def codeOrdering(mb: EmitMethodBuilder[_], other: PType): CodeOrdering = { - assert(other isOfType this) - representation.codeOrdering(mb) - } - def setRequired(required: Boolean) = if (required == this.required) this else PCanonicalShuffle(tShuffle, required) def unsafeOrdering(): UnsafeOrdering = representation.unsafeOrdering() diff --git a/hail/src/main/scala/is/hail/types/physical/PDict.scala b/hail/src/main/scala/is/hail/types/physical/PDict.scala index e7704495cb7..8ec8b0688b2 100644 --- a/hail/src/main/scala/is/hail/types/physical/PDict.scala +++ b/hail/src/main/scala/is/hail/types/physical/PDict.scala @@ -2,7 +2,6 @@ package is.hail.types.physical import is.hail.annotations._ import is.hail.check.Gen -import is.hail.expr.ir.EmitMethodBuilder import is.hail.types.virtual.TDict abstract class PDict extends PContainer { @@ -13,11 +12,6 @@ abstract class PDict extends PContainer { def elementType: PStruct - def codeOrdering(mb: EmitMethodBuilder[_], other: PType): CodeOrdering = { - assert(other isOfType this) - CodeOrdering.mapOrdering(this, other.asInstanceOf[PDict], mb) - } - override def genNonmissingValue: Gen[Annotation] = Gen.buildableOf2[Map](Gen.zip(keyType.genValue, valueType.genValue)) } diff --git a/hail/src/main/scala/is/hail/types/physical/PFloat32.scala b/hail/src/main/scala/is/hail/types/physical/PFloat32.scala index 9184c233c8f..d6119e2c335 100644 --- a/hail/src/main/scala/is/hail/types/physical/PFloat32.scala +++ b/hail/src/main/scala/is/hail/types/physical/PFloat32.scala @@ -2,6 +2,7 @@ package is.hail.types.physical import is.hail.annotations._ import is.hail.asm4s.{Code, _} +import is.hail.expr.ir.orderings.CodeOrdering import 
is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder} import is.hail.types.physical.stypes.primitives.{SFloat32, SFloat32Code} import is.hail.types.physical.stypes.{SCode, SType} @@ -25,24 +26,6 @@ class PFloat32(override val required: Boolean) extends PNumeric with PPrimitive } } - def codeOrdering(mb: EmitMethodBuilder[_], other: PType): CodeOrdering = { - assert(other isOfType this) - new CodeOrdering { - def compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = - Code.invokeStatic2[java.lang.Float, Float, Float, Int]("compare", x.tcode[Float], y.tcode[Float]) - - def ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Float] < y.tcode[Float] - - def lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Float] <= y.tcode[Float] - - def gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Float] > y.tcode[Float] - - def gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Float] >= y.tcode[Float] - - def equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Float].ceq(y.tcode[Float]) - } - } - override def byteSize: Long = 4 override def zero = coerce[PFloat32](const(0.0f)) diff --git a/hail/src/main/scala/is/hail/types/physical/PFloat64.scala b/hail/src/main/scala/is/hail/types/physical/PFloat64.scala index 640157b98c8..1136bc03e13 100644 --- a/hail/src/main/scala/is/hail/types/physical/PFloat64.scala +++ b/hail/src/main/scala/is/hail/types/physical/PFloat64.scala @@ -2,6 +2,7 @@ package is.hail.types.physical import is.hail.annotations._ import is.hail.asm4s.{Code, _} +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder} import is.hail.types.physical.stypes.primitives.{SFloat64, SFloat64Code} import is.hail.types.physical.stypes.{SCode, SType} @@ -26,24 +27,6 @@ class PFloat64(override val required: Boolean) extends PNumeric with PPrimitive } } - def codeOrdering(mb: EmitMethodBuilder[_], other: PType): CodeOrdering = { - assert(other isOfType this) - new CodeOrdering { - def compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = - Code.invokeStatic2[java.lang.Double, Double, Double, Int]("compare", x.tcode[Double], y.tcode[Double]) - - def ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Double] < y.tcode[Double] - - def lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Double] <= y.tcode[Double] - - def gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Double] > y.tcode[Double] - - def gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Double] >= y.tcode[Double] - - def equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Double].ceq(y.tcode[Double]) - } - } - override def byteSize: Long = 8 override def zero = coerce[PFloat64](const(0.0)) diff --git a/hail/src/main/scala/is/hail/types/physical/PInt32.scala b/hail/src/main/scala/is/hail/types/physical/PInt32.scala index 66453976402..9485774e7af 100644 --- a/hail/src/main/scala/is/hail/types/physical/PInt32.scala +++ b/hail/src/main/scala/is/hail/types/physical/PInt32.scala @@ -2,6 +2,7 @@ package is.hail.types.physical import is.hail.annotations.{Region, UnsafeOrdering, _} import is.hail.asm4s.{Code, coerce, const, _} +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder} import is.hail.types.physical.stypes.primitives.{SInt32, SInt32Code} import 
is.hail.types.physical.stypes.{SCode, SType} @@ -22,24 +23,6 @@ class PInt32(override val required: Boolean) extends PNumeric with PPrimitive { } } - def codeOrdering(mb: EmitMethodBuilder[_], other: PType): CodeOrdering = { - assert(other isOfType this) - new CodeOrdering { - def compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = - Code.invokeStatic2[java.lang.Integer, Int, Int, Int]("compare", x.tcode[Int], y.tcode[Int]) - - def ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Int] < y.tcode[Int] - - def lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Int] <= y.tcode[Int] - - def gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Int] > y.tcode[Int] - - def gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Int] >= y.tcode[Int] - - def equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Int].ceq(y.tcode[Int]) - } - } - override def byteSize: Long = 4 override def zero = coerce[PInt32](const(0)) diff --git a/hail/src/main/scala/is/hail/types/physical/PInt64.scala b/hail/src/main/scala/is/hail/types/physical/PInt64.scala index e9ca1229237..0c474e4ad03 100644 --- a/hail/src/main/scala/is/hail/types/physical/PInt64.scala +++ b/hail/src/main/scala/is/hail/types/physical/PInt64.scala @@ -2,6 +2,7 @@ package is.hail.types.physical import is.hail.annotations.{Region, UnsafeOrdering, _} import is.hail.asm4s.{Code, coerce, const, _} +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder} import is.hail.types.physical.stypes.primitives.{SInt64, SInt64Code} import is.hail.types.physical.stypes.{SCode, SType} @@ -23,24 +24,6 @@ class PInt64(override val required: Boolean) extends PNumeric with PPrimitive { } } - def codeOrdering(mb: EmitMethodBuilder[_], other: PType): CodeOrdering = { - assert(other isOfType this) - new CodeOrdering { - def compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = - Code.invokeStatic2[java.lang.Long, Long, Long, Int]("compare", x.tcode[Long], y.tcode[Long]) - - def ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Long] < y.tcode[Long] - - def lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Long] <= y.tcode[Long] - - def gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Long] > y.tcode[Long] - - def gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Long] >= y.tcode[Long] - - def equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Long].ceq(y.tcode[Long]) - } - } - override def byteSize: Long = 8 override def zero = coerce[PInt64](const(0L)) diff --git a/hail/src/main/scala/is/hail/types/physical/PInterval.scala b/hail/src/main/scala/is/hail/types/physical/PInterval.scala index b96461bd8c4..9d159988e46 100644 --- a/hail/src/main/scala/is/hail/types/physical/PInterval.scala +++ b/hail/src/main/scala/is/hail/types/physical/PInterval.scala @@ -1,8 +1,9 @@ package is.hail.types.physical -import is.hail.annotations.{CodeOrdering, _} +import is.hail.annotations._ import is.hail.asm4s._ import is.hail.check.Gen +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCode, EmitCodeBuilder, EmitMethodBuilder, IEmitCode} import is.hail.types.physical.stypes.interfaces.{SIntervalCode, SIntervalValue} import is.hail.types.virtual.TInterval @@ -13,11 +14,6 @@ abstract class PInterval extends PType { lazy val 
virtualType: TInterval = TInterval(pointType.virtualType) - def codeOrdering(mb: EmitMethodBuilder[_], other: PType): CodeOrdering = { - assert(other isOfType this) - CodeOrdering.intervalOrdering(this, other.asInstanceOf[PInterval], mb) - } - override def unsafeOrdering(): UnsafeOrdering = new UnsafeOrdering { private val pOrd = pointType.unsafeOrdering() diff --git a/hail/src/main/scala/is/hail/types/physical/PNDArray.scala b/hail/src/main/scala/is/hail/types/physical/PNDArray.scala index c3be2ae1954..1300f25fbdc 100644 --- a/hail/src/main/scala/is/hail/types/physical/PNDArray.scala +++ b/hail/src/main/scala/is/hail/types/physical/PNDArray.scala @@ -1,8 +1,9 @@ package is.hail.types.physical -import is.hail.annotations.{CodeOrdering, Region} +import is.hail.annotations.Region import is.hail.asm4s.{Code, _} import is.hail.expr.Nat +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder} import is.hail.types.physical.stypes.SCode import is.hail.types.physical.stypes.concrete.SNDArrayPointerCode @@ -26,8 +27,6 @@ abstract class PNDArray extends PType { lazy val virtualType: TNDArray = TNDArray(elementType.virtualType, Nat(nDims)) assert(elementType.required, "elementType must be required") - def codeOrdering(mb: EmitMethodBuilder[_], other: PType): CodeOrdering = throw new UnsupportedOperationException - def dataFirstElementPointer(ndAddr: Code[Long]): Code[Long] def dataPArrayPointer(ndAddr: Code[Long]): Code[Long] diff --git a/hail/src/main/scala/is/hail/types/physical/PSet.scala b/hail/src/main/scala/is/hail/types/physical/PSet.scala index ef937d5e384..2f2d4958fd0 100644 --- a/hail/src/main/scala/is/hail/types/physical/PSet.scala +++ b/hail/src/main/scala/is/hail/types/physical/PSet.scala @@ -2,16 +2,10 @@ package is.hail.types.physical import is.hail.annotations._ import is.hail.check.Gen -import is.hail.expr.ir.EmitMethodBuilder import is.hail.types.virtual.TSet abstract class PSet extends PContainer { lazy val virtualType: TSet = TSet(elementType.virtualType) - def codeOrdering(mb: EmitMethodBuilder[_], other: PType): CodeOrdering = { - assert(other isOfType this) - CodeOrdering.setOrdering(this, other.asInstanceOf[PSet], mb) - } - override def genNonmissingValue: Gen[Annotation] = Gen.buildableOf[Set](elementType.genValue) } diff --git a/hail/src/main/scala/is/hail/types/physical/PString.scala b/hail/src/main/scala/is/hail/types/physical/PString.scala index 4bd6c78a929..0f5316226ee 100644 --- a/hail/src/main/scala/is/hail/types/physical/PString.scala +++ b/hail/src/main/scala/is/hail/types/physical/PString.scala @@ -1,9 +1,9 @@ package is.hail.types.physical import is.hail.asm4s._ -import is.hail.annotations.CodeOrdering import is.hail.annotations.{UnsafeOrdering, _} -import is.hail.expr.ir.{EmitMethodBuilder, EmitCodeBuilder} +import is.hail.expr.ir.orderings.{CodeOrdering, CodeOrderingCompareConsistentWithOthers} +import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder} import is.hail.types.physical.stypes.interfaces.{SStringCode, SStringValue} import is.hail.types.virtual.TString @@ -12,15 +12,6 @@ abstract class PString extends PType { override def unsafeOrdering(): UnsafeOrdering = PCanonicalBinary(required).unsafeOrdering() - def codeOrdering(mb: EmitMethodBuilder[_], other: PType): CodeOrdering = { - assert(this isOfType other) - new CodeOrderingCompareConsistentWithOthers { - val ord = PCanonicalBinary(required).codeOrdering(mb, PCanonicalBinary(other.required)) - def compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): 
Code[Int] = - ord.compareNonnull(cb, x.asString.asBytes(), y.asString.asBytes()) - } - } - val binaryRepresentation: PBinary def loadLength(boff: Long): Int diff --git a/hail/src/main/scala/is/hail/types/physical/PStruct.scala b/hail/src/main/scala/is/hail/types/physical/PStruct.scala index e7815e5f591..77457713b35 100644 --- a/hail/src/main/scala/is/hail/types/physical/PStruct.scala +++ b/hail/src/main/scala/is/hail/types/physical/PStruct.scala @@ -2,18 +2,14 @@ package is.hail.types.physical import is.hail.annotations._ import is.hail.asm4s.Code -import is.hail.expr.ir.{EmitMethodBuilder, SortOrder} +import is.hail.expr.ir.orderings.CodeOrdering +import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} +import is.hail.types.physical.stypes.interfaces.SBaseStructCode import is.hail.types.virtual.{Field, TStruct} trait PStruct extends PBaseStruct { lazy val virtualType: TStruct = TStruct(fields.map(f => Field(f.name, f.typ.virtualType, f.index))) - final def codeOrdering(mb: EmitMethodBuilder[_], other: PType, so: Array[SortOrder], missingFieldsEqual: Boolean): CodeOrdering = { - assert(other.asInstanceOf[PStruct].isIsomorphicTo(this)) - assert(so == null || so.size == types.size) - CodeOrdering.rowOrdering(this, other.asInstanceOf[PStruct], mb, so, missingFieldsEqual) - } - final def deleteField(key: String): PCanonicalStruct = { assert(fieldIdx.contains(key)) val index = fieldIdx(key) @@ -57,4 +53,6 @@ trait PStruct extends PBaseStruct { def setFieldMissing(offset: Code[Long], fieldName: String): Code[Unit] def insertFields(fieldsToInsert: TraversableOnce[(String, PType)]): PStruct + + def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PBaseStructCode } diff --git a/hail/src/main/scala/is/hail/types/physical/PSubsetStruct.scala b/hail/src/main/scala/is/hail/types/physical/PSubsetStruct.scala index d3cad22e072..80531fe8ee1 100644 --- a/hail/src/main/scala/is/hail/types/physical/PSubsetStruct.scala +++ b/hail/src/main/scala/is/hail/types/physical/PSubsetStruct.scala @@ -4,7 +4,7 @@ import is.hail.annotations.{Annotation, Region, UnsafeUtils} import is.hail.asm4s.{Code, Settable, SettableBuilder, Value, coerce, const} import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, IEmitCode} import is.hail.types.BaseStruct -import is.hail.types.physical.stypes.interfaces.SStruct +import is.hail.types.physical.stypes.interfaces.SBaseStruct import is.hail.types.physical.stypes.{SCode, SType} import is.hail.types.physical.stypes.concrete.SSubsetStruct import is.hail.types.virtual.TStruct @@ -19,7 +19,7 @@ object PSubsetStruct { // Semantics: PSubsetStruct is a non-constructible view of another PStruct, which is not allowed to mutate // that underlying PStruct's region data -final case class PSubsetStruct(ps: PStruct, _fieldNames: Array[String]) extends PStruct { +final case class PSubsetStruct(ps: PStruct, _fieldNames: IndexedSeq[String]) extends PStruct { val fields: IndexedSeq[PField] = _fieldNames.zipWithIndex.map { case (name, i) => PField(name, ps.fieldType(name), i)} val required = ps.required @@ -27,7 +27,7 @@ final case class PSubsetStruct(ps: PStruct, _fieldNames: Array[String]) extends log.warn("PSubsetStruct used without subsetting input PStruct") } - private val idxMap: Array[Int] = _fieldNames.map(f => ps.fieldIdx(f)) + private val idxMap: Array[Int] = _fieldNames.map(f => ps.fieldIdx(f)).toArray lazy val missingIdx: Array[Int] = idxMap.map(i => ps.missingIdx(i)) lazy val nMissing: Int = missingIdx.length @@ -38,22 +38,12 @@ final case class PSubsetStruct(ps: 
PStruct, _fieldNames: Array[String]) extends override val byteSize: Long = 8 override def _pretty(sb: StringBuilder, indent: Int, compact: Boolean) { - if (compact) { - sb.append("PSubsetStruct{") - fields.foreachBetween(_.pretty(sb, indent, compact))(sb += ',') - sb += '}' - } else { - if (size == 0) - sb.append("Struct { }") - else { - sb.append("Struct {") - sb += '\n' - fields.foreachBetween(_.pretty(sb, indent + 4, compact))(sb.append(",\n")) - sb += '\n' - sb.append(" " * indent) - sb += '}' - } - } + sb.append("PSubsetStruct{") + ps.pretty(sb, indent, compact) + sb += '{' + fieldNames.foreachBetween(f => sb.append(prettyIdentifier(f)))(sb += ',') + sb += '}' + sb += '}' } override def rename(m: Map[String, String]): PStruct = { @@ -125,7 +115,7 @@ final case class PSubsetStruct(ps: PStruct, _fieldNames: Array[String]) extends override def _copyFromAddress(region: Region, srcPType: PType, srcAddress: Long, deepCopy: Boolean): Long = throw new UnsupportedOperationException - def sType: SSubsetStruct = SSubsetStruct(ps.sType.asInstanceOf[SStruct], _fieldNames) + def sType: SSubsetStruct = SSubsetStruct(ps.sType.asInstanceOf[SBaseStruct], _fieldNames) def store(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): Code[Long] = throw new UnsupportedOperationException @@ -133,7 +123,7 @@ final case class PSubsetStruct(ps: PStruct, _fieldNames: Array[String]) extends throw new UnsupportedOperationException } - def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PCode = throw new UnsupportedOperationException + def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PBaseStructCode = throw new UnsupportedOperationException def unstagedStoreAtAddress(addr: Long, region: Region, srcPType: PType, srcAddress: Long, deepCopy: Boolean): Unit = { throw new UnsupportedOperationException diff --git a/hail/src/main/scala/is/hail/types/physical/PTuple.scala b/hail/src/main/scala/is/hail/types/physical/PTuple.scala index b8356025d93..a30a94d0c64 100644 --- a/hail/src/main/scala/is/hail/types/physical/PTuple.scala +++ b/hail/src/main/scala/is/hail/types/physical/PTuple.scala @@ -1,10 +1,6 @@ package is.hail.types.physical -import is.hail.annotations.{CodeOrdering, Region} -import is.hail.asm4s.{Code, Value, coerce} -import is.hail.expr.ir.{EmitMethodBuilder, SortOrder} import is.hail.types.virtual.{TTuple, TupleField} -import is.hail.utils._ case class PTupleField(index: Int, typ: PType) @@ -17,11 +13,5 @@ trait PTuple extends PBaseStruct { lazy val fields: IndexedSeq[PField] = _types.zipWithIndex.map { case (PTupleField(tidx, t), i) => PField(s"$tidx", t, i) } lazy val nFields: Int = fields.size - final def codeOrdering(mb: EmitMethodBuilder[_], other: PType, so: Array[SortOrder], missingFieldsEqual: Boolean): CodeOrdering = { - assert(other isOfType this, s"$other != $this") - assert(so == null || so.size == types.size) - CodeOrdering.rowOrdering(this, other.asInstanceOf[PTuple], mb, so, missingFieldsEqual) - } - def identBase: String = "tuple" } diff --git a/hail/src/main/scala/is/hail/types/physical/PType.scala b/hail/src/main/scala/is/hail/types/physical/PType.scala index e0b7a9386f8..2216b8c9e43 100644 --- a/hail/src/main/scala/is/hail/types/physical/PType.scala +++ b/hail/src/main/scala/is/hail/types/physical/PType.scala @@ -342,20 +342,6 @@ abstract class PType extends Serializable with Requiredness { def _pretty(sb: StringBuilder, indent: Int, compact: Boolean) - def codeOrdering(mb: EmitMethodBuilder[_]): CodeOrdering = - codeOrdering(mb, this) - - def 
codeOrdering(mb: EmitMethodBuilder[_], so: SortOrder): CodeOrdering = - codeOrdering(mb, this, so) - - def codeOrdering(mb: EmitMethodBuilder[_], other: PType, so: SortOrder): CodeOrdering = - so match { - case Ascending => codeOrdering(mb, other) - case Descending => codeOrdering(mb, other).reverse - } - - def codeOrdering(mb: EmitMethodBuilder[_], other: PType): CodeOrdering - def byteSize: Long def alignment: Long = byteSize diff --git a/hail/src/main/scala/is/hail/types/physical/PUnrealizable.scala b/hail/src/main/scala/is/hail/types/physical/PUnrealizable.scala index b8253486bfd..c70cfd685b6 100644 --- a/hail/src/main/scala/is/hail/types/physical/PUnrealizable.scala +++ b/hail/src/main/scala/is/hail/types/physical/PUnrealizable.scala @@ -1,7 +1,8 @@ package is.hail.types.physical -import is.hail.annotations.{Annotation, CodeOrdering, Region} +import is.hail.annotations.{Annotation, Region} import is.hail.asm4s.{Code, TypeInfo, Value} +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{Ascending, Descending, EmitCodeBuilder, EmitMethodBuilder, SortOrder} import is.hail.types.physical.stypes.SCode @@ -13,12 +14,6 @@ trait PUnrealizable extends PType { override def alignment: Long = unsupported - override def codeOrdering(mb: EmitMethodBuilder[_], other: PType, so: SortOrder): CodeOrdering = - unsupported - - def codeOrdering(mb: EmitMethodBuilder[_], other: PType): CodeOrdering = - unsupported - protected[physical] def _copyFromAddress(region: Region, srcPType: PType, srcAddress: Long, deepCopy: Boolean): Long = unsupported diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/SType.scala b/hail/src/main/scala/is/hail/types/physical/stypes/SType.scala index 4aab5dfab88..1d4abe67b06 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/SType.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/SType.scala @@ -1,8 +1,9 @@ package is.hail.types.physical.stypes -import is.hail.annotations.{CodeOrdering, Region} +import is.hail.annotations.Region import is.hail.asm4s.{Code, Settable, TypeInfo, Value} -import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, PCodeParamType, SortOrder} +import is.hail.expr.ir.orderings.CodeOrdering +import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, EmitParamType, PCodeParamType, SortOrder} import is.hail.types.TypeWithRequiredness import is.hail.types.physical.{PCode, PType} import is.hail.types.virtual.Type @@ -18,8 +19,6 @@ trait SType { def codeTupleTypes(): IndexedSeq[TypeInfo[_]] - def codeOrdering(mb: EmitMethodBuilder[_], other: SType, so: SortOrder): CodeOrdering - def fromSettables(settables: IndexedSeq[Settable[_]]): SSettable def fromCodes(codes: IndexedSeq[Code[_]]): SCode @@ -29,4 +28,8 @@ trait SType { def paramType: PCodeParamType = PCodeParamType(pType) def asIdent: String = pType.asIdent + + def asEmitParam: EmitParamType = EmitParamType(pType) + + def equalsExceptTopLevelRequiredness(that: SType): Boolean = pType.equalModuloRequired(that.pType) } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBaseStructPointer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBaseStructPointer.scala index b6ab9b3774d..f849530b5ef 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBaseStructPointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBaseStructPointer.scala @@ -1,16 +1,17 @@ package is.hail.types.physical.stypes.concrete -import is.hail.annotations.{CodeOrdering, Region} +import 
is.hail.annotations.Region import is.hail.asm4s._ +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, IEmitCode, SortOrder} -import is.hail.types.physical.stypes.interfaces.{SStruct, SStructSettable} +import is.hail.types.physical.stypes.interfaces.{SBaseStruct, SStructSettable} import is.hail.types.physical.stypes.{SCode, SSettable, SType} import is.hail.types.physical.{PBaseStruct, PBaseStructCode, PBaseStructValue, PCode, PStructSettable, PType} import is.hail.utils.FastIndexedSeq -case class SBaseStructPointer(pType: PBaseStruct) extends SStruct { - def codeOrdering(mb: EmitMethodBuilder[_], other: SType, so: SortOrder): CodeOrdering = pType.codeOrdering(mb, other.pType, so) +case class SBaseStructPointer(pType: PBaseStruct) extends SBaseStruct { + def size: Int = pType.size def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { new SBaseStructPointerCode(this, pType.store(cb, region, value, deepCopy)) @@ -38,6 +39,8 @@ case class SBaseStructPointer(pType: PBaseStruct) extends SStruct { } def canonicalPType(): PType = pType + + override val fieldTypes: Array[SType] = pType.types.map(_.sType) } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBinaryPointer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBinaryPointer.scala index f7e9bda6c5c..7a6505d7de5 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBinaryPointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBinaryPointer.scala @@ -1,7 +1,8 @@ package is.hail.types.physical.stypes.concrete -import is.hail.annotations.{CodeOrdering, Region} +import is.hail.annotations.Region import is.hail.asm4s._ +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} import is.hail.types.physical.stypes.interfaces.SBinary import is.hail.types.physical.stypes.{SCode, SType} @@ -10,8 +11,6 @@ import is.hail.utils._ case class SBinaryPointer(pType: PBinary) extends SBinary { - def codeOrdering(mb: EmitMethodBuilder[_], other: SType, so: SortOrder): CodeOrdering = pType.codeOrdering(mb, other.pType, so) - def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { new SBinaryPointerCode(this, pType.store(cb, region, value, deepCopy)) } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalCall.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalCall.scala index 087efac916f..7dd297e08b9 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalCall.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalCall.scala @@ -1,7 +1,8 @@ package is.hail.types.physical.stypes.concrete -import is.hail.annotations.{CodeOrdering, Region} +import is.hail.annotations.Region import is.hail.asm4s._ +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} import is.hail.types.physical.stypes.interfaces.SCall import is.hail.types.physical.stypes.{SCode, SType} @@ -13,8 +14,6 @@ import is.hail.variant.Genotype case class SCanonicalCall(required: Boolean) extends SCall { override def pType: PCall = PCanonicalCall(required) - def codeOrdering(mb: EmitMethodBuilder[_], other: SType, so: SortOrder): CodeOrdering = pType.codeOrdering(mb, other.pType, so) - def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], 
value: SCode, deepCopy: Boolean): SCode = { value.st match { case SCanonicalCall(_) => @@ -122,4 +121,6 @@ class SCanonicalCallCode(required: Boolean, val call: Code[Int]) extends PCallCo def memoizeField(cb: EmitCodeBuilder, name: String): PCallValue = memoize(cb, name, cb.fieldBuilder) def store(mb: EmitMethodBuilder[_], r: Value[Region], dst: Code[Long]): Code[Unit] = Region.storeInt(dst, call) + + def loadCanonicalRepresentation(cb: EmitCodeBuilder): Code[Int] = call } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalLocusPointer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalLocusPointer.scala index 43130b4b723..ff601cb89c1 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalLocusPointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalLocusPointer.scala @@ -1,17 +1,18 @@ package is.hail.types.physical.stypes.concrete -import is.hail.annotations.{CodeOrdering, Region} +import is.hail.annotations.Region import is.hail.asm4s._ +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} import is.hail.types.physical.stypes.interfaces.SLocus import is.hail.types.physical.stypes.{SCode, SType} import is.hail.types.physical.{PCanonicalLocus, PCode, PLocusCode, PLocusValue, PSettable, PStringCode, PType} import is.hail.utils.FastIndexedSeq -import is.hail.variant.Locus +import is.hail.variant.{Locus, ReferenceGenome} case class SCanonicalLocusPointer(pType: PCanonicalLocus) extends SLocus { - def codeOrdering(mb: EmitMethodBuilder[_], other: SType, so: SortOrder): CodeOrdering = pType.codeOrdering(mb, other.pType, so) + override def rg: ReferenceGenome = pType.rg def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { new SCanonicalLocusPointerCode(this, pType.store(cb, region, value, deepCopy)) diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalShufflePointer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalShufflePointer.scala index 28c4e4421bb..d55dc320a53 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalShufflePointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalShufflePointer.scala @@ -1,7 +1,8 @@ package is.hail.types.physical.stypes.concrete -import is.hail.annotations.{CodeOrdering, Region} +import is.hail.annotations.Region import is.hail.asm4s.{Code, IntInfo, LongInfo, Settable, SettableBuilder, TypeInfo, Value} +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} import is.hail.services.shuffler.Wire import is.hail.types.physical.stypes.interfaces.SShuffle @@ -13,8 +14,6 @@ case class SCanonicalShufflePointer(pType: PCanonicalShuffle) extends SShuffle { lazy val binarySType = SBinaryPointer(pType.representation) - def codeOrdering(mb: EmitMethodBuilder[_], other: SType, so: SortOrder): CodeOrdering = pType.codeOrdering(mb, other.pType, so) - def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { new SCanonicalShufflePointerCode(this, pType.representation.loadCheapPCode(cb, pType.store(cb, region, value, deepCopy))) } @@ -85,4 +84,6 @@ class SCanonicalShufflePointerCode(val st: SCanonicalShufflePointer, val shuffle def memoizeField(cb: EmitCodeBuilder, name: String): SCanonicalShufflePointerSettable = memoize(cb, 
name, cb.fieldBuilder) def store(mb: EmitMethodBuilder[_], r: Value[Region], dst: Code[Long]): Code[Unit] = shuffle.store(mb, r, dst) + + def binaryRepr: SBinaryPointerCode = shuffle } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIndexablePointer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIndexablePointer.scala index bdacdefa517..2cad2d82c97 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIndexablePointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIndexablePointer.scala @@ -1,7 +1,8 @@ package is.hail.types.physical.stypes.concrete -import is.hail.annotations.{CodeOrdering, Region} +import is.hail.annotations.Region import is.hail.asm4s._ +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, IEmitCode, SortOrder} import is.hail.types.physical.stypes.{SCode, SType} import is.hail.types.physical.stypes.interfaces.SContainer @@ -12,8 +13,6 @@ import is.hail.utils.FastIndexedSeq case class SIndexablePointer(pType: PContainer) extends SContainer { override def elementType: SType = pType.elementType.sType - def codeOrdering(mb: EmitMethodBuilder[_], other: SType, so: SortOrder): CodeOrdering = pType.codeOrdering(mb, other.pType, so) - def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { new SIndexablePointerCode(this, pType.store(cb, region, value, deepCopy)) } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIntervalPointer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIntervalPointer.scala index 35c842d30b7..d890a9473b1 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIntervalPointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIntervalPointer.scala @@ -1,7 +1,8 @@ package is.hail.types.physical.stypes.concrete -import is.hail.annotations.{CodeOrdering, Region} +import is.hail.annotations.Region import is.hail.asm4s.{BooleanInfo, Code, IntInfo, LongInfo, Settable, SettableBuilder, TypeInfo, Value} +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, IEmitCode, SortOrder} import is.hail.types.physical.stypes.interfaces.SInterval import is.hail.types.physical.stypes.{SCode, SType} @@ -10,8 +11,6 @@ import is.hail.utils.FastIndexedSeq case class SIntervalPointer(pType: PInterval) extends SInterval { - def codeOrdering(mb: EmitMethodBuilder[_], other: SType, so: SortOrder): CodeOrdering = pType.codeOrdering(mb, other.pType, so) - def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { new SIntervalPointerCode(this, pType.store(cb, region, value, deepCopy)) } @@ -41,6 +40,7 @@ case class SIntervalPointer(pType: PInterval) extends SInterval { new SIntervalPointerCode(this, a) } + override def pointType: SType = pType.pointType.sType def canonicalPType(): PType = pType } @@ -89,8 +89,8 @@ class SIntervalPointerSettable( // FIXME orderings should take emitcodes/iemitcodes def isEmpty(cb: EmitCodeBuilder): Code[Boolean] = { - val gt = cb.emb.getCodeOrdering(pt.pointType, CodeOrdering.Gt()) - val gteq = cb.emb.getCodeOrdering(pt.pointType, CodeOrdering.Gteq()) + val gt = cb.emb.ecb.getOrderingFunction(st.pointType, CodeOrdering.Gt()) + val gteq = cb.emb.ecb.getOrderingFunction(st.pointType, CodeOrdering.Gteq()) val start = cb.memoize(loadStart(cb), "start") val end = cb.memoize(loadEnd(cb), "end") 
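
The hunks in this patch delete the per-PType codeOrdering methods (PBoolean, PInt32, PInt64, PFloat32, PFloat64, PInterval, and so on) and move comparison code generation into per-SType factory objects under the new is.hail.expr.ir.orderings package. As a reading aid, the following is a minimal sketch of what such a factory looks like for 64-bit integers, written in the same style as the Float64Ordering object added earlier in this patch; the object name Int64Ordering is an assumption (the patch's own integer orderings are not part of this excerpt), and the comparison bodies mirror the removed PInt64.codeOrdering.

package is.hail.expr.ir.orderings

import is.hail.asm4s.Code
import is.hail.expr.ir.{EmitClassBuilder, EmitCodeBuilder}
import is.hail.types.physical.PCode
import is.hail.types.physical.stypes.primitives.SInt64

// Illustrative sketch only: follows the Float64Ordering pattern shown above.
object Int64Ordering {
  def make(t1: SInt64, t2: SInt64, ecb: EmitClassBuilder[_]): CodeOrdering = {
    new CodeOrdering {

      val type1: SInt64 = t1
      val type2: SInt64 = t2

      // Delegates to java.lang.Long.compare, as the removed PInt64.codeOrdering did.
      def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] =
        Code.invokeStatic2[java.lang.Long, Long, Long, Int]("compare", x.tcode[Long], y.tcode[Long])

      def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Long] < y.tcode[Long]

      def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Long] <= y.tcode[Long]

      def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Long] > y.tcode[Long]

      def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Long] >= y.tcode[Long]

      def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Long].ceq(y.tcode[Long])
    }
  }
}

Callers then obtain these orderings through EmitClassBuilder.getOrderingFunction, as in the StructOrdering field comparisons and the SIntervalPointerSettable.isEmpty change above, rather than through PType.codeOrdering.
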
diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SNDArrayPointer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SNDArrayPointer.scala index fa6a1686718..82b1d8a150d 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SNDArrayPointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SNDArrayPointer.scala @@ -1,7 +1,8 @@ package is.hail.types.physical.stypes.concrete -import is.hail.annotations.{CodeOrdering, Region} +import is.hail.annotations.Region import is.hail.asm4s.{Code, IntInfo, LongInfo, Settable, SettableBuilder, TypeInfo, Value, const} +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} import is.hail.types.physical.stypes.interfaces.{SNDArray, SNDArrayValue} import is.hail.types.physical.stypes.{SCode, SType} @@ -13,8 +14,6 @@ case class SNDArrayPointer(pType: PCanonicalNDArray) extends SNDArray { override def elementType: SType = pType.elementType.sType - def codeOrdering(mb: EmitMethodBuilder[_], other: SType, so: SortOrder): CodeOrdering = pType.codeOrdering(mb) - def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { new SNDArrayPointerCode(this, pType.store(cb, region, value, deepCopy)) } @@ -106,11 +105,13 @@ class SNDArrayPointerSettable( override def strides(cb: EmitCodeBuilder): IndexedSeq[Value[Long]] = strides override def sameShape(other: SNDArrayValue, cb: EmitCodeBuilder): Code[Boolean] = { - val otherPtr = other.asInstanceOf[SNDArrayPointerSettable] - val comparator = this.pt.shapeType.codeOrdering(cb.emb, otherPtr.pt.shapeType) - val thisShape = PCode(this.pt.shapeType, this.pt.representation.loadField(a, "shape")) - val otherShape = PCode(otherPtr.pt.shapeType, otherPtr.pt.representation.loadField(otherPtr.a, "shape")) - comparator.equivNonnull(cb, thisShape, otherShape) + val otherShapes = other.shapes(cb) + val b = cb.newLocal[Boolean]("sameShape_b", true) + assert(shape.length == otherShapes.length) + shape.zip(otherShapes).foreach { case (s1, s2) => + cb.assign(b, b && s1.ceq(s2)) + } + b } def firstDataAddress(cb: EmitCodeBuilder): Value[Long] = dataFirstElement diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SStringPointer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SStringPointer.scala index c84b52e522c..2098960f60f 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SStringPointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SStringPointer.scala @@ -1,7 +1,8 @@ package is.hail.types.physical.stypes.concrete -import is.hail.annotations.{CodeOrdering, Region} +import is.hail.annotations.Region import is.hail.asm4s.{Code, LongInfo, Settable, SettableBuilder, TypeInfo, Value} +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} import is.hail.types.physical.stypes.interfaces.{SString, SStringCode} import is.hail.types.physical.stypes.{SCode, SType} @@ -10,8 +11,6 @@ import is.hail.utils.FastIndexedSeq case class SStringPointer(pType: PString) extends SString { - def codeOrdering(mb: EmitMethodBuilder[_], other: SType, so: SortOrder): CodeOrdering = pType.codeOrdering(mb, other.pType, so) - def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { new SStringPointerCode(this, pType.store(cb, region, value, deepCopy)) } @@ -67,6 +66,8 @@ class 
SStringPointerCode(val st: SStringPointer, val a: Code[Long]) extends PStr def memoize(cb: EmitCodeBuilder, name: String): PValue = memoizeWithBuilder(cb, name, cb.localBuilder) def memoizeField(cb: EmitCodeBuilder, name: String): PValue = memoizeWithBuilder(cb, name, cb.fieldBuilder) + + def binaryRepr: SBinaryPointerCode = new SBinaryPointerCode(SBinaryPointer(st.pType.binaryRepresentation), a) } object SStringPointerSettable { diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SSubsetStruct.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SSubsetStruct.scala index dfe8d77572a..e5f746cdf0f 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SSubsetStruct.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SSubsetStruct.scala @@ -1,23 +1,27 @@ package is.hail.types.physical.stypes.concrete -import is.hail.annotations.{CodeOrdering, Region} +import is.hail.annotations.Region import is.hail.asm4s.{Code, LongInfo, Settable, TypeInfo, Value} +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, IEmitCode, IEmitSCode, SortOrder} import is.hail.types.physical.stypes.{SCode, SType} -import is.hail.types.physical.stypes.interfaces.{SStruct, SStructSettable} +import is.hail.types.physical.stypes.interfaces.{SBaseStruct, SStructSettable} import is.hail.types.physical.{PBaseStruct, PBaseStructCode, PBaseStructValue, PCode, PStruct, PStructSettable, PSubsetStruct, PType} import is.hail.types.virtual.TStruct -case class SSubsetStruct(parent: SStruct, fieldNames: IndexedSeq[String]) extends SStruct { +case class SSubsetStruct(parent: SBaseStruct, fieldNames: IndexedSeq[String]) extends SBaseStruct { + + val size: Int = fieldNames.size + val fieldIdx: Map[String, Int] = fieldNames.zipWithIndex.toMap val newToOldFieldMapping: Map[Int, Int] = fieldIdx .map { case (f, i) => (i, parent.pType.virtualType.asInstanceOf[TStruct].fieldIdx(f)) } + val fieldTypes: Array[SType] = Array.tabulate(size)(i => parent.fieldTypes(newToOldFieldMapping(i))) + val pType: PSubsetStruct = PSubsetStruct(parent.pType.asInstanceOf[PStruct], fieldNames.toArray ) - def codeOrdering(mb: EmitMethodBuilder[_], other: SType, so: SortOrder): CodeOrdering = pType.codeOrdering(mb) - def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { value.st match { case SSubsetStruct(parent2, fd2) if parent == parent2 && fieldNames == fd2 && !deepCopy => @@ -57,7 +61,7 @@ class SSubsetStructSettable(val st: SSubsetStruct, prev: PStructSettable) extend def isFieldMissing(fieldIdx: Int): Code[Boolean] = prev.isFieldMissing(st.newToOldFieldMapping(fieldIdx)) - def store(cb: EmitCodeBuilder, pv: PCode): Unit = prev.store(cb, pv) + def store(cb: EmitCodeBuilder, pv: PCode): Unit = prev.store(cb, pv.asInstanceOf[SSubsetStructCode].prev) } class SSubsetStructCode(val st: SSubsetStruct, val prev: PBaseStructCode) extends PBaseStructCode { diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SStruct.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SBaseStruct.scala similarity index 92% rename from hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SStruct.scala rename to hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SBaseStruct.scala index 631cd7a6618..227c256b63a 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SStruct.scala +++ 
b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SBaseStruct.scala @@ -3,11 +3,15 @@ package is.hail.types.physical.stypes.interfaces import is.hail.asm4s.Code import is.hail.expr.ir.{EmitCodeBuilder, IEmitSCode} import is.hail.types.physical.PBaseStruct -import is.hail.types.physical.stypes.{SCode, SSettable, SType, SValue} import is.hail.types.physical.stypes.concrete.{SSubsetStruct, SSubsetStructCode} +import is.hail.types.physical.stypes.{SCode, SSettable, SType, SValue} -trait SStruct extends SType { +trait SBaseStruct extends SType { override def fromCodes(codes: IndexedSeq[Code[_]]): SBaseStructCode + + def size: Int + + val fieldTypes: Array[SType] } trait SStructSettable extends SBaseStructValue with SSettable @@ -25,7 +29,7 @@ trait SBaseStructValue extends SValue { } trait SBaseStructCode extends SCode { self => - def st: SStruct + def st: SBaseStruct def memoize(cb: EmitCodeBuilder, name: String): SBaseStructValue diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SCall.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SCall.scala index 068f0eeb97a..b9da86c29ba 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SCall.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SCall.scala @@ -25,4 +25,6 @@ trait SCallCode extends SCode { def memoize(cb: EmitCodeBuilder, name: String): SCallValue def memoizeField(cb: EmitCodeBuilder, name: String): SCallValue + + def loadCanonicalRepresentation(cb: EmitCodeBuilder): Code[Int] } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SInterval.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SInterval.scala index 727949d58cd..6d9b2224109 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SInterval.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SInterval.scala @@ -5,9 +5,13 @@ import is.hail.expr.ir.{EmitCodeBuilder, IEmitSCode} import is.hail.types.physical.PInterval import is.hail.types.physical.stypes.{SCode, SType, SValue} -trait SInterval extends SType +trait SInterval extends SType { + def pointType: SType +} trait SIntervalValue extends SValue { + def st: SInterval + def includesStart(): Value[Boolean] def includesEnd(): Value[Boolean] @@ -24,6 +28,8 @@ trait SIntervalValue extends SValue { } trait SIntervalCode extends SCode { + def st: SInterval + def includesStart(): Code[Boolean] def includesEnd(): Code[Boolean] diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SLocus.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SLocus.scala index b9114ffed52..a58a934c132 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SLocus.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SLocus.scala @@ -2,11 +2,12 @@ package is.hail.types.physical.stypes.interfaces import is.hail.asm4s.Code import is.hail.expr.ir.EmitCodeBuilder -import is.hail.types.physical.{PCode, PLocus, PStringCode, PValue} import is.hail.types.physical.stypes.{SCode, SType, SValue} -import is.hail.variant.Locus +import is.hail.variant.{Locus, ReferenceGenome} -trait SLocus extends SType +trait SLocus extends SType { + def rg: ReferenceGenome +} trait SLocusValue extends SValue { def contig(cb: EmitCodeBuilder): SStringCode diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SStream.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SStream.scala index 
3c265107cd1..d28a92c4dda 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SStream.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SStream.scala @@ -1,8 +1,9 @@ package is.hail.types.physical.stypes.interfaces -import is.hail.annotations.{CodeOrdering, Region} +import is.hail.annotations.Region import is.hail.asm4s.{Code, Settable, TypeInfo, Value} import is.hail.expr.ir.EmitStream.SizedStream +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} import is.hail.types.physical.stypes.{SCode, SSettable, SType} import is.hail.types.physical.{PCanonicalStream, PCode, PStream, PStreamCode, PType, PValue} @@ -19,8 +20,6 @@ case class SStream(elementType: SType, separateRegions: Boolean = false) extends def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = throw new UnsupportedOperationException - def codeOrdering(mb: EmitMethodBuilder[_], other: SType, so: SortOrder): CodeOrdering = throw new UnsupportedOperationException - def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = throw new UnsupportedOperationException def fromCodes(codes: IndexedSeq[Code[_]]): SCode = throw new UnsupportedOperationException diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SVoid.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SVoid.scala index 5cbdc6e24d7..3c9987e29b4 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SVoid.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SVoid.scala @@ -1,7 +1,8 @@ package is.hail.types.physical.stypes.interfaces -import is.hail.annotations.{CodeOrdering, Region} +import is.hail.annotations.Region import is.hail.asm4s.{Code, Settable, TypeInfo, UnitInfo, Value} +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} import is.hail.types.physical.stypes.{SCode, SSettable, SType} import is.hail.types.physical.{PCode, PType, PUnrealizableCode, PValue, PVoid} @@ -12,8 +13,6 @@ case object SVoid extends SType { def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = value - def codeOrdering(mb: EmitMethodBuilder[_], other: SType, so: SortOrder): CodeOrdering = throw new UnsupportedOperationException - def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = IndexedSeq() def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = throw new UnsupportedOperationException diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SBoolean.scala b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SBoolean.scala index 14a906de43a..6a1105387f0 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SBoolean.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SBoolean.scala @@ -1,7 +1,8 @@ package is.hail.types.physical.stypes.primitives -import is.hail.annotations.{CodeOrdering, Region} +import is.hail.annotations.Region import is.hail.asm4s.{BooleanInfo, Code, Settable, SettableBuilder, TypeInfo, Value} +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} import is.hail.types.physical.stypes.{SCode, SType} import is.hail.types.physical.{PBoolean, PCode, PSettable, PType, PValue} @@ -13,8 +14,6 @@ case class SBoolean(required: Boolean) extends SPrimitive { override def pType: PBoolean = 
PBoolean(required) - def codeOrdering(mb: EmitMethodBuilder[_], other: SType, so: SortOrder): CodeOrdering = pType.codeOrdering(mb, other.pType, so) - def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { value.st match { case SBoolean(_) => diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat32.scala b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat32.scala index 944a272452f..2e788af86c2 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat32.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat32.scala @@ -1,7 +1,8 @@ package is.hail.types.physical.stypes.primitives -import is.hail.annotations.{CodeOrdering, Region} +import is.hail.annotations.Region import is.hail.asm4s.{Code, FloatInfo, Settable, SettableBuilder, TypeInfo, Value} +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} import is.hail.types.physical.stypes.{SCode, SType} import is.hail.types.physical.{PCode, PFloat32, PSettable, PType, PValue} @@ -12,8 +13,6 @@ case class SFloat32(required: Boolean) extends SPrimitive { override def pType: PFloat32 = PFloat32(required) - def codeOrdering(mb: EmitMethodBuilder[_], other: SType, so: SortOrder): CodeOrdering = pType.codeOrdering(mb, other.pType, so) - def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { value.st match { case SFloat32(r) => diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat64.scala b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat64.scala index 83d530429fe..d72346bc173 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat64.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat64.scala @@ -1,7 +1,8 @@ package is.hail.types.physical.stypes.primitives -import is.hail.annotations.{CodeOrdering, Region} +import is.hail.annotations.Region import is.hail.asm4s.{Code, DoubleInfo, Settable, SettableBuilder, TypeInfo, Value} +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} import is.hail.types.physical.stypes.{SCode, SType} import is.hail.types.physical.{PCode, PFloat64, PSettable, PType, PValue} @@ -12,8 +13,6 @@ case class SFloat64(required: Boolean) extends SPrimitive { override def pType: PFloat64 = PFloat64(required) - def codeOrdering(mb: EmitMethodBuilder[_], other: SType, so: SortOrder): CodeOrdering = pType.codeOrdering(mb, other.pType, so) - def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { value.st match { case SFloat64(r) => diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt32.scala b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt32.scala index 1978db6d5de..9350bd95524 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt32.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt32.scala @@ -1,7 +1,8 @@ package is.hail.types.physical.stypes.primitives -import is.hail.annotations.{CodeOrdering, Region} +import is.hail.annotations.Region import is.hail.asm4s.{Code, IntInfo, Settable, SettableBuilder, TypeInfo, Value} +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} import is.hail.types.physical.stypes.{SCode, SType} 
import is.hail.types.physical.{PCode, PInt32, PSettable, PType, PValue} @@ -12,8 +13,6 @@ case class SInt32(required: Boolean) extends SPrimitive { override def pType: PInt32 = PInt32(required) - def codeOrdering(mb: EmitMethodBuilder[_], other: SType, so: SortOrder): CodeOrdering = pType.codeOrdering(mb, other.pType, so) - def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { value.st match { case SInt32(r) => diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt64.scala b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt64.scala index ec6b8b1ccb1..7ebc3e5c3bf 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt64.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt64.scala @@ -1,9 +1,9 @@ package is.hail.types.physical.stypes.primitives -import is.hail.annotations.{CodeOrdering, Region} -import is.hail.asm4s.{BooleanInfo, Code, LongInfo, Settable, SettableBuilder, TypeInfo, Value} -import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} -import is.hail.types.physical.stypes.{SCode, SType} +import is.hail.annotations.Region +import is.hail.asm4s.{Code, LongInfo, Settable, SettableBuilder, TypeInfo, Value} +import is.hail.expr.ir.EmitCodeBuilder +import is.hail.types.physical.stypes.SCode import is.hail.types.physical.{PCode, PInt64, PSettable, PType, PValue} import is.hail.utils.FastIndexedSeq @@ -12,8 +12,6 @@ case class SInt64(required: Boolean) extends SPrimitive { override def pType: PInt64 = PInt64(required) - def codeOrdering(mb: EmitMethodBuilder[_], other: SType, so: SortOrder): CodeOrdering = pType.codeOrdering(mb, other.pType, so) - def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { value.st match { case SInt64(r) => diff --git a/hail/src/main/scala/is/hail/utils/package.scala b/hail/src/main/scala/is/hail/utils/package.scala index 0a6be50ec76..f5a2b2c3d27 100644 --- a/hail/src/main/scala/is/hail/utils/package.scala +++ b/hail/src/main/scala/is/hail/utils/package.scala @@ -621,7 +621,6 @@ package object utils extends Logging caught = true try { r.close() - throw original } catch { case duringClose: Exception => if (original == duringClose) { @@ -633,6 +632,7 @@ package object utils extends Logging throw duringClose } } + throw original } finally { if (!caught) { r.close() diff --git a/hail/src/test/scala/is/hail/expr/ir/OrderingSuite.scala b/hail/src/test/scala/is/hail/expr/ir/OrderingSuite.scala index fdd6bdfdf7a..177761ea475 100644 --- a/hail/src/test/scala/is/hail/expr/ir/OrderingSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/OrderingSuite.scala @@ -6,6 +6,7 @@ import is.hail.annotations._ import is.hail.check.{Gen, Prop} import is.hail.asm4s._ import is.hail.TestUtils._ +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.rvd.RVDType import is.hail.types.physical._ import is.hail.types.virtual._ @@ -39,7 +40,8 @@ class OrderingSuite extends HailSuite { fb.emitWithBuilder { cb => val cv1 = t.loadCheapPCode(cb, fb.getCodeParam[Long](2)) val cv2 = t.loadCheapPCode(cb, fb.getCodeParam[Long](3)) - fb.apply_method.getCodeOrdering(t, op)(cb, EmitCode.present(cb.emb, cv1), EmitCode.present(cb.emb, cv2)) + fb.ecb.getOrderingFunction(cv1.st, cv2.st, op) + .apply(cb, EmitCode.present(cb.emb, cv1), EmitCode.present(cb.emb, cv2)) } fb.resultWithIndex()(0, r) } @@ -60,7 +62,8 @@ class OrderingSuite extends HailSuite { val cv2 = t.loadCheapPCode(cb, 
fb.getCodeParam[Long](5)) val ev1 = EmitCode(Code._empty, m1, cv1) val ev2 = EmitCode(Code._empty, m2, cv2) - fb.apply_method.getCodeOrdering(t, op)(cb, ev1, ev2) + fb.ecb.getOrderingFunction(ev1.st, ev2.st, op) + .apply(cb, ev1, ev2) } fb.resultWithIndex()(0, r) } diff --git a/hail/src/test/scala/is/hail/expr/ir/StagedBTreeSuite.scala b/hail/src/test/scala/is/hail/expr/ir/StagedBTreeSuite.scala index 92db130f3f1..2d630d174b2 100644 --- a/hail/src/test/scala/is/hail/expr/ir/StagedBTreeSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/StagedBTreeSuite.scala @@ -3,18 +3,20 @@ package is.hail.expr.ir import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import is.hail.HailSuite -import is.hail.annotations.{CodeOrdering, Region} +import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.check.{Gen, Prop} import is.hail.expr.ir.agg._ +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.types.physical._ import is.hail.io.{InputBuffer, OutputBuffer, StreamBufferSpec} +import is.hail.types.physical.stypes.primitives.SInt64 import is.hail.utils._ import org.testng.annotations.Test import scala.collection.mutable class TestBTreeKey(mb: EmitMethodBuilder[_]) extends BTreeKey { - private val comp = mb.getCodeOrdering(PInt64(), CodeOrdering.Compare()) + private val comp = mb.ecb.getOrderingFunction(SInt64(false), SInt64(false), CodeOrdering.Compare()) def storageType: PTuple = PCanonicalTuple(required = true, PInt64(), PCanonicalTuple(false)) def compType: PType = PInt64() def isEmpty(cb: EmitCodeBuilder, off: Code[Long]): Code[Boolean] = diff --git a/hail/src/test/scala/is/hail/expr/ir/TableIRSuite.scala b/hail/src/test/scala/is/hail/expr/ir/TableIRSuite.scala index 4647d70b021..60f19e1498c 100644 --- a/hail/src/test/scala/is/hail/expr/ir/TableIRSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/TableIRSuite.scala @@ -539,10 +539,13 @@ class TableIRSuite extends HailSuite { // Catches a bug in the partitioner created by the importer. @Test def testTableJoinOfImport() { + val mnr = MatrixNativeReader(fs, "src/test/resources/sample.vcf.mt") + val mt2 = MatrixRead(mnr.fullMatrixType, false, false, mnr) + val t2 = MatrixRowsTable(mt2) val mt = importVCF(ctx, "src/test/resources/sample.vcf") var t: TableIR = MatrixRowsTable(mt) t = TableMapRows(t, SelectFields(Ref("row", t.typ.rowType), Seq("locus", "alleles"))) - val join: TableIR = TableJoin(t, t, "inner", 2) + val join: TableIR = TableJoin(t, t2, "inner", 2) assertEvalsTo(TableCount(join), 346L) } diff --git a/monitoring/monitoring/templates/billing.html b/monitoring/monitoring/templates/billing.html index 82f5309ab8a..124f9bd24bb 100644 --- a/monitoring/monitoring/templates/billing.html +++ b/monitoring/monitoring/templates/billing.html @@ -1,5 +1,9 @@ {% extends "layout.html" %} {% block title %}Billing{% endblock %} +{% block head %} + + +{% endblock %} {% block content %}
@@ -52,28 +56,31 @@

[billing.html hunk body — the markup tags were lost in extraction. The hunk's context covers the "Compute Engine Costs by Source", "PR", and "Costs by Product" sections; the change removes the existing Costs-by-Product table and re-adds it inside an extra wrapper element. The table has columns Service, Product, Source, and Cost, with one row per record in cost_by_sku_label showing record['service_description'], record['sku_description'], record['source'] (only when not none), and record['cost'].]
{% endblock %} diff --git a/query/Dockerfile b/query/Dockerfile index ff4eb925015..5182cbb4de7 100644 --- a/query/Dockerfile +++ b/query/Dockerfile @@ -1,9 +1,9 @@ -FROM {{ service_base_image.image }} +FROM {{ service_java_run_base_image.image }} +COPY query/log4j.properties / +COPY query/hail.jar / COPY query/setup.py /query/ COPY query/query /query/query/ RUN hail-pip-install /query && rm -rf /query -COPY query/hail.jar / - EXPOSE 5000 diff --git a/query/Makefile b/query/Makefile index c4cb28a0712..df754ae0227 100644 --- a/query/Makefile +++ b/query/Makefile @@ -13,12 +13,12 @@ check: .PHONY: build build: - $(MAKE) -C ../docker build + $(MAKE) -C ../docker service-java-run-base $(MAKE) -C ../hail shadowJar # janky cp ../hail/build/libs/hail-all-spark.jar ./hail.jar -docker pull $(QUERY_LATEST) - python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"service-base"}}' Dockerfile Dockerfile.out + python3 ../ci/jinja2_render.py '{"service_java_run_base_image":{"image":"service-java-run-base"}}' Dockerfile Dockerfile.out docker build -t query -f Dockerfile.out --cache-from query,$(QUERY_LATEST),service-base .. .PHONY: push diff --git a/query/deployment.yaml b/query/deployment.yaml index b79f8339141..7e9a7118be7 100644 --- a/query/deployment.yaml +++ b/query/deployment.yaml @@ -22,7 +22,7 @@ spec: priorityClassName: production {% endif %} containers: - - name: query + - name: query-python image: "{{ query_image.image }}" command: {% if not deploy %} @@ -42,13 +42,6 @@ spec: value: "{{ code.sha }}" - name: HAIL_QUERY_WORKER_IMAGE value: {{ query_image.image }} -{% if deploy %} - - name: HAIL_QUERY_BUCKET - value: hail-query -{% else %} - - name: HAIL_QUERY_BUCKET - value: cpg-hail-test -{% endif %} ports: - containerPort: 5000 volumeMounts: @@ -61,10 +54,12 @@ spec: - name: ssl-config mountPath: /ssl-config readOnly: true + - name: unix-domain-socket + mountPath: /sock resources: requests: - cpu: "600m" - memory: "2G" + cpu: "300m" + memory: "1G" limits: cpu: "1" memory: "2.5G" @@ -73,7 +68,53 @@ spec: port: 5000 initialDelaySeconds: 5 periodSeconds: 5 + - name: query-scala + image: "{{ query_image.image }}" + command: +{% if not deploy %} + - /controller.sh +{% endif %} + - java + - -Dlog4j.configuration=file:/log4j.properties + - -cp + - /hail.jar:/usr/local/lib/python3.7/dist-packages/pyspark/jars/* + - is.hail.backend.service.ServiceBackendMain + - /sock/sock + env: + - name: HAIL_DOMAIN + value: "{{ global.domain }}" + - name: HAIL_DEPLOY_CONFIG_FILE + value: /deploy-config/deploy-config.json + - name: HAIL_DEFAULT_NAMESPACE + value: "{{ default_ns.name }}" + - name: HAIL_SHA + value: "{{ code.sha }}" + - name: HAIL_QUERY_WORKER_IMAGE + value: {{ query_image.image }} + ports: + - containerPort: 5000 + volumeMounts: + - name: deploy-config + mountPath: /deploy-config + readOnly: true + - name: session-secret-key + mountPath: /session-secret-key + readOnly: true + - name: ssl-config + mountPath: /ssl-config + readOnly: true + - name: unix-domain-socket + mountPath: /sock + resources: + requests: + cpu: "300m" + memory: "1G" + limits: + cpu: "1" + memory: "2.5G" volumes: + - name: unix-domain-socket + emptyDir: {} - name: deploy-config secret: secretName: deploy-config @@ -84,3 +125,32 @@ spec: secret: optional: false secretName: ssl-config-query +{% if not deploy %} +--- +apiVersion: autoscaling/v2beta1 +kind: HorizontalPodAutoscaler +metadata: + name: query +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: query + minReplicas: 3 + maxReplicas: 32 + metrics: 
+ - type: Resource + resource: + name: cpu + targetAverageUtilization: 80 +--- +apiVersion: policy/v1beta1 +kind: PodDisruptionBudget +metadata: + name: query +spec: + minAvailable: 2 + selector: + matchLabels: + app: query +{% endif %} diff --git a/query/log4j.properties b/query/log4j.properties new file mode 100644 index 00000000000..2b29f6a0db2 --- /dev/null +++ b/query/log4j.properties @@ -0,0 +1,3 @@ +log4j.rootLogger=INFO, AlwaysOnConsoleAppender +log4j.appender.AlwaysOnConsoleAppender=org.apache.log4j.ConsoleAppender +log4j.appender.AlwaysOnConsoleAppender.layout=is.hail.services.JSONLogLayout diff --git a/query/query/query.py b/query/query/query.py index 31e74bb6ede..4e8b5d6be35 100644 --- a/query/query/query.py +++ b/query/query/query.py @@ -1,3 +1,4 @@ +from typing import Dict import traceback import os import base64 @@ -5,64 +6,51 @@ import logging import uvloop import asyncio +import aiohttp from aiohttp import web import kubernetes_asyncio as kube -from py4j.java_gateway import JavaGateway, GatewayParameters, launch_gateway -from hailtop.utils import blocking_to_async, retry_transient_errors, find_spark_home +from collections import defaultdict +from hailtop.utils import blocking_to_async, retry_transient_errors from hailtop.config import get_deploy_config from hailtop.tls import internal_server_ssl_context from hailtop.hail_logging import AccessLogger from hailtop.hailctl import version -from gear import setup_aiohttp_session, rest_authenticated_users_only, rest_authenticated_developers_only +from gear import ( + setup_aiohttp_session, + rest_authenticated_users_only, + rest_authenticated_developers_only, +) + +from .sockets import connect_to_java uvloop.install() DEFAULT_NAMESPACE = os.environ['HAIL_DEFAULT_NAMESPACE'] -log = logging.getLogger('batch') +log = logging.getLogger(__name__) routes = web.RouteTableDef() # Store this value once so we don't hit the desk HAIL_VERSION = version() -def java_to_web_response(jresp): - status = jresp.status() - value = jresp.value() - log.info(f'response status {status} value {value}') - if status in (400, 500): - return web.Response(status=status, text=value) - assert status == 200, status - return web.json_response(status=status, text=value) - - -async def send_ws_response(thread_pool, endpoint, ws, f, *args, **kwargs): - try: - jresp = await blocking_to_async(thread_pool, f, *args, **kwargs) - except Exception: - log.exception(f'error calling {f.__name__} for {endpoint}') - status = 500 - value = traceback.format_exc() - else: - status = jresp.status() - value = jresp.value() - log.info(f'{endpoint}: response status {status} value {value}') - await ws.send_json({'status': status, 'value': value}) - - async def add_user(app, userdata): username = userdata['username'] users = app['users'] if username in users: return - jbackend = app['jbackend'] k8s_client = app['k8s_client'] gsa_key_secret = await retry_transient_errors( k8s_client.read_namespaced_secret, userdata['gsa_key_secret_name'], DEFAULT_NAMESPACE, - _request_timeout=5.0) + _request_timeout=5.0, + ) + + if username in users: + return gsa_key = base64.b64decode(gsa_key_secret.data['key.json']).decode() - jbackend.addUser(username, gsa_key) + with connect_to_java() as java: + java.add_user(username, gsa_key) users.add(username) @@ -71,60 +59,102 @@ async def healthcheck(request): # pylint: disable=unused-argument return web.Response() -def blocking_execute(jbackend, userdata, body): - return jbackend.execute(userdata['username'], userdata['session_id'], body['billing_project'], 
body['bucket'], body['code']) +def blocking_execute(userdata, body): + with connect_to_java() as java: + log.info(f'executing {body["token"]}') + return java.execute( + userdata['username'], + userdata['session_id'], + body['billing_project'], + body['bucket'], + body['code'], + body['token'], + ) -def blocking_load_references_from_dataset(jbackend, userdata, body): - return jbackend.loadReferencesFromDataset( - userdata['username'], userdata['session_id'], body['billing_project'], body['bucket'], body['path']) +def blocking_load_references_from_dataset(userdata, body): + with connect_to_java() as java: + return java.load_references_from_dataset( + userdata['username'], + userdata['session_id'], + body['billing_project'], + body['bucket'], + body['path'], + ) -def blocking_value_type(jbackend, userdata, body): - return jbackend.valueType(userdata['username'], body['code']) +def blocking_value_type(userdata, body): + with connect_to_java() as java: + return java.value_type(userdata['username'], body['code']) -def blocking_table_type(jbackend, userdata, body): - return jbackend.tableType(userdata['username'], body['code']) +def blocking_table_type(userdata, body): + with connect_to_java() as java: + return java.table_type(userdata['username'], body['code']) -def blocking_matrix_type(jbackend, userdata, body): - return jbackend.matrixTableType(userdata['username'], body['code']) +def blocking_matrix_type(userdata, body): + with connect_to_java() as java: + return java.matrix_table_type(userdata['username'], body['code']) -def blocking_blockmatrix_type(jbackend, userdata, body): - return jbackend.blockMatrixType(userdata['username'], body['code']) +def blocking_blockmatrix_type(userdata, body): + with connect_to_java() as java: + return java.block_matrix_type(userdata['username'], body['code']) -def blocking_get_reference(jbackend, userdata, body): # pylint: disable=unused-argument - return jbackend.referenceGenome(userdata['username'], body['name']) +def blocking_get_reference(userdata, body): # pylint: disable=unused-argument + with connect_to_java() as java: + return java.reference_genome(userdata['username'], body['name']) async def handle_ws_response(request, userdata, endpoint, f): app = request.app - jbackend = app['jbackend'] + user_queries: Dict[str, asyncio.Future] = request.app['queries'][ + userdata['username'] + ] - await add_user(app, userdata) - log.info(f'{endpoint}: connecting websocket') ws = web.WebSocketResponse(heartbeat=30, max_msg_size=0) - task = None await ws.prepare(request) + body = await ws.receive_json() + + query = user_queries.get(body['token']) + if query is None: + await add_user(app, userdata) + query = asyncio.ensure_future( + retry_transient_errors( + blocking_to_async, app['thread_pool'], f, userdata, body + ) + ) + user_queries[body['token']] = query + try: - log.info(f'{endpoint}: websocket prepared {ws}') - body = await ws.receive_json() - log.info(f'{endpoint}: {body}') - task = asyncio.ensure_future(send_ws_response(app['thread_pool'], endpoint, ws, f, jbackend, userdata, body)) - r = await ws.receive() - log.info(f'{endpoint}: Received websocket message. 
Expected CLOSE, got {r}') - return ws + receive = asyncio.ensure_future( + ws.receive() + ) # receive automatically ping-pongs which keeps the socket alive + await asyncio.wait([receive, query], return_when=asyncio.FIRST_COMPLETED) + if receive.done(): + # we expect no messages from the client + response = receive.result() + assert response.type in ( + aiohttp.WSMsgType.CLOSE, + aiohttp.WSMsgType.CLOSING, + ), f'{endpoint}: Received websocket message. Expected CLOSE or CLOSING, got {response}' + if not query.done(): + return + if query.exception() is not None: + exc = query.exception() + exc_str = traceback.format_exception(type(exc), exc, exc.__traceback__) + await ws.send_json({'status': 500, 'value': exc_str}) + else: + await ws.send_json({'status': 200, 'value': query.result()}) + assert await ws.receive_str() == 'bye' + del user_queries[body['token']] finally: - if not ws.closed: - await ws.close() - log.info(f'{endpoint}: Websocket was not closed. Closing.') - if task is not None and not task.done(): - task.cancel() - log.info(f'{endpoint}: Task has been cancelled due to websocket closure.') - log.info(f'{endpoint}: websocket connection closed') + receive.cancel() + query.cancel() + await ws.close() + return ws @routes.get('/api/v1alpha/execute') @@ -136,45 +166,61 @@ async def execute(request, userdata): @routes.get('/api/v1alpha/load_references_from_dataset') @rest_authenticated_users_only async def load_references_from_dataset(request, userdata): - return await handle_ws_response(request, userdata, 'load_references_from_dataset', blocking_load_references_from_dataset) + return await handle_ws_response( + request, + userdata, + 'load_references_from_dataset', + blocking_load_references_from_dataset, + ) @routes.get('/api/v1alpha/type/value') @rest_authenticated_users_only async def value_type(request, userdata): - return await handle_ws_response(request, userdata, 'type/value', blocking_value_type) + return await handle_ws_response( + request, userdata, 'type/value', blocking_value_type + ) @routes.get('/api/v1alpha/type/table') @rest_authenticated_users_only async def table_type(request, userdata): - return await handle_ws_response(request, userdata, 'type/table', blocking_table_type) + return await handle_ws_response( + request, userdata, 'type/table', blocking_table_type + ) @routes.get('/api/v1alpha/type/matrix') @rest_authenticated_users_only async def matrix_type(request, userdata): - return await handle_ws_response(request, userdata, 'type/matrix', blocking_matrix_type) + return await handle_ws_response( + request, userdata, 'type/matrix', blocking_matrix_type + ) @routes.get('/api/v1alpha/type/blockmatrix') @rest_authenticated_users_only async def blockmatrix_type(request, userdata): - return await handle_ws_response(request, userdata, 'type/blockmatrix', blocking_blockmatrix_type) + return await handle_ws_response( + request, userdata, 'type/blockmatrix', blocking_blockmatrix_type + ) @routes.get('/api/v1alpha/references/get') @rest_authenticated_users_only async def get_reference(request, userdata): # pylint: disable=unused-argument - return await handle_ws_response(request, userdata, 'references/get', blocking_get_reference) + return await handle_ws_response( + request, userdata, 'references/get', blocking_get_reference + ) @routes.get('/api/v1alpha/flags/get') @rest_authenticated_developers_only async def get_flags(request, userdata): # pylint: disable=unused-argument app = request.app - jresp = await blocking_to_async(app['thread_pool'], app['jbackend'].flags) - return 
java_to_web_response(jresp) + with connect_to_java() as java: + jresp = await blocking_to_async(app['thread_pool'], java.flags) + return web.json_response(jresp) @routes.get('/api/v1alpha/flags/get/{flag}') @@ -182,8 +228,9 @@ async def get_flags(request, userdata): # pylint: disable=unused-argument async def get_flag(request, userdata): # pylint: disable=unused-argument app = request.app f = request.match_info['flag'] - jresp = await blocking_to_async(app['thread_pool'], app['jbackend'].getFlag, f) - return java_to_web_response(jresp) + with connect_to_java() as java: + jresp = await blocking_to_async(app['thread_pool'], java.get_flag, f) + return web.json_response(jresp) @routes.get('/api/v1alpha/flags/set/{flag}') @@ -192,11 +239,12 @@ async def set_flag(request, userdata): # pylint: disable=unused-argument app = request.app f = request.match_info['flag'] v = request.query.get('value') - if v is None: - jresp = await blocking_to_async(app['thread_pool'], app['jbackend'].unsetFlag, f) - else: - jresp = await blocking_to_async(app['thread_pool'], app['jbackend'].setFlag, f, v) - return java_to_web_response(jresp) + with connect_to_java() as java: + if v is None: + jresp = await blocking_to_async(app['thread_pool'], java.unset_flag, f) + else: + jresp = await blocking_to_async(app['thread_pool'], java.set_flag, f, v) + return web.json_response(jresp) @routes.get('/api/v1alpha/version') @@ -210,25 +258,9 @@ async def rest_get_version(request): # pylint: disable=W0613 async def on_startup(app): thread_pool = concurrent.futures.ThreadPoolExecutor(max_workers=16) app['thread_pool'] = thread_pool - - spark_home = find_spark_home() - port = launch_gateway(die_on_exit=True, classpath=f'{spark_home}/jars/*:/hail.jar') - gateway = JavaGateway( - gateway_parameters=GatewayParameters(port=port), - auto_convert=True) - app['gateway'] = gateway - - hail_pkg = getattr(gateway.jvm, 'is').hail - app['hail_pkg'] = hail_pkg - - jbackend = hail_pkg.backend.service.ServiceBackend.apply() - app['jbackend'] = jbackend - - jhc = hail_pkg.HailContext.apply( - jbackend, 'hail.log', False, False, 50, False, 3) - app['jhc'] = jhc - + app['user_keys'] = dict() app['users'] = set() + app['queries'] = defaultdict(dict) kube.config.load_incluster_config() k8s_client = kube.client.CoreV1Api() @@ -236,15 +268,22 @@ async def on_startup(app): async def on_cleanup(app): - del app['k8s_client'] - await asyncio.wait(*(t for t in asyncio.all_tasks() if t is not asyncio.current_task())) + if 'k8s_client' in app: + del app['k8s_client'] + await asyncio.wait( + *(t for t in asyncio.all_tasks() if t is not asyncio.current_task()) + ) async def on_shutdown(app): # Filter the asyncio.current_task(), because if we await # the current task we'll end up in a deadlock - remaining_tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()] - log.info(f"On shutdown request received, with {len(remaining_tasks)} remaining tasks") + remaining_tasks = [ + t for t in asyncio.all_tasks() if t is not asyncio.current_task() + ] + log.info( + f"On shutdown request received, with {len(remaining_tasks)} remaining tasks" + ) await asyncio.wait(*remaining_tasks) log.info("All tasks on shutdown have completed") @@ -266,4 +305,6 @@ def run(): host='0.0.0.0', port=5000, access_log_class=AccessLogger, - ssl_context=internal_server_ssl_context()) + ssl_context=internal_server_ssl_context(), + ) + diff --git a/query/query/sockets.py b/query/query/sockets.py new file mode 100644 index 00000000000..999af210240 --- /dev/null +++ 
b/query/query/sockets.py @@ -0,0 +1,266 @@ +import json +import socket +import struct +import logging +from hailtop.utils import sync_retry_transient_errors, TransientError + + +log = logging.getLogger('query.sockets') + + +def connect_to_java() -> 'ServiceBackendSocketConnection': + return ServiceBackendSocketConnection() + + +class EndOfStream(TransientError): + pass + + +class ServiceBackendSocketConnection: + LOAD_REFERENCES_FROM_DATASET = 1 + VALUE_TYPE = 2 + TABLE_TYPE = 3 + MATRIX_TABLE_TYPE = 4 + BLOCK_MATRIX_TYPE = 5 + REFERENCE_GENOME = 6 + EXECUTE = 7 + FLAGS = 8 + GET_FLAG = 9 + UNSET_FLAG = 10 + SET_FLAG = 11 + ADD_USER = 12 + GOODBYE = 254 + + FNAME = '/sock/sock' + + def __init__(self): + pass + + def __enter__(self) -> 'ServiceBackendSocketConnection': + self._conn = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) # pylint: disable=attribute-defined-outside-init + sync_retry_transient_errors( + self._conn.connect, + ServiceBackendSocketConnection.FNAME) + return self + + def __exit__(self, type, value, traceback): + self.write_int(ServiceBackendSocketConnection.GOODBYE) + response = self.read_int() + assert response == ServiceBackendSocketConnection.GOODBYE, response + self._conn.close() + + def write_int(self, v: int): + self._conn.sendall(struct.pack(' bytes: + b = bytearray() + left = n + while left > 0: + t = self._conn.recv(left) + if not t: + log.warning('unexpected EOS, Java violated protocol, this will be retried') + raise EndOfStream() + left -= len(t) + b.extend(t) + return b + + def read_byte(self) -> int: + b = self.read(1) + return b[0] + + def read_bool(self) -> bool: + return self.read_byte() != 0 + + def read_int(self) -> int: + b = self.read(4) + return struct.unpack(' int: + b = self.read(8) + return struct.unpack(' bytes: + n = self.read_int() + return self.read(n) + + def read_str(self) -> str: + b = self.read_bytes() + return b.decode('utf-8') + + def load_references_from_dataset(self, username: str, session_id: str, billing_project: str, bucket: str, path: str): + self.write_int(ServiceBackendSocketConnection.LOAD_REFERENCES_FROM_DATASET) + self.write_str(username) + self.write_str(session_id) + self.write_str(billing_project) + self.write_str(bucket) + self.write_str(path) + success = self.read_bool() + if success: + s = self.read_str() + try: + return json.loads(s) + except json.decoder.JSONDecodeError as err: + raise ValueError(f'could not decode {s}') from err + jstacktrace = self.read_str() + raise ValueError(jstacktrace) + + def value_type(self, username: str, s: str): + self.write_int(ServiceBackendSocketConnection.VALUE_TYPE) + self.write_str(username) + self.write_str(s) + success = self.read_bool() + if success: + s = self.read_str() + try: + return json.loads(s) + except json.decoder.JSONDecodeError as err: + raise ValueError(f'could not decode {s}') from err + jstacktrace = self.read_str() + raise ValueError(jstacktrace) + + def table_type(self, username: str, s: str): + self.write_int(ServiceBackendSocketConnection.TABLE_TYPE) + self.write_str(username) + self.write_str(s) + success = self.read_bool() + if success: + s = self.read_str() + try: + return json.loads(s) + except json.decoder.JSONDecodeError as err: + raise ValueError(f'could not decode {s}') from err + jstacktrace = self.read_str() + raise ValueError(jstacktrace) + + def matrix_table_type(self, username: str, s: str): + self.write_int(ServiceBackendSocketConnection.MATRIX_TABLE_TYPE) + self.write_str(username) + self.write_str(s) + success = self.read_bool() + if success: + 
s = self.read_str() + try: + return json.loads(s) + except json.decoder.JSONDecodeError as err: + raise ValueError(f'could not decode {s}') from err + jstacktrace = self.read_str() + raise ValueError(jstacktrace) + + def block_matrix_type(self, username: str, s: str): + self.write_int(ServiceBackendSocketConnection.BLOCK_MATRIX_TYPE) + self.write_str(username) + self.write_str(s) + success = self.read_bool() + if success: + s = self.read_str() + try: + return json.loads(s) + except json.decoder.JSONDecodeError as err: + raise ValueError(f'could not decode {s}') from err + jstacktrace = self.read_str() + raise ValueError(jstacktrace) + + def reference_genome(self, username: str, name: str): + self.write_int(ServiceBackendSocketConnection.REFERENCE_GENOME) + self.write_str(username) + self.write_str(name) + success = self.read_bool() + if success: + s = self.read_str() + try: + return json.loads(s) + except json.decoder.JSONDecodeError as err: + raise ValueError(f'could not decode {s}') from err + jstacktrace = self.read_str() + raise ValueError(jstacktrace) + + def execute(self, username: str, session_id: str, billing_project: str, bucket: str, code: str, token: str): + self.write_int(ServiceBackendSocketConnection.EXECUTE) + self.write_str(username) + self.write_str(session_id) + self.write_str(billing_project) + self.write_str(bucket) + self.write_str(code) + self.write_str(token) + success = self.read_bool() + if success: + s = self.read_str() + try: + return json.loads(s) + except json.decoder.JSONDecodeError as err: + raise ValueError(f'could not decode {s}') from err + jstacktrace = self.read_str() + raise ValueError(jstacktrace) + + def flags(self): + self.write_int(ServiceBackendSocketConnection.FLAGS) + success = self.read_bool() + if success: + s = self.read_str() + try: + return json.loads(s) + except json.decoder.JSONDecodeError as err: + raise ValueError(f'could not decode {s}') from err + jstacktrace = self.read_str() + raise ValueError(jstacktrace) + + def get_flag(self, name: str): + self.write_int(ServiceBackendSocketConnection.GET_FLAG) + self.write_str(name) + success = self.read_bool() + if success: + s = self.read_str() + try: + return json.loads(s) + except json.decoder.JSONDecodeError as err: + raise ValueError(f'could not decode {s}') from err + jstacktrace = self.read_str() + raise ValueError(jstacktrace) + + def unset_flag(self, name: str): + self.write_int(ServiceBackendSocketConnection.UNSET_FLAG) + self.write_str(name) + success = self.read_bool() + if success: + s = self.read_str() + try: + return json.loads(s) + except json.decoder.JSONDecodeError as err: + raise ValueError(f'could not decode {s}') from err + jstacktrace = self.read_str() + raise ValueError(jstacktrace) + + def set_flag(self, name: str, value: str): + self.write_int(ServiceBackendSocketConnection.SET_FLAG) + self.write_str(name) + self.write_str(value) + success = self.read_bool() + if success: + s = self.read_str() + try: + return json.loads(s) + except json.decoder.JSONDecodeError as err: + raise ValueError(f'could not decode {s}') from err + jstacktrace = self.read_str() + raise ValueError(jstacktrace) + + def add_user(self, name: str, gsa_key: str): + self.write_int(ServiceBackendSocketConnection.ADD_USER) + self.write_str(name) + self.write_str(gsa_key) + success = self.read_bool() + if success: + return + jstacktrace = self.read_str() + raise ValueError(jstacktrace) diff --git a/query/test/test_query.py b/query/test/test_query.py deleted file mode 100644 index 2dbdb302371..00000000000 
--- a/query/test/test_query.py +++ /dev/null @@ -1,17 +0,0 @@ -import pytest -import hail as hl -from hailtop.hailctl.dev.query import cli - -def test_simple_table(): - t = hl.utils.range_table(50, 3) - t = t.filter((t.idx % 3 == 0) | ((t.idx / 7) % 3 == 0)) - n = t.count() - print(f'n {n}') - assert n == 17 - -# FIXME(danking): disabled while I work on a fix -# def test_simple_shuffle(): -# expected = [hl.Struct(idx=i) for i in range(99, -1, -1)] -# t = hl.utils.range_table(100) -# actual = t.order_by(-t.idx).collect() -# assert actual == expected diff --git a/tls/config.yaml b/tls/config.yaml index 4fe9ba2ad1a..c03d037fc29 100644 --- a/tls/config.yaml +++ b/tls/config.yaml @@ -71,9 +71,6 @@ principals: - name: shuffler domain: shuffler kind: json -- name: image-fetcher - domain: image-fetcher - kind: curl - name: benchmark domain: benchmark kind: json From 8d9f04c5089ff228ae4e2adfdb828579d3f10e7a Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Fri, 5 Mar 2021 12:41:56 +1100 Subject: [PATCH 192/501] prod_deploy: make sure that HEAD is the same as SHA that triggered the build --- .github/workflows/prod_deploy.yaml | 2 +- ci/ci/ci.py | 8 +++++++- ci/ci/github.py | 9 +++++++-- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/.github/workflows/prod_deploy.yaml b/.github/workflows/prod_deploy.yaml index 9cc2971141f..5caca8ea73c 100644 --- a/.github/workflows/prod_deploy.yaml +++ b/.github/workflows/prod_deploy.yaml @@ -9,4 +9,4 @@ jobs: steps: - name: "deploy" run: | - curl --fail -X POST -H "Authorization: Bearer ${{ secrets.CI_TOKEN }}" -H "Content-Type:application/json" -d '{"steps": ["deploy_auth", "deploy_batch", "deploy_ci", "deploy_notebook", "deploy_query", "deploy_router"]}' https://ci.hail.populationgenomics.org.au/api/v1alpha/prod_deploy + curl --fail -X POST -H "Authorization: Bearer ${{ secrets.CI_TOKEN }}" -H "Content-Type:application/json" -d '{"steps": ["deploy_auth", "deploy_batch", "deploy_ci", "deploy_notebook", "deploy_query", "deploy_router"], "sha": "${{ github.sha }}"}' https://ci.hail.populationgenomics.org.au/api/v1alpha/prod_deploy diff --git a/ci/ci/ci.py b/ci/ci/ci.py index c78e8cd8e13..78e64f51873 100644 --- a/ci/ci/ci.py +++ b/ci/ci/ci.py @@ -152,8 +152,14 @@ async def prod_deploy(request, userdata): watched_branch = WatchedBranch( 0, FQBranch.from_short_str('populationgenomics/hail:main'), True ) + # We only allow to deploy to prod from the HEAD revision; however we are using + # the sha parameter to verify that HEAD points to what we expect watched_branch.sha = 'HEAD' - await watched_branch._start_deploy(app['batch_client'], steps) + await watched_branch._start_deploy( + app['batch_client'], + steps, + sha_must_be=params.get('sha'), + ) batch = watched_branch.deploy_batch if not isinstance(batch, MergeFailureBatch): diff --git a/ci/ci/github.py b/ci/ci/github.py index 005da121b81..7f2c48a6a1d 100644 --- a/ci/ci/github.py +++ b/ci/ci/github.py @@ -9,7 +9,8 @@ from hailtop.config import get_deploy_config from hailtop.batch_client.aioclient import Batch -from hailtop.utils import check_shell, check_shell_output, RETRY_FUNCTION_SCRIPT +from hailtop.utils import check_shell, check_shell_output, RETRY_FUNCTION_SCRIPT, \ + sync_check_shell_output from .constants import GITHUB_CLONE_URL, AUTHORIZED_USERS, GITHUB_STATUS_CONTEXT from .build import BuildConfiguration, Code from .globals import is_test_deployment @@ -760,7 +761,7 @@ async def _heal(self, batch_client, dbpool, gh): log.info(f'cancel batch {batch.id} for {attrs["pr"]} {attrs["source_sha"]} => 
{attrs["target_sha"]}') await batch.cancel() - async def _start_deploy(self, batch_client, steps=()): + async def _start_deploy(self, batch_client, steps=(), sha_must_be=None): # not deploying assert not self.deploy_batch or self.deploy_state @@ -776,6 +777,10 @@ async def _start_deploy(self, batch_client, steps=()): (cd {shq(repo_dir)}; {self.checkout_script()}) ''' ) + if sha_must_be: + out, err = sync_check_shell_output(f'(cd {repo_dir}; git rev-parse {self.sha})') + current_sha = out.decode().strip() + assert current_sha == sha_must_be, (current_sha, sha_must_be) with open(f'{repo_dir}/build.yaml', 'r') as f: config = BuildConfiguration(self, f.read(), scope='deploy', requested_step_names=steps) From 48c9c63b748db7d1151a77ca44d032b94fc5c440 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Fri, 5 Mar 2021 13:17:11 +1100 Subject: [PATCH 193/501] Throw proper exception --- .github/workflows/prod_deploy.yaml | 2 +- ci/ci/ci.py | 2 +- ci/ci/github.py | 17 ++++++++++++----- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/.github/workflows/prod_deploy.yaml b/.github/workflows/prod_deploy.yaml index 5caca8ea73c..75dbeff09b0 100644 --- a/.github/workflows/prod_deploy.yaml +++ b/.github/workflows/prod_deploy.yaml @@ -9,4 +9,4 @@ jobs: steps: - name: "deploy" run: | - curl --fail -X POST -H "Authorization: Bearer ${{ secrets.CI_TOKEN }}" -H "Content-Type:application/json" -d '{"steps": ["deploy_auth", "deploy_batch", "deploy_ci", "deploy_notebook", "deploy_query", "deploy_router"], "sha": "${{ github.sha }}"}' https://ci.hail.populationgenomics.org.au/api/v1alpha/prod_deploy + curl --fail -X POST -H "Authorization: Bearer ${{ secrets.CI_TOKEN }}" -H "Content-Type:application/json" -d '{"steps": ["deploy_auth", "deploy_batch", "deploy_ci", "deploy_notebook", "deploy_query", "deploy_router"], "expected_sha": "${{ github.sha }}"}' https://ci.hail.populationgenomics.org.au/api/v1alpha/prod_deploy diff --git a/ci/ci/ci.py b/ci/ci/ci.py index 78e64f51873..5e5e104322c 100644 --- a/ci/ci/ci.py +++ b/ci/ci/ci.py @@ -158,7 +158,7 @@ async def prod_deploy(request, userdata): await watched_branch._start_deploy( app['batch_client'], steps, - sha_must_be=params.get('sha'), + expected_sha=params.get('expected_sha'), ) batch = watched_branch.deploy_batch diff --git a/ci/ci/github.py b/ci/ci/github.py index 7f2c48a6a1d..13599f51678 100644 --- a/ci/ci/github.py +++ b/ci/ci/github.py @@ -761,7 +761,7 @@ async def _heal(self, batch_client, dbpool, gh): log.info(f'cancel batch {batch.id} for {attrs["pr"]} {attrs["source_sha"]} => {attrs["target_sha"]}') await batch.cancel() - async def _start_deploy(self, batch_client, steps=(), sha_must_be=None): + async def _start_deploy(self, batch_client, steps=(), expected_sha=None): # not deploying assert not self.deploy_batch or self.deploy_state @@ -777,11 +777,18 @@ async def _start_deploy(self, batch_client, steps=(), sha_must_be=None): (cd {shq(repo_dir)}; {self.checkout_script()}) ''' ) - if sha_must_be: - out, err = sync_check_shell_output(f'(cd {repo_dir}; git rev-parse {self.sha})') + if expected_sha is not None: + out, err = sync_check_shell_output(f'(cd {shq(repo_dir)}; git rev-parse {self.sha})') current_sha = out.decode().strip() - assert current_sha == sha_must_be, (current_sha, sha_must_be) - with open(f'{repo_dir}/build.yaml', 'r') as f: + if current_sha != expected_sha: + msg = ( + f'SHA of the cloned repository {current_sha} does not match' + f'the expected SHA {expected_sha}' + ) + log.exception(msg) + raise ValueError(msg) + + with 
open(f'{shq(repo_dir)}/build.yaml', 'r') as f: config = BuildConfiguration(self, f.read(), scope='deploy', requested_step_names=steps) log.info(f'creating deploy batch for {self.branch.short_str()}') From ddf8bc53a6f445036d8d1032608b90996f255ac6 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Fri, 5 Mar 2021 13:20:40 +1100 Subject: [PATCH 194/501] Revert excaping in open('r') --- ci/ci/github.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/ci/github.py b/ci/ci/github.py index 13599f51678..978509d1da2 100644 --- a/ci/ci/github.py +++ b/ci/ci/github.py @@ -788,7 +788,7 @@ async def _start_deploy(self, batch_client, steps=(), expected_sha=None): log.exception(msg) raise ValueError(msg) - with open(f'{shq(repo_dir)}/build.yaml', 'r') as f: + with open(f'{repo_dir}/build.yaml', 'r') as f: config = BuildConfiguration(self, f.read(), scope='deploy', requested_step_names=steps) log.info(f'creating deploy batch for {self.branch.short_str()}') From b8148b80816c2c2771734a8eb4532b795228e6fb Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Fri, 5 Mar 2021 14:16:23 +1100 Subject: [PATCH 195/501] prod_deploy: add sha param; if not specified, replace HEAD with the actual sha --- .github/workflows/prod_deploy.yaml | 2 +- ci/ci/ci.py | 13 +++++-------- ci/ci/github.py | 18 +++++++----------- 3 files changed, 13 insertions(+), 20 deletions(-) diff --git a/.github/workflows/prod_deploy.yaml b/.github/workflows/prod_deploy.yaml index 75dbeff09b0..5caca8ea73c 100644 --- a/.github/workflows/prod_deploy.yaml +++ b/.github/workflows/prod_deploy.yaml @@ -9,4 +9,4 @@ jobs: steps: - name: "deploy" run: | - curl --fail -X POST -H "Authorization: Bearer ${{ secrets.CI_TOKEN }}" -H "Content-Type:application/json" -d '{"steps": ["deploy_auth", "deploy_batch", "deploy_ci", "deploy_notebook", "deploy_query", "deploy_router"], "expected_sha": "${{ github.sha }}"}' https://ci.hail.populationgenomics.org.au/api/v1alpha/prod_deploy + curl --fail -X POST -H "Authorization: Bearer ${{ secrets.CI_TOKEN }}" -H "Content-Type:application/json" -d '{"steps": ["deploy_auth", "deploy_batch", "deploy_ci", "deploy_notebook", "deploy_query", "deploy_router"], "sha": "${{ github.sha }}"}' https://ci.hail.populationgenomics.org.au/api/v1alpha/prod_deploy diff --git a/ci/ci/ci.py b/ci/ci/ci.py index 5e5e104322c..cc444074f47 100644 --- a/ci/ci/ci.py +++ b/ci/ci/ci.py @@ -152,14 +152,11 @@ async def prod_deploy(request, userdata): watched_branch = WatchedBranch( 0, FQBranch.from_short_str('populationgenomics/hail:main'), True ) - # We only allow to deploy to prod from the HEAD revision; however we are using - # the sha parameter to verify that HEAD points to what we expect - watched_branch.sha = 'HEAD' - await watched_branch._start_deploy( - app['batch_client'], - steps, - expected_sha=params.get('expected_sha'), - ) + if params.get('sha'): + watched_branch.sha = params['sha'] + else: + watched_branch.sha = 'HEAD' + await watched_branch._start_deploy(app['batch_client'], steps) batch = watched_branch.deploy_batch if not isinstance(batch, MergeFailureBatch): diff --git a/ci/ci/github.py b/ci/ci/github.py index 978509d1da2..151b908f6d0 100644 --- a/ci/ci/github.py +++ b/ci/ci/github.py @@ -761,7 +761,7 @@ async def _heal(self, batch_client, dbpool, gh): log.info(f'cancel batch {batch.id} for {attrs["pr"]} {attrs["source_sha"]} => {attrs["target_sha"]}') await batch.cancel() - async def _start_deploy(self, batch_client, steps=(), expected_sha=None): + async def _start_deploy(self, batch_client, steps=()): # not 
deploying assert not self.deploy_batch or self.deploy_state @@ -777,16 +777,12 @@ async def _start_deploy(self, batch_client, steps=(), expected_sha=None): (cd {shq(repo_dir)}; {self.checkout_script()}) ''' ) - if expected_sha is not None: - out, err = sync_check_shell_output(f'(cd {shq(repo_dir)}; git rev-parse {self.sha})') - current_sha = out.decode().strip() - if current_sha != expected_sha: - msg = ( - f'SHA of the cloned repository {current_sha} does not match' - f'the expected SHA {expected_sha}' - ) - log.exception(msg) - raise ValueError(msg) + # The repository is cloned multiple times during the build process. + # To make sure that the revision is consistent during the entire build, + # we are replacing HEAD with the actual SHA of the revision. + if self.sha == 'HEAD': + self.sha = sync_check_shell_output( + f'(cd {shq(repo_dir)}; git rev-parse {self.sha})') with open(f'{repo_dir}/build.yaml', 'r') as f: config = BuildConfiguration(self, f.read(), scope='deploy', requested_step_names=steps) From e2f265bda3ca8eab42ba83d1aea21e2ee0911aaa Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Fri, 5 Mar 2021 14:23:04 +1100 Subject: [PATCH 196/501] Fix --- ci/ci/github.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/ci/github.py b/ci/ci/github.py index 151b908f6d0..74eb1d63453 100644 --- a/ci/ci/github.py +++ b/ci/ci/github.py @@ -781,8 +781,9 @@ async def _start_deploy(self, batch_client, steps=()): # To make sure that the revision is consistent during the entire build, # we are replacing HEAD with the actual SHA of the revision. if self.sha == 'HEAD': - self.sha = sync_check_shell_output( + out, err = sync_check_shell_output( f'(cd {shq(repo_dir)}; git rev-parse {self.sha})') + self.sha = out.decode().strip() with open(f'{repo_dir}/build.yaml', 'r') as f: config = BuildConfiguration(self, f.read(), scope='deploy', requested_step_names=steps) From 41a009eb54438c53fa6e4777fe9e1e3aab249b05 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Fri, 5 Mar 2021 15:59:33 +1100 Subject: [PATCH 197/501] Require the sha param and forbit HEAD --- ci/ci/ci.py | 17 +++++++++++++---- ci/ci/github.py | 8 -------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/ci/ci/ci.py b/ci/ci/ci.py index cc444074f47..28eb5062a39 100644 --- a/ci/ci/ci.py +++ b/ci/ci/ci.py @@ -149,13 +149,22 @@ async def prod_deploy(request, userdata): log.info('prod deploy failed: ' + message, exc_info=True) raise web.HTTPBadRequest(text=message) from e + if 'sha' not in params: + message = f'parameter "sha" is required.\n\n{params}' + log.info('prod deploy failed: ' + message, exc_info=True) + raise web.HTTPBadRequest(text=message) + if params['sha'] == 'HEAD': + message = ( + f'SHA must be a specific commit hash, and can\'t be a HEAD reference. 
' + f'The reason is that HEAD can change in the middle of the deploy.\n\n{params}' + ) + log.info('prod deploy failed: ' + message, exc_info=True) + raise web.HTTPBadRequest(text=message) + watched_branch = WatchedBranch( 0, FQBranch.from_short_str('populationgenomics/hail:main'), True ) - if params.get('sha'): - watched_branch.sha = params['sha'] - else: - watched_branch.sha = 'HEAD' + watched_branch.sha = params['sha'] await watched_branch._start_deploy(app['batch_client'], steps) batch = watched_branch.deploy_batch diff --git a/ci/ci/github.py b/ci/ci/github.py index 74eb1d63453..fdf9a5aa7ef 100644 --- a/ci/ci/github.py +++ b/ci/ci/github.py @@ -777,14 +777,6 @@ async def _start_deploy(self, batch_client, steps=()): (cd {shq(repo_dir)}; {self.checkout_script()}) ''' ) - # The repository is cloned multiple times during the build process. - # To make sure that the revision is consistent during the entire build, - # we are replacing HEAD with the actual SHA of the revision. - if self.sha == 'HEAD': - out, err = sync_check_shell_output( - f'(cd {shq(repo_dir)}; git rev-parse {self.sha})') - self.sha = out.decode().strip() - with open(f'{repo_dir}/build.yaml', 'r') as f: config = BuildConfiguration(self, f.read(), scope='deploy', requested_step_names=steps) From e7a65c0508b5fff4d9da449ee982c64aafe28f8c Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Fri, 5 Mar 2021 16:00:39 +1100 Subject: [PATCH 198/501] Remove unused import --- ci/ci/github.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ci/ci/github.py b/ci/ci/github.py index fdf9a5aa7ef..005da121b81 100644 --- a/ci/ci/github.py +++ b/ci/ci/github.py @@ -9,8 +9,7 @@ from hailtop.config import get_deploy_config from hailtop.batch_client.aioclient import Batch -from hailtop.utils import check_shell, check_shell_output, RETRY_FUNCTION_SCRIPT, \ - sync_check_shell_output +from hailtop.utils import check_shell, check_shell_output, RETRY_FUNCTION_SCRIPT from .constants import GITHUB_CLONE_URL, AUTHORIZED_USERS, GITHUB_STATUS_CONTEXT from .build import BuildConfiguration, Code from .globals import is_test_deployment From 52b257636a227307e0b9336d28441da109908f3e Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Fri, 5 Mar 2021 17:05:53 +1100 Subject: [PATCH 199/501] Better cURL flags. (#76) --- .github/workflows/condarise.yaml | 5 ++--- .github/workflows/prod_deploy.yaml | 6 +++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/condarise.yaml b/.github/workflows/condarise.yaml index d6dd62ba93d..02a97b4abac 100644 --- a/.github/workflows/condarise.yaml +++ b/.github/workflows/condarise.yaml @@ -46,9 +46,8 @@ jobs: # Only run this step once. 
if: ${{ matrix.os == 'ubuntu-latest' }} run: | - curl --fail \ - -X POST \ + curl --fail --silent --show-error -X POST \ -H "Authorization: token ${{ secrets.ANALYSIS_SERVER_GITHUB_TOKEN }}" \ -H "Accept: application/vnd.github.v3+json" \ https://api.github.com/repos/populationgenomics/analysis-runner/actions/workflows/6364059/dispatches \ - -d "{\"ref\": \"main\", \"inputs\": {\"hail_version\": \"${{ env.version }}\"}}" + -d '{"ref": "main", "inputs": {"hail_version": "${{ env.version }}"}}' diff --git a/.github/workflows/prod_deploy.yaml b/.github/workflows/prod_deploy.yaml index 5caca8ea73c..e665a145906 100644 --- a/.github/workflows/prod_deploy.yaml +++ b/.github/workflows/prod_deploy.yaml @@ -9,4 +9,8 @@ jobs: steps: - name: "deploy" run: | - curl --fail -X POST -H "Authorization: Bearer ${{ secrets.CI_TOKEN }}" -H "Content-Type:application/json" -d '{"steps": ["deploy_auth", "deploy_batch", "deploy_ci", "deploy_notebook", "deploy_query", "deploy_router"], "sha": "${{ github.sha }}"}' https://ci.hail.populationgenomics.org.au/api/v1alpha/prod_deploy + curl --fail --silent --show-error -X POST \ + -H "Authorization: Bearer ${{ secrets.CI_TOKEN }}" \ + -H "Content-Type:application/json" \ + -d '{"steps": ["deploy_auth", "deploy_batch", "deploy_ci", "deploy_notebook", "deploy_query", "deploy_router"], "sha": "${{ github.sha }}"}' \ + https://ci.hail.populationgenomics.org.au/api/v1alpha/prod_deploy From 568f611a18fc365730a6d396cb4778121c4d2928 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Mon, 8 Mar 2021 14:05:53 +1100 Subject: [PATCH 200/501] Roll back version endpoint, which currently prevents the query service from starting --- query/query/query.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/query/query/query.py b/query/query/query.py index 4e8b5d6be35..1846dd99257 100644 --- a/query/query/query.py +++ b/query/query/query.py @@ -28,8 +28,6 @@ DEFAULT_NAMESPACE = os.environ['HAIL_DEFAULT_NAMESPACE'] log = logging.getLogger(__name__) routes = web.RouteTableDef() -# Store this value once so we don't hit the desk -HAIL_VERSION = version() async def add_user(app, userdata): @@ -247,14 +245,6 @@ async def set_flag(request, userdata): # pylint: disable=unused-argument return web.json_response(jresp) -@routes.get('/api/v1alpha/version') -async def rest_get_version(request): # pylint: disable=W0613 - try: - return web.Response(text=HAIL_VERSION) - except Exception as e: - return web.json_response({"error": str(e)}) - - async def on_startup(app): thread_pool = concurrent.futures.ThreadPoolExecutor(max_workers=16) app['thread_pool'] = thread_pool From 05fad42e2f36dbfd759433f23bce7272c7d6dadb Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Tue, 9 Mar 2021 19:00:35 +1100 Subject: [PATCH 201/501] Add a --service-account parameter to hailctl dataproc start. 
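A brief usage sketch of the new option (hedged: the cluster name and service-account email below are illustrative placeholders, not part of this patch). Per the diff, the value is simply forwarded to the underlying gcloud invocation via cmd.append('--service-account={}'.format(args.service_account)):

    # hypothetical invocation; substitute your own cluster name and service account
    hailctl dataproc start my-cluster \
        --service-account=my-sa@my-project.iam.gserviceaccount.com

If the flag is omitted, cluster creation falls back to the default Compute Engine service account, as stated in the argument's help text.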
--- hail/python/hailtop/hailctl/dataproc/start.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hail/python/hailtop/hailctl/dataproc/start.py b/hail/python/hailtop/hailctl/dataproc/start.py index 97060cc80a3..b8aff6137fb 100755 --- a/hail/python/hailtop/hailctl/dataproc/start.py +++ b/hail/python/hailtop/hailctl/dataproc/start.py @@ -185,6 +185,7 @@ def init_parser(parser): parser.add_argument('--bucket', type=str, help='The Google Cloud Storage bucket to use for cluster staging (just the bucket name, no gs:// prefix).') parser.add_argument('--network', type=str, help='the network for all nodes in this cluster') + parser.add_argument('--service-account', type=str, help='The Google Service Account to use for cluster creation (default to the Compute Engine service account).') parser.add_argument('--master-tags', type=str, help='comma-separated list of instance tags to apply to the mastern node') parser.add_argument('--wheel', help='Non-default Hail installation. Warning: experimental.') @@ -357,6 +358,8 @@ def disk_size(size): cmd.append('--max-age={}'.format(args.max_age)) if args.expiration_time: cmd.append('--expiration_time={}'.format(args.expiration_time)) + if args.service_account: + cmd.append('--service-account={}'.format(args.service_account)) cmd.extend(pass_through_args) From dc005a9a1e746f2f752e934b42c5dcf076acbd10 Mon Sep 17 00:00:00 2001 From: MIchael Franklin Date: Tue, 9 Mar 2021 08:57:43 +1100 Subject: [PATCH 202/501] Add hail_version to MANFIEST --- docker/Dockerfile.service-base | 1 + hail/python/MANIFEST.in | 6 ++++++ hail/python/setup.py | 1 + 3 files changed, 8 insertions(+) create mode 100644 hail/python/MANIFEST.in diff --git a/docker/Dockerfile.service-base b/docker/Dockerfile.service-base index b360166a77e..5b35ac54828 100644 --- a/docker/Dockerfile.service-base +++ b/docker/Dockerfile.service-base @@ -8,6 +8,7 @@ RUN hail-pip-install -r service-base-requirements.txt COPY hail/python/setup-hailtop.py /hailtop/setup.py COPY hail/python/hailtop /hailtop/hailtop/ COPY /hail_version /hailtop/hailtop/hail_version +COPY hail/python/MANIFEST.in /hailtop/MANIFEST.in RUN hail-pip-install /hailtop && rm -rf /hailtop COPY gear/setup.py /gear/setup.py diff --git a/hail/python/MANIFEST.in b/hail/python/MANIFEST.in new file mode 100644 index 00000000000..9f27c7be163 --- /dev/null +++ b/hail/python/MANIFEST.in @@ -0,0 +1,6 @@ +include hail/hail_pip_version +include hail/hail_version +include hail/experimental/datasets.json +include hail/backend/hail-all-spark.jar +include hailtop/hail_version +include hailtop/hailctl/deploy.yaml \ No newline at end of file diff --git a/hail/python/setup.py b/hail/python/setup.py index bcaf3c7337c..90c68b19cd7 100755 --- a/hail/python/setup.py +++ b/hail/python/setup.py @@ -41,6 +41,7 @@ 'hail_version', 'experimental/datasets.json'], 'hail.backend': ['hail-all-spark.jar'], + 'hailtop': ['hail_version'], 'hailtop.hailctl': ['hail_version', 'deploy.yaml']}, classifiers=[ "Programming Language :: Python :: 3", From 83203dbe8b0f4b967e8fc045a0dc17a3f3e87464 Mon Sep 17 00:00:00 2001 From: MIchael Franklin Date: Tue, 9 Mar 2021 08:57:43 +1100 Subject: [PATCH 203/501] Rebase --- docker/Dockerfile.service-base | 7 ++----- hail/python/MANIFEST.in | 6 ++++++ hail/python/setup.py | 1 + 3 files changed, 9 insertions(+), 5 deletions(-) create mode 100644 hail/python/MANIFEST.in diff --git a/docker/Dockerfile.service-base b/docker/Dockerfile.service-base index dfd3e7bb779..5b35ac54828 100644 --- a/docker/Dockerfile.service-base +++ 
b/docker/Dockerfile.service-base @@ -7,11 +7,8 @@ RUN hail-pip-install -r service-base-requirements.txt COPY hail/python/setup-hailtop.py /hailtop/setup.py COPY hail/python/hailtop /hailtop/hailtop/ -COPY hail/Makefile hail/env_var.mk .git/ hailtop/ -RUN mkdir -p hailtop/python/hail hailtop/python/hailtop/hailctl hailtop/python/hail/docs/_static && \ - (cd hailtop && echo $(pwd) && make python-version-info) && \ - cp hailtop/python/hail/hail_*version hailtop/hailtop/hailctl && \ - rm -rf hailtop/Makefile hailtop/env_var.mk .git/ +COPY /hail_version /hailtop/hailtop/hail_version +COPY hail/python/MANIFEST.in /hailtop/MANIFEST.in RUN hail-pip-install /hailtop && rm -rf /hailtop COPY gear/setup.py /gear/setup.py diff --git a/hail/python/MANIFEST.in b/hail/python/MANIFEST.in new file mode 100644 index 00000000000..9f27c7be163 --- /dev/null +++ b/hail/python/MANIFEST.in @@ -0,0 +1,6 @@ +include hail/hail_pip_version +include hail/hail_version +include hail/experimental/datasets.json +include hail/backend/hail-all-spark.jar +include hailtop/hail_version +include hailtop/hailctl/deploy.yaml \ No newline at end of file diff --git a/hail/python/setup.py b/hail/python/setup.py index bcaf3c7337c..90c68b19cd7 100755 --- a/hail/python/setup.py +++ b/hail/python/setup.py @@ -41,6 +41,7 @@ 'hail_version', 'experimental/datasets.json'], 'hail.backend': ['hail-all-spark.jar'], + 'hailtop': ['hail_version'], 'hailtop.hailctl': ['hail_version', 'deploy.yaml']}, classifiers=[ "Programming Language :: Python :: 3", From 811024ff8b23f67c200ed8e9afdb374b23468825 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Thu, 11 Mar 2021 12:49:21 +1100 Subject: [PATCH 204/501] Overwrite ssl-config-hail-root secret for dev --- build.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/build.yaml b/build.yaml index aed6d2e8741..6391483ff1b 100644 --- a/build.yaml +++ b/build.yaml @@ -154,6 +154,10 @@ steps: image: valueFrom: create_certs_image.image script: | + until ! kubectl get secret -n {{ default_ns.name }} ssl-config-hail-root + do + kubectl delete secret -n {{ default_ns.name }} ssl-config-hail-root + done openssl req -new -x509 -subj /CN=hail-root -nodes -newkey rsa:4096 -keyout hail-root-key.pem -out hail-root-cert.pem until kubectl get secret -n {{ default_ns.name }} ssl-config-hail-root do From 51d859a55390a9bd03c4ea0a063de9689042ab8f Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Thu, 11 Mar 2021 12:55:45 +1100 Subject: [PATCH 205/501] Silent the get secret stderr --- build.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.yaml b/build.yaml index 6391483ff1b..cfb092ca0fe 100644 --- a/build.yaml +++ b/build.yaml @@ -154,7 +154,7 @@ steps: image: valueFrom: create_certs_image.image script: | - until ! kubectl get secret -n {{ default_ns.name }} ssl-config-hail-root + until ! kubectl get secret -n {{ default_ns.name }} ssl-config-hail-root 2>/dev/null do kubectl delete secret -n {{ default_ns.name }} ssl-config-hail-root done From 9a1c3a56e3c765e7d8d3e5a83cba1d12ba307a1e Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Thu, 11 Mar 2021 13:35:11 +1100 Subject: [PATCH 206/501] Add comment --- build.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/build.yaml b/build.yaml index cfb092ca0fe..209a30bc951 100644 --- a/build.yaml +++ b/build.yaml @@ -154,6 +154,9 @@ steps: image: valueFrom: create_certs_image.image script: | + # Create dev certificates, or overwrite the existing ones. 
The usecase for + # overwriting is to update the dev certificates, that expire after 30 days + # (the default `-days` parameter value for `openssl req`). until ! kubectl get secret -n {{ default_ns.name }} ssl-config-hail-root 2>/dev/null do kubectl delete secret -n {{ default_ns.name }} ssl-config-hail-root From 286c13f05b62a1a11051664471c736931a19fbd1 Mon Sep 17 00:00:00 2001 From: MIchael Franklin Date: Fri, 12 Mar 2021 08:30:08 +1100 Subject: [PATCH 207/501] Test cp hail_version to root --- docker/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Makefile b/docker/Makefile index 02d4942bb84..0111fbe8d9b 100644 --- a/docker/Makefile +++ b/docker/Makefile @@ -49,7 +49,7 @@ base-stmp: hail-ubuntu-stmp Dockerfile.base core-site.xml requirements.txt ../py service-base: base-stmp -docker pull $(SERVICE_BASE_LATEST) $(MAKE) -C ../hail python/hailtop/hail_version - cp ../hail/python/hailtop/hail_version ../hail_version + cp ../hail/python/hailtop/hail_version /hail_version python3 ../ci/jinja2_render.py '{"base_image":{"image":"base"}}' Dockerfile.service-base Dockerfile.service-base.out [ "$(shell bash stat-permissions.sh Dockerfile.service-base.out)" = "644" ] [ "$(shell bash stat-permissions.sh service-base-requirements.txt)" = "644" ] From f9f9bbac5d3eb39a3b0def36d5374069fe07bfb3 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Fri, 12 Mar 2021 08:43:53 +1100 Subject: [PATCH 208/501] Merge upstream changes (#82) * [batch] Worker cleanup (#10155) * [batch] Worker cleanup * more changes * wip * delint * additions? * fix * [query] Add `source_file_field` to `import_table` (#10164) * [query] Add `source_file_field` to `import_table` CHANGELOG: Add `source_file_field` parameter to `hl.import_table` to allow lines to be associated with their original source file. * ugh * [ci] add authorize sha and action items table to user page (#10142) * [ci] add authorize sha and action items table to user page * [ci] track review requested in addition to assigned for PR reviews * [ci] add CI dropdown with link to user page (#10163) * [batch] add more logs and do not wait for asyncgens (#10136) * [batch] add more logs and do not wait for asyncgens I think there is some unresolved issue with asyncgen shutdown that is keeping workers alive. This is not an issue in worker because worker calls sys.exit which forcibly stops execution. cc: @daniel-goldstein @jigold. * fix lint * [query-service] maybe fix event loop not initialized (#10153) * [query-service] maybe fix event loop not initialized The event loop is supposed to be initialized in the main thread. Sometimes our tests get placed in the non-main thread (always a thread named Dummy-1). Hopefully the session-scoped fixture is run in the main thread. * fix * [prometheus] add prometheus to track SLIs (#10165) * [prometheus] add prometheus to track SLIs * add wraps * [query] apply nest-asyncio as early as possible (#10158) * [query] apply nest-asyncio as early as possible * fix * [grafana] set pod fsGroup to grafana user (#10162) * fix linting errors (#10171) * [query] Remove verbose print (#10167) Looks like this got added in some dndarray work * [ci] update assignees and reviewers on PR github update (#10168) * [query-service] fix receive logic (#10159) * [query-service] fix receive logic Only one coro waits on receive now. We still error if a message is sent before we make our first response. 
* fix * fix * CHANGELOG: Fixed incorrect error message when incorrect type specified with hl.loop (#10174) * [linting] add curlylint check for any service that renders jinja2 (#10172) * [linting] add curlylint check for any service that renders jinja2 templates * [linting] spaces not tabs * [website] fix website (#10173) * [website] fix website I build old versions of the docs and use them in new websites. This does not work for versions of the docs before I introduced the new system. In particular versions 0.2.63 and before generate old-style docs. * tutorials are templated * [ci] change mention for deploy failure (#10178) * [gateway] move ukbb routing into gateway (#10179) * [query] Fix filter intervals (keep=False) memory leak (#10182) * [query-service] remove service backend tests (#10180) They are too flaky currently due to the version issue. * [website] pass response body as kwarg (#10176) * Release 0.2.64 (#10183) * Bump version number * Updated changelog * [nginx] ensure nginx configs dont overwrite each other in build.yaml (#10181) * [query-service] teach query service to read MTs and Ts created by Spark (#10184) * [query-service] teach query service to read MTs and Ts created by Spark Hail-on-Spark uses HadoopFS which emulates directories by creating size-zero files with the name `gs://bucket/dirname/`. Note: the object name literally ends in a slash. Such files should not be included in `listStatus` (they should always be empty anyway). Unfortunately, my fix in https://github.com/hail-is/hail/pull/9914 was wrong because `GoogleStorageFileStatus` removes the trailing slash. This prevented the path from matching `path`, which always ends in a `/`. * fix Co-authored-by: jigold Co-authored-by: Tim Poterba Co-authored-by: Daniel Goldstein Co-authored-by: Dan King Co-authored-by: John Compitello --- Makefile | 6 +- address/Makefile | 1 + address/address/templates/index.html | 13 +- atgu/Makefile | 1 + auth/Makefile | 1 + auth/auth/templates/account-error.html | 4 +- batch/Makefile | 1 + batch/batch/driver/main.py | 26 ++- batch/batch/front_end/front_end.py | 111 +++------ batch/batch/front_end/templates/batch.html | 201 ++++++++-------- .../front_end/templates/billing_projects.html | 22 +- batch/batch/worker/worker.py | 30 ++- benchmark-service/Makefile | 1 + .../benchmark/templates/batch.html | 80 +++---- .../benchmark/templates/job.html | 95 ++++---- .../benchmark/templates/lookup.html | 90 ++++---- build.yaml | 214 +++++------------ ci/Makefile | 1 + ci/ci/ci.py | 114 ++++++++++ ci/ci/github.py | 8 +- ci/ci/templates/batch.html | 8 +- ci/ci/templates/dev-deploy-table.html | 3 - ci/ci/templates/job-table.html | 7 +- ci/ci/templates/pr.html | 3 - ci/ci/templates/user.html | 21 +- ci/deployment.yaml | 1 + docker/requirements.txt | 1 + gateway/gateway.nginx.conf | 35 +++ gear/gear/__init__.py | 4 +- gear/gear/metrics.py | 18 ++ grafana/deployment.yaml | 2 + hail/Makefile | 2 +- hail/python/dev-requirements.txt | 1 + hail/python/hail/__init__.py | 7 +- hail/python/hail/docs/change_log.md | 34 ++- hail/python/hail/experimental/loop.py | 5 +- hail/python/hail/ir/__init__.py | 3 +- hail/python/hail/ir/table_ir.py | 22 -- hail/python/hail/ir/table_reader.py | 5 +- hail/python/hail/methods/impex.py | 14 +- hail/python/hailtop/config/deploy_config.py | 4 + hail/python/test/hail/conftest.py | 9 + .../hail/experimental/test_experimental.py | 9 + hail/python/test/hail/helpers.py | 5 - hail/python/test/hail/methods/test_impex.py | 9 + .../main/scala/is/hail/expr/ir/Parser.scala | 1 - 
.../scala/is/hail/io/fs/GoogleStorageFS.scala | 5 +- hail/src/main/scala/is/hail/rvd/RVD.scala | 8 +- .../scala/is/hail/rvd/RVDPartitioner.scala | 1 - .../scala/is/hail/utils/TextTableReader.scala | 20 +- letsencrypt/domains.txt | 1 + monitoring/Makefile | 1 + notebook/Makefile | 1 + .../notebook/templates/workshop/index.html | 2 +- prometheus/Dockerfile.nginx | 12 + prometheus/Makefile | 24 ++ prometheus/nginx.conf | 101 ++++++++ prometheus/prometheus.yaml | 215 ++++++++++++++++++ query/query/query.py | 12 +- router/deployment.yaml | 12 + router/router.nginx.conf.in | 43 +--- tls/config.yaml | 3 + web_common/Makefile | 1 + web_common/web_common/styles/main.scss | 5 + web_common/web_common/templates/header.html | 10 +- web_common/web_common/templates/layout.html | 26 +-- website/Makefile | 5 +- website/website/templates/base.html | 14 +- website/website/templates/dynamic-base.html | 16 +- website/website/website.py | 3 +- 70 files changed, 1141 insertions(+), 653 deletions(-) create mode 100644 gear/gear/metrics.py create mode 100644 prometheus/Dockerfile.nginx create mode 100644 prometheus/Makefile create mode 100644 prometheus/nginx.conf create mode 100644 prometheus/prometheus.yaml diff --git a/Makefile b/Makefile index 5afd698aab7..c9e20498f81 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ check-hail: .PHONY: check-services check-services: check-auth check-batch check-ci check-gear check-memory \ check-notebook check-query check-router-resolver check-scorecard check-web-common \ - check-atgu + check-atgu check-website .PHONY: check-auth check-auth: @@ -64,3 +64,7 @@ check-web-common: .PHONY: check-atgu check-atgu: $(MAKE) -C atgu check + +.PHONY: check-website +check-website: + $(MAKE) -C website check diff --git a/address/Makefile b/address/Makefile index 956ed8e0dac..315eb864b60 100644 --- a/address/Makefile +++ b/address/Makefile @@ -10,6 +10,7 @@ PYTHON := PYTHONPATH=$(PYTHONPATH) python3 check: $(PYTHON) -m flake8 --config ../setup.cfg address $(PYTHON) -m pylint --rcfile ../pylintrc address --score=n + curlylint . .PHONY: build build: diff --git a/address/address/templates/index.html b/address/address/templates/index.html index 5c4c18f8f0c..cea97d02e85 100644 --- a/address/address/templates/index.html +++ b/address/address/templates/index.html @@ -22,14 +22,13 @@

Cache

{% for row in rows %} - {{ row["name"] }} - {{ row["addresses"] }} - {{ row["ports"] }} - {{ row["lifetime"] }} - {{ row["lock"] }} - + {{ row["name"] }} + {{ row["addresses"] }} + {{ row["ports"] }} + {{ row["lifetime"] }} + {{ row["lock"] }} + {% endfor %} -
{% endblock %} diff --git a/atgu/Makefile b/atgu/Makefile index c4f796c279e..b0e5aac4ed4 100644 --- a/atgu/Makefile +++ b/atgu/Makefile @@ -5,4 +5,5 @@ PYTHON := PYTHONPATH=$${PYTHONPATH:+$${PYTHONPATH}:}$(EXTRA_PYTHONPATH) python3 check: $(PYTHON) -m flake8 --config ../setup.cfg atgu $(PYTHON) -m pylint --rcfile ../pylintrc atgu --score=n + curlylint . ../check-sql.sh diff --git a/auth/Makefile b/auth/Makefile index 1a423eda6af..3005be21efa 100644 --- a/auth/Makefile +++ b/auth/Makefile @@ -10,6 +10,7 @@ PYTHON := PYTHONPATH=$${PYTHONPATH:+$${PYTHONPATH}:}$(EXTRA_PYTHONPATH) python3 check: $(PYTHON) -m flake8 auth $(PYTHON) -m pylint --rcfile ../pylintrc auth --score=n + curlylint . ../check-sql.sh .PHONY: build diff --git a/auth/auth/templates/account-error.html b/auth/auth/templates/account-error.html index ae2b70b99dc..eacab662d0a 100644 --- a/auth/auth/templates/account-error.html +++ b/auth/auth/templates/account-error.html @@ -4,10 +4,12 @@
Account is in a bad state!

-
Please contact hail-team@broadinstitute.org
+
+ Please contact hail-team@broadinstitute.org
Username: {{ username }}
Email: {{ email }}
State: {{ state }}
+
{% endblock %} diff --git a/batch/Makefile b/batch/Makefile index 6273c03defd..e63d44158cc 100644 --- a/batch/Makefile +++ b/batch/Makefile @@ -13,6 +13,7 @@ PYTHON := PYTHONPATH=$${PYTHONPATH:+$${PYTHONPATH}:}$(EXTRA_PYTHONPATH) python3 check: $(PYTHON) -m flake8 --config ../setup.cfg batch $(PYTHON) -m pylint --rcfile ../pylintrc batch --score=n + curlylint . ../check-sql.sh .PHONY: build-prereqs diff --git a/batch/batch/driver/main.py b/batch/batch/driver/main.py index 3bccd4d39ec..1eb8ed10cdc 100644 --- a/batch/batch/driver/main.py +++ b/batch/batch/driver/main.py @@ -79,11 +79,13 @@ async def wrapped(request): return wrapped -def instance_from_request(request): +def instance_name_from_request(request): instance_name = request.headers.get('X-Hail-Instance-Name') - if not instance_name: - return None + return instance_name + +def instance_from_request(request): + instance_name = instance_name_from_request(request) inst_coll_manager = request.app['inst_coll_manager'] return inst_coll_manager.get_instance(instance_name) @@ -93,16 +95,17 @@ def activating_instances_only(fun): async def wrapped(request): instance = instance_from_request(request) if not instance: - log.info('instance not found') + instance_name = instance_name_from_request(request) + log.info(f'instance {instance_name} not found') raise web.HTTPUnauthorized() if instance.state != 'pending': - log.info('instance not pending') + log.info(f'instance {instance.name} not pending') raise web.HTTPUnauthorized() activation_token = authorization_token(request) if not activation_token: - log.info('activation token not found') + log.info(f'activation token not found for instance {instance.name}') raise web.HTTPUnauthorized() db = request.app['db'] @@ -110,7 +113,7 @@ async def wrapped(request): 'SELECT state FROM instances WHERE name = %s AND activation_token = %s;', (instance.name, activation_token)) if not record: - log.info('instance, activation token not found in database') + log.info(f'instance {instance.name}, activation token not found in database') raise web.HTTPUnauthorized() resp = await fun(request, instance) @@ -124,16 +127,17 @@ def active_instances_only(fun): async def wrapped(request): instance = instance_from_request(request) if not instance: - log.info('instance not found') + instance_name = instance_name_from_request(request) + log.info(f'instance not found {instance_name}') raise web.HTTPUnauthorized() if instance.state != 'active': - log.info('instance not active') + log.info(f'instance not active {instance.name}') raise web.HTTPUnauthorized() token = authorization_token(request) if not token: - log.info('token not found') + log.info(f'token not found for instance {instance.name}') raise web.HTTPUnauthorized() db = request.app['db'] @@ -141,7 +145,7 @@ async def wrapped(request): 'SELECT state FROM instances WHERE name = %s AND token = %s;', (instance.name, token)) if not record: - log.info('instance, token not found in database') + log.info(f'instance {instance.name}, token not found in database') raise web.HTTPUnauthorized() await instance.mark_healthy() diff --git a/batch/batch/front_end/front_end.py b/batch/batch/front_end/front_end.py index 54a2b7f708f..f3202fa158b 100644 --- a/batch/batch/front_end/front_end.py +++ b/batch/batch/front_end/front_end.py @@ -11,12 +11,10 @@ import aiohttp from aiohttp import web import aiohttp_session -import prometheus_client as pc import pymysql -from prometheus_async.aio import time as prom_async_time -from prometheus_async.aio.web import server_stats import 
google.oauth2.service_account import google.api_core.exceptions +from prometheus_async.aio.web import server_stats # type: ignore from hailtop.utils import (time_msecs, time_msecs_str, humanize_timedelta_msecs, request_retry_transient_errors, run_if_changed, retry_long_running, LoggingTimer, cost_str) @@ -31,7 +29,8 @@ rest_authenticated_users_only, web_authenticated_users_only, web_authenticated_developers_only, - check_csrf_token, transaction) + check_csrf_token, transaction, + monitor_endpoint) from web_common import (setup_aiohttp_jinja2, setup_common_static_routes, render_template, set_message) @@ -56,42 +55,6 @@ log = logging.getLogger('batch.front_end') -REQUEST_TIME = pc.Summary('batch_request_latency_seconds', 'Batch request latency in seconds', ['endpoint', 'verb']) -REQUEST_TIME_GET_JOBS = REQUEST_TIME.labels(endpoint='/api/v1alpha/batches/batch_id/jobs', verb="GET") -REQUEST_TIME_GET_JOB = REQUEST_TIME.labels(endpoint='/api/v1alpha/batches/batch_id/jobs/job_id', verb="GET") -REQUEST_TIME_GET_JOB_LOG = REQUEST_TIME.labels(endpoint='/api/v1alpha/batches/batch_id/jobs/job_id/log', verb="GET") -REQUEST_TIME_GET_ATTEMPTS = REQUEST_TIME.labels(endpoint='/api/v1alpha/batches/batch_id/jobs/job_id/attempts', verb="GET") -REQUEST_TIME_GET_BATCHES = REQUEST_TIME.labels(endpoint='/api/v1alpha/batches', verb="GET") -REQUEST_TIME_POST_CREATE_JOBS = REQUEST_TIME.labels(endpoint='/api/v1alpha/batches/batch_id/jobs/create', verb="POST") -REQUEST_TIME_POST_CREATE_BATCH = REQUEST_TIME.labels(endpoint='/api/v1alpha/batches/create', verb='POST') -REQUEST_TIME_POST_GET_BATCH = REQUEST_TIME.labels(endpoint='/api/v1alpha/batches/batch_id', verb='GET') -REQUEST_TIME_PATCH_CANCEL_BATCH = REQUEST_TIME.labels(endpoint='/api/v1alpha/batches/batch_id/cancel', verb="PATCH") -REQUEST_TIME_PATCH_CLOSE_BATCH = REQUEST_TIME.labels(endpoint='/api/v1alpha/batches/batch_id/close', verb="PATCH") -REQUEST_TIME_DELETE_BATCH = REQUEST_TIME.labels(endpoint='/api/v1alpha/batches/batch_id', verb="DELETE") -REQUEST_TIME_GET_BATCH_UI = REQUEST_TIME.labels(endpoint='/batches/batch_id', verb='GET') -REQUEST_TIME_POST_CANCEL_BATCH_UI = REQUEST_TIME.labels(endpoint='/batches/batch_id/cancel', verb='POST') -REQUEST_TIME_POST_DELETE_BATCH_UI = REQUEST_TIME.labels(endpoint='/batches/batch_id/delete', verb='POST') -REQUEST_TIME_GET_BATCHES_UI = REQUEST_TIME.labels(endpoint='/batches', verb='GET') -REQUEST_TIME_GET_JOB_UI = REQUEST_TIME.labels(endpoint='/batches/batch_id/jobs/job_id', verb="GET") -REQUEST_TIME_GET_BILLING_PROJECTS = REQUEST_TIME.labels(endpoint='/api/v1alpha/billing_projects', verb="GET") -REQUEST_TIME_GET_BILLING_PROJECT = REQUEST_TIME.labels(endpoint='/api/v1alpha/billing_projects/billing_project', verb="GET") -REQUEST_TIME_GET_BILLING_LIMITS_UI = REQUEST_TIME.labels(endpoint='/billing_limits', verb="GET") -REQUEST_TIME_POST_BILLING_LIMITS_EDIT_UI = REQUEST_TIME.labels(endpoint='/billing_projects/billing_project/edit', verb="POST") -REQUEST_TIME_POST_BILLING_LIMITS_EDIT = REQUEST_TIME.labels(endpoint='/api/v1alpha/billing_projects/billing_project/edit', verb="POST") -REQUEST_TIME_GET_BILLING_UI = REQUEST_TIME.labels(endpoint='/billing', verb="GET") -REQUEST_TIME_GET_BILLING_PROJECTS_UI = REQUEST_TIME.labels(endpoint='/billing_projects', verb="GET") -REQUEST_TIME_POST_BILLING_PROJECT_REMOVE_USER_UI = REQUEST_TIME.labels(endpoint='/billing_projects/billing_project/users/user/remove', verb="POST") -REQUEST_TIME_POST_BILLING_PROJECT_REMOVE_USER_API = 
REQUEST_TIME.labels(endpoint='/api/v1alpha/billing_projects/billing_project/users/{user}/remove', verb="POST") -REQUEST_TIME_POST_BILLING_PROJECT_ADD_USER_UI = REQUEST_TIME.labels(endpoint='/billing_projects/billing_project/users/add', verb="POST") -REQUEST_TIME_POST_BILLING_PROJECT_ADD_USER_API = REQUEST_TIME.labels(endpoint='/api/v1alpha/billing_projects/{billing_project}/users/{user}/add', verb="POST") -REQUEST_TIME_POST_CREATE_BILLING_PROJECT_UI = REQUEST_TIME.labels(endpoint='/billing_projects/create', verb="POST") -REQUEST_TIME_POST_CREATE_BILLING_PROJECT_API = REQUEST_TIME.labels(endpoint='/api/v1alpha/billing_projects/{billing_project}/create', verb="POST") -REQUEST_TIME_POST_CLOSE_BILLING_PROJECT_UI = REQUEST_TIME.labels(endpoint='/billing_projects/{billing_project}/close', verb="POST") -REQUEST_TIME_POST_CLOSE_BILLING_PROJECT_API = REQUEST_TIME.labels(endpoint='/api/v1alpha/billing_projects/{billing_project}/close', verb="POST") -REQUEST_TIME_POST_REOPEN_BILLING_PROJECT_UI = REQUEST_TIME.labels(endpoint='/billing_projects/{billing_project}/reopen', verb="POST") -REQUEST_TIME_POST_REOPEN_BILLING_PROJECT_API = REQUEST_TIME.labels(endpoint='/api/v1alpha/billing_projects/{billing_project}/reopen', verb="POST") -REQUEST_TIME_POST_DELETE_BILLING_PROJECT_API = REQUEST_TIME.labels(endpoint='/api/v1alpha/billing_projects/{billing_project}/reopen', verb="POST") - routes = web.RouteTableDef() deploy_config = get_deploy_config() @@ -291,7 +254,7 @@ async def _query_batch_jobs(request, batch_id): @routes.get('/api/v1alpha/batches/{batch_id}/jobs') -@prom_async_time(REQUEST_TIME_GET_JOBS) +@monitor_endpoint @rest_billing_project_users_only async def get_jobs(request, userdata, batch_id): # pylint: disable=unused-argument db = request.app['db'] @@ -465,7 +428,7 @@ async def _get_full_job_status(app, record): @routes.get('/api/v1alpha/batches/{batch_id}/jobs/{job_id}/log') -@prom_async_time(REQUEST_TIME_GET_JOB_LOG) +@monitor_endpoint @rest_billing_project_users_only async def get_job_log(request, userdata, batch_id): # pylint: disable=unused-argument job_id = int(request.match_info['job_id']) @@ -582,7 +545,7 @@ async def _query_batches(request, user, q): @routes.get('/api/v1alpha/batches') -@prom_async_time(REQUEST_TIME_GET_BATCHES) +@monitor_endpoint @rest_authenticated_users_only async def get_batches(request, userdata): # pylint: disable=unused-argument user = userdata['username'] @@ -610,7 +573,7 @@ def check_service_account_permissions(user, sa): @routes.post('/api/v1alpha/batches/{batch_id}/jobs/create') -@prom_async_time(REQUEST_TIME_POST_CREATE_JOBS) +@monitor_endpoint @rest_authenticated_users_only async def create_jobs(request, userdata): app = request.app @@ -932,7 +895,7 @@ async def insert(tx): @routes.post('/api/v1alpha/batches/create') -@prom_async_time(REQUEST_TIME_POST_CREATE_BATCH) +@monitor_endpoint @rest_authenticated_users_only async def create_batch(request, userdata): app = request.app @@ -1055,14 +1018,14 @@ async def _delete_batch(app, batch_id): @routes.get('/api/v1alpha/batches/{batch_id}') -@prom_async_time(REQUEST_TIME_POST_GET_BATCH) +@monitor_endpoint @rest_billing_project_users_only async def get_batch(request, userdata, batch_id): # pylint: disable=unused-argument return web.json_response(await _get_batch(request.app, batch_id)) @routes.patch('/api/v1alpha/batches/{batch_id}/cancel') -@prom_async_time(REQUEST_TIME_PATCH_CANCEL_BATCH) +@monitor_endpoint @rest_billing_project_users_only async def cancel_batch(request, userdata, batch_id): # pylint: 
disable=unused-argument await _handle_api_error(_cancel_batch, request.app, batch_id) @@ -1070,7 +1033,7 @@ async def cancel_batch(request, userdata, batch_id): # pylint: disable=unused-a @routes.patch('/api/v1alpha/batches/{batch_id}/close') -@prom_async_time(REQUEST_TIME_PATCH_CLOSE_BATCH) +@monitor_endpoint @rest_authenticated_users_only async def close_batch(request, userdata): batch_id = int(request.match_info['batch_id']) @@ -1111,7 +1074,7 @@ async def close_batch(request, userdata): @routes.delete('/api/v1alpha/batches/{batch_id}') -@prom_async_time(REQUEST_TIME_DELETE_BATCH) +@monitor_endpoint @rest_billing_project_users_only async def delete_batch(request, userdata, batch_id): # pylint: disable=unused-argument await _delete_batch(request.app, batch_id) @@ -1119,7 +1082,7 @@ async def delete_batch(request, userdata, batch_id): # pylint: disable=unused-a @routes.get('/batches/{batch_id}') -@prom_async_time(REQUEST_TIME_GET_BATCH_UI) +@monitor_endpoint @web_billing_project_users_only() async def ui_batch(request, userdata, batch_id): app = request.app @@ -1142,7 +1105,7 @@ async def ui_batch(request, userdata, batch_id): @routes.post('/batches/{batch_id}/cancel') -@prom_async_time(REQUEST_TIME_POST_CANCEL_BATCH_UI) +@monitor_endpoint @check_csrf_token @web_billing_project_users_only(redirect=False) async def ui_cancel_batch(request, userdata, batch_id): # pylint: disable=unused-argument @@ -1155,7 +1118,7 @@ async def ui_cancel_batch(request, userdata, batch_id): # pylint: disable=unuse @routes.post('/batches/{batch_id}/delete') -@prom_async_time(REQUEST_TIME_POST_DELETE_BATCH_UI) +@monitor_endpoint @check_csrf_token @web_billing_project_users_only(redirect=False) async def ui_delete_batch(request, userdata, batch_id): # pylint: disable=unused-argument @@ -1167,7 +1130,7 @@ async def ui_delete_batch(request, userdata, batch_id): # pylint: disable=unuse @routes.get('/batches', name='batches') -@prom_async_time(REQUEST_TIME_GET_BATCHES_UI) +@monitor_endpoint @web_authenticated_users_only() async def ui_batches(request, userdata): user = userdata['username'] @@ -1265,7 +1228,7 @@ async def _get_attempts(app, batch_id, job_id): @routes.get('/api/v1alpha/batches/{batch_id}/jobs/{job_id}/attempts') -@prom_async_time(REQUEST_TIME_GET_ATTEMPTS) +@monitor_endpoint @rest_billing_project_users_only async def get_attempts(request, userdata, batch_id): # pylint: disable=unused-argument job_id = int(request.match_info['job_id']) @@ -1274,7 +1237,7 @@ async def get_attempts(request, userdata, batch_id): # pylint: disable=unused-a @routes.get('/api/v1alpha/batches/{batch_id}/jobs/{job_id}') -@prom_async_time(REQUEST_TIME_GET_JOB) +@monitor_endpoint @rest_billing_project_users_only async def get_job(request, userdata, batch_id): # pylint: disable=unused-argument job_id = int(request.match_info['job_id']) @@ -1283,7 +1246,7 @@ async def get_job(request, userdata, batch_id): # pylint: disable=unused-argume @routes.get('/batches/{batch_id}/jobs/{job_id}') -@prom_async_time(REQUEST_TIME_GET_JOB_UI) +@monitor_endpoint @web_billing_project_users_only() async def ui_get_job(request, userdata, batch_id): app = request.app @@ -1336,7 +1299,7 @@ async def ui_get_job(request, userdata, batch_id): @routes.get('/billing_limits') -@prom_async_time(REQUEST_TIME_GET_BILLING_LIMITS_UI) +@monitor_endpoint @web_authenticated_users_only() async def ui_get_billing_limits(request, userdata): app = request.app @@ -1397,7 +1360,7 @@ async def insert(tx): @routes.post('/api/v1alpha/billing_limits/{billing_project}/edit') 
-@prom_async_time(REQUEST_TIME_POST_BILLING_LIMITS_EDIT) +@monitor_endpoint @rest_authenticated_developers_or_auth_only async def post_edit_billing_limits(request, userdata): # pylint: disable=unused-argument db: Database = request.app['db'] @@ -1409,7 +1372,7 @@ async def post_edit_billing_limits(request, userdata): # pylint: disable=unused @routes.post('/billing_limits/{billing_project}/edit') -@prom_async_time(REQUEST_TIME_POST_BILLING_LIMITS_EDIT_UI) +@monitor_endpoint @check_csrf_token @web_authenticated_developers_only(redirect=False) async def post_edit_billing_limits_ui(request, userdata): # pylint: disable=unused-argument @@ -1493,7 +1456,7 @@ def billing_record_to_dict(record): @routes.get('/billing') -@prom_async_time(REQUEST_TIME_GET_BILLING_UI) +@monitor_endpoint @web_authenticated_developers_only() async def ui_get_billing(request, userdata): billing, start, end = await _query_billing(request) @@ -1534,7 +1497,7 @@ async def ui_get_billing(request, userdata): @routes.get('/billing_projects') -@prom_async_time(REQUEST_TIME_GET_BILLING_PROJECTS_UI) +@monitor_endpoint @web_authenticated_developers_only() async def ui_get_billing_projects(request, userdata): db: Database = request.app['db'] @@ -1547,7 +1510,7 @@ async def ui_get_billing_projects(request, userdata): @routes.get('/api/v1alpha/billing_projects') -@prom_async_time(REQUEST_TIME_GET_BILLING_PROJECTS) +@monitor_endpoint @rest_authenticated_users_only async def get_billing_projects(request, userdata): db: Database = request.app['db'] @@ -1563,7 +1526,7 @@ async def get_billing_projects(request, userdata): @routes.get('/api/v1alpha/billing_projects/{billing_project}') -@prom_async_time(REQUEST_TIME_GET_BILLING_PROJECT) +@monitor_endpoint @rest_authenticated_users_only async def get_billing_project(request, userdata): db: Database = request.app['db'] @@ -1617,7 +1580,7 @@ async def delete(tx): @routes.post('/billing_projects/{billing_project}/users/{user}/remove') -@prom_async_time(REQUEST_TIME_POST_BILLING_PROJECT_REMOVE_USER_UI) +@monitor_endpoint @check_csrf_token @web_authenticated_developers_only(redirect=False) async def post_billing_projects_remove_user(request, userdata): # pylint: disable=unused-argument @@ -1633,7 +1596,7 @@ async def post_billing_projects_remove_user(request, userdata): # pylint: disab @routes.post('/api/v1alpha/billing_projects/{billing_project}/users/{user}/remove') -@prom_async_time(REQUEST_TIME_POST_BILLING_PROJECT_REMOVE_USER_API) +@monitor_endpoint @rest_authenticated_developers_or_auth_only async def api_get_billing_projects_remove_user(request, userdata): # pylint: disable=unused-argument db: Database = request.app['db'] @@ -1676,7 +1639,7 @@ async def insert(tx): @routes.post('/billing_projects/{billing_project}/users/add') -@prom_async_time(REQUEST_TIME_POST_BILLING_PROJECT_ADD_USER_UI) +@monitor_endpoint @check_csrf_token @web_authenticated_developers_only(redirect=False) async def post_billing_projects_add_user(request, userdata): # pylint: disable=unused-argument @@ -1694,7 +1657,7 @@ async def post_billing_projects_add_user(request, userdata): # pylint: disable= @routes.post('/api/v1alpha/billing_projects/{billing_project}/users/{user}/add') -@prom_async_time(REQUEST_TIME_POST_BILLING_PROJECT_ADD_USER_API) +@monitor_endpoint @rest_authenticated_developers_or_auth_only async def api_billing_projects_add_user(request, userdata): # pylint: disable=unused-argument db: Database = request.app['db'] @@ -1728,7 +1691,7 @@ async def insert(tx): @routes.post('/billing_projects/create') 
-@prom_async_time(REQUEST_TIME_POST_CREATE_BILLING_PROJECT_UI) +@monitor_endpoint @check_csrf_token @web_authenticated_developers_only(redirect=False) async def post_create_billing_projects(request, userdata): # pylint: disable=unused-argument @@ -1745,7 +1708,7 @@ async def post_create_billing_projects(request, userdata): # pylint: disable=un @routes.post('/api/v1alpha/billing_projects/{billing_project}/create') -@prom_async_time(REQUEST_TIME_POST_CREATE_BILLING_PROJECT_API) +@monitor_endpoint @rest_authenticated_developers_or_auth_only async def api_get_create_billing_projects(request, userdata): # pylint: disable=unused-argument db: Database = request.app['db'] @@ -1782,7 +1745,7 @@ async def close_project(tx): @routes.post('/billing_projects/{billing_project}/close') -@prom_async_time(REQUEST_TIME_POST_CLOSE_BILLING_PROJECT_UI) +@monitor_endpoint @check_csrf_token @web_authenticated_developers_only(redirect=False) async def post_close_billing_projects(request, userdata): # pylint: disable=unused-argument @@ -1797,7 +1760,7 @@ async def post_close_billing_projects(request, userdata): # pylint: disable=unu @routes.post('/api/v1alpha/billing_projects/{billing_project}/close') -@prom_async_time(REQUEST_TIME_POST_CLOSE_BILLING_PROJECT_API) +@monitor_endpoint @rest_authenticated_developers_or_auth_only async def api_close_billing_projects(request, userdata): # pylint: disable=unused-argument db: Database = request.app['db'] @@ -1828,7 +1791,7 @@ async def open_project(tx): @routes.post('/billing_projects/{billing_project}/reopen') -@prom_async_time(REQUEST_TIME_POST_REOPEN_BILLING_PROJECT_UI) +@monitor_endpoint @check_csrf_token @web_authenticated_developers_only(redirect=False) async def post_reopen_billing_projects(request, userdata): # pylint: disable=unused-argument @@ -1843,7 +1806,7 @@ async def post_reopen_billing_projects(request, userdata): # pylint: disable=un @routes.post('/api/v1alpha/billing_projects/{billing_project}/reopen') -@prom_async_time(REQUEST_TIME_POST_REOPEN_BILLING_PROJECT_API) +@monitor_endpoint @rest_authenticated_developers_or_auth_only async def api_reopen_billing_projects(request, userdata): # pylint: disable=unused-argument db: Database = request.app['db'] @@ -1873,7 +1836,7 @@ async def delete_project(tx): @routes.post('/api/v1alpha/billing_projects/{billing_project}/delete') -@prom_async_time(REQUEST_TIME_POST_DELETE_BILLING_PROJECT_API) +@monitor_endpoint @rest_authenticated_developers_or_auth_only async def api_delete_billing_projects(request, userdata): # pylint: disable=unused-argument db: Database = request.app['db'] diff --git a/batch/batch/front_end/templates/batch.html b/batch/batch/front_end/templates/batch.html index 36abfed64cd..2bd63c82ef7 100644 --- a/batch/batch/front_end/templates/batch.html +++ b/batch/batch/front_end/templates/batch.html @@ -4,109 +4,106 @@ {% endblock %} {% block content %} -

Batch {{ batch['id'] }}

- {% if 'attributes' in batch %} - {% for name, value in batch['attributes'].items() %} -

{{ name }}: {{ value }}

- {% endfor %} - {% endif %} +

Batch {{ batch['id'] }}

+{% if 'attributes' in batch %} +{% for name, value in batch['attributes'].items() %} +

{{ name }}: {{ value }}

+{% endfor %} +{% endif %} -

Jobs

-
-
- - - - - - -
-

Search jobs with the given search terms. Return jobs - that match all terms. Terms:

-
    -
  • k=v - jobs with an attribute with key k and value v
  • -
  • has:k - jobs that have an attribute with key k
  • -
  • state - jobs in the given state, one of: -
      -
    • ready
    • -
    • running
    • -
    • live (ready or running)
    • -
    • cancelled
    • -
    • error
    • -
    • failed
    • -
    • bad (error or failed)
    • -
    • success
    • -
    • done (cancelled, error, failed or success)
    • -
    -
  • -
  • !term - jobs not matched by term
  • -
-
-
-
- - - - - - - - - - - - - {% for job in batch['jobs'] %} - - - - - - - - - {% endfor %} - -
IDNameStateExit CodeDurationCost
- {{ job['job_id'] }} - - {% if 'name' in job and job['name'] is not none %} - {{ job['name'] }} - {% endif %} - {{ job['state'] }} - {% if 'exit_code' in job and job['exit_code'] is not none %} - {{ job['exit_code'] }} - {% endif %} - - {% if 'duration' in job and job['duration'] is not none %} - {{ job['duration'] }} - {% endif %} - - {% if 'cost' in job and job['cost'] is not none %} - {{ job['cost'] }} - {% endif %} -
-
- {% if last_job_id is not none %} -
- {% if q is not none %} - - {% endif %} - - -
- {% endif %} +

Jobs

+
+
+
+ + +
+ + +
+

Search jobs with the given search terms. Return jobs + that match all terms. Terms:

+
    +
  • k=v - jobs with an attribute with key k and value v
  • +
  • has:k - jobs that have an attribute with key k
  • +
  • state - jobs in the given state, one of: +
      +
    • ready
    • +
    • running
    • +
    • live (ready or running)
    • +
    • cancelled
    • +
    • error
    • +
    • failed
    • +
    • bad (error or failed)
    • +
    • success
    • +
    • done (cancelled, error, failed or success)
    • +
    +
  • +
  • !term - jobs not matched by term
  • +
- +
+ + + + + + + + + + + + + {% for job in batch['jobs'] %} + + + + + + + + + {% endfor %} + +
IDNameStateExit CodeDurationCost
+ {{ job['job_id'] }} + + {% if 'name' in job and job['name'] is not none %} + {{ job['name'] }} + {% endif %} + {{ job['state'] }} + {% if 'exit_code' in job and job['exit_code'] is not none %} + {{ job['exit_code'] }} + {% endif %} + + {% if 'duration' in job and job['duration'] is not none %} + {{ job['duration'] }} + {% endif %} + + {% if 'cost' in job and job['cost'] is not none %} + {{ job['cost'] }} + {% endif %} +
+
+ {% if last_job_id is not none %} +
+ {% if q is not none %} + + {% endif %} + + +
+ {% endif %} +
+ {% endblock %} diff --git a/batch/batch/front_end/templates/billing_projects.html b/batch/batch/front_end/templates/billing_projects.html index 311989503e1..5122c4f8ee9 100644 --- a/batch/batch/front_end/templates/billing_projects.html +++ b/batch/batch/front_end/templates/billing_projects.html @@ -10,7 +10,7 @@

Billing Projects

User - {% for billing_project in billing_projects %} + {% for billing_project in billing_projects %} @@ -22,7 +22,7 @@

Billing Projects

- {% for user in billing_project['users'] %} + {% for user in billing_project['users'] %} {{ user }}
@@ -32,19 +32,19 @@

Billing Projects

+ {% endfor %} - {% endfor %} -
- - - - - -
+
+ + + + + +
- {% endfor %} + {% endfor %}
diff --git a/batch/batch/worker/worker.py b/batch/batch/worker/worker.py index a0e715170e7..309d3aae428 100644 --- a/batch/batch/worker/worker.py +++ b/batch/batch/worker/worker.py @@ -213,9 +213,11 @@ def __init__(self, container, name, state): self.state = state self.name = name self.timing = None + self._deleted = False async def __aenter__(self): if self.container.job.deleted: + self._deleted = True raise JobDeletedError() if self.state: log.info(f'{self.container} state changed: {self.container.state} => {self.state}') @@ -225,6 +227,9 @@ async def __aenter__(self): self.container.timing[self.name] = self.timing async def __aexit__(self, exc_type, exc, tb): + if self._deleted: + return + finish_time = time_msecs() self.timing['finish_time'] = finish_time start_time = self.timing['start_time'] @@ -235,6 +240,15 @@ def worker_fraction_in_1024ths(cpu_in_mcpu): return 1024 * cpu_in_mcpu // (CORES * 1000) +def user_error(e): + if isinstance(e, DockerError): + if e.status == 404 and 'pull access denied' in e.message: + return True + if e.status == 400 and 'executable file not found' in e.message: + return True + return False + + class Container: def __init__(self, job, name, spec): self.job = job @@ -445,8 +459,9 @@ async def run(self, worker): self.state = 'succeeded' else: self.state = 'failed' - except Exception: - log.exception(f'while running {self}') + except Exception as e: + if not isinstance(e, (JobDeletedError, JobTimeoutError)): + log.exception(f'while running {self}') self.state = 'error' self.error = traceback.format_exc() @@ -945,8 +960,9 @@ async def run(self, worker): self.state = 'succeeded' else: self.state = input.state - except Exception: - log.exception(f'while running {self}') + except Exception as e: + if not user_error(e): + log.exception(f'while running {self}') self.state = 'error' self.error = traceback.format_exc() @@ -1309,8 +1325,6 @@ async def run(self): await app_runner.cleanup() log.info('cleaned up app runner') - self.shutdown() - async def kill_1(self, request): # pylint: disable=unused-argument log.info('killed') self.stop_event.set() @@ -1466,12 +1480,14 @@ async def async_main(): finally: try: worker.shutdown() + log.info('worker shutdown') finally: await docker.close() + log.info('docker closed') loop = asyncio.get_event_loop() loop.run_until_complete(async_main()) -loop.run_until_complete(loop.shutdown_asyncgens()) +log.info('closing loop') loop.close() log.info('closed') sys.exit(0) diff --git a/benchmark-service/Makefile b/benchmark-service/Makefile index e3694fd5d38..6098045eff5 100644 --- a/benchmark-service/Makefile +++ b/benchmark-service/Makefile @@ -10,6 +10,7 @@ PYTHON := PYTHONPATH=$${PYTHONPATH:+$${PYTHONPATH}:}$(EXTRA_PYTHONPATH) python3 check: $(PYTHON) -m flake8 benchmark $(PYTHON) -m pylint --rcfile ../pylintrc benchmark --score=n + curlylint . .PHONY: build build: diff --git a/benchmark-service/benchmark/templates/batch.html b/benchmark-service/benchmark/templates/batch.html index a4a2eca4b77..3ccf9625c78 100644 --- a/benchmark-service/benchmark/templates/batch.html +++ b/benchmark-service/benchmark/templates/batch.html @@ -1,53 +1,57 @@ {% extends "layout.html" %} {% block title %}Batch {{ batch['id'] }}{% endblock %} + {% block content %} +

Batch {{ batch['id'] }}

{% if 'attributes' in batch %}
- {% for name, value in batch['attributes'].items() %} -
{{ name }}: {{ value }}
- {% endfor %} - {% endif %} + {% for name, value in batch['attributes'].items() %} +
{{ name }}: {{ value }}
+ {% endfor %}
+{% endif %} +

Jobs

- - - - - - - - - - + + + + + + + + + + {% for job in jobs %} - - - - - + + + + + {% endfor %} - +
idnamestateexit_codeduration
idnamestateexit_codeduration
- {{ job['job_id'] }} - {{ job['name'] }}{{ job['state'] }} - {% if 'exit_code' in job and job['exit_code'] is not none %} - {% if job['exit_code'] == 0 %} - - Success 🎉 - {% else %} - - Failure 🤷‍♀️ - {% endif %} - ({{ job['exit_code'] }}) - - {% endif %} - - {% if 'duration' in job and job['duration'] is not none %} - {{ job['duration'] }} - {% endif %} - + {{ job['job_id'] }} + {{ job['name'] }}{{ job['state'] }} + {% if 'exit_code' in job and job['exit_code'] is not none %} + {% if job['exit_code'] == 0 %} + + Success 🎉 + + {% else %} + + Failure 🤷‍♀️ + ({{ job['exit_code'] }}) + + {% endif %} + {% endif %} + + {% if 'duration' in job and job['duration'] is not none %} + {{ job['duration'] }} + {% endif %} +
{% endblock %} diff --git a/benchmark-service/benchmark/templates/job.html b/benchmark-service/benchmark/templates/job.html index dda55cd6127..e0d398f70f5 100644 --- a/benchmark-service/benchmark/templates/job.html +++ b/benchmark-service/benchmark/templates/job.html @@ -6,44 +6,44 @@

Batch {{ batch_id }} Job {{ job_id }}

Attempts

{% if attempts %} - + - - - - - - + + + + + + - - + + {% for attempt in attempts %} - - - - - - + + + + + + {% endfor %} - +
Attempt IDInstanceStartEndDurationReasonAttempt IDInstanceStartEndDurationReason
{{ attempt['attempt_id'] }}{{ attempt['instance_name'] }} - {% if 'start_time' in attempt and attempt['start_time'] is not none %} - {{ attempt['start_time'] }} - {% endif %} - - {% if 'end_time' in attempt and attempt['end_time'] is not none %} - {{ attempt['end_time'] }} - {% endif %} - - {% if 'duration' in attempt and attempt['duration'] is not none %} - {{ attempt['duration'] }} - {% endif %} - - {% if 'reason' in attempt and attempt['reason'] is not none %} - {{ attempt['reason'] }} - {% endif %} - {{ attempt['attempt_id'] }}{{ attempt['instance_name'] }} + {% if 'start_time' in attempt and attempt['start_time'] is not none %} + {{ attempt['start_time'] }} + {% endif %} + + {% if 'end_time' in attempt and attempt['end_time'] is not none %} + {{ attempt['end_time'] }} + {% endif %} + + {% if 'duration' in attempt and attempt['duration'] is not none %} + {{ attempt['duration'] }} + {% endif %} + + {% if 'reason' in attempt and attempt['reason'] is not none %} + {{ attempt['reason'] }} + {% endif %} +
{% else %}

No attempts

@@ -53,21 +53,22 @@

Attempts

Log

{% if 'input' in job_log %} -

Input

-
{{ job_log['input'] }}
- {% endif %} +

Input

+
{{ job_log['input'] }}
+{% endif %} - {% if 'main' in job_log %} -

Main

-
{{ job_log['main'] }}
- {% endif %} +{% if 'main' in job_log %} +

Main

+
{{ job_log['main'] }}
+{% endif %} - {% if 'output' in job_log %} -

Output

-
{{ job_log['output'] }}
- {% endif %} - {% endif %} +{% if 'output' in job_log %} +

Output

+
{{ job_log['output'] }}
+{% endif %} + +{% endif %} -

Status

-
{{ job_status }}
- {% endblock %} +

Status

+
{{ job_status }}
+{% endblock %} diff --git a/benchmark-service/benchmark/templates/lookup.html b/benchmark-service/benchmark/templates/lookup.html index d22704ae9bd..9f0764b7526 100644 --- a/benchmark-service/benchmark/templates/lookup.html +++ b/benchmark-service/benchmark/templates/lookup.html @@ -7,56 +7,54 @@

Benchmark System

-
- -
- +
+ +
+
{% if file is not none %} -
-
SHA: - {% if benchmarks['sha'] is not none %} - {{ benchmarks['sha'] }} - {% else %} - unknown - {% endif %} -
-
Geometric Mean: - {% if benchmarks['geometric_mean'] is not none %} - {{ benchmarks['geometric_mean'] }} - {% endif %} -
-
- - - - - - - - - - - - - - - {% for d in benchmarks['data'].values() %} - - - - - - - - - - - {% endfor %} - -
name failed f-stat mean median p-value stdev
{{ d['name'] }}{{ d['failed'] }}{{ d['f-stat'] }}{{ d['mean'] }}{{ d['median'] }}{{ d['p-value'] }}{{ d['stdev'] }}
+
+
SHA: + {% if benchmarks['sha'] is not none %} + {{ benchmarks['sha'] }} + {% else %} + unknown + {% endif %} +
+
Geometric Mean: + {% if benchmarks['geometric_mean'] is not none %} + {{ benchmarks['geometric_mean'] }} + {% endif %} +
+
+ + + + + + + + + + + + + + {% for d in benchmarks['data'].values() %} + + + + + + + + + + {% endfor %} + +
name failed f-stat mean median p-value stdev
{{ d['name'] }}{{ d['failed'] }}{{ d['f-stat'] }}{{ d['mean'] }}{{ d['median'] }}{{ d['p-value'] }}{{ d['stdev'] }}
{% endif %} diff --git a/build.yaml b/build.yaml index aed6d2e8741..74d011fb17b 100644 --- a/build.yaml +++ b/build.yaml @@ -525,7 +525,7 @@ steps: python3 ../ci/jinja2_render.py '{"deploy": '${DEPLOY}', "default_ns": {"name": "{{ default_ns.name }}"}}' nginx.conf nginx.conf.out outputs: - from: /io/repo/grafana/nginx.conf.out - to: /nginx.conf.out + to: /grafana/nginx.conf.out dependsOn: - default_ns - service_base_image @@ -535,7 +535,7 @@ steps: contextPath: grafana publishAs: grafana inputs: - - from: /nginx.conf.out + - from: /grafana/nginx.conf.out to: /nginx.conf.out dependsOn: - hail_ubuntu_image @@ -1617,8 +1617,13 @@ steps: # dev deploy elides the hail-is remote, add it and retrieve the tags git remote add hail-is https://github.com/hail-is/hail.git git fetch hail-is - # if the tag doesn't exist, this commit is the release - git checkout $(cat hail/python/hail/hail_pip_version) || true + + # 0.2.63 was the last old-style docs + if [[ "$(cat hail/python/hail/hail_pip_version)" != "0.2.63" ]] + then + # if the tag doesn't exist, this commit is the release + git checkout $(cat hail/python/hail/hail_pip_version) || true + fi PYTHONPATH=$PYTHONPATH:$(pwd)/hail/python make -C hail hail-docs-no-test batch-docs @@ -1674,6 +1679,54 @@ steps: - grafana_nginx_image - deploy_router - create_certs + - kind: runImage + name: render_prom_nginx_conf + image: + valueFrom: service_base_image.image + script: | + set -ex + cd /io + rm -rf repo + mkdir repo + cd repo + {{ code.checkout_script }} + cd prometheus + {% if deploy %} + DEPLOY=true + {% else %} + DEPLOY=false + {% endif %} + python3 ../ci/jinja2_render.py '{"deploy": '${DEPLOY}', "default_ns": {"name": "{{ default_ns.name }}"}}' nginx.conf nginx.conf.out + outputs: + - from: /io/repo/prometheus/nginx.conf.out + to: /prometheus/nginx.conf.out + dependsOn: + - default_ns + - service_base_image + - kind: buildImage + name: prom_nginx_image + dockerFile: prometheus/Dockerfile.nginx + contextPath: prometheus + publishAs: prometheus + inputs: + - from: /prometheus/nginx.conf.out + to: /nginx.conf.out + dependsOn: + - hail_ubuntu_image + - render_prom_nginx_conf + - kind: deploy + name: deploy_prometheus + namespace: + valueFrom: default_ns.name + config: prometheus/prometheus.yaml + scopes: + - deploy + - dev + dependsOn: + - default_ns + - prom_nginx_image + - deploy_router + - create_certs - kind: runImage name: create_dummy_oauth2_client_secret image: @@ -2269,156 +2322,6 @@ steps: - memory_image - deploy_memory_sa - create_certs - - kind: runImage - name: test_hail_python_service_backend_0 - image: - valueFrom: hail_run_image.image - script: | - set -ex - cd /io - tar xzf test.tar.gz - tar xvf wheel-container.tar - python3 -m pip install --no-dependencies hail-*-py3-none-any.whl - export PYTEST_SPLITS=3 - export PYTEST_SPLIT_INDEX=0 - export HAIL_TEST_RESOURCES_DIR=gs://cpg-hail-test/{{ upload_test_resources_to_gcs.token }}/test/resources - export HAIL_DOCTEST_DATA_DIR=gs://cpg-hail-test/{{ upload_test_resources_to_gcs.token }}/doctest/data - export HAIL_QUERY_BACKEND=service - export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json - hailctl config set batch/billing_project test - hailctl config set batch/bucket cpg-hail-test - python3 -m pytest -n 8 --ignore=test/hailtop/ --log-cli-level=INFO -s -vv --instafail --durations=50 test - inputs: - - from: /wheel-container.tar - to: /io/wheel-container.tar - - from: /test.tar.gz - to: /io/test.tar.gz - secrets: - - name: gce-deploy-config - namespace: - valueFrom: default_ns.name - 
mountPath: /deploy-config - - name: test-tokens - namespace: - valueFrom: default_ns.name - mountPath: /user-tokens - - name: ssl-config-query-tests - namespace: - valueFrom: default_ns.name - mountPath: /ssl-config - - name: test-gsa-key - namespace: - valueFrom: default_ns.name - mountPath: /test-gsa-key - timeout: 3600 - dependsOn: - - default_ns - - upload_test_resources_to_gcs - - deploy_query - - deploy_memory - - deploy_shuffler - - hail_run_image - - build_hail - - kind: runImage - name: test_hail_python_service_backend_1 - image: - valueFrom: hail_run_image.image - script: | - set -ex - cd /io - tar xzf test.tar.gz - tar xvf wheel-container.tar - python3 -m pip install --no-dependencies hail-*-py3-none-any.whl - export PYTEST_SPLITS=3 - export PYTEST_SPLIT_INDEX=1 - export HAIL_TEST_RESOURCES_DIR=gs://cpg-hail-test/{{ upload_test_resources_to_gcs.token }}/test/resources - export HAIL_DOCTEST_DATA_DIR=gs://cpg-hail-test/{{ upload_test_resources_to_gcs.token }}/doctest/data - export HAIL_QUERY_BACKEND=service - export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json - hailctl config set batch/billing_project test - hailctl config set batch/bucket cpg-hail-test - python3 -m pytest -n 8 --ignore=test/hailtop/ --log-cli-level=INFO -s -vv --instafail --durations=50 test - inputs: - - from: /wheel-container.tar - to: /io/wheel-container.tar - - from: /test.tar.gz - to: /io/test.tar.gz - secrets: - - name: gce-deploy-config - namespace: - valueFrom: default_ns.name - mountPath: /deploy-config - - name: test-tokens - namespace: - valueFrom: default_ns.name - mountPath: /user-tokens - - name: ssl-config-query-tests - namespace: - valueFrom: default_ns.name - mountPath: /ssl-config - - name: test-gsa-key - namespace: - valueFrom: default_ns.name - mountPath: /test-gsa-key - timeout: 3600 - dependsOn: - - default_ns - - upload_test_resources_to_gcs - - deploy_query - - deploy_memory - - deploy_shuffler - - hail_run_image - - build_hail - - kind: runImage - name: test_hail_python_service_backend_2 - image: - valueFrom: hail_run_image.image - script: | - set -ex - cd /io - tar xzf test.tar.gz - tar xvf wheel-container.tar - python3 -m pip install --no-dependencies hail-*-py3-none-any.whl - export PYTEST_SPLITS=3 - export PYTEST_SPLIT_INDEX=2 - export HAIL_TEST_RESOURCES_DIR=gs://cpg-hail-test/{{ upload_test_resources_to_gcs.token }}/test/resources - export HAIL_DOCTEST_DATA_DIR=gs://cpg-hail-test/{{ upload_test_resources_to_gcs.token }}/doctest/data - export HAIL_QUERY_BACKEND=service - export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json - hailctl config set batch/billing_project test - hailctl config set batch/bucket cpg-hail-test - python3 -m pytest -n 8 --ignore=test/hailtop/ --log-cli-level=INFO -s -vv --instafail --durations=50 test - inputs: - - from: /wheel-container.tar - to: /io/wheel-container.tar - - from: /test.tar.gz - to: /io/test.tar.gz - secrets: - - name: gce-deploy-config - namespace: - valueFrom: default_ns.name - mountPath: /deploy-config - - name: test-tokens - namespace: - valueFrom: default_ns.name - mountPath: /user-tokens - - name: ssl-config-query-tests - namespace: - valueFrom: default_ns.name - mountPath: /ssl-config - - name: test-gsa-key - namespace: - valueFrom: default_ns.name - mountPath: /test-gsa-key - timeout: 3600 - dependsOn: - - default_ns - - upload_test_resources_to_gcs - - deploy_query - - deploy_memory - - deploy_shuffler - - hail_run_image - - build_hail - kind: runImage name: test_lsm image: @@ -3779,9 +3682,6 @@ steps: - 
test_hailtop_batch_2 - test_hailtop_batch_3 - test_hailtop_batch_4 - - test_hail_python_service_backend_0 - - test_hail_python_service_backend_1 - - test_hail_python_service_backend_2 - kind: runImage name: delete_atgu_tables image: diff --git a/ci/Makefile b/ci/Makefile index b03f3c77fe1..ae8598574eb 100644 --- a/ci/Makefile +++ b/ci/Makefile @@ -16,6 +16,7 @@ check: $(PYTHON) -m flake8 ci $(PYTHON) -m pylint --rcfile ../pylintrc ci --score=n $(BLACK) --check --diff + curlylint . ../check-sql.sh .PHONY: blacken diff --git a/ci/ci/ci.py b/ci/ci/ci.py index 28eb5062a39..f66314910d5 100644 --- a/ci/ci/ci.py +++ b/ci/ci/ci.py @@ -30,6 +30,7 @@ @routes.get('/batches') +@monitor_endpoint @web_authenticated_developers_only() async def get_batches(request, userdata): batch_client = request.app['batch_client'] @@ -40,6 +41,7 @@ async def get_batches(request, userdata): @routes.get('/batches/{batch_id}') +@monitor_endpoint @web_authenticated_developers_only() async def get_batch(request, userdata): batch_id = int(request.match_info['batch_id']) @@ -54,6 +56,7 @@ async def get_batch(request, userdata): @routes.get('/batches/{batch_id}/jobs/{job_id}') +@monitor_endpoint @web_authenticated_developers_only() async def get_job(request, userdata): batch_id = int(request.match_info['batch_id']) @@ -75,7 +78,117 @@ async def healthcheck(request): # pylint: disable=unused-argument return web.Response(status=200) +gh_router = gh_routing.Router() + + +@gh_router.register('pull_request') +async def pull_request_callback(event): + gh_pr = event.data['pull_request'] + number = gh_pr['number'] + target_branch = FQBranch.from_gh_json(gh_pr['base']) + for wb in watched_branches: + if (wb.prs and number in wb.prs) or (wb.branch == target_branch): + await wb.notify_github_changed(event.app) + + +@gh_router.register('push') +async def push_callback(event): + data = event.data + ref = data['ref'] + if ref.startswith('refs/heads/'): + branch_name = ref[len('refs/heads/') :] + branch = FQBranch(Repo.from_gh_json(data['repository']), branch_name) + for wb in watched_branches: + if wb.branch == branch or any(pr.branch == branch for pr in wb.prs.values()): + await wb.notify_github_changed(event.app) + + +@gh_router.register('pull_request_review') +async def pull_request_review_callback(event): + gh_pr = event.data['pull_request'] + number = gh_pr['number'] + for wb in watched_branches: + if number in wb.prs: + await wb.notify_github_changed(event.app) + + +async def github_callback_handler(request): + event = gh_sansio.Event.from_http(request.headers, await request.read()) + event.app = request.app + await gh_router.dispatch(event) + + +@routes.post('/github_callback') +async def github_callback(request): + await asyncio.shield(github_callback_handler(request)) + return web.Response(status=200) + + +async def batch_callback_handler(request): + app = request.app + params = await request.json() + log.info(f'batch callback {params}') + attrs = params.get('attributes') + if attrs: + target_branch = attrs.get('target_branch') + if target_branch: + for wb in watched_branches: + if wb.branch.short_str() == target_branch: + log.info(f'watched_branch {wb.branch.short_str()} notify batch changed') + await wb.notify_batch_changed(app) + + +@routes.get('/api/v1alpha/deploy_status') +@monitor_endpoint +@rest_authenticated_developers_only +async def deploy_status(request, userdata): # pylint: disable=unused-argument + batch_client = request.app['batch_client'] + + async def get_failure_information(batch): + if isinstance(batch, 
MergeFailureBatch): + return batch.exception + jobs = await collect_agen(batch.jobs()) + + async def fetch_job_and_log(j): + full_job = await batch_client.get_job(j['batch_id'], j['job_id']) + log = await full_job.log() + return {**full_job._status, 'log': log} + + return await asyncio.gather(*[fetch_job_and_log(j) for j in jobs if j['state'] in ('Error', 'Failed')]) + + wb_configs = [ + { + 'branch': wb.branch.short_str(), + 'sha': wb.sha, + 'deploy_batch_id': wb.deploy_batch.id if wb.deploy_batch and hasattr(wb.deploy_batch, 'id') else None, + 'deploy_state': wb.deploy_state, + 'repo': wb.branch.repo.short_str(), + 'failure_information': None + if wb.deploy_state == 'success' + else await get_failure_information(wb.deploy_batch), + } + for wb in watched_branches + ] + return web.json_response(wb_configs) + + +@routes.post('/api/v1alpha/update') +@monitor_endpoint +@rest_authenticated_developers_only +async def post_update(request, userdata): # pylint: disable=unused-argument + log.info('developer triggered update') + + async def update_all(): + for wb in watched_branches: + await wb.update(request.app) + + request.app['task_manager'].ensure_future(update_all()) + return web.Response(status=200) + + +>>>>>>> upstream/main @routes.post('/api/v1alpha/dev_deploy_branch') +@monitor_endpoint @rest_authenticated_developers_only async def dev_deploy_branch(request, userdata): app = request.app @@ -202,6 +315,7 @@ def run(): setup_common_static_routes(routes) app.add_routes(routes) + app.router.add_get("/metrics", server_stats) web.run_app( deploy_config.prefix_application(app, 'ci'), diff --git a/ci/ci/github.py b/ci/ci/github.py index 005da121b81..a6460a84f51 100644 --- a/ci/ci/github.py +++ b/ci/ci/github.py @@ -156,7 +156,7 @@ def clone_or_fetch_script(repo): class PR(Code): - def __init__(self, number, title, source_branch, source_sha, target_branch, author, assignees, labels): + def __init__(self, number, title, source_branch, source_sha, target_branch, author, assignees, reviewers, labels): self.number = number self.title = title self.source_branch = source_branch @@ -164,6 +164,7 @@ def __init__(self, number, title, source_branch, source_sha, target_branch, auth self.target_branch = target_branch self.author = author self.assignees = assignees + self.reviewers = reviewers self.labels = labels # pending, changes_requested, approve @@ -226,6 +227,8 @@ def update_from_gh_json(self, gh_json): assert self.number == gh_json['number'] self.title = gh_json['title'] self.author = gh_json['user']['login'] + self.assignees = {user['login'] for user in gh_json['assignees']} + self.reviewers = {user['login'] for user in gh_json['requested_reviewers']} new_labels = {label['name'] for label in gh_json['labels']} if new_labels != self.labels: @@ -257,6 +260,7 @@ def from_gh_json(gh_json, target_branch): target_branch, gh_json['user']['login'], {user['login'] for user in gh_json['assignees']}, + {user['login'] for user in gh_json['requested_reviewers']}, {label['name'] for label in gh_json['labels']}, ) @@ -693,7 +697,7 @@ async def _update_deploy(self, batch_client): 'to': 'team', 'topic': 'CI Deploy Failure', 'content': f''' -@*dev* +@**daniel king** state: {self.deploy_state} branch: {self.branch.short_str()} sha: {self.sha} diff --git a/ci/ci/templates/batch.html b/ci/ci/templates/batch.html index b5e2a3d85b6..63b6b1553f2 100644 --- a/ci/ci/templates/batch.html +++ b/ci/ci/templates/batch.html @@ -1,6 +1,9 @@ {% from "job-table.html" import job_table with context %} {% extends "layout.html" %} {% 
block title %}Batch {{ batch['id'] }}{% endblock %} +{% block head %} + +{% endblock %} {% block content %}

Batch {{ batch['id'] }}

Timing

@@ -20,5 +23,8 @@

Attributes

{% endif %}

Jobs

- {{ job_table(jobs) }} + {{ job_table(jobs, "jobs", "jobsSearchBar") }} + {% endblock %} diff --git a/ci/ci/templates/dev-deploy-table.html b/ci/ci/templates/dev-deploy-table.html index 53389d42b6f..810c2c5cc6d 100644 --- a/ci/ci/templates/dev-deploy-table.html +++ b/ci/ci/templates/dev-deploy-table.html @@ -24,9 +24,6 @@ {% if 'state' in batch and batch['state'] %} {{ batch['state'] }} {% endif %} - {% if not batch['complete'] %} - running - {% endif %} {% endfor %} diff --git a/ci/ci/templates/job-table.html b/ci/ci/templates/job-table.html index 494520295a2..af845ae07b7 100644 --- a/ci/ci/templates/job-table.html +++ b/ci/ci/templates/job-table.html @@ -1,11 +1,10 @@ -{% macro job_table(jobs) %} +{% macro job_table(jobs, id, searchBarId) %} {% block head %} - {% endblock %}
- - + +
diff --git a/ci/ci/templates/pr.html b/ci/ci/templates/pr.html index 50826c7fcf9..74e599e1fab 100644 --- a/ci/ci/templates/pr.html +++ b/ci/ci/templates/pr.html @@ -49,9 +49,6 @@

Build History

{% if 'state' in batch and batch['state'] %} {{ batch['state'] }} {% endif %} - {% if not batch['complete'] %} - running - {% endif %} {% endfor %} diff --git a/ci/ci/templates/user.html b/ci/ci/templates/user.html index 2b3996d3bb8..953f3e6bca1 100644 --- a/ci/ci/templates/user.html +++ b/ci/ci/templates/user.html @@ -35,6 +35,15 @@

Welcome, {{ username }}!


+
+ {% for wb in actionable_wbs %} + {% if wb.prs is not none %} +

{{ wb.branch }} Awaiting Action

+ {{ pr_table(wb, "actionitems", "actionitemsSearchBar") }} + {% endif %} + {% endfor %} +
+
{% for wb in pr_wbs %} {% if wb.prs is not none %} @@ -47,12 +56,22 @@

{{ wb.branch }} PRs

{% for wb in review_wbs %} {% if wb.prs is not none %} -

{{ wb.branch }} Assigned Reviews

+

{{ wb.branch }} Reviews

{{ pr_table(wb, "reviews", "reviewsSearchBar") }} {% endif %} {% endfor %}
+
+

Authorize SHA

+
+ + + + + +
+
{{ team_table(team_member) }}
diff --git a/ci/deployment.yaml b/ci/deployment.yaml index 7a07fd74ebe..d1b24cb186a 100644 --- a/ci/deployment.yaml +++ b/ci/deployment.yaml @@ -15,6 +15,7 @@ spec: labels: app: ci hail.is/sha: "{{ code.sha }}" + grafanak8sapp: "true" spec: {% if deploy %} priorityClassName: production diff --git a/docker/requirements.txt b/docker/requirements.txt index c8f83816227..5b3c1ca3394 100644 --- a/docker/requirements.txt +++ b/docker/requirements.txt @@ -9,6 +9,7 @@ async-timeout==3.0.1 asyncinit==0.2.4 Authlib==0.11 black==20.8b1 +curlylint==0.12.0 decorator==4.4.0 dictdiffer==0.8.1 dill>=0.3.1.1,<0.4 diff --git a/gateway/gateway.nginx.conf b/gateway/gateway.nginx.conf index b847c81761b..b21f4d0cd13 100644 --- a/gateway/gateway.nginx.conf +++ b/gateway/gateway.nginx.conf @@ -97,3 +97,38 @@ server { include /etc/letsencrypt/options-ssl-nginx.conf; ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; } + +server { + server_name ukbb-rg.hail.is; + + location /rg_browser { + proxy_pass http://ukbb-rg-browser.ukbb-rg; + proxy_set_header Host $http_host; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Host $updated_host; + proxy_set_header X-Forwarded-Proto $updated_scheme; + proxy_set_header X-Real-IP $http_x_real_ip; + + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection $connection_upgrade; + proxy_read_timeout 20d; + proxy_buffering off; + } + + location / { + proxy_pass http://ukbb-rg-static.ukbb-rg; + proxy_set_header Host $http_host; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Host $updated_host; + proxy_set_header X-Forwarded-Proto $updated_scheme; + proxy_set_header X-Real-IP $http_x_real_ip; + } + + listen 443 ssl; + listen [::]:443 ssl; + ssl_certificate /etc/letsencrypt/fullchain.pem; + ssl_certificate_key /etc/letsencrypt/privkey.pem; + include /etc/letsencrypt/options-ssl-nginx.conf; + ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; +} diff --git a/gear/gear/__init__.py b/gear/gear/__init__.py index 3f8e04ac412..3b10cae322b 100644 --- a/gear/gear/__init__.py +++ b/gear/gear/__init__.py @@ -5,6 +5,7 @@ web_authenticated_developers_only, rest_authenticated_developers_only, maybe_parse_bearer_header from .csrf import new_csrf_token, check_csrf_token from .auth_utils import insert_user, create_session +from .metrics import monitor_endpoint __all__ = [ 'create_database_pool', @@ -22,5 +23,6 @@ 'insert_user', 'create_session', 'transaction', - 'maybe_parse_bearer_header' + 'maybe_parse_bearer_header', + 'monitor_endpoint' ] diff --git a/gear/gear/metrics.py b/gear/gear/metrics.py new file mode 100644 index 00000000000..e568c14f7d2 --- /dev/null +++ b/gear/gear/metrics.py @@ -0,0 +1,18 @@ +from functools import wraps +import prometheus_client as pc # type: ignore +from prometheus_async.aio import time as prom_async_time # type: ignore + +REQUEST_TIME = pc.Summary('http_request_latency_seconds', 'Endpoint latency in seconds', ['endpoint', 'verb']) +REQUEST_COUNT = pc.Counter('http_request_count', 'Number of HTTP requests', ['endpoint', 'verb', 'status']) + + +def monitor_endpoint(handler): + @wraps(handler) + async def wrapped(request, *args, **kwargs): + # Use the path template given to @route., not the fully resolved one + endpoint = request.match_info.route.resource.canonical + verb = request.method + response = await prom_async_time(REQUEST_TIME.labels(endpoint=endpoint, verb=verb), handler(request, *args, **kwargs)) + REQUEST_COUNT.labels(endpoint=endpoint, 
verb=verb, status=response.status).inc() + return response + return wrapped diff --git a/grafana/deployment.yaml b/grafana/deployment.yaml index 10a5a7ff71d..386ab8defad 100644 --- a/grafana/deployment.yaml +++ b/grafana/deployment.yaml @@ -17,6 +17,8 @@ spec: app: grafana spec: priorityClassName: infrastructure + securityContext: + fsGroup: 472 volumes: - name: grafana-configmap-volume configMap: diff --git a/hail/Makefile b/hail/Makefile index 69eebec57ef..b0570f3eb93 100644 --- a/hail/Makefile +++ b/hail/Makefile @@ -13,7 +13,7 @@ BRANCH := $(shell git rev-parse --abbrev-ref HEAD) SCALA_VERSION ?= 2.11.12 SPARK_VERSION ?= 2.4.5 HAIL_MAJOR_MINOR_VERSION := 0.2 -HAIL_PATCH_VERSION := 63 +HAIL_PATCH_VERSION := 64 HAIL_PIP_VERSION := $(HAIL_MAJOR_MINOR_VERSION).$(HAIL_PATCH_VERSION) HAIL_VERSION := $(HAIL_PIP_VERSION)-$(SHORT_REVISION) ELASTIC_MAJOR_VERSION ?= 7 diff --git a/hail/python/dev-requirements.txt b/hail/python/dev-requirements.txt index 3b7dea5dcca..039b6b4412c 100644 --- a/hail/python/dev-requirements.txt +++ b/hail/python/dev-requirements.txt @@ -4,6 +4,7 @@ pylint==2.6.0 astroid<2.5 # https://github.com/PyCQA/pylint/issues/4131 pre-commit==2.9.2 black==20.8b1 +curlylint==0.12.0 pytest==4.6.3 pytest-html==1.20.0 pytest-xdist==1.28 diff --git a/hail/python/hail/__init__.py b/hail/python/hail/__init__.py index de930a82804..24e90c0dbe7 100644 --- a/hail/python/hail/__init__.py +++ b/hail/python/hail/__init__.py @@ -1,5 +1,8 @@ -import pkg_resources -import sys +import nest_asyncio +nest_asyncio.apply() + +import pkg_resources # noqa: E402 +import sys # noqa: E402 if sys.version_info < (3, 6): raise EnvironmentError('Hail requires Python 3.6 or later, found {}.{}'.format( diff --git a/hail/python/hail/docs/change_log.md b/hail/python/hail/docs/change_log.md index ea18e1d0d8a..ab49bd0db23 100644 --- a/hail/python/hail/docs/change_log.md +++ b/hail/python/hail/docs/change_log.md @@ -22,12 +22,30 @@ Please note that **forward compatibility should not be expected, especially relating to file formats**: this means that it may not be possible to use an earlier version of Hail to read files written in a later version. +## Version 0.2.64 + +Released 2021-03-11 + +### New features +- (hail#10164) Add source_file_field parameter to hl.import_table to allow lines to be associated with their original source file. + +### Bug fixes + +- (hail#10182) Fixed serious memory leak in certain uses of `filter_intervals`. +- (hail#10133) Fix bug where some pipelines incorrectly infer missingness, leading to a type error. +- (hail#10134) Teach `hl.king` to treat filtered entries as missing values. +- (hail#10158) Fixes hail usage in latest versions of jupyter that rely on `asyncio`. +- (hail#10174) Fixed bad error message when incorrect return type specified with `hl.loop`. + +--- + + ## Version 0.2.63 Released 2021-03-01 - (hail#10105) Hail will now return `frozenset` and `hail.utils.frozendict` instead of normal sets and dicts. - + ### Bug fixes @@ -77,7 +95,7 @@ Released 2020-12-03 - (hail#9775) Fixed race condition leading to invalid intermediate files in VCF combiner. - (hail#9751) Fix bug where constructing an array of empty structs causes type error. -- (hail#9731) Fix error and incorrect behavior when using `hl.import_matrix_table` with int64 data types. +- (hail#9731) Fix error and incorrect behavior when using `hl.import_matrix_table` with int64 data types. 
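The `monitor_endpoint` decorator introduced above in `gear/gear/metrics.py` is applied per aiohttp handler, sitting between the route decorator and the auth decorator exactly as in the `ci/ci/ci.py` hunks. A minimal usage sketch, assuming the `gear` package from this patch is importable; the standalone app, example route, and port are illustrative and not part of the patch:

```python
# Sketch only: wire monitor_endpoint into a small aiohttp app the same way ci.py does.
from aiohttp import web
from prometheus_async.aio.web import server_stats  # serves the Prometheus text format
from gear import monitor_endpoint  # exported by gear/gear/__init__.py in this patch

routes = web.RouteTableDef()


@routes.get('/batches/{batch_id}')  # hypothetical endpoint for the example
@monitor_endpoint
async def get_batch(request):
    # latency and counts are labelled with the route template '/batches/{batch_id}',
    # not the resolved path, per the comment in metrics.py
    return web.json_response({'batch_id': int(request.match_info['batch_id'])})


app = web.Application()
app.add_routes(routes)
app.router.add_get('/metrics', server_stats)  # same exposure pattern ci.py adds

if __name__ == '__main__':
    web.run_app(app, port=5000)  # port chosen arbitrarily for the sketch
```

The Prometheus StatefulSet added later in this patch only keeps pods labelled `grafanak8sapp: "true"` in its pod scrape job, which is why `ci/deployment.yaml` gains that label alongside the decorator changes.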
--- @@ -128,7 +146,7 @@ Released 2020-10-08 ### Bug fixes - (hail#9503) NDArrays can now hold arbitrary data types, though only ndarrays of primitives can be collected to Python. -- (hail#9501) Remove memory leak in `BlockMatrix.to_matrix_table_row_major` and `BlockMatrix.to_table_row_major`. +- (hail#9501) Remove memory leak in `BlockMatrix.to_matrix_table_row_major` and `BlockMatrix.to_table_row_major`. - (hail#9424) `hl.experimental.writeBlockMatrices` didn't correctly support `overwrite` flag. ### Performance improvements @@ -173,7 +191,7 @@ Released 2020-08-31 ### hailctl dataproc -- (hail#9263) Add support for `--expiration-time` argument to `hailctl dataproc start`. +- (hail#9263) Add support for `--expiration-time` argument to `hailctl dataproc start`. - (hail#9263) Add support for `--no-max-idle`, `no-max-age`, `--max-age`, and `--expiration-time` to `hailctl dataproc --modify`. --- @@ -204,7 +222,7 @@ Released 2020-08-07 ### VCF Combiner - (hail#9224)(hail#9237) **Breaking change**: Users are now required to pass a partitioning argument to the command-line interface or `run_combiner` method. See documentation for details. -- (hail#8963) Improved performance of VCF combiner by ~4x. +- (hail#8963) Improved performance of VCF combiner by ~4x. ### New features @@ -214,7 +232,7 @@ Released 2020-08-07 ### Bug fixes - (hail#9206)(hail#9207) Improved error messages from invalid usages of Hail expressions. -- (hail#9223) Fixed error in bounds checking for NDArray slicing. +- (hail#9223) Fixed error in bounds checking for NDArray slicing. --- @@ -299,8 +317,8 @@ Released 2020-06-23 ### Bug fixes - (hail#9009) Fix memory leak when counting per-partition. This caused excessive memory use in `BlockMatrix.write_from_entry_expr`, and likely in many other places. -- (hail#9006) Fix memory leak in `hl.export_bgen`. -- (hail#9001) Fix double close error that showed up on Azure Cloud. +- (hail#9006) Fix memory leak in `hl.export_bgen`. +- (hail#9001) Fix double close error that showed up on Azure Cloud. 
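For the `source_file_field` option described in the 0.2.64 entry above and threaded through `import_table`, `TextTableReader`, and the new `test_source_file` test later in this patch, a hedged usage sketch; the glob pattern and field name here are illustrative, not taken from the patch:

```python
# Sketch: import several TSV shards with a glob and record each row's source file.
import hail as hl

hl.init()  # assumes an existing Hail installation and backend

ht = hl.import_table(
    'gs://my-bucket/annotations.split.*.tsv',  # hypothetical glob pattern
    source_file_field='source',                # adds a string field 'source' per row
)

# e.g. count how many rows came from each shard
ht.group_by(ht.source).aggregate(n_rows=hl.agg.count()).show()
```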
## Version 0.2.46 diff --git a/hail/python/hail/experimental/loop.py b/hail/python/hail/experimental/loop.py index a495b55774e..c921625c631 100644 --- a/hail/python/hail/experimental/loop.py +++ b/hail/python/hail/experimental/loop.py @@ -155,8 +155,9 @@ def make_loop(*recur_exprs): uid_irs.append((uid, expr._ir)) loop_f = to_expr(f(make_loop, *loop_vars)) + if loop_f.dtype != typ: + raise TypeError(f"requested type {typ} does not match inferred type {loop_f.dtype}") check_tail_recursive(loop_f._ir) indices, aggregations = unify_all(*args, loop_f) - if loop_f.dtype != typ: - raise TypeError(f"requested type {typ} does not match inferred type {loop_f.typ}") + return construct_expr(ir.TailLoop(loop_name, loop_f._ir, uid_irs), loop_f.dtype, indices, aggregations) diff --git a/hail/python/hail/ir/__init__.py b/hail/python/hail/ir/__init__.py index 4d2766e2b8f..984b1a449bb 100644 --- a/hail/python/hail/ir/__init__.py +++ b/hail/python/hail/ir/__init__.py @@ -23,7 +23,7 @@ from .register_aggregators import register_aggregators from .table_ir import MatrixRowsTable, TableJoin, TableLeftJoinRightDistinct, \ TableIntervalJoin, TableUnion, TableRange, TableMapGlobals, TableExplode, \ - TableKeyBy, TableMapRows, TableRead, TableImport, MatrixEntriesTable, \ + TableKeyBy, TableMapRows, TableRead, MatrixEntriesTable, \ TableFilter, TableKeyByAndAggregate, \ TableAggregateByKey, MatrixColsTable, TableParallelize, TableHead, \ TableTail, TableOrderBy, TableDistinct, RepartitionStrategy, \ @@ -272,7 +272,6 @@ 'TableMapRows', 'TableMapPartitions', 'TableRead', - 'TableImport', 'MatrixEntriesTable', 'TableFilter', 'TableKeyByAndAggregate', diff --git a/hail/python/hail/ir/table_ir.py b/hail/python/hail/ir/table_ir.py index fc4c0cfdd3d..6d8da139926 100644 --- a/hail/python/hail/ir/table_ir.py +++ b/hail/python/hail/ir/table_ir.py @@ -1,5 +1,3 @@ -import json - import hail as hl from hail.expr.types import dtype from hail.ir.base_ir import BaseIR, TableIR @@ -250,26 +248,6 @@ def _compute_type(self): self._type = Env.backend().table_type(self) -class TableImport(TableIR): - def __init__(self, paths, typ, reader_options): - super().__init__() - self.paths = paths - self._typ = typ - self.reader_options = reader_options - - def head_str(self): - return '(({}) {} {}'.format( - ' '.join([escape_str(path) for path in self.paths]), - self._typ._parsable_string(), - escape_str(json.dumps(self.reader_options))) - - def _eq(self, other): - return self.paths == other.paths and self.typ == other.typ and self.reader_options == other.reader_options - - def _compute_type(self): - self._type = Env.backend().table_type(self) - - class MatrixEntriesTable(TableIR): def __init__(self, child): super().__init__(child) diff --git a/hail/python/hail/ir/table_reader.py b/hail/python/hail/ir/table_reader.py index 7171271f28c..b8d80968d4e 100644 --- a/hail/python/hail/ir/table_reader.py +++ b/hail/python/hail/ir/table_reader.py @@ -67,7 +67,7 @@ class TextTableReader(TableReader): def __init__(self, paths, min_partitions, types, comment, delimiter, missing, no_header, quote, skip_blank_lines, force_bgz, filter, find_replace, - force_gz): + force_gz, source_file_field): self.config = { 'files': paths, 'typeMapStr': {f: t._parsable_string() for f, t in types.items()}, @@ -80,7 +80,8 @@ def __init__(self, paths, min_partitions, types, comment, 'skipBlankLines': skip_blank_lines, 'forceBGZ': force_bgz, 'filterAndReplace': make_filter_and_replace(filter, find_replace), - 'forceGZ': force_gz + 'forceGZ': force_gz, + 'sourceFileField': 
source_file_field } def render(self): diff --git a/hail/python/hail/methods/impex.py b/hail/python/hail/methods/impex.py index 5157192e024..acc1e89911f 100644 --- a/hail/python/hail/methods/impex.py +++ b/hail/python/hail/methods/impex.py @@ -1317,7 +1317,8 @@ def import_gen(path, force_bgz=bool, filter=nullable(str), find_replace=nullable(sized_tupleof(str, str)), - force=bool) + force=bool, + source_file_field=nullable(str)) def import_table(paths, key=None, min_partitions=None, @@ -1332,7 +1333,8 @@ def import_table(paths, force_bgz=False, filter=None, find_replace=None, - force=False) -> Table: + force=False, + source_file_field=None) -> Table: """Import delimited text file (text table) as :class:`.Table`. The resulting :class:`.Table` will have no key fields. Use @@ -1520,7 +1522,9 @@ def import_table(paths, If ``True``, load gzipped files serially on one core. This should be used only when absolutely necessary, as processing time will be increased due to lack of parallelism. - + source_file_field : :class:`str`, optional + If defined, the source file name for each line will be a field of the table + with this name. Can be useful when importing multiple tables using glob patterns. Returns ------- :class:`.Table` @@ -1532,7 +1536,7 @@ def import_table(paths, tr = ir.TextTableReader(paths, min_partitions, types, comment, delimiter, missing, no_header, quote, skip_blank_lines, force_bgz, filter, find_replace, - force) + force, source_file_field) ht = Table(ir.TableRead(tr)) strs = [] @@ -1572,7 +1576,7 @@ def import_table(paths, tr = ir.TextTableReader(paths, min_partitions, all_types, comment, delimiter, missing, no_header, quote, skip_blank_lines, force_bgz, filter, find_replace, - force) + force, source_file_field) ht = Table(ir.TableRead(tr)) else: diff --git a/hail/python/hailtop/config/deploy_config.py b/hail/python/hailtop/config/deploy_config.py index 77cfd03e906..01f1f9fb796 100644 --- a/hail/python/hailtop/config/deploy_config.py +++ b/hail/python/hailtop/config/deploy_config.py @@ -117,6 +117,10 @@ def prefix_application(self, app, service, **kwargs): async def get_healthcheck(request): # pylint: disable=unused-argument,unused-variable return web.Response() + @root_routes.get('/metrics') + async def get_metrics(request): # pylint: disable=unused-argument,unused-variable + return web.HTTPFound(location=f'{base_path}/metrics') + root_app = web.Application(**kwargs) root_app.add_routes(root_routes) root_app.add_subapp(base_path, app) diff --git a/hail/python/test/hail/conftest.py b/hail/python/test/hail/conftest.py index fb335a8695d..31f5b7d0114 100644 --- a/hail/python/test/hail/conftest.py +++ b/hail/python/test/hail/conftest.py @@ -1,3 +1,4 @@ +import asyncio import hashlib import os @@ -19,3 +20,11 @@ def digest(s): for item in items: if not digest(item.name) % n_splits == split_index: item.add_marker(skip_this) + +@pytest.fixture(scope="session", autouse=True) +def ensure_event_loop_is_initialized_in_test_thread(): + try: + asyncio.get_event_loop() + except RuntimeError as err: + assert err.args[0] == "There is no current event loop in thread 'Dummy-1'" + asyncio.set_event_loop(asyncio.new_event_loop()) diff --git a/hail/python/test/hail/experimental/test_experimental.py b/hail/python/test/hail/experimental/test_experimental.py index 9c587792d97..43fe0d52df6 100644 --- a/hail/python/test/hail/experimental/test_experimental.py +++ b/hail/python/test/hail/experimental/test_experimental.py @@ -1,6 +1,7 @@ import numpy as np import hail as hl import unittest +import pytest from 
..helpers import * from hail.utils import new_temp_file @@ -430,3 +431,11 @@ def triangle_loop(n, add_f): 'int32', 0, 0) assert_evals_to(calls_recur_from_nested_loop, 15 + 10 + 6 + 3 + 1) + + def test_loop_errors(self): + with pytest.raises(TypeError, match="requested type ndarray does not match inferred type ndarray"): + result = hl.experimental.loop( + lambda f, my_nd: + hl.if_else(my_nd[0, 0] == 1000, my_nd, f(my_nd + 1)), + hl.tndarray(hl.tint32, 2), hl.nd.zeros((20, 10), hl.tfloat64)) + diff --git a/hail/python/test/hail/helpers.py b/hail/python/test/hail/helpers.py index 1b921902114..62e5b21c9c4 100644 --- a/hail/python/test/hail/helpers.py +++ b/hail/python/test/hail/helpers.py @@ -13,11 +13,6 @@ def startTestHailContext(): - try: - asyncio.get_event_loop() - except RuntimeError as err: - if 'There is no current event loop in thread' in err.args[0]: - asyncio.set_event_loop(asyncio.new_event_loop()) global _initialized if not _initialized: backend_name = os.environ.get('HAIL_QUERY_BACKEND', 'spark') diff --git a/hail/python/test/hail/methods/test_impex.py b/hail/python/test/hail/methods/test_impex.py index 2d9b1ad01a8..9cf971d5328 100644 --- a/hail/python/test/hail/methods/test_impex.py +++ b/hail/python/test/hail/methods/test_impex.py @@ -1976,6 +1976,15 @@ def small_dataset_1(self): ] return hl.Table.parallelize(data, key='Sample') + def test_source_file(self): + ht = hl.import_table(resource('variantAnnotations.split.*.tsv'), source_file_field='source') + ht = ht.add_index() + assert ht.aggregate(hl.agg.all( + hl.if_else(ht.idx < 239, + ht.source.endswith('variantAnnotations.split.1.tsv'), + ht.source.endswith('variantAnnotations.split.2.tsv')))) + + @fails_service_backend() def test_read_write_identity(self): ht = self.small_dataset_1() diff --git a/hail/src/main/scala/is/hail/expr/ir/Parser.scala b/hail/src/main/scala/is/hail/expr/ir/Parser.scala index 17c6fda9644..4ae6d03592f 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Parser.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Parser.scala @@ -1451,7 +1451,6 @@ object IRParser { } def table_ir_1(env: IRParserEnvironment)(it: TokenIterator): StackFrame[TableIR] = { - // FIXME TableImport identifier(it) match { case "TableKeyBy" => val keys = identifiers(it) diff --git a/hail/src/main/scala/is/hail/io/fs/GoogleStorageFS.scala b/hail/src/main/scala/is/hail/io/fs/GoogleStorageFS.scala index d772178b6d2..0772aca1322 100644 --- a/hail/src/main/scala/is/hail/io/fs/GoogleStorageFS.scala +++ b/hail/src/main/scala/is/hail/io/fs/GoogleStorageFS.scala @@ -317,8 +317,9 @@ class GoogleStorageFS(serviceAccountKey: String) extends FS { val blobs = storage.list(bucket, BlobListOption.prefix(path), BlobListOption.currentDirectory()) blobs.getValues.iterator.asScala - .map(b => GoogleStorageFileStatus(b)) - .filter(fs => !(fs.isDirectory && fs.getPath == path)) + .map(b => (b, GoogleStorageFileStatus(b))) + .filter { case (b, fs) => !(fs.isDirectory && b.getName == path) } // elide directory markers created by Hadoop + .map { case (b, fs) => fs } .toArray } diff --git a/hail/src/main/scala/is/hail/rvd/RVD.scala b/hail/src/main/scala/is/hail/rvd/RVD.scala index 9fceca609bb..bfba9101151 100644 --- a/hail/src/main/scala/is/hail/rvd/RVD.scala +++ b/hail/src/main/scala/is/hail/rvd/RVD.scala @@ -629,15 +629,15 @@ class RVD( val kRowFieldIdx = typ.kFieldIdx val rowPType = typ.rowType - mapPartitions(typ) { (ctx, it) => - val kUR = new UnsafeRow(kPType) - it.filter { ptr => + filterWithContext[UnsafeRow]( + { (_, _) => new UnsafeRow(kPType) }, + { case 
(kUR, ctx, ptr) => ctx.rvb.start(kType) ctx.rvb.selectRegionValue(rowPType, kRowFieldIdx, ctx.r, ptr) kUR.set(ctx.region, ctx.rvb.end()) !intervalsBc.value.contains(kUR) } - } + ) } def filterToIntervals(intervals: RVDPartitioner): RVD = { diff --git a/hail/src/main/scala/is/hail/rvd/RVDPartitioner.scala b/hail/src/main/scala/is/hail/rvd/RVDPartitioner.scala index b434f197529..c6b96944d04 100644 --- a/hail/src/main/scala/is/hail/rvd/RVDPartitioner.scala +++ b/hail/src/main/scala/is/hail/rvd/RVDPartitioner.scala @@ -290,7 +290,6 @@ class RVDPartitioner( } def keysIfOneToOne(): Option[IndexedSeq[Row]] = { - log.info(s"keysIfOneToOne ${kType} ${this}") if (kType.size == 0) { return None } diff --git a/hail/src/main/scala/is/hail/utils/TextTableReader.scala b/hail/src/main/scala/is/hail/utils/TextTableReader.scala index 5a9dc9ed235..9dc05818379 100644 --- a/hail/src/main/scala/is/hail/utils/TextTableReader.scala +++ b/hail/src/main/scala/is/hail/utils/TextTableReader.scala @@ -10,7 +10,7 @@ import is.hail.expr.ir.lowering.TableStage import is.hail.io.fs.{FS, FileStatus} import is.hail.rvd.RVDPartitioner import is.hail.types._ -import is.hail.types.physical.{PCanonicalStruct, PStruct, PType} +import is.hail.types.physical.{PCanonicalStringRequired, PCanonicalStruct, PStruct, PType} import is.hail.types.virtual._ import is.hail.utils.StringEscapeUtils._ import is.hail.utils._ @@ -44,7 +44,8 @@ case class TextTableReaderParameters( skipBlankLines: Boolean, forceBGZ: Boolean, filterAndReplace: TextInputFilterAndReplace, - forceGZ: Boolean) extends TextReaderOptions { + forceGZ: Boolean, + sourceFileField: Option[String]) extends TextReaderOptions { @transient val typeMap: Map[String, Type] = typeMapStr.mapValues(s => IRParser.parseType(s)).map(identity) val quote: java.lang.Character = if (quoteStr != null) quoteStr(0) else null @@ -245,7 +246,7 @@ object TextTableReader { } def readMetadata(fs: FS, options: TextTableReaderParameters): TextTableReaderMetadata = { - val TextTableReaderParameters(files, _, _, separator, missing, hasHeader, _, _, skipBlankLines, forceBGZ, filterAndReplace, forceGZ) = options + val TextTableReaderParameters(files, _, _, separator, missing, hasHeader, _, _, skipBlankLines, forceBGZ, filterAndReplace, forceGZ, sourceFileField) = options val fileStatuses: Array[FileStatus] = { val status = fs.globAllStatuses(files) @@ -290,6 +291,7 @@ object TextTableReader { duplicates.map { case (pre, post) => s"'$pre' -> '$post'" }.truncatable("\n ")) } + val sourceTypeOption = sourceFileField.map(f => (f, PCanonicalStringRequired)).toIndexedSeq val namesAndTypes = columns.map { c => types.get(c) match { @@ -299,7 +301,7 @@ object TextTableReader { (c, PType.canonical(TString)) } } - TextTableReaderMetadata(fileStatuses, header, PCanonicalStruct(true, namesAndTypes: _*)) + TextTableReaderMetadata(fileStatuses, header, PCanonicalStruct(true, (namesAndTypes ++ sourceTypeOption): _*)) } def apply(fs: FS, params: TextTableReaderParameters): TextTableReader = { @@ -344,11 +346,14 @@ class TextTableReader( val localFullRowType = fullRowPType val bodyPType: TStruct => PStruct = (requestedRowType: TStruct) => localFullRowType.subsetTo(requestedRowType).asInstanceOf[PStruct] val linesBody = lines.body - val nFieldOrig = localFullRowType.size + val nFieldOrig = localFullRowType.size - (params.sourceFileField.isDefined).toInt val transformer = localParams.filterAndReplace.transformer() val body = { (requestedRowType: TStruct) => - val useColIndices = 
requestedRowType.fieldNames.map(localFullRowType.virtualType.fieldIdx) + + val includeFileName = localParams.sourceFileField.exists(requestedRowType.hasField) + val dataFieldNames = if (includeFileName) requestedRowType.fieldNames.init else requestedRowType.fieldNames + val useColIndices = dataFieldNames.map(localFullRowType.virtualType.fieldIdx) val rowFields = requestedRowType.fields.toArray val requestedPType = bodyPType(requestedRowType) @@ -391,6 +396,9 @@ class TextTableReader( i += 1 } + if (includeFileName) + rvb.addString(bline.file) + rvb.endStruct() rvb.end() true diff --git a/letsencrypt/domains.txt b/letsencrypt/domains.txt index 6ca081b0f23..549ef8e0d18 100644 --- a/letsencrypt/domains.txt +++ b/letsencrypt/domains.txt @@ -15,3 +15,4 @@ query.{{ domain }} workshop.{{ domain }} atgu.{{ domain }} grafana.{{ domain }} +prometheus.{{ domain }} diff --git a/monitoring/Makefile b/monitoring/Makefile index eaf1da2a68c..75ee5a145aa 100644 --- a/monitoring/Makefile +++ b/monitoring/Makefile @@ -10,6 +10,7 @@ PYTHON := PYTHONPATH=$(PYTHONPATH)../hail/python:../gear:../web_common python3 check: $(PYTHON) -m flake8 monitoring $(PYTHON) -m pylint --rcfile ../pylintrc monitoring --score=n + curlylint . ../check-sql.sh .PHONY: build diff --git a/notebook/Makefile b/notebook/Makefile index 74ed712df71..a2fe2e40a7b 100644 --- a/notebook/Makefile +++ b/notebook/Makefile @@ -10,6 +10,7 @@ PYTHON := PYTHONPATH=$${PYTHONPATH:+$${PYTHONPATH}:}$(EXTRA_PYTHONPATH) python3 check: $(PYTHON) -m flake8 notebook $(PYTHON) -m pylint --rcfile ../pylintrc notebook --score=n + curlylint . ../check-sql.sh .PHONY: build diff --git a/notebook/notebook/templates/workshop/index.html b/notebook/notebook/templates/workshop/index.html index 10af2baf7c9..853ddc421b9 100644 --- a/notebook/notebook/templates/workshop/index.html +++ b/notebook/notebook/templates/workshop/index.html @@ -6,7 +6,7 @@

Welcome!

{% if userdata %}

Welcome to the Hail-powered {{ userdata['workshop_name'] }} workshop!

{% else %} -

Welcome to the Hail workshop service! +

Welcome to the Hail workshop service!

{% endif %}

Navigate to the Notebook tab to launch Jupyter with workshop materials installed, or diff --git a/prometheus/Dockerfile.nginx b/prometheus/Dockerfile.nginx new file mode 100644 index 00000000000..cee46ac7600 --- /dev/null +++ b/prometheus/Dockerfile.nginx @@ -0,0 +1,12 @@ +FROM {{ hail_ubuntu_image.image }} + +RUN hail-apt-get-install nginx + +RUN rm -f /etc/nginx/sites-enabled/default && \ + rm -f /etc/nginx/nginx.conf +ADD nginx.conf.out /etc/nginx/nginx.conf + +RUN ln -sf /dev/stdout /var/log/nginx/access.log +RUN ln -sf /dev/stderr /var/log/nginx/error.log + +CMD ["nginx", "-g", "daemon off;"] diff --git a/prometheus/Makefile b/prometheus/Makefile new file mode 100644 index 00000000000..94d3fd96551 --- /dev/null +++ b/prometheus/Makefile @@ -0,0 +1,24 @@ +include ../config.mk + +.PHONY: build push deploy + +PROM_NGINX_LATEST = gcr.io/$(PROJECT)/prom_nginx:latest +PROM_NGINX_IMAGE = gcr.io/$(PROJECT)/prom_nginx:$(shell docker images -q --no-trunc prom_nginx | sed -e 's,[^:]*:,,') + +build: + $(MAKE) -C ../docker hail-ubuntu + -docker pull $(PROM_NGINX_LATEST) + python3 ../ci/jinja2_render.py '{"hail_ubuntu_image": {"image": "hail-ubuntu"}}' Dockerfile.nginx Dockerfile.nginx.out + python3 ../ci/jinja2_render.py '{"deploy": $(DEPLOY), "default_ns": {"name": "$(NAMESPACE)"}}' nginx.conf nginx.conf.out + docker build -t prom_nginx -f Dockerfile.nginx.out --cache-from prom_nginx,$(PROM_NGINX_LATEST),hail-ubuntu . + +push: build + docker tag prom_nginx $(PROM_NGINX_LATEST) + docker push $(PROM_NGINX_LATEST) + docker tag prom_nginx $(PROM_NGINX_IMAGE) + docker push $(PROM_NGINX_IMAGE) + +deploy: push + ! [ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default + python3 ../ci/jinja2_render.py '{"deploy":$(DEPLOY),"default_ns":{"name":"$(NAMESPACE)"}, "prom_nginx_image": {"image": "$(PROM_NGINX_IMAGE)"}}' prometheus.yaml prometheus.yaml.out + kubectl -n $(NAMESPACE) apply -f prometheus.yaml.out diff --git a/prometheus/nginx.conf b/prometheus/nginx.conf new file mode 100644 index 00000000000..9e94b6de88b --- /dev/null +++ b/prometheus/nginx.conf @@ -0,0 +1,101 @@ +worker_processes auto; +pid /run/nginx.pid; +include /etc/nginx/modules-enabled/*.conf; + +events { + worker_connections 768; +} + +http { + + sendfile on; + tcp_nopush on; + tcp_nodelay on; + keepalive_timeout 65; + types_hash_max_size 2048; + server_names_hash_bucket_size 128; + + include /etc/nginx/mime.types; + default_type application/octet-stream; + + ssl_protocols TLSv1 TLSv1.1 TLSv1.2; # Dropping SSLv3, ref: POODLE + ssl_prefer_server_ciphers on; + + log_format json-log escape=json '{' + '"message":"$scheme $request done in ${request_time}s: $status",' + '"response_status":$status,' + '"request_duration":$request_time,' + '"remote_address":"$remote_addr",' + '"x_real_ip":"$http_x_real_ip",' + '"request_start_time":"$time_local",' + '"body_bytes_sent":"$body_bytes_sent",' + '"http_referer":"$http_referer",' + '"http_user_agent":"$http_user_agent"' + '}'; + + access_log /var/log/nginx/access.log json-log; + error_log /var/log/nginx/error.log; + + gzip on; + + include /ssl-config/ssl-config-http.conf; + map $http_x_forwarded_proto $updated_scheme { + default $http_x_forwarded_proto; + '' $scheme; + } + map $http_x_forwarded_host $updated_host { + default $http_x_forwarded_host; + '' $http_host; + } + map $http_upgrade $connection_upgrade { + default upgrade; + '' close; + } + + server { + server_name prometheus.*; + + location = /auth { + internal; +{% if deploy %} + proxy_pass 
https://auth/api/v1alpha/verify_dev_credentials; +{% else %} + proxy_pass https://auth/{{ default_ns.name }}/auth/api/v1alpha/verify_dev_credentials; +{% endif %} + include /ssl-config/ssl-config-proxy.conf; + } + + location = /healthcheck { + return 204; + } + + location / { + auth_request /auth; + + proxy_pass http://127.0.0.1:9090/; + + proxy_set_header Host $http_host; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Host $updated_host; + proxy_set_header X-Forwarded-Proto $updated_scheme; + proxy_set_header X-Real-IP $http_x_real_ip; + + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection $connection_upgrade; + } + + error_page 401 = @error401; + + location @error401 { +{% if deploy %} + return 302 https://auth.hail.is/login?next=https://$http_host$request_uri; +{% else %} + return 302 https://internal.hail.is/{{ default_ns.name }}/auth/login?next=https://internal.hail.is/{{ default_ns.name }}/prometheus; +{% endif %} + } + + + listen 443 ssl; + listen [::]:443 ssl; + } +} diff --git a/prometheus/prometheus.yaml b/prometheus/prometheus.yaml new file mode 100644 index 00000000000..89a88da7205 --- /dev/null +++ b/prometheus/prometheus.yaml @@ -0,0 +1,215 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: prometheus + namespace: {{ default_ns.name }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: prometheus + namespace: {{ default_ns.name }} +rules: + - apiGroups: [""] + resources: + - nodes + - nodes/proxy + - services + - endpoints + - pods + verbs: ["get", "list", "watch"] + - apiGroups: + - extensions + resources: + - ingresses + verbs: ["get", "list", "watch"] + - nonResourceURLs: ["/metrics"] + verbs: ["get"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: prometheus +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: prometheus +subjects: + - kind: ServiceAccount + name: prometheus + namespace: {{ default_ns.name }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: etc-prometheus + namespace: {{ default_ns.name }} +data: + prometheus.yml: | + global: + scrape_interval: 15s + scrape_configs: + - job_name: "kubernetes-kubelet" + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc.cluster.local:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics + - job_name: "kubernetes-cadvisor" + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc.cluster.local:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor + - job_name: "kubernetes-apiservers" + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + 
insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - api_server: null + role: endpoints + namespaces: + names: [] + relabel_configs: + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] + separator: ; + regex: default;kubernetes;https + replacement: $1 + action: keep + - job_name: "kubernetes-pods" + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: pod + relabel_configs: + - source_labels: [__meta_kubernetes_pod_label_grafanak8sapp] + action: keep + regex: true + - action: labelmap + regex: __meta_kubernetes_pod_label_(.+) + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: kubernetes_namespace + - source_labels: [__meta_kubernetes_pod_name] + action: replace + target_label: kubernetes_pod_name +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + labels: + name: prometheus + name: prometheus + namespace: {{ default_ns.name }} +spec: + serviceName: "prometheus" + selector: + matchLabels: + app: prometheus + replicas: 1 + template: + metadata: + labels: + app: prometheus + spec: + priorityClassName: infrastructure + serviceAccountName: prometheus + containers: + - name: prometheus + image: prom/prometheus:v2.19.2 + imagePullPolicy: Always + command: + - "/bin/prometheus" + - "--config.file=/etc/prometheus/prometheus.yml" + - "--storage.tsdb.path=/prometheus" + - "--web.console.libraries=/usr/share/prometheus/console_libraries" + - "--web.console.templates=/usr/share/prometheus/consoles" + - "--web.enable-lifecycle" +{% if deploy %} + - "--web.external-url=https://prometheus.hail.is/" + - "--web.route-prefix=/" +{% else %} + - "--web.external-url=https://internal.hail.is/{{ default_ns.name }}/prometheus/" + - "--web.route-prefix=/{{ default_ns.name }}/prometheus/" +{% endif %} + ports: + - containerPort: 9090 + protocol: TCP + volumeMounts: + - mountPath: "/etc/prometheus" + name: etc-prometheus + - mountPath: "/prometheus" + name: prometheus-storage + resources: + requests: + cpu: "1" + memory: 5G + limits: + cpu: "1" + memory: 10G + - name: nginx + image: {{ prom_nginx_image.image }} + resources: + requests: + cpu: "20m" + memory: "20M" + limits: + cpu: "1" + memory: "1G" + ports: + - containerPort: 443 + volumeMounts: + - name: ssl-config-prometheus + mountPath: /ssl-config + readOnly: true + readinessProbe: + tcpSocket: + port: 443 + initialDelaySeconds: 5 + periodSeconds: 5 + volumes: + - name: etc-prometheus + configMap: + name: etc-prometheus + - name: ssl-config-prometheus + secret: + optional: false + secretName: ssl-config-prometheus + volumeClaimTemplates: + - metadata: + name: prometheus-storage + namespace: {{ default_ns.name }} + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 50Gi diff --git a/query/query/query.py b/query/query/query.py index d7649b1b21d..0a394accdcd 100644 --- a/query/query/query.py +++ b/query/query/query.py @@ -6,7 +6,6 @@ import logging import uvloop import asyncio -import aiohttp from aiohttp import web import kubernetes_asyncio as kube from collections import defaultdict @@ -129,17 +128,12 @@ async def handle_ws_response(request, userdata, endpoint, f): user_queries[body['token']] = query try: - receive = asyncio.ensure_future( - ws.receive() - ) # receive automatically 
ping-pongs which keeps the socket alive + receive = asyncio.ensure_future(ws.receive_str()) # receive automatically ping-pongs which keeps the socket alive await asyncio.wait([receive, query], return_when=asyncio.FIRST_COMPLETED) if receive.done(): # we expect no messages from the client response = receive.result() - assert response.type in ( - aiohttp.WSMsgType.CLOSE, - aiohttp.WSMsgType.CLOSING, - ), f'{endpoint}: Received websocket message. Expected CLOSE or CLOSING, got {response}' + raise AssertionError(f'{endpoint}: client broke the protocol by sending: {response}') if not query.done(): return if query.exception() is not None: @@ -148,7 +142,7 @@ async def handle_ws_response(request, userdata, endpoint, f): await ws.send_json({'status': 500, 'value': exc_str}) else: await ws.send_json({'status': 200, 'value': query.result()}) - assert await ws.receive_str() == 'bye' + assert (await receive) == 'bye' del user_queries[body['token']] finally: receive.cancel() diff --git a/router/deployment.yaml b/router/deployment.yaml index 26f07318a60..59380108c13 100644 --- a/router/deployment.yaml +++ b/router/deployment.yaml @@ -269,6 +269,18 @@ spec: selector: app: grafana --- +apiVersion: v1 +kind: Service +metadata: + name: prometheus +spec: + ports: + - port: 443 + protocol: TCP + targetPort: 443 + selector: + app: prometheus +--- apiVersion: apps/v1 kind: Deployment metadata: diff --git a/router/router.nginx.conf.in b/router/router.nginx.conf.in index 23fea5a4f1a..4eca32aec79 100644 --- a/router/router.nginx.conf.in +++ b/router/router.nginx.conf.in @@ -111,6 +111,18 @@ server { listen [::]:443 ssl; } +server { + server_name prometheus.*; + + location / { + proxy_pass https://prometheus/; + include /etc/nginx/proxy.conf; + } + + listen 443 ssl; + listen [::]:443 ssl; +} + server { server_name notebook.*; @@ -223,37 +235,6 @@ server { listen [::]:443 ssl; } -server { - server_name ukbb-rg.*; - - location /rg_browser { - proxy_pass http://ukbb-rg-browser.ukbb-rg; - proxy_set_header Host $http_host; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Host $updated_host; - proxy_set_header X-Forwarded-Proto $updated_scheme; - proxy_set_header X-Real-IP $http_x_real_ip; - - proxy_http_version 1.1; - proxy_set_header Upgrade $http_upgrade; - proxy_set_header Connection $connection_upgrade; - proxy_read_timeout 20d; - proxy_buffering off; - } - - location / { - proxy_pass http://ukbb-rg-static.ukbb-rg; - proxy_set_header Host $http_host; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Host $updated_host; - proxy_set_header X-Forwarded-Proto $updated_scheme; - proxy_set_header X-Real-IP $http_x_real_ip; - } - - listen 443 ssl; - listen [::]:443 ssl; -} - server { server_name auth.*; diff --git a/tls/config.yaml b/tls/config.yaml index b3e1a9af33b..0ac5c9a5357 100644 --- a/tls/config.yaml +++ b/tls/config.yaml @@ -98,3 +98,6 @@ principals: - name: grafana domain: grafana kind: nginx +- name: prometheus + domain: prometheus + kind: nginx diff --git a/web_common/Makefile b/web_common/Makefile index 867d53a2bcc..38b4fe1a70b 100644 --- a/web_common/Makefile +++ b/web_common/Makefile @@ -5,3 +5,4 @@ PYTHON := PYTHONPATH=$${PYTHONPATH:+$${PYTHONPATH}:}$(EXTRA_PYTHONPATH) python3 check: $(PYTHON) -m flake8 web_common $(PYTHON) -m pylint --rcfile ../pylintrc web_common --score=n + curlylint . 
diff --git a/web_common/web_common/styles/main.scss b/web_common/web_common/styles/main.scss index 660efe617fd..7bcb3669591 100644 --- a/web_common/web_common/styles/main.scss +++ b/web_common/web_common/styles/main.scss @@ -161,6 +161,11 @@ a { left: 20px; } +.ci-caret:after { + /* 18 (measured width of CI) / 2 + 9 - 20 / 2 = 8px */ + left: 8px; +} + .monitoring-caret:after { /* 82 (measured width of Monitoring) / 2 + 9 - 20 / 2 = 42px */ left: 40px; diff --git a/web_common/web_common/templates/header.html b/web_common/web_common/templates/header.html index af6a87668ab..269da8938af 100644 --- a/web_common/web_common/templates/header.html +++ b/web_common/web_common/templates/header.html @@ -40,7 +40,15 @@

{% if userdata['is_developer'] == 1 %} - CI +
+
+ CI +
+
+ Me +
+
+
{% endif %} {% if userdata['is_developer'] == 1 %} diff --git a/web_common/web_common/templates/layout.html b/web_common/web_common/templates/layout.html index 036300c2b02..31f85f72137 100644 --- a/web_common/web_common/templates/layout.html +++ b/web_common/web_common/templates/layout.html @@ -2,27 +2,27 @@ Hail | {% block title %}{% endblock %} - - - - - + + + + + {% block head %}{% endblock %}
{% if notebook_service is defined and notebook_service == 'workshop' %} - {% include 'workshop/header.html' %} + {% include 'workshop/header.html' %} {% else %} - {% include 'header.html' %} + {% include 'header.html' %} {% endif %}
- {% if message is defined %} -
- {{ message['text'] }} -
- {% endif %} - {%block content %}{% endblock %} + {% if message is defined %} +
+ {{ message['text'] }} +
+ {% endif %} + {% block content %}{% endblock %}
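One note on the website/Makefile change just below: the deploy recipe's fix only adds the missing final closing brace to the config string handed to ci/jinja2_render.py. Assuming, as the fix implies, that the script json-decodes its first argument, the corrected string now parses; a small illustration with placeholder values:

    import json

    # Same shape as the deploy target's config argument, with placeholder values.
    config = json.loads(
        '{"global": {"domain": "example.org"},'
        ' "default_ns": {"name": "default"},'
        ' "code": {"sha": "abcdef123456"},'
        ' "deploy": true,'
        ' "website_image": {"image": "gcr.io/example/website:abcdef123456"}}'
    )
    assert config['website_image']['image'].endswith('abcdef123456')

Without the final closing brace the same call raises json.decoder.JSONDecodeError, which is what the one-character fix below addresses.
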
diff --git a/website/Makefile b/website/Makefile index b1d15577e71..67dfd97fd34 100644 --- a/website/Makefile +++ b/website/Makefile @@ -4,6 +4,9 @@ include ../config.mk IMAGE = gcr.io/$(PROJECT)/website:$(shell docker images -q --no-trunc website | sed -e 's,[^:]*:,,') +check: + curlylint . + docs: $(MAKE) -C ../hail hail-docs-no-test batch-docs tar czf docs.tar.gz -C ../hail/build/www . @@ -27,7 +30,7 @@ push: build deploy: push ! [ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default $(MAKE) -C ../docker hail-ubuntu - python3 ../ci/jinja2_render.py '{"global": {"domain":"$(DOMAIN)"},"default_ns":{"name":"$(NAMESPACE)"},"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"website_image":{"image":"$(IMAGE)"}' deployment.yaml deployment.yaml.out + python3 ../ci/jinja2_render.py '{"global": {"domain":"$(DOMAIN)"},"default_ns":{"name":"$(NAMESPACE)"},"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"website_image":{"image":"$(IMAGE)"}}' deployment.yaml deployment.yaml.out kubectl -n $(NAMESPACE) apply -f deployment.yaml.out clean: diff --git a/website/website/templates/base.html b/website/website/templates/base.html index 66acab919c7..148ef1e4bd4 100644 --- a/website/website/templates/base.html +++ b/website/website/templates/base.html @@ -5,17 +5,17 @@ - + - - + + - + - - + + - + +{% endblock %} {% block content %}

{{ pr.title }} #{{ pr.number }}

@@ -20,7 +23,7 @@

{{ pr.title }}

Jobs

- {{ job_table(jobs) }} + {{ job_table(jobs, "jobs", "jobsSearchBar") }} {% elif exception is defined %}

Build error:

@@ -57,4 +60,7 @@ 

Build History

{% else %} No builds. {% endif %} + {% endblock %} diff --git a/hail/python/hail/backend/service_backend.py b/hail/python/hail/backend/service_backend.py index bfe5d7be5e0..ce4237d940e 100644 --- a/hail/python/hail/backend/service_backend.py +++ b/hail/python/hail/backend/service_backend.py @@ -87,6 +87,8 @@ def __init__(self, billing_project: str = None, bucket: str = None, *, deploy_co if bucket is None: bucket = get_user_config().get('batch', 'bucket', fallback=None) + if bucket is None: + bucket = os.environ.get('HAIL_BUCKET') if bucket is None: raise ValueError( 'the bucket parameter of ServiceBackend must be set ' diff --git a/hail/python/hail/context.py b/hail/python/hail/context.py index 43aefbf34db..77abe84617f 100644 --- a/hail/python/hail/context.py +++ b/hail/python/hail/context.py @@ -160,6 +160,10 @@ def init(sc=None, app_name='Hail', master=None, local='local[*]', To initialize Hail explicitly with non-default arguments, be sure to do so directly after importing the module, as in the above example. + To facilitate the migration from Spark to the ServiceBackend, this method + calls init_service when the environment variable HAIL_QUERY_BACKEND is set + to "service". + Note ---- If a :class:`pyspark.SparkContext` is already running, then Hail must be @@ -214,8 +218,6 @@ def init(sc=None, app_name='Hail', master=None, local='local[*]', Local temporary directory. Used on driver and executor nodes. Must use the file scheme. Defaults to TMPDIR, or /tmp. """ - from hail.backend.spark_backend import SparkBackend - if Env._hc: if idempotent: return @@ -223,6 +225,19 @@ def init(sc=None, app_name='Hail', master=None, local='local[*]', warning('Hail has already been initialized. If this call was intended to change configuration,' ' close the session with hl.stop() first.') + if os.environ.get('HAIL_QUERY_BACKEND') == 'service': + return init_service( + log=log, + quiet=quiet, + append=append, + tmpdir=tmp_dir, + local_tmpdir=local_tmpdir, + default_reference=default_reference, + global_seed=global_seed, + skip_logging_configuration=skip_logging_configuration) + + from hail.backend.spark_backend import SparkBackend + log = _get_log(log) tmpdir = _get_tmpdir(tmp_dir) local_tmpdir = _get_local_tmpdir(local_tmpdir) diff --git a/hail/python/hail/expr/expressions/typed_expressions.py b/hail/python/hail/expr/expressions/typed_expressions.py index 3971fd0058f..a31de19459a 100644 --- a/hail/python/hail/expr/expressions/typed_expressions.py +++ b/hail/python/hail/expr/expressions/typed_expressions.py @@ -17,7 +17,7 @@ identity, nullable, tupleof, sliceof, dictof from hail.utils.java import Env, warning from hail.utils.linkedlist import LinkedList -from hail.utils.misc import wrap_to_list, get_nice_field_error, get_nice_attr_error +from hail.utils.misc import wrap_to_list, wrap_to_tuple, get_nice_field_error, get_nice_attr_error import numpy as np @@ -4099,6 +4099,36 @@ def __matmul__(self, other): return res if result_ndim > 0 else res[()] + @typecheck_method(axis=nullable(oneof(int, tupleof(int)))) + def sum(self, axis=None): + """Sum out one or more axes of an ndarray. + + Parameters + ---------- + axis : :class:`int` :class:`tuple` + The axis or axes to sum out. 
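+ If ``axis`` is None (the default), all axes are summed out.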
+ + Returns + ------- + :class:`.NDArrayNumericExpression` + """ + if axis is None: + axis = tuple(range(self.ndim)) + + axis = wrap_to_tuple(axis) + res_ir = ir.NDArrayAgg(self._ir, axis) + + axes_set = set(axis) + if len(axes_set) < len(axis): + raise ValueError("duplicate value in 'axis'") + for element in axes_set: + if element < 0 or element >= self.ndim: + raise ValueError(f"axis {element} is out of bounds for ndarray of dimension {self.ndim}") + + num_axes_deleted = len(axes_set) + + return construct_expr(res_ir, tndarray(self._type.element_type, self.ndim - num_axes_deleted), self._indices, self._aggregations) + scalars = {tbool: BooleanExpression, tint32: Int32Expression, diff --git a/hail/python/hail/methods/statgen.py b/hail/python/hail/methods/statgen.py index e89bd520223..aa5812468fd 100644 --- a/hail/python/hail/methods/statgen.py +++ b/hail/python/hail/methods/statgen.py @@ -434,7 +434,7 @@ def select_array_indices(hl_array, indices): return indices.map(lambda i: hl_array[i]) def dot_rows_with_themselves(matrix): - return (matrix * matrix) @ hl.nd.ones(matrix.shape[1]) + return (matrix * matrix).sum(1) def no_missing(hail_array): return hail_array.all(lambda element: hl.is_defined(element)) @@ -491,7 +491,7 @@ def process_block(block): def process_y_group(idx): X = hl.nd.array(block[entries_field_name].map(lambda row: mean_impute(select_array_indices(row, ht.kept_samples[idx])))).T n = ht.ns[idx] - sum_x = (X.T @ hl.nd.ones((n,))) + sum_x = X.sum(0) Qtx = ht.__cov_Qts[idx] @ X ytx = ht.__y_nds[idx].T @ X xyp = ytx - (ht.__Qtys[idx].T @ Qtx) diff --git a/hail/python/test/hail/expr/test_ndarrays.py b/hail/python/test/hail/expr/test_ndarrays.py index eeaddc0ded7..f4f7743bdc4 100644 --- a/hail/python/test/hail/expr/test_ndarrays.py +++ b/hail/python/test/hail/expr/test_ndarrays.py @@ -479,16 +479,25 @@ def test_ndarray_save(): assert np.array_equal(expected, actual) -@skip_unless_spark_backend() -@run_with_cxx_compile() def test_ndarray_sum(): np_m = np.array([[1, 2], [3, 4]]) m = hl.nd.array(np_m) - assert_all_eval_to( + assert_ndarrays_eq( (m.sum(axis=0), np_m.sum(axis=0)), (m.sum(axis=1), np_m.sum(axis=1)), - (m.sum(), np_m.sum())) + (m.sum(), np_m.sum()), + (m.sum(tuple([])), np_m.sum(tuple([]))), + (m.sum((0, 1)), np_m.sum((0, 1))) + ) + + with pytest.raises(ValueError) as exc: + m.sum(3) + assert "out of bounds for ndarray of dimension 2" in str(exc.value) + + with pytest.raises(ValueError) as exc: + m.sum((1, 1)) + assert "duplicate" in str(exc.value) def test_ndarray_transpose(): diff --git a/hail/src/main/scala/is/hail/expr/ir/AbstractMatrixTableSpec.scala b/hail/src/main/scala/is/hail/expr/ir/AbstractMatrixTableSpec.scala index dca940ed71b..cc2c4e57dd1 100644 --- a/hail/src/main/scala/is/hail/expr/ir/AbstractMatrixTableSpec.scala +++ b/hail/src/main/scala/is/hail/expr/ir/AbstractMatrixTableSpec.scala @@ -43,7 +43,10 @@ object RelationalSpec { } if (!FileFormat.version.supports(fileVersion)) - fatal(s"incompatible file format when reading: $path\n supported version: ${ FileFormat.version }, found $fileVersion") + fatal(s"incompatible file format when reading: $path\n supported version: ${ FileFormat.version }, found $fileVersion" + + s"\n The cause of this error is usually an attempt to use an older version of Hail to read files " + + s"generated by a newer version. This is not supported (Hail native files are back-compatible, but not forward-compatible)." 
+ + s"\n To read this file, use a newer version of Hail.") jv } diff --git a/hail/src/main/scala/is/hail/expr/ir/Emit.scala b/hail/src/main/scala/is/hail/expr/ir/Emit.scala index 6f31217657c..7035797f6e2 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Emit.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Emit.scala @@ -1678,6 +1678,7 @@ class Emit[C]( case x: NDArrayConcat => emitDeforestedNDArrayI(x) case x: NDArraySlice => emitDeforestedNDArrayI(x) case x: NDArrayFilter => emitDeforestedNDArrayI(x) + case x: NDArrayAgg => emitDeforestedNDArrayI(x) case x@RunAgg(body, result, states) => val newContainer = AggContainer.fromBuilder(cb, states.toArray, "run_agg") emitVoid(body, container = Some(newContainer)) @@ -2799,6 +2800,33 @@ class Emit[C]( } }) } + case NDArrayAgg(child, axesToSumOut) => + deforest(child).map(cb) { childEmitter => + val childDims = child.typ.asInstanceOf[TNDArray].nDims + val axesToKeep = (0 until childDims).filter(axis => !axesToSumOut.contains(axis)) + val newOutputShape = axesToKeep.map(idx => childEmitter.outputShape(idx)) + val newOutputShapeComplement = axesToSumOut.map(idx => childEmitter.outputShape(idx)) + + new NDArrayEmitter(newOutputShape) { + override def outputElement(cb: EmitCodeBuilder, idxVars: IndexedSeq[Value[Long]]): PCode = { + val numericElementType = coerce[PNumeric](child.pType.asInstanceOf[PNDArray].elementType) + val runningSum = NumericPrimitives.newLocal(cb, "ndarray_agg_running_sum", numericElementType.virtualType) + cb.assign(runningSum, numericElementType.zero) + + SNDArray.forEachIndex(cb, newOutputShapeComplement, "NDArrayAgg_Sum_loop"){ case (cb, coordsBeingSummedOut) => + // Build the new list we need to pass down into child + val idxVarsIt = idxVars.toIterator + val summedOutIt = coordsBeingSummedOut.toIterator + val fullIndicesForChild = (0 until childDims).map(idx => + if (axesToSumOut.contains(idx)) summedOutIt.next() else idxVarsIt.next() + ) + cb.assign(runningSum, numericElementType.add(runningSum, childEmitter.outputElement(cb, fullIndicesForChild).code)) + } + + PCode.apply(numericElementType, runningSum) + } + } + } case _ => val ndI = emit(x) ndI.map(cb){ ndPCode => diff --git a/hail/src/main/scala/is/hail/expr/ir/NumericPrimitives.scala b/hail/src/main/scala/is/hail/expr/ir/NumericPrimitives.scala new file mode 100644 index 00000000000..559156b280e --- /dev/null +++ b/hail/src/main/scala/is/hail/expr/ir/NumericPrimitives.scala @@ -0,0 +1,17 @@ +package is.hail.expr.ir + +import is.hail.asm4s.Settable +import is.hail.types.virtual.{Type, TInt32, TInt64, TFloat32, TFloat64} + +object NumericPrimitives { + + def newLocal(cb: EmitCodeBuilder, name: String, typ: Type): Settable[Any] = { + coerce[Any](typ match { + case TInt32 => cb.newLocal[Int](name) + case TInt64 => cb.newLocal[Long](name) + case TFloat32 => cb.newLocal[Float](name) + case TFloat64 => cb.newLocal[Double](name) + }) + } + +} diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/PrimitiveOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/PrimitiveOrdering.scala index b2f43354fd1..7d8c217e59e 100644 --- a/hail/src/main/scala/is/hail/expr/ir/orderings/PrimitiveOrdering.scala +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/PrimitiveOrdering.scala @@ -13,17 +13,17 @@ object Int32Ordering { val type2: SInt32 = t2 def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = - Code.invokeStatic2[java.lang.Integer, Int, Int, Int]("compare", x.tcode[Int], y.tcode[Int]) + Code.invokeStatic2[java.lang.Integer, Int, Int, Int]("compare", 
x.asInt.intCode(cb), y.asInt.intCode(cb)) - def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Int] < y.tcode[Int] + def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asInt.intCode(cb) < y.asInt.intCode(cb) - def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Int] <= y.tcode[Int] + def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asInt.intCode(cb) <= y.asInt.intCode(cb) - def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Int] > y.tcode[Int] + def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asInt.intCode(cb) > y.asInt.intCode(cb) - def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Int] >= y.tcode[Int] + def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asInt.intCode(cb) >= y.asInt.intCode(cb) - def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Int].ceq(y.tcode[Int]) + def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asInt.intCode(cb).ceq(y.asInt.intCode(cb)) } } } @@ -37,17 +37,17 @@ object Int64Ordering { val type2: SInt64 = t2 def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = - Code.invokeStatic2[java.lang.Long, Long, Long, Int]("compare", x.tcode[Long], y.tcode[Long]) + Code.invokeStatic2[java.lang.Long, Long, Long, Int]("compare", x.asLong.longCode(cb), y.asLong.longCode(cb)) - def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Long] < y.tcode[Long] + def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asLong.longCode(cb) < y.asLong.longCode(cb) - def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Long] <= y.tcode[Long] + def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asLong.longCode(cb) <= y.asLong.longCode(cb) - def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Long] > y.tcode[Long] + def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asLong.longCode(cb) > y.asLong.longCode(cb) - def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Long] >= y.tcode[Long] + def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asLong.longCode(cb) >= y.asLong.longCode(cb) - def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Long].ceq(y.tcode[Long]) + def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asLong.longCode(cb).ceq(y.asLong.longCode(cb)) } } } @@ -60,17 +60,17 @@ object Float32Ordering { val type2: SFloat32 = t2 def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = - Code.invokeStatic2[java.lang.Float, Float, Float, Int]("compare", x.tcode[Float], y.tcode[Float]) + Code.invokeStatic2[java.lang.Float, Float, Float, Int]("compare", x.asFloat.floatCode(cb), y.asFloat.floatCode(cb)) - def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Float] < y.tcode[Float] + def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asFloat.floatCode(cb) < y.asFloat.floatCode(cb) - def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Float] <= y.tcode[Float] + def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asFloat.floatCode(cb) <= y.asFloat.floatCode(cb) - def _gtNonnull(cb: EmitCodeBuilder, x: 
PCode, y: PCode): Code[Boolean] = x.tcode[Float] > y.tcode[Float] + def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asFloat.floatCode(cb) > y.asFloat.floatCode(cb) - def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Float] >= y.tcode[Float] + def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asFloat.floatCode(cb) >= y.asFloat.floatCode(cb) - def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Float].ceq(y.tcode[Float]) + def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asFloat.floatCode(cb).ceq(y.asFloat.floatCode(cb)) } } } @@ -83,17 +83,17 @@ object Float64Ordering { val type2: SFloat64 = t2 def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = - Code.invokeStatic2[java.lang.Double, Double, Double, Int]("compare", x.tcode[Double], y.tcode[Double]) + Code.invokeStatic2[java.lang.Double, Double, Double, Int]("compare", x.asDouble.doubleCode(cb), y.asDouble.doubleCode(cb)) - def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Double] < y.tcode[Double] + def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asDouble.doubleCode(cb) < y.asDouble.doubleCode(cb) - def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Double] <= y.tcode[Double] + def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asDouble.doubleCode(cb) <= y.asDouble.doubleCode(cb) - def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Double] > y.tcode[Double] + def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asDouble.doubleCode(cb) > y.asDouble.doubleCode(cb) - def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Double] >= y.tcode[Double] + def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asDouble.doubleCode(cb) >= y.asDouble.doubleCode(cb) - def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.tcode[Double].ceq(y.tcode[Double]) + def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asDouble.doubleCode(cb).ceq(y.asDouble.doubleCode(cb)) } } } @@ -106,7 +106,7 @@ object BooleanOrdering { val type2: SBoolean = t2 def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = - Code.invokeStatic2[java.lang.Boolean, Boolean, Boolean, Int]("compare", x.tcode[Boolean], y.tcode[Boolean]) + Code.invokeStatic2[java.lang.Boolean, Boolean, Boolean, Int]("compare", x.asBoolean.boolCode(cb), y.asBoolean.boolCode(cb)) } } } diff --git a/libhail/src/hail/query/backend/compile.hpp b/libhail/src/hail/query/backend/compile.hpp index 59b02aa6169..3beae5a2d1b 100644 --- a/libhail/src/hail/query/backend/compile.hpp +++ b/libhail/src/hail/query/backend/compile.hpp @@ -47,7 +47,7 @@ class CompileFunction { /* Indexed by parameter index, the entry is the index of the first `llvm_function` parameter. 
*/ std::vector param_llvm_start; - + llvm::Function *llvm_function; // FIXME rename llvm_builder llvm::IRBuilder<> llvm_ir_builder; @@ -56,7 +56,7 @@ class CompileFunction { // FIXME move to SType const SType *get_default_stype(const Type *t); - + llvm::Type *get_llvm_type(PrimitiveType pt) const; llvm::AllocaInst *make_entry_alloca(llvm::Type *llvm_type); diff --git a/website/website/website.py b/website/website/website.py index e707ea035f5..a8b515f4505 100644 --- a/website/website/website.py +++ b/website/website/website.py @@ -9,7 +9,7 @@ from hailtop.config import get_deploy_config from hailtop.tls import internal_server_ssl_context from hailtop.hail_logging import AccessLogger -from gear import setup_aiohttp_session, web_maybe_authenticated_user +from gear import setup_aiohttp_session, web_maybe_authenticated_user, monitor_endpoint from web_common import (setup_aiohttp_jinja2, setup_common_static_routes, render_template, sass_compile) @@ -47,9 +47,7 @@ async def get_robots(request): # pylint: disable=unused-argument DOCS_PATH = f'{MODULE_PATH}/docs/' STATIC_DOCS_PATHS = ['0.2/_static', '0.2/_sources', - 'batch/_static', - 'batch/_images', - 'batch/_sources'] + 'batch'] FQ_STATIC_DOCS_PATHS: Set[str] = set() @@ -59,20 +57,21 @@ async def get_robots(request): # pylint: disable=unused-argument docs_pages = set( - x[0][len(DOCS_PATH):] + '/' + y - for x in os.walk(DOCS_PATH) - if x[0] not in FQ_STATIC_DOCS_PATHS - for y in x[2]) + dirname[len(DOCS_PATH):] + '/' + file + for dirname, _, filenames in os.walk(DOCS_PATH) + if dirname not in FQ_STATIC_DOCS_PATHS + for file in filenames) @routes.get('/docs/{tail:.*}') +@monitor_endpoint @web_maybe_authenticated_user async def serve_docs(request, userdata): tail = request.match_info['tail'] if tail in docs_pages: if tail.endswith('.html'): return await render_template('website', request, userdata, tail, dict()) - return web.FileResponse(tail) + return web.FileResponse(f'{DOCS_PATH}/{tail}') raise web.HTTPNotFound() From ab484a96940182da66f9520e67fd33d84781630f Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Thu, 18 Mar 2021 10:24:23 +1100 Subject: [PATCH 231/501] Fix tmp_dir default, which doesn't work for the service backend. (#93) --- hail/python/hail/context.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hail/python/hail/context.py b/hail/python/hail/context.py index 77abe84617f..6bf4cc61e42 100644 --- a/hail/python/hail/context.py +++ b/hail/python/hail/context.py @@ -129,7 +129,7 @@ def stop(self): _optimizer_iterations=nullable(int)) def init(sc=None, app_name='Hail', master=None, local='local[*]', log=None, quiet=False, append=False, - min_block_size=0, branching_factor=50, tmp_dir='/tmp', + min_block_size=0, branching_factor=50, tmp_dir=None, default_reference='GRCh37', idempotent=False, global_seed=6348563392232659379, spark_conf=None, @@ -239,7 +239,7 @@ def init(sc=None, app_name='Hail', master=None, local='local[*]', from hail.backend.spark_backend import SparkBackend log = _get_log(log) - tmpdir = _get_tmpdir(tmp_dir) + tmpdir = _get_tmpdir(tmp_dir if tmp_dir else '/tmp') local_tmpdir = _get_local_tmpdir(local_tmpdir) optimizer_iterations = get_env_or_default(_optimizer_iterations, 'HAIL_OPTIMIZER_ITERATIONS', 3) From 1617888f14db8c7c026c904590480e11b476f3c3 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Thu, 18 Mar 2021 11:00:20 +1100 Subject: [PATCH 232/501] Fix merge. 
(#94) --- ci/ci/ci.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/ci/ci.py b/ci/ci/ci.py index 00f0776db9c..4e522e0f559 100644 --- a/ci/ci/ci.py +++ b/ci/ci/ci.py @@ -211,7 +211,6 @@ def run(): setup_common_static_routes(routes) app.add_routes(routes) - app.router.add_get("/metrics", server_stats) web.run_app( deploy_config.prefix_application(app, 'ci'), From 1e18de7791c7ec8361c13c89f955311be25be568 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Fri, 19 Mar 2021 09:11:18 +1100 Subject: [PATCH 233/501] Fix type for tmp_dir. --- hail/python/hail/context.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hail/python/hail/context.py b/hail/python/hail/context.py index 6bf4cc61e42..7d99765cb03 100644 --- a/hail/python/hail/context.py +++ b/hail/python/hail/context.py @@ -119,7 +119,7 @@ def stop(self): append=bool, min_block_size=int, branching_factor=int, - tmp_dir=str, + tmp_dir=nullable(str), default_reference=enumeration('GRCh37', 'GRCh38', 'GRCm38', 'CanFam3'), idempotent=bool, global_seed=nullable(int), @@ -239,7 +239,7 @@ def init(sc=None, app_name='Hail', master=None, local='local[*]', from hail.backend.spark_backend import SparkBackend log = _get_log(log) - tmpdir = _get_tmpdir(tmp_dir if tmp_dir else '/tmp') + tmpdir = _get_tmpdir(tmp_dir) local_tmpdir = _get_local_tmpdir(local_tmpdir) optimizer_iterations = get_env_or_default(_optimizer_iterations, 'HAIL_OPTIMIZER_ITERATIONS', 3) From 0f38bc807fba2aa258356201faf1d62094b0a02c Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Fri, 19 Mar 2021 17:53:46 +1100 Subject: [PATCH 234/501] Condarise: remove old packages --- .github/workflows/condarise.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/condarise.yaml b/.github/workflows/condarise.yaml index 02a97b4abac..819a8872e7d 100644 --- a/.github/workflows/condarise.yaml +++ b/.github/workflows/condarise.yaml @@ -37,6 +37,12 @@ jobs: - name: Build package run: conda build conda/hail + - name: Remove old versions + run: | + anaconda -t ${{ secrets.ANACONDA_TOKEN }} show cpg/hail 2>&1 \ + | grep "^ +" | head -n-8 | sed 's# \+ ##' \ + | xargs -I '{}' -L1 echo anaconda -t ${{ secrets.ANACONDA_TOKEN }} remove -f cpg/hail/{} + - name: Upload to anaconda package repository run: | anaconda -t ${{ secrets.ANACONDA_TOKEN }} \ From 6b568ade3d3f375ef6041e46526f982bda61361b Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Fri, 19 Mar 2021 18:20:43 +1100 Subject: [PATCH 235/501] Add comment --- .github/workflows/condarise.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/condarise.yaml b/.github/workflows/condarise.yaml index 819a8872e7d..6cf9c30107f 100644 --- a/.github/workflows/condarise.yaml +++ b/.github/workflows/condarise.yaml @@ -37,10 +37,13 @@ jobs: - name: Build package run: conda build conda/hail + # Remove old packages to save space on anaconda - name: Remove old versions run: | + # Keep 8 latest versions + KEEP=8 anaconda -t ${{ secrets.ANACONDA_TOKEN }} show cpg/hail 2>&1 \ - | grep "^ +" | head -n-8 | sed 's# \+ ##' \ + | grep "^ +" | head -n-${KEEP} | sed 's# \+ ##' \ | xargs -I '{}' -L1 echo anaconda -t ${{ secrets.ANACONDA_TOKEN }} remove -f cpg/hail/{} - name: Upload to anaconda package repository From 5577469b366a3929e81913eb98de09e9fc951a06 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Fri, 19 Mar 2021 18:23:58 +1100 Subject: [PATCH 236/501] Condarise: do clean up for ubuntu only --- .github/workflows/condarise.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/.github/workflows/condarise.yaml b/.github/workflows/condarise.yaml index 6cf9c30107f..816003713fc 100644 --- a/.github/workflows/condarise.yaml +++ b/.github/workflows/condarise.yaml @@ -39,6 +39,8 @@ jobs: # Remove old packages to save space on anaconda - name: Remove old versions + # Only run this step once. + if: ${{ matrix.os == 'ubuntu-latest' }} run: | # Keep 8 latest versions KEEP=8 From 403e773315fb2605b5fb48bdcd226b59cde244c9 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Sat, 20 Mar 2021 18:09:48 +1100 Subject: [PATCH 237/501] Update bokeh requirements Thanks to @pdiakumis for finding this in https://github.com/hail-is/hail/pull/9799. --- conda/hail/meta-template.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/hail/meta-template.yaml b/conda/hail/meta-template.yaml index 4bf74f09e07..9be0b9ef8c0 100644 --- a/conda/hail/meta-template.yaml +++ b/conda/hail/meta-template.yaml @@ -26,7 +26,7 @@ requirements: - pyspark >=2.4,<2.4.2 - aiohttp - aiohttp-session - - bokeh >1.1,<1.3 + - bokeh >1.3,<2.0 - decorator <5 - deprecated - gcsfs From 317f9a312c2aa8e5591157e7b7203e1bd823b450 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Wed, 24 Mar 2021 09:58:34 +1100 Subject: [PATCH 238/501] Merge upstream changes (#98) * [batch] Worker cleanup (#10155) * [batch] Worker cleanup * more changes * wip * delint * additions? * fix * [query] Add `source_file_field` to `import_table` (#10164) * [query] Add `source_file_field` to `import_table` CHANGELOG: Add `source_file_field` parameter to `hl.import_table` to allow lines to be associated with their original source file. * ugh * [ci] add authorize sha and action items table to user page (#10142) * [ci] add authorize sha and action items table to user page * [ci] track review requested in addition to assigned for PR reviews * [ci] add CI dropdown with link to user page (#10163) * [batch] add more logs and do not wait for asyncgens (#10136) * [batch] add more logs and do not wait for asyncgens I think there is some unresolved issue with asyncgen shutdown that is keeping workers alive. This is not an issue in worker because worker calls sys.exit which forcibly stops execution. cc: @daniel-goldstein @jigold. * fix lint * [query-service] maybe fix event loop not initialized (#10153) * [query-service] maybe fix event loop not initialized The event loop is supposed to be initialized in the main thread. Sometimes our tests get placed in the non-main thread (always a thread named Dummy-1). Hopefully the session-scoped fixture is run in the main thread. * fix * [prometheus] add prometheus to track SLIs (#10165) * [prometheus] add prometheus to track SLIs * add wraps * [query] apply nest-asyncio as early as possible (#10158) * [query] apply nest-asyncio as early as possible * fix * [grafana] set pod fsGroup to grafana user (#10162) * fix linting errors (#10171) * [query] Remove verbose print (#10167) Looks like this got added in some dndarray work * [ci] update assignees and reviewers on PR github update (#10168) * [query-service] fix receive logic (#10159) * [query-service] fix receive logic Only one coro waits on receive now. We still error if a message is sent before we make our first response. 
* fix * fix * CHANGELOG: Fixed incorrect error message when incorrect type specified with hl.loop (#10174) * [linting] add curlylint check for any service that renders jinja2 (#10172) * [linting] add curlylint check for any service that renders jinja2 templates * [linting] spaces not tabs * [website] fix website (#10173) * [website] fix website I build old versions of the docs and use them in new websites. This does not work for versions of the docs before I introduced the new system. In particular versions 0.2.63 and before generate old-style docs. * tutorials are templated * [ci] change mention for deploy failure (#10178) * [gateway] move ukbb routing into gateway (#10179) * [query] Fix filter intervals (keep=False) memory leak (#10182) * [query-service] remove service backend tests (#10180) They are too flaky currently due to the version issue. * [website] pass response body as kwarg (#10176) * Release 0.2.64 (#10183) * Bump version number * Updated changelog * [nginx] ensure nginx configs dont overwrite each other in build.yaml (#10181) * [query-service] teach query service to read MTs and Ts created by Spark (#10184) * [query-service] teach query service to read MTs and Ts created by Spark Hail-on-Spark uses HadoopFS which emulates directories by creating size-zero files with the name `gs://bucket/dirname/`. Note: the object name literally ends in a slash. Such files should not be included in `listStatus` (they should always be empty anyway). Unfortunately, my fix in https://github.com/hail-is/hail/pull/9914 was wrong because `GoogleStorageFileStatus` removes the trailing slash. This prevented the path from matching `path`, which always ends in a `/`. * fix * [website] dont jinja render any of the batch docs (#10190) * [googlestoragefs] ignore the directory check entirely (#10185) * [googlestoragefs] ignore the directory check entirely If a file exists with the *same name as the directory we are listing*, then it must be a directory marker. It does not matter if that file is a directory or not. * Update GoogleStorageFS.scala * [ci] fix focus on slash and search job page for PRs (#10194) * [query] Improve file compatibility error (#10191) * Call init_service from init based on HAIL_QUERY_BACKEND value. (#10189) * [query] NDArray Sum (#10187) * Attempt implementing the sum rule in Emit * Connected the python code, but not working yet * NDArrayExpression.sum is working now * Add default arg when no axis is provided * More comprehensive test * Unused imports * Use sum appropriately in linear_regression_rows_nd * Deleted extra blank line * Don't use typeToTypeInfo, make NumericPrimitives the source of these decisions * Better assertions, with tests * Got the summation index correct * Add documentation * [website] fix resource path for non-html files in the docs (#10196) * [query] Remove tcode from primitive orderings (#10193) * [query] BlockMatrix map (#10195) * Add map, but protect users of the spark backend from writing arbitrary maps * If densify would have been a no-op, that should work * Densify and Sparsify are no-ops for now * Rename map to map_dense and map_sparse. 
Give better implementations for add, multiply, divide, subtract of a scalar * Make the maps underscore methods * [query] Remove all uses of .tcode[Boolean] (#10198) * [ci] make test hello speak https (#10192) * [tls] make hello use tls * change pylint ignore message * [query] blanczos_pca dont do extra loading work (#10201) * Use the checkpointed table from mt_to_table_of_ndarray to avoid recomputing mt * Keep extra row fields from being included * Add query graceful shutdown for rolling updates (#10106) * Merge pull request #35 from populationgenomics/add-query-graceful-shutdown Add query graceful shutdown * Remove unused argument from query:on_shutdown * [auth] add more options for obtaining session id for dev credentials (#10203) * [auth] add more options for obtaining session id for dev credentials * [auth] extract userinfo query for use in both userinfo and verify_dev_credentials * remove unused import * [query] Default to Spark 3 (#10054) * Change hail to use spark3 and scala 2.12 by default, change build_hail_spar3 to instead test spark2 for backwards support * Update Makefile * Update dataproc image version * Scale down the dataproc version, since latest dataproc is using Spark release candidate * Update pyspark version in requirements.txt * Bump scala/spark patch versions * We want to use the newer py4j jar when using spark 3 * Upgrade json4s * I now want Spark 3.1.1, since it's been released * Upgrade to 3.1.1 in the Makefile, fix a deprecateed IOUtils method * Update pyspark as well * Don't update json4s * Try upgrading version * Fixed issue for constructing bufferspecs * Should at least be using newest one * Remove abstracts from type hints * Revert "Remove abstracts from type hints" This reverts commit 1e0d194e06a1416e706e78dad2426941ce2c1a42. * Things don't go well if I don't use the same json4s version as Spark * Mixed a typeHintFieldName * See if this fixes my BlockMatrixSparsity issue * json4s can't handle a curried apply method * This works so long as the jar file is included in the libs directory * Makefile changes to support pulling elasticsearch * Use dataproc image for Spark 3.1.1 * Update patch version of dataproc image, no longer uses Spark RC * Fixed up Makefile, now correctly depends on copying the jar * Now we just check that the specified version is 7, as that's all we support * Delete build_hail_spark2, we can't support spark2 * Version checks for Scala and Spark * Updated installation docs * Spark versions warning * Update some old pysparks * [batch] Add more info to UI pages (#10070) * [batch] Add more info to UI pages * fixes * addr comment * addr comments * Bump jinja2 from 2.10.1 to 2.11.3 in /docker (#10209) Bumps [jinja2](https://github.com/pallets/jinja) from 2.10.1 to 2.11.3. - [Release notes](https://github.com/pallets/jinja/releases) - [Changelog](https://github.com/pallets/jinja/blob/master/CHANGES.rst) - [Commits](https://github.com/pallets/jinja/compare/2.10.1...2.11.3) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * [docker][hail] update to latest pytest (#10177) * [docker][hail] update to latest pytest Issues like this https://ci.hail.is/batches/221291/jobs/112 do not appear locally for me, I suspect this is due to my using a much newer pytest. 
* fix many tests incorrectly using pytest * another one * remove unnecessary pip installs in service test dockerfiles * fix * [gateway] Cut out router and router-resolver from gateway internal routing (#10207) * [gateway] cut out router-resolver from internal auth flow * [gateway] cut out router from internal * [datasets] add pan-ukb datasets (#10186) * add available pan-ukb datasets * add rst files for schemas * reference associated variant indices HT in the block matrix descriptions * [query] Add json warn context to `parse_json` (#10160) We don't test the logs, but I did test this manually, it works as expected. * [query] fix tmp_dir default in init(), which doesn't work for the service backend (#10199) * Fix tmp_dir default, which doesn't work for the service backend. * Fix type for tmp_dir. * [gitignore]ignore website and doc files (#10214) * Remove duplicate on_shutdown in query service Co-authored-by: jigold Co-authored-by: Tim Poterba Co-authored-by: Daniel Goldstein Co-authored-by: Dan King Co-authored-by: John Compitello Co-authored-by: Christopher Vittal Co-authored-by: Michael Franklin Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Patrick Cummings <42842025+pwc2@users.noreply.github.com> Co-authored-by: Carolin Diaz <63973811+CDiaz96@users.noreply.github.com> --- .gitignore | 5 + auth/auth/auth.py | 47 +-- batch/Dockerfile.worker | 2 +- batch/batch/batch.py | 3 + batch/batch/front_end/front_end.py | 27 +- batch/batch/front_end/templates/batch.html | 23 +- batch/batch/front_end/templates/batches.html | 8 +- batch/batch/front_end/templates/job.html | 12 + batch/test/test_dag.py | 1 + benchmark-service/Dockerfile.test | 3 - benchmark-service/test/test_update_commits.py | 3 +- build.yaml | 24 +- ci/Dockerfile.test | 1 - ci/test/resources/build.yaml | 30 ++ ci/test/resources/config.yaml | 4 + ci/test/resources/deployment.yaml | 10 +- ci/test/resources/hello.py | 9 +- ci/test/resources/statefulset.yaml | 10 +- ci/test/test_ci.py | 3 +- docker/Dockerfile.base | 2 +- docker/Dockerfile.service-java-run-base | 2 +- docker/requirements.txt | 10 +- gateway/gateway.nginx.conf | 12 +- hail/Makefile | 20 +- hail/build.gradle | 42 +-- hail/python/dev-requirements.txt | 6 +- hail/python/hail/backend/local_backend.py | 2 +- .../datasets/schemas/panukb_ld_scores_AFR.rst | 28 ++ .../datasets/schemas/panukb_ld_scores_AMR.rst | 28 ++ .../datasets/schemas/panukb_ld_scores_CSA.rst | 28 ++ .../datasets/schemas/panukb_ld_scores_EAS.rst | 28 ++ .../datasets/schemas/panukb_ld_scores_EUR.rst | 28 ++ .../datasets/schemas/panukb_ld_scores_MID.rst | 28 ++ .../schemas/panukb_ld_variant_indices_AFR.rst | 26 ++ .../schemas/panukb_ld_variant_indices_AMR.rst | 26 ++ .../schemas/panukb_ld_variant_indices_CSA.rst | 26 ++ .../schemas/panukb_ld_variant_indices_EAS.rst | 26 ++ .../schemas/panukb_ld_variant_indices_EUR.rst | 26 ++ .../schemas/panukb_ld_variant_indices_MID.rst | 26 ++ hail/python/hail/docs/install/linux.rst | 2 +- hail/python/hail/docs/install/macosx.rst | 2 +- .../hail/docs/install/other-cluster.rst | 4 +- hail/python/hail/experimental/datasets.json | 306 ++++++++++++++++++ .../hail/experimental/table_ndarray_utils.py | 9 +- hail/python/hail/linalg/blockmatrix.py | 16 + hail/python/hail/methods/pca.py | 12 +- hail/python/hailtop/batch_client/aioclient.py | 5 +- hail/python/hailtop/batch_client/client.py | 5 +- hail/python/hailtop/hailctl/dataproc/start.py | 2 +- hail/python/requirements.txt | 2 +- hail/python/test/hail/expr/test_ndarrays.py | 30 +- 
hail/python/test/hail/linalg/test_linalg.py | 4 +- .../hail/backend/service/ServiceBackend.scala | 4 +- .../scala/is/hail/expr/AnnotationImpex.scala | 4 +- .../expr/ir/AbstractMatrixTableSpec.scala | 6 +- .../scala/is/hail/expr/ir/BlockMatrixIR.scala | 33 +- .../is/hail/expr/ir/BlockMatrixWriter.scala | 3 +- .../src/main/scala/is/hail/expr/ir/Emit.scala | 15 +- .../scala/is/hail/expr/ir/EmitStream.scala | 2 +- hail/src/main/scala/is/hail/expr/ir/IR.scala | 12 +- .../scala/is/hail/expr/ir/MatrixWriter.scala | 6 +- .../scala/is/hail/expr/ir/TableWriter.scala | 5 +- .../ir/functions/RelationalFunctions.scala | 7 +- .../expr/ir/functions/StringFunctions.scala | 7 +- .../expr/ir/lowering/LowerBlockMatrixIR.scala | 6 +- .../main/scala/is/hail/io/BufferSpecs.scala | 2 +- .../scala/is/hail/rvd/AbstractRVDSpec.scala | 5 +- .../scala/is/hail/types/BlockMatrixType.scala | 6 +- monitoring/Dockerfile.test | 1 - monitoring/test/test_monitoring.py | 3 +- query/query/query.py | 10 +- router/deployment.yaml | 3 +- router/router.nginx.conf.in | 2 +- 73 files changed, 975 insertions(+), 211 deletions(-) create mode 100644 ci/test/resources/config.yaml create mode 100644 hail/python/hail/docs/datasets/schemas/panukb_ld_scores_AFR.rst create mode 100644 hail/python/hail/docs/datasets/schemas/panukb_ld_scores_AMR.rst create mode 100644 hail/python/hail/docs/datasets/schemas/panukb_ld_scores_CSA.rst create mode 100644 hail/python/hail/docs/datasets/schemas/panukb_ld_scores_EAS.rst create mode 100644 hail/python/hail/docs/datasets/schemas/panukb_ld_scores_EUR.rst create mode 100644 hail/python/hail/docs/datasets/schemas/panukb_ld_scores_MID.rst create mode 100644 hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_AFR.rst create mode 100644 hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_AMR.rst create mode 100644 hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_CSA.rst create mode 100644 hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_EAS.rst create mode 100644 hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_EUR.rst create mode 100644 hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_MID.rst diff --git a/.gitignore b/.gitignore index d12b6a4c27d..bdec059923d 100644 --- a/.gitignore +++ b/.gitignore @@ -27,3 +27,8 @@ GTAGS *.dylib */hail.jar infra/.terraform.lock.hcl +hail/python/hail/docs/experimental/hail.experimental.DB.rst +hail/python/hailtop/batch/docs/api/ +web_common/web_common/static/css/ +website/docs.tar.gz +website/website/static/css/ diff --git a/auth/auth/auth.py b/auth/auth/auth.py index 90c8c8c0323..1ab47f4a995 100644 --- a/auth/auth/auth.py +++ b/auth/auth/auth.py @@ -8,7 +8,6 @@ import google.auth.transport.requests import google.oauth2.id_token import google_auth_oauthlib.flow -from hailtop.auth import async_get_userinfo from hailtop.config import get_deploy_config from hailtop.tls import internal_server_ssl_context from hailtop.hail_logging import AccessLogger @@ -526,18 +525,7 @@ async def rest_logout(request, userdata): return web.Response(status=200) -@routes.get('/api/v1alpha/userinfo') -async def userinfo(request): - if 'Authorization' not in request.headers: - log.info('Authorization not in request.headers') - raise web.HTTPUnauthorized() - - auth_header = request.headers['Authorization'] - session_id = maybe_parse_bearer_header(auth_header) - if not session_id: - log.info('Bearer not in Authorization header') - raise web.HTTPUnauthorized() - +async def get_userinfo(request, session_id): # b64 
encoding of 32-byte session ID is 44 bytes if len(session_id) != 44: log.info('Session id != 44 bytes') @@ -554,18 +542,41 @@ async def userinfo(request): if len(users) != 1: log.info(f'Unknown session id: {session_id}') raise web.HTTPUnauthorized() - user = users[0] + return users[0] + + +@routes.get('/api/v1alpha/userinfo') +async def userinfo(request): + if 'Authorization' not in request.headers: + log.info('Authorization not in request.headers') + raise web.HTTPUnauthorized() + + auth_header = request.headers['Authorization'] + session_id = maybe_parse_bearer_header(auth_header) + if not session_id: + log.info('Bearer not in Authorization header') + raise web.HTTPUnauthorized() + + return web.json_response(await get_userinfo(request, session_id)) + - return web.json_response(user) +async def get_session_id(request): + if 'X-Hail-Internal-Authorization' in request.headers: + return maybe_parse_bearer_header(request.headers['X-Hail-Internal-Authorization']) + + if 'Authorization' in request.headers: + return maybe_parse_bearer_header(request.headers['Authorization']) + + session = await aiohttp_session.get_session(request) + return session.get('session_id') @routes.get('/api/v1alpha/verify_dev_credentials') async def verify_dev_credentials(request): - session = await aiohttp_session.get_session(request) - session_id = session.get('session_id') + session_id = await get_session_id(request) if not session_id: raise web.HTTPUnauthorized() - userdata = await async_get_userinfo(session_id=session_id) + userdata = await get_userinfo(request, session_id) is_developer = userdata is not None and userdata['is_developer'] == 1 if not is_developer: raise web.HTTPUnauthorized() diff --git a/batch/Dockerfile.worker b/batch/Dockerfile.worker index 2ef29ed6aed..5680613518c 100644 --- a/batch/Dockerfile.worker +++ b/batch/Dockerfile.worker @@ -10,7 +10,7 @@ RUN hail-apt-get-install \ COPY docker/hail-ubuntu/pip.conf /root/.config/pip/pip.conf COPY docker/hail-ubuntu/hail-pip-install /bin/hail-pip-install COPY docker/requirements.txt . 
-RUN hail-pip-install -r requirements.txt pyspark==2.4.0 +RUN hail-pip-install -r requirements.txt pyspark==3.1.1 ENV SPARK_HOME /usr/local/lib/python3.7/site-packages/pyspark ENV PATH "$PATH:$SPARK_HOME/sbin:$SPARK_HOME/bin" diff --git a/batch/batch/batch.py b/batch/batch/batch.py index 0a1201070de..db0eb257166 100644 --- a/batch/batch/batch.py +++ b/batch/batch/batch.py @@ -41,6 +41,7 @@ def _time_msecs_str(t): d = { 'id': record['id'], + 'user': record['user'], 'billing_project': record['billing_project'], 'token': record['token'], 'state': state, @@ -85,6 +86,8 @@ def job_record_to_dict(record, name): 'batch_id': record['batch_id'], 'job_id': record['job_id'], 'name': name, + 'user': record['user'], + 'billing_project': record['billing_project'], 'state': record['state'], 'exit_code': exit_code, 'duration': duration diff --git a/batch/batch/front_end/front_end.py b/batch/batch/front_end/front_end.py index f3202fa158b..a8721383aa7 100644 --- a/batch/batch/front_end/front_end.py +++ b/batch/batch/front_end/front_end.py @@ -222,7 +222,8 @@ async def _query_batch_jobs(request, batch_id): where_args.extend(args) sql = f''' -SELECT jobs.*, batches.format_version, job_attributes.value AS name, SUM(`usage` * rate) AS cost +SELECT jobs.*, batches.user, batches.billing_project, batches.format_version, + job_attributes.value AS name, SUM(`usage` * rate) AS cost FROM jobs INNER JOIN batches ON jobs.batch_id = batches.id LEFT JOIN job_attributes @@ -1150,7 +1151,7 @@ async def _get_job(app, batch_id, job_id): db: Database = app['db'] record = await db.select_and_fetchone(''' -SELECT jobs.*, ip_address, format_version, SUM(`usage` * rate) AS cost +SELECT jobs.*, user, billing_project, ip_address, format_version, SUM(`usage` * rate) AS cost FROM jobs INNER JOIN batches ON jobs.batch_id = batches.id @@ -1252,28 +1253,31 @@ async def ui_get_job(request, userdata, batch_id): app = request.app job_id = int(request.match_info['job_id']) - job_status, attempts, job_log = await asyncio.gather(_get_job(app, batch_id, job_id), - _get_attempts(app, batch_id, job_id), - _get_job_log(app, batch_id, job_id)) + job, attempts, job_log = await asyncio.gather(_get_job(app, batch_id, job_id), + _get_attempts(app, batch_id, job_id), + _get_job_log(app, batch_id, job_id)) - job_status_status = job_status['status'] + job['duration'] = humanize_timedelta_msecs(job['duration']) + job['cost'] = cost_str(job['cost']) + + job_status = job['status'] container_status_spec = dictfix.NoneOr({ 'name': str, 'timing': {'pulling': dictfix.NoneOr({'duration': dictfix.NoneOr(Number)}), 'running': dictfix.NoneOr({'duration': dictfix.NoneOr(Number)})}, 'container_status': {'out_of_memory': False}, 'state': str}) - job_status_status_spec = { + job_status_spec = { 'container_statuses': {'input': container_status_spec, 'main': container_status_spec, 'output': container_status_spec}} - job_status_status = dictfix.dictfix(job_status_status, job_status_status_spec) - container_statuses = job_status_status['container_statuses'] + job_status = dictfix.dictfix(job_status, job_status_spec) + container_statuses = job_status['container_statuses'] step_statuses = [container_statuses['input'], container_statuses['main'], container_statuses['output']] - job_specification = job_status['spec'] + job_specification = job['spec'] if 'process' in job_specification: process_specification = job_specification['process'] process_type = process_specification['type'] @@ -1289,11 +1293,12 @@ async def ui_get_job(request, userdata, batch_id): page_context = { 
'batch_id': batch_id, 'job_id': job_id, + 'job': job, 'job_log': job_log, 'attempts': attempts, 'step_statuses': step_statuses, 'job_specification': job_specification, - 'job_status_str': json.dumps(job_status, indent=2) + 'job_status_str': json.dumps(job, indent=2) } return await render_template('batch', request, userdata, 'job.html', page_context) diff --git a/batch/batch/front_end/templates/batch.html b/batch/batch/front_end/templates/batch.html index 2bd63c82ef7..107061262ab 100644 --- a/batch/batch/front_end/templates/batch.html +++ b/batch/batch/front_end/templates/batch.html @@ -4,7 +4,28 @@ {% endblock %} {% block content %} +

Batch {{ batch['id'] }}

+ +

Properties

+
    +
  • User: {{ batch['user'] }}
  • +
  • Billing Project: {{ batch['billing_project'] }}
  • +
  • Time Created: {% if 'time_created' in batch and batch['time_created'] is not none %}{{ batch['time_created'] }}{% endif %}
  • +
  • Time Closed: {% if 'time_closed' in batch and batch['time_closed'] is not none %}{{ batch['time_closed'] }}{% endif %}
  • +
  • Time Completed: {% if 'time_completed' in batch and batch['time_completed'] is not none %}{{ batch['time_completed'] }}{% endif %}
  • +
  • Total Jobs: {{ batch['n_jobs'] }}
  • +
      +
    • Pending Jobs: {{ batch['n_jobs'] - batch['n_completed'] }}
    • +
    • Succeeded Jobs: {{ batch['n_succeeded'] }}
    • +
    • Failed Jobs: {{ batch['n_failed'] }}
    • +
    • Cancelled Jobs: {{ batch['n_cancelled'] }}
    • +
    +
  • Duration: {% if 'duration' in batch and batch['duration'] is not none %}{{ batch['duration'] }}{% endif %}
  • +
  • Cost: {% if 'cost' in batch and batch['cost'] is not none %}{{ batch['cost'] }}{% endif %}
  • +
+ +

Attributes

{% if 'attributes' in batch %} {% for name, value in batch['attributes'].items() %}

{{ name }}: {{ value }}

@@ -64,7 +85,7 @@

Jobs

{% for job in batch['jobs'] %} - + + @@ -68,7 +70,11 @@

Batches

{% for batch in batches %} - + + + - @@ -29,9 +28,6 @@ * {% endif %} -
id
+ {{ job['job_id'] }} diff --git a/batch/batch/front_end/templates/batches.html b/batch/batch/front_end/templates/batches.html index ef5ee38890a..799f3463216 100644 --- a/batch/batch/front_end/templates/batches.html +++ b/batch/batch/front_end/templates/batches.html @@ -52,6 +52,8 @@

Batches

IDUserBilling Project Name Submitted Completed
{{ batch['id'] }} + {{ batch['id'] }} + {{ batch['user'] }}{{ batch['billing_project'] }} {% if 'attributes' in batch and 'name' in batch['attributes'] and batch['attributes']['name'] is not none %} {{ batch['attributes']['name'] }} diff --git a/batch/batch/front_end/templates/job.html b/batch/batch/front_end/templates/job.html index 2d66b461303..f9e8ec79ddb 100644 --- a/batch/batch/front_end/templates/job.html +++ b/batch/batch/front_end/templates/job.html @@ -3,6 +3,18 @@ {% block content %}

Batch {{ batch_id }} Job {{ job_id }}

+

Properties

+
    +
  • Batch ID: {{ batch_id }}
  • +
  • Job ID: {{ job_id }}
  • +
  • User: {{ job['user'] }}
  • +
  • Billing Project: {{ job['billing_project'] }}
  • +
  • State: {{ job['state'] }}
  • +
  • Exit Code: {% if 'exit_code' in job and job['exit_code'] is not none %}{{ job['exit_code'] }}{% endif %}
  • +
  • Duration: {% if 'duration' in job and job['duration'] is not none %}{{ job['duration'] }}{% endif %}
  • +
  • Cost: {% if 'cost' in job and job['cost'] is not none %}{{ job['cost'] }}{% endif %}
  • +
+

Attempts

{% if attempts %} diff --git a/batch/test/test_dag.py b/batch/test/test_dag.py index 3adae54d1c6..42c8d286604 100644 --- a/batch/test/test_dag.py +++ b/batch/test/test_dag.py @@ -156,6 +156,7 @@ def test(): callback_body.pop('duration') assert (callback_body == { 'id': b.id, + 'user': 'test', 'billing_project': 'test', 'token': token, 'state': 'success', diff --git a/benchmark-service/Dockerfile.test b/benchmark-service/Dockerfile.test index 8629c8d2b96..f739af15f07 100644 --- a/benchmark-service/Dockerfile.test +++ b/benchmark-service/Dockerfile.test @@ -1,6 +1,3 @@ FROM {{ service_base_image.image }} COPY benchmark-service/test/ /test/ -RUN python3 -m pip install --no-cache-dir \ - pytest-instafail==0.4.1 \ - pytest-asyncio==0.10.0 diff --git a/benchmark-service/test/test_update_commits.py b/benchmark-service/test/test_update_commits.py index 5ab7ecb8dfe..39b904d95ae 100644 --- a/benchmark-service/test/test_update_commits.py +++ b/benchmark-service/test/test_update_commits.py @@ -9,14 +9,13 @@ from hailtop.httpx import client_session import hailtop.utils as utils -pytestmark = pytest.mark.asyncio - logging.basicConfig(level=logging.INFO) log = logging.getLogger(__name__) sha = 'd626f793ad700c45a878d192652a0378818bbd8b' +@pytest.mark.asyncio async def test_update_commits(): deploy_config = get_deploy_config() headers = service_auth_headers(deploy_config, 'benchmark') diff --git a/build.yaml b/build.yaml index 43ecc016348..c884d9d830f 100644 --- a/build.yaml +++ b/build.yaml @@ -684,26 +684,6 @@ steps: to: /cluster-tests.tar.gz dependsOn: - hail_build_image - - kind: runImage - name: build_hail_spark3 - image: - valueFrom: hail_build_image.image - resources: - memory: "7.5G" - cpu: "4" - script: | - set -ex - cd / - rm -rf repo - mkdir repo - cd repo - {{ code.checkout_script }} - cd hail - time retry ./gradlew --version - export SPARK_VERSION="3.0.1" SCALA_VERSION="2.12.12" - time retry make jars python-version-info wheel - dependsOn: - - hail_build_image - kind: buildImage name: batch_worker_image dockerFile: batch/Dockerfile.worker @@ -2830,6 +2810,8 @@ steps: mkdir -p ./ci/test ./hail/python cp /repo/hail/ci/test/resources/build.yaml ./ cp -R /repo/hail/ci/test/resources ./ci/test/ + cp /repo/hail/tls/Dockerfile ./ci/test/resources/Dockerfile.certs + cp /repo/hail/tls/create_certs.py ./ci/test/resources/ cp /repo/hail/pylintrc ./ cp /repo/hail/setup.cfg ./ cp -R /repo/hail/docker ./ @@ -3289,7 +3271,7 @@ steps: script: | set -ex gcloud auth activate-service-account --key-file=/secrets/ci-deploy-0-1--hail-is-hail.json - SPARK_VERSION=2.4.5 + SPARK_VERSION=3.1.1 BRANCH=0.2 SHA="{{ code.sha }}" GS_JAR=gs://hail-common/builds/${BRANCH}/jars/hail-${BRANCH}-${SHA}-Spark-${SPARK_VERSION}.jar diff --git a/ci/Dockerfile.test b/ci/Dockerfile.test index 4f2dfbe8150..15ea0b73172 100644 --- a/ci/Dockerfile.test +++ b/ci/Dockerfile.test @@ -4,4 +4,3 @@ COPY hail/python/setup-hailtop.py /hailtop/setup.py COPY hail/python/hailtop /hailtop/hailtop/ RUN hail-pip-install /hailtop && rm -rf /hailtop COPY ci/test/ /test/ -RUN hail-pip-install pytest-instafail==0.4.1 pytest-asyncio==0.10.0 diff --git a/ci/test/resources/build.yaml b/ci/test/resources/build.yaml index 7078e450185..93bbee7ec08 100644 --- a/ci/test/resources/build.yaml +++ b/ci/test/resources/build.yaml @@ -52,6 +52,36 @@ steps: publishAs: service-base dependsOn: - base_image + - kind: buildImage + name: create_certs_image + dockerFile: ci/test/resources/Dockerfile.certs + contextPath: ci/test/resources + publishAs: 
test_hello_create_certs_image + dependsOn: + - service_base_image + - kind: runImage + name: create_certs + image: + valueFrom: create_certs_image.image + script: | + set -ex + python3 create_certs.py \ + {{ default_ns.name }} \ + config.yaml \ + /ssl-config-hail-root/hail-root-key.pem \ + /ssl-config-hail-root/hail-root-cert.pem + serviceAccount: + name: admin + namespace: + valueFrom: default_ns.name + secrets: + - name: ssl-config-hail-root + namespace: + valueFrom: default_ns.name + mountPath: /ssl-config-hail-root + dependsOn: + - default_ns + - create_certs_image - kind: buildImage name: hello_image dockerFile: ci/test/resources/Dockerfile diff --git a/ci/test/resources/config.yaml b/ci/test/resources/config.yaml new file mode 100644 index 00000000000..5a67fd4191a --- /dev/null +++ b/ci/test/resources/config.yaml @@ -0,0 +1,4 @@ +principals: +- name: hello + domain: hello + kind: json diff --git a/ci/test/resources/deployment.yaml b/ci/test/resources/deployment.yaml index 4ef6e318c76..95c7dedee2d 100644 --- a/ci/test/resources/deployment.yaml +++ b/ci/test/resources/deployment.yaml @@ -50,6 +50,9 @@ spec: - name: session-secret-key mountPath: /session-secret-key readOnly: true + - name: ssl-config + mountPath: /ssl-config + readOnly: true env: - name: HAIL_IP valueFrom: @@ -74,6 +77,10 @@ spec: secret: optional: false secretName: session-secret-key + - name: ssl-config + secret: + optional: false + secretName: ssl-config-hello --- apiVersion: v1 kind: Service @@ -83,8 +90,7 @@ metadata: app: hello spec: ports: - - name: http - port: 80 + - port: 443 protocol: TCP targetPort: 5000 selector: diff --git a/ci/test/resources/hello.py b/ci/test/resources/hello.py index 0e914e097e0..5bfc3fded0a 100644 --- a/ci/test/resources/hello.py +++ b/ci/test/resources/hello.py @@ -2,6 +2,7 @@ from aiohttp import web from hailtop.config import get_deploy_config +from hailtop.tls import internal_server_ssl_context from gear import setup_aiohttp_session @@ -14,15 +15,17 @@ @routes.get('/healthcheck') -async def get_healthcheck(request): # pylint: disable=W0613 +async def get_healthcheck(request): # pylint: disable=unused-argument return web.Response() @routes.get('/sha') -async def get_sha(request): +async def get_sha(request): # pylint: disable=unused-argument return web.Response(text=SHA) setup_aiohttp_session(app) app.add_routes(routes) -web.run_app(deploy_config.prefix_application(app, 'hello'), host='0.0.0.0', port=5000) +web.run_app( + deploy_config.prefix_application(app, 'hello'), host='0.0.0.0', port=5000, ssl_context=internal_server_ssl_context() +) diff --git a/ci/test/resources/statefulset.yaml b/ci/test/resources/statefulset.yaml index 0bdd9a37796..1b9b9a9f2f9 100644 --- a/ci/test/resources/statefulset.yaml +++ b/ci/test/resources/statefulset.yaml @@ -49,6 +49,9 @@ spec: - name: session-secret-key mountPath: /session-secret-key readOnly: true + - name: ssl-config + mountPath: /ssl-config + readOnly: true env: - name: HAIL_IP value: "{{ global.ip }}" @@ -67,6 +70,10 @@ spec: secret: optional: false secretName: session-secret-key + - name: ssl-config + secret: + optional: false + secretName: ssl-config-hello --- apiVersion: v1 kind: Service @@ -76,8 +83,7 @@ metadata: app: hello-stateful-set spec: ports: - - name: http - port: 80 + - port: 443 protocol: TCP targetPort: 5000 selector: diff --git a/ci/test/test_ci.py b/ci/test/test_ci.py index bd4be73cd7a..b712ffd1a87 100644 --- a/ci/test/test_ci.py +++ b/ci/test/test_ci.py @@ -9,12 +9,11 @@ from hailtop.httpx import client_session import 
hailtop.utils as utils -pytestmark = pytest.mark.asyncio - logging.basicConfig(level=logging.INFO) log = logging.getLogger(__name__) +@pytest.mark.asyncio async def test_deploy(): deploy_config = get_deploy_config() ci_deploy_status_url = deploy_config.url('ci', '/api/v1alpha/deploy_status') diff --git a/docker/Dockerfile.base b/docker/Dockerfile.base index 1e899fab56c..11d470f54ee 100644 --- a/docker/Dockerfile.base +++ b/docker/Dockerfile.base @@ -26,7 +26,7 @@ RUN /bin/sh -c 'curl https://sdk.cloud.google.com | bash' && \ ENV PATH $PATH:/google-cloud-sdk/bin COPY docker/requirements.txt . -RUN hail-pip-install -r requirements.txt pyspark==2.4.0 +RUN hail-pip-install -r requirements.txt pyspark==3.1.1 ENV SPARK_HOME /usr/local/lib/python3.7/dist-packages/pyspark ENV PATH "$PATH:$SPARK_HOME/sbin:$SPARK_HOME/bin" diff --git a/docker/Dockerfile.service-java-run-base b/docker/Dockerfile.service-java-run-base index 4ca7f6dd789..c05ae40e118 100644 --- a/docker/Dockerfile.service-java-run-base +++ b/docker/Dockerfile.service-java-run-base @@ -8,7 +8,7 @@ RUN hail-apt-get-install \ liblapack3 COPY docker/requirements.txt . -RUN hail-pip-install -r requirements.txt pyspark==2.4.0 +RUN hail-pip-install -r requirements.txt pyspark==3.1.1 ENV SPARK_HOME /usr/local/lib/python3.7/dist-packages/pyspark ENV PYSPARK_PYTHON python3 diff --git a/docker/requirements.txt b/docker/requirements.txt index 5b3c1ca3394..13d0ec46796 100644 --- a/docker/requirements.txt +++ b/docker/requirements.txt @@ -24,7 +24,7 @@ google-cloud-logging==1.12.1 google-cloud-storage==1.25.0 humanize==1.0.0 hurry.filesize==0.9 -Jinja2==2.10.1 +Jinja2==2.11.3 # keyrings.alt>3.1: https://bugs.launchpad.net/usd-importer/+bug/1794041/comments/6 keyrings.alt>=3.1 kubernetes-asyncio==9.1.0 @@ -38,11 +38,11 @@ pyjwt==1.7.1 pylint==2.6.0 astroid<2.5 # https://github.com/PyCQA/pylint/issues/4131 PyMySQL==0.9.2 -pytest==4.6.3 -pytest-asyncio==0.10.0 +pytest==6.2.2 +pytest-asyncio==0.14.0 pytest-html==1.20.0 -pytest-instafail==0.4.1 -pytest-xdist==1.28 +pytest-instafail==0.4.2 +pytest-xdist==2.2.1 python-dateutil==2.8.1 python-json-logger==0.1.11 requests==2.22.0 diff --git a/gateway/gateway.nginx.conf b/gateway/gateway.nginx.conf index b21f4d0cd13..3408533c769 100644 --- a/gateway/gateway.nginx.conf +++ b/gateway/gateway.nginx.conf @@ -17,11 +17,6 @@ server { } } -map $maybe_router_scheme $router_scheme { - default $maybe_router_scheme; - '' http; -} - server { server_name internal.hail.populationgenomics.org.au; client_max_body_size 8m; @@ -29,7 +24,7 @@ server { location = /auth { internal; resolver kube-dns.kube-system.svc.cluster.local; - proxy_pass https://router-resolver.default.svc.cluster.local/auth/$namespace; + proxy_pass https://auth/api/v1alpha/verify_dev_credentials; include /ssl-config/ssl-config-proxy.conf; } @@ -38,10 +33,9 @@ server { set $service $2; auth_request /auth; - auth_request_set $router_ip $upstream_http_x_router_ip; - auth_request_set $maybe_router_scheme $upstream_http_x_router_scheme; - proxy_pass $router_scheme://$router_ip$request_uri; + resolver kube-dns.kube-system.svc.cluster.local; + proxy_pass https://$service.$namespace.svc.cluster.local; proxy_set_header Host $service.internal; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; diff --git a/hail/Makefile b/hail/Makefile index 36c57a99717..9596c2997fa 100644 --- a/hail/Makefile +++ b/hail/Makefile @@ -10,8 +10,8 @@ MAKEFLAGS += --no-builtin-rules REVISION := $(shell git rev-parse HEAD) SHORT_REVISION := $(shell git rev-parse --short=12 HEAD) 
BRANCH := $(shell git rev-parse --abbrev-ref HEAD) -SCALA_VERSION ?= 2.11.12 -SPARK_VERSION ?= 2.4.5 +SCALA_VERSION ?= 2.12.13 +SPARK_VERSION ?= 3.1.1 HAIL_MAJOR_MINOR_VERSION := 0.2 HAIL_PATCH_VERSION := 64 HAIL_PIP_VERSION := $(HAIL_MAJOR_MINOR_VERSION).$(HAIL_PATCH_VERSION) @@ -59,12 +59,23 @@ SHADOW_TEST_JAR := build/libs/hail-all-spark-test.jar PYTHON_JAR := python/hail/backend/hail-all-spark.jar WHEEL := build/deploy/dist/hail-$(HAIL_PIP_VERSION)-py3-none-any.whl EGG := build/deploy/dist/hail-$(HAIL_PIP_VERSION)-py3.6.egg +ELASTICSEARCH_JAR := libs/elasticsearch-spark-30_2.12-8.0.0-SNAPSHOT-custom-hail-spark311.jar GRADLE_ARGS += -Dscala.version=$(SCALA_VERSION) -Dspark.version=$(SPARK_VERSION) -Delasticsearch.major-version=$(ELASTIC_MAJOR_VERSION) +.PHONY: elasticsearchJar +elasticsearchJar: $(ELASTICSEARCH_JAR) + +$(ELASTICSEARCH_JAR): + @mkdir -p libs + gsutil cp gs://hail-common/elasticsearch-libs/elasticsearch-spark-30_2.12-8.0.0-SNAPSHOT-custom-hail-spark311.jar libs/ + .PHONY: shadowJar shadowJar: $(SHADOW_JAR) +$(JAR_SOURCES): $(ELASTICSEARCH_JAR) +$(JAR_TEST_SOURCES): $(ELASTICSEARCH_JAR) + ifdef HAIL_COMPILE_NATIVES $(SHADOW_JAR): native-lib-prebuilt endif @@ -372,7 +383,10 @@ native-lib-prebuilt: native-lib-reset-prebuilt: $(MAKE) -C src/main/c reset-prebuilt -clean: clean-env native-lib-clean +clean-libs: + rm -rf libs + +clean: clean-env clean-libs native-lib-clean $(MAKE) -C python/hail/docs clean $(MAKE) -C python/hailtop/batch/docs clean ./gradlew clean $(GRADLE_ARGS) diff --git a/hail/build.gradle b/hail/build.gradle index 91202a23138..9a3593eb394 100644 --- a/hail/build.gradle +++ b/hail/build.gradle @@ -18,6 +18,9 @@ plugins { import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar repositories { + flatDir { + dirs 'libs' + } mavenCentral() jcenter() maven { url "https://repository.cloudera.com/artifactory/cloudera-repos/" } @@ -46,8 +49,17 @@ tasks.withType(JavaCompile) { project.ext { cachedBreezeVersion = null - sparkVersion = System.getProperty("spark.version", "2.4.5") - scalaVersion = System.getProperty("scala.version", "2.11.12") + sparkVersion = System.getProperty("spark.version", "3.1.1") + if (sparkVersion.startsWith("2.")) { + throw new UnsupportedOperationException("Hail no longer supports Spark 2.") + } + else if (sparkVersion != "3.1.1") { + project.logger.lifecycle("WARNING: Hail only tested with Spark 3.1.1, use other versions at your own risk.") + } + scalaVersion = System.getProperty("scala.version", "2.12.13") + if (!scalaVersion.startsWith("2.12.")) { + throw new UnsupportedOperationException("Hail currently only supports Scala 2.12") + } scalaMajorVersion = (scalaVersion =~ /^\d+.\d+/)[0] } @@ -100,19 +112,6 @@ String breezeVersion() { return cachedBreezeVersion } -String elasticHadoopVersion() { - def elasticMajorVersion = System.getProperty("elasticsearch.major-version", "7") - if (elasticMajorVersion == "6") { - return "6.8.13" - } - else if (elasticMajorVersion == "7") { - return "7.8.1" - } - else { - throw new UnsupportedOperationException("elasticsearch.major-version must be 6 or 7") - } -} - configurations { justSpark @@ -126,9 +125,8 @@ configurations { } } eachDependency { DependencyResolveDetails details -> - if (details.requested.group == 'org.json4s') { - // JSON4S 3.6.0+ contain a known bug (https://github.com/json4s/json4s/issues/507) - details.useVersion('3.5.3') + if (details.requested.group == 'org.apache.spark') { + details.useVersion(sparkVersion) } else if (details.requested.group == 'org.scalanlp' && 
details.requested.version == '1.0') { // Breeze 1.0 contains a known bug (https://github.com/scalanlp/breeze/issues/772) details.useVersion('1.1') @@ -181,7 +179,12 @@ dependencies { bundled group: 'org.slf4j', name: 'slf4j-api', version: '1.7.25' - bundled 'org.elasticsearch:elasticsearch-spark-20_2.11:' + elasticHadoopVersion() + def elasticMajorVersion = System.getProperty("elasticsearch.major-version", "7") + if (elasticMajorVersion != "7") { + throw new UnsupportedOperationException("elasticsearch.major-version must be 7") + } + // This comes from a local libs directory, see Makefile + bundled 'org.elasticsearch:elasticsearch-spark-30_2.12-8.0.0-SNAPSHOT-custom-hail-spark311' bundled 'com.google.cloud:google-cloud-storage:1.106.0' @@ -310,7 +313,6 @@ tasks.withType(ShadowJar) { // we should really shade indeed, but it has native libraries // relocate 'com.indeed', 'is.hail.relocated.com.indeed' relocate 'com.google.cloud', 'is.hail.relocated.com.google.cloud' - relocate 'org.elasticsearch', 'is.hail.relocated.org.elasticsearch' relocate 'com.github.samtools', 'is.hail.relocated.com.github.samtools' relocate 'org.lz4', 'is.hail.relocated.org.lz4' relocate 'org.freemarker', 'is.hail.relocated.org.freemarker' diff --git a/hail/python/dev-requirements.txt b/hail/python/dev-requirements.txt index 039b6b4412c..a2f706f9483 100644 --- a/hail/python/dev-requirements.txt +++ b/hail/python/dev-requirements.txt @@ -5,10 +5,10 @@ astroid<2.5 # https://github.com/PyCQA/pylint/issues/4131 pre-commit==2.9.2 black==20.8b1 curlylint==0.12.0 -pytest==4.6.3 +pytest==6.2.2 pytest-html==1.20.0 -pytest-xdist==1.28 -pytest-instafail==0.4.1 +pytest-xdist==2.2.1 +pytest-instafail==0.4.2 sphinx==3.2.1 sphinx-autodoc-typehints==1.11.0 nbsphinx==0.7.1 diff --git a/hail/python/hail/backend/local_backend.py b/hail/python/hail/backend/local_backend.py index c246b401498..6ab7220e6ed 100644 --- a/hail/python/hail/backend/local_backend.py +++ b/hail/python/hail/backend/local_backend.py @@ -126,7 +126,7 @@ def __init__(self, tmpdir, log, quiet, append, branching_factor, port = launch_gateway( redirect_stdout=sys.stdout, redirect_stderr=sys.stderr, - jarpath=f'{spark_home}/jars/py4j-0.10.7.jar', + jarpath=f'{spark_home}/jars/py4j-0.10.9.jar', classpath=f'{spark_home}/jars/*:{hail_jar_path}', die_on_exit=True) self._gateway = JavaGateway( diff --git a/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_AFR.rst b/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_AFR.rst new file mode 100644 index 00000000000..a308200981c --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_AFR.rst @@ -0,0 +1,28 @@ +.. _panukb_ld_scores_AFR: + +panukb_ld_scores_AFR +==================== + +* **Versions:** 0.2 +* **Reference genome builds:** GRCh37 +* **Type:** :class:`hail.Table` + +Schema (0.2, GRCh37) +~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + None + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'rsid': str + 'varid': str + 'AF': float64 + 'ld_score': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_AMR.rst b/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_AMR.rst new file mode 100644 index 00000000000..759c06d15ff --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_AMR.rst @@ -0,0 +1,28 @@ +.. 
_panukb_ld_scores_AMR: + +panukb_ld_scores_AMR +==================== + +* **Versions:** 0.2 +* **Reference genome builds:** GRCh37 +* **Type:** :class:`hail.Table` + +Schema (0.2, GRCh37) +~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + None + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'rsid': str + 'varid': str + 'AF': float64 + 'ld_score': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_CSA.rst b/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_CSA.rst new file mode 100644 index 00000000000..c0d11c300bf --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_CSA.rst @@ -0,0 +1,28 @@ +.. _panukb_ld_scores_CSA: + +panukb_ld_scores_CSA +==================== + +* **Versions:** 0.2 +* **Reference genome builds:** GRCh37 +* **Type:** :class:`hail.Table` + +Schema (0.2, GRCh37) +~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + None + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'rsid': str + 'varid': str + 'AF': float64 + 'ld_score': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_EAS.rst b/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_EAS.rst new file mode 100644 index 00000000000..c036bb98686 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_EAS.rst @@ -0,0 +1,28 @@ +.. _panukb_ld_scores_EAS: + +panukb_ld_scores_EAS +==================== + +* **Versions:** 0.2 +* **Reference genome builds:** GRCh37 +* **Type:** :class:`hail.Table` + +Schema (0.2, GRCh37) +~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + None + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'rsid': str + 'varid': str + 'AF': float64 + 'ld_score': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_EUR.rst b/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_EUR.rst new file mode 100644 index 00000000000..3697ceb561e --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_EUR.rst @@ -0,0 +1,28 @@ +.. _panukb_ld_scores_EUR: + +panukb_ld_scores_EUR +==================== + +* **Versions:** 0.2 +* **Reference genome builds:** GRCh37 +* **Type:** :class:`hail.Table` + +Schema (0.2, GRCh37) +~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + None + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'rsid': str + 'varid': str + 'AF': float64 + 'ld_score': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_MID.rst b/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_MID.rst new file mode 100644 index 00000000000..de24078b6cd --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/panukb_ld_scores_MID.rst @@ -0,0 +1,28 @@ +.. 
_panukb_ld_scores_MID: + +panukb_ld_scores_MID +==================== + +* **Versions:** 0.2 +* **Reference genome builds:** GRCh37 +* **Type:** :class:`hail.Table` + +Schema (0.2, GRCh37) +~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + None + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'rsid': str + 'varid': str + 'AF': float64 + 'ld_score': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_AFR.rst b/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_AFR.rst new file mode 100644 index 00000000000..b82c3826cf1 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_AFR.rst @@ -0,0 +1,26 @@ +.. _panukb_ld_variant_indices_AFR: + +panukb_ld_variant_indices_AFR +============================= + +* **Versions:** 0.2 +* **Reference genome builds:** GRCh37 +* **Type:** :class:`hail.Table` + +Schema (0.2, GRCh37) +~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'n_samples': int32 + 'pop': str + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'idx': int64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_AMR.rst b/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_AMR.rst new file mode 100644 index 00000000000..09445a9e971 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_AMR.rst @@ -0,0 +1,26 @@ +.. _panukb_ld_variant_indices_AMR: + +panukb_ld_variant_indices_AMR +============================= + +* **Versions:** 0.2 +* **Reference genome builds:** GRCh37 +* **Type:** :class:`hail.Table` + +Schema (0.2, GRCh37) +~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'n_samples': int32 + 'pop': str + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'idx': int64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_CSA.rst b/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_CSA.rst new file mode 100644 index 00000000000..7c0d9e8c8ca --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_CSA.rst @@ -0,0 +1,26 @@ +.. _panukb_ld_variant_indices_CSA: + +panukb_ld_variant_indices_CSA +============================= + +* **Versions:** 0.2 +* **Reference genome builds:** GRCh37 +* **Type:** :class:`hail.Table` + +Schema (0.2, GRCh37) +~~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'n_samples': int32 + 'pop': str + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'idx': int64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_EAS.rst b/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_EAS.rst new file mode 100644 index 00000000000..583575bf111 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_EAS.rst @@ -0,0 +1,26 @@ +.. _panukb_ld_variant_indices_EAS: + +panukb_ld_variant_indices_EAS +============================= + +* **Versions:** 0.2 +* **Reference genome builds:** GRCh37 +* **Type:** :class:`hail.Table` + +Schema (0.2, GRCh37) +~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'n_samples': int32 + 'pop': str + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'idx': int64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_EUR.rst b/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_EUR.rst new file mode 100644 index 00000000000..97c87a3cb93 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_EUR.rst @@ -0,0 +1,26 @@ +.. _panukb_ld_variant_indices_EUR: + +panukb_ld_variant_indices_EUR +============================= + +* **Versions:** 0.2 +* **Reference genome builds:** GRCh37 +* **Type:** :class:`hail.Table` + +Schema (0.2, GRCh37) +~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'n_samples': int32 + 'pop': str + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'idx': int64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_MID.rst b/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_MID.rst new file mode 100644 index 00000000000..b6a7969b6f1 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/panukb_ld_variant_indices_MID.rst @@ -0,0 +1,26 @@ +.. _panukb_ld_variant_indices_MID: + +panukb_ld_variant_indices_MID +============================= + +* **Versions:** 0.2 +* **Reference genome builds:** GRCh37 +* **Type:** :class:`hail.Table` + +Schema (0.2, GRCh37) +~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'n_samples': int32 + 'pop': str + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'idx': int64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/install/linux.rst b/hail/python/hail/docs/install/linux.rst index 66eb949ccc4..8af0649b4ae 100644 --- a/hail/python/hail/docs/install/linux.rst +++ b/hail/python/hail/docs/install/linux.rst @@ -3,7 +3,7 @@ Install Hail on GNU/Linux ========================= - Install Java 8. -- Install Python 3.6 or 3.7. +- Install Python 3.6+. - Install a recent version of the C and C++ standard libraries. GCC 5.0, LLVM version 3.4, or any later versions suffice. 
- Install BLAS and LAPACK. diff --git a/hail/python/hail/docs/install/macosx.rst b/hail/python/hail/docs/install/macosx.rst index 0a8cf198408..a826a4520d4 100644 --- a/hail/python/hail/docs/install/macosx.rst +++ b/hail/python/hail/docs/install/macosx.rst @@ -3,6 +3,6 @@ Install Hail on Mac OS X ======================== - Install `Java 8 `__. -- Install Python 3.6 or 3.7. We recommend `Miniconda `__; however, the latest version of Miniconda installs Python 3.8 by default. Please follow `these instructions `__ to create a Python 3.6 or 3.7 environment. +- Install Python 3.6+. We recommend `Miniconda `__. - Open Terminal.app and execute ``pip install hail``. - `Run your first Hail query! `__ diff --git a/hail/python/hail/docs/install/other-cluster.rst b/hail/python/hail/docs/install/other-cluster.rst index b294e46bb1e..af7514b9114 100644 --- a/hail/python/hail/docs/install/other-cluster.rst +++ b/hail/python/hail/docs/install/other-cluster.rst @@ -5,13 +5,13 @@ Install Hail on a Spark Cluster If you are using Google Dataproc, please see `these simpler instructions `__. -Hail should work with any Spark 2.4.x cluster built with Scala 2.11. +Hail should work with any Spark 3.1.1 cluster built with Scala 2.12. Hail needs to be built from source on the leader node. Building Hail from source requires: - Java 8 JDK. -- Python 3.6 or 3.7. +- Python 3.6+. - A recent C and a C++ compiler, GCC 5.0, LLVM 3.4, or later versions of either suffice. - BLAS and LAPACK. diff --git a/hail/python/hail/experimental/datasets.json b/hail/python/hail/experimental/datasets.json index 59507dd4fdc..f6cac416040 100644 --- a/hail/python/hail/experimental/datasets.json +++ b/hail/python/hail/experimental/datasets.json @@ -1581,5 +1581,311 @@ "version": "1.1" } ] + }, + "panukb_ld_block_matrix_AFR": { + "description": "Pan-UKB: linkage disequilibrium (LD) matrix Hail BlockMatrix for African ancestry population. To determine which row/column corresponds to which variant, see the associated variant indices Hail Table: panukb_ld_variant_indices_AFR.", + "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview", + "versions": [ + { + "reference_genome": "GRCh37", + "url": { + "aws": { + "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.AFR.ldadj.bm" + } + }, + "version": "0.2" + } + ] + }, + "panukb_ld_block_matrix_AMR": { + "description": "Pan-UKB: linkage disequilibrium (LD) matrix Hail BlockMatrix for Admixed American ancestry population. To determine which row/column corresponds to which variant, see the associated variant indices Hail Table: panukb_ld_variant_indices_AMR.", + "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview", + "versions": [ + { + "reference_genome": "GRCh37", + "url": { + "aws": { + "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.AMR.ldadj.bm" + } + }, + "version": "0.2" + } + ] + }, + "panukb_ld_block_matrix_CSA": { + "description": "Pan-UKB: linkage disequilibrium (LD) matrix Hail BlockMatrix for Central/South Asian ancestry population. To determine which row/column corresponds to which variant, see the associated variant indices Hail Table: panukb_ld_variant_indices_CSA.", + "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview", + "versions": [ + { + "reference_genome": "GRCh37", + "url": { + "aws": { + "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.CSA.ldadj.bm" + } + }, + "version": "0.2" + } + ] + }, + "panukb_ld_block_matrix_EAS": { + "description": "Pan-UKB: linkage disequilibrium (LD) matrix Hail BlockMatrix for East Asian ancestry population. 
To determine which row/column corresponds to which variant, see the associated variant indices Hail Table: panukb_ld_variant_indices_EAS.", + "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview", + "versions": [ + { + "reference_genome": "GRCh37", + "url": { + "aws": { + "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.EAS.ldadj.bm" + } + }, + "version": "0.2" + } + ] + }, + "panukb_ld_block_matrix_EUR": { + "description": "Pan-UKB: linkage disequilibrium (LD) matrix Hail BlockMatrix for European ancestry population. To determine which row/column corresponds to which variant, see the associated variant indices Hail Table: panukb_ld_variant_indices_EUR.", + "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview", + "versions": [ + { + "reference_genome": "GRCh37", + "url": { + "aws": { + "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.EUR.ldadj.bm" + } + }, + "version": "0.2" + } + ] + }, + "panukb_ld_block_matrix_MID": { + "description": "Pan-UKB: linkage disequilibrium (LD) matrix Hail BlockMatrix for Middle Eastern ancestry population. To determine which row/column corresponds to which variant, see the associated variant indices Hail Table: panukb_ld_variant_indices_MID.", + "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview", + "versions": [ + { + "reference_genome": "GRCh37", + "url": { + "aws": { + "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.MID.ldadj.bm" + } + }, + "version": "0.2" + } + ] + }, + "panukb_ld_scores_AFR": { + "description": "Pan-UKB: linkage disequilibrium (LD) scores Hail Table for African ancestry population.", + "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview", + "versions": [ + { + "reference_genome": "GRCh37", + "url": { + "aws": { + "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.AFR.ldscore.ht" + } + }, + "version": "0.2" + } + ] + }, + "panukb_ld_scores_AMR": { + "description": "Pan-UKB: linkage disequilibrium (LD) scores Hail Table for Admixed American ancestry population.", + "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview", + "versions": [ + { + "reference_genome": "GRCh37", + "url": { + "aws": { + "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.AMR.ldscore.ht" + } + }, + "version": "0.2" + } + ] + }, + "panukb_ld_scores_CSA": { + "description": "Pan-UKB: linkage disequilibrium (LD) scores Hail Table for Central/South Asian ancestry population.", + "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview", + "versions": [ + { + "reference_genome": "GRCh37", + "url": { + "aws": { + "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.CSA.ldscore.ht" + } + }, + "version": "0.2" + } + ] + }, + "panukb_ld_scores_EAS": { + "description": "Pan-UKB: linkage disequilibrium (LD) scores Hail Table for East Asian ancestry population.", + "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview", + "versions": [ + { + "reference_genome": "GRCh37", + "url": { + "aws": { + "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.EAS.ldscore.ht" + } + }, + "version": "0.2" + } + ] + }, + "panukb_ld_scores_EUR": { + "description": "Pan-UKB: linkage disequilibrium (LD) scores Hail Table for European ancestry population.", + "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview", + "versions": [ + { + "reference_genome": "GRCh37", + "url": { + "aws": { + "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.EUR.ldscore.ht" + } + }, + "version": "0.2" + } + ] + }, + "panukb_ld_scores_MID": { + "description": "Pan-UKB: linkage disequilibrium (LD) scores Hail Table for Middle Eastern 
ancestry population.", + "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview", + "versions": [ + { + "reference_genome": "GRCh37", + "url": { + "aws": { + "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.MID.ldscore.ht" + } + }, + "version": "0.2" + } + ] + }, + "panukb_ld_variant_indices_AFR": { + "description": "Pan-UKB: variant indices Hail Table for African ancestry population.", + "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview", + "versions": [ + { + "reference_genome": "GRCh37", + "url": { + "aws": { + "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.AFR.ldadj.variant.ht" + } + }, + "version": "0.2" + } + ] + }, + "panukb_ld_variant_indices_AMR": { + "description": "Pan-UKB: variant indices Hail Table for Admixed American ancestry population.", + "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview", + "versions": [ + { + "reference_genome": "GRCh37", + "url": { + "aws": { + "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.AMR.ldadj.variant.ht" + } + }, + "version": "0.2" + } + ] + }, + "panukb_ld_variant_indices_CSA": { + "description": "Pan-UKB: variant indices Hail Table for Central/South Asian ancestry population.", + "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview", + "versions": [ + { + "reference_genome": "GRCh37", + "url": { + "aws": { + "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.CSA.ldadj.variant.ht" + } + }, + "version": "0.2" + } + ] + }, + "panukb_ld_variant_indices_EAS": { + "description": "Pan-UKB: variant indices Hail Table for East Asian ancestry population.", + "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview", + "versions": [ + { + "reference_genome": "GRCh37", + "url": { + "aws": { + "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.EAS.ldadj.variant.ht" + } + }, + "version": "0.2" + } + ] + }, + "panukb_ld_variant_indices_EUR": { + "description": "Pan-UKB: variant indices Hail Table for European ancestry population.", + "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview", + "versions": [ + { + "reference_genome": "GRCh37", + "url": { + "aws": { + "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.EUR.ldadj.variant.ht" + } + }, + "version": "0.2" + } + ] + }, + "panukb_ld_variant_indices_MID": { + "description": "Pan-UKB: variant indices Hail Table for Middle Eastern ancestry population.", + "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview", + "versions": [ + { + "reference_genome": "GRCh37", + "url": { + "aws": { + "us": "s3://pan-ukb-us-east-1/ld_release/UKBB.MID.ldadj.variant.ht" + } + }, + "version": "0.2" + } + ] + }, + "panukb_meta_analysis": { + "description": "Pan-UKB: pan-ancestry GWAS of UK Biobank, full meta-analysis Hail MatrixTable.", + "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview/index.html", + "versions": [ + { + "reference_genome": "GRCh37", + "url": { + "aws": { + "us": "s3://pan-ukb-us-east-1/sumstats_release/meta_analysis.mt" + }, + "gcp": { + "us": "gs://ukb-diverse-pops-public/sumstats_release/meta_analysis.mt" + } + }, + "version": "0.1" + } + ] + }, + "panukb_summary_stats": { + "description": "Pan-UKB: pan-ancestry GWAS of UK Biobank, full summary statistics Hail MatrixTable.", + "url": "https://pan.ukbb.broadinstitute.org/docs/technical-overview/index.html", + "versions": [ + { + "reference_genome": "GRCh37", + "url": { + "aws": { + "us": "s3://pan-ukb-us-east-1/sumstats_release/results_full.mt" + }, + "gcp": { + "us": "gs://ukb-diverse-pops-public/sumstats_release/results_full.mt" + } + }, + "version": 
"0.1" + } + ] } } diff --git a/hail/python/hail/experimental/table_ndarray_utils.py b/hail/python/hail/experimental/table_ndarray_utils.py index e107db97520..7d9fcef5cef 100644 --- a/hail/python/hail/experimental/table_ndarray_utils.py +++ b/hail/python/hail/experimental/table_ndarray_utils.py @@ -3,7 +3,7 @@ from hail.utils.java import Env -def mt_to_table_of_ndarray(entry_expr, block_size=16): +def mt_to_table_of_ndarray(entry_expr, block_size=16, return_checkpointed_table_also=False): check_entry_indexed('mt_to_table_of_ndarray/entry_expr', entry_expr) mt = matrix_table_source('mt_to_table_of_ndarray/entry_expr', entry_expr) @@ -35,8 +35,11 @@ def get_even_partitioning(ht, partition_size, total_num_rows): ht = ht.checkpoint(temp_file_name) num_rows = ht.count() new_partitioning = get_even_partitioning(ht, block_size, num_rows) - ht = hl.read_table(temp_file_name, _intervals=new_partitioning) + new_part_ht = hl.read_table(temp_file_name, _intervals=new_partitioning) - grouped = ht._group_within_partitions("groups", block_size) + grouped = new_part_ht._group_within_partitions("groups", block_size) A = grouped.select(ndarray=hl.nd.array(grouped.groups.map(lambda group: group.xs))) + + if return_checkpointed_table_also: + return A, ht return A diff --git a/hail/python/hail/linalg/blockmatrix.py b/hail/python/hail/linalg/blockmatrix.py index 3190aa6ac8f..b7231575bfc 100644 --- a/hail/python/hail/linalg/blockmatrix.py +++ b/hail/python/hail/linalg/blockmatrix.py @@ -1375,6 +1375,8 @@ def __add__(self, b): ------- :class:`.BlockMatrix` """ + if isinstance(b, (int, float)): + return self._map_dense(lambda entry: entry + b) return self._apply_map2(BlockMatrix._binary_op('+'), b, sparsity_strategy="Union") @typecheck_method(b=oneof(numeric, np.ndarray, block_matrix_type)) @@ -1389,6 +1391,8 @@ def __sub__(self, b): ------- :class:`.BlockMatrix` """ + if isinstance(b, (int, float)): + return self._map_dense(lambda entry: entry - b) return self._apply_map2(BlockMatrix._binary_op('-'), b, sparsity_strategy="Union") @typecheck_method(b=oneof(numeric, np.ndarray, block_matrix_type)) @@ -1403,6 +1407,9 @@ def __mul__(self, b): ------- :class:`.BlockMatrix` """ + if isinstance(b, (int, float)): + # sparse since multiplying by zero is zero + return self._map_sparse(lambda entry: entry * b) return self._apply_map2(BlockMatrix._binary_op('*'), b, sparsity_strategy="Intersection") @typecheck_method(b=oneof(numeric, np.ndarray, block_matrix_type)) @@ -1417,6 +1424,9 @@ def __truediv__(self, b): ------- :class:`.BlockMatrix` """ + if isinstance(b, (int, float)): + # sparse since dividing by zero is zero + return self._map_sparse(lambda entry: entry / b) return self._apply_map2(BlockMatrix._binary_op('/'), b, sparsity_strategy="NeedsDense") @typecheck_method(b=numeric) @@ -1528,6 +1538,12 @@ def __pow__(self, x): """ return self._apply_map(lambda i: i ** x, needs_dense=False) + def _map_dense(self, func): + return self._apply_map(func, True) + + def _map_sparse(self, func): + return self._apply_map(func, False) + def sqrt(self): """Element-wise square root. diff --git a/hail/python/hail/methods/pca.py b/hail/python/hail/methods/pca.py index 53583baf123..db8f42769f4 100644 --- a/hail/python/hail/methods/pca.py +++ b/hail/python/hail/methods/pca.py @@ -300,10 +300,10 @@ def _blanczos_pca(entry_expr, k=10, compute_loadings=False, q_iterations=2, over (:obj:`list` of :obj:`float`, :class:`.Table`, :class:`.Table`) List of eigenvalues, table with column scores, table with row loadings. 
""" - + check_entry_indexed('mt_to_table_of_ndarray/entry_expr', entry_expr) mt = matrix_table_source('pca/entry_expr', entry_expr) - A = mt_to_table_of_ndarray(entry_expr, block_size) + A, ht = mt_to_table_of_ndarray(entry_expr, block_size, return_checkpointed_table_also=True) A = A.persist() # Set Parameters @@ -365,10 +365,12 @@ def hailBlanczos(A, G, k, q): cols_and_scores = hl.zip(A.index_globals().cols, hail_array_scores).map(lambda tup: tup[0].annotate(scores=tup[1])) st = hl.Table.parallelize(cols_and_scores, key=list(mt.col_key)) - lt = mt.rows().select() + lt = ht.select() lt = lt.annotate_globals(U=U) - lt = lt.add_index() - lt = lt.annotate(loadings=lt.U[lt.idx, :]._data_array()).select_globals() + idx_name = '_tmp_pca_loading_index' + lt = lt.add_index(idx_name) + lt = lt.annotate(loadings=lt.U[lt[idx_name], :]._data_array()).select_globals() + lt = lt.drop(lt[idx_name]) if compute_loadings: return eigens, st, lt diff --git a/hail/python/hailtop/batch_client/aioclient.py b/hail/python/hailtop/batch_client/aioclient.py index 976cebd9ebf..0eb986ecbdf 100644 --- a/hail/python/hailtop/batch_client/aioclient.py +++ b/hail/python/hailtop/batch_client/aioclient.py @@ -203,6 +203,8 @@ async def is_complete(self): # { # batch_id: int # job_id: int + # user: str + # billing_project: str # name: optional(str) # state: str (Ready, Running, Success, Error, Failure, Cancelled) # exit_code: optional(int) @@ -349,7 +351,8 @@ async def get_job_log(self, job_id: int) -> Optional[Dict[str, Any]]: return await self._client.get_job_log(self.id, job_id) # { - # id: int, + # id: int + # user: str # billing_project: str # token: str # state: str, (open, failure, cancelled, success, running) diff --git a/hail/python/hailtop/batch_client/client.py b/hail/python/hailtop/batch_client/client.py index 530fe1592d2..d4a67d51839 100644 --- a/hail/python/hailtop/batch_client/client.py +++ b/hail/python/hailtop/batch_client/client.py @@ -82,6 +82,8 @@ def is_complete(self): # { # batch_id: int # job_id: int + # user: str + # billing_project: str # name: optional(str) # state: str (Ready, Running, Success, Error, Failure, Cancelled) # exit_code: optional(int) @@ -128,7 +130,8 @@ def cancel(self): async_to_blocking(self._async_batch.cancel()) # { - # id: int, + # id: int + # user: str # billing_project: str # token: str # state: str, (open, failure, cancelled, success, running) diff --git a/hail/python/hailtop/hailctl/dataproc/start.py b/hail/python/hailtop/hailctl/dataproc/start.py index b8aff6137fb..81e341f1e2f 100755 --- a/hail/python/hailtop/hailctl/dataproc/start.py +++ b/hail/python/hailtop/hailctl/dataproc/start.py @@ -136,7 +136,7 @@ ANNOTATION_DB_BUCKETS = ["hail-datasets-us", "hail-datasets-eu", "gnomad-public-requester-pays"] -IMAGE_VERSION = '1.4-debian9' +IMAGE_VERSION = '2.0.6-debian10' def init_parser(parser): diff --git a/hail/python/requirements.txt b/hail/python/requirements.txt index c3909bbd60f..86353224136 100644 --- a/hail/python/requirements.txt +++ b/hail/python/requirements.txt @@ -13,7 +13,7 @@ numpy<2 pandas>=1.1.0,<1.1.5 parsimonious<0.9 PyJWT -pyspark>=2.4,<2.4.2 +pyspark>=3.1.1,<3.2.0 python-json-logger==0.1.11 requests==2.22.0 scipy>1.2,<1.7 diff --git a/hail/python/test/hail/expr/test_ndarrays.py b/hail/python/test/hail/expr/test_ndarrays.py index f4f7743bdc4..715d3e6b4cb 100644 --- a/hail/python/test/hail/expr/test_ndarrays.py +++ b/hail/python/test/hail/expr/test_ndarrays.py @@ -59,7 +59,7 @@ def test_ndarray_ref(): with pytest.raises(HailUserError) as exc: 
hl.eval(hl.nd.array([1, 2, 3])[4]) - assert "Index 4 is out of bounds for axis 0 with size 3" in str(exc) + assert "Index 4 is out of bounds for axis 0 with size 3" in str(exc.value) @fails_service_backend() @@ -295,31 +295,31 @@ def test_ndarray_reshape(): with pytest.raises(FatalError) as exc: hl.eval(hl.literal(np_cube).reshape((-1, -1))) - assert "more than one -1" in str(exc) + assert "more than one -1" in str(exc.value) with pytest.raises(FatalError) as exc: hl.eval(hl.literal(np_cube).reshape((20,))) - assert "requested shape is incompatible with number of elements" in str(exc) + assert "requested shape is incompatible with number of elements" in str(exc.value) with pytest.raises(FatalError) as exc: hl.eval(a.reshape((3,))) - assert "requested shape is incompatible with number of elements" in str(exc) + assert "requested shape is incompatible with number of elements" in str(exc.value) with pytest.raises(FatalError) as exc: hl.eval(a.reshape(())) - assert "requested shape is incompatible with number of elements" in str(exc) + assert "requested shape is incompatible with number of elements" in str(exc.value) with pytest.raises(FatalError) as exc: hl.eval(hl.literal(np_cube).reshape((0, 2, 2))) - assert "requested shape is incompatible with number of elements" in str(exc) + assert "requested shape is incompatible with number of elements" in str(exc.value) with pytest.raises(FatalError) as exc: hl.eval(hl.literal(np_cube).reshape((2, 2, -2))) - assert "must contain only nonnegative numbers or -1" in str(exc) + assert "must contain only nonnegative numbers or -1" in str(exc.value) with pytest.raises(FatalError) as exc: hl.eval(shape_zero.reshape((0, -1))) - assert "Can't reshape" in str(exc) + assert "Can't reshape" in str(exc.value) with pytest.raises(TypeError): a.reshape(hl.tuple(['4', '5'])) @@ -616,11 +616,11 @@ def test_ndarray_matmul(): with pytest.raises(FatalError) as exc: hl.eval(r @ r) - assert "Matrix dimensions incompatible: 3 2" in str(exc) + assert "Matrix dimensions incompatible: 3 2" in str(exc.value) with pytest.raises(FatalError) as exc: hl.eval(hl.nd.array([1, 2]) @ hl.nd.array([1, 2, 3])) - assert "Matrix dimensions incompatible" in str(exc) + assert "Matrix dimensions incompatible" in str(exc.value) def test_ndarray_big(): @@ -651,7 +651,7 @@ def test_ndarray_arange(): with pytest.raises(FatalError) as exc: hl.eval(hl.nd.arange(5, 20, 0)) - assert "Array range cannot have step size 0" in str(exc) + assert "Array range cannot have step size 0" in str(exc.value) def test_ndarray_mixed(): @@ -680,7 +680,7 @@ def test_ndarray_diagonal(): with pytest.raises(AssertionError) as exc: hl.nd.diagonal(hl.nd.array([1, 2])) - assert "2 dimensional" in str(exc) + assert "2 dimensional" in str(exc.value) def test_ndarray_qr(): @@ -782,11 +782,11 @@ def assert_same_qr(hl_ndarray, np_ndarray): with pytest.raises(ValueError) as exc: hl.nd.qr(wiki_example, mode="invalid") - assert "Unrecognized mode" in str(exc) + assert "Unrecognized mode" in str(exc.value) with pytest.raises(AssertionError) as exc: hl.nd.qr(hl.nd.arange(6)) - assert "requires 2 dimensional" in str(exc) + assert "requires 2 dimensional" in str(exc.value) def test_svd(): @@ -1053,4 +1053,4 @@ def test_agg_ndarray_sum(): mismatched = hl.utils.range_table(5) mismatched = mismatched.annotate(x=hl.nd.ones((mismatched.idx,))) mismatched.aggregate(hl.agg.ndarray_sum(mismatched.x)) - assert "Can't sum" in str(exc) + assert "Can't sum" in str(exc.value) diff --git a/hail/python/test/hail/linalg/test_linalg.py 
b/hail/python/test/hail/linalg/test_linalg.py index 02d56855b30..289b284e44c 100644 --- a/hail/python/test/hail/linalg/test_linalg.py +++ b/hail/python/test/hail/linalg/test_linalg.py @@ -1097,11 +1097,11 @@ def test_filtering(self): with pytest.raises(ValueError) as exc: bm.filter_cols([0]).filter_cols([3]).to_numpy() - assert "index" in str(exc) + assert "index" in str(exc.value) with pytest.raises(ValueError) as exc: bm.filter_rows([0]).filter_rows([3]).to_numpy() - assert "index" in str(exc) + assert "index" in str(exc.value) @skip_unless_spark_backend() def test_sparsify_blocks(self): diff --git a/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala b/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala index 15fffb0d966..c94265dc509 100644 --- a/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala +++ b/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala @@ -4,7 +4,6 @@ import java.io._ import java.net._ import java.nio.charset.StandardCharsets import java.util.concurrent._ - import is.hail.HailContext import is.hail.annotations._ import is.hail.asm4s._ @@ -33,6 +32,7 @@ import org.json4s.{DefaultFormats, Formats} import org.newsclub.net.unix.{AFUNIXSocket, AFUNIXSocketAddress, AFUNIXServerSocket} +import java.nio.charset.Charset import scala.collection.mutable import scala.reflect.ClassTag import scala.annotation.switch @@ -89,7 +89,7 @@ object Worker { val fs = retryTransientErrors { using(new FileInputStream(s"$scratchDir/gsa-key/key.json")) { is => - new GoogleStorageFS(IOUtils.toString(is)) + new GoogleStorageFS(IOUtils.toString(is, Charset.defaultCharset())) } } diff --git a/hail/src/main/scala/is/hail/expr/AnnotationImpex.scala b/hail/src/main/scala/is/hail/expr/AnnotationImpex.scala index 78da7ed33ab..c04d672e55d 100644 --- a/hail/src/main/scala/is/hail/expr/AnnotationImpex.scala +++ b/hail/src/main/scala/is/hail/expr/AnnotationImpex.scala @@ -140,10 +140,10 @@ object JSONAnnotationImpex { } } - def irImportAnnotation(s: String, t: Type): Row = { + def irImportAnnotation(s: String, t: Type, warnContext: mutable.HashSet[String]): Row = { try { // wraps in a Row to handle returned missingness - Row(importAnnotation(JsonMethods.parse(s), t, true, null)) + Row(importAnnotation(JsonMethods.parse(s), t, true, warnContext)) } catch { case e: Throwable => fatal(s"Error parsing JSON:\n type: $t\n value: $s", e) diff --git a/hail/src/main/scala/is/hail/expr/ir/AbstractMatrixTableSpec.scala b/hail/src/main/scala/is/hail/expr/ir/AbstractMatrixTableSpec.scala index cc2c4e57dd1..33338e79c4e 100644 --- a/hail/src/main/scala/is/hail/expr/ir/AbstractMatrixTableSpec.scala +++ b/hail/src/main/scala/is/hail/expr/ir/AbstractMatrixTableSpec.scala @@ -21,8 +21,7 @@ object RelationalSpec { implicit val formats: Formats = new DefaultFormats() { override val typeHints = ShortTypeHints(List( classOf[ComponentSpec], classOf[RVDComponentSpec], classOf[PartitionCountsComponentSpec], - classOf[RelationalSpec], classOf[MatrixTableSpec], classOf[TableSpec])) - override val typeHintFieldName = "name" + classOf[RelationalSpec], classOf[MatrixTableSpec], classOf[TableSpec]), typeHintFieldName="name") } + new TableTypeSerializer + new MatrixTypeSerializer @@ -150,8 +149,7 @@ object MatrixTableSpec { def fromJValue(fs: FS, path: String, jv: JValue): MatrixTableSpec = { implicit val formats: Formats = new DefaultFormats() { override val typeHints = ShortTypeHints(List( - classOf[ComponentSpec], classOf[RVDComponentSpec], classOf[PartitionCountsComponentSpec])) - 
override val typeHintFieldName = "name" + classOf[ComponentSpec], classOf[RVDComponentSpec], classOf[PartitionCountsComponentSpec]), typeHintFieldName = "name") } + new MatrixTypeSerializer val params = jv.extract[MatrixTableSpecParameters] diff --git a/hail/src/main/scala/is/hail/expr/ir/BlockMatrixIR.scala b/hail/src/main/scala/is/hail/expr/ir/BlockMatrixIR.scala index 4c4e866fe84..bbe0e9289d4 100644 --- a/hail/src/main/scala/is/hail/expr/ir/BlockMatrixIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/BlockMatrixIR.scala @@ -92,8 +92,8 @@ case class BlockMatrixRead(reader: BlockMatrixReader) extends BlockMatrixIR { object BlockMatrixReader { implicit val formats: Formats = new DefaultFormats() { override val typeHints = ShortTypeHints( - List(classOf[BlockMatrixNativeReader], classOf[BlockMatrixBinaryReader], classOf[BlockMatrixPersistReader])) - override val typeHintFieldName: String = "name" + List(classOf[BlockMatrixNativeReader], classOf[BlockMatrixBinaryReader], classOf[BlockMatrixPersistReader]), + typeHintFieldName = "name") } def fromJValue(ctx: ExecuteContext, jv: JValue): BlockMatrixReader = { @@ -270,6 +270,17 @@ case class BlockMatrixMap(child: BlockMatrixIR, eltName: String, f: IR, needsDen override protected[ir] def execute(ctx: ExecuteContext): BlockMatrix = { val prev = child.execute(ctx) + val functionArgs = f match { + case ApplyUnaryPrimOp(_, arg1) => IndexedSeq(arg1) + case Apply(_, _, args, _) => args + case ApplyBinaryPrimOp(_, l, r) => IndexedSeq(l, r) + } + + assert(functionArgs.forall(ir => IsConstant(ir) || ir.isInstanceOf[Ref]), + "Spark backend without lowering does not support general mapping over " + + "BlockMatrix entries. Use predefined functions like `BlockMatrix.abs`.") + + val (name, breezeF): (String, DenseMatrix[Double] => DenseMatrix[Double]) = f match { case ApplyUnaryPrimOp(Negate(), _) => ("negate", BlockMatrix.negationOp) case Apply("abs", _, _, _) => ("abs", numerics.abs(_)) @@ -457,7 +468,7 @@ case class BlockMatrixDot(left: BlockMatrixIR, right: BlockMatrixIR) extends Blo val (tensorShape, isRowVector) = BlockMatrixIR.matrixShapeToTensorShape(lRows, rCols) val sparsity = if (left.typ.isSparse || right.typ.isSparse) - BlockMatrixSparsity( + BlockMatrixSparsity.constructFromShapeAndFunction( BlockMatrixType.numBlocks(lRows, blockSize), BlockMatrixType.numBlocks(rCols, blockSize)) { (i: Int, j: Int) => Array.tabulate(BlockMatrixType.numBlocks(rCols, blockSize)) { k => @@ -527,14 +538,14 @@ case class BlockMatrixBroadcast( BlockMatrixSparsity.dense case IndexedSeq(0) => // broadcast col vector assert(Set(1, shape(0)) == Set(child.typ.nRows, child.typ.nCols)) - BlockMatrixSparsity(nRowBlocks, nColBlocks)((i: Int, j: Int) => child.typ.hasBlock(0 -> j)) + BlockMatrixSparsity.constructFromShapeAndFunction(nRowBlocks, nColBlocks)((i: Int, j: Int) => child.typ.hasBlock(0 -> j)) case IndexedSeq(1) => // broadcast row vector assert(Set(1, shape(1)) == Set(child.typ.nRows, child.typ.nCols)) - BlockMatrixSparsity(nRowBlocks, nColBlocks)((i: Int, j: Int) => child.typ.hasBlock(i -> 0)) + BlockMatrixSparsity.constructFromShapeAndFunction(nRowBlocks, nColBlocks)((i: Int, j: Int) => child.typ.hasBlock(i -> 0)) case IndexedSeq(0, 0) => // diagonal as col vector assert(shape(0) == 1L) assert(shape(1) == java.lang.Math.min(child.typ.nRows, child.typ.nCols)) - BlockMatrixSparsity(nRowBlocks, nColBlocks)((_, j: Int) => child.typ.hasBlock(j -> j)) + BlockMatrixSparsity.constructFromShapeAndFunction(nRowBlocks, nColBlocks)((_, j: Int) => child.typ.hasBlock(j -> j)) 
case IndexedSeq(1, 0) => // transpose assert(child.typ.blockSize == blockSize) assert(shape(0) == child.typ.nCols && shape(1) == child.typ.nRows) @@ -613,11 +624,11 @@ case class BlockMatrixAgg( outIndexExpr match { case IndexedSeq() => BlockMatrixSparsity.dense case IndexedSeq(1) => // col vector result; agg over row - BlockMatrixSparsity(child.typ.nRowBlocks, 1) { (i, _) => + BlockMatrixSparsity.constructFromShapeAndFunction(child.typ.nRowBlocks, 1) { (i, _) => (0 until child.typ.nColBlocks).exists(j => child.typ.hasBlock(i -> j)) } case IndexedSeq(0) => // row vector result; agg over col - BlockMatrixSparsity(1, child.typ.nColBlocks) { (_, j) => + BlockMatrixSparsity.constructFromShapeAndFunction(1, child.typ.nColBlocks) { (_, j) => (0 until child.typ.nRowBlocks).exists(i => child.typ.hasBlock(i -> j)) } } @@ -733,7 +744,7 @@ case class BandSparsifier(blocksOnly: Boolean, l: Long, u: Long) extends BlockMa val leftBuffer = java.lang.Math.floorDiv(-l, childType.blockSize) val rightBuffer = java.lang.Math.floorDiv(u, childType.blockSize) - BlockMatrixSparsity(childType.nRowBlocks, childType.nColBlocks) { (i, j) => + BlockMatrixSparsity.constructFromShapeAndFunction(childType.nRowBlocks, childType.nColBlocks) { (i, j) => j >= (i - leftBuffer) && j <= (i + rightBuffer) && childType.hasBlock(i -> j) } } @@ -753,7 +764,7 @@ case class RowIntervalSparsifier(blocksOnly: Boolean, starts: IndexedSeq[Long], val blockStarts = starts.grouped(childType.blockSize).map(idxs => childType.getBlockIdx(idxs.min)).toArray val blockStops = stops.grouped(childType.blockSize).map(idxs => childType.getBlockIdx(idxs.max - 1)).toArray - BlockMatrixSparsity(childType.nRowBlocks, childType.nColBlocks) { (i, j) => + BlockMatrixSparsity.constructFromShapeAndFunction(childType.nRowBlocks, childType.nColBlocks) { (i, j) => blockStarts(i) <= j && blockStops(i) >= j && childType.hasBlock(i -> j) } } @@ -800,7 +811,7 @@ case class PerBlockSparsifier(blocks: IndexedSeq[Int]) extends BlockMatrixSparsi val blockSet = blocks.toSet override def definedBlocks(childType: BlockMatrixType): BlockMatrixSparsity = { - BlockMatrixSparsity(childType.nRowBlocks, childType.nColBlocks){ case(i: Int, j: Int) => + BlockMatrixSparsity.constructFromShapeAndFunction(childType.nRowBlocks, childType.nColBlocks){ case(i: Int, j: Int) => blockSet.contains(i + j * childType.nRowBlocks) } } diff --git a/hail/src/main/scala/is/hail/expr/ir/BlockMatrixWriter.scala b/hail/src/main/scala/is/hail/expr/ir/BlockMatrixWriter.scala index bd81517bec7..cc8dc82003b 100644 --- a/hail/src/main/scala/is/hail/expr/ir/BlockMatrixWriter.scala +++ b/hail/src/main/scala/is/hail/expr/ir/BlockMatrixWriter.scala @@ -22,8 +22,7 @@ object BlockMatrixWriter { override val typeHints = ShortTypeHints( List(classOf[BlockMatrixNativeWriter], classOf[BlockMatrixBinaryWriter], classOf[BlockMatrixRectanglesWriter], classOf[BlockMatrixBinaryMultiWriter], classOf[BlockMatrixTextMultiWriter], - classOf[BlockMatrixPersistWriter], classOf[BlockMatrixNativeMultiWriter])) - override val typeHintFieldName: String = "name" + classOf[BlockMatrixPersistWriter], classOf[BlockMatrixNativeMultiWriter]), typeHintFieldName = "name") } } diff --git a/hail/src/main/scala/is/hail/expr/ir/Emit.scala b/hail/src/main/scala/is/hail/expr/ir/Emit.scala index 7035797f6e2..c905f5cd27b 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Emit.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Emit.scala @@ -515,7 +515,7 @@ class Emit[C]( case If(cond, cnsq, altr) => assert(cnsq.typ == TVoid && altr.typ == TVoid) - 
emitI(cond).consume(cb, {}, m => cb.ifx(m.tcode[Boolean], emitVoid(cnsq), emitVoid(altr))) + emitI(cond).consume(cb, {}, m => cb.ifx(m.asBoolean.boolCode(cb), emitVoid(cnsq), emitVoid(altr))) case Let(name, value, body) => value.pType match { case streamType: PCanonicalStream => @@ -1011,7 +1011,7 @@ class Emit[C]( FastIndexedSeq(typeInfo[Region], keyValTyp.asEmitParam, keyValTyp.asEmitParam), BooleanInfo) isSame.emitWithBuilder { cb => - emitInMethod(cb, compare2).consumeCode[Boolean](cb, true, _.tcode[Boolean]) + emitInMethod(cb, compare2).consumeCode[Boolean](cb, true, _.asBoolean.boolCode(cb)) } val eltIdx = mb.newLocal[Int]("groupByKey_eltIdx") @@ -2186,7 +2186,10 @@ class Emit[C]( val cmp2 = ApplyComparisonOp(EQWithNA(eltVType), In(0, eltType), In(1, eltType)) InferPType(cmp2) val EmitCode(s, m, pv) = emitInMethod(cmp2, discardNext) - discardNext.emit(Code(s, m || pv.tcode[Boolean])) + discardNext.emitWithBuilder { cb => + cb += s + m || pv.asBoolean.boolCode(cb) + } val lessThan = ApplyComparisonOp(Compare(eltVType), In(0, eltType), In(1, eltType)) < 0 InferPType(lessThan) (a, lessThan, sorter.distinctFromSorted { (r, v1, m1, v2, m2) => @@ -2208,7 +2211,10 @@ class Emit[C]( val cmp2 = ApplyComparisonOp(EQWithNA(keyType.virtualType), k0, k1).deepCopy() InferPType(cmp2) val EmitCode(s, m, pv) = emitInMethod(cmp2, discardNext) - discardNext.emit(Code(s, m || pv.tcode[Boolean])) + discardNext.emitWithBuilder { cb => + cb += s + m || pv.asBoolean.boolCode(cb) + } val lessThan = (ApplyComparisonOp(Compare(keyType.virtualType), k0, k1) < 0).deepCopy() InferPType(lessThan) (a, lessThan, Code(sorter.pruneMissing, sorter.distinctFromSorted { (r, v1, m1, v2, m2) => @@ -2963,4 +2969,3 @@ abstract class NDArrayEmitter(val outputShape: IndexedSeq[Value[Long]]) finish(cb) } } - diff --git a/hail/src/main/scala/is/hail/expr/ir/EmitStream.scala b/hail/src/main/scala/is/hail/expr/ir/EmitStream.scala index b04c961a373..6425b263df6 100644 --- a/hail/src/main/scala/is/hail/expr/ir/EmitStream.scala +++ b/hail/src/main/scala/is/hail/expr/ir/EmitStream.scala @@ -1954,7 +1954,7 @@ object EmitStream { emitIR(condIR).flatMap(cb) { cond => val xCond = mb.genFieldThisRef[Boolean]("stream_if_cond") - cb += (xCond := cond.tcode[Boolean]) + cb.assign(xCond, cond.asBoolean.boolCode(cb)) var leftSS: SizedStream = null var rightSS: SizedStream = null val Lmissing = CodeLabel() diff --git a/hail/src/main/scala/is/hail/expr/ir/IR.scala b/hail/src/main/scala/is/hail/expr/ir/IR.scala index e0c06538520..8adf9632e48 100644 --- a/hail/src/main/scala/is/hail/expr/ir/IR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/IR.scala @@ -649,9 +649,8 @@ object PartitionReader { classOf[PartitionNativeReaderIndexed], classOf[PartitionZippedNativeReader], classOf[AbstractTypedCodecSpec], - classOf[TypedCodecSpec]) - ) + BufferSpec.shortTypeHints - override val typeHintFieldName = "name" + classOf[TypedCodecSpec]), + typeHintFieldName = "name") + BufferSpec.shortTypeHints } + new TStructSerializer + new TypeSerializer + @@ -664,9 +663,8 @@ object PartitionWriter { override val typeHints = ShortTypeHints(List( classOf[PartitionNativeWriter], classOf[AbstractTypedCodecSpec], - classOf[TypedCodecSpec]) + classOf[TypedCodecSpec]), typeHintFieldName = "name" ) + BufferSpec.shortTypeHints - override val typeHintFieldName = "name" } + new TStructSerializer + new TypeSerializer + @@ -683,9 +681,9 @@ object MetadataWriter { classOf[RelationalWriter], classOf[RVDSpecMaker], classOf[AbstractTypedCodecSpec], - classOf[TypedCodecSpec]) + 
classOf[TypedCodecSpec]), + typeHintFieldName = "name" ) + BufferSpec.shortTypeHints - override val typeHintFieldName = "name" } + new TStructSerializer + new TypeSerializer + diff --git a/hail/src/main/scala/is/hail/expr/ir/MatrixWriter.scala b/hail/src/main/scala/is/hail/expr/ir/MatrixWriter.scala index e22acac94b8..e72c37ed32a 100644 --- a/hail/src/main/scala/is/hail/expr/ir/MatrixWriter.scala +++ b/hail/src/main/scala/is/hail/expr/ir/MatrixWriter.scala @@ -28,8 +28,7 @@ object MatrixWriter { implicit val formats: Formats = new DefaultFormats() { override val typeHints = ShortTypeHints( List(classOf[MatrixNativeWriter], classOf[MatrixVCFWriter], classOf[MatrixGENWriter], - classOf[MatrixBGENWriter], classOf[MatrixPLINKWriter], classOf[WrappedMatrixWriter])) - override val typeHintFieldName = "name" + classOf[MatrixBGENWriter], classOf[MatrixPLINKWriter], classOf[WrappedMatrixWriter]), typeHintFieldName = "name") } } @@ -339,8 +338,7 @@ case class MatrixPLINKWriter( object MatrixNativeMultiWriter { implicit val formats: Formats = new DefaultFormats() { - override val typeHints = ShortTypeHints(List(classOf[MatrixNativeMultiWriter])) - override val typeHintFieldName = "name" + override val typeHints = ShortTypeHints(List(classOf[MatrixNativeMultiWriter]), typeHintFieldName = "name") } } diff --git a/hail/src/main/scala/is/hail/expr/ir/TableWriter.scala b/hail/src/main/scala/is/hail/expr/ir/TableWriter.scala index 742443d35e9..76cd2ee8723 100644 --- a/hail/src/main/scala/is/hail/expr/ir/TableWriter.scala +++ b/hail/src/main/scala/is/hail/expr/ir/TableWriter.scala @@ -21,10 +21,9 @@ import is.hail.variant.ReferenceGenome import org.json4s.{DefaultFormats, Formats, ShortTypeHints} object TableWriter { - implicit val formats: Formats = new DefaultFormats() { + implicit val formats: Formats = new DefaultFormats() { override val typeHints = ShortTypeHints( - List(classOf[TableNativeWriter], classOf[TableTextWriter])) - override val typeHintFieldName = "name" + List(classOf[TableNativeWriter], classOf[TableTextWriter]), typeHintFieldName = "name") } } diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/RelationalFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/RelationalFunctions.scala index a8bd02549bb..9d36254542f 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/RelationalFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/RelationalFunctions.scala @@ -5,6 +5,7 @@ import is.hail.types.virtual.Type import is.hail.types.{BlockMatrixType, MatrixType, RTable, TableType, TypeWithRequiredness} import is.hail.linalg.BlockMatrix import is.hail.methods._ +import is.hail.utils._ import is.hail.rvd.RVDType import org.json4s.{Extraction, JValue, ShortTypeHints} import org.json4s.jackson.{JsonMethods, Serialization} @@ -129,14 +130,16 @@ object RelationalFunctions { classOf[WrappedMatrixToTableFunction], classOf[WrappedMatrixToValueFunction], classOf[PCRelate] - )) + ), typeHintFieldName = "name") def extractTo[T: Manifest](ctx: ExecuteContext, config: String): T = { val jv = JsonMethods.parse(config) (jv \ "name").extract[String] match { case "VEP" => VEP.fromJValue(ctx.fs, jv).asInstanceOf[T] - case _ => + case _ => { + log.info("JSON: " + jv.toString) jv.extract[T] + } } } diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/StringFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/StringFunctions.scala index fcacbc5be04..e79b3e6752c 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/StringFunctions.scala +++ 
b/hail/src/main/scala/is/hail/expr/ir/functions/StringFunctions.scala @@ -299,8 +299,11 @@ object StringFunctions extends RegistryFunctions { (rType: Type, _: Seq[PType]) => PType.canonical(rType, true), typeParameters = Array(tv("T")) ) { case (er, cb, _, resultType, Array(s: PStringCode)) => - val row = Code.invokeScalaObject2[String, Type, Row](JSONAnnotationImpex.getClass, "irImportAnnotation", - s.loadString(), er.mb.ecb.getType(resultType.virtualType.asInstanceOf[TTuple].types(0))) + val warnCtx = cb.emb.genFieldThisRef[mutable.HashSet[String]]("parse_json_context") + cb.ifx(warnCtx.load().isNull, cb.assign(warnCtx, Code.newInstance[mutable.HashSet[String]]())) + + val row = Code.invokeScalaObject3[String, Type, mutable.HashSet[String], Row](JSONAnnotationImpex.getClass, "irImportAnnotation", + s.loadString(), er.mb.ecb.getType(resultType.virtualType.asInstanceOf[TTuple].types(0)), warnCtx) unwrapReturn(cb, er.region, resultType, row) } diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerBlockMatrixIR.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerBlockMatrixIR.scala index 4c9bb06ab1a..4b7be878a58 100644 --- a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerBlockMatrixIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerBlockMatrixIR.scala @@ -311,8 +311,10 @@ object LowerBlockMatrixIR { MakeTuple.ordered(FastSeq(rows, cols)) }.mapBody { (ctx, body) => NDArraySlice(body, GetField(ctx, "new")) } - case BlockMatrixDensify(child) => unimplemented(bmir) - case BlockMatrixSparsify(child, sparsifier) => unimplemented(bmir) + // Both densify and sparsify change the sparsity pattern tracked on the BlockMatrixType. + case BlockMatrixDensify(child) => lower(child) + case BlockMatrixSparsify(child, sparsifier) => lower(child) + case RelationalLetBlockMatrix(name, value, body) => unimplemented(bmir) case ValueToBlockMatrix(child, shape, blockSize) if !child.typ.isInstanceOf[TArray] => throw new LowererUnsupportedOperation("use explicit broadcast for scalars!") diff --git a/hail/src/main/scala/is/hail/io/BufferSpecs.scala b/hail/src/main/scala/is/hail/io/BufferSpecs.scala index abe80042761..b98fca3aa4a 100644 --- a/hail/src/main/scala/is/hail/io/BufferSpecs.scala +++ b/hail/src/main/scala/is/hail/io/BufferSpecs.scala @@ -66,7 +66,7 @@ object BufferSpec { classOf[LEB128BufferSpec], classOf[BlockingBufferSpec], classOf[StreamBufferSpec] - )) + ), typeHintFieldName = "name") } trait BufferSpec extends Spec { diff --git a/hail/src/main/scala/is/hail/rvd/AbstractRVDSpec.scala b/hail/src/main/scala/is/hail/rvd/AbstractRVDSpec.scala index 657d303e583..e5cb8e3a029 100644 --- a/hail/src/main/scala/is/hail/rvd/AbstractRVDSpec.scala +++ b/hail/src/main/scala/is/hail/rvd/AbstractRVDSpec.scala @@ -30,9 +30,8 @@ object AbstractRVDSpec { classOf[compatibility.IndexSpec], classOf[compatibility.UnpartitionedRVDSpec], classOf[AbstractTypedCodecSpec], - classOf[TypedCodecSpec]) - ) + BufferSpec.shortTypeHints - override val typeHintFieldName = "name" + classOf[TypedCodecSpec]), + typeHintFieldName = "name") + BufferSpec.shortTypeHints } + new TStructSerializer + new TypeSerializer + diff --git a/hail/src/main/scala/is/hail/types/BlockMatrixType.scala b/hail/src/main/scala/is/hail/types/BlockMatrixType.scala index ef05efc0f50..84e957b41c5 100644 --- a/hail/src/main/scala/is/hail/types/BlockMatrixType.scala +++ b/hail/src/main/scala/is/hail/types/BlockMatrixType.scala @@ -7,11 +7,11 @@ import is.hail.linalg.BlockMatrix object BlockMatrixSparsity { private val builder: 
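The parse_json change above threads a mutable.HashSet[String] ("warnCtx") into JSONAnnotationImpex.irImportAnnotation, presumably so that a given import warning is emitted once per context rather than once per row. The same warn-once idea in a short, self-contained Python sketch (names are illustrative):

    import json
    import logging

    log = logging.getLogger("parse_json")
    _warn_context = set()  # plays the role of the warnCtx HashSet

    def warn_once(msg):
        # Log each distinct message a single time, however many rows trigger it.
        if msg not in _warn_context:
            _warn_context.add(msg)
            log.warning(msg)

    for value in ['{"a": 1}', '{"a": 1}', "not json"]:
        try:
            json.loads(value)
        except ValueError as e:
            warn_once(f"could not parse JSON value: {e}")
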
BoxedArrayBuilder[(Int, Int)] = new BoxedArrayBuilder[(Int, Int)] - val dense: BlockMatrixSparsity = BlockMatrixSparsity(None) + val dense: BlockMatrixSparsity = new BlockMatrixSparsity(None: Option[IndexedSeq[(Int, Int)]]) def apply(definedBlocks: IndexedSeq[(Int, Int)]): BlockMatrixSparsity = BlockMatrixSparsity(Some(definedBlocks)) - def apply(nRows: Int, nCols: Int)(exists: (Int, Int) => Boolean): BlockMatrixSparsity = { + def constructFromShapeAndFunction(nRows: Int, nCols: Int)(exists: (Int, Int) => Boolean): BlockMatrixSparsity = { var i = 0 builder.clear() while (i < nRows) { @@ -47,7 +47,7 @@ case class BlockMatrixSparsity(definedBlocks: Option[IndexedSeq[(Int, Int)]]) { def condense(blockOverlaps: => (Array[Array[Int]], Array[Array[Int]])): BlockMatrixSparsity = { definedBlocks.map { _ => val (ro, co) = blockOverlaps - BlockMatrixSparsity(ro.length, co.length) { (i, j) => + BlockMatrixSparsity.constructFromShapeAndFunction(ro.length, co.length) { (i, j) => ro(i).exists(ii => co(j).exists(jj => hasBlock(ii -> jj))) } }.getOrElse(BlockMatrixSparsity.dense) diff --git a/monitoring/Dockerfile.test b/monitoring/Dockerfile.test index 35290dacdd8..c5f31840903 100644 --- a/monitoring/Dockerfile.test +++ b/monitoring/Dockerfile.test @@ -1,4 +1,3 @@ FROM {{ service_base_image.image }} COPY monitoring/test/ /test/ -RUN hail-pip-install pytest-instafail==0.4.1 pytest-asyncio==0.10.0 diff --git a/monitoring/test/test_monitoring.py b/monitoring/test/test_monitoring.py index 81d695a6f4f..9952c0b9c9c 100644 --- a/monitoring/test/test_monitoring.py +++ b/monitoring/test/test_monitoring.py @@ -8,12 +8,11 @@ from hailtop.httpx import client_session import hailtop.utils as utils -pytestmark = pytest.mark.asyncio - logging.basicConfig(level=logging.INFO) log = logging.getLogger(__name__) +@pytest.mark.asyncio async def test_billing_monitoring(): deploy_config = get_deploy_config() monitoring_deploy_config_url = deploy_config.url('monitoring', '/api/v1alpha/billing') diff --git a/query/query/query.py b/query/query/query.py index a6ed04646b9..00af2adb260 100644 --- a/query/query/query.py +++ b/query/query/query.py @@ -264,15 +264,11 @@ async def on_cleanup(app): ) -async def on_shutdown(app): +async def on_shutdown(_): # Filter the asyncio.current_task(), because if we await # the current task we'll end up in a deadlock - remaining_tasks = [ - t for t in asyncio.all_tasks() if t is not asyncio.current_task() - ] - log.info( - f"On shutdown request received, with {len(remaining_tasks)} remaining tasks" - ) + remaining_tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()] + log.info(f"On shutdown request received, with {len(remaining_tasks)} remaining tasks") await asyncio.wait(*remaining_tasks) log.info("All tasks on shutdown have completed") diff --git a/router/deployment.yaml b/router/deployment.yaml index 59380108c13..7c9f2a1e32f 100644 --- a/router/deployment.yaml +++ b/router/deployment.yaml @@ -176,8 +176,7 @@ metadata: app: hello spec: ports: - - name: http - port: 80 + - port: 443 protocol: TCP targetPort: 5000 selector: diff --git a/router/router.nginx.conf.in b/router/router.nginx.conf.in index 4eca32aec79..2ab84162c1d 100644 --- a/router/router.nginx.conf.in +++ b/router/router.nginx.conf.in @@ -66,7 +66,7 @@ server { server_name hello.*; location / { - proxy_pass http://hello/; + proxy_pass https://hello/; include /etc/nginx/proxy.conf; } From b45cf79b07ab0ee5d5ade8569b8b9d7073f0e873 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 24 Mar 2021 12:02:46 +1100 
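The query.py on_shutdown hunk above filters out asyncio.current_task() before awaiting the remaining tasks, since awaiting the task that is running the shutdown handler would deadlock. A minimal standalone sketch of that drain-on-shutdown pattern (using asyncio.gather rather than the asyncio.wait call in the hunk):

    import asyncio
    import logging

    logging.basicConfig(level=logging.INFO)
    log = logging.getLogger(__name__)

    async def on_shutdown(_app=None):
        # Exclude the task running this coroutine, otherwise we would await ourselves.
        remaining = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()]
        log.info(f"shutdown requested, {len(remaining)} tasks remaining")
        if remaining:
            await asyncio.gather(*remaining, return_exceptions=True)
        log.info("all remaining tasks completed")

    async def main():
        asyncio.create_task(asyncio.sleep(0.1))  # a stand-in background task
        await on_shutdown()

    asyncio.run(main())
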
Subject: [PATCH 239/501] Conda: make sure gsutil is available on build stage --- conda/hail/meta-template.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/conda/hail/meta-template.yaml b/conda/hail/meta-template.yaml index 9be0b9ef8c0..25e675ede0b 100644 --- a/conda/hail/meta-template.yaml +++ b/conda/hail/meta-template.yaml @@ -20,6 +20,8 @@ requirements: - lz4 - pytest-runner - pip + - google-cloud-sdk + - google-cloud-storage run: - python - openjdk 8.* From 6fec6e99829fe95b91e67067cc5fba26e6067823 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 24 Mar 2021 12:06:55 +1100 Subject: [PATCH 240/501] Update pyspark --- conda/hail/meta-template.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/hail/meta-template.yaml b/conda/hail/meta-template.yaml index 25e675ede0b..99ac7cf0e8c 100644 --- a/conda/hail/meta-template.yaml +++ b/conda/hail/meta-template.yaml @@ -15,7 +15,7 @@ requirements: - rsync host: - python - - pyspark >=2.4,<2.4.2 + - pyspark >=3.1.1,<3.2.0 - openjdk 8.* - lz4 - pytest-runner @@ -25,7 +25,7 @@ requirements: run: - python - openjdk 8.* - - pyspark >=2.4,<2.4.2 + - pyspark >=3.1.1,<3.2.0 - aiohttp - aiohttp-session - bokeh >1.3,<2.0 From 89410ec34d134aa719b3faa7c47b333c85f686a5 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Tue, 30 Mar 2021 07:28:53 +1100 Subject: [PATCH 241/501] Fix cert config --- tls/config.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/tls/config.yaml b/tls/config.yaml index 1d0269c0024..51010afc8c8 100644 --- a/tls/config.yaml +++ b/tls/config.yaml @@ -136,6 +136,3 @@ principals: domains: - prometheus kind: nginx -- name: prometheus - domain: prometheus - kind: nginx From 46c59160f0e41da27d580ae9698000199bcb28ea Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Tue, 30 Mar 2021 07:40:48 +1100 Subject: [PATCH 242/501] Fix merge conflict --- auth/auth/auth.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/auth/auth/auth.py b/auth/auth/auth.py index e43bf1fd7bc..eb66c3a87e5 100644 --- a/auth/auth/auth.py +++ b/auth/auth/auth.py @@ -8,10 +8,7 @@ import google.auth.transport.requests import google.oauth2.id_token import google_auth_oauthlib.flow -<<<<<<< HEAD -======= from prometheus_async.aio.web import server_stats # type: ignore ->>>>>>> upstream/main from hailtop.config import get_deploy_config from hailtop.tls import internal_server_ssl_context from hailtop.hail_logging import AccessLogger From 7d8f848a864701c546908eb3ce8481582365acef Mon Sep 17 00:00:00 2001 From: Michael Franklin Date: Wed, 31 Mar 2021 09:00:26 +1100 Subject: [PATCH 243/501] Re-add query version import --- query/query/query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/query/query/query.py b/query/query/query.py index c801fc0b262..8cce8a8dae1 100644 --- a/query/query/query.py +++ b/query/query/query.py @@ -14,6 +14,7 @@ from hailtop.config import get_deploy_config from hailtop.tls import internal_server_ssl_context from hailtop.hail_logging import AccessLogger +from hailtop import version from gear import setup_aiohttp_session, rest_authenticated_users_only, rest_authenticated_developers_only, monitor_endpoint from .sockets import connect_to_java @@ -308,4 +309,3 @@ def run(): access_log_class=AccessLogger, ssl_context=internal_server_ssl_context(), ) - From 70f227f00f7e015cf4a4fececc26fed9547bdb62 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Sun, 4 Apr 2021 15:43:01 +1000 Subject: [PATCH 244/501] Post to Slack for deployments --- .github/workflows/prod_deploy.yaml 
| 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/.github/workflows/prod_deploy.yaml b/.github/workflows/prod_deploy.yaml index e665a145906..536faabb9a4 100644 --- a/.github/workflows/prod_deploy.yaml +++ b/.github/workflows/prod_deploy.yaml @@ -7,10 +7,17 @@ jobs: invoke-prod-deploy: runs-on: ubuntu-latest steps: - - name: "deploy" + - name: deploy run: | - curl --fail --silent --show-error -X POST \ + echo DEPLOY_BATCH_URL=$(curl --fail --silent --show-error -X POST \ -H "Authorization: Bearer ${{ secrets.CI_TOKEN }}" \ -H "Content-Type:application/json" \ -d '{"steps": ["deploy_auth", "deploy_batch", "deploy_ci", "deploy_notebook", "deploy_query", "deploy_router"], "sha": "${{ github.sha }}"}' \ - https://ci.hail.populationgenomics.org.au/api/v1alpha/prod_deploy + https://ci.hail.populationgenomics.org.au/api/v1alpha/prod_deploy) >> $GITHUB_ENV + + - name: post to Slack + run: | + curl --fail --silent --show-error -X POST \ + -H "Content-type: application/json" \ + -d "{\"text\": \"Deploying Hail Batch: $DEPLOY_BATCH_URL\"}" \ + ${{ secrets.SLACK_WEBHOOK }} From e5b23cfac5f39448b0a1738fb63efdc48824615f Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Tue, 6 Apr 2021 12:02:18 +1000 Subject: [PATCH 245/501] Remove zulip-config --- build.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/build.yaml b/build.yaml index e58ae84ebe3..c2a1c2006bd 100644 --- a/build.yaml +++ b/build.yaml @@ -13,7 +13,6 @@ steps: - gcr-push-service-account-key - test-gsa-key - auth-oauth2-client-secret - - zulip-config - benchmark-gsa-key - kind: buildImage name: echo_image From 4bca564fc27e3705bd81fbd8bb749c05a2ecbb1f Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Wed, 14 Apr 2021 13:38:10 +1000 Subject: [PATCH 246/501] Remove www subdomain (#108) --- letsencrypt/subdomains.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/letsencrypt/subdomains.txt b/letsencrypt/subdomains.txt index b6ecc8c0be1..7331c5be6cc 100644 --- a/letsencrypt/subdomains.txt +++ b/letsencrypt/subdomains.txt @@ -1,6 +1,5 @@ ci notebook -www batch batch-driver benchmark From de6eb8a8dfee19dcc35ccd0433dc275a70b2d36c Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Wed, 14 Apr 2021 13:38:19 +1000 Subject: [PATCH 247/501] Check for cURL failure on prod deploy (#107) --- .github/workflows/prod_deploy.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/prod_deploy.yaml b/.github/workflows/prod_deploy.yaml index 536faabb9a4..cc28b89016e 100644 --- a/.github/workflows/prod_deploy.yaml +++ b/.github/workflows/prod_deploy.yaml @@ -9,11 +9,12 @@ jobs: steps: - name: deploy run: | - echo DEPLOY_BATCH_URL=$(curl --fail --silent --show-error -X POST \ + DEPLOY_BATCH_URL=$(curl --fail --silent --show-error -X POST \ -H "Authorization: Bearer ${{ secrets.CI_TOKEN }}" \ -H "Content-Type:application/json" \ -d '{"steps": ["deploy_auth", "deploy_batch", "deploy_ci", "deploy_notebook", "deploy_query", "deploy_router"], "sha": "${{ github.sha }}"}' \ - https://ci.hail.populationgenomics.org.au/api/v1alpha/prod_deploy) >> $GITHUB_ENV + https://ci.hail.populationgenomics.org.au/api/v1alpha/prod_deploy) + echo DEPLOY_BATCH_URL=$DEPLOY_BATCH_URL >> $GITHUB_ENV - name: post to Slack run: | From ae636b45169266a65021026defbd50302f8eac41 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Wed, 14 Apr 2021 13:48:47 +1000 Subject: [PATCH 248/501] Remove router from prod deploy steps (#109) --- .github/workflows/prod_deploy.yaml | 2 +- 1 file changed, 1 
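The prod_deploy workflow above performs two POSTs: one to the CI endpoint to start the deploy (capturing the returned batch URL via $GITHUB_ENV), and one to a Slack webhook announcing that URL. The same two-step flow, sketched with the Python standard library; the endpoint, payload, and secrets are taken from the workflow, while the post_json helper is illustrative:

    import json
    import os
    import urllib.request

    def post_json(url, payload, headers=None):
        req = urllib.request.Request(
            url,
            data=json.dumps(payload).encode(),
            headers={"Content-Type": "application/json", **(headers or {})},
        )
        with urllib.request.urlopen(req) as resp:
            return resp.read().decode()

    # Kick off the prod deploy, then tell Slack where to watch it.
    deploy_batch_url = post_json(
        "https://ci.hail.populationgenomics.org.au/api/v1alpha/prod_deploy",
        {"steps": ["deploy_auth", "deploy_batch", "deploy_ci", "deploy_notebook", "deploy_query"],
         "sha": os.environ["GITHUB_SHA"]},
        headers={"Authorization": f"Bearer {os.environ['CI_TOKEN']}"},
    )
    post_json(os.environ["SLACK_WEBHOOK"], {"text": f"Deploying Hail Batch: {deploy_batch_url}"})
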
insertion(+), 1 deletion(-) diff --git a/.github/workflows/prod_deploy.yaml b/.github/workflows/prod_deploy.yaml index cc28b89016e..478b59931c9 100644 --- a/.github/workflows/prod_deploy.yaml +++ b/.github/workflows/prod_deploy.yaml @@ -12,7 +12,7 @@ jobs: DEPLOY_BATCH_URL=$(curl --fail --silent --show-error -X POST \ -H "Authorization: Bearer ${{ secrets.CI_TOKEN }}" \ -H "Content-Type:application/json" \ - -d '{"steps": ["deploy_auth", "deploy_batch", "deploy_ci", "deploy_notebook", "deploy_query", "deploy_router"], "sha": "${{ github.sha }}"}' \ + -d '{"steps": ["deploy_auth", "deploy_batch", "deploy_ci", "deploy_notebook", "deploy_query"], "sha": "${{ github.sha }}"}' \ https://ci.hail.populationgenomics.org.au/api/v1alpha/prod_deploy) echo DEPLOY_BATCH_URL=$DEPLOY_BATCH_URL >> $GITHUB_ENV From 1ba0649577dfa394fcc93dae1863e7f16214c489 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Wed, 14 Apr 2021 14:30:56 +1000 Subject: [PATCH 249/501] Add hail_version in TLS cookbook (#110) --- dev-docs/tls-cookbook.md | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/dev-docs/tls-cookbook.md b/dev-docs/tls-cookbook.md index 17f360515eb..70399905bfc 100644 --- a/dev-docs/tls-cookbook.md +++ b/dev-docs/tls-cookbook.md @@ -1,4 +1,5 @@ # TLS Cookbook + ## Create a Self-Signed x509 Certificate in PEM Format Produce an x509 certificate. The key is a 4096-bit RSA key. The cert is valid @@ -121,6 +122,10 @@ kubectl create secret generic \ 3. Update all the service certificates: ``` +export HAIL=$HOME/hail + +make -C $HAIL/hail python/hailtop/hail_version + PYTHONPATH=$HAIL/hail/python \ python3 $HAIL/tls/create_certs.py \ default \ @@ -133,8 +138,10 @@ PYTHONPATH=$HAIL/hail/python \ not actually services, but including them in the next step is OK). ``` -SERVICES_TO_RESTART=$(python3 -c 'import yaml -x = yaml.safe_load(open("$HAIL_HOME/tls/config.yaml"))["principals"] +SERVICES_TO_RESTART=$(python3 -c 'import os +import yaml +hail_dir = os.getenv("HAIL") +x = yaml.safe_load(open(f"{hail_dir}/tls/config.yaml"))["principals"] print(",".join(x["name"] for x in x))') ``` From 1f21febcb3960891ff25de16d9b6f3c8eed8d9a4 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Sat, 1 May 2021 12:45:26 +1000 Subject: [PATCH 250/501] Add Bloop to Gradle build config --- hail/build.gradle | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/hail/build.gradle b/hail/build.gradle index 5533d455a53..a40f9381576 100644 --- a/hail/build.gradle +++ b/hail/build.gradle @@ -3,6 +3,10 @@ buildscript { mavenCentral() jcenter() } + + dependencies { + classpath 'ch.epfl.scala:gradle-bloop_2.12:1.4.8' + } } plugins { @@ -350,3 +354,7 @@ task shadowTestJar(type: ShadowJar) { from project.sourceSets.main.output, project.sourceSets.test.output configurations = [project.configurations.hailTestJar] } + +allprojects { + apply plugin: 'bloop' +} From e9865da6fe7ee2af4a575485c80bca0d6b722cb6 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Mon, 3 May 2021 20:20:00 +1000 Subject: [PATCH 251/501] Drop duplicated docker_prefix setting --- ci/Makefile | 2 +- ci/deployment.yaml | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/ci/Makefile b/ci/Makefile index 66227167126..40c2067d193 100644 --- a/ci/Makefile +++ b/ci/Makefile @@ -69,5 +69,5 @@ push: build .PHONY: deploy deploy: push push-ci-utils hail-kaniko-push ! 
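The TLS cookbook hunk above replaces a snippet that embedded $HAIL_HOME inside a single-quoted python -c string, where the shell never expands it, with one that reads a HAIL environment variable from inside Python. Written out as a small standalone script (assumes PyYAML and HAIL pointing at a hail checkout), this produces the cookbook's SERVICES_TO_RESTART list:

    import os
    import yaml  # PyYAML

    hail_dir = os.environ["HAIL"]  # e.g. $HOME/hail
    with open(f"{hail_dir}/tls/config.yaml") as f:
        principals = yaml.safe_load(f)["principals"]

    # Comma-separated list of principals whose certificates were rotated.
    print(",".join(p["name"] for p in principals))
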
[ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default - python3 jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"default_ns":{"name":"$(NAMESPACE)"},"ci_image":{"image":"$(CI_IMAGE)"},"global":{"project":"$(PROJECT)","zone":"$(ZONE)","docker_prefix":"$(DOCKER_PREFIX)","docker_root_image":"$(DOCKER_ROOT_IMAGE)","ip":"$(IP)","domain":"$(DOMAIN)", "k8s_server_url":"$(KUBERNETES_SERVER_URL)"},"ci_utils_image":{"image":"$(CI_UTILS_IMAGE)"},"ci_database":{"user_secret_name":"sql-ci-user-config"},"hail_kaniko_image":{"image":"$(HAIL_KANIKO_IMAGE)"}}' deployment.yaml deployment.yaml.out + python3 jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"default_ns":{"name":"$(NAMESPACE)"},"ci_image":{"image":"$(CI_IMAGE)"},"global":{"project":"$(PROJECT)","zone":"$(ZONE)","ip":"$(IP)","domain":"$(DOMAIN)", "k8s_server_url":"$(KUBERNETES_SERVER_URL)"},"ci_utils_image":{"image":"$(CI_UTILS_IMAGE)"},"ci_database":{"user_secret_name":"sql-ci-user-config"},"hail_kaniko_image":{"image":"$(HAIL_KANIKO_IMAGE)"}}' deployment.yaml deployment.yaml.out kubectl -n $(NAMESPACE) apply -f deployment.yaml.out diff --git a/ci/deployment.yaml b/ci/deployment.yaml index 1fe817d9894..602ed75dab6 100644 --- a/ci/deployment.yaml +++ b/ci/deployment.yaml @@ -58,10 +58,6 @@ spec: key: docker_root_image - name: HAIL_GCP_ZONE value: "{{ global.zone }}" - - name: HAIL_DOCKER_PREFIX - value: "{{ global.docker_prefix }}" - - name: HAIL_DOCKER_ROOT_IMAGE - value: "{{ global.docker_root_image }}" - name: HAIL_CI_UTILS_IMAGE value: "{{ ci_utils_image.image }}" - name: HAIL_KANIKO_IMAGE From b364564bd4081e071034104b51b3cb01579fd6a5 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Tue, 4 May 2021 11:33:57 +1000 Subject: [PATCH 252/501] Kaniko config: parameterise docker registry --- ci/ci/build.py | 10 +++++++--- ...oogle-application-credentials-to-kaniko-auth-config | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/ci/ci/build.py b/ci/ci/build.py index 81a9415d239..dc7cb67bc76 100644 --- a/ci/ci/build.py +++ b/ci/ci/build.py @@ -315,14 +315,17 @@ def build(self, batch, code, scope): mv /python3.7-slim-stretch/Dockerfile.out {shq(dockerfile_in_context)} -set +e +set +ex /busybox/sh /convert-google-application-credentials-to-kaniko-auth-config -set -e +set -ex exec /kaniko/executor --dockerfile={shq(dockerfile_in_context)} --context=dir://{shq(context)} --destination={shq(self.image)} --cache=true --cache-repo={shq(cache_repo)} --snapshotMode=redo --use-new-run''' log.info(f'step {self.name}, script:\n{script}') + docker_registry = DOCKER_PREFIX.split('/')[0] + # docker_registry = 'https://gcr.io' + self.job = batch.create_job( KANIKO_IMAGE, command=['/busybox/sh', '-c', script], @@ -334,7 +337,8 @@ def build(self, batch, code, scope): } ], env={ - 'GOOGLE_APPLICATION_CREDENTIALS': '/secrets/gcr-push-service-account-key/gcr-push-service-account-key.json' + 'GOOGLE_APPLICATION_CREDENTIALS': '/secrets/gcr-push-service-account-key/gcr-push-service-account-key.json', + '$REGISTRY': docker_registry, }, attributes={'name': self.name}, resources=self.resources, diff --git a/ci/kaniko/convert-google-application-credentials-to-kaniko-auth-config b/ci/kaniko/convert-google-application-credentials-to-kaniko-auth-config index 50f6a8d691a..ea7cf517d50 100644 --- a/ci/kaniko/convert-google-application-credentials-to-kaniko-auth-config +++ b/ci/kaniko/convert-google-application-credentials-to-kaniko-auth-config @@ -1,4 +1,4 @@ set 
+e -echo '{"auths": { "https://gcr.io": { "auth": "'$(echo -n "_json_key:$(cat $GOOGLE_APPLICATION_CREDENTIALS)" | base64 | tr -d \\n )'"}}}' \ +echo '{"auths": { "$REGISTRY": { "auth": "'$(echo -n "_json_key:$(cat $GOOGLE_APPLICATION_CREDENTIALS)" | base64 | tr -d \\n )'"}}}' \ > /kaniko/.docker/config.json From ece8f555f27356252d53734dcfb57f4b3703d9f8 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Tue, 4 May 2021 12:49:52 +1000 Subject: [PATCH 253/501] Fix --- ci/ci/build.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/ci/build.py b/ci/ci/build.py index dc7cb67bc76..3c2f502c2ec 100644 --- a/ci/ci/build.py +++ b/ci/ci/build.py @@ -315,9 +315,9 @@ def build(self, batch, code, scope): mv /python3.7-slim-stretch/Dockerfile.out {shq(dockerfile_in_context)} -set +ex +set +e /busybox/sh /convert-google-application-credentials-to-kaniko-auth-config -set -ex +set -e exec /kaniko/executor --dockerfile={shq(dockerfile_in_context)} --context=dir://{shq(context)} --destination={shq(self.image)} --cache=true --cache-repo={shq(cache_repo)} --snapshotMode=redo --use-new-run''' @@ -338,7 +338,7 @@ def build(self, batch, code, scope): ], env={ 'GOOGLE_APPLICATION_CREDENTIALS': '/secrets/gcr-push-service-account-key/gcr-push-service-account-key.json', - '$REGISTRY': docker_registry, + 'REGISTRY': docker_registry, }, attributes={'name': self.name}, resources=self.resources, From bf437772fbeabc1ea20f65d5b29d0aa9255b98b5 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Tue, 4 May 2021 17:27:11 +1000 Subject: [PATCH 254/501] Fix setting up REGISTRY for kaniko --- ...convert-google-application-credentials-to-kaniko-auth-config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/kaniko/convert-google-application-credentials-to-kaniko-auth-config b/ci/kaniko/convert-google-application-credentials-to-kaniko-auth-config index ea7cf517d50..e5f976696a0 100644 --- a/ci/kaniko/convert-google-application-credentials-to-kaniko-auth-config +++ b/ci/kaniko/convert-google-application-credentials-to-kaniko-auth-config @@ -1,4 +1,4 @@ set +e -echo '{"auths": { "$REGISTRY": { "auth": "'$(echo -n "_json_key:$(cat $GOOGLE_APPLICATION_CREDENTIALS)" | base64 | tr -d \\n )'"}}}' \ +echo '{"auths": { "'$REGISTRY'": { "auth": "'$(echo -n "_json_key:$(cat $GOOGLE_APPLICATION_CREDENTIALS)" | base64 | tr -d \\n )'"}}}' \ > /kaniko/.docker/config.json From 111cd198ec426c2341a4ab2401899165ae5abd6f Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Tue, 4 May 2021 18:03:31 +1000 Subject: [PATCH 255/501] Remove test-dataproc-service-account-key --- build.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/build.yaml b/build.yaml index 1f89feaa294..d5a82856aa6 100644 --- a/build.yaml +++ b/build.yaml @@ -48,7 +48,6 @@ steps: - test-gsa-key - auth-oauth2-client-secret - benchmark-gsa-key - - test-dataproc-service-account-key - kind: buildImage2 name: echo_image dockerFile: /io/echo/Dockerfile From 47342c8b4b9cba4d972f78df5a2d52d1f01811be Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Tue, 4 May 2021 18:36:00 +1000 Subject: [PATCH 256/501] Add auth-gsa-key mount for create_accounts --- build.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/build.yaml b/build.yaml index d5a82856aa6..ea9d6e9b00b 100644 --- a/build.yaml +++ b/build.yaml @@ -425,6 +425,10 @@ steps: namespace: valueFrom: default_ns.name mountPath: /deploy-config + - name: auth-gsa-key + namespace: + valueFrom: default_ns.name + mountPath: /auth-gsa-key inputs: - from: /repo/ci/bootstrap_create_accounts.py to: 
/io/bootstrap_create_accounts.py From 13ecc35ed737036e879f27300d879242824cc607 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 5 May 2021 13:53:58 +1000 Subject: [PATCH 257/501] Fix hardcoded bucket gs://hail-test-dmk9z -> gs://cpg-hail-test --- build.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/build.yaml b/build.yaml index d68d48efdf2..f5099667bb3 100644 --- a/build.yaml +++ b/build.yaml @@ -2441,7 +2441,7 @@ steps: {% if deploy %} destination=$(cat /global-config/hail_query_gcs_path)/jars/ {% else %} - destination=gs://hail-test-dmk9z/{{ token }}/jars/ + destination=gs://cpg-hail-test/{{ token }}/jars/ {% endif %} gsutil -m cp /io/hail.jar ${destination}$(cat /io/git_version).jar @@ -2508,8 +2508,8 @@ steps: python3 -m pip install --no-dependencies hail-*-py3-none-any.whl export PYTEST_SPLITS=3 export PYTEST_SPLIT_INDEX=0 - export HAIL_TEST_RESOURCES_DIR=gs://hail-test-dmk9z/{{ upload_test_resources_to_gcs.token }}/test/resources - export HAIL_DOCTEST_DATA_DIR=gs://hail-test-dmk9z/{{ upload_test_resources_to_gcs.token }}/doctest/data + export HAIL_TEST_RESOURCES_DIR=gs://cpg-hail-test/{{ upload_test_resources_to_gcs.token }}/test/resources + export HAIL_DOCTEST_DATA_DIR=gs://cpg-hail-test/{{ upload_test_resources_to_gcs.token }}/doctest/data export HAIL_QUERY_BACKEND=service export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json hailctl config set batch/billing_project test @@ -2558,8 +2558,8 @@ steps: python3 -m pip install --no-dependencies hail-*-py3-none-any.whl export PYTEST_SPLITS=3 export PYTEST_SPLIT_INDEX=1 - export HAIL_TEST_RESOURCES_DIR=gs://hail-test-dmk9z/{{ upload_test_resources_to_gcs.token }}/test/resources - export HAIL_DOCTEST_DATA_DIR=gs://hail-test-dmk9z/{{ upload_test_resources_to_gcs.token }}/doctest/data + export HAIL_TEST_RESOURCES_DIR=gs://cpg-hail-test/{{ upload_test_resources_to_gcs.token }}/test/resources + export HAIL_DOCTEST_DATA_DIR=gs://cpg-hail-test/{{ upload_test_resources_to_gcs.token }}/doctest/data export HAIL_QUERY_BACKEND=service export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json hailctl config set batch/billing_project test @@ -2608,8 +2608,8 @@ steps: python3 -m pip install --no-dependencies hail-*-py3-none-any.whl export PYTEST_SPLITS=3 export PYTEST_SPLIT_INDEX=2 - export HAIL_TEST_RESOURCES_DIR=gs://hail-test-dmk9z/{{ upload_test_resources_to_gcs.token }}/test/resources - export HAIL_DOCTEST_DATA_DIR=gs://hail-test-dmk9z/{{ upload_test_resources_to_gcs.token }}/doctest/data + export HAIL_TEST_RESOURCES_DIR=gs://cpg-hail-test/{{ upload_test_resources_to_gcs.token }}/test/resources + export HAIL_DOCTEST_DATA_DIR=gs://cpg-hail-test/{{ upload_test_resources_to_gcs.token }}/doctest/data export HAIL_QUERY_BACKEND=service export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json hailctl config set batch/billing_project test From 376bb63e5d645efb3c7665ce92ce98f5f27f1de0 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Wed, 5 May 2021 15:29:34 +1000 Subject: [PATCH 258/501] Configure GCS query bucket --- infra/global.tfvars | 2 ++ 1 file changed, 2 insertions(+) diff --git a/infra/global.tfvars b/infra/global.tfvars index 532bb083507..eaf01ca359a 100644 --- a/infra/global.tfvars +++ b/infra/global.tfvars @@ -8,3 +8,5 @@ domain = "hail.populationgenomics.org.au" batch_logs_bucket_location = "australia-southeast1" batch_logs_bucket_storage_class = "STANDARD" use_artifact_registry = true +hail_query_bucket_location = "australia-southeast1" +hail_query_bucket_storage_class = 
"STANDARD" From 9012f51f675167cc17fa3edf9bd4d153c8273c6d Mon Sep 17 00:00:00 2001 From: Vlad Savelyev Date: Wed, 5 May 2021 15:30:51 +1000 Subject: [PATCH 259/501] Add missing DOCKER_PREFIX (#113) --- batch/batch/driver/create_instance.py | 1 + 1 file changed, 1 insertion(+) diff --git a/batch/batch/driver/create_instance.py b/batch/batch/driver/create_instance.py index 0a3ced6c0c4..1c76d7f9983 100644 --- a/batch/batch/driver/create_instance.py +++ b/batch/batch/driver/create_instance.py @@ -266,6 +266,7 @@ async def create_instance(app, zone, machine_name, machine_type, activation_toke -e PROJECT=$PROJECT \ -e ZONE=$ZONE \ -e DOCKER_ROOT_IMAGE=$DOCKER_ROOT_IMAGE \ +-e DOCKER_PREFIX=$DOCKER_PREFIX \ -e WORKER_CONFIG=$WORKER_CONFIG \ -e MAX_IDLE_TIME_MSECS=$MAX_IDLE_TIME_MSECS \ -e WORKER_DATA_DISK_MOUNT=/mnt/disks/$WORKER_DATA_DISK_NAME \ From 5659445fd5bca8a15f902f4e41dd8ba19a34757d Mon Sep 17 00:00:00 2001 From: Vlad Savelyev Date: Thu, 1 Jul 2021 16:12:08 +1000 Subject: [PATCH 260/501] Dataproc: pass --scopes to cluster creation command (#117) --- hail/python/hailtop/hailctl/dataproc/start.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hail/python/hailtop/hailctl/dataproc/start.py b/hail/python/hailtop/hailctl/dataproc/start.py index 1369f9bdc88..5369da40f56 100755 --- a/hail/python/hailtop/hailctl/dataproc/start.py +++ b/hail/python/hailtop/hailctl/dataproc/start.py @@ -187,6 +187,7 @@ def init_parser(parser): parser.add_argument('--network', type=str, help='the network for all nodes in this cluster') parser.add_argument('--service-account', type=str, help='The Google Service Account to use for cluster creation (default to the Compute Engine service account).') parser.add_argument('--master-tags', type=str, help='comma-separated list of instance tags to apply to the mastern node') + parser.add_argument('--scopes', help='Specifies access scopes for the node instances') parser.add_argument('--wheel', help='Non-default Hail installation. 
Warning: experimental.') @@ -338,6 +339,8 @@ def disk_size(size): conf.flags['project'] = args.project if args.bucket: conf.flags['bucket'] = args.bucket + if args.scopes: + conf.flags['scopes'] = args.scopes account = gcloud.get_config("account") if account: From ac05eca2859d0103ceb200257019d75677801a13 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Fri, 2 Jul 2021 11:01:57 +1000 Subject: [PATCH 261/501] Add janus --- conda/hail/meta-template.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/conda/hail/meta-template.yaml b/conda/hail/meta-template.yaml index 99ac7cf0e8c..a3941f62c44 100644 --- a/conda/hail/meta-template.yaml +++ b/conda/hail/meta-template.yaml @@ -47,6 +47,7 @@ requirements: - google-cloud-sdk - google-cloud-storage - google-api-core + - janus test: imports: From 3da91afc88a838c10564ec8aedf3fb6e102a7cb2 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Fri, 2 Jul 2021 11:06:15 +1000 Subject: [PATCH 262/501] Pin janus --- conda/hail/meta-template.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/hail/meta-template.yaml b/conda/hail/meta-template.yaml index a3941f62c44..3e0494b4e93 100644 --- a/conda/hail/meta-template.yaml +++ b/conda/hail/meta-template.yaml @@ -47,7 +47,7 @@ requirements: - google-cloud-sdk - google-cloud-storage - google-api-core - - janus + - janus >=0.6,<0.7 test: imports: From 7d3b7bce312e85e4ac5ba89336f9fb8999d17000 Mon Sep 17 00:00:00 2001 From: Michael Franklin Date: Fri, 2 Jul 2021 12:01:16 +1000 Subject: [PATCH 263/501] Revert "Merge from Upstream" --- .git-blame-ignore-revs | 2 - .gitignore | 1 - address/Makefile | 21 +- admin-pod/Makefile | 6 +- auth/Makefile | 21 +- batch/Makefile | 37 +- batch/batch/batch_configuration.py | 1 - batch/batch/driver/create_instance.py | 12 +- batch/batch/driver/gce.py | 50 +- batch/batch/driver/instance_collection.py | 28 +- batch/batch/driver/job.py | 7 +- batch/batch/driver/pool.py | 69 +- batch/batch/front_end/front_end.py | 46 +- batch/batch/front_end/templates/job.html | 37 +- batch/batch/front_end/validate.py | 6 +- batch/batch/utils.py | 11 +- batch/batch/worker/disk.py | 25 +- batch/batch/worker/flock.py | 51 + batch/batch/worker/worker.py | 91 +- batch/deployment.yaml | 4 - batch/test/test_batch.py | 37 +- batch2/proxy.py | 19 - batch2/react-batch/.eslintrc.json | 35 - batch2/react-batch/.gitignore | 5 - batch2/react-batch/index.html | 13 - batch2/react-batch/package-lock.json | 6591 ----------------- batch2/react-batch/package.json | 30 - batch2/react-batch/src/App.tsx | 24 - .../react-batch/src/components/BatchTable.tsx | 20 - .../react-batch/src/components/JobTable.tsx | 21 - batch2/react-batch/src/main.tsx | 13 - batch2/react-batch/src/pages/BatchPage.tsx | 14 - batch2/react-batch/src/pages/BatchesPage.tsx | 14 - batch2/react-batch/tsconfig.json | 20 - batch2/react-batch/vite.config.ts | 14 - batch2/svelte-batch/.gitignore | 4 - batch2/svelte-batch/index.html | 13 - batch2/svelte-batch/package-lock.json | 940 --- batch2/svelte-batch/package.json | 17 - batch2/svelte-batch/public/favicon.ico | Bin 1150 -> 0 bytes batch2/svelte-batch/src/App.svelte | 22 - batch2/svelte-batch/src/assets/svelte.png | Bin 5185 -> 0 bytes .../src/components/BatchTable.svelte | 15 - .../src/components/JobTable.svelte | 14 - batch2/svelte-batch/src/global.d.ts | 2 - batch2/svelte-batch/src/main.ts | 7 - .../svelte-batch/src/pages/BatchPage.svelte | 25 - .../svelte-batch/src/pages/BatchesPage.svelte | 24 - batch2/svelte-batch/svelte.config.cjs | 7 - batch2/svelte-batch/tsconfig.json | 37 - 
batch2/svelte-batch/vite.config.js | 13 - benchmark-service/Makefile | 21 +- .../python/benchmark_hail/compare/compare.py | 4 +- .../python/benchmark_hail/run/__init__.py | 4 +- .../benchmark_hail/run/methods_benchmarks.py | 34 +- .../benchmark_hail/run/sentinel_benchmarks.py | 50 - benchmark/scripts/benchmark_in_batch.py | 6 +- bootstrap-gateway/Makefile | 20 +- build.yaml | 72 +- ci/Dockerfile.ci-utils | 1 + ci/Makefile | 64 +- ci/buildkit/Dockerfile | 6 - ci/ci/build.py | 52 +- ci/ci/constants.py | 1 - ci/ci/environment.py | 2 +- ci/ci/templates/pr-table.html | 4 - ci/ci/templates/pr.html | 1 - ci/deployment.yaml | 4 +- ci/kaniko/Dockerfile | 29 + ...ication-credentials-to-kaniko-auth-config} | 4 +- ci/test/resources/build.yaml | 8 +- ...extract_1000_Genomes_30x_GRCh38_samples.sh | 5 - .../extract_1000_Genomes_NYGC_30x_GRCh38.py | 32 - datasets/extract/extract_CADD.py | 35 - datasets/extract/extract_dbSNP.py | 29 - .../notebooks/1kg_NYGC_30x_datasets.ipynb | 814 -- datasets/notebooks/CADD_datasets.ipynb | 124 - datasets/notebooks/dbSNP_datasets.ipynb | 685 -- datasets/notebooks/reformat_buckets.ipynb | 195 - datasets/notebooks/reformat_buckets.txt | 201 - .../notebooks/reformat_buckets_mappings.json | 605 -- dev-docs/batch-operation.md | 38 +- dev-docs/compiler-team/development_tools.md | 79 - dev-docs/development_process.md | 6 +- dev-docs/google-cloud-cookbook.md | 30 - docker-build.sh | 20 - docker/Dockerfile.base | 40 +- docker/Dockerfile.service-base | 14 +- docker/Makefile | 114 +- docker/publish-public-images.sh | 34 +- docker/python-dill/push.sh | 21 +- docker/requirements.txt | 7 +- gateway/Makefile | 18 +- grafana/Makefile | 18 +- grafana/deployment.yaml | 2 +- hail/Makefile | 6 +- hail/build.gradle | 5 +- hail/python/MANIFEST.in | 1 - hail/python/hail/__init__.py | 3 +- hail/python/hail/backend/spark_backend.py | 17 +- hail/python/hail/context.py | 5 +- hail/python/hail/docs/change_log.md | 35 +- hail/python/hail/docs/conf.py | 4 +- .../1000_Genomes_HighCov_autosomes.rst | 214 - .../schemas/1000_Genomes_HighCov_chrX.rst | 214 - .../schemas/1000_Genomes_HighCov_chrY.rst | 175 - .../1000_Genomes_Retracted_autosomes.rst | 128 - .../schemas/1000_Genomes_Retracted_chrX.rst | 128 - .../schemas/1000_Genomes_Retracted_chrY.rst | 117 - .../schemas/1000_Genomes_autosomes.rst | 3 +- .../datasets/schemas/1000_Genomes_chrX.rst | 3 +- .../datasets/schemas/1000_Genomes_chrY.rst | 3 +- .../hail/docs/datasets/schemas/CADD.rst | 3 +- ...Subcutaneous_all_snp_gene_associations.rst | 39 - ...eral_Omentum_all_snp_gene_associations.rst | 39 - ...ery_Coronary_all_snp_gene_associations.rst | 39 - ...rtery_Tibial_all_snp_gene_associations.rst | 39 - ..._cortex_BA24_all_snp_gene_associations.rst | 39 - ...asal_ganglia_all_snp_gene_associations.rst | 39 - ...r_Hemisphere_all_snp_gene_associations.rst | 39 - ...Brain_Cortex_all_snp_gene_associations.rst | 39 - ...l_Cortex_BA9_all_snp_gene_associations.rst | 39 - ..._Hippocampus_all_snp_gene_associations.rst | 39 - ...asal_ganglia_all_snp_gene_associations.rst | 39 - ...asal_ganglia_all_snp_gene_associations.rst | 39 - ...cervical_c-1_all_snp_gene_associations.rst | 39 - ...tantia_nigra_all_snp_gene_associations.rst | 39 - ...mmary_Tissue_all_snp_gene_associations.rst | 39 - ..._fibroblasts_all_snp_gene_associations.rst | 39 - ..._lymphocytes_all_snp_gene_associations.rst | 39 - ...n_Transverse_all_snp_gene_associations.rst | 39 - ...eal_Junction_all_snp_gene_associations.rst | 39 - ...hagus_Mucosa_all_snp_gene_associations.rst | 39 - 
...al_Appendage_all_snp_gene_associations.rst | 39 - ...ft_Ventricle_all_snp_gene_associations.rst | 39 - ...idney_Cortex_all_snp_gene_associations.rst | 39 - ...livary_Gland_all_snp_gene_associations.rst | 39 - ...cle_Skeletal_all_snp_gene_associations.rst | 39 - ...Nerve_Tibial_all_snp_gene_associations.rst | 39 - ...QTL_Prostate_all_snp_gene_associations.rst | 39 - ...d_Suprapubic_all_snp_gene_associations.rst | 39 - ...ed_Lower_leg_all_snp_gene_associations.rst | 39 - ...rminal_Ileum_all_snp_gene_associations.rst | 39 - ..._eQTL_Testis_all_snp_gene_associations.rst | 39 - ..._eQTL_Uterus_all_snp_gene_associations.rst | 39 - ..._eQTL_Vagina_all_snp_gene_associations.rst | 39 - ..._Whole_Blood_all_snp_gene_associations.rst | 39 - ...Ex_eQTL_allpairs_Adipose_Subcutaneous.rst} | 6 +- ...QTL_allpairs_Adipose_Visceral_Omentum.rst} | 4 +- .../GTEx_eQTL_allpairs_Adrenal_Gland.rst | 39 + .../GTEx_eQTL_allpairs_Artery_Aorta.rst | 39 + .../GTEx_eQTL_allpairs_Artery_Coronary.rst | 39 + .../GTEx_eQTL_allpairs_Artery_Tibial.rst | 39 + .../GTEx_eQTL_allpairs_Brain_Amygdala.rst | 39 + ..._Brain_Anterior_cingulate_cortex_BA24.rst} | 6 +- ...L_allpairs_Brain_Caudate_basal_ganglia.rst | 39 + ...L_allpairs_Brain_Cerebellar_Hemisphere.rst | 39 + .../GTEx_eQTL_allpairs_Brain_Cerebellum.rst | 39 + .../GTEx_eQTL_allpairs_Brain_Cortex.rst | 39 + ...QTL_allpairs_Brain_Frontal_Cortex_BA9.rst} | 4 +- .../GTEx_eQTL_allpairs_Brain_Hippocampus.rst | 39 + .../GTEx_eQTL_allpairs_Brain_Hypothalamus.rst | 39 + ...Brain_Nucleus_accumbens_basal_ganglia.rst} | 4 +- ...L_allpairs_Brain_Putamen_basal_ganglia.rst | 39 + ...lpairs_Brain_Spinal_cord_cervical_c-1.rst} | 4 +- ..._eQTL_allpairs_Brain_Substantia_nigra.rst} | 4 +- ...x_eQTL_allpairs_Breast_Mammary_Tissue.rst} | 4 +- ...L_allpairs_Cells_Cultured_fibroblasts.rst} | 4 +- ...irs_Cells_EBV-transformed_lymphocytes.rst} | 4 +- .../GTEx_eQTL_allpairs_Colon_Sigmoid.rst | 39 + .../GTEx_eQTL_allpairs_Colon_Transverse.rst | 39 + ...s_Esophagus_Gastroesophageal_Junction.rst} | 4 +- .../GTEx_eQTL_allpairs_Esophagus_Mucosa.rst | 39 + ...TEx_eQTL_allpairs_Esophagus_Muscularis.rst | 39 + ..._eQTL_allpairs_Heart_Atrial_Appendage.rst} | 4 +- ...TEx_eQTL_allpairs_Heart_Left_Ventricle.rst | 39 + .../GTEx_eQTL_allpairs_Kidney_Cortex.rst | 39 + .../schemas/GTEx_eQTL_allpairs_Liver.rst | 39 + .../schemas/GTEx_eQTL_allpairs_Lung.rst | 39 + ...TEx_eQTL_allpairs_Minor_Salivary_Gland.rst | 39 + .../GTEx_eQTL_allpairs_Muscle_Skeletal.rst | 39 + .../GTEx_eQTL_allpairs_Nerve_Tibial.rst | 39 + .../schemas/GTEx_eQTL_allpairs_Ovary.rst | 39 + .../schemas/GTEx_eQTL_allpairs_Pancreas.rst | 39 + .../schemas/GTEx_eQTL_allpairs_Pituitary.rst | 39 + .../schemas/GTEx_eQTL_allpairs_Prostate.rst | 39 + ...pairs_Skin_Not_Sun_Exposed_Suprapubic.rst} | 4 +- ...L_allpairs_Skin_Sun_Exposed_Lower_leg.rst} | 6 +- ...lpairs_Small_Intestine_Terminal_Ileum.rst} | 4 +- .../schemas/GTEx_eQTL_allpairs_Spleen.rst | 39 + .../schemas/GTEx_eQTL_allpairs_Stomach.rst | 39 + .../schemas/GTEx_eQTL_allpairs_Testis.rst | 39 + .../schemas/GTEx_eQTL_allpairs_Thyroid.rst | 39 + .../schemas/GTEx_eQTL_allpairs_Uterus.rst | 39 + .../schemas/GTEx_eQTL_allpairs_Vagina.rst | 39 + .../GTEx_eQTL_allpairs_Whole_Blood.rst | 39 + ...eral_Omentum_all_snp_gene_associations.rst | 42 - ...drenal_Gland_all_snp_gene_associations.rst | 42 - ...Artery_Aorta_all_snp_gene_associations.rst | 42 - ..._cortex_BA24_all_snp_gene_associations.rst | 42 - ...asal_ganglia_all_snp_gene_associations.rst | 42 - ...r_Hemisphere_all_snp_gene_associations.rst | 42 - 
...Brain_Cortex_all_snp_gene_associations.rst | 42 - ...l_Cortex_BA9_all_snp_gene_associations.rst | 42 - ..._Hippocampus_all_snp_gene_associations.rst | 42 - ...asal_ganglia_all_snp_gene_associations.rst | 42 - ...asal_ganglia_all_snp_gene_associations.rst | 42 - ...cervical_c-1_all_snp_gene_associations.rst | 42 - ...tantia_nigra_all_snp_gene_associations.rst | 42 - ...mmary_Tissue_all_snp_gene_associations.rst | 42 - ..._fibroblasts_all_snp_gene_associations.rst | 42 - ..._lymphocytes_all_snp_gene_associations.rst | 42 - ...n_Transverse_all_snp_gene_associations.rst | 42 - ...eal_Junction_all_snp_gene_associations.rst | 42 - ...hagus_Mucosa_all_snp_gene_associations.rst | 42 - ...s_Muscularis_all_snp_gene_associations.rst | 42 - ...al_Appendage_all_snp_gene_associations.rst | 42 - ...ft_Ventricle_all_snp_gene_associations.rst | 42 - ...idney_Cortex_all_snp_gene_associations.rst | 42 - ...livary_Gland_all_snp_gene_associations.rst | 42 - ...cle_Skeletal_all_snp_gene_associations.rst | 42 - ...Nerve_Tibial_all_snp_gene_associations.rst | 42 - ...QTL_Prostate_all_snp_gene_associations.rst | 42 - ...d_Suprapubic_all_snp_gene_associations.rst | 42 - ...ed_Lower_leg_all_snp_gene_associations.rst | 42 - ...rminal_Ileum_all_snp_gene_associations.rst | 42 - ..._sQTL_Testis_all_snp_gene_associations.rst | 42 - ..._sQTL_Uterus_all_snp_gene_associations.rst | 42 - ..._sQTL_Vagina_all_snp_gene_associations.rst | 42 - ..._Whole_Blood_all_snp_gene_associations.rst | 42 - ...Ex_sQTL_allpairs_Adipose_Subcutaneous.rst} | 6 +- ...QTL_allpairs_Adipose_Visceral_Omentum.rst} | 4 +- .../GTEx_sQTL_allpairs_Adrenal_Gland.rst | 42 + .../GTEx_sQTL_allpairs_Artery_Aorta.rst | 42 + .../GTEx_sQTL_allpairs_Artery_Coronary.rst | 42 + .../GTEx_sQTL_allpairs_Artery_Tibial.rst | 42 + .../GTEx_sQTL_allpairs_Brain_Amygdala.rst | 42 + ..._Brain_Anterior_cingulate_cortex_BA24.rst} | 6 +- ...L_allpairs_Brain_Caudate_basal_ganglia.rst | 42 + ...L_allpairs_Brain_Cerebellar_Hemisphere.rst | 42 + .../GTEx_sQTL_allpairs_Brain_Cerebellum.rst | 42 + .../GTEx_sQTL_allpairs_Brain_Cortex.rst | 42 + ...QTL_allpairs_Brain_Frontal_Cortex_BA9.rst} | 4 +- .../GTEx_sQTL_allpairs_Brain_Hippocampus.rst | 42 + .../GTEx_sQTL_allpairs_Brain_Hypothalamus.rst | 42 + ...Brain_Nucleus_accumbens_basal_ganglia.rst} | 4 +- ...L_allpairs_Brain_Putamen_basal_ganglia.rst | 42 + ...lpairs_Brain_Spinal_cord_cervical_c-1.rst} | 4 +- ..._sQTL_allpairs_Brain_Substantia_nigra.rst} | 4 +- ...x_sQTL_allpairs_Breast_Mammary_Tissue.rst} | 4 +- ...L_allpairs_Cells_Cultured_fibroblasts.rst} | 4 +- ...irs_Cells_EBV-transformed_lymphocytes.rst} | 4 +- .../GTEx_sQTL_allpairs_Colon_Sigmoid.rst | 42 + .../GTEx_sQTL_allpairs_Colon_Transverse.rst | 42 + ...s_Esophagus_Gastroesophageal_Junction.rst} | 4 +- .../GTEx_sQTL_allpairs_Esophagus_Mucosa.rst | 42 + ...TEx_sQTL_allpairs_Esophagus_Muscularis.rst | 42 + ..._sQTL_allpairs_Heart_Atrial_Appendage.rst} | 4 +- ...TEx_sQTL_allpairs_Heart_Left_Ventricle.rst | 42 + .../GTEx_sQTL_allpairs_Kidney_Cortex.rst | 42 + .../schemas/GTEx_sQTL_allpairs_Liver.rst | 42 + .../schemas/GTEx_sQTL_allpairs_Lung.rst | 42 + ...TEx_sQTL_allpairs_Minor_Salivary_Gland.rst | 42 + .../GTEx_sQTL_allpairs_Muscle_Skeletal.rst | 42 + .../GTEx_sQTL_allpairs_Nerve_Tibial.rst | 42 + .../schemas/GTEx_sQTL_allpairs_Ovary.rst | 42 + .../schemas/GTEx_sQTL_allpairs_Pancreas.rst | 42 + .../schemas/GTEx_sQTL_allpairs_Pituitary.rst | 42 + .../schemas/GTEx_sQTL_allpairs_Prostate.rst | 42 + ...pairs_Skin_Not_Sun_Exposed_Suprapubic.rst} | 4 +- 
...L_allpairs_Skin_Sun_Exposed_Lower_leg.rst} | 6 +- ...lpairs_Small_Intestine_Terminal_Ileum.rst} | 4 +- .../schemas/GTEx_sQTL_allpairs_Spleen.rst | 42 + .../schemas/GTEx_sQTL_allpairs_Stomach.rst | 42 + .../schemas/GTEx_sQTL_allpairs_Testis.rst | 42 + .../schemas/GTEx_sQTL_allpairs_Thyroid.rst | 42 + .../schemas/GTEx_sQTL_allpairs_Uterus.rst | 42 + .../schemas/GTEx_sQTL_allpairs_Vagina.rst | 42 + .../GTEx_sQTL_allpairs_Whole_Blood.rst | 42 + .../hail/docs/datasets/schemas/dbSNP.rst | 99 - .../hail/docs/datasets/schemas/dbSNP_rsid.rst | 31 - .../hail/docs/functions/collections.rst | 2 - hail/python/hail/docs/utils/index.rst | 4 - hail/python/hail/experimental/datasets.json | 1381 ++-- hail/python/hail/experimental/datasets.py | 29 +- hail/python/hail/experimental/db.py | 13 +- .../hail/experimental/haplotype_freq_em.py | 2 +- hail/python/hail/experimental/plots.py | 2 +- .../experimental/vcf_combiner/__main__.py | 2 +- .../experimental/vcf_combiner/vcf_combiner.py | 88 +- hail/python/hail/expr/__init__.py | 3 +- .../hail/expr/expressions/base_expression.py | 6 +- .../expr/expressions/typed_expressions.py | 116 +- hail/python/hail/expr/functions.py | 39 +- hail/python/hail/fs/fs.py | 4 - hail/python/hail/fs/google_fs.py | 3 - hail/python/hail/fs/hadoop_fs.py | 3 - hail/python/hail/fs/local_fs.py | 3 - hail/python/hail/ir/ir.py | 7 +- hail/python/hail/methods/__init__.py | 3 +- hail/python/hail/methods/impex.py | 28 +- hail/python/hail/methods/misc.py | 9 +- hail/python/hail/methods/statgen.py | 417 +- hail/python/hail/nd/__init__.py | 4 +- hail/python/hail/nd/nd.py | 20 +- hail/python/hail/table.py | 6 +- hail/python/hail/utils/__init__.py | 6 +- hail/python/hail/utils/hadoop_utils.py | 20 - hail/python/hail/utils/misc.py | 4 +- hail/python/hail/utils/tutorial.py | 69 +- hail/python/hailtop/aiogoogle/auth/session.py | 6 +- .../aiogoogle/client/compute_client.py | 3 - .../aiogoogle/client/storage_client.py | 70 +- hail/python/hailtop/aiotools/fs.py | 101 +- hail/python/hailtop/aiotools/s3asyncfs.py | 423 -- hail/python/hailtop/aiotools/stream.py | 84 +- hail/python/hailtop/batch/backend.py | 326 +- hail/python/hailtop/batch/batch.py | 42 +- .../hailtop/batch/batch_pool_executor.py | 3 +- hail/python/hailtop/batch/docs/api.rst | 1 - hail/python/hailtop/batch/docs/change_log.rst | 15 - hail/python/hailtop/batch/docs/conf.py | 1 - hail/python/hailtop/batch/job.py | 16 +- hail/python/hailtop/batch_client/aioclient.py | 5 +- hail/python/hailtop/batch_client/client.py | 5 +- hail/python/hailtop/google_storage.py | 2 +- .../dataproc/resources/init_notebook.py | 1 - hail/python/hailtop/httpx.py | 58 +- hail/python/hailtop/utils/__init__.py | 5 +- hail/python/hailtop/utils/utils.py | 173 +- .../python/hailtop/utils/validate/__init__.py | 3 +- .../python/hailtop/utils/validate/validate.py | 9 +- hail/python/requirements.txt | 5 +- hail/python/setup.py | 1 - hail/python/test/hail/expr/test_expr.py | 73 +- hail/python/test/hail/expr/test_ndarrays.py | 66 +- hail/python/test/hail/helpers.py | 10 +- hail/python/test/hail/linalg/test_linalg.py | 77 +- .../hail/matrixtable/test_matrix_table.py | 9 +- hail/python/test/hail/methods/test_impex.py | 25 +- hail/python/test/hail/methods/test_misc.py | 6 - hail/python/test/hail/methods/test_pca.py | 93 +- hail/python/test/hail/methods/test_statgen.py | 277 +- hail/python/test/hail/table/test_table.py | 7 +- .../python/test/hailtop/aiotools/test_copy.py | 199 +- hail/python/test/hailtop/batch/test_batch.py | 38 +- hail/python/test/hailtop/test_aiogoogle.py | 299 
+- hail/python/test/hailtop/test_fs.py | 338 - hail/src/main/scala/is/hail/HailContext.scala | 3 +- .../scala/is/hail/asm4s/ClassBuilder.scala | 85 +- hail/src/main/scala/is/hail/asm4s/Code.scala | 20 - .../main/scala/is/hail/asm4s/package.scala | 28 +- .../is/hail/backend/local/LocalBackend.scala | 3 +- .../hail/backend/service/ServiceBackend.scala | 1 - .../is/hail/backend/spark/SparkBackend.scala | 29 +- .../experimental/ExperimentalFunctions.scala | 6 +- .../scala/is/hail/expr/ir/ArraySorter.scala | 243 +- .../main/scala/is/hail/expr/ir/BinaryOp.scala | 20 +- .../scala/is/hail/expr/ir/BinarySearch.scala | 89 +- .../scala/is/hail/expr/ir/BlockMatrixIR.scala | 11 +- .../main/scala/is/hail/expr/ir/Casts.scala | 41 +- .../main/scala/is/hail/expr/ir/Children.scala | 1 - .../main/scala/is/hail/expr/ir/Compile.scala | 56 +- .../is/hail/expr/ir/CompileAndEvaluate.scala | 3 +- .../src/main/scala/is/hail/expr/ir/Copy.scala | 3 - .../src/main/scala/is/hail/expr/ir/Emit.scala | 1818 +++-- .../is/hail/expr/ir/EmitClassBuilder.scala | 431 +- .../is/hail/expr/ir/EmitCodeBuilder.scala | 96 +- .../scala/is/hail/expr/ir/FoldConstants.scala | 1 - .../is/hail/expr/ir/GenericTableValue.scala | 7 +- hail/src/main/scala/is/hail/expr/ir/IR.scala | 73 +- .../scala/is/hail/expr/ir/InferPType.scala | 336 +- .../scala/is/hail/expr/ir/InferType.scala | 3 +- .../scala/is/hail/expr/ir/Interpret.scala | 19 +- .../main/scala/is/hail/expr/ir/MatrixIR.scala | 13 - .../scala/is/hail/expr/ir/MatrixValue.scala | 2 +- .../scala/is/hail/expr/ir/MatrixWriter.scala | 213 +- .../is/hail/expr/ir/NativeReaderOptions.scala | 2 - .../main/scala/is/hail/expr/ir/Param.scala | 20 +- .../main/scala/is/hail/expr/ir/Parser.scala | 18 +- .../main/scala/is/hail/expr/ir/Pretty.scala | 6 +- .../is/hail/expr/ir/PruneDeadFields.scala | 14 - .../scala/is/hail/expr/ir/Requiredness.scala | 37 +- .../main/scala/is/hail/expr/ir/Simplify.scala | 14 +- .../expr/ir/SpecializedArrayBuilders.scala | 237 +- .../main/scala/is/hail/expr/ir/TableIR.scala | 66 +- .../scala/is/hail/expr/ir/TableWriter.scala | 80 +- .../scala/is/hail/expr/ir/TypeCheck.scala | 3 - ...ala => TypeToIRIntermediateClassTag.scala} | 4 +- .../main/scala/is/hail/expr/ir/UnaryOp.scala | 9 +- .../is/hail/expr/ir/agg/AggregatorState.scala | 47 +- .../is/hail/expr/ir/agg/AppendOnlyBTree.scala | 2 +- .../expr/ir/agg/ApproxCDFAggregator.scala | 2 +- .../ArrayElementLengthCheckAggregator.scala | 2 +- .../expr/ir/agg/CallStatsAggregator.scala | 15 +- .../expr/ir/agg/CollectAsSetAggregator.scala | 7 +- .../is/hail/expr/ir/agg/CountAggregator.scala | 7 +- .../hail/expr/ir/agg/DensifyAggregator.scala | 10 +- .../expr/ir/agg/DownsampleAggregator.scala | 31 +- .../hail/expr/ir/agg/GroupedAggregator.scala | 13 +- .../expr/ir/agg/ImputeTypeAggregator.scala | 9 +- .../ir/agg/LinearRegressionAggregator.scala | 2 +- .../hail/expr/ir/agg/MonoidAggregator.scala | 12 +- .../expr/ir/agg/NDArraySumAggregator.scala | 28 +- .../hail/expr/ir/agg/StagedArrayBuilder.scala | 8 +- .../expr/ir/agg/StagedBlockLinkedList.scala | 7 +- .../hail/expr/ir/agg/TakeByAggregator.scala | 25 +- .../ir/analyses/ComputeMethodSplits.scala | 45 - .../analyses/ControlFlowPreventsSplit.scala | 33 - .../expr/ir/analyses/ParentPointers.scala | 17 - .../expr/ir/functions/ArrayFunctions.scala | 12 +- .../expr/ir/functions/CallFunctions.scala | 86 +- .../is/hail/expr/ir/functions/Functions.scala | 379 +- .../expr/ir/functions/GenotypeFunctions.scala | 17 +- .../hail/expr/ir/functions/GetElement.scala | 2 +- 
.../expr/ir/functions/IntervalFunctions.scala | 78 +- .../expr/ir/functions/LocusFunctions.scala | 103 +- .../expr/ir/functions/MathFunctions.scala | 117 +- .../expr/ir/functions/NDArrayFunctions.scala | 100 +- .../ir/functions/RandomSeededFunctions.scala | 42 +- .../functions/ReferenceGenomeFunctions.scala | 36 +- .../expr/ir/functions/StringFunctions.scala | 135 +- .../expr/ir/functions/UtilFunctions.scala | 208 +- .../expr/ir/lowering/LowerBlockMatrixIR.scala | 164 +- .../ir/lowering/LowerDistributedSort.scala | 3 +- .../hail/expr/ir/lowering/LowerTableIR.scala | 40 +- .../is/hail/expr/ir/lowering/LowerToCDA.scala | 3 +- .../expr/ir/lowering/RVDToTableStage.scala | 4 +- .../hail/expr/ir/ndarrays/EmitNDArray.scala | 661 -- .../expr/ir/orderings/BinaryOrdering.scala | 4 +- .../hail/expr/ir/orderings/CallOrdering.scala | 5 +- .../hail/expr/ir/orderings/CodeOrdering.scala | 88 +- .../expr/ir/orderings/IntervalOrdering.scala | 68 +- .../expr/ir/orderings/IterableOrdering.scala | 24 +- .../expr/ir/orderings/LocusOrdering.scala | 14 +- .../expr/ir/orderings/PrimitiveOrdering.scala | 82 +- .../expr/ir/orderings/ShuffleOrdering.scala | 4 +- .../expr/ir/orderings/StringOrdering.scala | 4 +- .../expr/ir/orderings/StructOrdering.scala | 42 +- .../main/scala/is/hail/expr/ir/package.scala | 15 +- .../is/hail/expr/ir/streams/EmitStream.scala | 219 +- .../is/hail/expr/ir/streams/StreamUtils.scala | 16 +- .../src/main/scala/is/hail/io/CodecSpec.scala | 7 +- .../scala/is/hail/io/TextMatrixReader.scala | 29 +- .../is/hail/io/bgen/BgenRDDPartitions.scala | 19 +- .../main/scala/is/hail/io/bgen/LoadBgen.scala | 2 - .../main/scala/is/hail/io/fs/HadoopFS.scala | 14 - .../scala/is/hail/io/gen/ExportBGEN.scala | 26 +- .../main/scala/is/hail/io/gen/LoadGen.scala | 8 +- .../scala/is/hail/io/index/IndexWriter.scala | 27 +- .../hail/io/index/InternalNodeBuilder.scala | 34 +- .../is/hail/io/index/LeafNodeBuilder.scala | 18 +- .../scala/is/hail/io/plink/LoadPlink.scala | 6 +- .../scala/is/hail/io/tabix/TabixReader.scala | 4 +- .../main/scala/is/hail/io/vcf/LoadVCF.scala | 17 +- .../scala/is/hail/linalg/BlockMatrix.scala | 7 +- .../is/hail/linalg/LinalgCodeUtils.scala | 82 +- hail/src/main/scala/is/hail/lir/PST.scala | 25 +- hail/src/main/scala/is/hail/lir/X.scala | 6 - hail/src/main/scala/is/hail/lir/package.scala | 6 +- .../is/hail/methods/LinearRegression.scala | 6 +- .../is/hail/methods/LogisticRegression.scala | 4 +- .../is/hail/methods/PoissonRegression.scala | 4 +- .../src/main/scala/is/hail/methods/Skat.scala | 4 +- .../scala/is/hail/rvd/AbstractRVDSpec.scala | 17 +- .../services/batch_client/BatchClient.scala | 5 +- .../main/scala/is/hail/services/package.scala | 4 +- .../is/hail/services/shuffler/package.scala | 7 +- .../scala/is/hail/stats/RegressionUtils.scala | 4 +- .../is/hail/types/TypeWithRequiredness.scala | 9 - .../scala/is/hail/types/encoded/EArray.scala | 12 +- .../is/hail/types/encoded/EBaseStruct.scala | 27 +- .../scala/is/hail/types/encoded/EBinary.scala | 10 +- .../types/encoded/EBlockMatrixNDArray.scala | 15 +- .../is/hail/types/encoded/EBoolean.scala | 10 +- .../is/hail/types/encoded/EFloat32.scala | 10 +- .../is/hail/types/encoded/EFloat64.scala | 10 +- .../scala/is/hail/types/encoded/EInt32.scala | 18 +- .../scala/is/hail/types/encoded/EInt64.scala | 10 +- .../types/encoded/ENDArrayColumnMajor.scala | 20 +- .../is/hail/types/encoded/EShuffle.scala | 8 +- .../scala/is/hail/types/encoded/EType.scala | 21 +- .../physical/PArrayBackedContainer.scala | 4 +- 
.../is/hail/types/physical/PBaseStruct.scala | 16 +- .../is/hail/types/physical/PBinary.scala | 20 + .../is/hail/types/physical/PBoolean.scala | 4 +- .../scala/is/hail/types/physical/PCall.scala | 25 +- .../hail/types/physical/PCanonicalArray.scala | 48 +- .../types/physical/PCanonicalBaseStruct.scala | 23 +- .../types/physical/PCanonicalBinary.scala | 4 +- .../hail/types/physical/PCanonicalCall.scala | 6 +- .../hail/types/physical/PCanonicalDict.scala | 19 +- .../types/physical/PCanonicalInterval.scala | 10 +- .../hail/types/physical/PCanonicalLocus.scala | 12 +- .../types/physical/PCanonicalNDArray.scala | 62 +- .../hail/types/physical/PCanonicalSet.scala | 13 +- .../types/physical/PCanonicalShuffle.scala | 6 +- .../types/physical/PCanonicalStream.scala | 6 +- .../types/physical/PCanonicalString.scala | 4 +- .../scala/is/hail/types/physical/PCode.scala | 318 + .../is/hail/types/physical/PContainer.scala | 12 +- .../scala/is/hail/types/physical/PDict.scala | 3 - .../is/hail/types/physical/PFloat32.scala | 4 +- .../is/hail/types/physical/PFloat64.scala | 4 +- .../scala/is/hail/types/physical/PInt32.scala | 6 +- .../scala/is/hail/types/physical/PInt64.scala | 4 +- .../is/hail/types/physical/PInterval.scala | 14 +- .../scala/is/hail/types/physical/PLocus.scala | 12 +- .../is/hail/types/physical/PNDArray.scala | 14 +- .../is/hail/types/physical/PPrimitive.scala | 2 +- .../is/hail/types/physical/PShuffle.scala | 16 +- .../is/hail/types/physical/PStream.scala | 6 +- .../is/hail/types/physical/PString.scala | 14 +- .../is/hail/types/physical/PStruct.scala | 2 +- .../hail/types/physical/PSubsetStruct.scala | 4 +- .../scala/is/hail/types/physical/PType.scala | 43 +- .../hail/types/physical/PUnrealizable.scala | 15 +- .../is/hail/types/physical/package.scala | 3 +- .../is/hail/types/physical/stypes/SCode.scala | 86 +- .../is/hail/types/physical/stypes/SType.scala | 104 +- .../physical/stypes/SingleCodeSCode.scala | 173 - .../stypes/concrete/SBaseStructPointer.scala | 42 +- .../stypes/concrete/SBinaryPointer.scala | 26 +- .../stypes/concrete/SCanonicalCall.scala | 65 +- .../concrete/SCanonicalLocusPointer.scala | 36 +- .../concrete/SCanonicalShufflePointer.scala | 36 +- .../stypes/concrete/SIndexablePointer.scala | 42 +- .../stypes/concrete/SInsertFieldsStruct.scala | 164 - .../stypes/concrete/SIntervalPointer.scala | 48 +- .../stypes/concrete/SNDArrayPointer.scala | 46 +- .../stypes/concrete/SStackStruct.scala | 161 - .../stypes/concrete/SStringPointer.scala | 38 +- .../stypes/concrete/SSubsetStruct.scala | 83 +- .../stypes/interfaces/SBaseStruct.scala | 63 +- .../physical/stypes/interfaces/SBinary.scala | 1 + .../physical/stypes/interfaces/SCall.scala | 1 + .../stypes/interfaces/SContainer.scala | 4 +- .../stypes/interfaces/SInterval.scala | 9 +- .../physical/stypes/interfaces/SLocus.scala | 3 - .../physical/stypes/interfaces/SNDArray.scala | 166 +- .../physical/stypes/interfaces/SStream.scala | 30 +- .../physical/stypes/interfaces/SVoid.scala | 23 +- .../physical/stypes/interfaces/package.scala | 23 +- .../physical/stypes/primitives/SBoolean.scala | 53 +- .../physical/stypes/primitives/SFloat32.scala | 62 +- .../physical/stypes/primitives/SFloat64.scala | 59 +- .../physical/stypes/primitives/SInt32.scala | 62 +- .../physical/stypes/primitives/SInt64.scala | 63 +- .../scala/is/hail/types/virtual/TStruct.scala | 4 +- .../main/scala/is/hail/utils/ArrayStack.scala | 2 +- .../is/hail/utils/BoxedArrayBuilder.scala | 2 +- hail/src/main/scala/is/hail/utils/Graph.scala | 3 +- 
.../scala/is/hail/utils/HailIterator.scala | 5 +- .../utils/MissingAnnotationArrayBuilder.scala | 68 + .../utils/MissingBooleanArrayBuilder.scala | 65 + .../utils/MissingDoubleArrayBuilder.scala | 70 + .../hail/utils/MissingFloatArrayBuilder.scala | 70 + .../hail/utils/MissingIntArrayBuilder.scala | 70 + .../hail/utils/MissingLongArrayBuilder.scala | 70 + .../scala/is/hail/utils/TextTableReader.scala | 2 - .../main/scala/is/hail/utils/package.scala | 4 +- .../utils/richUtils/RichCodeInputBuffer.scala | 15 +- .../richUtils/RichCodeOutputBuffer.scala | 15 +- .../is/hail/utils/richUtils/RichRow.scala | 10 +- .../is/hail/utils/richUtils/RichString.scala | 11 +- .../is/hail/variant/RegionValueVariant.scala | 21 +- hail/src/test/resources/bad_flag_number.vcf | 91 - hail/src/test/scala/is/hail/TestUtils.scala | 9 +- .../annotations/StagedConstructorSuite.scala | 14 +- .../test/scala/is/hail/asm4s/ASM4SSuite.scala | 44 +- .../is/hail/expr/ir/Aggregators2Suite.scala | 3 +- .../is/hail/expr/ir/ArrayFunctionsSuite.scala | 4 +- .../scala/is/hail/expr/ir/ETypeSuite.scala | 2 +- .../is/hail/expr/ir/EmitStreamSuite.scala | 20 +- .../scala/is/hail/expr/ir/FunctionSuite.scala | 7 +- .../test/scala/is/hail/expr/ir/IRSuite.scala | 315 +- .../scala/is/hail/expr/ir/OrderingSuite.scala | 22 +- .../scala/is/hail/expr/ir/PruneSuite.scala | 3 - .../hail/expr/ir/RandomFunctionsSuite.scala | 17 +- .../is/hail/expr/ir/RequirednessSuite.scala | 35 +- .../is/hail/expr/ir/StagedBTreeSuite.scala | 13 +- .../scala/is/hail/expr/ir/TableIRSuite.scala | 2 - .../hail/expr/ir/TakeByAggregatorSuite.scala | 2 +- .../scala/is/hail/expr/ir/TestUtils.scala | 2 +- .../scala/is/hail/expr/ir/TrapNodeSuite.scala | 37 - .../is/hail/expr/ir/agg/DownsampleSuite.scala | 8 +- .../ir/agg/StagedBlockLinkedListSuite.scala | 2 +- .../scala/is/hail/methods/SkatSuite.scala | 3 +- .../hail/services/shuffler/ShuffleSuite.scala | 5 +- .../hail/types/physical/PNDArraySuite.scala | 21 +- .../types/physical/PhysicalTestUtils.scala | 2 +- .../is/hail/utils/ArrayBuilderSuite.scala | 9 +- internal-gateway/Makefile | 18 +- js_common/.gitignore | 5 - js_common/batch-client.ts | 7 - js_common/hail.css | 26 - js_common/package-lock.json | 199 - js_common/package.json | 17 - js_common/react/batch-client.ts | 11 - js_common/react/hooks.ts | 32 - js_common/svelte/batch-client.ts | 11 - js_common/svelte/store.ts | 27 - js_common/types.ts | 36 - letsencrypt/Makefile | 18 +- memory/Makefile | 22 +- monitoring/Makefile | 21 +- notebook/Makefile | 35 +- .../templates/workshop/resources.html | 4 - package-lock.json | 216 - package.json | 5 - prometheus/Makefile | 18 +- pylintrc | 2 +- query/Makefile | 19 +- shuffler/Makefile | 21 +- site/Makefile | 2 +- tls/Dockerfile | 15 +- tls/create_certs.py | 5 +- ukbb-rg/Makefile | 30 +- web_common/web_common/styles/main.scss | 20 +- website/Makefile | 18 +- 620 files changed, 10684 insertions(+), 25849 deletions(-) create mode 100644 batch/batch/worker/flock.py delete mode 100644 batch2/proxy.py delete mode 100644 batch2/react-batch/.eslintrc.json delete mode 100644 batch2/react-batch/.gitignore delete mode 100644 batch2/react-batch/index.html delete mode 100644 batch2/react-batch/package-lock.json delete mode 100644 batch2/react-batch/package.json delete mode 100644 batch2/react-batch/src/App.tsx delete mode 100644 batch2/react-batch/src/components/BatchTable.tsx delete mode 100644 batch2/react-batch/src/components/JobTable.tsx delete mode 100644 batch2/react-batch/src/main.tsx delete mode 100644 
batch2/react-batch/src/pages/BatchPage.tsx delete mode 100644 batch2/react-batch/src/pages/BatchesPage.tsx delete mode 100644 batch2/react-batch/tsconfig.json delete mode 100644 batch2/react-batch/vite.config.ts delete mode 100644 batch2/svelte-batch/.gitignore delete mode 100644 batch2/svelte-batch/index.html delete mode 100644 batch2/svelte-batch/package-lock.json delete mode 100644 batch2/svelte-batch/package.json delete mode 100644 batch2/svelte-batch/public/favicon.ico delete mode 100644 batch2/svelte-batch/src/App.svelte delete mode 100644 batch2/svelte-batch/src/assets/svelte.png delete mode 100644 batch2/svelte-batch/src/components/BatchTable.svelte delete mode 100644 batch2/svelte-batch/src/components/JobTable.svelte delete mode 100644 batch2/svelte-batch/src/global.d.ts delete mode 100644 batch2/svelte-batch/src/main.ts delete mode 100644 batch2/svelte-batch/src/pages/BatchPage.svelte delete mode 100644 batch2/svelte-batch/src/pages/BatchesPage.svelte delete mode 100644 batch2/svelte-batch/svelte.config.cjs delete mode 100644 batch2/svelte-batch/tsconfig.json delete mode 100644 batch2/svelte-batch/vite.config.js delete mode 100644 benchmark/python/benchmark_hail/run/sentinel_benchmarks.py delete mode 100644 ci/buildkit/Dockerfile create mode 100644 ci/kaniko/Dockerfile rename ci/{buildkit/convert-google-application-credentials-to-docker-auth-config => kaniko/convert-google-application-credentials-to-kaniko-auth-config} (71%) delete mode 100644 datasets/extract/extract_1000_Genomes_30x_GRCh38_samples.sh delete mode 100644 datasets/extract/extract_1000_Genomes_NYGC_30x_GRCh38.py delete mode 100644 datasets/extract/extract_CADD.py delete mode 100644 datasets/extract/extract_dbSNP.py delete mode 100644 datasets/notebooks/1kg_NYGC_30x_datasets.ipynb delete mode 100644 datasets/notebooks/CADD_datasets.ipynb delete mode 100644 datasets/notebooks/dbSNP_datasets.ipynb delete mode 100644 datasets/notebooks/reformat_buckets.ipynb delete mode 100644 datasets/notebooks/reformat_buckets.txt delete mode 100644 datasets/notebooks/reformat_buckets_mappings.json delete mode 100644 dev-docs/compiler-team/development_tools.md delete mode 100644 dev-docs/google-cloud-cookbook.md delete mode 100755 docker-build.sh delete mode 100644 hail/python/hail/docs/datasets/schemas/1000_Genomes_HighCov_autosomes.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/1000_Genomes_HighCov_chrX.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/1000_Genomes_HighCov_chrY.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/1000_Genomes_Retracted_autosomes.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/1000_Genomes_Retracted_chrX.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/1000_Genomes_Retracted_chrY.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Adipose_Subcutaneous_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Adipose_Visceral_Omentum_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Artery_Coronary_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Artery_Tibial_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Caudate_basal_ganglia_all_snp_gene_associations.rst delete mode 100644 
hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Cerebellar_Hemisphere_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Cortex_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Frontal_Cortex_BA9_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Hippocampus_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Putamen_basal_ganglia_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Substantia_nigra_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Breast_Mammary_Tissue_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Cells_Cultured_fibroblasts_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Colon_Transverse_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Esophagus_Mucosa_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Heart_Atrial_Appendage_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Heart_Left_Ventricle_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Kidney_Cortex_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Minor_Salivary_Gland_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Muscle_Skeletal_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Nerve_Tibial_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Prostate_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Small_Intestine_Terminal_Ileum_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Testis_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Uterus_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Vagina_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Whole_Blood_all_snp_gene_associations.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_Spleen_all_snp_gene_associations.rst => GTEx_eQTL_allpairs_Adipose_Subcutaneous.rst} (86%) rename 
hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_Stomach_all_snp_gene_associations.rst => GTEx_eQTL_allpairs_Adipose_Visceral_Omentum.rst} (90%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Adrenal_Gland.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Artery_Aorta.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Artery_Coronary.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Artery_Tibial.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Amygdala.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_Artery_Aorta_all_snp_gene_associations.rst => GTEx_eQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24.rst} (83%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Caudate_basal_ganglia.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Cerebellar_Hemisphere.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Cerebellum.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Cortex.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_Thyroid_all_snp_gene_associations.rst => GTEx_eQTL_allpairs_Brain_Frontal_Cortex_BA9.rst} (90%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Hippocampus.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Hypothalamus.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_Esophagus_Muscularis_all_snp_gene_associations.rst => GTEx_eQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia.rst} (88%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Putamen_basal_ganglia.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_Colon_Sigmoid_all_snp_gene_associations.rst => GTEx_eQTL_allpairs_Brain_Spinal_cord_cervical_c-1.rst} (89%) rename hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_Ovary_all_snp_gene_associations.rst => GTEx_eQTL_allpairs_Brain_Substantia_nigra.rst} (91%) rename hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_Lung_all_snp_gene_associations.rst => GTEx_eQTL_allpairs_Breast_Mammary_Tissue.rst} (91%) rename hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_Pituitary_all_snp_gene_associations.rst => GTEx_eQTL_allpairs_Cells_Cultured_fibroblasts.rst} (90%) rename hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_Brain_Cerebellum_all_snp_gene_associations.rst => GTEx_eQTL_allpairs_Cells_EBV-transformed_lymphocytes.rst} (89%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Colon_Sigmoid.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Colon_Transverse.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_Brain_Hypothalamus_all_snp_gene_associations.rst => GTEx_eQTL_allpairs_Esophagus_Gastroesophageal_Junction.rst} (89%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Esophagus_Mucosa.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Esophagus_Muscularis.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_Liver_all_snp_gene_associations.rst => GTEx_eQTL_allpairs_Heart_Atrial_Appendage.rst} (91%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Heart_Left_Ventricle.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Kidney_Cortex.rst create 
mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Liver.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Lung.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Minor_Salivary_Gland.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Muscle_Skeletal.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Nerve_Tibial.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Ovary.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Pancreas.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Pituitary.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Prostate.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_Brain_Amygdala_all_snp_gene_associations.rst => GTEx_eQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic.rst} (89%) rename hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_Pancreas_all_snp_gene_associations.rst => GTEx_eQTL_allpairs_Skin_Sun_Exposed_Lower_leg.rst} (86%) rename hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_Adrenal_Gland_all_snp_gene_associations.rst => GTEx_eQTL_allpairs_Small_Intestine_Terminal_Ileum.rst} (89%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Spleen.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Stomach.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Testis.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Thyroid.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Uterus.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Vagina.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Whole_Blood.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Adipose_Visceral_Omentum_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Adrenal_Gland_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Artery_Aorta_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Caudate_basal_ganglia_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Cerebellar_Hemisphere_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Cortex_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Frontal_Cortex_BA9_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Hippocampus_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Putamen_basal_ganglia_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Substantia_nigra_all_snp_gene_associations.rst delete mode 100644 
hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Breast_Mammary_Tissue_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Cells_Cultured_fibroblasts_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Colon_Transverse_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Esophagus_Mucosa_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Esophagus_Muscularis_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Heart_Atrial_Appendage_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Heart_Left_Ventricle_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Kidney_Cortex_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Minor_Salivary_Gland_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Muscle_Skeletal_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Nerve_Tibial_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Prostate_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Small_Intestine_Terminal_Ileum_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Testis_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Uterus_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Vagina_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Whole_Blood_all_snp_gene_associations.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_sQTL_Spleen_all_snp_gene_associations.rst => GTEx_sQTL_allpairs_Adipose_Subcutaneous.rst} (87%) rename hail/python/hail/docs/datasets/schemas/{GTEx_sQTL_Stomach_all_snp_gene_associations.rst => GTEx_sQTL_allpairs_Adipose_Visceral_Omentum.rst} (91%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Adrenal_Gland.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Artery_Aorta.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Artery_Coronary.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Artery_Tibial.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Amygdala.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_sQTL_Artery_Coronary_all_snp_gene_associations.rst => GTEx_sQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24.rst} (84%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Caudate_basal_ganglia.rst create mode 100644 
hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Cerebellar_Hemisphere.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Cerebellum.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Cortex.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_sQTL_Thyroid_all_snp_gene_associations.rst => GTEx_sQTL_allpairs_Brain_Frontal_Cortex_BA9.rst} (91%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Hippocampus.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Hypothalamus.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_sQTL_Adipose_Subcutaneous_all_snp_gene_associations.rst => GTEx_sQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia.rst} (89%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Putamen_basal_ganglia.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_sQTL_Artery_Tibial_all_snp_gene_associations.rst => GTEx_sQTL_allpairs_Brain_Spinal_cord_cervical_c-1.rst} (90%) rename hail/python/hail/docs/datasets/schemas/{GTEx_sQTL_Liver_all_snp_gene_associations.rst => GTEx_sQTL_allpairs_Brain_Substantia_nigra.rst} (91%) rename hail/python/hail/docs/datasets/schemas/{GTEx_sQTL_Lung_all_snp_gene_associations.rst => GTEx_sQTL_allpairs_Breast_Mammary_Tissue.rst} (92%) rename hail/python/hail/docs/datasets/schemas/{GTEx_sQTL_Pituitary_all_snp_gene_associations.rst => GTEx_sQTL_allpairs_Cells_Cultured_fibroblasts.rst} (91%) rename hail/python/hail/docs/datasets/schemas/{GTEx_sQTL_Brain_Cerebellum_all_snp_gene_associations.rst => GTEx_sQTL_allpairs_Cells_EBV-transformed_lymphocytes.rst} (90%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Colon_Sigmoid.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Colon_Transverse.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_sQTL_Brain_Hypothalamus_all_snp_gene_associations.rst => GTEx_sQTL_allpairs_Esophagus_Gastroesophageal_Junction.rst} (89%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Esophagus_Mucosa.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Esophagus_Muscularis.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_sQTL_Ovary_all_snp_gene_associations.rst => GTEx_sQTL_allpairs_Heart_Atrial_Appendage.rst} (91%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Heart_Left_Ventricle.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Kidney_Cortex.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Liver.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Lung.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Minor_Salivary_Gland.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Muscle_Skeletal.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Nerve_Tibial.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Ovary.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Pancreas.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Pituitary.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Prostate.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_sQTL_Brain_Amygdala_all_snp_gene_associations.rst => 
GTEx_sQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic.rst} (90%) rename hail/python/hail/docs/datasets/schemas/{GTEx_sQTL_Pancreas_all_snp_gene_associations.rst => GTEx_sQTL_allpairs_Skin_Sun_Exposed_Lower_leg.rst} (87%) rename hail/python/hail/docs/datasets/schemas/{GTEx_sQTL_Colon_Sigmoid_all_snp_gene_associations.rst => GTEx_sQTL_allpairs_Small_Intestine_Terminal_Ileum.rst} (90%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Spleen.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Stomach.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Testis.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Thyroid.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Uterus.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Vagina.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Whole_Blood.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/dbSNP.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/dbSNP_rsid.rst delete mode 100644 hail/python/hailtop/aiotools/s3asyncfs.py delete mode 100644 hail/python/test/hailtop/test_fs.py rename hail/src/main/scala/is/hail/expr/ir/{PrimitiveTypeToIRIntermediateClassTag.scala => TypeToIRIntermediateClassTag.scala} (73%) delete mode 100644 hail/src/main/scala/is/hail/expr/ir/analyses/ComputeMethodSplits.scala delete mode 100644 hail/src/main/scala/is/hail/expr/ir/analyses/ControlFlowPreventsSplit.scala delete mode 100644 hail/src/main/scala/is/hail/expr/ir/analyses/ParentPointers.scala delete mode 100644 hail/src/main/scala/is/hail/expr/ir/ndarrays/EmitNDArray.scala create mode 100644 hail/src/main/scala/is/hail/types/physical/PCode.scala delete mode 100644 hail/src/main/scala/is/hail/types/physical/stypes/SingleCodeSCode.scala delete mode 100644 hail/src/main/scala/is/hail/types/physical/stypes/concrete/SInsertFieldsStruct.scala delete mode 100644 hail/src/main/scala/is/hail/types/physical/stypes/concrete/SStackStruct.scala create mode 100644 hail/src/main/scala/is/hail/utils/MissingAnnotationArrayBuilder.scala create mode 100644 hail/src/main/scala/is/hail/utils/MissingBooleanArrayBuilder.scala create mode 100644 hail/src/main/scala/is/hail/utils/MissingDoubleArrayBuilder.scala create mode 100644 hail/src/main/scala/is/hail/utils/MissingFloatArrayBuilder.scala create mode 100644 hail/src/main/scala/is/hail/utils/MissingIntArrayBuilder.scala create mode 100644 hail/src/main/scala/is/hail/utils/MissingLongArrayBuilder.scala delete mode 100644 hail/src/test/resources/bad_flag_number.vcf delete mode 100644 hail/src/test/scala/is/hail/expr/ir/TrapNodeSuite.scala delete mode 100644 js_common/.gitignore delete mode 100644 js_common/batch-client.ts delete mode 100644 js_common/hail.css delete mode 100644 js_common/package-lock.json delete mode 100644 js_common/package.json delete mode 100644 js_common/react/batch-client.ts delete mode 100644 js_common/react/hooks.ts delete mode 100644 js_common/svelte/batch-client.ts delete mode 100644 js_common/svelte/store.ts delete mode 100644 js_common/types.ts delete mode 100644 package-lock.json delete mode 100644 package.json diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index f68453039ab..96d91bb687e 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -1,4 +1,2 @@ # YAML formatting 1a861505c1fc2ea3c9d7b32a47be7af10d13907c -# black format services code 
-4fccbe2d18c6d2f4059036d61489467c780bbc0e diff --git a/.gitignore b/.gitignore index 7f413f1d869..03eb40dec3a 100644 --- a/.gitignore +++ b/.gitignore @@ -20,7 +20,6 @@ hs_err_pid*.log *hail/python/hail/docs/tutorials/data* *hail/python/hailtop/pipeline/docs/output* .mypy_cache/ -node_modules *.out GPATH GRTAGS diff --git a/address/Makefile b/address/Makefile index f38fcc5cfa7..315eb864b60 100644 --- a/address/Makefile +++ b/address/Makefile @@ -1,8 +1,7 @@ include ../config.mk -TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) - -ADDRESS_IMAGE := $(DOCKER_PREFIX)/address:$(TOKEN) +ADDRESS_LATEST = $(DOCKER_PREFIX)/address:latest +ADDRESS_IMAGE = $(DOCKER_PREFIX)/address:$(shell docker images -q --no-trunc address | sed -e 's,[^:]*:,,') PYTHONPATH := $${PYTHONPATH:+$${PYTHONPATH}:}../hail/python:../gear:../web_common PYTHON := PYTHONPATH=$(PYTHONPATH) python3 @@ -15,12 +14,20 @@ check: .PHONY: build build: - $(MAKE) -C ../docker service-base - python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"'$$(cat ../docker/service-base-image-ref)'"}}' Dockerfile Dockerfile.out - ../docker-build.sh .. address/Dockerfile.out $(ADDRESS_IMAGE) + $(MAKE) -C ../docker build + -docker pull $(ADDRESS_LATEST) + python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"service-base"}}' Dockerfile Dockerfile.out + docker build -t address -f Dockerfile.out --cache-from address,$(ADDRESS_LATEST),service-base .. + +.PHONY: push +push: build + docker tag address $(ADDRESS_LATEST) + docker push $(ADDRESS_LATEST) + docker tag address $(ADDRESS_IMAGE) + docker push $(ADDRESS_IMAGE) .PHONY: deploy -deploy: build +deploy: push ! [ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default python3 ../ci/jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"address_image":{"image":"$(ADDRESS_IMAGE)"},"default_ns":{"name":"$(NAMESPACE)"},"global":{"project":"$(PROJECT)","domain":"$(DOMAIN)"}}' deployment.yaml deployment.yaml.out kubectl -n $(NAMESPACE) apply -f service-account.yaml diff --git a/admin-pod/Makefile b/admin-pod/Makefile index baa722f688e..1efe07336e7 100644 --- a/admin-pod/Makefile +++ b/admin-pod/Makefile @@ -1,8 +1,10 @@ include ../config.mk +SERVICE_BASE_IMAGE = $(DOCKER_PREFIX)/service-base:$(shell docker images -q --no-trunc service-base:latest | sed -e 's,[^:]*:,,') + .PHONY: deploy deploy: ! 
[ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default - $(MAKE) -C ../docker service-base - python3 ../ci/jinja2_render.py '{"deploy":$(DEPLOY),"service_base_image":{"image":"'$$(cat ../docker/service-base-image-ref)'"}}' admin-pod.yaml admin-pod.yaml.out + $(MAKE) -C ../docker push + python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"$(SERVICE_BASE_IMAGE)"}}' admin-pod.yaml admin-pod.yaml.out kubectl -n $(NAMESPACE) apply -f admin-pod.yaml.out diff --git a/auth/Makefile b/auth/Makefile index d7f7cb7d589..ee04c6773a8 100644 --- a/auth/Makefile +++ b/auth/Makefile @@ -1,8 +1,7 @@ include ../config.mk -TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) - -AUTH_IMAGE := $(DOCKER_PREFIX)/auth:$(TOKEN) +AUTH_LATEST = $(DOCKER_PREFIX)/auth:latest +AUTH_IMAGE = $(DOCKER_PREFIX)/auth:$(shell docker images -q --no-trunc auth:latest | sed -e 's,[^:]*:,,') EXTRA_PYTHONPATH := ../hail/python:../gear:../web_common PYTHON := PYTHONPATH=$${PYTHONPATH:+$${PYTHONPATH}:}$(EXTRA_PYTHONPATH) python3 @@ -16,12 +15,20 @@ check: .PHONY: build build: - $(MAKE) -C ../docker service-base - python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"'$$(cat ../docker/service-base-image-ref)'"}}' Dockerfile Dockerfile.out - ../docker-build.sh .. auth/Dockerfile.out $(AUTH_IMAGE) + $(MAKE) -C ../docker build + -docker pull $(AUTH_LATEST) + python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"service-base"}}' Dockerfile Dockerfile.out + docker build -f Dockerfile.out -t auth --cache-from auth,$(AUTH_LATEST),base .. + +.PHONY: push +push: build + docker tag auth $(AUTH_LATEST) + docker push $(AUTH_LATEST) + docker tag auth $(AUTH_IMAGE) + docker push $(AUTH_IMAGE) .PHONY: deploy -deploy: build +deploy: push ! [ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default kubectl -n $(NAMESPACE) apply -f auth-driver-service-account.yaml python3 ../ci/jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"default_ns":{"name":"$(NAMESPACE)"},"auth_image":{"image":"$(AUTH_IMAGE)"},"auth_database":{"user_secret_name":"sql-auth-user-config"},"global":{"project":"$(PROJECT)","zone":"$(ZONE)","domain":"$(DOMAIN)"}}' deployment.yaml deployment.yaml.out diff --git a/batch/Makefile b/batch/Makefile index b928a2f346d..6e013a9ac72 100644 --- a/batch/Makefile +++ b/batch/Makefile @@ -1,9 +1,10 @@ include ../config.mk -TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) +BATCH_LATEST = $(DOCKER_PREFIX)/batch:latest +BATCH_IMAGE = $(DOCKER_PREFIX)/batch:$(shell docker images -q --no-trunc batch | sed -e 's,[^:]*:,,') -BATCH_IMAGE := $(DOCKER_PREFIX)/batch:$(TOKEN) -BATCH_WORKER_IMAGE := $(DOCKER_PREFIX)/batch-worker:$(TOKEN) +BATCH_WORKER_LATEST = $(DOCKER_PREFIX)/batch-worker:latest +BATCH_WORKER_IMAGE = $(DOCKER_PREFIX)/batch-worker:$(shell docker images -q --no-trunc batch-worker | sed -e 's,[^:]*:,,') EXTRA_PYTHONPATH := ../hail/python:../gear:../web_common PYTHON := PYTHONPATH=$${PYTHONPATH:+$${PYTHONPATH}:}$(EXTRA_PYTHONPATH) python3 @@ -15,24 +16,40 @@ check: curlylint . bash ../check-sql.sh +.PHONY: build-prereqs +build-prereqs: + $(MAKE) -C ../docker build + .PHONY: build-batch -build-batch: - $(MAKE) -C ../docker service-base - python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"'$$(cat ../docker/service-base-image-ref)'"}}' Dockerfile Dockerfile.out - ../docker-build.sh . 
Dockerfile.out $(BATCH_IMAGE) +build-batch: build-prereqs + -docker pull $(BATCH_LATEST) + python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"service-base"}}' Dockerfile Dockerfile.out + docker build -t batch -f Dockerfile.out --cache-from batch,$(BATCH_LATEST),service-base . .PHONY: build-worker -build-worker: src/main/java/is/hail/JVMEntryway.class jars/junixsocket-selftest-2.3.3-jar-with-dependencies.jar +build-worker: build-prereqs + -docker pull $(BATCH_WORKER_LATEST) python3 ../ci/jinja2_render.py '{"global":{"docker_prefix":"$(DOCKER_PREFIX)"}}' Dockerfile.worker Dockerfile.worker.out - ../docker-build.sh .. batch/Dockerfile.worker.out $(BATCH_WORKER_IMAGE) + docker build -t batch-worker -f Dockerfile.worker.out --cache-from batch-worker,$(BATCH_WORKER_LATEST),service-base .. .PHONY: build build: build-batch build-worker +.PHONY: push +push: build + docker tag batch $(BATCH_LATEST) + docker push $(BATCH_LATEST) + docker tag batch $(BATCH_IMAGE) + docker push $(BATCH_IMAGE) + docker tag batch-worker $(BATCH_WORKER_LATEST) + docker push $(BATCH_WORKER_LATEST) + docker tag batch-worker $(BATCH_WORKER_IMAGE) + docker push $(BATCH_WORKER_IMAGE) + JINJA_ENVIRONMENT = '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"batch_image":{"image":"$(BATCH_IMAGE)"},"batch_worker_image":{"image":"$(BATCH_WORKER_IMAGE)"},"default_ns":{"name":"$(NAMESPACE)"},"batch_database":{"user_secret_name":"sql-batch-user-config"},"global":{"project":"$(PROJECT)","domain":"$(DOMAIN)","k8s_server_url":"$(KUBERNETES_SERVER_URL)","docker_prefix":"$(DOCKER_PREFIX)","docker_root_image":"$(DOCKER_ROOT_IMAGE)"},"scope":"$(SCOPE)"}' .PHONY: deploy -deploy: build +deploy: push ! [ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default E=$(JINJA_ENVIRONMENT) && \ python3 ../ci/jinja2_render.py $$E deployment.yaml deployment.yaml.out && \ diff --git a/batch/batch/batch_configuration.py b/batch/batch/batch_configuration.py index 067ff5703cb..bf0b036eb59 100644 --- a/batch/batch/batch_configuration.py +++ b/batch/batch/batch_configuration.py @@ -5,7 +5,6 @@ REFRESH_INTERVAL_IN_SECONDS = int(os.environ.get('REFRESH_INTERVAL_IN_SECONDS', 5 * 60)) DEFAULT_NAMESPACE = os.environ['HAIL_DEFAULT_NAMESPACE'] PROJECT = os.environ['PROJECT'] -SCOPE = os.environ['HAIL_SCOPE'] GCP_REGION = os.environ['HAIL_GCP_REGION'] GCP_ZONE = os.environ['HAIL_GCP_ZONE'] diff --git a/batch/batch/driver/create_instance.py b/batch/batch/driver/create_instance.py index 2f22eba6a40..fce1133e1fa 100644 --- a/batch/batch/driver/create_instance.py +++ b/batch/batch/driver/create_instance.py @@ -161,6 +161,15 @@ async def create_instance( sudo mkdir -p /mnt/disks/$WORKER_DATA_DISK_NAME/gcsfuse/ sudo ln -s /mnt/disks/$WORKER_DATA_DISK_NAME/gcsfuse /gcsfuse +sudo mkdir -p /mnt/disks/$WORKER_DATA_DISK_NAME/xfsquota/ +sudo ln -s /mnt/disks/$WORKER_DATA_DISK_NAME/xfsquota /xfsquota + +touch /xfsquota/projects +touch /xfsquota/projid + +ln -s /xfsquota/projects /etc/projects +ln -s /xfsquota/projid /etc/projid + export HOME=/root CORES=$(nproc) @@ -197,7 +206,7 @@ async def create_instance( EOF -sudo tee /etc/google-fluentd/config.d/worker-log.conf < @type tail format json @@ -269,6 +278,7 @@ async def create_instance( -v /batch:/batch:shared \ -v /logs:/logs \ -v /gcsfuse:/gcsfuse:shared \ +-v /xfsquota:/xfsquota \ --mount type=bind,source=/mnt/disks/$WORKER_DATA_DISK_NAME,target=/host \ --mount type=bind,source=/dev,target=/dev,bind-propagation=rshared \ -p 5000:5000 \ diff --git a/batch/batch/driver/gce.py 
b/batch/batch/driver/gce.py index c78686c189c..a868202bcee 100644 --- a/batch/batch/driver/gce.py +++ b/batch/batch/driver/gce.py @@ -1,17 +1,17 @@ import re import json import logging +import dateutil.parser import datetime import aiohttp from gear import Database from hailtop import aiotools, aiogoogle -from hailtop.utils import periodically_call, time_msecs +from hailtop.utils import periodically_call from ..batch_configuration import PROJECT, DEFAULT_NAMESPACE from .zone_monitor import ZoneMonitor from .instance_collection_manager import InstanceCollectionManager -from ..utils import parse_timestamp_msecs log = logging.getLogger('gce_event_monitor') @@ -58,7 +58,7 @@ async def handle_event(self, event): log.warning(f'event has no payload {json.dumps(event)}') return - timestamp_msecs = parse_timestamp_msecs(event['timestamp']) + timestamp = dateutil.parser.isoparse(event['timestamp']).timestamp() * 1000 resource_type = event['resource']['type'] if resource_type != 'gce_instance': @@ -101,16 +101,16 @@ async def handle_event(self, event): log.error(f'event for unknown instance {name}: {json.dumps(event)}') return - if event_subtype == 'compute.instances.preempted': + if event_subtype == 'v1.compute.instances.preempted': log.info(f'event handler: handle preempt {instance}') - await self.handle_preempt_event(instance, timestamp_msecs) + await self.handle_preempt_event(instance, timestamp) elif event_subtype == 'v1.compute.instances.delete': if event_type == 'COMPLETED': log.info(f'event handler: delete {instance} done') - await self.handle_delete_done_event(instance, timestamp_msecs) + await self.handle_delete_done_event(instance, timestamp) elif event_type == 'STARTED': log.info(f'event handler: handle call delete {instance}') - await self.handle_call_delete_event(instance, timestamp_msecs) + await self.handle_call_delete_event(instance, timestamp) async def handle_events(self): row = await self.db.select_and_fetchone('SELECT * FROM `gevents_mark`;') @@ -120,9 +120,7 @@ async def handle_events(self): await self.db.execute_update('UPDATE `gevents_mark` SET mark = %s;', (mark,)) filter = f''' -(logName="projects/{PROJECT}/logs/cloudaudit.googleapis.com%2Factivity" OR -logName="projects/{PROJECT}/logs/cloudaudit.googleapis.com%2Fsystem_event" -) AND +logName="projects/{PROJECT}/logs/cloudaudit.googleapis.com%2Factivity" AND resource.type=gce_instance AND protoPayload.resourceName:"{self.machine_name_prefix}" AND timestamp >= "{mark}" @@ -154,34 +152,16 @@ async def delete_orphaned_disks(self): params = {'filter': f'(labels.namespace = {DEFAULT_NAMESPACE})'} for zone in self.zone_monitor.zones: - log.info(f'deleting orphaned disks for zone {zone}') async for disk in await self.compute_client.list(f'/zones/{zone}/disks', params=params): - disk_name = disk['name'] instance_name = disk['labels']['instance-name'] instance = self.inst_coll_manager.get_instance(instance_name) - - creation_timestamp_msecs = parse_timestamp_msecs(disk.get('creationTimestamp')) - last_attach_timestamp_msecs = parse_timestamp_msecs(disk.get('lastAttachTimestamp')) - last_detach_timestamp_msecs = parse_timestamp_msecs(disk.get('lastDetachTimestamp')) - - now_msecs = time_msecs() if instance is None: - log.exception(f'deleting disk {disk_name} from instance that no longer exists') - elif (last_attach_timestamp_msecs is None - and now_msecs - creation_timestamp_msecs > 10 * 60 * 1000): - log.exception(f'deleting disk {disk_name} that has not attached within 10 minutes') - elif (last_detach_timestamp_msecs is not None - 
and now_msecs - last_detach_timestamp_msecs > 5 * 60 * 1000): - log.exception(f'deleting detached disk {disk_name} that has not been cleaned up within 5 minutes') - else: - continue - - try: - await self.compute_client.delete_disk(f'/zones/{zone}/disks/{disk_name}') - except aiohttp.ClientResponseError as e: - if e.status == 404: - continue - log.exception(f'error while deleting orphaned disk {disk_name}') + try: + await self.compute_client.delete_disk(f'/zones/{zone}/disks/{disk["name"]}') + except aiohttp.ClientResponseError as e: + if e.status == 404: + continue + raise async def delete_orphaned_disks_loop(self): - await periodically_call(15, self.delete_orphaned_disks) + await periodically_call(300, self.delete_orphaned_disks) diff --git a/batch/batch/driver/instance_collection.py b/batch/batch/driver/instance_collection.py index 08021c2b5d8..e4747050e7d 100644 --- a/batch/batch/driver/instance_collection.py +++ b/batch/batch/driver/instance_collection.py @@ -1,9 +1,7 @@ -import asyncio import aiohttp import sortedcontainers import logging import dateutil.parser -import collections from typing import Dict from hailtop.utils import time_msecs, secret_alnum_string, periodically_call @@ -28,7 +26,6 @@ def __init__(self, app, name, machine_name_prefix, is_pool): self.is_pool = is_pool self.name_instance: Dict[str, Instance] = {} - self.live_free_cores_mcpu_by_zone: Dict[str, int] = collections.defaultdict(int) self.instances_by_last_updated = sortedcontainers.SortedSet(key=lambda instance: instance.last_updated) @@ -73,7 +70,6 @@ def adjust_for_remove_instance(self, instance): if instance.state in ('pending', 'active'): self.live_free_cores_mcpu -= max(0, instance.free_cores_mcpu) self.live_total_cores_mcpu -= instance.cores_mcpu - self.live_free_cores_mcpu_by_zone[instance.zone] -= max(0, instance.free_cores_mcpu) async def remove_instance(self, instance, reason, timestamp=None): await instance.deactivate(reason, timestamp) @@ -92,7 +88,6 @@ def adjust_for_add_instance(self, instance): if instance.state in ('pending', 'active'): self.live_free_cores_mcpu += max(0, instance.free_cores_mcpu) self.live_total_cores_mcpu += instance.cores_mcpu - self.live_free_cores_mcpu_by_zone[instance.zone] += max(0, instance.free_cores_mcpu) def add_instance(self, instance): assert instance.name not in self.name_instance @@ -128,13 +123,6 @@ async def check_on_instance(self, instance): return raise - if (instance.state == 'active' - and instance.failed_request_count > 5 - and time_msecs() - instance.last_updated > 5 * 60 * 1000): - log.exception(f'deleting {instance} with {instance.failed_request_count} failed request counts after more than 5 minutes') - await self.call_delete_instance(instance, 'not_responding') - return - # PROVISIONING, STAGING, RUNNING, STOPPING, TERMINATED gce_state = spec['status'] @@ -169,16 +157,12 @@ async def check_on_instance(self, instance): async def monitor_instances(self): if self.instances_by_last_updated: - # [:50] are the fifty smallest (oldest) - instances = self.instances_by_last_updated[:50] - - async def check(instance): - since_last_updated = time_msecs() - instance.last_updated - if since_last_updated > 60 * 1000: - log.info(f'checking on {instance}, last updated {since_last_updated / 1000}s ago') - await self.check_on_instance(instance) - - await asyncio.gather(*[check(instance) for instance in instances]) + # 0 is the smallest (oldest) + instance = self.instances_by_last_updated[0] + since_last_updated = time_msecs() - instance.last_updated + if since_last_updated 
> 60 * 1000: + log.info(f'checking on {instance}, last updated {since_last_updated / 1000}s ago') + await self.check_on_instance(instance) async def monitor_instances_loop(self): await periodically_call(1, self.monitor_instances) diff --git a/batch/batch/driver/job.py b/batch/batch/driver/job.py index e2854c5a372..ec4dd800250 100644 --- a/batch/batch/driver/job.py +++ b/batch/batch/driver/job.py @@ -248,12 +248,9 @@ async def make_request(): if instance.state in ('inactive', 'deleted'): return try: - async with aiohttp.ClientSession(raise_for_status=True, timeout=aiohttp.ClientTimeout(total=5)) as session: + async with aiohttp.ClientSession(raise_for_status=True, timeout=aiohttp.ClientTimeout(total=60)) as session: await session.delete(url) await instance.mark_healthy() - except asyncio.TimeoutError: - await instance.incr_failed_request_count() - return except aiohttp.ClientResponseError as err: if err.status == 404: await instance.mark_healthy() @@ -428,8 +425,6 @@ async def schedule_job(app, record, instance): await instance.mark_healthy() if e.status == 403: log.info(f'attempt already exists for job {id} on {instance}, aborting') - if e.status == 503: - log.info(f'job {id} cannot be scheduled because {instance} is shutting down, aborting') raise e except Exception: await instance.incr_failed_request_count() diff --git a/batch/batch/driver/pool.py b/batch/batch/driver/pool.py index e9118246d76..c0c19a3a97b 100644 --- a/batch/batch/driver/pool.py +++ b/batch/batch/driver/pool.py @@ -18,7 +18,7 @@ periodically_call, ) -from ..batch_configuration import STANDING_WORKER_MAX_IDLE_TIME_MSECS, WORKER_MAX_IDLE_TIME_MSECS, GCP_ZONE +from ..batch_configuration import STANDING_WORKER_MAX_IDLE_TIME_MSECS, WORKER_MAX_IDLE_TIME_MSECS from ..inst_coll_config import PoolConfig from ..utils import ( Box, @@ -165,7 +165,7 @@ def adjust_for_add_instance(self, instance): if instance.state == 'active' and instance.failed_request_count <= 1: self.healthy_instances_by_free_cores.add(instance) - async def create_instance(self, cores=None, max_idle_time_msecs=None, zone=None): + async def create_instance(self, cores=None, max_idle_time_msecs=None): if cores is None: cores = self.worker_cores @@ -174,10 +174,9 @@ async def create_instance(self, cores=None, max_idle_time_msecs=None, zone=None) machine_name = self.generate_machine_name() + zone = self.zone_monitor.get_zone(cores, self.worker_local_ssd_data_disk, self.worker_pd_ssd_data_disk_size_gb) if zone is None: - zone = self.zone_monitor.get_zone(cores, self.worker_local_ssd_data_disk, self.worker_pd_ssd_data_disk_size_gb) - if zone is None: - return + return machine_type = f'n1-{self.worker_type}-{cores}' @@ -210,45 +209,18 @@ async def create_instance(self, cores=None, max_idle_time_msecs=None, zone=None) job_private=False, ) - async def create_instances_from_ready_cores(self, ready_cores_mcpu, zone=None): - n_live_instances = self.n_instances_by_state['pending'] + self.n_instances_by_state['active'] - - instances_needed = (ready_cores_mcpu - self.live_free_cores_mcpu + (self.worker_cores * 1000) - 1) // ( - self.worker_cores * 1000 - ) - instances_needed = min( - instances_needed, - self.max_live_instances - n_live_instances, - self.max_instances - self.n_instances, - # 20 queries/s; our GCE long-run quota - 300, - # n * 16 cores / 15s = excess_scheduling_rate/s = 10/s => n ~= 10 - 10, - ) - - if instances_needed > 0: - log.info(f'creating {instances_needed} new instances') - # parallelism will be bounded by thread pool - await 
asyncio.gather(*[self.create_instance(zone=zone) for _ in range(instances_needed)]) - async def create_instances(self): - ready_cores_mcpu_per_user = self.db.select_and_fetchall( + ready_cores = await self.db.select_and_fetchone( ''' -SELECT user, - CAST(COALESCE(SUM(ready_cores_mcpu), 0) AS SIGNED) AS ready_cores_mcpu +SELECT CAST(COALESCE(SUM(ready_cores_mcpu), 0) AS SIGNED) AS ready_cores_mcpu FROM user_inst_coll_resources WHERE inst_coll = %s -GROUP BY user; +LOCK IN SHARE MODE; ''', (self.name,), ) - if ready_cores_mcpu_per_user is None: - ready_cores_mcpu_per_user = {} - else: - ready_cores_mcpu_per_user = {r['user']: r['ready_cores_mcpu'] async for r in ready_cores_mcpu_per_user} - - ready_cores_mcpu = sum(ready_cores_mcpu_per_user.values()) + ready_cores_mcpu = ready_cores['ready_cores_mcpu'] free_cores_mcpu = sum([worker.free_cores_mcpu for worker in self.healthy_instances_by_free_cores]) free_cores = free_cores_mcpu / 1000 @@ -260,17 +232,29 @@ async def create_instances(self): ) if ready_cores_mcpu > 0 and free_cores < 500: - await self.create_instances_from_ready_cores(ready_cores_mcpu) + n_live_instances = self.n_instances_by_state['pending'] + self.n_instances_by_state['active'] - ci_ready_cores_mcpu = ready_cores_mcpu_per_user.get('ci', 0) - if ci_ready_cores_mcpu > 0 and self.live_free_cores_mcpu_by_zone[GCP_ZONE] == 0: - await self.create_instances_from_ready_cores(ci_ready_cores_mcpu, zone=GCP_ZONE) + instances_needed = (ready_cores_mcpu - self.live_free_cores_mcpu + (self.worker_cores * 1000) - 1) // ( + self.worker_cores * 1000 + ) + instances_needed = min( + instances_needed, + self.max_live_instances - n_live_instances, + self.max_instances - self.n_instances, + # 20 queries/s; our GCE long-run quota + 300, + # n * 16 cores / 15s = excess_scheduling_rate/s = 10/s => n ~= 10 + 10, + ) + if instances_needed > 0: + log.info(f'creating {instances_needed} new instances') + # parallelism will be bounded by thread pool + await asyncio.gather(*[self.create_instance() for _ in range(instances_needed)]) n_live_instances = self.n_instances_by_state['pending'] + self.n_instances_by_state['active'] if self.enable_standing_worker and n_live_instances == 0 and self.max_instances > 0: await self.create_instance( - cores=self.standing_worker_cores, - max_idle_time_msecs=STANDING_WORKER_MAX_IDLE_TIME_MSECS + cores=self.standing_worker_cores, max_idle_time_msecs=STANDING_WORKER_MAX_IDLE_TIME_MSECS ) async def control_loop(self): @@ -449,7 +433,6 @@ def get_instance(user, cores_mcpu): for instance in self.pool.healthy_instances_by_free_cores: histogram[instance.free_cores_mcpu] += 1 log.info(f'schedule {self.pool}: no viable instances for {cores_mcpu}: {histogram}') - return None should_wait = True diff --git a/batch/batch/front_end/front_end.py b/batch/batch/front_end/front_end.py index 6a0e4ee63a6..389d17dfa7f 100644 --- a/batch/batch/front_end/front_end.py +++ b/batch/batch/front_end/front_end.py @@ -16,7 +16,6 @@ import google.oauth2.service_account import google.api_core.exceptions import humanize -import traceback from prometheus_async.aio.web import server_stats # type: ignore from hailtop.utils import ( time_msecs, @@ -68,7 +67,7 @@ from ..inst_coll_config import InstanceCollectionConfigs from ..log_store import LogStore from ..database import CallError, check_call_procedure -from ..batch_configuration import BATCH_BUCKET_NAME, DEFAULT_NAMESPACE, SCOPE +from ..batch_configuration import BATCH_BUCKET_NAME, DEFAULT_NAMESPACE from ..globals import HTTP_CLIENT_MAX_SIZE, 
BATCH_FORMAT_VERSION, memory_to_worker_type from ..spec_writer import SpecWriter from ..batch_format_version import BatchFormatVersion @@ -100,21 +99,6 @@ async def wrapped(request, userdata, *args, **kwargs): return wrapped -def catch_ui_error_in_dev(fun): - @wraps(fun) - async def wrapped(request, userdata, *args, **kwargs): - try: - return await fun(request, userdata, *args, **kwargs) - except aiohttp.web_exceptions.HTTPFound as e: - raise e - except Exception as e: - if SCOPE == 'dev': - log.exception('error while populating ui page') - raise web.HTTPInternalServerError(text=traceback.format_exc()) from e - raise - return wrapped - - async def _user_can_access(db: Database, batch_id: int, user: str): record = await db.select_and_fetchone( ''' @@ -172,15 +156,12 @@ async def _handle_ui_error(session, f, *args, **kwargs): await f(*args, **kwargs) except KeyError as e: set_message(session, str(e), 'error') - log.info(f'ui error: KeyError {e}') return True except BatchOperationAlreadyCompletedError as e: set_message(session, e.message, e.ui_error_type) - log.info(f'ui error: BatchOperationAlreadyCompletedError {e.message}') return True except BatchUserError as e: set_message(session, e.message, e.ui_error_type) - log.info(f'ui error: BatchUserError {e.message}') return True else: return False @@ -341,7 +322,7 @@ async def _read_log_from_gcs(task): except google.api_core.exceptions.NotFound: id = (batch_id, job_id) log.exception(f'missing log file for {id} and task {task}') - data = 'ERROR: could not find log file' + data = 'ERROR: could not read log file' return task, data spec = json.loads(record['spec']) @@ -851,10 +832,6 @@ async def create_jobs(request, userdata): if user != 'ci' and not (network is None or network == 'public'): raise web.HTTPBadRequest(reason=f'unauthorized network {network}') - unconfined = spec.get('unconfined') - if user != 'ci' and unconfined: - raise web.HTTPBadRequest(reason=f'unauthorized use of unconfined={unconfined}') - spec_writer.add(json.dumps(spec)) db_spec = batch_format_version.db_spec(spec) @@ -1201,7 +1178,6 @@ async def delete_batch(request, userdata, batch_id): # pylint: disable=unused-a @routes.get('/batches/{batch_id}') @monitor_endpoint @web_billing_project_users_only() -@catch_ui_error_in_dev async def ui_batch(request, userdata, batch_id): app = request.app batch = await _get_batch(app, batch_id) @@ -1222,7 +1198,6 @@ async def ui_batch(request, userdata, batch_id): @monitor_endpoint @check_csrf_token @web_billing_project_users_only(redirect=False) -@catch_ui_error_in_dev async def ui_cancel_batch(request, userdata, batch_id): # pylint: disable=unused-argument post = await request.post() q = post.get('q') @@ -1241,7 +1216,6 @@ async def ui_cancel_batch(request, userdata, batch_id): # pylint: disable=unuse @monitor_endpoint @check_csrf_token @web_billing_project_users_only(redirect=False) -@catch_ui_error_in_dev async def ui_delete_batch(request, userdata, batch_id): # pylint: disable=unused-argument post = await request.post() q = post.get('q') @@ -1258,7 +1232,6 @@ async def ui_delete_batch(request, userdata, batch_id): # pylint: disable=unuse @routes.get('/batches', name='batches') @monitor_endpoint @web_authenticated_users_only() -@catch_ui_error_in_dev async def ui_batches(request, userdata): user = userdata['username'] q = request.query.get('q', f'user:{user}') @@ -1373,7 +1346,6 @@ async def get_job(request, userdata, batch_id): # pylint: disable=unused-argume @routes.get('/batches/{batch_id}/jobs/{job_id}') @monitor_endpoint 
@web_billing_project_users_only() -@catch_ui_error_in_dev async def ui_get_job(request, userdata, batch_id): app = request.app job_id = int(request.match_info['job_id']) @@ -1394,7 +1366,6 @@ async def ui_get_job(request, userdata, batch_id): 'running': dictfix.NoneOr({'duration': dictfix.NoneOr(Number)}), }, 'short_error': dictfix.NoneOr(str), - 'error': dictfix.NoneOr(str), 'container_status': {'out_of_memory': dictfix.NoneOr(bool)}, 'state': str, } @@ -1409,7 +1380,6 @@ async def ui_get_job(request, userdata, batch_id): job_status = dictfix.dictfix(job_status, job_status_spec) container_statuses = job_status['container_statuses'] step_statuses = [container_statuses['input'], container_statuses['main'], container_statuses['output']] - step_errors = {step: status['error'] for step, status in container_statuses.items() if status is not None} for status in step_statuses: # backwards compatibility @@ -1448,8 +1418,6 @@ async def ui_get_job(request, userdata, batch_id): 'step_statuses': step_statuses, 'job_specification': job_specification, 'job_status_str': json.dumps(job, indent=2), - 'step_errors': step_errors, - 'error': job_status.get('error') } return await render_template('batch', request, userdata, 'job.html', page_context) @@ -1457,7 +1425,6 @@ async def ui_get_job(request, userdata, batch_id): @routes.get('/billing_limits') @monitor_endpoint @web_authenticated_users_only() -@catch_ui_error_in_dev async def ui_get_billing_limits(request, userdata): app = request.app db: Database = app['db'] @@ -1532,7 +1499,6 @@ async def post_edit_billing_limits(request, userdata): # pylint: disable=unused @monitor_endpoint @check_csrf_token @web_authenticated_developers_only(redirect=False) -@catch_ui_error_in_dev async def post_edit_billing_limits_ui(request, userdata): # pylint: disable=unused-argument db: Database = request.app['db'] billing_project = request.match_info['billing_project'] @@ -1614,7 +1580,6 @@ def billing_record_to_dict(record): @routes.get('/billing') @monitor_endpoint @web_authenticated_developers_only() -@catch_ui_error_in_dev async def ui_get_billing(request, userdata): billing, start, end = await _query_billing(request) @@ -1655,7 +1620,6 @@ async def ui_get_billing(request, userdata): @routes.get('/billing_projects') @monitor_endpoint @web_authenticated_developers_only() -@catch_ui_error_in_dev async def ui_get_billing_projects(request, userdata): db: Database = request.app['db'] billing_projects = await query_billing_projects(db) @@ -1747,7 +1711,6 @@ async def delete(tx): @monitor_endpoint @check_csrf_token @web_authenticated_developers_only(redirect=False) -@catch_ui_error_in_dev async def post_billing_projects_remove_user(request, userdata): # pylint: disable=unused-argument db: Database = request.app['db'] billing_project = request.match_info['billing_project'] @@ -1812,7 +1775,6 @@ async def insert(tx): @monitor_endpoint @check_csrf_token @web_authenticated_developers_only(redirect=False) -@catch_ui_error_in_dev async def post_billing_projects_add_user(request, userdata): # pylint: disable=unused-argument db: Database = request.app['db'] post = await request.post() @@ -1868,7 +1830,6 @@ async def insert(tx): @monitor_endpoint @check_csrf_token @web_authenticated_developers_only(redirect=False) -@catch_ui_error_in_dev async def post_create_billing_projects(request, userdata): # pylint: disable=unused-argument db: Database = request.app['db'] post = await request.post() @@ -1929,7 +1890,6 @@ async def close_project(tx): @monitor_endpoint @check_csrf_token 
@web_authenticated_developers_only(redirect=False) -@catch_ui_error_in_dev async def post_close_billing_projects(request, userdata): # pylint: disable=unused-argument db: Database = request.app['db'] billing_project = request.match_info['billing_project'] @@ -1975,7 +1935,6 @@ async def open_project(tx): @monitor_endpoint @check_csrf_token @web_authenticated_developers_only(redirect=False) -@catch_ui_error_in_dev async def post_reopen_billing_projects(request, userdata): # pylint: disable=unused-argument db: Database = request.app['db'] billing_project = request.match_info['billing_project'] @@ -2038,7 +1997,6 @@ async def refresh_inst_colls(request): @routes.get('') @routes.get('/') @web_authenticated_users_only() -@catch_ui_error_in_dev async def index(request, userdata): # pylint: disable=unused-argument location = request.app.router['batches'].url_for() raise web.HTTPFound(location=location) diff --git a/batch/batch/front_end/templates/job.html b/batch/batch/front_end/templates/job.html index 597fbe0b7ef..3dc1785b07b 100644 --- a/batch/batch/front_end/templates/job.html +++ b/batch/batch/front_end/templates/job.html @@ -101,50 +101,23 @@

 Step Status

-{% if error is not none %}
-Error
-{{ error }}
-{% endif %}
-
-{% if job_log or step_errors %}
-Logs
+{% if job_log %}
+Log

-{% if 'input' in job_log or 'input' in step_errors %}
-Input
 {% if 'input' in job_log %}
-Log
+Input
 {{ job_log['input'] }}
 {% endif %}
-{% if 'input' in step_errors and step_errors['input'] is not none %}
-Error
-{{ step_errors['input'] }}
-{% endif %}
-{% endif %}
-{% if 'main' in job_log or 'main' in step_errors %}
-Main
 {% if 'main' in job_log %}
-Log
+Main
 {{ job_log['main'] }}
 {% endif %}
-{% if 'main' in step_errors and step_errors['main'] is not none %}
-Error
-{{ step_errors['main'] }}
-{% endif %}
-{% endif %}
-{% if 'output' in job_log or 'output' in step_errors %}
-Output
 {% if 'output' in job_log %}
-Log
+Output
 {{ job_log['output'] }}
 {% endif %}
-{% if 'output' in step_errors and step_errors['output'] is not none %}
-Error
-{{ step_errors['output'] }}
-{% endif %}
-{% endif %}
-
 {% endif %}

 Job Specification

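For context on the main.py hunks above: `_handle_ui_error` runs a batch operation and, on `KeyError`, `BatchOperationAlreadyCompletedError`, or `BatchUserError`, stores a flash message on the session and returns True instead of raising; this patch only drops its log lines. A minimal sketch of how a UI POST handler might use it follows; the handler body, route lookup, and `_cancel_batch` call are illustrative assumptions, only `_handle_ui_error` and `set_message` come from the code above.

# Sketch only: assumes the helpers defined in batch/batch/front_end/main.py.
import aiohttp_session
from aiohttp import web
from web_common import set_message  # assumed to be the source of set_message

async def ui_cancel_batch_sketch(request, userdata, batch_id):
    session = await aiohttp_session.get_session(request)
    # _handle_ui_error returns True if it caught and reported a known error.
    errored = await _handle_ui_error(session, _cancel_batch, request.app, batch_id)
    if not errored:
        set_message(session, f'Batch {batch_id} cancelled.', 'info')
    location = request.app.router['batches'].url_for()
    raise web.HTTPFound(location=location)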
diff --git a/batch/batch/front_end/validate.py b/batch/batch/front_end/validate.py index a8a004901b4..62518ca0b2a 100644 --- a/batch/batch/front_end/validate.py +++ b/batch/batch/front_end/validate.py @@ -20,7 +20,6 @@ regex, required, str_type, - non_empty_str_type, switch, ValidationError, ) @@ -49,8 +48,8 @@ 'gcsfuse': listof( keyed( { - required('bucket'): non_empty_str_type, - required('mount_path'): non_empty_str_type, + required('bucket'): str_type, + required('mount_path'): str_type, required('read_only'): bool_type, } ) @@ -59,7 +58,6 @@ required('job_id'): int_type, 'mount_tokens': bool_type, 'network': oneof('public', 'private'), - 'unconfined': bool_type, 'output_files': listof(keyed({required('from'): str_type, required('to'): str_type})), required('parent_ids'): listof(int_type), 'port': int_type, diff --git a/batch/batch/utils.py b/batch/batch/utils.py index 13f7dcd793d..58c2189de56 100644 --- a/batch/batch/utils.py +++ b/batch/batch/utils.py @@ -2,13 +2,11 @@ import math import json import secrets -import dateutil.parser from aiohttp import web from functools import wraps from collections import deque from gear import maybe_parse_bearer_header -from hailtop.utils import secret_alnum_string from .globals import RESERVED_STORAGE_GB_PER_CORE @@ -196,12 +194,6 @@ def is_valid_cores_mcpu(cores_mcpu: int): return quarter_cores & (quarter_cores - 1) == 0 -def parse_timestamp_msecs(ts): - if ts is None: - return ts - return dateutil.parser.isoparse(ts).timestamp() * 1000 - - class Box: def __init__(self, value): self.value = value @@ -256,8 +248,7 @@ def __init__(self): self._global_counter = WindowFractionCounter(10) def push(self, success: bool): - token = secret_alnum_string(6) - self._global_counter.push(token, success) + self._global_counter.push('exceeded_shares', success) def rate(self) -> float: return self._global_counter.fraction() diff --git a/batch/batch/worker/disk.py b/batch/batch/worker/disk.py index ebef846a02e..b5833666c5e 100644 --- a/batch/batch/worker/disk.py +++ b/batch/batch/worker/disk.py @@ -1,6 +1,6 @@ import logging -from hailtop.utils import check_shell_output, LoggingTimer, retry_all_errors_n_times +from hailtop.utils import check_shell_output, LoggingTimer log = logging.getLogger('disk') @@ -37,26 +37,15 @@ async def create(self, labels=None): async def delete(self): try: - await self._unmount() + await self._detach() finally: - try: - await self._detach() - finally: - await self._delete() - - async def _unmount(self): - await retry_all_errors_n_times(max_errors=10, msg=f'error while unmounting disk {self.name}', error_logging_interval=3)( - check_shell_output, f'umount -v {self.disk_path} {self.mount_path}' - ) + await self._delete() async def _format(self): - async def format_disk(): - await check_shell_output(f'mkfs.ext4 -m 0 -E lazy_itable_init=0,lazy_journal_init=0,discard {self.disk_path}') - await check_shell_output(f'mkdir -p {self.mount_path}') - await check_shell_output(f'mount -o discard,defaults {self.disk_path} {self.mount_path}') - await check_shell_output(f'chmod a+w {self.mount_path}') - - await retry_all_errors_n_times(max_errors=10, msg=f'error while formatting disk {self.name}', error_logging_interval=3)(format_disk) + await check_shell_output(f'mkfs.ext4 -m 0 -E lazy_itable_init=0,lazy_journal_init=0,discard {self.disk_path}') + await check_shell_output(f'mkdir -p {self.mount_path}') + await check_shell_output(f'mount -o discard,defaults {self.disk_path} {self.mount_path}') + await check_shell_output(f'chmod a+w {self.mount_path}') 
async def _create(self, labels=None): async with LoggingTimer(f'creating disk {self.name}'): diff --git a/batch/batch/worker/flock.py b/batch/batch/worker/flock.py new file mode 100644 index 00000000000..30e409b45c1 --- /dev/null +++ b/batch/batch/worker/flock.py @@ -0,0 +1,51 @@ +import fcntl +import os +import argparse +import subprocess as sp + +from pathlib import Path +from hailtop.utils import blocking_to_async + + +class Flock: + def __init__(self, path, pool=None, nonblock=False): + self.path = Path(path).resolve() + self.lock_path = self.path.parent + self.pool = pool + self.flock_flags = fcntl.LOCK_EX + if nonblock: + self.flock_flags |= fcntl.LOCK_NB + self.fd = -1 + + def __enter__(self): + self.lock_path.mkdir(parents=True, exist_ok=True) + self.fd = os.open(self.lock_path, os.O_RDONLY) + fcntl.flock(self.fd, self.flock_flags) + return self + + def __exit__(self, type, value, traceback): + fcntl.flock(self.fd, fcntl.LOCK_UN) + os.close(self.fd) + + async def __aenter__(self): + assert self.pool + return await blocking_to_async(self.pool, self.__enter__) + + async def __aexit__(self, exc_type, exc_val, exc_tb): + assert self.pool + return await blocking_to_async(self.pool, self.__exit__, exc_type, exc_val, exc_tb) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('path', type=str) + parser.add_argument('-c', dest='command', type=str, required=True) + parser.add_argument('-n', dest='nonblock', action='store_true') + args = parser.parse_args() + + with Flock(args.path): + try: + sp.check_output(args.command, stderr=sp.STDOUT, shell=True) + except sp.CalledProcessError as e: + print(e.output) + raise e diff --git a/batch/batch/worker/worker.py b/batch/batch/worker/worker.py index f21c227cc03..e57933dc38c 100644 --- a/batch/batch/worker/worker.py +++ b/batch/batch/worker/worker.py @@ -1,4 +1,4 @@ -from typing import Optional, Dict, Callable, Tuple +from typing import Optional, Dict, Callable import os import json import sys @@ -68,6 +68,7 @@ from ..utils import storage_gib_to_bytes, Box from .disk import Disk +from .flock import Flock # uvloop.install() @@ -280,8 +281,6 @@ def user_error(e): if isinstance(e, DockerError): if e.status == 404 and 'pull access denied' in e.message: return True - if e.status == 404 and 'not found: manifest unknown' in e.message: - return True if e.status == 400 and 'executable file not found' in e.message: return True return False @@ -361,10 +360,6 @@ def container_config(self): network = 'public' host_config['NetworkMode'] = network # not documented, I used strace to inspect the packets - unconfined = self.spec.get('unconfined') - if unconfined: - host_config['SecurityOpt'] = ["seccomp:unconfined", "apparmor:unconfined"] - config['HostConfig'] = host_config return config @@ -447,11 +442,8 @@ async def run(self, worker): docker.images.pull, self.image_ref_str, auth=auth ) except DockerError as e: - if e.status == 404: - if 'pull access denied' in e.message: - self.short_error = 'image cannot be pulled' - elif 'not found: manifest unknown' in e.message: - self.short_error = 'image not found' + if e.status == 404 and 'pull access denied' in e.message: + self.short_error = 'image cannot be pulled' raise if self.port is not None: @@ -472,8 +464,12 @@ async def run(self, worker): self.overlay_path = merged_overlay_path[:-7].replace(WORKER_DATA_DISK_MOUNT, '/host') os.makedirs(f'{self.overlay_path}/', exist_ok=True) + async with Flock('/xfsquota/projects', pool=worker.pool): + with open('/xfsquota/projects', 'a') as 
f: + f.write(f'{self.job.project_id}:{self.overlay_path}\n') + await check_shell_output( - f'xfs_quota -x -c "project -s -p {self.overlay_path} {self.job.project_id}" /host/' + f'xfs_quota -x -D /xfsquota/projects -P /xfsquota/projid -c "project -s {self.job.project_name}" /host/' ) with self.step('starting'): @@ -539,6 +535,12 @@ async def get_log(self): return self.log async def delete_container(self): + if self.overlay_path: + path = self.overlay_path.replace('/', r'\/') + + async with Flock('/xfsquota/projects', pool=worker.pool): + await check_shell(f"sed -i '/:{path}/d' /xfsquota/projects") + if self.container: try: log.info(f'{self}: deleting container') @@ -604,7 +606,6 @@ def populate_secret_host_path(host_path, secret_data): async def add_gcsfuse_bucket(mount_path, bucket, key_file, read_only): - assert bucket os.makedirs(mount_path) options = ['allow_other'] if read_only: @@ -777,6 +778,7 @@ def __init__( self.secrets = secrets self.env = job_spec.get('env', []) + self.project_name = f'batch-{self.batch_id}-job-{self.job_id}' self.project_id = Job.get_next_xfsquota_project_id() self.task_manager = task_manager @@ -906,9 +908,6 @@ def __init__( if network: assert network in ('public', 'private') main_spec['network'] = network - unconfined = job_spec.get('unconfined') - if unconfined: - main_spec['unconfined'] = unconfined containers['main'] = Container(self, 'main', main_spec) if output_files: @@ -971,16 +970,25 @@ async def run(self, worker): await self.setup_io() + async with Flock('/xfsquota/projid', pool=worker.pool): + with open('/xfsquota/projid', 'a') as f: + f.write(f'{self.project_name}:{self.project_id}\n') + if not self.disk: + async with Flock('/xfsquota/projects', pool=worker.pool): + with open('/xfsquota/projects', 'a') as f: + f.write(f'{self.project_id}:{self.scratch}\n') data_disk_storage_in_bytes = storage_gib_to_bytes( self.external_storage_in_gib + self.data_disk_storage_in_gib ) else: data_disk_storage_in_bytes = storage_gib_to_bytes(self.data_disk_storage_in_gib) - await check_shell_output(f'xfs_quota -x -c "project -s -p {self.scratch} {self.project_id}" /host/') await check_shell_output( - f'xfs_quota -x -c "limit -p bsoft={data_disk_storage_in_bytes} bhard={data_disk_storage_in_bytes} {self.project_id}" /host/' + f'xfs_quota -x -D /xfsquota/projects -P /xfsquota/projid -c "project -s {self.project_name}" /host/' + ) + await check_shell_output( + f'xfs_quota -x -D /xfsquota/projects -P /xfsquota/projid -c "limit -p bsoft={data_disk_storage_in_bytes} bhard={data_disk_storage_in_bytes} {self.project_name}" /host/' ) if self.secrets: @@ -1064,7 +1072,15 @@ async def cleanup(self): await check_shell(f'fusermount -u {mount_path}') log.info(f'unmounted gcsfuse bucket {bucket} from {mount_path}') - await check_shell(f'xfs_quota -x -c "limit -p bsoft=0 bhard=0 {self.project_id}" /host') + await check_shell( + f'xfs_quota -x -D /xfsquota/projects -P /xfsquota/projid -c "limit -p bsoft=0 bhard=0 {self.project_name}" /host' + ) + + async with Flock('/xfsquota/projid', pool=worker.pool): + await check_shell(f"sed -i '/{self.project_name}:{self.project_id}/d' /xfsquota/projid") + + async with Flock('/xfsquota/projects', pool=worker.pool): + await check_shell(f"sed -i '/{self.project_id}:/d' /xfsquota/projects") await blocking_to_async(self.pool, shutil.rmtree, self.scratch, ignore_errors=True) except asyncio.CancelledError: @@ -1183,9 +1199,19 @@ async def run(self, worker): os.makedirs(f'{self.scratch}/') - await check_shell_output(f'xfs_quota -x -c "project -s -p 
{self.scratch} {self.project_id}" /host/') + async with Flock('/xfsquota/projid', pool=worker.pool): + with open('/xfsquota/projid', 'a') as f: + f.write(f'{self.project_name}:{self.project_id}\n') + + async with Flock('/xfsquota/projects', pool=worker.pool): + with open('/xfsquota/projects', 'a') as f: + f.write(f'{self.project_id}:{self.scratch}\n') + + await check_shell_output( + f'xfs_quota -x -D /xfsquota/projects -P /xfsquota/projid -c "project -s {self.project_name}" /host/' + ) await check_shell_output( - f'xfs_quota -x -c "limit -p bsoft={self.data_disk_storage_in_gib} bhard={self.data_disk_storage_in_gib} {self.project_id}" /host/' + f'xfs_quota -x -D /xfsquota/projects -P /xfsquota/projid -c "limit -p bsoft={self.data_disk_storage_in_gib} bhard={self.data_disk_storage_in_gib} {self.project_name}" /host/' ) if self.secrets: @@ -1261,7 +1287,15 @@ async def cleanup(self): log.info(f'{self}: cleaning up') try: - await check_shell(f'xfs_quota -x -c "limit -p bsoft=0 bhard=0 {self.project_id}" /host') + await check_shell( + f'xfs_quota -x -D /xfsquota/projects -P /xfsquota/projid -c "limit -p bsoft=0 bhard=0 {self.project_name}" /host' + ) + + async with Flock('/xfsquota/projid', pool=worker.pool): + await check_shell(f"sed -i '/{self.project_name}:{self.project_id}/d' /xfsquota/projid") + + async with Flock('/xfsquota/projects', pool=worker.pool): + await check_shell(f"sed -i '/{self.project_id}:/d' /xfsquota/projects") await blocking_to_async(self.pool, shutil.rmtree, self.scratch, ignore_errors=True) except asyncio.CancelledError: @@ -1307,13 +1341,12 @@ def __str__(self): class Worker: def __init__(self): - self.active = False self.cores_mcpu = CORES * 1000 self.last_updated = time_msecs() self.cpu_sem = FIFOWeightedSemaphore(self.cores_mcpu) self.data_disk_space_remaining = Box(UNRESERVED_WORKER_DATA_DISK_SIZE_GB) self.pool = concurrent.futures.ThreadPoolExecutor() - self.jobs: Dict[Tuple[int, int], Job] = {} + self.jobs = {} self.stop_event = asyncio.Event() self.task_manager = aiotools.BackgroundTaskManager() self.jar_download_locks = defaultdict(asyncio.Lock) @@ -1370,10 +1403,6 @@ async def create_job_1(self, request): if id in self.jobs: return web.HTTPForbidden() - # check worker hasn't started shutting down - if not self.active: - return web.HTTPServiceUnavailable() - job = Job.create( batch_id, body['user'], body['gsa_key'], job_spec, format_version, self.task_manager, self.pool ) @@ -1470,7 +1499,6 @@ async def run(self): f'free worker data disk storage {self.data_disk_space_remaining.value}Gi' ) finally: - self.active = False log.info('shutting down') await site.stop() log.info('stopped site') @@ -1627,7 +1655,6 @@ async def activate(self): resp_json = await resp.json() self.headers = {'X-Hail-Instance-Name': NAME, 'Authorization': f'Bearer {resp_json["token"]}'} - self.active = True async def async_main(): @@ -1649,7 +1676,7 @@ async def async_main(): asyncio.get_event_loop().set_debug(True) log.debug('Tasks immediately after docker close') dump_all_stacktraces() - other_tasks = [t for t in asyncio.all_tasks() if t != asyncio.current_task()] + other_tasks = [t for t in asyncio.tasks() if t != asyncio.current_task()] if other_tasks: _, pending = await asyncio.wait(other_tasks, timeout=10 * 60, return_when=asyncio.ALL_COMPLETED) for t in pending: diff --git a/batch/deployment.yaml b/batch/deployment.yaml index 8af559344fa..cb1f3a88c68 100644 --- a/batch/deployment.yaml +++ b/batch/deployment.yaml @@ -86,8 +86,6 @@ spec: - name: HAIL_SHOULD_PROFILE value: "1" {% endif %} 
- - name: HAIL_SCOPE - value: "{{ scope }}" {% if deploy %} - name: HAIL_BATCH_BUCKET_NAME valueFrom: @@ -224,8 +222,6 @@ spec: value: "{{ global.k8s_server_url }}" - name: HAIL_SHA value: "{{ code.sha }}" - - name: HAIL_SCOPE - value: "{{ scope }}" {% if deploy %} - name: HAIL_BATCH_BUCKET_NAME valueFrom: diff --git a/batch/test/test_batch.py b/batch/test/test_batch.py index 021561b78f3..7771927dd04 100644 --- a/batch/test/test_batch.py +++ b/batch/test/test_batch.py @@ -18,11 +18,23 @@ deploy_config = get_deploy_config() -DOCKER_PREFIX = os.environ.get('DOCKER_PREFIX') DOCKER_ROOT_IMAGE = os.environ.get('DOCKER_ROOT_IMAGE', 'gcr.io/hail-vdc/ubuntu:18.04') SCOPE = os.environ.get('HAIL_SCOPE', 'test') +def poll_until(p, max_polls=None): + i = 0 + while True and (max_polls is None or i < max_polls): + x = p() + if x: + return x + # max 4.5s + j = random.randrange(math.floor(1.1 ** min(i, 40))) + time.sleep(0.100 * j) + i = i + 1 + raise ValueError(f'poll_until: exceeded max polls: {i} {max_polls}') + + @pytest.fixture def client(): client = BatchClient('test') @@ -257,15 +269,6 @@ def test_fail(client): assert j._get_exit_code(status, 'main') == 1, str(status) -def test_unknown_image(client): - b = client.create_batch() - j = b.create_job(f'{DOCKER_PREFIX}/does-not-exist', ['echo', 'test']) - b.submit() - status = j.wait() - assert j._get_exit_code(status, 'main') is None - assert status['status']['container_statuses']['main']['short_error'] == 'image not found' - - def test_running_job_log_and_status(client): b = client.create_batch() j = b.create_job(DOCKER_ROOT_IMAGE, ['sleep', '300']) @@ -563,20 +566,6 @@ def test_batch_create_validation(): # token None/missing {'billing_project': 'foo', 'n_jobs': 5, 'token': None}, {'billing_project': 'foo', 'n_jobs': 5}, - # empty gcsfuse bucket name - { - 'billing_project': 'foo', - 'n_jobs': 5, - 'token': 'baz', - 'gcsfuse': [{'bucket': '', 'mount_path': '/bucket', 'read_only': False}], - }, - # empty gcsfuse mount_path name - { - 'billing_project': 'foo', - 'n_jobs': 5, - 'token': 'baz', - 'gcsfuse': [{'bucket': 'foo', 'mount_path': '', 'read_only': False}], - }, # attribute key/value None {'attributes': {'k': None}, 'billing_project': 'foo', 'n_jobs': 5, 'token': 'baz'}, ] diff --git a/batch2/proxy.py b/batch2/proxy.py deleted file mode 100644 index 0a3427237d9..00000000000 --- a/batch2/proxy.py +++ /dev/null @@ -1,19 +0,0 @@ -import asyncio -from aiohttp import web -from hailtop.batch_client.aioclient import BatchClient - -routes = web.RouteTableDef() - -client = BatchClient('test') - - -@routes.get('/api/{route:.*}') -async def proxy_api(request): - route = request.match_info['route'] - data = await client._get(f'/api/{route}') - return web.json_response(await data.json()) - - -app = web.Application() -app.add_routes(routes) -web.run_app(app, host='0.0.0.0', port=5050) diff --git a/batch2/react-batch/.eslintrc.json b/batch2/react-batch/.eslintrc.json deleted file mode 100644 index 854d0d491cc..00000000000 --- a/batch2/react-batch/.eslintrc.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "env": { - "browser": true, - "es2021": true - }, - "extends": [ - "airbnb" - ], - "parser": "@typescript-eslint/parser", - "parserOptions": { - "ecmaFeatures": { - "jsx": true - }, - "ecmaVersion": 12, - "sourceType": "module" - }, - "plugins": [ - "react", - "@typescript-eslint" - ], - "rules": { - "no-use-before-define": "off", - "@typescript-eslint/no-use-before-define": "off", - "react/jsx-filename-extension": [ - 2, - { - "extensions": [".tsx"] - } - ], - 
"react/jsx-one-expression-per-line": "off", - "import/extensions": "off", - "import/no-unresolved": "off", - "import/no-extraneous-dependencies": "off" - } -} diff --git a/batch2/react-batch/.gitignore b/batch2/react-batch/.gitignore deleted file mode 100644 index d451ff16c10..00000000000 --- a/batch2/react-batch/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -node_modules -.DS_Store -dist -dist-ssr -*.local diff --git a/batch2/react-batch/index.html b/batch2/react-batch/index.html deleted file mode 100644 index 3377ffaec2d..00000000000 --- a/batch2/react-batch/index.html +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - Batch2 React - - -
- - - diff --git a/batch2/react-batch/package-lock.json b/batch2/react-batch/package-lock.json deleted file mode 100644 index 74b6fd85ed7..00000000000 --- a/batch2/react-batch/package-lock.json +++ /dev/null @@ -1,6591 +0,0 @@ -{ - "name": "react-batch", - "version": "0.0.0", - "lockfileVersion": 2, - "requires": true, - "packages": { - "": { - "version": "0.0.0", - "dependencies": { - "react": "^17.0.2", - "react-dom": "^17.0.0", - "react-router-dom": "^5.2.0" - }, - "devDependencies": { - "@types/react": "^17.0.5", - "@types/react-dom": "^17.0.0", - "@types/react-router-dom": "^5.1.7", - "@typescript-eslint/eslint-plugin": "^4.23.0", - "@typescript-eslint/parser": "^4.23.0", - "@vitejs/plugin-react-refresh": "^1.3.1", - "eslint": "^7.26.0", - "eslint-config-airbnb": "^18.2.1", - "eslint-plugin-import": "^2.23.0", - "eslint-plugin-jsx-a11y": "^6.4.1", - "eslint-plugin-react": "^7.23.2", - "eslint-plugin-react-hooks": "^4.2.0", - "typescript": "^4.1.2", - "vite": "^2.2.3" - } - }, - "node_modules/@babel/code-frame": { - "version": "7.12.13", - "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.12.13.tgz", - "integrity": "sha512-HV1Cm0Q3ZrpCR93tkWOYiuYIgLxZXZFVG2VgK+MBWjUqZTundupbfx2aXarXuw5Ko5aMcjtJgbSs4vUGBS5v6g==", - "dev": true, - "dependencies": { - "@babel/highlight": "^7.12.13" - } - }, - "node_modules/@babel/compat-data": { - "version": "7.14.0", - "resolved": "https://registry.npmjs.org/@babel/compat-data/-/compat-data-7.14.0.tgz", - "integrity": "sha512-vu9V3uMM/1o5Hl5OekMUowo3FqXLJSw+s+66nt0fSWVWTtmosdzn45JHOB3cPtZoe6CTBDzvSw0RdOY85Q37+Q==", - "dev": true - }, - "node_modules/@babel/core": { - "version": "7.14.3", - "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.14.3.tgz", - "integrity": "sha512-jB5AmTKOCSJIZ72sd78ECEhuPiDMKlQdDI/4QRI6lzYATx5SSogS1oQA2AoPecRCknm30gHi2l+QVvNUu3wZAg==", - "dev": true, - "dependencies": { - "@babel/code-frame": "^7.12.13", - "@babel/generator": "^7.14.3", - "@babel/helper-compilation-targets": "^7.13.16", - "@babel/helper-module-transforms": "^7.14.2", - "@babel/helpers": "^7.14.0", - "@babel/parser": "^7.14.3", - "@babel/template": "^7.12.13", - "@babel/traverse": "^7.14.2", - "@babel/types": "^7.14.2", - "convert-source-map": "^1.7.0", - "debug": "^4.1.0", - "gensync": "^1.0.0-beta.2", - "json5": "^2.1.2", - "semver": "^6.3.0", - "source-map": "^0.5.0" - }, - "engines": { - "node": ">=6.9.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/babel" - } - }, - "node_modules/@babel/core/node_modules/semver": { - "version": "6.3.0", - "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.0.tgz", - "integrity": "sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==", - "dev": true, - "bin": { - "semver": "bin/semver.js" - } - }, - "node_modules/@babel/generator": { - "version": "7.14.3", - "resolved": "https://registry.npmjs.org/@babel/generator/-/generator-7.14.3.tgz", - "integrity": "sha512-bn0S6flG/j0xtQdz3hsjJ624h3W0r3llttBMfyHX3YrZ/KtLYr15bjA0FXkgW7FpvrDuTuElXeVjiKlYRpnOFA==", - "dev": true, - "dependencies": { - "@babel/types": "^7.14.2", - "jsesc": "^2.5.1", - "source-map": "^0.5.0" - } - }, - "node_modules/@babel/helper-compilation-targets": { - "version": "7.13.16", - "resolved": "https://registry.npmjs.org/@babel/helper-compilation-targets/-/helper-compilation-targets-7.13.16.tgz", - "integrity": "sha512-3gmkYIrpqsLlieFwjkGgLaSHmhnvlAYzZLlYVjlW+QwI+1zE17kGxuJGmIqDQdYp56XdmGeD+Bswx0UTyG18xA==", - "dev": true, - "dependencies": 
{ - "@babel/compat-data": "^7.13.15", - "@babel/helper-validator-option": "^7.12.17", - "browserslist": "^4.14.5", - "semver": "^6.3.0" - }, - "peerDependencies": { - "@babel/core": "^7.0.0" - } - }, - "node_modules/@babel/helper-compilation-targets/node_modules/semver": { - "version": "6.3.0", - "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.0.tgz", - "integrity": "sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==", - "dev": true, - "bin": { - "semver": "bin/semver.js" - } - }, - "node_modules/@babel/helper-function-name": { - "version": "7.14.2", - "resolved": "https://registry.npmjs.org/@babel/helper-function-name/-/helper-function-name-7.14.2.tgz", - "integrity": "sha512-NYZlkZRydxw+YT56IlhIcS8PAhb+FEUiOzuhFTfqDyPmzAhRge6ua0dQYT/Uh0t/EDHq05/i+e5M2d4XvjgarQ==", - "dev": true, - "dependencies": { - "@babel/helper-get-function-arity": "^7.12.13", - "@babel/template": "^7.12.13", - "@babel/types": "^7.14.2" - } - }, - "node_modules/@babel/helper-get-function-arity": { - "version": "7.12.13", - "resolved": "https://registry.npmjs.org/@babel/helper-get-function-arity/-/helper-get-function-arity-7.12.13.tgz", - "integrity": "sha512-DjEVzQNz5LICkzN0REdpD5prGoidvbdYk1BVgRUOINaWJP2t6avB27X1guXK1kXNrX0WMfsrm1A/ZBthYuIMQg==", - "dev": true, - "dependencies": { - "@babel/types": "^7.12.13" - } - }, - "node_modules/@babel/helper-member-expression-to-functions": { - "version": "7.13.12", - "resolved": "https://registry.npmjs.org/@babel/helper-member-expression-to-functions/-/helper-member-expression-to-functions-7.13.12.tgz", - "integrity": "sha512-48ql1CLL59aKbU94Y88Xgb2VFy7a95ykGRbJJaaVv+LX5U8wFpLfiGXJJGUozsmA1oEh/o5Bp60Voq7ACyA/Sw==", - "dev": true, - "dependencies": { - "@babel/types": "^7.13.12" - } - }, - "node_modules/@babel/helper-module-imports": { - "version": "7.13.12", - "resolved": "https://registry.npmjs.org/@babel/helper-module-imports/-/helper-module-imports-7.13.12.tgz", - "integrity": "sha512-4cVvR2/1B693IuOvSI20xqqa/+bl7lqAMR59R4iu39R9aOX8/JoYY1sFaNvUMyMBGnHdwvJgUrzNLoUZxXypxA==", - "dev": true, - "dependencies": { - "@babel/types": "^7.13.12" - } - }, - "node_modules/@babel/helper-module-transforms": { - "version": "7.14.2", - "resolved": "https://registry.npmjs.org/@babel/helper-module-transforms/-/helper-module-transforms-7.14.2.tgz", - "integrity": "sha512-OznJUda/soKXv0XhpvzGWDnml4Qnwp16GN+D/kZIdLsWoHj05kyu8Rm5kXmMef+rVJZ0+4pSGLkeixdqNUATDA==", - "dev": true, - "dependencies": { - "@babel/helper-module-imports": "^7.13.12", - "@babel/helper-replace-supers": "^7.13.12", - "@babel/helper-simple-access": "^7.13.12", - "@babel/helper-split-export-declaration": "^7.12.13", - "@babel/helper-validator-identifier": "^7.14.0", - "@babel/template": "^7.12.13", - "@babel/traverse": "^7.14.2", - "@babel/types": "^7.14.2" - } - }, - "node_modules/@babel/helper-optimise-call-expression": { - "version": "7.12.13", - "resolved": "https://registry.npmjs.org/@babel/helper-optimise-call-expression/-/helper-optimise-call-expression-7.12.13.tgz", - "integrity": "sha512-BdWQhoVJkp6nVjB7nkFWcn43dkprYauqtk++Py2eaf/GRDFm5BxRqEIZCiHlZUGAVmtwKcsVL1dC68WmzeFmiA==", - "dev": true, - "dependencies": { - "@babel/types": "^7.12.13" - } - }, - "node_modules/@babel/helper-plugin-utils": { - "version": "7.13.0", - "resolved": "https://registry.npmjs.org/@babel/helper-plugin-utils/-/helper-plugin-utils-7.13.0.tgz", - "integrity": "sha512-ZPafIPSwzUlAoWT8DKs1W2VyF2gOWthGd5NGFMsBcMMol+ZhK+EQY/e6V96poa6PA/Bh+C9plWN0hXO1uB8AfQ==", - "dev": true - }, - 
"node_modules/@babel/helper-replace-supers": { - "version": "7.14.3", - "resolved": "https://registry.npmjs.org/@babel/helper-replace-supers/-/helper-replace-supers-7.14.3.tgz", - "integrity": "sha512-Rlh8qEWZSTfdz+tgNV/N4gz1a0TMNwCUcENhMjHTHKp3LseYH5Jha0NSlyTQWMnjbYcwFt+bqAMqSLHVXkQ6UA==", - "dev": true, - "dependencies": { - "@babel/helper-member-expression-to-functions": "^7.13.12", - "@babel/helper-optimise-call-expression": "^7.12.13", - "@babel/traverse": "^7.14.2", - "@babel/types": "^7.14.2" - } - }, - "node_modules/@babel/helper-simple-access": { - "version": "7.13.12", - "resolved": "https://registry.npmjs.org/@babel/helper-simple-access/-/helper-simple-access-7.13.12.tgz", - "integrity": "sha512-7FEjbrx5SL9cWvXioDbnlYTppcZGuCY6ow3/D5vMggb2Ywgu4dMrpTJX0JdQAIcRRUElOIxF3yEooa9gUb9ZbA==", - "dev": true, - "dependencies": { - "@babel/types": "^7.13.12" - } - }, - "node_modules/@babel/helper-split-export-declaration": { - "version": "7.12.13", - "resolved": "https://registry.npmjs.org/@babel/helper-split-export-declaration/-/helper-split-export-declaration-7.12.13.tgz", - "integrity": "sha512-tCJDltF83htUtXx5NLcaDqRmknv652ZWCHyoTETf1CXYJdPC7nohZohjUgieXhv0hTJdRf2FjDueFehdNucpzg==", - "dev": true, - "dependencies": { - "@babel/types": "^7.12.13" - } - }, - "node_modules/@babel/helper-validator-identifier": { - "version": "7.14.0", - "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.14.0.tgz", - "integrity": "sha512-V3ts7zMSu5lfiwWDVWzRDGIN+lnCEUdaXgtVHJgLb1rGaA6jMrtB9EmE7L18foXJIE8Un/A/h6NJfGQp/e1J4A==", - "dev": true - }, - "node_modules/@babel/helper-validator-option": { - "version": "7.12.17", - "resolved": "https://registry.npmjs.org/@babel/helper-validator-option/-/helper-validator-option-7.12.17.tgz", - "integrity": "sha512-TopkMDmLzq8ngChwRlyjR6raKD6gMSae4JdYDB8bByKreQgG0RBTuKe9LRxW3wFtUnjxOPRKBDwEH6Mg5KeDfw==", - "dev": true - }, - "node_modules/@babel/helpers": { - "version": "7.14.0", - "resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.14.0.tgz", - "integrity": "sha512-+ufuXprtQ1D1iZTO/K9+EBRn+qPWMJjZSw/S0KlFrxCw4tkrzv9grgpDHkY9MeQTjTY8i2sp7Jep8DfU6tN9Mg==", - "dev": true, - "dependencies": { - "@babel/template": "^7.12.13", - "@babel/traverse": "^7.14.0", - "@babel/types": "^7.14.0" - } - }, - "node_modules/@babel/highlight": { - "version": "7.14.0", - "resolved": "https://registry.npmjs.org/@babel/highlight/-/highlight-7.14.0.tgz", - "integrity": "sha512-YSCOwxvTYEIMSGaBQb5kDDsCopDdiUGsqpatp3fOlI4+2HQSkTmEVWnVuySdAC5EWCqSWWTv0ib63RjR7dTBdg==", - "dev": true, - "dependencies": { - "@babel/helper-validator-identifier": "^7.14.0", - "chalk": "^2.0.0", - "js-tokens": "^4.0.0" - } - }, - "node_modules/@babel/parser": { - "version": "7.14.3", - "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.14.3.tgz", - "integrity": "sha512-7MpZDIfI7sUC5zWo2+foJ50CSI5lcqDehZ0lVgIhSi4bFEk94fLAKlF3Q0nzSQQ+ca0lm+O6G9ztKVBeu8PMRQ==", - "dev": true, - "bin": { - "parser": "bin/babel-parser.js" - }, - "engines": { - "node": ">=6.0.0" - } - }, - "node_modules/@babel/plugin-transform-react-jsx-self": { - "version": "7.12.13", - "resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-jsx-self/-/plugin-transform-react-jsx-self-7.12.13.tgz", - "integrity": "sha512-FXYw98TTJ125GVCCkFLZXlZ1qGcsYqNQhVBQcZjyrwf8FEUtVfKIoidnO8S0q+KBQpDYNTmiGo1gn67Vti04lQ==", - "dev": true, - "dependencies": { - "@babel/helper-plugin-utils": "^7.12.13" - }, - "peerDependencies": { - "@babel/core": "^7.0.0-0" - } - }, - 
"node_modules/@babel/plugin-transform-react-jsx-source": { - "version": "7.14.2", - "resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-jsx-source/-/plugin-transform-react-jsx-source-7.14.2.tgz", - "integrity": "sha512-OMorspVyjxghAjzgeAWc6O7W7vHbJhV69NeTGdl9Mxgz6PaweAuo7ffB9T5A1OQ9dGcw0As4SYMUhyNC4u7mVg==", - "dev": true, - "dependencies": { - "@babel/helper-plugin-utils": "^7.13.0" - }, - "peerDependencies": { - "@babel/core": "^7.0.0-0" - } - }, - "node_modules/@babel/runtime": { - "version": "7.14.0", - "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.14.0.tgz", - "integrity": "sha512-JELkvo/DlpNdJ7dlyw/eY7E0suy5i5GQH+Vlxaq1nsNJ+H7f4Vtv3jMeCEgRhZZQFXTjldYfQgv2qmM6M1v5wA==", - "dependencies": { - "regenerator-runtime": "^0.13.4" - } - }, - "node_modules/@babel/runtime-corejs3": { - "version": "7.14.0", - "resolved": "https://registry.npmjs.org/@babel/runtime-corejs3/-/runtime-corejs3-7.14.0.tgz", - "integrity": "sha512-0R0HTZWHLk6G8jIk0FtoX+AatCtKnswS98VhXwGImFc759PJRp4Tru0PQYZofyijTFUr+gT8Mu7sgXVJLQ0ceg==", - "dev": true, - "dependencies": { - "core-js-pure": "^3.0.0", - "regenerator-runtime": "^0.13.4" - } - }, - "node_modules/@babel/template": { - "version": "7.12.13", - "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.12.13.tgz", - "integrity": "sha512-/7xxiGA57xMo/P2GVvdEumr8ONhFOhfgq2ihK3h1e6THqzTAkHbkXgB0xI9yeTfIUoH3+oAeHhqm/I43OTbbjA==", - "dev": true, - "dependencies": { - "@babel/code-frame": "^7.12.13", - "@babel/parser": "^7.12.13", - "@babel/types": "^7.12.13" - } - }, - "node_modules/@babel/traverse": { - "version": "7.14.2", - "resolved": "https://registry.npmjs.org/@babel/traverse/-/traverse-7.14.2.tgz", - "integrity": "sha512-TsdRgvBFHMyHOOzcP9S6QU0QQtjxlRpEYOy3mcCO5RgmC305ki42aSAmfZEMSSYBla2oZ9BMqYlncBaKmD/7iA==", - "dev": true, - "dependencies": { - "@babel/code-frame": "^7.12.13", - "@babel/generator": "^7.14.2", - "@babel/helper-function-name": "^7.14.2", - "@babel/helper-split-export-declaration": "^7.12.13", - "@babel/parser": "^7.14.2", - "@babel/types": "^7.14.2", - "debug": "^4.1.0", - "globals": "^11.1.0" - } - }, - "node_modules/@babel/types": { - "version": "7.14.2", - "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.14.2.tgz", - "integrity": "sha512-SdjAG/3DikRHpUOjxZgnkbR11xUlyDMUFJdvnIgZEE16mqmY0BINMmc4//JMJglEmn6i7sq6p+mGrFWyZ98EEw==", - "dev": true, - "dependencies": { - "@babel/helper-validator-identifier": "^7.14.0", - "to-fast-properties": "^2.0.0" - } - }, - "node_modules/@eslint/eslintrc": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-0.4.1.tgz", - "integrity": "sha512-5v7TDE9plVhvxQeWLXDTvFvJBdH6pEsdnl2g/dAptmuFEPedQ4Erq5rsDsX+mvAM610IhNaO2W5V1dOOnDKxkQ==", - "dev": true, - "dependencies": { - "ajv": "^6.12.4", - "debug": "^4.1.1", - "espree": "^7.3.0", - "globals": "^12.1.0", - "ignore": "^4.0.6", - "import-fresh": "^3.2.1", - "js-yaml": "^3.13.1", - "minimatch": "^3.0.4", - "strip-json-comments": "^3.1.1" - }, - "engines": { - "node": "^10.12.0 || >=12.0.0" - } - }, - "node_modules/@eslint/eslintrc/node_modules/globals": { - "version": "12.4.0", - "resolved": "https://registry.npmjs.org/globals/-/globals-12.4.0.tgz", - "integrity": "sha512-BWICuzzDvDoH54NHKCseDanAhE3CeDorgDL5MT6LMXXj2WCnd9UC2szdk4AWLfjdgNBCXLUanXYcpBBKOSWGwg==", - "dev": true, - "dependencies": { - "type-fest": "^0.8.1" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - 
"node_modules/@nodelib/fs.scandir": { - "version": "2.1.4", - "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.4.tgz", - "integrity": "sha512-33g3pMJk3bg5nXbL/+CY6I2eJDzZAni49PfJnL5fghPTggPvBd/pFNSgJsdAgWptuFu7qq/ERvOYFlhvsLTCKA==", - "dev": true, - "dependencies": { - "@nodelib/fs.stat": "2.0.4", - "run-parallel": "^1.1.9" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/@nodelib/fs.stat": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/@nodelib/fs.stat/-/fs.stat-2.0.4.tgz", - "integrity": "sha512-IYlHJA0clt2+Vg7bccq+TzRdJvv19c2INqBSsoOLp1je7xjtr7J26+WXR72MCdvU9q1qTzIWDfhMf+DRvQJK4Q==", - "dev": true, - "engines": { - "node": ">= 8" - } - }, - "node_modules/@nodelib/fs.walk": { - "version": "1.2.6", - "resolved": "https://registry.npmjs.org/@nodelib/fs.walk/-/fs.walk-1.2.6.tgz", - "integrity": "sha512-8Broas6vTtW4GIXTAHDoE32hnN2M5ykgCpWGbuXHQ15vEMqr23pB76e/GZcYsZCHALv50ktd24qhEyKr6wBtow==", - "dev": true, - "dependencies": { - "@nodelib/fs.scandir": "2.1.4", - "fastq": "^1.6.0" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/@types/history": { - "version": "4.7.8", - "resolved": "https://registry.npmjs.org/@types/history/-/history-4.7.8.tgz", - "integrity": "sha512-S78QIYirQcUoo6UJZx9CSP0O2ix9IaeAXwQi26Rhr/+mg7qqPy8TzaxHSUut7eGjL8WmLccT7/MXf304WjqHcA==", - "dev": true - }, - "node_modules/@types/json-schema": { - "version": "7.0.7", - "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.7.tgz", - "integrity": "sha512-cxWFQVseBm6O9Gbw1IWb8r6OS4OhSt3hPZLkFApLjM8TEXROBuQGLAH2i2gZpcXdLBIrpXuTDhH7Vbm1iXmNGA==", - "dev": true - }, - "node_modules/@types/json5": { - "version": "0.0.29", - "resolved": "https://registry.npmjs.org/@types/json5/-/json5-0.0.29.tgz", - "integrity": "sha1-7ihweulOEdK4J7y+UnC86n8+ce4=", - "dev": true - }, - "node_modules/@types/prop-types": { - "version": "15.7.3", - "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.3.tgz", - "integrity": "sha512-KfRL3PuHmqQLOG+2tGpRO26Ctg+Cq1E01D2DMriKEATHgWLfeNDmq9e29Q9WIky0dQ3NPkd1mzYH8Lm936Z9qw==", - "dev": true - }, - "node_modules/@types/react": { - "version": "17.0.6", - "resolved": "https://registry.npmjs.org/@types/react/-/react-17.0.6.tgz", - "integrity": "sha512-u/TtPoF/hrvb63LdukET6ncaplYsvCvmkceasx8oG84/ZCsoLxz9Z/raPBP4lTAiWW1Jb889Y9svHmv8R26dWw==", - "dev": true, - "dependencies": { - "@types/prop-types": "*", - "@types/scheduler": "*", - "csstype": "^3.0.2" - } - }, - "node_modules/@types/react-dom": { - "version": "17.0.5", - "resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-17.0.5.tgz", - "integrity": "sha512-ikqukEhH4H9gr4iJCmQVNzTB307kROe3XFfHAOTxOXPOw7lAoEXnM5KWTkzeANGL5Ce6ABfiMl/zJBYNi7ObmQ==", - "dev": true, - "dependencies": { - "@types/react": "*" - } - }, - "node_modules/@types/react-router": { - "version": "5.1.14", - "resolved": "https://registry.npmjs.org/@types/react-router/-/react-router-5.1.14.tgz", - "integrity": "sha512-LAJpqYUaCTMT2anZheoidiIymt8MuX286zoVFPM3DVb23aQBH0mAkFvzpd4LKqiolV8bBtZWT5Qp7hClCNDENw==", - "dev": true, - "dependencies": { - "@types/history": "*", - "@types/react": "*" - } - }, - "node_modules/@types/react-router-dom": { - "version": "5.1.7", - "resolved": "https://registry.npmjs.org/@types/react-router-dom/-/react-router-dom-5.1.7.tgz", - "integrity": "sha512-D5mHD6TbdV/DNHYsnwBTv+y73ei+mMjrkGrla86HthE4/PVvL1J94Bu3qABU+COXzpL23T1EZapVVpwHuBXiUg==", - "dev": true, - "dependencies": { - "@types/history": "*", - "@types/react": "*", - 
"@types/react-router": "*" - } - }, - "node_modules/@types/scheduler": { - "version": "0.16.1", - "resolved": "https://registry.npmjs.org/@types/scheduler/-/scheduler-0.16.1.tgz", - "integrity": "sha512-EaCxbanVeyxDRTQBkdLb3Bvl/HK7PBK6UJjsSixB0iHKoWxE5uu2Q/DgtpOhPIojN0Zl1whvOd7PoHs2P0s5eA==", - "dev": true - }, - "node_modules/@typescript-eslint/eslint-plugin": { - "version": "4.24.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-4.24.0.tgz", - "integrity": "sha512-qbCgkPM7DWTsYQGjx9RTuQGswi+bEt0isqDBeo+CKV0953zqI0Tp7CZ7Fi9ipgFA6mcQqF4NOVNwS/f2r6xShw==", - "dev": true, - "dependencies": { - "@typescript-eslint/experimental-utils": "4.24.0", - "@typescript-eslint/scope-manager": "4.24.0", - "debug": "^4.1.1", - "functional-red-black-tree": "^1.0.1", - "lodash": "^4.17.15", - "regexpp": "^3.0.0", - "semver": "^7.3.2", - "tsutils": "^3.17.1" - }, - "engines": { - "node": "^10.12.0 || >=12.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - }, - "peerDependencies": { - "@typescript-eslint/parser": "^4.0.0", - "eslint": "^5.0.0 || ^6.0.0 || ^7.0.0" - }, - "peerDependenciesMeta": { - "typescript": { - "optional": true - } - } - }, - "node_modules/@typescript-eslint/experimental-utils": { - "version": "4.24.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/experimental-utils/-/experimental-utils-4.24.0.tgz", - "integrity": "sha512-IwTT2VNDKH1h8RZseMH4CcYBz6lTvRoOLDuuqNZZoThvfHEhOiZPQCow+5El3PtyxJ1iDr6UXZwYtE3yZQjhcw==", - "dev": true, - "dependencies": { - "@types/json-schema": "^7.0.3", - "@typescript-eslint/scope-manager": "4.24.0", - "@typescript-eslint/types": "4.24.0", - "@typescript-eslint/typescript-estree": "4.24.0", - "eslint-scope": "^5.0.0", - "eslint-utils": "^2.0.0" - }, - "engines": { - "node": "^10.12.0 || >=12.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - }, - "peerDependencies": { - "eslint": "*" - } - }, - "node_modules/@typescript-eslint/parser": { - "version": "4.24.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-4.24.0.tgz", - "integrity": "sha512-dj1ZIh/4QKeECLb2f/QjRwMmDArcwc2WorWPRlB8UNTZlY1KpTVsbX7e3ZZdphfRw29aTFUSNuGB8w9X5sS97w==", - "dev": true, - "dependencies": { - "@typescript-eslint/scope-manager": "4.24.0", - "@typescript-eslint/types": "4.24.0", - "@typescript-eslint/typescript-estree": "4.24.0", - "debug": "^4.1.1" - }, - "engines": { - "node": "^10.12.0 || >=12.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - }, - "peerDependencies": { - "eslint": "^5.0.0 || ^6.0.0 || ^7.0.0" - }, - "peerDependenciesMeta": { - "typescript": { - "optional": true - } - } - }, - "node_modules/@typescript-eslint/scope-manager": { - "version": "4.24.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-4.24.0.tgz", - "integrity": "sha512-9+WYJGDnuC9VtYLqBhcSuM7du75fyCS/ypC8c5g7Sdw7pGL4NDTbeH38eJPfzIydCHZDoOgjloxSAA3+4l/zsA==", - "dev": true, - "dependencies": { - "@typescript-eslint/types": "4.24.0", - "@typescript-eslint/visitor-keys": "4.24.0" - }, - "engines": { - "node": "^8.10.0 || ^10.13.0 || >=11.10.1" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - } - }, - "node_modules/@typescript-eslint/types": { - "version": "4.24.0", - "resolved": 
"https://registry.npmjs.org/@typescript-eslint/types/-/types-4.24.0.tgz", - "integrity": "sha512-tkZUBgDQKdvfs8L47LaqxojKDE+mIUmOzdz7r+u+U54l3GDkTpEbQ1Jp3cNqqAU9vMUCBA1fitsIhm7yN0vx9Q==", - "dev": true, - "engines": { - "node": "^8.10.0 || ^10.13.0 || >=11.10.1" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - } - }, - "node_modules/@typescript-eslint/typescript-estree": { - "version": "4.24.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-4.24.0.tgz", - "integrity": "sha512-kBDitL/by/HK7g8CYLT7aKpAwlR8doshfWz8d71j97n5kUa5caHWvY0RvEUEanL/EqBJoANev8Xc/mQ6LLwXGA==", - "dev": true, - "dependencies": { - "@typescript-eslint/types": "4.24.0", - "@typescript-eslint/visitor-keys": "4.24.0", - "debug": "^4.1.1", - "globby": "^11.0.1", - "is-glob": "^4.0.1", - "semver": "^7.3.2", - "tsutils": "^3.17.1" - }, - "engines": { - "node": "^10.12.0 || >=12.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - }, - "peerDependenciesMeta": { - "typescript": { - "optional": true - } - } - }, - "node_modules/@typescript-eslint/visitor-keys": { - "version": "4.24.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-4.24.0.tgz", - "integrity": "sha512-4ox1sjmGHIxjEDBnMCtWFFhErXtKA1Ec0sBpuz0fqf3P+g3JFGyTxxbF06byw0FRsPnnbq44cKivH7Ks1/0s6g==", - "dev": true, - "dependencies": { - "@typescript-eslint/types": "4.24.0", - "eslint-visitor-keys": "^2.0.0" - }, - "engines": { - "node": "^8.10.0 || ^10.13.0 || >=11.10.1" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/typescript-eslint" - } - }, - "node_modules/@vitejs/plugin-react-refresh": { - "version": "1.3.3", - "resolved": "https://registry.npmjs.org/@vitejs/plugin-react-refresh/-/plugin-react-refresh-1.3.3.tgz", - "integrity": "sha512-J3KFwSQKrEK7fgOwTx0PMTlsolZORUch6BswjsM50q+Y7zSvX1ROIRn+tK2VE8SCvbYRHtzEKFlYW3vsWyTosQ==", - "dev": true, - "dependencies": { - "@babel/core": "^7.12.13", - "@babel/plugin-transform-react-jsx-self": "^7.12.13", - "@babel/plugin-transform-react-jsx-source": "^7.12.13", - "react-refresh": "^0.9.0" - }, - "engines": { - "node": ">=12.0.0" - } - }, - "node_modules/acorn": { - "version": "7.4.1", - "resolved": "https://registry.npmjs.org/acorn/-/acorn-7.4.1.tgz", - "integrity": "sha512-nQyp0o1/mNdbTO1PO6kHkwSrmgZ0MT/jCCpNiwbUjGoRN4dlBhqJtoQuCnEOKzgTVwg0ZWiCoQy6SxMebQVh8A==", - "dev": true, - "bin": { - "acorn": "bin/acorn" - }, - "engines": { - "node": ">=0.4.0" - } - }, - "node_modules/acorn-jsx": { - "version": "5.3.1", - "resolved": "https://registry.npmjs.org/acorn-jsx/-/acorn-jsx-5.3.1.tgz", - "integrity": "sha512-K0Ptm/47OKfQRpNQ2J/oIN/3QYiK6FwW+eJbILhsdxh2WTLdl+30o8aGdTbm5JbffpFFAg/g+zi1E+jvJha5ng==", - "dev": true, - "peerDependencies": { - "acorn": "^6.0.0 || ^7.0.0 || ^8.0.0" - } - }, - "node_modules/ajv": { - "version": "6.12.6", - "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", - "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", - "dev": true, - "dependencies": { - "fast-deep-equal": "^3.1.1", - "fast-json-stable-stringify": "^2.0.0", - "json-schema-traverse": "^0.4.1", - "uri-js": "^4.2.2" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/epoberezkin" - } - }, - "node_modules/ansi-colors": { - "version": "4.1.1", - "resolved": 
"https://registry.npmjs.org/ansi-colors/-/ansi-colors-4.1.1.tgz", - "integrity": "sha512-JoX0apGbHaUJBNl6yF+p6JAFYZ666/hhCGKN5t9QFjbJQKUU/g8MNbFDbvfrgKXvI1QpZplPOnwIo99lX/AAmA==", - "dev": true, - "engines": { - "node": ">=6" - } - }, - "node_modules/ansi-regex": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.0.tgz", - "integrity": "sha512-bY6fj56OUQ0hU1KjFNDQuJFezqKdrAyFdIevADiqrWHwSlbmBNMHp5ak2f40Pm8JTFyM2mqxkG6ngkHO11f/lg==", - "dev": true, - "engines": { - "node": ">=8" - } - }, - "node_modules/ansi-styles": { - "version": "3.2.1", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz", - "integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==", - "dev": true, - "dependencies": { - "color-convert": "^1.9.0" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/argparse": { - "version": "1.0.10", - "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", - "integrity": "sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==", - "dev": true, - "dependencies": { - "sprintf-js": "~1.0.2" - } - }, - "node_modules/aria-query": { - "version": "4.2.2", - "resolved": "https://registry.npmjs.org/aria-query/-/aria-query-4.2.2.tgz", - "integrity": "sha512-o/HelwhuKpTj/frsOsbNLNgnNGVIFsVP/SW2BSF14gVl7kAfMOJ6/8wUAUvG1R1NHKrfG+2sHZTu0yauT1qBrA==", - "dev": true, - "dependencies": { - "@babel/runtime": "^7.10.2", - "@babel/runtime-corejs3": "^7.10.2" - }, - "engines": { - "node": ">=6.0" - } - }, - "node_modules/array-includes": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/array-includes/-/array-includes-3.1.3.tgz", - "integrity": "sha512-gcem1KlBU7c9rB+Rq8/3PPKsK2kjqeEBa3bD5kkQo4nYlOHQCJqIJFqBXDEfwaRuYTT4E+FxA9xez7Gf/e3Q7A==", - "dev": true, - "dependencies": { - "call-bind": "^1.0.2", - "define-properties": "^1.1.3", - "es-abstract": "^1.18.0-next.2", - "get-intrinsic": "^1.1.1", - "is-string": "^1.0.5" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/array-union": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/array-union/-/array-union-2.1.0.tgz", - "integrity": "sha512-HGyxoOTYUyCM6stUe6EJgnd4EoewAI7zMdfqO+kGjnlZmBDz/cR5pf8r/cR4Wq60sL/p0IkcjUEEPwS3GFrIyw==", - "dev": true, - "engines": { - "node": ">=8" - } - }, - "node_modules/array.prototype.flat": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/array.prototype.flat/-/array.prototype.flat-1.2.4.tgz", - "integrity": "sha512-4470Xi3GAPAjZqFcljX2xzckv1qeKPizoNkiS0+O4IoPR2ZNpcjE0pkhdihlDouK+x6QOast26B4Q/O9DJnwSg==", - "dev": true, - "dependencies": { - "call-bind": "^1.0.0", - "define-properties": "^1.1.3", - "es-abstract": "^1.18.0-next.1" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/array.prototype.flatmap": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/array.prototype.flatmap/-/array.prototype.flatmap-1.2.4.tgz", - "integrity": "sha512-r9Z0zYoxqHz60vvQbWEdXIEtCwHF0yxaWfno9qzXeNHvfyl3BZqygmGzb84dsubyaXLH4husF+NFgMSdpZhk2Q==", - "dev": true, - "dependencies": { - "call-bind": "^1.0.0", - "define-properties": "^1.1.3", - "es-abstract": "^1.18.0-next.1", - "function-bind": "^1.1.1" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/ast-types-flow": 
{ - "version": "0.0.7", - "resolved": "https://registry.npmjs.org/ast-types-flow/-/ast-types-flow-0.0.7.tgz", - "integrity": "sha1-9wtzXGvKGlycItmCw+Oef+ujva0=", - "dev": true - }, - "node_modules/astral-regex": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/astral-regex/-/astral-regex-2.0.0.tgz", - "integrity": "sha512-Z7tMw1ytTXt5jqMcOP+OQteU1VuNK9Y02uuJtKQ1Sv69jXQKKg5cibLwGJow8yzZP+eAc18EmLGPal0bp36rvQ==", - "dev": true, - "engines": { - "node": ">=8" - } - }, - "node_modules/axe-core": { - "version": "4.2.1", - "resolved": "https://registry.npmjs.org/axe-core/-/axe-core-4.2.1.tgz", - "integrity": "sha512-evY7DN8qSIbsW2H/TWQ1bX3sXN1d4MNb5Vb4n7BzPuCwRHdkZ1H2eNLuSh73EoQqkGKUtju2G2HCcjCfhvZIAA==", - "dev": true, - "engines": { - "node": ">=4" - } - }, - "node_modules/axobject-query": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/axobject-query/-/axobject-query-2.2.0.tgz", - "integrity": "sha512-Td525n+iPOOyUQIeBfcASuG6uJsDOITl7Mds5gFyerkWiX7qhUTdYUBlSgNMyVqtSJqwpt1kXGLdUt6SykLMRA==", - "dev": true - }, - "node_modules/balanced-match": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", - "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", - "dev": true - }, - "node_modules/brace-expansion": { - "version": "1.1.11", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", - "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", - "dev": true, - "dependencies": { - "balanced-match": "^1.0.0", - "concat-map": "0.0.1" - } - }, - "node_modules/braces": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz", - "integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==", - "dev": true, - "dependencies": { - "fill-range": "^7.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/browserslist": { - "version": "4.16.6", - "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.16.6.tgz", - "integrity": "sha512-Wspk/PqO+4W9qp5iUTJsa1B/QrYn1keNCcEP5OvP7WBwT4KaDly0uONYmC6Xa3Z5IqnUgS0KcgLYu1l74x0ZXQ==", - "dev": true, - "dependencies": { - "caniuse-lite": "^1.0.30001219", - "colorette": "^1.2.2", - "electron-to-chromium": "^1.3.723", - "escalade": "^3.1.1", - "node-releases": "^1.1.71" - }, - "bin": { - "browserslist": "cli.js" - }, - "engines": { - "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/browserslist" - } - }, - "node_modules/call-bind": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.2.tgz", - "integrity": "sha512-7O+FbCihrB5WGbFYesctwmTKae6rOiIzmz1icreWJ+0aA7LJfuqhEso2T9ncpcFtzMQtzXf2QGGueWJGTYsqrA==", - "dev": true, - "dependencies": { - "function-bind": "^1.1.1", - "get-intrinsic": "^1.0.2" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/callsites": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", - "integrity": "sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==", - "dev": true, - "engines": { - "node": ">=6" - } - }, - "node_modules/caniuse-lite": { - "version": "1.0.30001228", - "resolved": 
"https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001228.tgz", - "integrity": "sha512-QQmLOGJ3DEgokHbMSA8cj2a+geXqmnpyOFT0lhQV6P3/YOJvGDEwoedcwxEQ30gJIwIIunHIicunJ2rzK5gB2A==", - "dev": true, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/browserslist" - } - }, - "node_modules/chalk": { - "version": "2.4.2", - "resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz", - "integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==", - "dev": true, - "dependencies": { - "ansi-styles": "^3.2.1", - "escape-string-regexp": "^1.0.5", - "supports-color": "^5.3.0" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/color-convert": { - "version": "1.9.3", - "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", - "integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==", - "dev": true, - "dependencies": { - "color-name": "1.1.3" - } - }, - "node_modules/color-name": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", - "integrity": "sha1-p9BVi9icQveV3UIyj3QIMcpTvCU=", - "dev": true - }, - "node_modules/colorette": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/colorette/-/colorette-1.2.2.tgz", - "integrity": "sha512-MKGMzyfeuutC/ZJ1cba9NqcNpfeqMUcYmyF1ZFY6/Cn7CNSAKx6a+s48sqLqyAiZuaP2TcqMhoo+dlwFnVxT9w==", - "dev": true - }, - "node_modules/concat-map": { - "version": "0.0.1", - "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", - "integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=", - "dev": true - }, - "node_modules/confusing-browser-globals": { - "version": "1.0.10", - "resolved": "https://registry.npmjs.org/confusing-browser-globals/-/confusing-browser-globals-1.0.10.tgz", - "integrity": "sha512-gNld/3lySHwuhaVluJUKLePYirM3QNCKzVxqAdhJII9/WXKVX5PURzMVJspS1jTslSqjeuG4KMVTSouit5YPHA==", - "dev": true - }, - "node_modules/contains-path": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/contains-path/-/contains-path-1.0.0.tgz", - "integrity": "sha1-NFizMhhWA+ju0Y9RjUoQiIo6vJE=", - "dev": true, - "dependencies": { - "normalize-path": "^2.1.1", - "path-starts-with": "^1.0.0" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/convert-source-map": { - "version": "1.7.0", - "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-1.7.0.tgz", - "integrity": "sha512-4FJkXzKXEDB1snCFZlLP4gpC3JILicCpGbzG9f9G7tGqGCzETQ2hWPrcinA9oU4wtf2biUaEH5065UnMeR33oA==", - "dev": true, - "dependencies": { - "safe-buffer": "~5.1.1" - } - }, - "node_modules/core-js-pure": { - "version": "3.12.1", - "resolved": "https://registry.npmjs.org/core-js-pure/-/core-js-pure-3.12.1.tgz", - "integrity": "sha512-1cch+qads4JnDSWsvc7d6nzlKAippwjUlf6vykkTLW53VSV+NkE6muGBToAjEA8pG90cSfcud3JgVmW2ds5TaQ==", - "dev": true, - "hasInstallScript": true, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/core-js" - } - }, - "node_modules/cross-spawn": { - "version": "7.0.3", - "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", - "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==", - "dev": true, - "dependencies": { - "path-key": "^3.1.0", - "shebang-command": "^2.0.0", - "which": "^2.0.1" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/csstype": { - "version": "3.0.8", - "resolved": 
"https://registry.npmjs.org/csstype/-/csstype-3.0.8.tgz", - "integrity": "sha512-jXKhWqXPmlUeoQnF/EhTtTl4C9SnrxSH/jZUih3jmO6lBKr99rP3/+FmrMj4EFpOXzMtXHAZkd3x0E6h6Fgflw==", - "dev": true - }, - "node_modules/damerau-levenshtein": { - "version": "1.0.7", - "resolved": "https://registry.npmjs.org/damerau-levenshtein/-/damerau-levenshtein-1.0.7.tgz", - "integrity": "sha512-VvdQIPGdWP0SqFXghj79Wf/5LArmreyMsGLa6FG6iC4t3j7j5s71TrwWmT/4akbDQIqjfACkLZmjXhA7g2oUZw==", - "dev": true - }, - "node_modules/debug": { - "version": "4.3.1", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.1.tgz", - "integrity": "sha512-doEwdvm4PCeK4K3RQN2ZC2BYUBaxwLARCqZmMjtF8a51J2Rb0xpVloFRnCODwqjpwnAoao4pelN8l3RJdv3gRQ==", - "dev": true, - "dependencies": { - "ms": "2.1.2" - }, - "engines": { - "node": ">=6.0" - }, - "peerDependenciesMeta": { - "supports-color": { - "optional": true - } - } - }, - "node_modules/deep-is": { - "version": "0.1.3", - "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.3.tgz", - "integrity": "sha1-s2nW+128E+7PUk+RsHD+7cNXzzQ=", - "dev": true - }, - "node_modules/define-properties": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/define-properties/-/define-properties-1.1.3.tgz", - "integrity": "sha512-3MqfYKj2lLzdMSf8ZIZE/V+Zuy+BgD6f164e8K2w7dgnpKArBDerGYpM46IYYcjnkdPNMjPk9A6VFB8+3SKlXQ==", - "dev": true, - "dependencies": { - "object-keys": "^1.0.12" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/dir-glob": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/dir-glob/-/dir-glob-3.0.1.tgz", - "integrity": "sha512-WkrWp9GR4KXfKGYzOLmTuGVi1UWFfws377n9cc55/tb6DuqyF6pcQ5AbiHEshaDpY9v6oaSr2XCDidGmMwdzIA==", - "dev": true, - "dependencies": { - "path-type": "^4.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/doctrine": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-3.0.0.tgz", - "integrity": "sha512-yS+Q5i3hBf7GBkd4KG8a7eBNNWNGLTaEwwYWUijIYM7zrlYDM0BFXHjjPWlWZ1Rg7UaddZeIDmi9jF3HmqiQ2w==", - "dev": true, - "dependencies": { - "esutils": "^2.0.2" - }, - "engines": { - "node": ">=6.0.0" - } - }, - "node_modules/electron-to-chromium": { - "version": "1.3.732", - "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.3.732.tgz", - "integrity": "sha512-qKD5Pbq+QMk4nea4lMuncUMhpEiQwaJyCW7MrvissnRcBDENhVfDmAqQYRQ3X525oTzhar9Zh1cK0L2d1UKYcw==", - "dev": true - }, - "node_modules/emoji-regex": { - "version": "9.2.2", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz", - "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==", - "dev": true - }, - "node_modules/enquirer": { - "version": "2.3.6", - "resolved": "https://registry.npmjs.org/enquirer/-/enquirer-2.3.6.tgz", - "integrity": "sha512-yjNnPr315/FjS4zIsUxYguYUPP2e1NK4d7E7ZOLiyYCcbFBiTMyID+2wvm2w6+pZ/odMA7cRkjhsPbltwBOrLg==", - "dev": true, - "dependencies": { - "ansi-colors": "^4.1.1" - }, - "engines": { - "node": ">=8.6" - } - }, - "node_modules/error-ex": { - "version": "1.3.2", - "resolved": "https://registry.npmjs.org/error-ex/-/error-ex-1.3.2.tgz", - "integrity": "sha512-7dFHNmqeFSEt2ZBsCriorKnn3Z2pj+fd9kmI6QoWw4//DL+icEBfc0U7qJCisqrTsKTjw4fNFy2pW9OqStD84g==", - "dev": true, - "dependencies": { - "is-arrayish": "^0.2.1" - } - }, - "node_modules/es-abstract": { - "version": "1.18.0", - "resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.18.0.tgz", - "integrity": 
"sha512-LJzK7MrQa8TS0ja2w3YNLzUgJCGPdPOV1yVvezjNnS89D+VR08+Szt2mz3YB2Dck/+w5tfIq/RoUAFqJJGM2yw==", - "dev": true, - "dependencies": { - "call-bind": "^1.0.2", - "es-to-primitive": "^1.2.1", - "function-bind": "^1.1.1", - "get-intrinsic": "^1.1.1", - "has": "^1.0.3", - "has-symbols": "^1.0.2", - "is-callable": "^1.2.3", - "is-negative-zero": "^2.0.1", - "is-regex": "^1.1.2", - "is-string": "^1.0.5", - "object-inspect": "^1.9.0", - "object-keys": "^1.1.1", - "object.assign": "^4.1.2", - "string.prototype.trimend": "^1.0.4", - "string.prototype.trimstart": "^1.0.4", - "unbox-primitive": "^1.0.0" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/es-to-primitive": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/es-to-primitive/-/es-to-primitive-1.2.1.tgz", - "integrity": "sha512-QCOllgZJtaUo9miYBcLChTUaHNjJF3PYs1VidD7AwiEj1kYxKeQTctLAezAOH5ZKRH0g2IgPn6KwB4IT8iRpvA==", - "dev": true, - "dependencies": { - "is-callable": "^1.1.4", - "is-date-object": "^1.0.1", - "is-symbol": "^1.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/esbuild": { - "version": "0.11.23", - "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.11.23.tgz", - "integrity": "sha512-iaiZZ9vUF5wJV8ob1tl+5aJTrwDczlvGP0JoMmnpC2B0ppiMCu8n8gmy5ZTGl5bcG081XBVn+U+jP+mPFm5T5Q==", - "dev": true, - "hasInstallScript": true, - "bin": { - "esbuild": "bin/esbuild" - } - }, - "node_modules/escalade": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.1.tgz", - "integrity": "sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==", - "dev": true, - "engines": { - "node": ">=6" - } - }, - "node_modules/escape-string-regexp": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", - "integrity": "sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ=", - "dev": true, - "engines": { - "node": ">=0.8.0" - } - }, - "node_modules/eslint": { - "version": "7.26.0", - "resolved": "https://registry.npmjs.org/eslint/-/eslint-7.26.0.tgz", - "integrity": "sha512-4R1ieRf52/izcZE7AlLy56uIHHDLT74Yzz2Iv2l6kDaYvEu9x+wMB5dZArVL8SYGXSYV2YAg70FcW5Y5nGGNIg==", - "dev": true, - "dependencies": { - "@babel/code-frame": "7.12.11", - "@eslint/eslintrc": "^0.4.1", - "ajv": "^6.10.0", - "chalk": "^4.0.0", - "cross-spawn": "^7.0.2", - "debug": "^4.0.1", - "doctrine": "^3.0.0", - "enquirer": "^2.3.5", - "eslint-scope": "^5.1.1", - "eslint-utils": "^2.1.0", - "eslint-visitor-keys": "^2.0.0", - "espree": "^7.3.1", - "esquery": "^1.4.0", - "esutils": "^2.0.2", - "file-entry-cache": "^6.0.1", - "functional-red-black-tree": "^1.0.1", - "glob-parent": "^5.0.0", - "globals": "^13.6.0", - "ignore": "^4.0.6", - "import-fresh": "^3.0.0", - "imurmurhash": "^0.1.4", - "is-glob": "^4.0.0", - "js-yaml": "^3.13.1", - "json-stable-stringify-without-jsonify": "^1.0.1", - "levn": "^0.4.1", - "lodash": "^4.17.21", - "minimatch": "^3.0.4", - "natural-compare": "^1.4.0", - "optionator": "^0.9.1", - "progress": "^2.0.0", - "regexpp": "^3.1.0", - "semver": "^7.2.1", - "strip-ansi": "^6.0.0", - "strip-json-comments": "^3.1.0", - "table": "^6.0.4", - "text-table": "^0.2.0", - "v8-compile-cache": "^2.0.3" - }, - "bin": { - "eslint": "bin/eslint.js" - }, - "engines": { - "node": "^10.12.0 || >=12.0.0" - }, - "funding": { - "url": "https://opencollective.com/eslint" - } - }, - 
"node_modules/eslint-config-airbnb": { - "version": "18.2.1", - "resolved": "https://registry.npmjs.org/eslint-config-airbnb/-/eslint-config-airbnb-18.2.1.tgz", - "integrity": "sha512-glZNDEZ36VdlZWoxn/bUR1r/sdFKPd1mHPbqUtkctgNG4yT2DLLtJ3D+yCV+jzZCc2V1nBVkmdknOJBZ5Hc0fg==", - "dev": true, - "dependencies": { - "eslint-config-airbnb-base": "^14.2.1", - "object.assign": "^4.1.2", - "object.entries": "^1.1.2" - }, - "engines": { - "node": ">= 6" - }, - "peerDependencies": { - "eslint": "^5.16.0 || ^6.8.0 || ^7.2.0", - "eslint-plugin-import": "^2.22.1", - "eslint-plugin-jsx-a11y": "^6.4.1", - "eslint-plugin-react": "^7.21.5", - "eslint-plugin-react-hooks": "^4 || ^3 || ^2.3.0 || ^1.7.0" - } - }, - "node_modules/eslint-config-airbnb-base": { - "version": "14.2.1", - "resolved": "https://registry.npmjs.org/eslint-config-airbnb-base/-/eslint-config-airbnb-base-14.2.1.tgz", - "integrity": "sha512-GOrQyDtVEc1Xy20U7vsB2yAoB4nBlfH5HZJeatRXHleO+OS5Ot+MWij4Dpltw4/DyIkqUfqz1epfhVR5XWWQPA==", - "dev": true, - "dependencies": { - "confusing-browser-globals": "^1.0.10", - "object.assign": "^4.1.2", - "object.entries": "^1.1.2" - }, - "engines": { - "node": ">= 6" - }, - "peerDependencies": { - "eslint": "^5.16.0 || ^6.8.0 || ^7.2.0", - "eslint-plugin-import": "^2.22.1" - } - }, - "node_modules/eslint-import-resolver-node": { - "version": "0.3.4", - "resolved": "https://registry.npmjs.org/eslint-import-resolver-node/-/eslint-import-resolver-node-0.3.4.tgz", - "integrity": "sha512-ogtf+5AB/O+nM6DIeBUNr2fuT7ot9Qg/1harBfBtaP13ekEWFQEEMP94BCB7zaNW3gyY+8SHYF00rnqYwXKWOA==", - "dev": true, - "dependencies": { - "debug": "^2.6.9", - "resolve": "^1.13.1" - } - }, - "node_modules/eslint-import-resolver-node/node_modules/debug": { - "version": "2.6.9", - "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", - "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", - "dev": true, - "dependencies": { - "ms": "2.0.0" - } - }, - "node_modules/eslint-import-resolver-node/node_modules/ms": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", - "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=", - "dev": true - }, - "node_modules/eslint-module-utils": { - "version": "2.6.1", - "resolved": "https://registry.npmjs.org/eslint-module-utils/-/eslint-module-utils-2.6.1.tgz", - "integrity": "sha512-ZXI9B8cxAJIH4nfkhTwcRTEAnrVfobYqwjWy/QMCZ8rHkZHFjf9yO4BzpiF9kCSfNlMG54eKigISHpX0+AaT4A==", - "dev": true, - "dependencies": { - "debug": "^3.2.7", - "pkg-dir": "^2.0.0" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/eslint-module-utils/node_modules/debug": { - "version": "3.2.7", - "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.7.tgz", - "integrity": "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==", - "dev": true, - "dependencies": { - "ms": "^2.1.1" - } - }, - "node_modules/eslint-plugin-import": { - "version": "2.23.2", - "resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.23.2.tgz", - "integrity": "sha512-LmNoRptHBxOP+nb0PIKz1y6OSzCJlB+0g0IGS3XV4KaKk2q4szqQ6s6F1utVf5ZRkxk/QOTjdxe7v4VjS99Bsg==", - "dev": true, - "dependencies": { - "array-includes": "^3.1.3", - "array.prototype.flat": "^1.2.4", - "contains-path": "^1.0.0", - "debug": "^2.6.9", - "doctrine": "^2.1.0", - "eslint-import-resolver-node": "^0.3.4", - "eslint-module-utils": "^2.6.1", - "find-up": "^2.0.0", - "has": "^1.0.3", - "is-core-module": "^2.4.0", - "minimatch": 
"^3.0.4", - "object.values": "^1.1.3", - "pkg-up": "^2.0.0", - "read-pkg-up": "^3.0.0", - "resolve": "^1.20.0", - "tsconfig-paths": "^3.9.0" - }, - "engines": { - "node": ">=4" - }, - "peerDependencies": { - "eslint": "^2 || ^3 || ^4 || ^5 || ^6 || ^7.2.0" - } - }, - "node_modules/eslint-plugin-import/node_modules/debug": { - "version": "2.6.9", - "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", - "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", - "dev": true, - "dependencies": { - "ms": "2.0.0" - } - }, - "node_modules/eslint-plugin-import/node_modules/doctrine": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-2.1.0.tgz", - "integrity": "sha512-35mSku4ZXK0vfCuHEDAwt55dg2jNajHZ1odvF+8SSr82EsZY4QmXfuWso8oEd8zRhVObSN18aM0CjSdoBX7zIw==", - "dev": true, - "dependencies": { - "esutils": "^2.0.2" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/eslint-plugin-import/node_modules/ms": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", - "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=", - "dev": true - }, - "node_modules/eslint-plugin-jsx-a11y": { - "version": "6.4.1", - "resolved": "https://registry.npmjs.org/eslint-plugin-jsx-a11y/-/eslint-plugin-jsx-a11y-6.4.1.tgz", - "integrity": "sha512-0rGPJBbwHoGNPU73/QCLP/vveMlM1b1Z9PponxO87jfr6tuH5ligXbDT6nHSSzBC8ovX2Z+BQu7Bk5D/Xgq9zg==", - "dev": true, - "dependencies": { - "@babel/runtime": "^7.11.2", - "aria-query": "^4.2.2", - "array-includes": "^3.1.1", - "ast-types-flow": "^0.0.7", - "axe-core": "^4.0.2", - "axobject-query": "^2.2.0", - "damerau-levenshtein": "^1.0.6", - "emoji-regex": "^9.0.0", - "has": "^1.0.3", - "jsx-ast-utils": "^3.1.0", - "language-tags": "^1.0.5" - }, - "engines": { - "node": ">=4.0" - }, - "peerDependencies": { - "eslint": "^3 || ^4 || ^5 || ^6 || ^7" - } - }, - "node_modules/eslint-plugin-react": { - "version": "7.23.2", - "resolved": "https://registry.npmjs.org/eslint-plugin-react/-/eslint-plugin-react-7.23.2.tgz", - "integrity": "sha512-AfjgFQB+nYszudkxRkTFu0UR1zEQig0ArVMPloKhxwlwkzaw/fBiH0QWcBBhZONlXqQC51+nfqFrkn4EzHcGBw==", - "dev": true, - "dependencies": { - "array-includes": "^3.1.3", - "array.prototype.flatmap": "^1.2.4", - "doctrine": "^2.1.0", - "has": "^1.0.3", - "jsx-ast-utils": "^2.4.1 || ^3.0.0", - "minimatch": "^3.0.4", - "object.entries": "^1.1.3", - "object.fromentries": "^2.0.4", - "object.values": "^1.1.3", - "prop-types": "^15.7.2", - "resolve": "^2.0.0-next.3", - "string.prototype.matchall": "^4.0.4" - }, - "engines": { - "node": ">=4" - }, - "peerDependencies": { - "eslint": "^3 || ^4 || ^5 || ^6 || ^7" - } - }, - "node_modules/eslint-plugin-react-hooks": { - "version": "4.2.0", - "resolved": "https://registry.npmjs.org/eslint-plugin-react-hooks/-/eslint-plugin-react-hooks-4.2.0.tgz", - "integrity": "sha512-623WEiZJqxR7VdxFCKLI6d6LLpwJkGPYKODnkH3D7WpOG5KM8yWueBd8TLsNAetEJNF5iJmolaAKO3F8yzyVBQ==", - "dev": true, - "engines": { - "node": ">=10" - }, - "peerDependencies": { - "eslint": "^3.0.0 || ^4.0.0 || ^5.0.0 || ^6.0.0 || ^7.0.0" - } - }, - "node_modules/eslint-plugin-react/node_modules/doctrine": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-2.1.0.tgz", - "integrity": "sha512-35mSku4ZXK0vfCuHEDAwt55dg2jNajHZ1odvF+8SSr82EsZY4QmXfuWso8oEd8zRhVObSN18aM0CjSdoBX7zIw==", - "dev": true, - "dependencies": { - "esutils": "^2.0.2" - }, - "engines": { - "node": ">=0.10.0" - } - }, - 
"node_modules/eslint-plugin-react/node_modules/resolve": { - "version": "2.0.0-next.3", - "resolved": "https://registry.npmjs.org/resolve/-/resolve-2.0.0-next.3.tgz", - "integrity": "sha512-W8LucSynKUIDu9ylraa7ueVZ7hc0uAgJBxVsQSKOXOyle8a93qXhcz+XAXZ8bIq2d6i4Ehddn6Evt+0/UwKk6Q==", - "dev": true, - "dependencies": { - "is-core-module": "^2.2.0", - "path-parse": "^1.0.6" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/eslint-scope": { - "version": "5.1.1", - "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-5.1.1.tgz", - "integrity": "sha512-2NxwbF/hZ0KpepYN0cNbo+FN6XoK7GaHlQhgx/hIZl6Va0bF45RQOOwhLIy8lQDbuCiadSLCBnH2CFYquit5bw==", - "dev": true, - "dependencies": { - "esrecurse": "^4.3.0", - "estraverse": "^4.1.1" - }, - "engines": { - "node": ">=8.0.0" - } - }, - "node_modules/eslint-utils": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/eslint-utils/-/eslint-utils-2.1.0.tgz", - "integrity": "sha512-w94dQYoauyvlDc43XnGB8lU3Zt713vNChgt4EWwhXAP2XkBvndfxF0AgIqKOOasjPIPzj9JqgwkwbCYD0/V3Zg==", - "dev": true, - "dependencies": { - "eslint-visitor-keys": "^1.1.0" - }, - "engines": { - "node": ">=6" - }, - "funding": { - "url": "https://github.com/sponsors/mysticatea" - } - }, - "node_modules/eslint-utils/node_modules/eslint-visitor-keys": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-1.3.0.tgz", - "integrity": "sha512-6J72N8UNa462wa/KFODt/PJ3IU60SDpC3QXC1Hjc1BXXpfL2C9R5+AU7jhe0F6GREqVMh4Juu+NY7xn+6dipUQ==", - "dev": true, - "engines": { - "node": ">=4" - } - }, - "node_modules/eslint-visitor-keys": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-2.1.0.tgz", - "integrity": "sha512-0rSmRBzXgDzIsD6mGdJgevzgezI534Cer5L/vyMX0kHzT/jiB43jRhd9YUlMGYLQy2zprNmoT8qasCGtY+QaKw==", - "dev": true, - "engines": { - "node": ">=10" - } - }, - "node_modules/eslint/node_modules/@babel/code-frame": { - "version": "7.12.11", - "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.12.11.tgz", - "integrity": "sha512-Zt1yodBx1UcyiePMSkWnU4hPqhwq7hGi2nFL1LeA3EUl+q2LQx16MISgJ0+z7dnmgvP9QtIleuETGOiOH1RcIw==", - "dev": true, - "dependencies": { - "@babel/highlight": "^7.10.4" - } - }, - "node_modules/eslint/node_modules/ansi-styles": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", - "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", - "dev": true, - "dependencies": { - "color-convert": "^2.0.1" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/eslint/node_modules/chalk": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.1.tgz", - "integrity": "sha512-diHzdDKxcU+bAsUboHLPEDQiw0qEe0qd7SYUn3HgcFlWgbDcfLGswOHYeGrHKzG9z6UYf01d9VFMfZxPM1xZSg==", - "dev": true, - "dependencies": { - "ansi-styles": "^4.1.0", - "supports-color": "^7.1.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/chalk?sponsor=1" - } - }, - "node_modules/eslint/node_modules/color-convert": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", - "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", - "dev": true, - "dependencies": { - "color-name": "~1.1.4" - 
}, - "engines": { - "node": ">=7.0.0" - } - }, - "node_modules/eslint/node_modules/color-name": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", - "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", - "dev": true - }, - "node_modules/eslint/node_modules/globals": { - "version": "13.8.0", - "resolved": "https://registry.npmjs.org/globals/-/globals-13.8.0.tgz", - "integrity": "sha512-rHtdA6+PDBIjeEvA91rpqzEvk/k3/i7EeNQiryiWuJH0Hw9cpyJMAt2jtbAwUaRdhD+573X4vWw6IcjKPasi9Q==", - "dev": true, - "dependencies": { - "type-fest": "^0.20.2" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/eslint/node_modules/has-flag": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", - "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", - "dev": true, - "engines": { - "node": ">=8" - } - }, - "node_modules/eslint/node_modules/supports-color": { - "version": "7.2.0", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", - "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", - "dev": true, - "dependencies": { - "has-flag": "^4.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/eslint/node_modules/type-fest": { - "version": "0.20.2", - "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.20.2.tgz", - "integrity": "sha512-Ne+eE4r0/iWnpAxD852z3A+N0Bt5RN//NjJwRd2VFHEmrywxf5vsZlh4R6lixl6B+wz/8d+maTSAkN1FIkI3LQ==", - "dev": true, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/espree": { - "version": "7.3.1", - "resolved": "https://registry.npmjs.org/espree/-/espree-7.3.1.tgz", - "integrity": "sha512-v3JCNCE64umkFpmkFGqzVKsOT0tN1Zr+ueqLZfpV1Ob8e+CEgPWa+OxCoGH3tnhimMKIaBm4m/vaRpJ/krRz2g==", - "dev": true, - "dependencies": { - "acorn": "^7.4.0", - "acorn-jsx": "^5.3.1", - "eslint-visitor-keys": "^1.3.0" - }, - "engines": { - "node": "^10.12.0 || >=12.0.0" - } - }, - "node_modules/espree/node_modules/eslint-visitor-keys": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-1.3.0.tgz", - "integrity": "sha512-6J72N8UNa462wa/KFODt/PJ3IU60SDpC3QXC1Hjc1BXXpfL2C9R5+AU7jhe0F6GREqVMh4Juu+NY7xn+6dipUQ==", - "dev": true, - "engines": { - "node": ">=4" - } - }, - "node_modules/esprima": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz", - "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==", - "dev": true, - "bin": { - "esparse": "bin/esparse.js", - "esvalidate": "bin/esvalidate.js" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/esquery": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/esquery/-/esquery-1.4.0.tgz", - "integrity": "sha512-cCDispWt5vHHtwMY2YrAQ4ibFkAL8RbH5YGBnZBc90MolvvfkkQcJro/aZiAQUlQ3qgrYS6D6v8Gc5G5CQsc9w==", - "dev": true, - "dependencies": { - "estraverse": "^5.1.0" - }, - "engines": { - "node": ">=0.10" - } - }, - "node_modules/esquery/node_modules/estraverse": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.2.0.tgz", - "integrity": 
"sha512-BxbNGGNm0RyRYvUdHpIwv9IWzeM9XClbOxwoATuFdOE7ZE6wHL+HQ5T8hoPM+zHvmKzzsEqhgy0GrQ5X13afiQ==", - "dev": true, - "engines": { - "node": ">=4.0" - } - }, - "node_modules/esrecurse": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/esrecurse/-/esrecurse-4.3.0.tgz", - "integrity": "sha512-KmfKL3b6G+RXvP8N1vr3Tq1kL/oCFgn2NYXEtqP8/L3pKapUA4G8cFVaoF3SU323CD4XypR/ffioHmkti6/Tag==", - "dev": true, - "dependencies": { - "estraverse": "^5.2.0" - }, - "engines": { - "node": ">=4.0" - } - }, - "node_modules/esrecurse/node_modules/estraverse": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.2.0.tgz", - "integrity": "sha512-BxbNGGNm0RyRYvUdHpIwv9IWzeM9XClbOxwoATuFdOE7ZE6wHL+HQ5T8hoPM+zHvmKzzsEqhgy0GrQ5X13afiQ==", - "dev": true, - "engines": { - "node": ">=4.0" - } - }, - "node_modules/estraverse": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-4.3.0.tgz", - "integrity": "sha512-39nnKffWz8xN1BU/2c79n9nB9HDzo0niYUqx6xyqUnyoAnQyyWpOTdZEeiCch8BBu515t4wp9ZmgVfVhn9EBpw==", - "dev": true, - "engines": { - "node": ">=4.0" - } - }, - "node_modules/esutils": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", - "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", - "dev": true, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/fast-deep-equal": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", - "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", - "dev": true - }, - "node_modules/fast-glob": { - "version": "3.2.5", - "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.2.5.tgz", - "integrity": "sha512-2DtFcgT68wiTTiwZ2hNdJfcHNke9XOfnwmBRWXhmeKM8rF0TGwmC/Qto3S7RoZKp5cilZbxzO5iTNTQsJ+EeDg==", - "dev": true, - "dependencies": { - "@nodelib/fs.stat": "^2.0.2", - "@nodelib/fs.walk": "^1.2.3", - "glob-parent": "^5.1.0", - "merge2": "^1.3.0", - "micromatch": "^4.0.2", - "picomatch": "^2.2.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/fast-json-stable-stringify": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", - "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==", - "dev": true - }, - "node_modules/fast-levenshtein": { - "version": "2.0.6", - "resolved": "https://registry.npmjs.org/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz", - "integrity": "sha1-PYpcZog6FqMMqGQ+hR8Zuqd5eRc=", - "dev": true - }, - "node_modules/fastq": { - "version": "1.11.0", - "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.11.0.tgz", - "integrity": "sha512-7Eczs8gIPDrVzT+EksYBcupqMyxSHXXrHOLRRxU2/DicV8789MRBRR8+Hc2uWzUupOs4YS4JzBmBxjjCVBxD/g==", - "dev": true, - "dependencies": { - "reusify": "^1.0.4" - } - }, - "node_modules/file-entry-cache": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-6.0.1.tgz", - "integrity": "sha512-7Gps/XWymbLk2QLYK4NzpMOrYjMhdIxXuIvy2QBsLE6ljuodKvdkWs/cpyJJ3CVIVpH0Oi1Hvg1ovbMzLdFBBg==", - "dev": true, - "dependencies": { - "flat-cache": "^3.0.4" - }, - "engines": { - "node": "^10.12.0 || >=12.0.0" - } - }, - "node_modules/fill-range": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", - "integrity": 
"sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==", - "dev": true, - "dependencies": { - "to-regex-range": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/find-up": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/find-up/-/find-up-2.1.0.tgz", - "integrity": "sha1-RdG35QbHF93UgndaK3eSCjwMV6c=", - "dev": true, - "dependencies": { - "locate-path": "^2.0.0" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/flat-cache": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/flat-cache/-/flat-cache-3.0.4.tgz", - "integrity": "sha512-dm9s5Pw7Jc0GvMYbshN6zchCA9RgQlzzEZX3vylR9IqFfS8XciblUXOKfW6SiuJ0e13eDYZoZV5wdrev7P3Nwg==", - "dev": true, - "dependencies": { - "flatted": "^3.1.0", - "rimraf": "^3.0.2" - }, - "engines": { - "node": "^10.12.0 || >=12.0.0" - } - }, - "node_modules/flatted": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.1.1.tgz", - "integrity": "sha512-zAoAQiudy+r5SvnSw3KJy5os/oRJYHzrzja/tBDqrZtNhUw8bt6y8OBzMWcjWr+8liV8Eb6yOhw8WZ7VFZ5ZzA==", - "dev": true - }, - "node_modules/fs.realpath": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", - "integrity": "sha1-FQStJSMVjKpA20onh8sBQRmU6k8=", - "dev": true - }, - "node_modules/fsevents": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", - "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", - "dev": true, - "hasInstallScript": true, - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": "^8.16.0 || ^10.6.0 || >=11.0.0" - } - }, - "node_modules/function-bind": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.1.tgz", - "integrity": "sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A==", - "dev": true - }, - "node_modules/functional-red-black-tree": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/functional-red-black-tree/-/functional-red-black-tree-1.0.1.tgz", - "integrity": "sha1-GwqzvVU7Kg1jmdKcDj6gslIHgyc=", - "dev": true - }, - "node_modules/gensync": { - "version": "1.0.0-beta.2", - "resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz", - "integrity": "sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==", - "dev": true, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/get-intrinsic": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.1.1.tgz", - "integrity": "sha512-kWZrnVM42QCiEA2Ig1bG8zjoIMOgxWwYCEeNdwY6Tv/cOSeGpcoX4pXHfKUxNKVoArnrEr2e9srnAxxGIraS9Q==", - "dev": true, - "dependencies": { - "function-bind": "^1.1.1", - "has": "^1.0.3", - "has-symbols": "^1.0.1" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/glob": { - "version": "7.1.7", - "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.7.tgz", - "integrity": "sha512-OvD9ENzPLbegENnYP5UUfJIirTg4+XwMWGaQfQTY0JenxNvvIKP3U3/tAQSPIu/lHxXYSZmpXlUHeqAIdKzBLQ==", - "dev": true, - "dependencies": { - "fs.realpath": "^1.0.0", - "inflight": "^1.0.4", - "inherits": "2", - "minimatch": "^3.0.4", - "once": "^1.3.0", - "path-is-absolute": "^1.0.0" - }, - "engines": { - "node": "*" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/glob-parent": { - 
"version": "5.1.2", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", - "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", - "dev": true, - "dependencies": { - "is-glob": "^4.0.1" - }, - "engines": { - "node": ">= 6" - } - }, - "node_modules/globals": { - "version": "11.12.0", - "resolved": "https://registry.npmjs.org/globals/-/globals-11.12.0.tgz", - "integrity": "sha512-WOBp/EEGUiIsJSp7wcv/y6MO+lV9UoncWqxuFfm8eBwzWNgyfBd6Gz+IeKQ9jCmyhoH99g15M3T+QaVHFjizVA==", - "dev": true, - "engines": { - "node": ">=4" - } - }, - "node_modules/globby": { - "version": "11.0.3", - "resolved": "https://registry.npmjs.org/globby/-/globby-11.0.3.tgz", - "integrity": "sha512-ffdmosjA807y7+lA1NM0jELARVmYul/715xiILEjo3hBLPTcirgQNnXECn5g3mtR8TOLCVbkfua1Hpen25/Xcg==", - "dev": true, - "dependencies": { - "array-union": "^2.1.0", - "dir-glob": "^3.0.1", - "fast-glob": "^3.1.1", - "ignore": "^5.1.4", - "merge2": "^1.3.0", - "slash": "^3.0.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/globby/node_modules/ignore": { - "version": "5.1.8", - "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.1.8.tgz", - "integrity": "sha512-BMpfD7PpiETpBl/A6S498BaIJ6Y/ABT93ETbby2fP00v4EbvPBXWEoaR1UBPKs3iR53pJY7EtZk5KACI57i1Uw==", - "dev": true, - "engines": { - "node": ">= 4" - } - }, - "node_modules/graceful-fs": { - "version": "4.2.6", - "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.6.tgz", - "integrity": "sha512-nTnJ528pbqxYanhpDYsi4Rd8MAeaBA67+RZ10CM1m3bTAVFEDcd5AuA4a6W5YkGZ1iNXHzZz8T6TBKLeBuNriQ==", - "dev": true - }, - "node_modules/has": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/has/-/has-1.0.3.tgz", - "integrity": "sha512-f2dvO0VU6Oej7RkWJGrehjbzMAjFp5/VKPp5tTpWIV4JHHZK1/BxbFRtf/siA2SWTe09caDmVtYYzWEIbBS4zw==", - "dev": true, - "dependencies": { - "function-bind": "^1.1.1" - }, - "engines": { - "node": ">= 0.4.0" - } - }, - "node_modules/has-bigints": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/has-bigints/-/has-bigints-1.0.1.tgz", - "integrity": "sha512-LSBS2LjbNBTf6287JEbEzvJgftkF5qFkmCo9hDRpAzKhUOlJ+hx8dd4USs00SgsUNwc4617J9ki5YtEClM2ffA==", - "dev": true, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/has-flag": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", - "integrity": "sha1-tdRU3CGZriJWmfNGfloH87lVuv0=", - "dev": true, - "engines": { - "node": ">=4" - } - }, - "node_modules/has-symbols": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.0.2.tgz", - "integrity": "sha512-chXa79rL/UC2KlX17jo3vRGz0azaWEx5tGqZg5pO3NUyEJVB17dMruQlzCCOfUvElghKcm5194+BCRvi2Rv/Gw==", - "dev": true, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/history": { - "version": "4.10.1", - "resolved": "https://registry.npmjs.org/history/-/history-4.10.1.tgz", - "integrity": "sha512-36nwAD620w12kuzPAsyINPWJqlNbij+hpK1k9XRloDtym8mxzGYl2c17LnV6IAGB2Dmg4tEa7G7DlawS0+qjew==", - "dependencies": { - "@babel/runtime": "^7.1.2", - "loose-envify": "^1.2.0", - "resolve-pathname": "^3.0.0", - "tiny-invariant": "^1.0.2", - "tiny-warning": "^1.0.0", - "value-equal": "^1.0.1" - } - }, - "node_modules/hoist-non-react-statics": { - "version": "3.3.2", - "resolved": 
"https://registry.npmjs.org/hoist-non-react-statics/-/hoist-non-react-statics-3.3.2.tgz", - "integrity": "sha512-/gGivxi8JPKWNm/W0jSmzcMPpfpPLc3dY/6GxhX2hQ9iGj3aDfklV4ET7NjKpSinLpJ5vafa9iiGIEZg10SfBw==", - "dependencies": { - "react-is": "^16.7.0" - } - }, - "node_modules/hosted-git-info": { - "version": "2.8.9", - "resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-2.8.9.tgz", - "integrity": "sha512-mxIDAb9Lsm6DoOJ7xH+5+X4y1LU/4Hi50L9C5sIswK3JzULS4bwk1FvjdBgvYR4bzT4tuUQiC15FE2f5HbLvYw==", - "dev": true - }, - "node_modules/ignore": { - "version": "4.0.6", - "resolved": "https://registry.npmjs.org/ignore/-/ignore-4.0.6.tgz", - "integrity": "sha512-cyFDKrqc/YdcWFniJhzI42+AzS+gNwmUzOSFcRCQYwySuBBBy/KjuxWLZ/FHEH6Moq1NizMOBWyTcv8O4OZIMg==", - "dev": true, - "engines": { - "node": ">= 4" - } - }, - "node_modules/import-fresh": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.0.tgz", - "integrity": "sha512-veYYhQa+D1QBKznvhUHxb8faxlrwUnxseDAbAp457E0wLNio2bOSKnjYDhMj+YiAq61xrMGhQk9iXVk5FzgQMw==", - "dev": true, - "dependencies": { - "parent-module": "^1.0.0", - "resolve-from": "^4.0.0" - }, - "engines": { - "node": ">=6" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/imurmurhash": { - "version": "0.1.4", - "resolved": "https://registry.npmjs.org/imurmurhash/-/imurmurhash-0.1.4.tgz", - "integrity": "sha1-khi5srkoojixPcT7a21XbyMUU+o=", - "dev": true, - "engines": { - "node": ">=0.8.19" - } - }, - "node_modules/inflight": { - "version": "1.0.6", - "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", - "integrity": "sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk=", - "dev": true, - "dependencies": { - "once": "^1.3.0", - "wrappy": "1" - } - }, - "node_modules/inherits": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", - "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", - "dev": true - }, - "node_modules/internal-slot": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/internal-slot/-/internal-slot-1.0.3.tgz", - "integrity": "sha512-O0DB1JC/sPyZl7cIo78n5dR7eUSwwpYPiXRhTzNxZVAMUuB8vlnRFyLxdrVToks6XPLVnFfbzaVd5WLjhgg+vA==", - "dev": true, - "dependencies": { - "get-intrinsic": "^1.1.0", - "has": "^1.0.3", - "side-channel": "^1.0.4" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/is-arrayish": { - "version": "0.2.1", - "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.2.1.tgz", - "integrity": "sha1-d8mYQFJ6qOyxqLppe4BkWnqSap0=", - "dev": true - }, - "node_modules/is-bigint": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/is-bigint/-/is-bigint-1.0.2.tgz", - "integrity": "sha512-0JV5+SOCQkIdzjBK9buARcV804Ddu7A0Qet6sHi3FimE9ne6m4BGQZfRn+NZiXbBk4F4XmHfDZIipLj9pX8dSA==", - "dev": true, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-boolean-object": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/is-boolean-object/-/is-boolean-object-1.1.1.tgz", - "integrity": "sha512-bXdQWkECBUIAcCkeH1unwJLIpZYaa5VvuygSyS/c2lf719mTKZDU5UdDRlpd01UjADgmW8RfqaP+mRaVPdr/Ng==", - "dev": true, - "dependencies": { - "call-bind": "^1.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-callable": { - "version": "1.2.3", - "resolved": 
"https://registry.npmjs.org/is-callable/-/is-callable-1.2.3.tgz", - "integrity": "sha512-J1DcMe8UYTBSrKezuIUTUwjXsho29693unXM2YhJUTR2txK/eG47bvNa/wipPFmZFgr/N6f1GA66dv0mEyTIyQ==", - "dev": true, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-core-module": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.4.0.tgz", - "integrity": "sha512-6A2fkfq1rfeQZjxrZJGerpLCTHRNEBiSgnu0+obeJpEPZRUooHgsizvzv0ZjJwOz3iWIHdJtVWJ/tmPr3D21/A==", - "dev": true, - "dependencies": { - "has": "^1.0.3" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-date-object": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/is-date-object/-/is-date-object-1.0.4.tgz", - "integrity": "sha512-/b4ZVsG7Z5XVtIxs/h9W8nvfLgSAyKYdtGWQLbqy6jA1icmgjf8WCoTKgeS4wy5tYaPePouzFMANbnj94c2Z+A==", - "dev": true, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-extglob": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", - "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=", - "dev": true, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/is-fullwidth-code-point": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", - "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", - "dev": true, - "engines": { - "node": ">=8" - } - }, - "node_modules/is-glob": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.1.tgz", - "integrity": "sha512-5G0tKtBTFImOqDnLB2hG6Bp2qcKEFduo4tZu9MT/H6NQv/ghhy30o55ufafxJ/LdH79LLs2Kfrn85TLKyA7BUg==", - "dev": true, - "dependencies": { - "is-extglob": "^2.1.1" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/is-negative-zero": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/is-negative-zero/-/is-negative-zero-2.0.1.tgz", - "integrity": "sha512-2z6JzQvZRa9A2Y7xC6dQQm4FSTSTNWjKIYYTt4246eMTJmIo0Q+ZyOsU66X8lxK1AbB92dFeglPLrhwpeRKO6w==", - "dev": true, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-number": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", - "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", - "dev": true, - "engines": { - "node": ">=0.12.0" - } - }, - "node_modules/is-number-object": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/is-number-object/-/is-number-object-1.0.5.tgz", - "integrity": "sha512-RU0lI/n95pMoUKu9v1BZP5MBcZuNSVJkMkAG2dJqC4z2GlkGUNeH68SuHuBKBD/XFe+LHZ+f9BKkLET60Niedw==", - "dev": true, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-regex": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/is-regex/-/is-regex-1.1.3.tgz", - "integrity": "sha512-qSVXFz28HM7y+IWX6vLCsexdlvzT1PJNFSBuaQLQ5o0IEw8UDYW6/2+eCMVyIsbM8CNLX2a/QWmSpyxYEHY7CQ==", - "dev": true, - "dependencies": { - "call-bind": "^1.0.2", - "has-symbols": "^1.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-string": { - 
"version": "1.0.6", - "resolved": "https://registry.npmjs.org/is-string/-/is-string-1.0.6.tgz", - "integrity": "sha512-2gdzbKUuqtQ3lYNrUTQYoClPhm7oQu4UdpSZMp1/DGgkHBT8E2Z1l0yMdb6D4zNAxwDiMv8MdulKROJGNl0Q0w==", - "dev": true, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-symbol": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/is-symbol/-/is-symbol-1.0.4.tgz", - "integrity": "sha512-C/CPBqKWnvdcxqIARxyOh4v1UUEOCHpgDa0WYgpKDFMszcrPcffg5uhwSgPCLD2WWxmq6isisz87tzT01tuGhg==", - "dev": true, - "dependencies": { - "has-symbols": "^1.0.2" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/isarray": { - "version": "0.0.1", - "resolved": "https://registry.npmjs.org/isarray/-/isarray-0.0.1.tgz", - "integrity": "sha1-ihis/Kmo9Bd+Cav8YDiTmwXR7t8=" - }, - "node_modules/isexe": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", - "integrity": "sha1-6PvzdNxVb/iUehDcsFctYz8s+hA=", - "dev": true - }, - "node_modules/js-tokens": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", - "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==" - }, - "node_modules/js-yaml": { - "version": "3.14.1", - "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.1.tgz", - "integrity": "sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==", - "dev": true, - "dependencies": { - "argparse": "^1.0.7", - "esprima": "^4.0.0" - }, - "bin": { - "js-yaml": "bin/js-yaml.js" - } - }, - "node_modules/jsesc": { - "version": "2.5.2", - "resolved": "https://registry.npmjs.org/jsesc/-/jsesc-2.5.2.tgz", - "integrity": "sha512-OYu7XEzjkCQ3C5Ps3QIZsQfNpqoJyZZA99wd9aWd05NCtC5pWOkShK2mkL6HXQR6/Cy2lbNdPlZBpuQHXE63gA==", - "dev": true, - "bin": { - "jsesc": "bin/jsesc" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/json-parse-better-errors": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/json-parse-better-errors/-/json-parse-better-errors-1.0.2.tgz", - "integrity": "sha512-mrqyZKfX5EhL7hvqcV6WG1yYjnjeuYDzDhhcAAUrq8Po85NBQBJP+ZDUT75qZQ98IkUoBqdkExkukOU7Ts2wrw==", - "dev": true - }, - "node_modules/json-schema-traverse": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", - "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", - "dev": true - }, - "node_modules/json-stable-stringify-without-jsonify": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz", - "integrity": "sha1-nbe1lJatPzz+8wp1FC0tkwrXJlE=", - "dev": true - }, - "node_modules/json5": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.0.tgz", - "integrity": "sha512-f+8cldu7X/y7RAJurMEJmdoKXGB/X550w2Nr3tTbezL6RwEE/iMcm+tZnXeoZtKuOq6ft8+CqzEkrIgx1fPoQA==", - "dev": true, - "dependencies": { - "minimist": "^1.2.5" - }, - "bin": { - "json5": "lib/cli.js" - }, - "engines": { - "node": ">=6" - } - }, - "node_modules/jsx-ast-utils": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/jsx-ast-utils/-/jsx-ast-utils-3.2.0.tgz", - "integrity": 
"sha512-EIsmt3O3ljsU6sot/J4E1zDRxfBNrhjyf/OKjlydwgEimQuznlM4Wv7U+ueONJMyEn1WRE0K8dhi3dVAXYT24Q==", - "dev": true, - "dependencies": { - "array-includes": "^3.1.2", - "object.assign": "^4.1.2" - }, - "engines": { - "node": ">=4.0" - } - }, - "node_modules/language-subtag-registry": { - "version": "0.3.21", - "resolved": "https://registry.npmjs.org/language-subtag-registry/-/language-subtag-registry-0.3.21.tgz", - "integrity": "sha512-L0IqwlIXjilBVVYKFT37X9Ih11Um5NEl9cbJIuU/SwP/zEEAbBPOnEeeuxVMf45ydWQRDQN3Nqc96OgbH1K+Pg==", - "dev": true - }, - "node_modules/language-tags": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/language-tags/-/language-tags-1.0.5.tgz", - "integrity": "sha1-0yHbxNowuovzAk4ED6XBRmH5GTo=", - "dev": true, - "dependencies": { - "language-subtag-registry": "~0.3.2" - } - }, - "node_modules/levn": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz", - "integrity": "sha512-+bT2uH4E5LGE7h/n3evcS/sQlJXCpIp6ym8OWJ5eV6+67Dsql/LaaT7qJBAt2rzfoa/5QBGBhxDix1dMt2kQKQ==", - "dev": true, - "dependencies": { - "prelude-ls": "^1.2.1", - "type-check": "~0.4.0" - }, - "engines": { - "node": ">= 0.8.0" - } - }, - "node_modules/load-json-file": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/load-json-file/-/load-json-file-4.0.0.tgz", - "integrity": "sha1-L19Fq5HjMhYjT9U62rZo607AmTs=", - "dev": true, - "dependencies": { - "graceful-fs": "^4.1.2", - "parse-json": "^4.0.0", - "pify": "^3.0.0", - "strip-bom": "^3.0.0" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/locate-path": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-2.0.0.tgz", - "integrity": "sha1-K1aLJl7slExtnA3pw9u7ygNUzY4=", - "dev": true, - "dependencies": { - "p-locate": "^2.0.0", - "path-exists": "^3.0.0" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/lodash": { - "version": "4.17.21", - "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", - "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==", - "dev": true - }, - "node_modules/lodash.clonedeep": { - "version": "4.5.0", - "resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz", - "integrity": "sha1-4j8/nE+Pvd6HJSnBBxhXoIblzO8=", - "dev": true - }, - "node_modules/lodash.truncate": { - "version": "4.4.2", - "resolved": "https://registry.npmjs.org/lodash.truncate/-/lodash.truncate-4.4.2.tgz", - "integrity": "sha1-WjUNoLERO4N+z//VgSy+WNbq4ZM=", - "dev": true - }, - "node_modules/loose-envify": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", - "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==", - "dependencies": { - "js-tokens": "^3.0.0 || ^4.0.0" - }, - "bin": { - "loose-envify": "cli.js" - } - }, - "node_modules/lru-cache": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", - "integrity": "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==", - "dev": true, - "dependencies": { - "yallist": "^4.0.0" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/merge2": { - "version": "1.4.1", - "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz", - "integrity": "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==", - "dev": true, - "engines": { - "node": ">= 8" - } - 
}, - "node_modules/micromatch": { - "version": "4.0.4", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.4.tgz", - "integrity": "sha512-pRmzw/XUcwXGpD9aI9q/0XOwLNygjETJ8y0ao0wdqprrzDa4YnxLcz7fQRZr8voh8V10kGhABbNcHVk5wHgWwg==", - "dev": true, - "dependencies": { - "braces": "^3.0.1", - "picomatch": "^2.2.3" - }, - "engines": { - "node": ">=8.6" - } - }, - "node_modules/mini-create-react-context": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/mini-create-react-context/-/mini-create-react-context-0.4.1.tgz", - "integrity": "sha512-YWCYEmd5CQeHGSAKrYvXgmzzkrvssZcuuQDDeqkT+PziKGMgE+0MCCtcKbROzocGBG1meBLl2FotlRwf4gAzbQ==", - "dependencies": { - "@babel/runtime": "^7.12.1", - "tiny-warning": "^1.0.3" - }, - "peerDependencies": { - "prop-types": "^15.0.0", - "react": "^0.14.0 || ^15.0.0 || ^16.0.0 || ^17.0.0" - } - }, - "node_modules/minimatch": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz", - "integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==", - "dev": true, - "dependencies": { - "brace-expansion": "^1.1.7" - }, - "engines": { - "node": "*" - } - }, - "node_modules/minimist": { - "version": "1.2.5", - "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz", - "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==", - "dev": true - }, - "node_modules/ms": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", - "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", - "dev": true - }, - "node_modules/nanoid": { - "version": "3.1.23", - "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.1.23.tgz", - "integrity": "sha512-FiB0kzdP0FFVGDKlRLEQ1BgDzU87dy5NnzjeW9YZNt+/c3+q82EQDUwniSAUxp/F0gFNI1ZhKU1FqYsMuqZVnw==", - "dev": true, - "bin": { - "nanoid": "bin/nanoid.cjs" - }, - "engines": { - "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" - } - }, - "node_modules/natural-compare": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz", - "integrity": "sha1-Sr6/7tdUHywnrPspvbvRXI1bpPc=", - "dev": true - }, - "node_modules/node-releases": { - "version": "1.1.72", - "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-1.1.72.tgz", - "integrity": "sha512-LLUo+PpH3dU6XizX3iVoubUNheF/owjXCZZ5yACDxNnPtgFuludV1ZL3ayK1kVep42Rmm0+R9/Y60NQbZ2bifw==", - "dev": true - }, - "node_modules/normalize-package-data": { - "version": "2.5.0", - "resolved": "https://registry.npmjs.org/normalize-package-data/-/normalize-package-data-2.5.0.tgz", - "integrity": "sha512-/5CMN3T0R4XTj4DcGaexo+roZSdSFW/0AOOTROrjxzCG1wrWXEsGbRKevjlIL+ZDE4sZlJr5ED4YW0yqmkK+eA==", - "dev": true, - "dependencies": { - "hosted-git-info": "^2.1.4", - "resolve": "^1.10.0", - "semver": "2 || 3 || 4 || 5", - "validate-npm-package-license": "^3.0.1" - } - }, - "node_modules/normalize-package-data/node_modules/semver": { - "version": "5.7.1", - "resolved": "https://registry.npmjs.org/semver/-/semver-5.7.1.tgz", - "integrity": "sha512-sauaDf/PZdVgrLTNYHRtpXa1iRiKcaebiKQ1BJdpQlWH2lCvexQdX55snPFyK7QzpudqbCI0qXFfOasHdyNDGQ==", - "dev": true, - "bin": { - "semver": "bin/semver" - } - }, - "node_modules/normalize-path": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-2.1.1.tgz", - "integrity": "sha1-GrKLVW4Zg2Oowab35vogE3/mrtk=", - 
"dev": true, - "dependencies": { - "remove-trailing-separator": "^1.0.1" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/object-assign": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", - "integrity": "sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM=", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/object-inspect": { - "version": "1.10.3", - "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.10.3.tgz", - "integrity": "sha512-e5mCJlSH7poANfC8z8S9s9S2IN5/4Zb3aZ33f5s8YqoazCFzNLloLU8r5VCG+G7WoqLvAAZoVMcy3tp/3X0Plw==", - "dev": true, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/object-keys": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/object-keys/-/object-keys-1.1.1.tgz", - "integrity": "sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA==", - "dev": true, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/object.assign": { - "version": "4.1.2", - "resolved": "https://registry.npmjs.org/object.assign/-/object.assign-4.1.2.tgz", - "integrity": "sha512-ixT2L5THXsApyiUPYKmW+2EHpXXe5Ii3M+f4e+aJFAHao5amFRW6J0OO6c/LU8Be47utCx2GL89hxGB6XSmKuQ==", - "dev": true, - "dependencies": { - "call-bind": "^1.0.0", - "define-properties": "^1.1.3", - "has-symbols": "^1.0.1", - "object-keys": "^1.1.1" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/object.entries": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/object.entries/-/object.entries-1.1.3.tgz", - "integrity": "sha512-ym7h7OZebNS96hn5IJeyUmaWhaSM4SVtAPPfNLQEI2MYWCO2egsITb9nab2+i/Pwibx+R0mtn+ltKJXRSeTMGg==", - "dev": true, - "dependencies": { - "call-bind": "^1.0.0", - "define-properties": "^1.1.3", - "es-abstract": "^1.18.0-next.1", - "has": "^1.0.3" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/object.fromentries": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/object.fromentries/-/object.fromentries-2.0.4.tgz", - "integrity": "sha512-EsFBshs5RUUpQEY1D4q/m59kMfz4YJvxuNCJcv/jWwOJr34EaVnG11ZrZa0UHB3wnzV1wx8m58T4hQL8IuNXlQ==", - "dev": true, - "dependencies": { - "call-bind": "^1.0.2", - "define-properties": "^1.1.3", - "es-abstract": "^1.18.0-next.2", - "has": "^1.0.3" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/object.values": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/object.values/-/object.values-1.1.3.tgz", - "integrity": "sha512-nkF6PfDB9alkOUxpf1HNm/QlkeW3SReqL5WXeBLpEJJnlPSvRaDQpW3gQTksTN3fgJX4hL42RzKyOin6ff3tyw==", - "dev": true, - "dependencies": { - "call-bind": "^1.0.2", - "define-properties": "^1.1.3", - "es-abstract": "^1.18.0-next.2", - "has": "^1.0.3" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/once": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", - "integrity": "sha1-WDsap3WWHUsROsF9nFC6753Xa9E=", - "dev": true, - "dependencies": { - "wrappy": "1" - } - }, - "node_modules/optionator": { - "version": "0.9.1", - "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.1.tgz", - "integrity": "sha512-74RlY5FCnhq4jRxVUPKDaRwrVNXMqsGsiW6AJw4XK8hmtm10wC0ypZBLw5IIp85NZMr91+qd1RvvENwg7jjRFw==", - "dev": true, - "dependencies": { - "deep-is": "^0.1.3", - 
"fast-levenshtein": "^2.0.6", - "levn": "^0.4.1", - "prelude-ls": "^1.2.1", - "type-check": "^0.4.0", - "word-wrap": "^1.2.3" - }, - "engines": { - "node": ">= 0.8.0" - } - }, - "node_modules/p-limit": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-1.3.0.tgz", - "integrity": "sha512-vvcXsLAJ9Dr5rQOPk7toZQZJApBl2K4J6dANSsEuh6QI41JYcsS/qhTGa9ErIUUgK3WNQoJYvylxvjqmiqEA9Q==", - "dev": true, - "dependencies": { - "p-try": "^1.0.0" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/p-locate": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-2.0.0.tgz", - "integrity": "sha1-IKAQOyIqcMj9OcwuWAaA893l7EM=", - "dev": true, - "dependencies": { - "p-limit": "^1.1.0" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/p-try": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/p-try/-/p-try-1.0.0.tgz", - "integrity": "sha1-y8ec26+P1CKOE/Yh8rGiN8GyB7M=", - "dev": true, - "engines": { - "node": ">=4" - } - }, - "node_modules/parent-module": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz", - "integrity": "sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==", - "dev": true, - "dependencies": { - "callsites": "^3.0.0" - }, - "engines": { - "node": ">=6" - } - }, - "node_modules/parse-json": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-4.0.0.tgz", - "integrity": "sha1-vjX1Qlvh9/bHRxhPmKeIy5lHfuA=", - "dev": true, - "dependencies": { - "error-ex": "^1.3.1", - "json-parse-better-errors": "^1.0.1" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/path-exists": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-3.0.0.tgz", - "integrity": "sha1-zg6+ql94yxiSXqfYENe1mwEP1RU=", - "dev": true, - "engines": { - "node": ">=4" - } - }, - "node_modules/path-is-absolute": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", - "integrity": "sha1-F0uSaHNVNP+8es5r9TpanhtcX18=", - "dev": true, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/path-key": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", - "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", - "dev": true, - "engines": { - "node": ">=8" - } - }, - "node_modules/path-parse": { - "version": "1.0.6", - "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.6.tgz", - "integrity": "sha512-GSmOT2EbHrINBf9SR7CDELwlJ8AENk3Qn7OikK4nFYAu3Ote2+JYNVvkpAEQm3/TLNEJFD/xZJjzyxg3KBWOzw==", - "dev": true - }, - "node_modules/path-starts-with": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/path-starts-with/-/path-starts-with-1.0.0.tgz", - "integrity": "sha1-soJDAV6LE43lcmgqxS2kLmRq2E4=", - "dev": true, - "dependencies": { - "normalize-path": "^2.1.1" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/path-to-regexp": { - "version": "1.8.0", - "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-1.8.0.tgz", - "integrity": "sha512-n43JRhlUKUAlibEJhPeir1ncUID16QnEjNpwzNdO3Lm4ywrBpBZ5oLD0I6br9evr1Y9JTqwRtAh7JLoOzAQdVA==", - "dependencies": { - "isarray": "0.0.1" - } - }, - "node_modules/path-type": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/path-type/-/path-type-4.0.0.tgz", - "integrity": 
"sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw==", - "dev": true, - "engines": { - "node": ">=8" - } - }, - "node_modules/picomatch": { - "version": "2.2.3", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.2.3.tgz", - "integrity": "sha512-KpELjfwcCDUb9PeigTs2mBJzXUPzAuP2oPcA989He8Rte0+YUAjw1JVedDhuTKPkHjSYzMN3npC9luThGYEKdg==", - "dev": true, - "engines": { - "node": ">=8.6" - }, - "funding": { - "url": "https://github.com/sponsors/jonschlinkert" - } - }, - "node_modules/pify": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/pify/-/pify-3.0.0.tgz", - "integrity": "sha1-5aSs0sEB/fPZpNB/DbxNtJ3SgXY=", - "dev": true, - "engines": { - "node": ">=4" - } - }, - "node_modules/pkg-dir": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/pkg-dir/-/pkg-dir-2.0.0.tgz", - "integrity": "sha1-9tXREJ4Z1j7fQo4L1X4Sd3YVM0s=", - "dev": true, - "dependencies": { - "find-up": "^2.1.0" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/pkg-up": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/pkg-up/-/pkg-up-2.0.0.tgz", - "integrity": "sha1-yBmscoBZpGHKscOImivjxJoATX8=", - "dev": true, - "dependencies": { - "find-up": "^2.1.0" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/postcss": { - "version": "8.2.15", - "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.2.15.tgz", - "integrity": "sha512-2zO3b26eJD/8rb106Qu2o7Qgg52ND5HPjcyQiK2B98O388h43A448LCslC0dI2P97wCAQRJsFvwTRcXxTKds+Q==", - "dev": true, - "dependencies": { - "colorette": "^1.2.2", - "nanoid": "^3.1.23", - "source-map": "^0.6.1" - }, - "engines": { - "node": "^10 || ^12 || >=14" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/postcss/" - } - }, - "node_modules/postcss/node_modules/source-map": { - "version": "0.6.1", - "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", - "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", - "dev": true, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/prelude-ls": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz", - "integrity": "sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g==", - "dev": true, - "engines": { - "node": ">= 0.8.0" - } - }, - "node_modules/progress": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz", - "integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==", - "dev": true, - "engines": { - "node": ">=0.4.0" - } - }, - "node_modules/prop-types": { - "version": "15.7.2", - "resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.7.2.tgz", - "integrity": "sha512-8QQikdH7//R2vurIJSutZ1smHYTcLpRWEOlHnzcWHmBYrOGUysKwSsrC89BCiFj3CbrfJ/nXFdJepOVrY1GCHQ==", - "dependencies": { - "loose-envify": "^1.4.0", - "object-assign": "^4.1.1", - "react-is": "^16.8.1" - } - }, - "node_modules/punycode": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.1.1.tgz", - "integrity": "sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==", - "dev": true, - "engines": { - "node": ">=6" - } - }, - "node_modules/queue-microtask": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz", - "integrity": 
"sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==", - "dev": true, - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ] - }, - "node_modules/react": { - "version": "17.0.2", - "resolved": "https://registry.npmjs.org/react/-/react-17.0.2.tgz", - "integrity": "sha512-gnhPt75i/dq/z3/6q/0asP78D0u592D5L1pd7M8P+dck6Fu/jJeL6iVVK23fptSUZj8Vjf++7wXA8UNclGQcbA==", - "dependencies": { - "loose-envify": "^1.1.0", - "object-assign": "^4.1.1" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/react-dom": { - "version": "17.0.2", - "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-17.0.2.tgz", - "integrity": "sha512-s4h96KtLDUQlsENhMn1ar8t2bEa+q/YAtj8pPPdIjPDGBDIVNsrD9aXNWqspUe6AzKCIG0C1HZZLqLV7qpOBGA==", - "dependencies": { - "loose-envify": "^1.1.0", - "object-assign": "^4.1.1", - "scheduler": "^0.20.2" - }, - "peerDependencies": { - "react": "17.0.2" - } - }, - "node_modules/react-is": { - "version": "16.13.1", - "resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz", - "integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==" - }, - "node_modules/react-refresh": { - "version": "0.9.0", - "resolved": "https://registry.npmjs.org/react-refresh/-/react-refresh-0.9.0.tgz", - "integrity": "sha512-Gvzk7OZpiqKSkxsQvO/mbTN1poglhmAV7gR/DdIrRrSMXraRQQlfikRJOr3Nb9GTMPC5kof948Zy6jJZIFtDvQ==", - "dev": true, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/react-router": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/react-router/-/react-router-5.2.0.tgz", - "integrity": "sha512-smz1DUuFHRKdcJC0jobGo8cVbhO3x50tCL4icacOlcwDOEQPq4TMqwx3sY1TP+DvtTgz4nm3thuo7A+BK2U0Dw==", - "dependencies": { - "@babel/runtime": "^7.1.2", - "history": "^4.9.0", - "hoist-non-react-statics": "^3.1.0", - "loose-envify": "^1.3.1", - "mini-create-react-context": "^0.4.0", - "path-to-regexp": "^1.7.0", - "prop-types": "^15.6.2", - "react-is": "^16.6.0", - "tiny-invariant": "^1.0.2", - "tiny-warning": "^1.0.0" - }, - "peerDependencies": { - "react": ">=15" - } - }, - "node_modules/react-router-dom": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/react-router-dom/-/react-router-dom-5.2.0.tgz", - "integrity": "sha512-gxAmfylo2QUjcwxI63RhQ5G85Qqt4voZpUXSEqCwykV0baaOTQDR1f0PmY8AELqIyVc0NEZUj0Gov5lNGcXgsA==", - "dependencies": { - "@babel/runtime": "^7.1.2", - "history": "^4.9.0", - "loose-envify": "^1.3.1", - "prop-types": "^15.6.2", - "react-router": "5.2.0", - "tiny-invariant": "^1.0.2", - "tiny-warning": "^1.0.0" - }, - "peerDependencies": { - "react": ">=15" - } - }, - "node_modules/read-pkg": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/read-pkg/-/read-pkg-3.0.0.tgz", - "integrity": "sha1-nLxoaXj+5l0WwA4rGcI3/Pbjg4k=", - "dev": true, - "dependencies": { - "load-json-file": "^4.0.0", - "normalize-package-data": "^2.3.2", - "path-type": "^3.0.0" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/read-pkg-up": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/read-pkg-up/-/read-pkg-up-3.0.0.tgz", - "integrity": "sha1-PtSWaF26D4/hGNBpHcUfSh/5bwc=", - "dev": true, - "dependencies": { - "find-up": "^2.0.0", - "read-pkg": "^3.0.0" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/read-pkg/node_modules/path-type": { - 
"version": "3.0.0", - "resolved": "https://registry.npmjs.org/path-type/-/path-type-3.0.0.tgz", - "integrity": "sha512-T2ZUsdZFHgA3u4e5PfPbjd7HDDpxPnQb5jN0SrDsjNSuVXHJqtwTnWqG0B1jZrgmJ/7lj1EmVIByWt1gxGkWvg==", - "dev": true, - "dependencies": { - "pify": "^3.0.0" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/regenerator-runtime": { - "version": "0.13.7", - "resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.13.7.tgz", - "integrity": "sha512-a54FxoJDIr27pgf7IgeQGxmqUNYrcV338lf/6gH456HZ/PhX+5BcwHXG9ajESmwe6WRO0tAzRUrRmNONWgkrew==" - }, - "node_modules/regexp.prototype.flags": { - "version": "1.3.1", - "resolved": "https://registry.npmjs.org/regexp.prototype.flags/-/regexp.prototype.flags-1.3.1.tgz", - "integrity": "sha512-JiBdRBq91WlY7uRJ0ds7R+dU02i6LKi8r3BuQhNXn+kmeLN+EfHhfjqMRis1zJxnlu88hq/4dx0P2OP3APRTOA==", - "dev": true, - "dependencies": { - "call-bind": "^1.0.2", - "define-properties": "^1.1.3" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/regexpp": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/regexpp/-/regexpp-3.1.0.tgz", - "integrity": "sha512-ZOIzd8yVsQQA7j8GCSlPGXwg5PfmA1mrq0JP4nGhh54LaKN3xdai/vHUDu74pKwV8OxseMS65u2NImosQcSD0Q==", - "dev": true, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/mysticatea" - } - }, - "node_modules/remove-trailing-separator": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/remove-trailing-separator/-/remove-trailing-separator-1.1.0.tgz", - "integrity": "sha1-wkvOKig62tW8P1jg1IJJuSN52O8=", - "dev": true - }, - "node_modules/require-from-string": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz", - "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==", - "dev": true, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/resolve": { - "version": "1.20.0", - "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.20.0.tgz", - "integrity": "sha512-wENBPt4ySzg4ybFQW2TT1zMQucPK95HSh/nq2CFTZVOGut2+pQvSsgtda4d26YrYcr067wjbmzOG8byDPBX63A==", - "dev": true, - "dependencies": { - "is-core-module": "^2.2.0", - "path-parse": "^1.0.6" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/resolve-from": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz", - "integrity": "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==", - "dev": true, - "engines": { - "node": ">=4" - } - }, - "node_modules/resolve-pathname": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/resolve-pathname/-/resolve-pathname-3.0.0.tgz", - "integrity": "sha512-C7rARubxI8bXFNB/hqcp/4iUeIXJhJZvFPFPiSPRnhU5UPxzMFIl+2E6yY6c4k9giDJAhtV+enfA+G89N6Csng==" - }, - "node_modules/reusify": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.0.4.tgz", - "integrity": "sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw==", - "dev": true, - "engines": { - "iojs": ">=1.0.0", - "node": ">=0.10.0" - } - }, - "node_modules/rimraf": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz", - "integrity": "sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==", - "dev": true, 
- "dependencies": { - "glob": "^7.1.3" - }, - "bin": { - "rimraf": "bin.js" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/rollup": { - "version": "2.48.0", - "resolved": "https://registry.npmjs.org/rollup/-/rollup-2.48.0.tgz", - "integrity": "sha512-wl9ZSSSsi5579oscSDYSzGn092tCS076YB+TQrzsGuSfYyJeep8eEWj0eaRjuC5McuMNmcnR8icBqiE/FWNB1A==", - "dev": true, - "dependencies": { - "fsevents": "~2.3.1" - }, - "bin": { - "rollup": "dist/bin/rollup" - }, - "engines": { - "node": ">=10.0.0" - }, - "optionalDependencies": { - "fsevents": "~2.3.1" - } - }, - "node_modules/run-parallel": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz", - "integrity": "sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==", - "dev": true, - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "dependencies": { - "queue-microtask": "^1.2.2" - } - }, - "node_modules/safe-buffer": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", - "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==", - "dev": true - }, - "node_modules/scheduler": { - "version": "0.20.2", - "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.20.2.tgz", - "integrity": "sha512-2eWfGgAqqWFGqtdMmcL5zCMK1U8KlXv8SQFGglL3CEtd0aDVDWgeF/YoCmvln55m5zSk3J/20hTaSBeSObsQDQ==", - "dependencies": { - "loose-envify": "^1.1.0", - "object-assign": "^4.1.1" - } - }, - "node_modules/semver": { - "version": "7.3.5", - "resolved": "https://registry.npmjs.org/semver/-/semver-7.3.5.tgz", - "integrity": "sha512-PoeGJYh8HK4BTO/a9Tf6ZG3veo/A7ZVsYrSA6J8ny9nb3B1VrpkuN+z9OE5wfE5p6H4LchYZsegiQgbJD94ZFQ==", - "dev": true, - "dependencies": { - "lru-cache": "^6.0.0" - }, - "bin": { - "semver": "bin/semver.js" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/shebang-command": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", - "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", - "dev": true, - "dependencies": { - "shebang-regex": "^3.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/shebang-regex": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", - "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", - "dev": true, - "engines": { - "node": ">=8" - } - }, - "node_modules/side-channel": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.0.4.tgz", - "integrity": "sha512-q5XPytqFEIKHkGdiMIrY10mvLRvnQh42/+GoBlFW3b2LXLE2xxJpZFdm94we0BaoV3RwJyGqg5wS7epxTv0Zvw==", - "dev": true, - "dependencies": { - "call-bind": "^1.0.0", - "get-intrinsic": "^1.0.2", - "object-inspect": "^1.9.0" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/slash": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz", - "integrity": "sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==", - "dev": true, - "engines": { - "node": ">=8" - } - }, - 
"node_modules/slice-ansi": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/slice-ansi/-/slice-ansi-4.0.0.tgz", - "integrity": "sha512-qMCMfhY040cVHT43K9BFygqYbUPFZKHOg7K73mtTWJRb8pyP3fzf4Ixd5SzdEJQ6MRUg/WBnOLxghZtKKurENQ==", - "dev": true, - "dependencies": { - "ansi-styles": "^4.0.0", - "astral-regex": "^2.0.0", - "is-fullwidth-code-point": "^3.0.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/slice-ansi?sponsor=1" - } - }, - "node_modules/slice-ansi/node_modules/ansi-styles": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", - "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", - "dev": true, - "dependencies": { - "color-convert": "^2.0.1" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/slice-ansi/node_modules/color-convert": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", - "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", - "dev": true, - "dependencies": { - "color-name": "~1.1.4" - }, - "engines": { - "node": ">=7.0.0" - } - }, - "node_modules/slice-ansi/node_modules/color-name": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", - "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", - "dev": true - }, - "node_modules/source-map": { - "version": "0.5.7", - "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.5.7.tgz", - "integrity": "sha1-igOdLRAh0i0eoUyA2OpGi6LvP8w=", - "dev": true, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/spdx-correct": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/spdx-correct/-/spdx-correct-3.1.1.tgz", - "integrity": "sha512-cOYcUWwhCuHCXi49RhFRCyJEK3iPj1Ziz9DpViV3tbZOwXD49QzIN3MpOLJNxh2qwq2lJJZaKMVw9qNi4jTC0w==", - "dev": true, - "dependencies": { - "spdx-expression-parse": "^3.0.0", - "spdx-license-ids": "^3.0.0" - } - }, - "node_modules/spdx-exceptions": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/spdx-exceptions/-/spdx-exceptions-2.3.0.tgz", - "integrity": "sha512-/tTrYOC7PPI1nUAgx34hUpqXuyJG+DTHJTnIULG4rDygi4xu/tfgmq1e1cIRwRzwZgo4NLySi+ricLkZkw4i5A==", - "dev": true - }, - "node_modules/spdx-expression-parse": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/spdx-expression-parse/-/spdx-expression-parse-3.0.1.tgz", - "integrity": "sha512-cbqHunsQWnJNE6KhVSMsMeH5H/L9EpymbzqTQ3uLwNCLZ1Q481oWaofqH7nO6V07xlXwY6PhQdQ2IedWx/ZK4Q==", - "dev": true, - "dependencies": { - "spdx-exceptions": "^2.1.0", - "spdx-license-ids": "^3.0.0" - } - }, - "node_modules/spdx-license-ids": { - "version": "3.0.8", - "resolved": "https://registry.npmjs.org/spdx-license-ids/-/spdx-license-ids-3.0.8.tgz", - "integrity": "sha512-NDgA96EnaLSvtbM7trJj+t1LUR3pirkDCcz9nOUlPb5DMBGsH7oES6C3hs3j7R9oHEa1EMvReS/BUAIT5Tcr0g==", - "dev": true - }, - "node_modules/sprintf-js": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", - "integrity": "sha1-BOaSb2YolTVPPdAVIDYzuFcpfiw=", - "dev": true - }, - "node_modules/string-width": { - "version": "4.2.2", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.2.tgz", - "integrity": 
"sha512-XBJbT3N4JhVumXE0eoLU9DCjcaF92KLNqTmFCnG1pf8duUxFGwtP6AD6nkjw9a3IdiRtL3E2w3JDiE/xi3vOeA==", - "dev": true, - "dependencies": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/string-width/node_modules/emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "dev": true - }, - "node_modules/string.prototype.matchall": { - "version": "4.0.4", - "resolved": "https://registry.npmjs.org/string.prototype.matchall/-/string.prototype.matchall-4.0.4.tgz", - "integrity": "sha512-pknFIWVachNcyqRfaQSeu/FUfpvJTe4uskUSZ9Wc1RijsPuzbZ8TyYT8WCNnntCjUEqQ3vUHMAfVj2+wLAisPQ==", - "dev": true, - "dependencies": { - "call-bind": "^1.0.2", - "define-properties": "^1.1.3", - "es-abstract": "^1.18.0-next.2", - "has-symbols": "^1.0.1", - "internal-slot": "^1.0.3", - "regexp.prototype.flags": "^1.3.1", - "side-channel": "^1.0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/string.prototype.trimend": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/string.prototype.trimend/-/string.prototype.trimend-1.0.4.tgz", - "integrity": "sha512-y9xCjw1P23Awk8EvTpcyL2NIr1j7wJ39f+k6lvRnSMz+mz9CGz9NYPelDk42kOz6+ql8xjfK8oYzy3jAP5QU5A==", - "dev": true, - "dependencies": { - "call-bind": "^1.0.2", - "define-properties": "^1.1.3" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/string.prototype.trimstart": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/string.prototype.trimstart/-/string.prototype.trimstart-1.0.4.tgz", - "integrity": "sha512-jh6e984OBfvxS50tdY2nRZnoC5/mLFKOREQfw8t5yytkoUsJRNxvI/E39qu1sD0OtWI3OC0XgKSmcWwziwYuZw==", - "dev": true, - "dependencies": { - "call-bind": "^1.0.2", - "define-properties": "^1.1.3" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/strip-ansi": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.0.tgz", - "integrity": "sha512-AuvKTrTfQNYNIctbR1K/YGTR1756GycPsg7b9bdV9Duqur4gv6aKqHXah67Z8ImS7WEz5QVcOtlfW2rZEugt6w==", - "dev": true, - "dependencies": { - "ansi-regex": "^5.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/strip-bom": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-3.0.0.tgz", - "integrity": "sha1-IzTBjpx1n3vdVv3vfprj1YjmjtM=", - "dev": true, - "engines": { - "node": ">=4" - } - }, - "node_modules/strip-json-comments": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz", - "integrity": "sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==", - "dev": true, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/supports-color": { - "version": "5.5.0", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", - "integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==", - "dev": true, - "dependencies": { - "has-flag": "^3.0.0" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/table": { - "version": "6.7.1", - "resolved": "https://registry.npmjs.org/table/-/table-6.7.1.tgz", - 
"integrity": "sha512-ZGum47Yi6KOOFDE8m223td53ath2enHcYLgOCjGr5ngu8bdIARQk6mN/wRMv4yMRcHnCSnHbCEha4sobQx5yWg==", - "dev": true, - "dependencies": { - "ajv": "^8.0.1", - "lodash.clonedeep": "^4.5.0", - "lodash.truncate": "^4.4.2", - "slice-ansi": "^4.0.0", - "string-width": "^4.2.0", - "strip-ansi": "^6.0.0" - }, - "engines": { - "node": ">=10.0.0" - } - }, - "node_modules/table/node_modules/ajv": { - "version": "8.4.0", - "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.4.0.tgz", - "integrity": "sha512-7QD2l6+KBSLwf+7MuYocbWvRPdOu63/trReTLu2KFwkgctnub1auoF+Y1WYcm09CTM7quuscrzqmASaLHC/K4Q==", - "dev": true, - "dependencies": { - "fast-deep-equal": "^3.1.1", - "json-schema-traverse": "^1.0.0", - "require-from-string": "^2.0.2", - "uri-js": "^4.2.2" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/epoberezkin" - } - }, - "node_modules/table/node_modules/json-schema-traverse": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", - "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==", - "dev": true - }, - "node_modules/text-table": { - "version": "0.2.0", - "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz", - "integrity": "sha1-f17oI66AUgfACvLfSoTsP8+lcLQ=", - "dev": true - }, - "node_modules/tiny-invariant": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/tiny-invariant/-/tiny-invariant-1.1.0.tgz", - "integrity": "sha512-ytxQvrb1cPc9WBEI/HSeYYoGD0kWnGEOR8RY6KomWLBVhqz0RgTwVO9dLrGz7dC+nN9llyI7OKAgRq8Vq4ZBSw==" - }, - "node_modules/tiny-warning": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/tiny-warning/-/tiny-warning-1.0.3.tgz", - "integrity": "sha512-lBN9zLN/oAf68o3zNXYrdCt1kP8WsiGW8Oo2ka41b2IM5JL/S1CTyX1rW0mb/zSuJun0ZUrDxx4sqvYS2FWzPA==" - }, - "node_modules/to-fast-properties": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/to-fast-properties/-/to-fast-properties-2.0.0.tgz", - "integrity": "sha1-3F5pjL0HkmW8c+A3doGk5Og/YW4=", - "dev": true, - "engines": { - "node": ">=4" - } - }, - "node_modules/to-regex-range": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", - "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", - "dev": true, - "dependencies": { - "is-number": "^7.0.0" - }, - "engines": { - "node": ">=8.0" - } - }, - "node_modules/tsconfig-paths": { - "version": "3.9.0", - "resolved": "https://registry.npmjs.org/tsconfig-paths/-/tsconfig-paths-3.9.0.tgz", - "integrity": "sha512-dRcuzokWhajtZWkQsDVKbWyY+jgcLC5sqJhg2PSgf4ZkH2aHPvaOY8YWGhmjb68b5qqTfasSsDO9k7RUiEmZAw==", - "dev": true, - "dependencies": { - "@types/json5": "^0.0.29", - "json5": "^1.0.1", - "minimist": "^1.2.0", - "strip-bom": "^3.0.0" - } - }, - "node_modules/tsconfig-paths/node_modules/json5": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/json5/-/json5-1.0.1.tgz", - "integrity": "sha512-aKS4WQjPenRxiQsC93MNfjx+nbF4PAdYzmd/1JIj8HYzqfbu86beTuNgXDzPknWk0n0uARlyewZo4s++ES36Ow==", - "dev": true, - "dependencies": { - "minimist": "^1.2.0" - }, - "bin": { - "json5": "lib/cli.js" - } - }, - "node_modules/tslib": { - "version": "1.14.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-1.14.1.tgz", - "integrity": "sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==", - "dev": true - }, - 
"node_modules/tsutils": { - "version": "3.21.0", - "resolved": "https://registry.npmjs.org/tsutils/-/tsutils-3.21.0.tgz", - "integrity": "sha512-mHKK3iUXL+3UF6xL5k0PEhKRUBKPBCv/+RkEOpjRWxxx27KKRBmmA60A9pgOUvMi8GKhRMPEmjBRPzs2W7O1OA==", - "dev": true, - "dependencies": { - "tslib": "^1.8.1" - }, - "engines": { - "node": ">= 6" - }, - "peerDependencies": { - "typescript": ">=2.8.0 || >= 3.2.0-dev || >= 3.3.0-dev || >= 3.4.0-dev || >= 3.5.0-dev || >= 3.6.0-dev || >= 3.6.0-beta || >= 3.7.0-dev || >= 3.7.0-beta" - } - }, - "node_modules/type-check": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.4.0.tgz", - "integrity": "sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==", - "dev": true, - "dependencies": { - "prelude-ls": "^1.2.1" - }, - "engines": { - "node": ">= 0.8.0" - } - }, - "node_modules/type-fest": { - "version": "0.8.1", - "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.8.1.tgz", - "integrity": "sha512-4dbzIzqvjtgiM5rw1k5rEHtBANKmdudhGyBEajN01fEyhaAIhsoKNy6y7+IN93IfpFtwY9iqi7kD+xwKhQsNJA==", - "dev": true, - "engines": { - "node": ">=8" - } - }, - "node_modules/typescript": { - "version": "4.2.4", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.2.4.tgz", - "integrity": "sha512-V+evlYHZnQkaz8TRBuxTA92yZBPotr5H+WhQ7bD3hZUndx5tGOa1fuCgeSjxAzM1RiN5IzvadIXTVefuuwZCRg==", - "dev": true, - "bin": { - "tsc": "bin/tsc", - "tsserver": "bin/tsserver" - }, - "engines": { - "node": ">=4.2.0" - } - }, - "node_modules/unbox-primitive": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/unbox-primitive/-/unbox-primitive-1.0.1.tgz", - "integrity": "sha512-tZU/3NqK3dA5gpE1KtyiJUrEB0lxnGkMFHptJ7q6ewdZ8s12QrODwNbhIJStmJkd1QDXa1NRA8aF2A1zk/Ypyw==", - "dev": true, - "dependencies": { - "function-bind": "^1.1.1", - "has-bigints": "^1.0.1", - "has-symbols": "^1.0.2", - "which-boxed-primitive": "^1.0.2" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/uri-js": { - "version": "4.4.1", - "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz", - "integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==", - "dev": true, - "dependencies": { - "punycode": "^2.1.0" - } - }, - "node_modules/v8-compile-cache": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/v8-compile-cache/-/v8-compile-cache-2.3.0.tgz", - "integrity": "sha512-l8lCEmLcLYZh4nbunNZvQCJc5pv7+RCwa8q/LdUx8u7lsWvPDKmpodJAJNwkAhJC//dFY48KuIEmjtd4RViDrA==", - "dev": true - }, - "node_modules/validate-npm-package-license": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/validate-npm-package-license/-/validate-npm-package-license-3.0.4.tgz", - "integrity": "sha512-DpKm2Ui/xN7/HQKCtpZxoRWBhZ9Z0kqtygG8XCgNQ8ZlDnxuQmWhj566j8fN4Cu3/JmbhsDo7fcAJq4s9h27Ew==", - "dev": true, - "dependencies": { - "spdx-correct": "^3.0.0", - "spdx-expression-parse": "^3.0.0" - } - }, - "node_modules/value-equal": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/value-equal/-/value-equal-1.0.1.tgz", - "integrity": "sha512-NOJ6JZCAWr0zlxZt+xqCHNTEKOsrks2HQd4MqhP1qy4z1SkbEP467eNx6TgDKXMvUOb+OENfJCZwM+16n7fRfw==" - }, - "node_modules/vite": { - "version": "2.3.3", - "resolved": "https://registry.npmjs.org/vite/-/vite-2.3.3.tgz", - "integrity": "sha512-eO1iwRbn3/BfkNVMNJDeANAFCZ5NobYOFPu7IqfY7DcI7I9nFGjJIZid0EViTmLDGwwSUPmRAq3cRBbO3+DsMA==", - "dev": true, - "dependencies": { - "esbuild": 
"^0.11.23", - "fsevents": "~2.3.1", - "postcss": "^8.2.10", - "resolve": "^1.19.0", - "rollup": "^2.38.5" - }, - "bin": { - "vite": "bin/vite.js" - }, - "engines": { - "node": ">=12.0.0" - }, - "optionalDependencies": { - "fsevents": "~2.3.1" - } - }, - "node_modules/which": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", - "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", - "dev": true, - "dependencies": { - "isexe": "^2.0.0" - }, - "bin": { - "node-which": "bin/node-which" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/which-boxed-primitive": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/which-boxed-primitive/-/which-boxed-primitive-1.0.2.tgz", - "integrity": "sha512-bwZdv0AKLpplFY2KZRX6TvyuN7ojjr7lwkg6ml0roIy9YeuSr7JS372qlNW18UQYzgYK9ziGcerWqZOmEn9VNg==", - "dev": true, - "dependencies": { - "is-bigint": "^1.0.1", - "is-boolean-object": "^1.1.0", - "is-number-object": "^1.0.4", - "is-string": "^1.0.5", - "is-symbol": "^1.0.3" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/word-wrap": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.3.tgz", - "integrity": "sha512-Hz/mrNwitNRh/HUAtM/VT/5VH+ygD6DV7mYKZAtHOrbs8U7lvPS6xf7EJKMF0uW1KJCl0H701g3ZGus+muE5vQ==", - "dev": true, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/wrappy": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", - "integrity": "sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=", - "dev": true - }, - "node_modules/yallist": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", - "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", - "dev": true - } - }, - "dependencies": { - "@babel/code-frame": { - "version": "7.12.13", - "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.12.13.tgz", - "integrity": "sha512-HV1Cm0Q3ZrpCR93tkWOYiuYIgLxZXZFVG2VgK+MBWjUqZTundupbfx2aXarXuw5Ko5aMcjtJgbSs4vUGBS5v6g==", - "dev": true, - "requires": { - "@babel/highlight": "^7.12.13" - } - }, - "@babel/compat-data": { - "version": "7.14.0", - "resolved": "https://registry.npmjs.org/@babel/compat-data/-/compat-data-7.14.0.tgz", - "integrity": "sha512-vu9V3uMM/1o5Hl5OekMUowo3FqXLJSw+s+66nt0fSWVWTtmosdzn45JHOB3cPtZoe6CTBDzvSw0RdOY85Q37+Q==", - "dev": true - }, - "@babel/core": { - "version": "7.14.3", - "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.14.3.tgz", - "integrity": "sha512-jB5AmTKOCSJIZ72sd78ECEhuPiDMKlQdDI/4QRI6lzYATx5SSogS1oQA2AoPecRCknm30gHi2l+QVvNUu3wZAg==", - "dev": true, - "requires": { - "@babel/code-frame": "^7.12.13", - "@babel/generator": "^7.14.3", - "@babel/helper-compilation-targets": "^7.13.16", - "@babel/helper-module-transforms": "^7.14.2", - "@babel/helpers": "^7.14.0", - "@babel/parser": "^7.14.3", - "@babel/template": "^7.12.13", - "@babel/traverse": "^7.14.2", - "@babel/types": "^7.14.2", - "convert-source-map": "^1.7.0", - "debug": "^4.1.0", - "gensync": "^1.0.0-beta.2", - "json5": "^2.1.2", - "semver": "^6.3.0", - "source-map": "^0.5.0" - }, - "dependencies": { - "semver": { - "version": "6.3.0", - "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.0.tgz", - "integrity": "sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==", - "dev": true - } - } - }, - "@babel/generator": { 
- "version": "7.14.3", - "resolved": "https://registry.npmjs.org/@babel/generator/-/generator-7.14.3.tgz", - "integrity": "sha512-bn0S6flG/j0xtQdz3hsjJ624h3W0r3llttBMfyHX3YrZ/KtLYr15bjA0FXkgW7FpvrDuTuElXeVjiKlYRpnOFA==", - "dev": true, - "requires": { - "@babel/types": "^7.14.2", - "jsesc": "^2.5.1", - "source-map": "^0.5.0" - } - }, - "@babel/helper-compilation-targets": { - "version": "7.13.16", - "resolved": "https://registry.npmjs.org/@babel/helper-compilation-targets/-/helper-compilation-targets-7.13.16.tgz", - "integrity": "sha512-3gmkYIrpqsLlieFwjkGgLaSHmhnvlAYzZLlYVjlW+QwI+1zE17kGxuJGmIqDQdYp56XdmGeD+Bswx0UTyG18xA==", - "dev": true, - "requires": { - "@babel/compat-data": "^7.13.15", - "@babel/helper-validator-option": "^7.12.17", - "browserslist": "^4.14.5", - "semver": "^6.3.0" - }, - "dependencies": { - "semver": { - "version": "6.3.0", - "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.0.tgz", - "integrity": "sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==", - "dev": true - } - } - }, - "@babel/helper-function-name": { - "version": "7.14.2", - "resolved": "https://registry.npmjs.org/@babel/helper-function-name/-/helper-function-name-7.14.2.tgz", - "integrity": "sha512-NYZlkZRydxw+YT56IlhIcS8PAhb+FEUiOzuhFTfqDyPmzAhRge6ua0dQYT/Uh0t/EDHq05/i+e5M2d4XvjgarQ==", - "dev": true, - "requires": { - "@babel/helper-get-function-arity": "^7.12.13", - "@babel/template": "^7.12.13", - "@babel/types": "^7.14.2" - } - }, - "@babel/helper-get-function-arity": { - "version": "7.12.13", - "resolved": "https://registry.npmjs.org/@babel/helper-get-function-arity/-/helper-get-function-arity-7.12.13.tgz", - "integrity": "sha512-DjEVzQNz5LICkzN0REdpD5prGoidvbdYk1BVgRUOINaWJP2t6avB27X1guXK1kXNrX0WMfsrm1A/ZBthYuIMQg==", - "dev": true, - "requires": { - "@babel/types": "^7.12.13" - } - }, - "@babel/helper-member-expression-to-functions": { - "version": "7.13.12", - "resolved": "https://registry.npmjs.org/@babel/helper-member-expression-to-functions/-/helper-member-expression-to-functions-7.13.12.tgz", - "integrity": "sha512-48ql1CLL59aKbU94Y88Xgb2VFy7a95ykGRbJJaaVv+LX5U8wFpLfiGXJJGUozsmA1oEh/o5Bp60Voq7ACyA/Sw==", - "dev": true, - "requires": { - "@babel/types": "^7.13.12" - } - }, - "@babel/helper-module-imports": { - "version": "7.13.12", - "resolved": "https://registry.npmjs.org/@babel/helper-module-imports/-/helper-module-imports-7.13.12.tgz", - "integrity": "sha512-4cVvR2/1B693IuOvSI20xqqa/+bl7lqAMR59R4iu39R9aOX8/JoYY1sFaNvUMyMBGnHdwvJgUrzNLoUZxXypxA==", - "dev": true, - "requires": { - "@babel/types": "^7.13.12" - } - }, - "@babel/helper-module-transforms": { - "version": "7.14.2", - "resolved": "https://registry.npmjs.org/@babel/helper-module-transforms/-/helper-module-transforms-7.14.2.tgz", - "integrity": "sha512-OznJUda/soKXv0XhpvzGWDnml4Qnwp16GN+D/kZIdLsWoHj05kyu8Rm5kXmMef+rVJZ0+4pSGLkeixdqNUATDA==", - "dev": true, - "requires": { - "@babel/helper-module-imports": "^7.13.12", - "@babel/helper-replace-supers": "^7.13.12", - "@babel/helper-simple-access": "^7.13.12", - "@babel/helper-split-export-declaration": "^7.12.13", - "@babel/helper-validator-identifier": "^7.14.0", - "@babel/template": "^7.12.13", - "@babel/traverse": "^7.14.2", - "@babel/types": "^7.14.2" - } - }, - "@babel/helper-optimise-call-expression": { - "version": "7.12.13", - "resolved": "https://registry.npmjs.org/@babel/helper-optimise-call-expression/-/helper-optimise-call-expression-7.12.13.tgz", - "integrity": 
"sha512-BdWQhoVJkp6nVjB7nkFWcn43dkprYauqtk++Py2eaf/GRDFm5BxRqEIZCiHlZUGAVmtwKcsVL1dC68WmzeFmiA==", - "dev": true, - "requires": { - "@babel/types": "^7.12.13" - } - }, - "@babel/helper-plugin-utils": { - "version": "7.13.0", - "resolved": "https://registry.npmjs.org/@babel/helper-plugin-utils/-/helper-plugin-utils-7.13.0.tgz", - "integrity": "sha512-ZPafIPSwzUlAoWT8DKs1W2VyF2gOWthGd5NGFMsBcMMol+ZhK+EQY/e6V96poa6PA/Bh+C9plWN0hXO1uB8AfQ==", - "dev": true - }, - "@babel/helper-replace-supers": { - "version": "7.14.3", - "resolved": "https://registry.npmjs.org/@babel/helper-replace-supers/-/helper-replace-supers-7.14.3.tgz", - "integrity": "sha512-Rlh8qEWZSTfdz+tgNV/N4gz1a0TMNwCUcENhMjHTHKp3LseYH5Jha0NSlyTQWMnjbYcwFt+bqAMqSLHVXkQ6UA==", - "dev": true, - "requires": { - "@babel/helper-member-expression-to-functions": "^7.13.12", - "@babel/helper-optimise-call-expression": "^7.12.13", - "@babel/traverse": "^7.14.2", - "@babel/types": "^7.14.2" - } - }, - "@babel/helper-simple-access": { - "version": "7.13.12", - "resolved": "https://registry.npmjs.org/@babel/helper-simple-access/-/helper-simple-access-7.13.12.tgz", - "integrity": "sha512-7FEjbrx5SL9cWvXioDbnlYTppcZGuCY6ow3/D5vMggb2Ywgu4dMrpTJX0JdQAIcRRUElOIxF3yEooa9gUb9ZbA==", - "dev": true, - "requires": { - "@babel/types": "^7.13.12" - } - }, - "@babel/helper-split-export-declaration": { - "version": "7.12.13", - "resolved": "https://registry.npmjs.org/@babel/helper-split-export-declaration/-/helper-split-export-declaration-7.12.13.tgz", - "integrity": "sha512-tCJDltF83htUtXx5NLcaDqRmknv652ZWCHyoTETf1CXYJdPC7nohZohjUgieXhv0hTJdRf2FjDueFehdNucpzg==", - "dev": true, - "requires": { - "@babel/types": "^7.12.13" - } - }, - "@babel/helper-validator-identifier": { - "version": "7.14.0", - "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.14.0.tgz", - "integrity": "sha512-V3ts7zMSu5lfiwWDVWzRDGIN+lnCEUdaXgtVHJgLb1rGaA6jMrtB9EmE7L18foXJIE8Un/A/h6NJfGQp/e1J4A==", - "dev": true - }, - "@babel/helper-validator-option": { - "version": "7.12.17", - "resolved": "https://registry.npmjs.org/@babel/helper-validator-option/-/helper-validator-option-7.12.17.tgz", - "integrity": "sha512-TopkMDmLzq8ngChwRlyjR6raKD6gMSae4JdYDB8bByKreQgG0RBTuKe9LRxW3wFtUnjxOPRKBDwEH6Mg5KeDfw==", - "dev": true - }, - "@babel/helpers": { - "version": "7.14.0", - "resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.14.0.tgz", - "integrity": "sha512-+ufuXprtQ1D1iZTO/K9+EBRn+qPWMJjZSw/S0KlFrxCw4tkrzv9grgpDHkY9MeQTjTY8i2sp7Jep8DfU6tN9Mg==", - "dev": true, - "requires": { - "@babel/template": "^7.12.13", - "@babel/traverse": "^7.14.0", - "@babel/types": "^7.14.0" - } - }, - "@babel/highlight": { - "version": "7.14.0", - "resolved": "https://registry.npmjs.org/@babel/highlight/-/highlight-7.14.0.tgz", - "integrity": "sha512-YSCOwxvTYEIMSGaBQb5kDDsCopDdiUGsqpatp3fOlI4+2HQSkTmEVWnVuySdAC5EWCqSWWTv0ib63RjR7dTBdg==", - "dev": true, - "requires": { - "@babel/helper-validator-identifier": "^7.14.0", - "chalk": "^2.0.0", - "js-tokens": "^4.0.0" - } - }, - "@babel/parser": { - "version": "7.14.3", - "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.14.3.tgz", - "integrity": "sha512-7MpZDIfI7sUC5zWo2+foJ50CSI5lcqDehZ0lVgIhSi4bFEk94fLAKlF3Q0nzSQQ+ca0lm+O6G9ztKVBeu8PMRQ==", - "dev": true - }, - "@babel/plugin-transform-react-jsx-self": { - "version": "7.12.13", - "resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-jsx-self/-/plugin-transform-react-jsx-self-7.12.13.tgz", - "integrity": 
"sha512-FXYw98TTJ125GVCCkFLZXlZ1qGcsYqNQhVBQcZjyrwf8FEUtVfKIoidnO8S0q+KBQpDYNTmiGo1gn67Vti04lQ==", - "dev": true, - "requires": { - "@babel/helper-plugin-utils": "^7.12.13" - } - }, - "@babel/plugin-transform-react-jsx-source": { - "version": "7.14.2", - "resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-jsx-source/-/plugin-transform-react-jsx-source-7.14.2.tgz", - "integrity": "sha512-OMorspVyjxghAjzgeAWc6O7W7vHbJhV69NeTGdl9Mxgz6PaweAuo7ffB9T5A1OQ9dGcw0As4SYMUhyNC4u7mVg==", - "dev": true, - "requires": { - "@babel/helper-plugin-utils": "^7.13.0" - } - }, - "@babel/runtime": { - "version": "7.14.0", - "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.14.0.tgz", - "integrity": "sha512-JELkvo/DlpNdJ7dlyw/eY7E0suy5i5GQH+Vlxaq1nsNJ+H7f4Vtv3jMeCEgRhZZQFXTjldYfQgv2qmM6M1v5wA==", - "requires": { - "regenerator-runtime": "^0.13.4" - } - }, - "@babel/runtime-corejs3": { - "version": "7.14.0", - "resolved": "https://registry.npmjs.org/@babel/runtime-corejs3/-/runtime-corejs3-7.14.0.tgz", - "integrity": "sha512-0R0HTZWHLk6G8jIk0FtoX+AatCtKnswS98VhXwGImFc759PJRp4Tru0PQYZofyijTFUr+gT8Mu7sgXVJLQ0ceg==", - "dev": true, - "requires": { - "core-js-pure": "^3.0.0", - "regenerator-runtime": "^0.13.4" - } - }, - "@babel/template": { - "version": "7.12.13", - "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.12.13.tgz", - "integrity": "sha512-/7xxiGA57xMo/P2GVvdEumr8ONhFOhfgq2ihK3h1e6THqzTAkHbkXgB0xI9yeTfIUoH3+oAeHhqm/I43OTbbjA==", - "dev": true, - "requires": { - "@babel/code-frame": "^7.12.13", - "@babel/parser": "^7.12.13", - "@babel/types": "^7.12.13" - } - }, - "@babel/traverse": { - "version": "7.14.2", - "resolved": "https://registry.npmjs.org/@babel/traverse/-/traverse-7.14.2.tgz", - "integrity": "sha512-TsdRgvBFHMyHOOzcP9S6QU0QQtjxlRpEYOy3mcCO5RgmC305ki42aSAmfZEMSSYBla2oZ9BMqYlncBaKmD/7iA==", - "dev": true, - "requires": { - "@babel/code-frame": "^7.12.13", - "@babel/generator": "^7.14.2", - "@babel/helper-function-name": "^7.14.2", - "@babel/helper-split-export-declaration": "^7.12.13", - "@babel/parser": "^7.14.2", - "@babel/types": "^7.14.2", - "debug": "^4.1.0", - "globals": "^11.1.0" - } - }, - "@babel/types": { - "version": "7.14.2", - "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.14.2.tgz", - "integrity": "sha512-SdjAG/3DikRHpUOjxZgnkbR11xUlyDMUFJdvnIgZEE16mqmY0BINMmc4//JMJglEmn6i7sq6p+mGrFWyZ98EEw==", - "dev": true, - "requires": { - "@babel/helper-validator-identifier": "^7.14.0", - "to-fast-properties": "^2.0.0" - } - }, - "@eslint/eslintrc": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-0.4.1.tgz", - "integrity": "sha512-5v7TDE9plVhvxQeWLXDTvFvJBdH6pEsdnl2g/dAptmuFEPedQ4Erq5rsDsX+mvAM610IhNaO2W5V1dOOnDKxkQ==", - "dev": true, - "requires": { - "ajv": "^6.12.4", - "debug": "^4.1.1", - "espree": "^7.3.0", - "globals": "^12.1.0", - "ignore": "^4.0.6", - "import-fresh": "^3.2.1", - "js-yaml": "^3.13.1", - "minimatch": "^3.0.4", - "strip-json-comments": "^3.1.1" - }, - "dependencies": { - "globals": { - "version": "12.4.0", - "resolved": "https://registry.npmjs.org/globals/-/globals-12.4.0.tgz", - "integrity": "sha512-BWICuzzDvDoH54NHKCseDanAhE3CeDorgDL5MT6LMXXj2WCnd9UC2szdk4AWLfjdgNBCXLUanXYcpBBKOSWGwg==", - "dev": true, - "requires": { - "type-fest": "^0.8.1" - } - } - } - }, - "@nodelib/fs.scandir": { - "version": "2.1.4", - "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.4.tgz", - "integrity": 
"sha512-33g3pMJk3bg5nXbL/+CY6I2eJDzZAni49PfJnL5fghPTggPvBd/pFNSgJsdAgWptuFu7qq/ERvOYFlhvsLTCKA==", - "dev": true, - "requires": { - "@nodelib/fs.stat": "2.0.4", - "run-parallel": "^1.1.9" - } - }, - "@nodelib/fs.stat": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/@nodelib/fs.stat/-/fs.stat-2.0.4.tgz", - "integrity": "sha512-IYlHJA0clt2+Vg7bccq+TzRdJvv19c2INqBSsoOLp1je7xjtr7J26+WXR72MCdvU9q1qTzIWDfhMf+DRvQJK4Q==", - "dev": true - }, - "@nodelib/fs.walk": { - "version": "1.2.6", - "resolved": "https://registry.npmjs.org/@nodelib/fs.walk/-/fs.walk-1.2.6.tgz", - "integrity": "sha512-8Broas6vTtW4GIXTAHDoE32hnN2M5ykgCpWGbuXHQ15vEMqr23pB76e/GZcYsZCHALv50ktd24qhEyKr6wBtow==", - "dev": true, - "requires": { - "@nodelib/fs.scandir": "2.1.4", - "fastq": "^1.6.0" - } - }, - "@types/history": { - "version": "4.7.8", - "resolved": "https://registry.npmjs.org/@types/history/-/history-4.7.8.tgz", - "integrity": "sha512-S78QIYirQcUoo6UJZx9CSP0O2ix9IaeAXwQi26Rhr/+mg7qqPy8TzaxHSUut7eGjL8WmLccT7/MXf304WjqHcA==", - "dev": true - }, - "@types/json-schema": { - "version": "7.0.7", - "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.7.tgz", - "integrity": "sha512-cxWFQVseBm6O9Gbw1IWb8r6OS4OhSt3hPZLkFApLjM8TEXROBuQGLAH2i2gZpcXdLBIrpXuTDhH7Vbm1iXmNGA==", - "dev": true - }, - "@types/json5": { - "version": "0.0.29", - "resolved": "https://registry.npmjs.org/@types/json5/-/json5-0.0.29.tgz", - "integrity": "sha1-7ihweulOEdK4J7y+UnC86n8+ce4=", - "dev": true - }, - "@types/prop-types": { - "version": "15.7.3", - "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.3.tgz", - "integrity": "sha512-KfRL3PuHmqQLOG+2tGpRO26Ctg+Cq1E01D2DMriKEATHgWLfeNDmq9e29Q9WIky0dQ3NPkd1mzYH8Lm936Z9qw==", - "dev": true - }, - "@types/react": { - "version": "17.0.6", - "resolved": "https://registry.npmjs.org/@types/react/-/react-17.0.6.tgz", - "integrity": "sha512-u/TtPoF/hrvb63LdukET6ncaplYsvCvmkceasx8oG84/ZCsoLxz9Z/raPBP4lTAiWW1Jb889Y9svHmv8R26dWw==", - "dev": true, - "requires": { - "@types/prop-types": "*", - "@types/scheduler": "*", - "csstype": "^3.0.2" - } - }, - "@types/react-dom": { - "version": "17.0.5", - "resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-17.0.5.tgz", - "integrity": "sha512-ikqukEhH4H9gr4iJCmQVNzTB307kROe3XFfHAOTxOXPOw7lAoEXnM5KWTkzeANGL5Ce6ABfiMl/zJBYNi7ObmQ==", - "dev": true, - "requires": { - "@types/react": "*" - } - }, - "@types/react-router": { - "version": "5.1.14", - "resolved": "https://registry.npmjs.org/@types/react-router/-/react-router-5.1.14.tgz", - "integrity": "sha512-LAJpqYUaCTMT2anZheoidiIymt8MuX286zoVFPM3DVb23aQBH0mAkFvzpd4LKqiolV8bBtZWT5Qp7hClCNDENw==", - "dev": true, - "requires": { - "@types/history": "*", - "@types/react": "*" - } - }, - "@types/react-router-dom": { - "version": "5.1.7", - "resolved": "https://registry.npmjs.org/@types/react-router-dom/-/react-router-dom-5.1.7.tgz", - "integrity": "sha512-D5mHD6TbdV/DNHYsnwBTv+y73ei+mMjrkGrla86HthE4/PVvL1J94Bu3qABU+COXzpL23T1EZapVVpwHuBXiUg==", - "dev": true, - "requires": { - "@types/history": "*", - "@types/react": "*", - "@types/react-router": "*" - } - }, - "@types/scheduler": { - "version": "0.16.1", - "resolved": "https://registry.npmjs.org/@types/scheduler/-/scheduler-0.16.1.tgz", - "integrity": "sha512-EaCxbanVeyxDRTQBkdLb3Bvl/HK7PBK6UJjsSixB0iHKoWxE5uu2Q/DgtpOhPIojN0Zl1whvOd7PoHs2P0s5eA==", - "dev": true - }, - "@typescript-eslint/eslint-plugin": { - "version": "4.24.0", - "resolved": 
"https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-4.24.0.tgz", - "integrity": "sha512-qbCgkPM7DWTsYQGjx9RTuQGswi+bEt0isqDBeo+CKV0953zqI0Tp7CZ7Fi9ipgFA6mcQqF4NOVNwS/f2r6xShw==", - "dev": true, - "requires": { - "@typescript-eslint/experimental-utils": "4.24.0", - "@typescript-eslint/scope-manager": "4.24.0", - "debug": "^4.1.1", - "functional-red-black-tree": "^1.0.1", - "lodash": "^4.17.15", - "regexpp": "^3.0.0", - "semver": "^7.3.2", - "tsutils": "^3.17.1" - } - }, - "@typescript-eslint/experimental-utils": { - "version": "4.24.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/experimental-utils/-/experimental-utils-4.24.0.tgz", - "integrity": "sha512-IwTT2VNDKH1h8RZseMH4CcYBz6lTvRoOLDuuqNZZoThvfHEhOiZPQCow+5El3PtyxJ1iDr6UXZwYtE3yZQjhcw==", - "dev": true, - "requires": { - "@types/json-schema": "^7.0.3", - "@typescript-eslint/scope-manager": "4.24.0", - "@typescript-eslint/types": "4.24.0", - "@typescript-eslint/typescript-estree": "4.24.0", - "eslint-scope": "^5.0.0", - "eslint-utils": "^2.0.0" - } - }, - "@typescript-eslint/parser": { - "version": "4.24.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-4.24.0.tgz", - "integrity": "sha512-dj1ZIh/4QKeECLb2f/QjRwMmDArcwc2WorWPRlB8UNTZlY1KpTVsbX7e3ZZdphfRw29aTFUSNuGB8w9X5sS97w==", - "dev": true, - "requires": { - "@typescript-eslint/scope-manager": "4.24.0", - "@typescript-eslint/types": "4.24.0", - "@typescript-eslint/typescript-estree": "4.24.0", - "debug": "^4.1.1" - } - }, - "@typescript-eslint/scope-manager": { - "version": "4.24.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-4.24.0.tgz", - "integrity": "sha512-9+WYJGDnuC9VtYLqBhcSuM7du75fyCS/ypC8c5g7Sdw7pGL4NDTbeH38eJPfzIydCHZDoOgjloxSAA3+4l/zsA==", - "dev": true, - "requires": { - "@typescript-eslint/types": "4.24.0", - "@typescript-eslint/visitor-keys": "4.24.0" - } - }, - "@typescript-eslint/types": { - "version": "4.24.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-4.24.0.tgz", - "integrity": "sha512-tkZUBgDQKdvfs8L47LaqxojKDE+mIUmOzdz7r+u+U54l3GDkTpEbQ1Jp3cNqqAU9vMUCBA1fitsIhm7yN0vx9Q==", - "dev": true - }, - "@typescript-eslint/typescript-estree": { - "version": "4.24.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-4.24.0.tgz", - "integrity": "sha512-kBDitL/by/HK7g8CYLT7aKpAwlR8doshfWz8d71j97n5kUa5caHWvY0RvEUEanL/EqBJoANev8Xc/mQ6LLwXGA==", - "dev": true, - "requires": { - "@typescript-eslint/types": "4.24.0", - "@typescript-eslint/visitor-keys": "4.24.0", - "debug": "^4.1.1", - "globby": "^11.0.1", - "is-glob": "^4.0.1", - "semver": "^7.3.2", - "tsutils": "^3.17.1" - } - }, - "@typescript-eslint/visitor-keys": { - "version": "4.24.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-4.24.0.tgz", - "integrity": "sha512-4ox1sjmGHIxjEDBnMCtWFFhErXtKA1Ec0sBpuz0fqf3P+g3JFGyTxxbF06byw0FRsPnnbq44cKivH7Ks1/0s6g==", - "dev": true, - "requires": { - "@typescript-eslint/types": "4.24.0", - "eslint-visitor-keys": "^2.0.0" - } - }, - "@vitejs/plugin-react-refresh": { - "version": "1.3.3", - "resolved": "https://registry.npmjs.org/@vitejs/plugin-react-refresh/-/plugin-react-refresh-1.3.3.tgz", - "integrity": "sha512-J3KFwSQKrEK7fgOwTx0PMTlsolZORUch6BswjsM50q+Y7zSvX1ROIRn+tK2VE8SCvbYRHtzEKFlYW3vsWyTosQ==", - "dev": true, - "requires": { - "@babel/core": "^7.12.13", - "@babel/plugin-transform-react-jsx-self": "^7.12.13", - 
"@babel/plugin-transform-react-jsx-source": "^7.12.13", - "react-refresh": "^0.9.0" - } - }, - "acorn": { - "version": "7.4.1", - "resolved": "https://registry.npmjs.org/acorn/-/acorn-7.4.1.tgz", - "integrity": "sha512-nQyp0o1/mNdbTO1PO6kHkwSrmgZ0MT/jCCpNiwbUjGoRN4dlBhqJtoQuCnEOKzgTVwg0ZWiCoQy6SxMebQVh8A==", - "dev": true - }, - "acorn-jsx": { - "version": "5.3.1", - "resolved": "https://registry.npmjs.org/acorn-jsx/-/acorn-jsx-5.3.1.tgz", - "integrity": "sha512-K0Ptm/47OKfQRpNQ2J/oIN/3QYiK6FwW+eJbILhsdxh2WTLdl+30o8aGdTbm5JbffpFFAg/g+zi1E+jvJha5ng==", - "dev": true, - "requires": {} - }, - "ajv": { - "version": "6.12.6", - "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", - "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", - "dev": true, - "requires": { - "fast-deep-equal": "^3.1.1", - "fast-json-stable-stringify": "^2.0.0", - "json-schema-traverse": "^0.4.1", - "uri-js": "^4.2.2" - } - }, - "ansi-colors": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-4.1.1.tgz", - "integrity": "sha512-JoX0apGbHaUJBNl6yF+p6JAFYZ666/hhCGKN5t9QFjbJQKUU/g8MNbFDbvfrgKXvI1QpZplPOnwIo99lX/AAmA==", - "dev": true - }, - "ansi-regex": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.0.tgz", - "integrity": "sha512-bY6fj56OUQ0hU1KjFNDQuJFezqKdrAyFdIevADiqrWHwSlbmBNMHp5ak2f40Pm8JTFyM2mqxkG6ngkHO11f/lg==", - "dev": true - }, - "ansi-styles": { - "version": "3.2.1", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz", - "integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==", - "dev": true, - "requires": { - "color-convert": "^1.9.0" - } - }, - "argparse": { - "version": "1.0.10", - "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", - "integrity": "sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==", - "dev": true, - "requires": { - "sprintf-js": "~1.0.2" - } - }, - "aria-query": { - "version": "4.2.2", - "resolved": "https://registry.npmjs.org/aria-query/-/aria-query-4.2.2.tgz", - "integrity": "sha512-o/HelwhuKpTj/frsOsbNLNgnNGVIFsVP/SW2BSF14gVl7kAfMOJ6/8wUAUvG1R1NHKrfG+2sHZTu0yauT1qBrA==", - "dev": true, - "requires": { - "@babel/runtime": "^7.10.2", - "@babel/runtime-corejs3": "^7.10.2" - } - }, - "array-includes": { - "version": "3.1.3", - "resolved": "https://registry.npmjs.org/array-includes/-/array-includes-3.1.3.tgz", - "integrity": "sha512-gcem1KlBU7c9rB+Rq8/3PPKsK2kjqeEBa3bD5kkQo4nYlOHQCJqIJFqBXDEfwaRuYTT4E+FxA9xez7Gf/e3Q7A==", - "dev": true, - "requires": { - "call-bind": "^1.0.2", - "define-properties": "^1.1.3", - "es-abstract": "^1.18.0-next.2", - "get-intrinsic": "^1.1.1", - "is-string": "^1.0.5" - } - }, - "array-union": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/array-union/-/array-union-2.1.0.tgz", - "integrity": "sha512-HGyxoOTYUyCM6stUe6EJgnd4EoewAI7zMdfqO+kGjnlZmBDz/cR5pf8r/cR4Wq60sL/p0IkcjUEEPwS3GFrIyw==", - "dev": true - }, - "array.prototype.flat": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/array.prototype.flat/-/array.prototype.flat-1.2.4.tgz", - "integrity": "sha512-4470Xi3GAPAjZqFcljX2xzckv1qeKPizoNkiS0+O4IoPR2ZNpcjE0pkhdihlDouK+x6QOast26B4Q/O9DJnwSg==", - "dev": true, - "requires": { - "call-bind": "^1.0.0", - "define-properties": "^1.1.3", - "es-abstract": "^1.18.0-next.1" - } - }, - "array.prototype.flatmap": { - "version": "1.2.4", - 
"resolved": "https://registry.npmjs.org/array.prototype.flatmap/-/array.prototype.flatmap-1.2.4.tgz", - "integrity": "sha512-r9Z0zYoxqHz60vvQbWEdXIEtCwHF0yxaWfno9qzXeNHvfyl3BZqygmGzb84dsubyaXLH4husF+NFgMSdpZhk2Q==", - "dev": true, - "requires": { - "call-bind": "^1.0.0", - "define-properties": "^1.1.3", - "es-abstract": "^1.18.0-next.1", - "function-bind": "^1.1.1" - } - }, - "ast-types-flow": { - "version": "0.0.7", - "resolved": "https://registry.npmjs.org/ast-types-flow/-/ast-types-flow-0.0.7.tgz", - "integrity": "sha1-9wtzXGvKGlycItmCw+Oef+ujva0=", - "dev": true - }, - "astral-regex": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/astral-regex/-/astral-regex-2.0.0.tgz", - "integrity": "sha512-Z7tMw1ytTXt5jqMcOP+OQteU1VuNK9Y02uuJtKQ1Sv69jXQKKg5cibLwGJow8yzZP+eAc18EmLGPal0bp36rvQ==", - "dev": true - }, - "axe-core": { - "version": "4.2.1", - "resolved": "https://registry.npmjs.org/axe-core/-/axe-core-4.2.1.tgz", - "integrity": "sha512-evY7DN8qSIbsW2H/TWQ1bX3sXN1d4MNb5Vb4n7BzPuCwRHdkZ1H2eNLuSh73EoQqkGKUtju2G2HCcjCfhvZIAA==", - "dev": true - }, - "axobject-query": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/axobject-query/-/axobject-query-2.2.0.tgz", - "integrity": "sha512-Td525n+iPOOyUQIeBfcASuG6uJsDOITl7Mds5gFyerkWiX7qhUTdYUBlSgNMyVqtSJqwpt1kXGLdUt6SykLMRA==", - "dev": true - }, - "balanced-match": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", - "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", - "dev": true - }, - "brace-expansion": { - "version": "1.1.11", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", - "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", - "dev": true, - "requires": { - "balanced-match": "^1.0.0", - "concat-map": "0.0.1" - } - }, - "braces": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz", - "integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==", - "dev": true, - "requires": { - "fill-range": "^7.0.1" - } - }, - "browserslist": { - "version": "4.16.6", - "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.16.6.tgz", - "integrity": "sha512-Wspk/PqO+4W9qp5iUTJsa1B/QrYn1keNCcEP5OvP7WBwT4KaDly0uONYmC6Xa3Z5IqnUgS0KcgLYu1l74x0ZXQ==", - "dev": true, - "requires": { - "caniuse-lite": "^1.0.30001219", - "colorette": "^1.2.2", - "electron-to-chromium": "^1.3.723", - "escalade": "^3.1.1", - "node-releases": "^1.1.71" - } - }, - "call-bind": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.2.tgz", - "integrity": "sha512-7O+FbCihrB5WGbFYesctwmTKae6rOiIzmz1icreWJ+0aA7LJfuqhEso2T9ncpcFtzMQtzXf2QGGueWJGTYsqrA==", - "dev": true, - "requires": { - "function-bind": "^1.1.1", - "get-intrinsic": "^1.0.2" - } - }, - "callsites": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", - "integrity": "sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==", - "dev": true - }, - "caniuse-lite": { - "version": "1.0.30001228", - "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001228.tgz", - "integrity": "sha512-QQmLOGJ3DEgokHbMSA8cj2a+geXqmnpyOFT0lhQV6P3/YOJvGDEwoedcwxEQ30gJIwIIunHIicunJ2rzK5gB2A==", - "dev": true - }, - "chalk": { - "version": "2.4.2", - 
"resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz", - "integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==", - "dev": true, - "requires": { - "ansi-styles": "^3.2.1", - "escape-string-regexp": "^1.0.5", - "supports-color": "^5.3.0" - } - }, - "color-convert": { - "version": "1.9.3", - "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", - "integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==", - "dev": true, - "requires": { - "color-name": "1.1.3" - } - }, - "color-name": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", - "integrity": "sha1-p9BVi9icQveV3UIyj3QIMcpTvCU=", - "dev": true - }, - "colorette": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/colorette/-/colorette-1.2.2.tgz", - "integrity": "sha512-MKGMzyfeuutC/ZJ1cba9NqcNpfeqMUcYmyF1ZFY6/Cn7CNSAKx6a+s48sqLqyAiZuaP2TcqMhoo+dlwFnVxT9w==", - "dev": true - }, - "concat-map": { - "version": "0.0.1", - "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", - "integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=", - "dev": true - }, - "confusing-browser-globals": { - "version": "1.0.10", - "resolved": "https://registry.npmjs.org/confusing-browser-globals/-/confusing-browser-globals-1.0.10.tgz", - "integrity": "sha512-gNld/3lySHwuhaVluJUKLePYirM3QNCKzVxqAdhJII9/WXKVX5PURzMVJspS1jTslSqjeuG4KMVTSouit5YPHA==", - "dev": true - }, - "contains-path": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/contains-path/-/contains-path-1.0.0.tgz", - "integrity": "sha1-NFizMhhWA+ju0Y9RjUoQiIo6vJE=", - "dev": true, - "requires": { - "normalize-path": "^2.1.1", - "path-starts-with": "^1.0.0" - } - }, - "convert-source-map": { - "version": "1.7.0", - "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-1.7.0.tgz", - "integrity": "sha512-4FJkXzKXEDB1snCFZlLP4gpC3JILicCpGbzG9f9G7tGqGCzETQ2hWPrcinA9oU4wtf2biUaEH5065UnMeR33oA==", - "dev": true, - "requires": { - "safe-buffer": "~5.1.1" - } - }, - "core-js-pure": { - "version": "3.12.1", - "resolved": "https://registry.npmjs.org/core-js-pure/-/core-js-pure-3.12.1.tgz", - "integrity": "sha512-1cch+qads4JnDSWsvc7d6nzlKAippwjUlf6vykkTLW53VSV+NkE6muGBToAjEA8pG90cSfcud3JgVmW2ds5TaQ==", - "dev": true - }, - "cross-spawn": { - "version": "7.0.3", - "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", - "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==", - "dev": true, - "requires": { - "path-key": "^3.1.0", - "shebang-command": "^2.0.0", - "which": "^2.0.1" - } - }, - "csstype": { - "version": "3.0.8", - "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.0.8.tgz", - "integrity": "sha512-jXKhWqXPmlUeoQnF/EhTtTl4C9SnrxSH/jZUih3jmO6lBKr99rP3/+FmrMj4EFpOXzMtXHAZkd3x0E6h6Fgflw==", - "dev": true - }, - "damerau-levenshtein": { - "version": "1.0.7", - "resolved": "https://registry.npmjs.org/damerau-levenshtein/-/damerau-levenshtein-1.0.7.tgz", - "integrity": "sha512-VvdQIPGdWP0SqFXghj79Wf/5LArmreyMsGLa6FG6iC4t3j7j5s71TrwWmT/4akbDQIqjfACkLZmjXhA7g2oUZw==", - "dev": true - }, - "debug": { - "version": "4.3.1", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.1.tgz", - "integrity": "sha512-doEwdvm4PCeK4K3RQN2ZC2BYUBaxwLARCqZmMjtF8a51J2Rb0xpVloFRnCODwqjpwnAoao4pelN8l3RJdv3gRQ==", - "dev": true, - "requires": { - "ms": "2.1.2" - } - }, - 
"deep-is": { - "version": "0.1.3", - "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.3.tgz", - "integrity": "sha1-s2nW+128E+7PUk+RsHD+7cNXzzQ=", - "dev": true - }, - "define-properties": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/define-properties/-/define-properties-1.1.3.tgz", - "integrity": "sha512-3MqfYKj2lLzdMSf8ZIZE/V+Zuy+BgD6f164e8K2w7dgnpKArBDerGYpM46IYYcjnkdPNMjPk9A6VFB8+3SKlXQ==", - "dev": true, - "requires": { - "object-keys": "^1.0.12" - } - }, - "dir-glob": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/dir-glob/-/dir-glob-3.0.1.tgz", - "integrity": "sha512-WkrWp9GR4KXfKGYzOLmTuGVi1UWFfws377n9cc55/tb6DuqyF6pcQ5AbiHEshaDpY9v6oaSr2XCDidGmMwdzIA==", - "dev": true, - "requires": { - "path-type": "^4.0.0" - } - }, - "doctrine": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-3.0.0.tgz", - "integrity": "sha512-yS+Q5i3hBf7GBkd4KG8a7eBNNWNGLTaEwwYWUijIYM7zrlYDM0BFXHjjPWlWZ1Rg7UaddZeIDmi9jF3HmqiQ2w==", - "dev": true, - "requires": { - "esutils": "^2.0.2" - } - }, - "electron-to-chromium": { - "version": "1.3.732", - "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.3.732.tgz", - "integrity": "sha512-qKD5Pbq+QMk4nea4lMuncUMhpEiQwaJyCW7MrvissnRcBDENhVfDmAqQYRQ3X525oTzhar9Zh1cK0L2d1UKYcw==", - "dev": true - }, - "emoji-regex": { - "version": "9.2.2", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz", - "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==", - "dev": true - }, - "enquirer": { - "version": "2.3.6", - "resolved": "https://registry.npmjs.org/enquirer/-/enquirer-2.3.6.tgz", - "integrity": "sha512-yjNnPr315/FjS4zIsUxYguYUPP2e1NK4d7E7ZOLiyYCcbFBiTMyID+2wvm2w6+pZ/odMA7cRkjhsPbltwBOrLg==", - "dev": true, - "requires": { - "ansi-colors": "^4.1.1" - } - }, - "error-ex": { - "version": "1.3.2", - "resolved": "https://registry.npmjs.org/error-ex/-/error-ex-1.3.2.tgz", - "integrity": "sha512-7dFHNmqeFSEt2ZBsCriorKnn3Z2pj+fd9kmI6QoWw4//DL+icEBfc0U7qJCisqrTsKTjw4fNFy2pW9OqStD84g==", - "dev": true, - "requires": { - "is-arrayish": "^0.2.1" - } - }, - "es-abstract": { - "version": "1.18.0", - "resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.18.0.tgz", - "integrity": "sha512-LJzK7MrQa8TS0ja2w3YNLzUgJCGPdPOV1yVvezjNnS89D+VR08+Szt2mz3YB2Dck/+w5tfIq/RoUAFqJJGM2yw==", - "dev": true, - "requires": { - "call-bind": "^1.0.2", - "es-to-primitive": "^1.2.1", - "function-bind": "^1.1.1", - "get-intrinsic": "^1.1.1", - "has": "^1.0.3", - "has-symbols": "^1.0.2", - "is-callable": "^1.2.3", - "is-negative-zero": "^2.0.1", - "is-regex": "^1.1.2", - "is-string": "^1.0.5", - "object-inspect": "^1.9.0", - "object-keys": "^1.1.1", - "object.assign": "^4.1.2", - "string.prototype.trimend": "^1.0.4", - "string.prototype.trimstart": "^1.0.4", - "unbox-primitive": "^1.0.0" - } - }, - "es-to-primitive": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/es-to-primitive/-/es-to-primitive-1.2.1.tgz", - "integrity": "sha512-QCOllgZJtaUo9miYBcLChTUaHNjJF3PYs1VidD7AwiEj1kYxKeQTctLAezAOH5ZKRH0g2IgPn6KwB4IT8iRpvA==", - "dev": true, - "requires": { - "is-callable": "^1.1.4", - "is-date-object": "^1.0.1", - "is-symbol": "^1.0.2" - } - }, - "esbuild": { - "version": "0.11.23", - "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.11.23.tgz", - "integrity": "sha512-iaiZZ9vUF5wJV8ob1tl+5aJTrwDczlvGP0JoMmnpC2B0ppiMCu8n8gmy5ZTGl5bcG081XBVn+U+jP+mPFm5T5Q==", - "dev": true - 
}, - "escalade": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.1.tgz", - "integrity": "sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==", - "dev": true - }, - "escape-string-regexp": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", - "integrity": "sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ=", - "dev": true - }, - "eslint": { - "version": "7.26.0", - "resolved": "https://registry.npmjs.org/eslint/-/eslint-7.26.0.tgz", - "integrity": "sha512-4R1ieRf52/izcZE7AlLy56uIHHDLT74Yzz2Iv2l6kDaYvEu9x+wMB5dZArVL8SYGXSYV2YAg70FcW5Y5nGGNIg==", - "dev": true, - "requires": { - "@babel/code-frame": "7.12.11", - "@eslint/eslintrc": "^0.4.1", - "ajv": "^6.10.0", - "chalk": "^4.0.0", - "cross-spawn": "^7.0.2", - "debug": "^4.0.1", - "doctrine": "^3.0.0", - "enquirer": "^2.3.5", - "eslint-scope": "^5.1.1", - "eslint-utils": "^2.1.0", - "eslint-visitor-keys": "^2.0.0", - "espree": "^7.3.1", - "esquery": "^1.4.0", - "esutils": "^2.0.2", - "file-entry-cache": "^6.0.1", - "functional-red-black-tree": "^1.0.1", - "glob-parent": "^5.0.0", - "globals": "^13.6.0", - "ignore": "^4.0.6", - "import-fresh": "^3.0.0", - "imurmurhash": "^0.1.4", - "is-glob": "^4.0.0", - "js-yaml": "^3.13.1", - "json-stable-stringify-without-jsonify": "^1.0.1", - "levn": "^0.4.1", - "lodash": "^4.17.21", - "minimatch": "^3.0.4", - "natural-compare": "^1.4.0", - "optionator": "^0.9.1", - "progress": "^2.0.0", - "regexpp": "^3.1.0", - "semver": "^7.2.1", - "strip-ansi": "^6.0.0", - "strip-json-comments": "^3.1.0", - "table": "^6.0.4", - "text-table": "^0.2.0", - "v8-compile-cache": "^2.0.3" - }, - "dependencies": { - "@babel/code-frame": { - "version": "7.12.11", - "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.12.11.tgz", - "integrity": "sha512-Zt1yodBx1UcyiePMSkWnU4hPqhwq7hGi2nFL1LeA3EUl+q2LQx16MISgJ0+z7dnmgvP9QtIleuETGOiOH1RcIw==", - "dev": true, - "requires": { - "@babel/highlight": "^7.10.4" - } - }, - "ansi-styles": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", - "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", - "dev": true, - "requires": { - "color-convert": "^2.0.1" - } - }, - "chalk": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.1.tgz", - "integrity": "sha512-diHzdDKxcU+bAsUboHLPEDQiw0qEe0qd7SYUn3HgcFlWgbDcfLGswOHYeGrHKzG9z6UYf01d9VFMfZxPM1xZSg==", - "dev": true, - "requires": { - "ansi-styles": "^4.1.0", - "supports-color": "^7.1.0" - } - }, - "color-convert": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", - "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", - "dev": true, - "requires": { - "color-name": "~1.1.4" - } - }, - "color-name": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", - "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", - "dev": true - }, - "globals": { - "version": "13.8.0", - "resolved": "https://registry.npmjs.org/globals/-/globals-13.8.0.tgz", - "integrity": "sha512-rHtdA6+PDBIjeEvA91rpqzEvk/k3/i7EeNQiryiWuJH0Hw9cpyJMAt2jtbAwUaRdhD+573X4vWw6IcjKPasi9Q==", - "dev": true, - "requires": { - "type-fest": "^0.20.2" - } - }, - "has-flag": { - 
"version": "4.0.0", - "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", - "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", - "dev": true - }, - "supports-color": { - "version": "7.2.0", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", - "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", - "dev": true, - "requires": { - "has-flag": "^4.0.0" - } - }, - "type-fest": { - "version": "0.20.2", - "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.20.2.tgz", - "integrity": "sha512-Ne+eE4r0/iWnpAxD852z3A+N0Bt5RN//NjJwRd2VFHEmrywxf5vsZlh4R6lixl6B+wz/8d+maTSAkN1FIkI3LQ==", - "dev": true - } - } - }, - "eslint-config-airbnb": { - "version": "18.2.1", - "resolved": "https://registry.npmjs.org/eslint-config-airbnb/-/eslint-config-airbnb-18.2.1.tgz", - "integrity": "sha512-glZNDEZ36VdlZWoxn/bUR1r/sdFKPd1mHPbqUtkctgNG4yT2DLLtJ3D+yCV+jzZCc2V1nBVkmdknOJBZ5Hc0fg==", - "dev": true, - "requires": { - "eslint-config-airbnb-base": "^14.2.1", - "object.assign": "^4.1.2", - "object.entries": "^1.1.2" - } - }, - "eslint-config-airbnb-base": { - "version": "14.2.1", - "resolved": "https://registry.npmjs.org/eslint-config-airbnb-base/-/eslint-config-airbnb-base-14.2.1.tgz", - "integrity": "sha512-GOrQyDtVEc1Xy20U7vsB2yAoB4nBlfH5HZJeatRXHleO+OS5Ot+MWij4Dpltw4/DyIkqUfqz1epfhVR5XWWQPA==", - "dev": true, - "requires": { - "confusing-browser-globals": "^1.0.10", - "object.assign": "^4.1.2", - "object.entries": "^1.1.2" - } - }, - "eslint-import-resolver-node": { - "version": "0.3.4", - "resolved": "https://registry.npmjs.org/eslint-import-resolver-node/-/eslint-import-resolver-node-0.3.4.tgz", - "integrity": "sha512-ogtf+5AB/O+nM6DIeBUNr2fuT7ot9Qg/1harBfBtaP13ekEWFQEEMP94BCB7zaNW3gyY+8SHYF00rnqYwXKWOA==", - "dev": true, - "requires": { - "debug": "^2.6.9", - "resolve": "^1.13.1" - }, - "dependencies": { - "debug": { - "version": "2.6.9", - "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", - "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", - "dev": true, - "requires": { - "ms": "2.0.0" - } - }, - "ms": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", - "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=", - "dev": true - } - } - }, - "eslint-module-utils": { - "version": "2.6.1", - "resolved": "https://registry.npmjs.org/eslint-module-utils/-/eslint-module-utils-2.6.1.tgz", - "integrity": "sha512-ZXI9B8cxAJIH4nfkhTwcRTEAnrVfobYqwjWy/QMCZ8rHkZHFjf9yO4BzpiF9kCSfNlMG54eKigISHpX0+AaT4A==", - "dev": true, - "requires": { - "debug": "^3.2.7", - "pkg-dir": "^2.0.0" - }, - "dependencies": { - "debug": { - "version": "3.2.7", - "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.7.tgz", - "integrity": "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==", - "dev": true, - "requires": { - "ms": "^2.1.1" - } - } - } - }, - "eslint-plugin-import": { - "version": "2.23.2", - "resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.23.2.tgz", - "integrity": "sha512-LmNoRptHBxOP+nb0PIKz1y6OSzCJlB+0g0IGS3XV4KaKk2q4szqQ6s6F1utVf5ZRkxk/QOTjdxe7v4VjS99Bsg==", - "dev": true, - "requires": { - "array-includes": "^3.1.3", - "array.prototype.flat": "^1.2.4", - "contains-path": "^1.0.0", - "debug": "^2.6.9", - "doctrine": "^2.1.0", - "eslint-import-resolver-node": 
"^0.3.4", - "eslint-module-utils": "^2.6.1", - "find-up": "^2.0.0", - "has": "^1.0.3", - "is-core-module": "^2.4.0", - "minimatch": "^3.0.4", - "object.values": "^1.1.3", - "pkg-up": "^2.0.0", - "read-pkg-up": "^3.0.0", - "resolve": "^1.20.0", - "tsconfig-paths": "^3.9.0" - }, - "dependencies": { - "debug": { - "version": "2.6.9", - "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", - "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", - "dev": true, - "requires": { - "ms": "2.0.0" - } - }, - "doctrine": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-2.1.0.tgz", - "integrity": "sha512-35mSku4ZXK0vfCuHEDAwt55dg2jNajHZ1odvF+8SSr82EsZY4QmXfuWso8oEd8zRhVObSN18aM0CjSdoBX7zIw==", - "dev": true, - "requires": { - "esutils": "^2.0.2" - } - }, - "ms": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", - "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=", - "dev": true - } - } - }, - "eslint-plugin-jsx-a11y": { - "version": "6.4.1", - "resolved": "https://registry.npmjs.org/eslint-plugin-jsx-a11y/-/eslint-plugin-jsx-a11y-6.4.1.tgz", - "integrity": "sha512-0rGPJBbwHoGNPU73/QCLP/vveMlM1b1Z9PponxO87jfr6tuH5ligXbDT6nHSSzBC8ovX2Z+BQu7Bk5D/Xgq9zg==", - "dev": true, - "requires": { - "@babel/runtime": "^7.11.2", - "aria-query": "^4.2.2", - "array-includes": "^3.1.1", - "ast-types-flow": "^0.0.7", - "axe-core": "^4.0.2", - "axobject-query": "^2.2.0", - "damerau-levenshtein": "^1.0.6", - "emoji-regex": "^9.0.0", - "has": "^1.0.3", - "jsx-ast-utils": "^3.1.0", - "language-tags": "^1.0.5" - } - }, - "eslint-plugin-react": { - "version": "7.23.2", - "resolved": "https://registry.npmjs.org/eslint-plugin-react/-/eslint-plugin-react-7.23.2.tgz", - "integrity": "sha512-AfjgFQB+nYszudkxRkTFu0UR1zEQig0ArVMPloKhxwlwkzaw/fBiH0QWcBBhZONlXqQC51+nfqFrkn4EzHcGBw==", - "dev": true, - "requires": { - "array-includes": "^3.1.3", - "array.prototype.flatmap": "^1.2.4", - "doctrine": "^2.1.0", - "has": "^1.0.3", - "jsx-ast-utils": "^2.4.1 || ^3.0.0", - "minimatch": "^3.0.4", - "object.entries": "^1.1.3", - "object.fromentries": "^2.0.4", - "object.values": "^1.1.3", - "prop-types": "^15.7.2", - "resolve": "^2.0.0-next.3", - "string.prototype.matchall": "^4.0.4" - }, - "dependencies": { - "doctrine": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-2.1.0.tgz", - "integrity": "sha512-35mSku4ZXK0vfCuHEDAwt55dg2jNajHZ1odvF+8SSr82EsZY4QmXfuWso8oEd8zRhVObSN18aM0CjSdoBX7zIw==", - "dev": true, - "requires": { - "esutils": "^2.0.2" - } - }, - "resolve": { - "version": "2.0.0-next.3", - "resolved": "https://registry.npmjs.org/resolve/-/resolve-2.0.0-next.3.tgz", - "integrity": "sha512-W8LucSynKUIDu9ylraa7ueVZ7hc0uAgJBxVsQSKOXOyle8a93qXhcz+XAXZ8bIq2d6i4Ehddn6Evt+0/UwKk6Q==", - "dev": true, - "requires": { - "is-core-module": "^2.2.0", - "path-parse": "^1.0.6" - } - } - } - }, - "eslint-plugin-react-hooks": { - "version": "4.2.0", - "resolved": "https://registry.npmjs.org/eslint-plugin-react-hooks/-/eslint-plugin-react-hooks-4.2.0.tgz", - "integrity": "sha512-623WEiZJqxR7VdxFCKLI6d6LLpwJkGPYKODnkH3D7WpOG5KM8yWueBd8TLsNAetEJNF5iJmolaAKO3F8yzyVBQ==", - "dev": true, - "requires": {} - }, - "eslint-scope": { - "version": "5.1.1", - "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-5.1.1.tgz", - "integrity": "sha512-2NxwbF/hZ0KpepYN0cNbo+FN6XoK7GaHlQhgx/hIZl6Va0bF45RQOOwhLIy8lQDbuCiadSLCBnH2CFYquit5bw==", - "dev": true, - "requires": { - 
"esrecurse": "^4.3.0", - "estraverse": "^4.1.1" - } - }, - "eslint-utils": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/eslint-utils/-/eslint-utils-2.1.0.tgz", - "integrity": "sha512-w94dQYoauyvlDc43XnGB8lU3Zt713vNChgt4EWwhXAP2XkBvndfxF0AgIqKOOasjPIPzj9JqgwkwbCYD0/V3Zg==", - "dev": true, - "requires": { - "eslint-visitor-keys": "^1.1.0" - }, - "dependencies": { - "eslint-visitor-keys": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-1.3.0.tgz", - "integrity": "sha512-6J72N8UNa462wa/KFODt/PJ3IU60SDpC3QXC1Hjc1BXXpfL2C9R5+AU7jhe0F6GREqVMh4Juu+NY7xn+6dipUQ==", - "dev": true - } - } - }, - "eslint-visitor-keys": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-2.1.0.tgz", - "integrity": "sha512-0rSmRBzXgDzIsD6mGdJgevzgezI534Cer5L/vyMX0kHzT/jiB43jRhd9YUlMGYLQy2zprNmoT8qasCGtY+QaKw==", - "dev": true - }, - "espree": { - "version": "7.3.1", - "resolved": "https://registry.npmjs.org/espree/-/espree-7.3.1.tgz", - "integrity": "sha512-v3JCNCE64umkFpmkFGqzVKsOT0tN1Zr+ueqLZfpV1Ob8e+CEgPWa+OxCoGH3tnhimMKIaBm4m/vaRpJ/krRz2g==", - "dev": true, - "requires": { - "acorn": "^7.4.0", - "acorn-jsx": "^5.3.1", - "eslint-visitor-keys": "^1.3.0" - }, - "dependencies": { - "eslint-visitor-keys": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-1.3.0.tgz", - "integrity": "sha512-6J72N8UNa462wa/KFODt/PJ3IU60SDpC3QXC1Hjc1BXXpfL2C9R5+AU7jhe0F6GREqVMh4Juu+NY7xn+6dipUQ==", - "dev": true - } - } - }, - "esprima": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz", - "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==", - "dev": true - }, - "esquery": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/esquery/-/esquery-1.4.0.tgz", - "integrity": "sha512-cCDispWt5vHHtwMY2YrAQ4ibFkAL8RbH5YGBnZBc90MolvvfkkQcJro/aZiAQUlQ3qgrYS6D6v8Gc5G5CQsc9w==", - "dev": true, - "requires": { - "estraverse": "^5.1.0" - }, - "dependencies": { - "estraverse": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.2.0.tgz", - "integrity": "sha512-BxbNGGNm0RyRYvUdHpIwv9IWzeM9XClbOxwoATuFdOE7ZE6wHL+HQ5T8hoPM+zHvmKzzsEqhgy0GrQ5X13afiQ==", - "dev": true - } - } - }, - "esrecurse": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/esrecurse/-/esrecurse-4.3.0.tgz", - "integrity": "sha512-KmfKL3b6G+RXvP8N1vr3Tq1kL/oCFgn2NYXEtqP8/L3pKapUA4G8cFVaoF3SU323CD4XypR/ffioHmkti6/Tag==", - "dev": true, - "requires": { - "estraverse": "^5.2.0" - }, - "dependencies": { - "estraverse": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.2.0.tgz", - "integrity": "sha512-BxbNGGNm0RyRYvUdHpIwv9IWzeM9XClbOxwoATuFdOE7ZE6wHL+HQ5T8hoPM+zHvmKzzsEqhgy0GrQ5X13afiQ==", - "dev": true - } - } - }, - "estraverse": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-4.3.0.tgz", - "integrity": "sha512-39nnKffWz8xN1BU/2c79n9nB9HDzo0niYUqx6xyqUnyoAnQyyWpOTdZEeiCch8BBu515t4wp9ZmgVfVhn9EBpw==", - "dev": true - }, - "esutils": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", - "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", - "dev": true - }, - "fast-deep-equal": { - "version": "3.1.3", - "resolved": 
"https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", - "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", - "dev": true - }, - "fast-glob": { - "version": "3.2.5", - "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.2.5.tgz", - "integrity": "sha512-2DtFcgT68wiTTiwZ2hNdJfcHNke9XOfnwmBRWXhmeKM8rF0TGwmC/Qto3S7RoZKp5cilZbxzO5iTNTQsJ+EeDg==", - "dev": true, - "requires": { - "@nodelib/fs.stat": "^2.0.2", - "@nodelib/fs.walk": "^1.2.3", - "glob-parent": "^5.1.0", - "merge2": "^1.3.0", - "micromatch": "^4.0.2", - "picomatch": "^2.2.1" - } - }, - "fast-json-stable-stringify": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", - "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==", - "dev": true - }, - "fast-levenshtein": { - "version": "2.0.6", - "resolved": "https://registry.npmjs.org/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz", - "integrity": "sha1-PYpcZog6FqMMqGQ+hR8Zuqd5eRc=", - "dev": true - }, - "fastq": { - "version": "1.11.0", - "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.11.0.tgz", - "integrity": "sha512-7Eczs8gIPDrVzT+EksYBcupqMyxSHXXrHOLRRxU2/DicV8789MRBRR8+Hc2uWzUupOs4YS4JzBmBxjjCVBxD/g==", - "dev": true, - "requires": { - "reusify": "^1.0.4" - } - }, - "file-entry-cache": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-6.0.1.tgz", - "integrity": "sha512-7Gps/XWymbLk2QLYK4NzpMOrYjMhdIxXuIvy2QBsLE6ljuodKvdkWs/cpyJJ3CVIVpH0Oi1Hvg1ovbMzLdFBBg==", - "dev": true, - "requires": { - "flat-cache": "^3.0.4" - } - }, - "fill-range": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", - "integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==", - "dev": true, - "requires": { - "to-regex-range": "^5.0.1" - } - }, - "find-up": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/find-up/-/find-up-2.1.0.tgz", - "integrity": "sha1-RdG35QbHF93UgndaK3eSCjwMV6c=", - "dev": true, - "requires": { - "locate-path": "^2.0.0" - } - }, - "flat-cache": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/flat-cache/-/flat-cache-3.0.4.tgz", - "integrity": "sha512-dm9s5Pw7Jc0GvMYbshN6zchCA9RgQlzzEZX3vylR9IqFfS8XciblUXOKfW6SiuJ0e13eDYZoZV5wdrev7P3Nwg==", - "dev": true, - "requires": { - "flatted": "^3.1.0", - "rimraf": "^3.0.2" - } - }, - "flatted": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.1.1.tgz", - "integrity": "sha512-zAoAQiudy+r5SvnSw3KJy5os/oRJYHzrzja/tBDqrZtNhUw8bt6y8OBzMWcjWr+8liV8Eb6yOhw8WZ7VFZ5ZzA==", - "dev": true - }, - "fs.realpath": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", - "integrity": "sha1-FQStJSMVjKpA20onh8sBQRmU6k8=", - "dev": true - }, - "fsevents": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", - "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", - "dev": true, - "optional": true - }, - "function-bind": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.1.tgz", - "integrity": "sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A==", - "dev": true - }, - 
"functional-red-black-tree": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/functional-red-black-tree/-/functional-red-black-tree-1.0.1.tgz", - "integrity": "sha1-GwqzvVU7Kg1jmdKcDj6gslIHgyc=", - "dev": true - }, - "gensync": { - "version": "1.0.0-beta.2", - "resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz", - "integrity": "sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==", - "dev": true - }, - "get-intrinsic": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.1.1.tgz", - "integrity": "sha512-kWZrnVM42QCiEA2Ig1bG8zjoIMOgxWwYCEeNdwY6Tv/cOSeGpcoX4pXHfKUxNKVoArnrEr2e9srnAxxGIraS9Q==", - "dev": true, - "requires": { - "function-bind": "^1.1.1", - "has": "^1.0.3", - "has-symbols": "^1.0.1" - } - }, - "glob": { - "version": "7.1.7", - "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.7.tgz", - "integrity": "sha512-OvD9ENzPLbegENnYP5UUfJIirTg4+XwMWGaQfQTY0JenxNvvIKP3U3/tAQSPIu/lHxXYSZmpXlUHeqAIdKzBLQ==", - "dev": true, - "requires": { - "fs.realpath": "^1.0.0", - "inflight": "^1.0.4", - "inherits": "2", - "minimatch": "^3.0.4", - "once": "^1.3.0", - "path-is-absolute": "^1.0.0" - } - }, - "glob-parent": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", - "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", - "dev": true, - "requires": { - "is-glob": "^4.0.1" - } - }, - "globals": { - "version": "11.12.0", - "resolved": "https://registry.npmjs.org/globals/-/globals-11.12.0.tgz", - "integrity": "sha512-WOBp/EEGUiIsJSp7wcv/y6MO+lV9UoncWqxuFfm8eBwzWNgyfBd6Gz+IeKQ9jCmyhoH99g15M3T+QaVHFjizVA==", - "dev": true - }, - "globby": { - "version": "11.0.3", - "resolved": "https://registry.npmjs.org/globby/-/globby-11.0.3.tgz", - "integrity": "sha512-ffdmosjA807y7+lA1NM0jELARVmYul/715xiILEjo3hBLPTcirgQNnXECn5g3mtR8TOLCVbkfua1Hpen25/Xcg==", - "dev": true, - "requires": { - "array-union": "^2.1.0", - "dir-glob": "^3.0.1", - "fast-glob": "^3.1.1", - "ignore": "^5.1.4", - "merge2": "^1.3.0", - "slash": "^3.0.0" - }, - "dependencies": { - "ignore": { - "version": "5.1.8", - "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.1.8.tgz", - "integrity": "sha512-BMpfD7PpiETpBl/A6S498BaIJ6Y/ABT93ETbby2fP00v4EbvPBXWEoaR1UBPKs3iR53pJY7EtZk5KACI57i1Uw==", - "dev": true - } - } - }, - "graceful-fs": { - "version": "4.2.6", - "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.6.tgz", - "integrity": "sha512-nTnJ528pbqxYanhpDYsi4Rd8MAeaBA67+RZ10CM1m3bTAVFEDcd5AuA4a6W5YkGZ1iNXHzZz8T6TBKLeBuNriQ==", - "dev": true - }, - "has": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/has/-/has-1.0.3.tgz", - "integrity": "sha512-f2dvO0VU6Oej7RkWJGrehjbzMAjFp5/VKPp5tTpWIV4JHHZK1/BxbFRtf/siA2SWTe09caDmVtYYzWEIbBS4zw==", - "dev": true, - "requires": { - "function-bind": "^1.1.1" - } - }, - "has-bigints": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/has-bigints/-/has-bigints-1.0.1.tgz", - "integrity": "sha512-LSBS2LjbNBTf6287JEbEzvJgftkF5qFkmCo9hDRpAzKhUOlJ+hx8dd4USs00SgsUNwc4617J9ki5YtEClM2ffA==", - "dev": true - }, - "has-flag": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", - "integrity": "sha1-tdRU3CGZriJWmfNGfloH87lVuv0=", - "dev": true - }, - "has-symbols": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.0.2.tgz", - 
"integrity": "sha512-chXa79rL/UC2KlX17jo3vRGz0azaWEx5tGqZg5pO3NUyEJVB17dMruQlzCCOfUvElghKcm5194+BCRvi2Rv/Gw==", - "dev": true - }, - "history": { - "version": "4.10.1", - "resolved": "https://registry.npmjs.org/history/-/history-4.10.1.tgz", - "integrity": "sha512-36nwAD620w12kuzPAsyINPWJqlNbij+hpK1k9XRloDtym8mxzGYl2c17LnV6IAGB2Dmg4tEa7G7DlawS0+qjew==", - "requires": { - "@babel/runtime": "^7.1.2", - "loose-envify": "^1.2.0", - "resolve-pathname": "^3.0.0", - "tiny-invariant": "^1.0.2", - "tiny-warning": "^1.0.0", - "value-equal": "^1.0.1" - } - }, - "hoist-non-react-statics": { - "version": "3.3.2", - "resolved": "https://registry.npmjs.org/hoist-non-react-statics/-/hoist-non-react-statics-3.3.2.tgz", - "integrity": "sha512-/gGivxi8JPKWNm/W0jSmzcMPpfpPLc3dY/6GxhX2hQ9iGj3aDfklV4ET7NjKpSinLpJ5vafa9iiGIEZg10SfBw==", - "requires": { - "react-is": "^16.7.0" - } - }, - "hosted-git-info": { - "version": "2.8.9", - "resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-2.8.9.tgz", - "integrity": "sha512-mxIDAb9Lsm6DoOJ7xH+5+X4y1LU/4Hi50L9C5sIswK3JzULS4bwk1FvjdBgvYR4bzT4tuUQiC15FE2f5HbLvYw==", - "dev": true - }, - "ignore": { - "version": "4.0.6", - "resolved": "https://registry.npmjs.org/ignore/-/ignore-4.0.6.tgz", - "integrity": "sha512-cyFDKrqc/YdcWFniJhzI42+AzS+gNwmUzOSFcRCQYwySuBBBy/KjuxWLZ/FHEH6Moq1NizMOBWyTcv8O4OZIMg==", - "dev": true - }, - "import-fresh": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.0.tgz", - "integrity": "sha512-veYYhQa+D1QBKznvhUHxb8faxlrwUnxseDAbAp457E0wLNio2bOSKnjYDhMj+YiAq61xrMGhQk9iXVk5FzgQMw==", - "dev": true, - "requires": { - "parent-module": "^1.0.0", - "resolve-from": "^4.0.0" - } - }, - "imurmurhash": { - "version": "0.1.4", - "resolved": "https://registry.npmjs.org/imurmurhash/-/imurmurhash-0.1.4.tgz", - "integrity": "sha1-khi5srkoojixPcT7a21XbyMUU+o=", - "dev": true - }, - "inflight": { - "version": "1.0.6", - "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", - "integrity": "sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk=", - "dev": true, - "requires": { - "once": "^1.3.0", - "wrappy": "1" - } - }, - "inherits": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", - "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", - "dev": true - }, - "internal-slot": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/internal-slot/-/internal-slot-1.0.3.tgz", - "integrity": "sha512-O0DB1JC/sPyZl7cIo78n5dR7eUSwwpYPiXRhTzNxZVAMUuB8vlnRFyLxdrVToks6XPLVnFfbzaVd5WLjhgg+vA==", - "dev": true, - "requires": { - "get-intrinsic": "^1.1.0", - "has": "^1.0.3", - "side-channel": "^1.0.4" - } - }, - "is-arrayish": { - "version": "0.2.1", - "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.2.1.tgz", - "integrity": "sha1-d8mYQFJ6qOyxqLppe4BkWnqSap0=", - "dev": true - }, - "is-bigint": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/is-bigint/-/is-bigint-1.0.2.tgz", - "integrity": "sha512-0JV5+SOCQkIdzjBK9buARcV804Ddu7A0Qet6sHi3FimE9ne6m4BGQZfRn+NZiXbBk4F4XmHfDZIipLj9pX8dSA==", - "dev": true - }, - "is-boolean-object": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/is-boolean-object/-/is-boolean-object-1.1.1.tgz", - "integrity": "sha512-bXdQWkECBUIAcCkeH1unwJLIpZYaa5VvuygSyS/c2lf719mTKZDU5UdDRlpd01UjADgmW8RfqaP+mRaVPdr/Ng==", - "dev": true, - "requires": { - "call-bind": "^1.0.2" - } - }, - "is-callable": { - "version": "1.2.3", - 
"resolved": "https://registry.npmjs.org/is-callable/-/is-callable-1.2.3.tgz", - "integrity": "sha512-J1DcMe8UYTBSrKezuIUTUwjXsho29693unXM2YhJUTR2txK/eG47bvNa/wipPFmZFgr/N6f1GA66dv0mEyTIyQ==", - "dev": true - }, - "is-core-module": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.4.0.tgz", - "integrity": "sha512-6A2fkfq1rfeQZjxrZJGerpLCTHRNEBiSgnu0+obeJpEPZRUooHgsizvzv0ZjJwOz3iWIHdJtVWJ/tmPr3D21/A==", - "dev": true, - "requires": { - "has": "^1.0.3" - } - }, - "is-date-object": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/is-date-object/-/is-date-object-1.0.4.tgz", - "integrity": "sha512-/b4ZVsG7Z5XVtIxs/h9W8nvfLgSAyKYdtGWQLbqy6jA1icmgjf8WCoTKgeS4wy5tYaPePouzFMANbnj94c2Z+A==", - "dev": true - }, - "is-extglob": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", - "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=", - "dev": true - }, - "is-fullwidth-code-point": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", - "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", - "dev": true - }, - "is-glob": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.1.tgz", - "integrity": "sha512-5G0tKtBTFImOqDnLB2hG6Bp2qcKEFduo4tZu9MT/H6NQv/ghhy30o55ufafxJ/LdH79LLs2Kfrn85TLKyA7BUg==", - "dev": true, - "requires": { - "is-extglob": "^2.1.1" - } - }, - "is-negative-zero": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/is-negative-zero/-/is-negative-zero-2.0.1.tgz", - "integrity": "sha512-2z6JzQvZRa9A2Y7xC6dQQm4FSTSTNWjKIYYTt4246eMTJmIo0Q+ZyOsU66X8lxK1AbB92dFeglPLrhwpeRKO6w==", - "dev": true - }, - "is-number": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", - "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", - "dev": true - }, - "is-number-object": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/is-number-object/-/is-number-object-1.0.5.tgz", - "integrity": "sha512-RU0lI/n95pMoUKu9v1BZP5MBcZuNSVJkMkAG2dJqC4z2GlkGUNeH68SuHuBKBD/XFe+LHZ+f9BKkLET60Niedw==", - "dev": true - }, - "is-regex": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/is-regex/-/is-regex-1.1.3.tgz", - "integrity": "sha512-qSVXFz28HM7y+IWX6vLCsexdlvzT1PJNFSBuaQLQ5o0IEw8UDYW6/2+eCMVyIsbM8CNLX2a/QWmSpyxYEHY7CQ==", - "dev": true, - "requires": { - "call-bind": "^1.0.2", - "has-symbols": "^1.0.2" - } - }, - "is-string": { - "version": "1.0.6", - "resolved": "https://registry.npmjs.org/is-string/-/is-string-1.0.6.tgz", - "integrity": "sha512-2gdzbKUuqtQ3lYNrUTQYoClPhm7oQu4UdpSZMp1/DGgkHBT8E2Z1l0yMdb6D4zNAxwDiMv8MdulKROJGNl0Q0w==", - "dev": true - }, - "is-symbol": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/is-symbol/-/is-symbol-1.0.4.tgz", - "integrity": "sha512-C/CPBqKWnvdcxqIARxyOh4v1UUEOCHpgDa0WYgpKDFMszcrPcffg5uhwSgPCLD2WWxmq6isisz87tzT01tuGhg==", - "dev": true, - "requires": { - "has-symbols": "^1.0.2" - } - }, - "isarray": { - "version": "0.0.1", - "resolved": "https://registry.npmjs.org/isarray/-/isarray-0.0.1.tgz", - "integrity": "sha1-ihis/Kmo9Bd+Cav8YDiTmwXR7t8=" - }, - "isexe": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", - "integrity": "sha1-6PvzdNxVb/iUehDcsFctYz8s+hA=", - "dev": true - }, - "js-tokens": { - 
"version": "4.0.0", - "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", - "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==" - }, - "js-yaml": { - "version": "3.14.1", - "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.1.tgz", - "integrity": "sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==", - "dev": true, - "requires": { - "argparse": "^1.0.7", - "esprima": "^4.0.0" - } - }, - "jsesc": { - "version": "2.5.2", - "resolved": "https://registry.npmjs.org/jsesc/-/jsesc-2.5.2.tgz", - "integrity": "sha512-OYu7XEzjkCQ3C5Ps3QIZsQfNpqoJyZZA99wd9aWd05NCtC5pWOkShK2mkL6HXQR6/Cy2lbNdPlZBpuQHXE63gA==", - "dev": true - }, - "json-parse-better-errors": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/json-parse-better-errors/-/json-parse-better-errors-1.0.2.tgz", - "integrity": "sha512-mrqyZKfX5EhL7hvqcV6WG1yYjnjeuYDzDhhcAAUrq8Po85NBQBJP+ZDUT75qZQ98IkUoBqdkExkukOU7Ts2wrw==", - "dev": true - }, - "json-schema-traverse": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", - "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", - "dev": true - }, - "json-stable-stringify-without-jsonify": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz", - "integrity": "sha1-nbe1lJatPzz+8wp1FC0tkwrXJlE=", - "dev": true - }, - "json5": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.0.tgz", - "integrity": "sha512-f+8cldu7X/y7RAJurMEJmdoKXGB/X550w2Nr3tTbezL6RwEE/iMcm+tZnXeoZtKuOq6ft8+CqzEkrIgx1fPoQA==", - "dev": true, - "requires": { - "minimist": "^1.2.5" - } - }, - "jsx-ast-utils": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/jsx-ast-utils/-/jsx-ast-utils-3.2.0.tgz", - "integrity": "sha512-EIsmt3O3ljsU6sot/J4E1zDRxfBNrhjyf/OKjlydwgEimQuznlM4Wv7U+ueONJMyEn1WRE0K8dhi3dVAXYT24Q==", - "dev": true, - "requires": { - "array-includes": "^3.1.2", - "object.assign": "^4.1.2" - } - }, - "language-subtag-registry": { - "version": "0.3.21", - "resolved": "https://registry.npmjs.org/language-subtag-registry/-/language-subtag-registry-0.3.21.tgz", - "integrity": "sha512-L0IqwlIXjilBVVYKFT37X9Ih11Um5NEl9cbJIuU/SwP/zEEAbBPOnEeeuxVMf45ydWQRDQN3Nqc96OgbH1K+Pg==", - "dev": true - }, - "language-tags": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/language-tags/-/language-tags-1.0.5.tgz", - "integrity": "sha1-0yHbxNowuovzAk4ED6XBRmH5GTo=", - "dev": true, - "requires": { - "language-subtag-registry": "~0.3.2" - } - }, - "levn": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz", - "integrity": "sha512-+bT2uH4E5LGE7h/n3evcS/sQlJXCpIp6ym8OWJ5eV6+67Dsql/LaaT7qJBAt2rzfoa/5QBGBhxDix1dMt2kQKQ==", - "dev": true, - "requires": { - "prelude-ls": "^1.2.1", - "type-check": "~0.4.0" - } - }, - "load-json-file": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/load-json-file/-/load-json-file-4.0.0.tgz", - "integrity": "sha1-L19Fq5HjMhYjT9U62rZo607AmTs=", - "dev": true, - "requires": { - "graceful-fs": "^4.1.2", - "parse-json": "^4.0.0", - "pify": "^3.0.0", - "strip-bom": "^3.0.0" - } - }, - "locate-path": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-2.0.0.tgz", - "integrity": 
"sha1-K1aLJl7slExtnA3pw9u7ygNUzY4=", - "dev": true, - "requires": { - "p-locate": "^2.0.0", - "path-exists": "^3.0.0" - } - }, - "lodash": { - "version": "4.17.21", - "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", - "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==", - "dev": true - }, - "lodash.clonedeep": { - "version": "4.5.0", - "resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz", - "integrity": "sha1-4j8/nE+Pvd6HJSnBBxhXoIblzO8=", - "dev": true - }, - "lodash.truncate": { - "version": "4.4.2", - "resolved": "https://registry.npmjs.org/lodash.truncate/-/lodash.truncate-4.4.2.tgz", - "integrity": "sha1-WjUNoLERO4N+z//VgSy+WNbq4ZM=", - "dev": true - }, - "loose-envify": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", - "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==", - "requires": { - "js-tokens": "^3.0.0 || ^4.0.0" - } - }, - "lru-cache": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", - "integrity": "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==", - "dev": true, - "requires": { - "yallist": "^4.0.0" - } - }, - "merge2": { - "version": "1.4.1", - "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz", - "integrity": "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==", - "dev": true - }, - "micromatch": { - "version": "4.0.4", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.4.tgz", - "integrity": "sha512-pRmzw/XUcwXGpD9aI9q/0XOwLNygjETJ8y0ao0wdqprrzDa4YnxLcz7fQRZr8voh8V10kGhABbNcHVk5wHgWwg==", - "dev": true, - "requires": { - "braces": "^3.0.1", - "picomatch": "^2.2.3" - } - }, - "mini-create-react-context": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/mini-create-react-context/-/mini-create-react-context-0.4.1.tgz", - "integrity": "sha512-YWCYEmd5CQeHGSAKrYvXgmzzkrvssZcuuQDDeqkT+PziKGMgE+0MCCtcKbROzocGBG1meBLl2FotlRwf4gAzbQ==", - "requires": { - "@babel/runtime": "^7.12.1", - "tiny-warning": "^1.0.3" - } - }, - "minimatch": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz", - "integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==", - "dev": true, - "requires": { - "brace-expansion": "^1.1.7" - } - }, - "minimist": { - "version": "1.2.5", - "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz", - "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==", - "dev": true - }, - "ms": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", - "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", - "dev": true - }, - "nanoid": { - "version": "3.1.23", - "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.1.23.tgz", - "integrity": "sha512-FiB0kzdP0FFVGDKlRLEQ1BgDzU87dy5NnzjeW9YZNt+/c3+q82EQDUwniSAUxp/F0gFNI1ZhKU1FqYsMuqZVnw==", - "dev": true - }, - "natural-compare": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz", - "integrity": "sha1-Sr6/7tdUHywnrPspvbvRXI1bpPc=", - "dev": true - }, - "node-releases": { - "version": "1.1.72", - 
"resolved": "https://registry.npmjs.org/node-releases/-/node-releases-1.1.72.tgz", - "integrity": "sha512-LLUo+PpH3dU6XizX3iVoubUNheF/owjXCZZ5yACDxNnPtgFuludV1ZL3ayK1kVep42Rmm0+R9/Y60NQbZ2bifw==", - "dev": true - }, - "normalize-package-data": { - "version": "2.5.0", - "resolved": "https://registry.npmjs.org/normalize-package-data/-/normalize-package-data-2.5.0.tgz", - "integrity": "sha512-/5CMN3T0R4XTj4DcGaexo+roZSdSFW/0AOOTROrjxzCG1wrWXEsGbRKevjlIL+ZDE4sZlJr5ED4YW0yqmkK+eA==", - "dev": true, - "requires": { - "hosted-git-info": "^2.1.4", - "resolve": "^1.10.0", - "semver": "2 || 3 || 4 || 5", - "validate-npm-package-license": "^3.0.1" - }, - "dependencies": { - "semver": { - "version": "5.7.1", - "resolved": "https://registry.npmjs.org/semver/-/semver-5.7.1.tgz", - "integrity": "sha512-sauaDf/PZdVgrLTNYHRtpXa1iRiKcaebiKQ1BJdpQlWH2lCvexQdX55snPFyK7QzpudqbCI0qXFfOasHdyNDGQ==", - "dev": true - } - } - }, - "normalize-path": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-2.1.1.tgz", - "integrity": "sha1-GrKLVW4Zg2Oowab35vogE3/mrtk=", - "dev": true, - "requires": { - "remove-trailing-separator": "^1.0.1" - } - }, - "object-assign": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", - "integrity": "sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM=" - }, - "object-inspect": { - "version": "1.10.3", - "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.10.3.tgz", - "integrity": "sha512-e5mCJlSH7poANfC8z8S9s9S2IN5/4Zb3aZ33f5s8YqoazCFzNLloLU8r5VCG+G7WoqLvAAZoVMcy3tp/3X0Plw==", - "dev": true - }, - "object-keys": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/object-keys/-/object-keys-1.1.1.tgz", - "integrity": "sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA==", - "dev": true - }, - "object.assign": { - "version": "4.1.2", - "resolved": "https://registry.npmjs.org/object.assign/-/object.assign-4.1.2.tgz", - "integrity": "sha512-ixT2L5THXsApyiUPYKmW+2EHpXXe5Ii3M+f4e+aJFAHao5amFRW6J0OO6c/LU8Be47utCx2GL89hxGB6XSmKuQ==", - "dev": true, - "requires": { - "call-bind": "^1.0.0", - "define-properties": "^1.1.3", - "has-symbols": "^1.0.1", - "object-keys": "^1.1.1" - } - }, - "object.entries": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/object.entries/-/object.entries-1.1.3.tgz", - "integrity": "sha512-ym7h7OZebNS96hn5IJeyUmaWhaSM4SVtAPPfNLQEI2MYWCO2egsITb9nab2+i/Pwibx+R0mtn+ltKJXRSeTMGg==", - "dev": true, - "requires": { - "call-bind": "^1.0.0", - "define-properties": "^1.1.3", - "es-abstract": "^1.18.0-next.1", - "has": "^1.0.3" - } - }, - "object.fromentries": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/object.fromentries/-/object.fromentries-2.0.4.tgz", - "integrity": "sha512-EsFBshs5RUUpQEY1D4q/m59kMfz4YJvxuNCJcv/jWwOJr34EaVnG11ZrZa0UHB3wnzV1wx8m58T4hQL8IuNXlQ==", - "dev": true, - "requires": { - "call-bind": "^1.0.2", - "define-properties": "^1.1.3", - "es-abstract": "^1.18.0-next.2", - "has": "^1.0.3" - } - }, - "object.values": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/object.values/-/object.values-1.1.3.tgz", - "integrity": "sha512-nkF6PfDB9alkOUxpf1HNm/QlkeW3SReqL5WXeBLpEJJnlPSvRaDQpW3gQTksTN3fgJX4hL42RzKyOin6ff3tyw==", - "dev": true, - "requires": { - "call-bind": "^1.0.2", - "define-properties": "^1.1.3", - "es-abstract": "^1.18.0-next.2", - "has": "^1.0.3" - } - }, - "once": { - "version": "1.4.0", - "resolved": 
"https://registry.npmjs.org/once/-/once-1.4.0.tgz", - "integrity": "sha1-WDsap3WWHUsROsF9nFC6753Xa9E=", - "dev": true, - "requires": { - "wrappy": "1" - } - }, - "optionator": { - "version": "0.9.1", - "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.1.tgz", - "integrity": "sha512-74RlY5FCnhq4jRxVUPKDaRwrVNXMqsGsiW6AJw4XK8hmtm10wC0ypZBLw5IIp85NZMr91+qd1RvvENwg7jjRFw==", - "dev": true, - "requires": { - "deep-is": "^0.1.3", - "fast-levenshtein": "^2.0.6", - "levn": "^0.4.1", - "prelude-ls": "^1.2.1", - "type-check": "^0.4.0", - "word-wrap": "^1.2.3" - } - }, - "p-limit": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-1.3.0.tgz", - "integrity": "sha512-vvcXsLAJ9Dr5rQOPk7toZQZJApBl2K4J6dANSsEuh6QI41JYcsS/qhTGa9ErIUUgK3WNQoJYvylxvjqmiqEA9Q==", - "dev": true, - "requires": { - "p-try": "^1.0.0" - } - }, - "p-locate": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-2.0.0.tgz", - "integrity": "sha1-IKAQOyIqcMj9OcwuWAaA893l7EM=", - "dev": true, - "requires": { - "p-limit": "^1.1.0" - } - }, - "p-try": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/p-try/-/p-try-1.0.0.tgz", - "integrity": "sha1-y8ec26+P1CKOE/Yh8rGiN8GyB7M=", - "dev": true - }, - "parent-module": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz", - "integrity": "sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==", - "dev": true, - "requires": { - "callsites": "^3.0.0" - } - }, - "parse-json": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-4.0.0.tgz", - "integrity": "sha1-vjX1Qlvh9/bHRxhPmKeIy5lHfuA=", - "dev": true, - "requires": { - "error-ex": "^1.3.1", - "json-parse-better-errors": "^1.0.1" - } - }, - "path-exists": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-3.0.0.tgz", - "integrity": "sha1-zg6+ql94yxiSXqfYENe1mwEP1RU=", - "dev": true - }, - "path-is-absolute": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", - "integrity": "sha1-F0uSaHNVNP+8es5r9TpanhtcX18=", - "dev": true - }, - "path-key": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", - "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", - "dev": true - }, - "path-parse": { - "version": "1.0.6", - "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.6.tgz", - "integrity": "sha512-GSmOT2EbHrINBf9SR7CDELwlJ8AENk3Qn7OikK4nFYAu3Ote2+JYNVvkpAEQm3/TLNEJFD/xZJjzyxg3KBWOzw==", - "dev": true - }, - "path-starts-with": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/path-starts-with/-/path-starts-with-1.0.0.tgz", - "integrity": "sha1-soJDAV6LE43lcmgqxS2kLmRq2E4=", - "dev": true, - "requires": { - "normalize-path": "^2.1.1" - } - }, - "path-to-regexp": { - "version": "1.8.0", - "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-1.8.0.tgz", - "integrity": "sha512-n43JRhlUKUAlibEJhPeir1ncUID16QnEjNpwzNdO3Lm4ywrBpBZ5oLD0I6br9evr1Y9JTqwRtAh7JLoOzAQdVA==", - "requires": { - "isarray": "0.0.1" - } - }, - "path-type": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/path-type/-/path-type-4.0.0.tgz", - "integrity": "sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw==", - "dev": true - }, - "picomatch": { - 
"version": "2.2.3", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.2.3.tgz", - "integrity": "sha512-KpELjfwcCDUb9PeigTs2mBJzXUPzAuP2oPcA989He8Rte0+YUAjw1JVedDhuTKPkHjSYzMN3npC9luThGYEKdg==", - "dev": true - }, - "pify": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/pify/-/pify-3.0.0.tgz", - "integrity": "sha1-5aSs0sEB/fPZpNB/DbxNtJ3SgXY=", - "dev": true - }, - "pkg-dir": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/pkg-dir/-/pkg-dir-2.0.0.tgz", - "integrity": "sha1-9tXREJ4Z1j7fQo4L1X4Sd3YVM0s=", - "dev": true, - "requires": { - "find-up": "^2.1.0" - } - }, - "pkg-up": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/pkg-up/-/pkg-up-2.0.0.tgz", - "integrity": "sha1-yBmscoBZpGHKscOImivjxJoATX8=", - "dev": true, - "requires": { - "find-up": "^2.1.0" - } - }, - "postcss": { - "version": "8.2.15", - "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.2.15.tgz", - "integrity": "sha512-2zO3b26eJD/8rb106Qu2o7Qgg52ND5HPjcyQiK2B98O388h43A448LCslC0dI2P97wCAQRJsFvwTRcXxTKds+Q==", - "dev": true, - "requires": { - "colorette": "^1.2.2", - "nanoid": "^3.1.23", - "source-map": "^0.6.1" - }, - "dependencies": { - "source-map": { - "version": "0.6.1", - "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", - "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", - "dev": true - } - } - }, - "prelude-ls": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz", - "integrity": "sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g==", - "dev": true - }, - "progress": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz", - "integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==", - "dev": true - }, - "prop-types": { - "version": "15.7.2", - "resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.7.2.tgz", - "integrity": "sha512-8QQikdH7//R2vurIJSutZ1smHYTcLpRWEOlHnzcWHmBYrOGUysKwSsrC89BCiFj3CbrfJ/nXFdJepOVrY1GCHQ==", - "requires": { - "loose-envify": "^1.4.0", - "object-assign": "^4.1.1", - "react-is": "^16.8.1" - } - }, - "punycode": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.1.1.tgz", - "integrity": "sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==", - "dev": true - }, - "queue-microtask": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz", - "integrity": "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==", - "dev": true - }, - "react": { - "version": "17.0.2", - "resolved": "https://registry.npmjs.org/react/-/react-17.0.2.tgz", - "integrity": "sha512-gnhPt75i/dq/z3/6q/0asP78D0u592D5L1pd7M8P+dck6Fu/jJeL6iVVK23fptSUZj8Vjf++7wXA8UNclGQcbA==", - "requires": { - "loose-envify": "^1.1.0", - "object-assign": "^4.1.1" - } - }, - "react-dom": { - "version": "17.0.2", - "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-17.0.2.tgz", - "integrity": "sha512-s4h96KtLDUQlsENhMn1ar8t2bEa+q/YAtj8pPPdIjPDGBDIVNsrD9aXNWqspUe6AzKCIG0C1HZZLqLV7qpOBGA==", - "requires": { - "loose-envify": "^1.1.0", - "object-assign": "^4.1.1", - "scheduler": "^0.20.2" - } - }, - "react-is": { - "version": "16.13.1", - "resolved": 
"https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz", - "integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==" - }, - "react-refresh": { - "version": "0.9.0", - "resolved": "https://registry.npmjs.org/react-refresh/-/react-refresh-0.9.0.tgz", - "integrity": "sha512-Gvzk7OZpiqKSkxsQvO/mbTN1poglhmAV7gR/DdIrRrSMXraRQQlfikRJOr3Nb9GTMPC5kof948Zy6jJZIFtDvQ==", - "dev": true - }, - "react-router": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/react-router/-/react-router-5.2.0.tgz", - "integrity": "sha512-smz1DUuFHRKdcJC0jobGo8cVbhO3x50tCL4icacOlcwDOEQPq4TMqwx3sY1TP+DvtTgz4nm3thuo7A+BK2U0Dw==", - "requires": { - "@babel/runtime": "^7.1.2", - "history": "^4.9.0", - "hoist-non-react-statics": "^3.1.0", - "loose-envify": "^1.3.1", - "mini-create-react-context": "^0.4.0", - "path-to-regexp": "^1.7.0", - "prop-types": "^15.6.2", - "react-is": "^16.6.0", - "tiny-invariant": "^1.0.2", - "tiny-warning": "^1.0.0" - } - }, - "react-router-dom": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/react-router-dom/-/react-router-dom-5.2.0.tgz", - "integrity": "sha512-gxAmfylo2QUjcwxI63RhQ5G85Qqt4voZpUXSEqCwykV0baaOTQDR1f0PmY8AELqIyVc0NEZUj0Gov5lNGcXgsA==", - "requires": { - "@babel/runtime": "^7.1.2", - "history": "^4.9.0", - "loose-envify": "^1.3.1", - "prop-types": "^15.6.2", - "react-router": "5.2.0", - "tiny-invariant": "^1.0.2", - "tiny-warning": "^1.0.0" - } - }, - "read-pkg": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/read-pkg/-/read-pkg-3.0.0.tgz", - "integrity": "sha1-nLxoaXj+5l0WwA4rGcI3/Pbjg4k=", - "dev": true, - "requires": { - "load-json-file": "^4.0.0", - "normalize-package-data": "^2.3.2", - "path-type": "^3.0.0" - }, - "dependencies": { - "path-type": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/path-type/-/path-type-3.0.0.tgz", - "integrity": "sha512-T2ZUsdZFHgA3u4e5PfPbjd7HDDpxPnQb5jN0SrDsjNSuVXHJqtwTnWqG0B1jZrgmJ/7lj1EmVIByWt1gxGkWvg==", - "dev": true, - "requires": { - "pify": "^3.0.0" - } - } - } - }, - "read-pkg-up": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/read-pkg-up/-/read-pkg-up-3.0.0.tgz", - "integrity": "sha1-PtSWaF26D4/hGNBpHcUfSh/5bwc=", - "dev": true, - "requires": { - "find-up": "^2.0.0", - "read-pkg": "^3.0.0" - } - }, - "regenerator-runtime": { - "version": "0.13.7", - "resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.13.7.tgz", - "integrity": "sha512-a54FxoJDIr27pgf7IgeQGxmqUNYrcV338lf/6gH456HZ/PhX+5BcwHXG9ajESmwe6WRO0tAzRUrRmNONWgkrew==" - }, - "regexp.prototype.flags": { - "version": "1.3.1", - "resolved": "https://registry.npmjs.org/regexp.prototype.flags/-/regexp.prototype.flags-1.3.1.tgz", - "integrity": "sha512-JiBdRBq91WlY7uRJ0ds7R+dU02i6LKi8r3BuQhNXn+kmeLN+EfHhfjqMRis1zJxnlu88hq/4dx0P2OP3APRTOA==", - "dev": true, - "requires": { - "call-bind": "^1.0.2", - "define-properties": "^1.1.3" - } - }, - "regexpp": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/regexpp/-/regexpp-3.1.0.tgz", - "integrity": "sha512-ZOIzd8yVsQQA7j8GCSlPGXwg5PfmA1mrq0JP4nGhh54LaKN3xdai/vHUDu74pKwV8OxseMS65u2NImosQcSD0Q==", - "dev": true - }, - "remove-trailing-separator": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/remove-trailing-separator/-/remove-trailing-separator-1.1.0.tgz", - "integrity": "sha1-wkvOKig62tW8P1jg1IJJuSN52O8=", - "dev": true - }, - "require-from-string": { - "version": "2.0.2", - "resolved": 
"https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz", - "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==", - "dev": true - }, - "resolve": { - "version": "1.20.0", - "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.20.0.tgz", - "integrity": "sha512-wENBPt4ySzg4ybFQW2TT1zMQucPK95HSh/nq2CFTZVOGut2+pQvSsgtda4d26YrYcr067wjbmzOG8byDPBX63A==", - "dev": true, - "requires": { - "is-core-module": "^2.2.0", - "path-parse": "^1.0.6" - } - }, - "resolve-from": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz", - "integrity": "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==", - "dev": true - }, - "resolve-pathname": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/resolve-pathname/-/resolve-pathname-3.0.0.tgz", - "integrity": "sha512-C7rARubxI8bXFNB/hqcp/4iUeIXJhJZvFPFPiSPRnhU5UPxzMFIl+2E6yY6c4k9giDJAhtV+enfA+G89N6Csng==" - }, - "reusify": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.0.4.tgz", - "integrity": "sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw==", - "dev": true - }, - "rimraf": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz", - "integrity": "sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==", - "dev": true, - "requires": { - "glob": "^7.1.3" - } - }, - "rollup": { - "version": "2.48.0", - "resolved": "https://registry.npmjs.org/rollup/-/rollup-2.48.0.tgz", - "integrity": "sha512-wl9ZSSSsi5579oscSDYSzGn092tCS076YB+TQrzsGuSfYyJeep8eEWj0eaRjuC5McuMNmcnR8icBqiE/FWNB1A==", - "dev": true, - "requires": { - "fsevents": "~2.3.1" - } - }, - "run-parallel": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz", - "integrity": "sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==", - "dev": true, - "requires": { - "queue-microtask": "^1.2.2" - } - }, - "safe-buffer": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", - "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==", - "dev": true - }, - "scheduler": { - "version": "0.20.2", - "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.20.2.tgz", - "integrity": "sha512-2eWfGgAqqWFGqtdMmcL5zCMK1U8KlXv8SQFGglL3CEtd0aDVDWgeF/YoCmvln55m5zSk3J/20hTaSBeSObsQDQ==", - "requires": { - "loose-envify": "^1.1.0", - "object-assign": "^4.1.1" - } - }, - "semver": { - "version": "7.3.5", - "resolved": "https://registry.npmjs.org/semver/-/semver-7.3.5.tgz", - "integrity": "sha512-PoeGJYh8HK4BTO/a9Tf6ZG3veo/A7ZVsYrSA6J8ny9nb3B1VrpkuN+z9OE5wfE5p6H4LchYZsegiQgbJD94ZFQ==", - "dev": true, - "requires": { - "lru-cache": "^6.0.0" - } - }, - "shebang-command": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", - "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", - "dev": true, - "requires": { - "shebang-regex": "^3.0.0" - } - }, - "shebang-regex": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", - "integrity": 
"sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", - "dev": true - }, - "side-channel": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.0.4.tgz", - "integrity": "sha512-q5XPytqFEIKHkGdiMIrY10mvLRvnQh42/+GoBlFW3b2LXLE2xxJpZFdm94we0BaoV3RwJyGqg5wS7epxTv0Zvw==", - "dev": true, - "requires": { - "call-bind": "^1.0.0", - "get-intrinsic": "^1.0.2", - "object-inspect": "^1.9.0" - } - }, - "slash": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz", - "integrity": "sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==", - "dev": true - }, - "slice-ansi": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/slice-ansi/-/slice-ansi-4.0.0.tgz", - "integrity": "sha512-qMCMfhY040cVHT43K9BFygqYbUPFZKHOg7K73mtTWJRb8pyP3fzf4Ixd5SzdEJQ6MRUg/WBnOLxghZtKKurENQ==", - "dev": true, - "requires": { - "ansi-styles": "^4.0.0", - "astral-regex": "^2.0.0", - "is-fullwidth-code-point": "^3.0.0" - }, - "dependencies": { - "ansi-styles": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", - "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", - "dev": true, - "requires": { - "color-convert": "^2.0.1" - } - }, - "color-convert": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", - "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", - "dev": true, - "requires": { - "color-name": "~1.1.4" - } - }, - "color-name": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", - "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", - "dev": true - } - } - }, - "source-map": { - "version": "0.5.7", - "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.5.7.tgz", - "integrity": "sha1-igOdLRAh0i0eoUyA2OpGi6LvP8w=", - "dev": true - }, - "spdx-correct": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/spdx-correct/-/spdx-correct-3.1.1.tgz", - "integrity": "sha512-cOYcUWwhCuHCXi49RhFRCyJEK3iPj1Ziz9DpViV3tbZOwXD49QzIN3MpOLJNxh2qwq2lJJZaKMVw9qNi4jTC0w==", - "dev": true, - "requires": { - "spdx-expression-parse": "^3.0.0", - "spdx-license-ids": "^3.0.0" - } - }, - "spdx-exceptions": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/spdx-exceptions/-/spdx-exceptions-2.3.0.tgz", - "integrity": "sha512-/tTrYOC7PPI1nUAgx34hUpqXuyJG+DTHJTnIULG4rDygi4xu/tfgmq1e1cIRwRzwZgo4NLySi+ricLkZkw4i5A==", - "dev": true - }, - "spdx-expression-parse": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/spdx-expression-parse/-/spdx-expression-parse-3.0.1.tgz", - "integrity": "sha512-cbqHunsQWnJNE6KhVSMsMeH5H/L9EpymbzqTQ3uLwNCLZ1Q481oWaofqH7nO6V07xlXwY6PhQdQ2IedWx/ZK4Q==", - "dev": true, - "requires": { - "spdx-exceptions": "^2.1.0", - "spdx-license-ids": "^3.0.0" - } - }, - "spdx-license-ids": { - "version": "3.0.8", - "resolved": "https://registry.npmjs.org/spdx-license-ids/-/spdx-license-ids-3.0.8.tgz", - "integrity": "sha512-NDgA96EnaLSvtbM7trJj+t1LUR3pirkDCcz9nOUlPb5DMBGsH7oES6C3hs3j7R9oHEa1EMvReS/BUAIT5Tcr0g==", - "dev": true - }, - "sprintf-js": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", - "integrity": 
"sha1-BOaSb2YolTVPPdAVIDYzuFcpfiw=", - "dev": true - }, - "string-width": { - "version": "4.2.2", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.2.tgz", - "integrity": "sha512-XBJbT3N4JhVumXE0eoLU9DCjcaF92KLNqTmFCnG1pf8duUxFGwtP6AD6nkjw9a3IdiRtL3E2w3JDiE/xi3vOeA==", - "dev": true, - "requires": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.0" - }, - "dependencies": { - "emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "dev": true - } - } - }, - "string.prototype.matchall": { - "version": "4.0.4", - "resolved": "https://registry.npmjs.org/string.prototype.matchall/-/string.prototype.matchall-4.0.4.tgz", - "integrity": "sha512-pknFIWVachNcyqRfaQSeu/FUfpvJTe4uskUSZ9Wc1RijsPuzbZ8TyYT8WCNnntCjUEqQ3vUHMAfVj2+wLAisPQ==", - "dev": true, - "requires": { - "call-bind": "^1.0.2", - "define-properties": "^1.1.3", - "es-abstract": "^1.18.0-next.2", - "has-symbols": "^1.0.1", - "internal-slot": "^1.0.3", - "regexp.prototype.flags": "^1.3.1", - "side-channel": "^1.0.4" - } - }, - "string.prototype.trimend": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/string.prototype.trimend/-/string.prototype.trimend-1.0.4.tgz", - "integrity": "sha512-y9xCjw1P23Awk8EvTpcyL2NIr1j7wJ39f+k6lvRnSMz+mz9CGz9NYPelDk42kOz6+ql8xjfK8oYzy3jAP5QU5A==", - "dev": true, - "requires": { - "call-bind": "^1.0.2", - "define-properties": "^1.1.3" - } - }, - "string.prototype.trimstart": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/string.prototype.trimstart/-/string.prototype.trimstart-1.0.4.tgz", - "integrity": "sha512-jh6e984OBfvxS50tdY2nRZnoC5/mLFKOREQfw8t5yytkoUsJRNxvI/E39qu1sD0OtWI3OC0XgKSmcWwziwYuZw==", - "dev": true, - "requires": { - "call-bind": "^1.0.2", - "define-properties": "^1.1.3" - } - }, - "strip-ansi": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.0.tgz", - "integrity": "sha512-AuvKTrTfQNYNIctbR1K/YGTR1756GycPsg7b9bdV9Duqur4gv6aKqHXah67Z8ImS7WEz5QVcOtlfW2rZEugt6w==", - "dev": true, - "requires": { - "ansi-regex": "^5.0.0" - } - }, - "strip-bom": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-3.0.0.tgz", - "integrity": "sha1-IzTBjpx1n3vdVv3vfprj1YjmjtM=", - "dev": true - }, - "strip-json-comments": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz", - "integrity": "sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==", - "dev": true - }, - "supports-color": { - "version": "5.5.0", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", - "integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==", - "dev": true, - "requires": { - "has-flag": "^3.0.0" - } - }, - "table": { - "version": "6.7.1", - "resolved": "https://registry.npmjs.org/table/-/table-6.7.1.tgz", - "integrity": "sha512-ZGum47Yi6KOOFDE8m223td53ath2enHcYLgOCjGr5ngu8bdIARQk6mN/wRMv4yMRcHnCSnHbCEha4sobQx5yWg==", - "dev": true, - "requires": { - "ajv": "^8.0.1", - "lodash.clonedeep": "^4.5.0", - "lodash.truncate": "^4.4.2", - "slice-ansi": "^4.0.0", - "string-width": "^4.2.0", - "strip-ansi": "^6.0.0" - }, - "dependencies": { - "ajv": { - "version": "8.4.0", - "resolved": 
"https://registry.npmjs.org/ajv/-/ajv-8.4.0.tgz", - "integrity": "sha512-7QD2l6+KBSLwf+7MuYocbWvRPdOu63/trReTLu2KFwkgctnub1auoF+Y1WYcm09CTM7quuscrzqmASaLHC/K4Q==", - "dev": true, - "requires": { - "fast-deep-equal": "^3.1.1", - "json-schema-traverse": "^1.0.0", - "require-from-string": "^2.0.2", - "uri-js": "^4.2.2" - } - }, - "json-schema-traverse": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", - "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==", - "dev": true - } - } - }, - "text-table": { - "version": "0.2.0", - "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz", - "integrity": "sha1-f17oI66AUgfACvLfSoTsP8+lcLQ=", - "dev": true - }, - "tiny-invariant": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/tiny-invariant/-/tiny-invariant-1.1.0.tgz", - "integrity": "sha512-ytxQvrb1cPc9WBEI/HSeYYoGD0kWnGEOR8RY6KomWLBVhqz0RgTwVO9dLrGz7dC+nN9llyI7OKAgRq8Vq4ZBSw==" - }, - "tiny-warning": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/tiny-warning/-/tiny-warning-1.0.3.tgz", - "integrity": "sha512-lBN9zLN/oAf68o3zNXYrdCt1kP8WsiGW8Oo2ka41b2IM5JL/S1CTyX1rW0mb/zSuJun0ZUrDxx4sqvYS2FWzPA==" - }, - "to-fast-properties": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/to-fast-properties/-/to-fast-properties-2.0.0.tgz", - "integrity": "sha1-3F5pjL0HkmW8c+A3doGk5Og/YW4=", - "dev": true - }, - "to-regex-range": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", - "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", - "dev": true, - "requires": { - "is-number": "^7.0.0" - } - }, - "tsconfig-paths": { - "version": "3.9.0", - "resolved": "https://registry.npmjs.org/tsconfig-paths/-/tsconfig-paths-3.9.0.tgz", - "integrity": "sha512-dRcuzokWhajtZWkQsDVKbWyY+jgcLC5sqJhg2PSgf4ZkH2aHPvaOY8YWGhmjb68b5qqTfasSsDO9k7RUiEmZAw==", - "dev": true, - "requires": { - "@types/json5": "^0.0.29", - "json5": "^1.0.1", - "minimist": "^1.2.0", - "strip-bom": "^3.0.0" - }, - "dependencies": { - "json5": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/json5/-/json5-1.0.1.tgz", - "integrity": "sha512-aKS4WQjPenRxiQsC93MNfjx+nbF4PAdYzmd/1JIj8HYzqfbu86beTuNgXDzPknWk0n0uARlyewZo4s++ES36Ow==", - "dev": true, - "requires": { - "minimist": "^1.2.0" - } - } - } - }, - "tslib": { - "version": "1.14.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-1.14.1.tgz", - "integrity": "sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==", - "dev": true - }, - "tsutils": { - "version": "3.21.0", - "resolved": "https://registry.npmjs.org/tsutils/-/tsutils-3.21.0.tgz", - "integrity": "sha512-mHKK3iUXL+3UF6xL5k0PEhKRUBKPBCv/+RkEOpjRWxxx27KKRBmmA60A9pgOUvMi8GKhRMPEmjBRPzs2W7O1OA==", - "dev": true, - "requires": { - "tslib": "^1.8.1" - } - }, - "type-check": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.4.0.tgz", - "integrity": "sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==", - "dev": true, - "requires": { - "prelude-ls": "^1.2.1" - } - }, - "type-fest": { - "version": "0.8.1", - "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.8.1.tgz", - "integrity": "sha512-4dbzIzqvjtgiM5rw1k5rEHtBANKmdudhGyBEajN01fEyhaAIhsoKNy6y7+IN93IfpFtwY9iqi7kD+xwKhQsNJA==", - 
"dev": true - }, - "typescript": { - "version": "4.2.4", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.2.4.tgz", - "integrity": "sha512-V+evlYHZnQkaz8TRBuxTA92yZBPotr5H+WhQ7bD3hZUndx5tGOa1fuCgeSjxAzM1RiN5IzvadIXTVefuuwZCRg==", - "dev": true - }, - "unbox-primitive": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/unbox-primitive/-/unbox-primitive-1.0.1.tgz", - "integrity": "sha512-tZU/3NqK3dA5gpE1KtyiJUrEB0lxnGkMFHptJ7q6ewdZ8s12QrODwNbhIJStmJkd1QDXa1NRA8aF2A1zk/Ypyw==", - "dev": true, - "requires": { - "function-bind": "^1.1.1", - "has-bigints": "^1.0.1", - "has-symbols": "^1.0.2", - "which-boxed-primitive": "^1.0.2" - } - }, - "uri-js": { - "version": "4.4.1", - "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz", - "integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==", - "dev": true, - "requires": { - "punycode": "^2.1.0" - } - }, - "v8-compile-cache": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/v8-compile-cache/-/v8-compile-cache-2.3.0.tgz", - "integrity": "sha512-l8lCEmLcLYZh4nbunNZvQCJc5pv7+RCwa8q/LdUx8u7lsWvPDKmpodJAJNwkAhJC//dFY48KuIEmjtd4RViDrA==", - "dev": true - }, - "validate-npm-package-license": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/validate-npm-package-license/-/validate-npm-package-license-3.0.4.tgz", - "integrity": "sha512-DpKm2Ui/xN7/HQKCtpZxoRWBhZ9Z0kqtygG8XCgNQ8ZlDnxuQmWhj566j8fN4Cu3/JmbhsDo7fcAJq4s9h27Ew==", - "dev": true, - "requires": { - "spdx-correct": "^3.0.0", - "spdx-expression-parse": "^3.0.0" - } - }, - "value-equal": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/value-equal/-/value-equal-1.0.1.tgz", - "integrity": "sha512-NOJ6JZCAWr0zlxZt+xqCHNTEKOsrks2HQd4MqhP1qy4z1SkbEP467eNx6TgDKXMvUOb+OENfJCZwM+16n7fRfw==" - }, - "vite": { - "version": "2.3.3", - "resolved": "https://registry.npmjs.org/vite/-/vite-2.3.3.tgz", - "integrity": "sha512-eO1iwRbn3/BfkNVMNJDeANAFCZ5NobYOFPu7IqfY7DcI7I9nFGjJIZid0EViTmLDGwwSUPmRAq3cRBbO3+DsMA==", - "dev": true, - "requires": { - "esbuild": "^0.11.23", - "fsevents": "~2.3.1", - "postcss": "^8.2.10", - "resolve": "^1.19.0", - "rollup": "^2.38.5" - } - }, - "which": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", - "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", - "dev": true, - "requires": { - "isexe": "^2.0.0" - } - }, - "which-boxed-primitive": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/which-boxed-primitive/-/which-boxed-primitive-1.0.2.tgz", - "integrity": "sha512-bwZdv0AKLpplFY2KZRX6TvyuN7ojjr7lwkg6ml0roIy9YeuSr7JS372qlNW18UQYzgYK9ziGcerWqZOmEn9VNg==", - "dev": true, - "requires": { - "is-bigint": "^1.0.1", - "is-boolean-object": "^1.1.0", - "is-number-object": "^1.0.4", - "is-string": "^1.0.5", - "is-symbol": "^1.0.3" - } - }, - "word-wrap": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.3.tgz", - "integrity": "sha512-Hz/mrNwitNRh/HUAtM/VT/5VH+ygD6DV7mYKZAtHOrbs8U7lvPS6xf7EJKMF0uW1KJCl0H701g3ZGus+muE5vQ==", - "dev": true - }, - "wrappy": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", - "integrity": "sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=", - "dev": true - }, - "yallist": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", - "integrity": 
"sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", - "dev": true - } - } -} diff --git a/batch2/react-batch/package.json b/batch2/react-batch/package.json deleted file mode 100644 index e2d5e5c9778..00000000000 --- a/batch2/react-batch/package.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "name": "react-batch", - "version": "0.0.0", - "scripts": { - "dev": "vite", - "build": "tsc && vite build", - "serve": "vite preview" - }, - "dependencies": { - "react": "^17.0.2", - "react-dom": "^17.0.0", - "react-router-dom": "^5.2.0" - }, - "devDependencies": { - "@types/react": "^17.0.5", - "@types/react-dom": "^17.0.0", - "@types/react-router-dom": "^5.1.7", - "@typescript-eslint/eslint-plugin": "^4.23.0", - "@typescript-eslint/parser": "^4.23.0", - "@vitejs/plugin-react-refresh": "^1.3.1", - "eslint": "^7.26.0", - "eslint-config-airbnb": "^18.2.1", - "eslint-plugin-import": "^2.23.0", - "eslint-plugin-jsx-a11y": "^6.4.1", - "eslint-plugin-react": "^7.23.2", - "eslint-plugin-react-hooks": "^4.2.0", - "typescript": "^4.1.2", - "vite": "^2.2.3" - } -} diff --git a/batch2/react-batch/src/App.tsx b/batch2/react-batch/src/App.tsx deleted file mode 100644 index cb1d1f4a03c..00000000000 --- a/batch2/react-batch/src/App.tsx +++ /dev/null @@ -1,24 +0,0 @@ -import React from 'react'; -import { Switch, Route } from 'react-router-dom'; -import BatchPage from './pages/BatchPage'; -import BatchesPage from './pages/BatchesPage'; - -import '@hail/common/hail.css'; - -export default function App() { - return ( -
-
- - - - - -
-
- ); -} - -function BadRoute() { - return

Uh oh! That page does not exist...

; -} diff --git a/batch2/react-batch/src/components/BatchTable.tsx b/batch2/react-batch/src/components/BatchTable.tsx deleted file mode 100644 index 5d2225ce185..00000000000 --- a/batch2/react-batch/src/components/BatchTable.tsx +++ /dev/null @@ -1,20 +0,0 @@ -import React from 'react'; -import { Link } from 'react-router-dom'; -import type { Batch } from '@hail/common/types'; - -function BatchTableRow({ batch }: { batch: Batch }) { - return ( - <> - {batch.id} - {batch.state} - - ); -} - -export default function BatchTable({ batches }: { batches: Batch[] }) { - return ( -
    - {batches.map((b) =>
  • )} -
- ); -} diff --git a/batch2/react-batch/src/components/JobTable.tsx b/batch2/react-batch/src/components/JobTable.tsx deleted file mode 100644 index f4185b833df..00000000000 --- a/batch2/react-batch/src/components/JobTable.tsx +++ /dev/null @@ -1,21 +0,0 @@ -import React from 'react'; -import type { Job } from '@hail/common/types'; - -function JobTableRow({ job }: { job: Job }) { - return
{JSON.stringify(job, null, 2)}
; -} - -type JobTableProps = { - batchId: number, - jobs: Job[], -} -export default function JobTable({ batchId, jobs }: JobTableProps) { - return ( - <> -

Batch #{batchId}

-
    - {jobs.map((j) =>
  1. )} -
- - ); -} diff --git a/batch2/react-batch/src/main.tsx b/batch2/react-batch/src/main.tsx deleted file mode 100644 index dabadc36c77..00000000000 --- a/batch2/react-batch/src/main.tsx +++ /dev/null @@ -1,13 +0,0 @@ -import React from 'react'; -import ReactDOM from 'react-dom'; -import { BrowserRouter } from 'react-router-dom'; -import App from './App'; - -import '@hail/common/hail.css'; - -ReactDOM.render( - - - , - document.getElementById('root'), -); diff --git a/batch2/react-batch/src/pages/BatchPage.tsx b/batch2/react-batch/src/pages/BatchPage.tsx deleted file mode 100644 index 2cf8611894f..00000000000 --- a/batch2/react-batch/src/pages/BatchPage.tsx +++ /dev/null @@ -1,14 +0,0 @@ -import React from 'react'; -import { useParams } from 'react-router-dom'; -import { useJobs } from '@hail/common/react/batch-client'; -import JobTable from '../components/JobTable'; - -import '@hail/common/hail.css'; - -type BatchPageParams = { id?: string }; -export default function BatchPage() { - const id = parseInt(useParams().id!, 10); - const jobs = useJobs(id); - - return jobs ? :
Loading...
; -} diff --git a/batch2/react-batch/src/pages/BatchesPage.tsx b/batch2/react-batch/src/pages/BatchesPage.tsx deleted file mode 100644 index e1116a3f896..00000000000 --- a/batch2/react-batch/src/pages/BatchesPage.tsx +++ /dev/null @@ -1,14 +0,0 @@ -import React from 'react'; -import { useBatches } from '@hail/common/react/batch-client'; -import BatchTable from '../components/BatchTable'; - -export default function BatchesPage() { - const batches = useBatches(); - - return ( - <> -

Batches

- {batches ? :
Loading...
} - - ); -} diff --git a/batch2/react-batch/tsconfig.json b/batch2/react-batch/tsconfig.json deleted file mode 100644 index 51efd413cde..00000000000 --- a/batch2/react-batch/tsconfig.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "compilerOptions": { - "target": "ESNext", - "lib": ["DOM", "DOM.Iterable", "ESNext"], - "types": ["vite/client"], - "allowJs": false, - "skipLibCheck": false, - "esModuleInterop": false, - "allowSyntheticDefaultImports": true, - "strict": true, - "forceConsistentCasingInFileNames": true, - "module": "ESNext", - "moduleResolution": "Node", - "resolveJsonModule": true, - "isolatedModules": true, - "noEmit": true, - "jsx": "react" - }, - "include": ["./src"] -} diff --git a/batch2/react-batch/vite.config.ts b/batch2/react-batch/vite.config.ts deleted file mode 100644 index b874b79498b..00000000000 --- a/batch2/react-batch/vite.config.ts +++ /dev/null @@ -1,14 +0,0 @@ -/* eslint-disable */ -import { defineConfig } from 'vite'; -import reactRefresh from '@vitejs/plugin-react-refresh'; - -// https://vitejs.dev/config/ -export default defineConfig({ - plugins: [reactRefresh()], - server: { - proxy: { - '/api': 'http://localhost:5050' - }, - cors: true, - } -}) diff --git a/batch2/svelte-batch/.gitignore b/batch2/svelte-batch/.gitignore deleted file mode 100644 index 126fe84d533..00000000000 --- a/batch2/svelte-batch/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -/node_modules/ -/dist/ -/.vscode/ -.DS_Store diff --git a/batch2/svelte-batch/index.html b/batch2/svelte-batch/index.html deleted file mode 100644 index b794a404672..00000000000 --- a/batch2/svelte-batch/index.html +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - Batch2 Svelte - - -
- - - diff --git a/batch2/svelte-batch/package-lock.json b/batch2/svelte-batch/package-lock.json deleted file mode 100644 index 041eee60caa..00000000000 --- a/batch2/svelte-batch/package-lock.json +++ /dev/null @@ -1,940 +0,0 @@ -{ - "name": "svelte-batch", - "version": "0.0.0", - "lockfileVersion": 2, - "requires": true, - "packages": { - "": { - "version": "0.0.0", - "devDependencies": { - "@sveltejs/vite-plugin-svelte": "^1.0.0-next.7", - "svelte": "^3.37.0", - "svelte-navigator": "^3.1.5", - "svelte-preprocess": "^4.7.2", - "typescript": "^4.2.4", - "vite": "^2.2.3" - } - }, - "node_modules/@rollup/pluginutils": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/@rollup/pluginutils/-/pluginutils-4.1.0.tgz", - "integrity": "sha512-TrBhfJkFxA+ER+ew2U2/fHbebhLT/l/2pRk0hfj9KusXUuRXd2v0R58AfaZK9VXDQ4TogOSEmICVrQAA3zFnHQ==", - "dev": true, - "dependencies": { - "estree-walker": "^2.0.1", - "picomatch": "^2.2.2" - }, - "engines": { - "node": ">= 8.0.0" - }, - "peerDependencies": { - "rollup": "^1.20.0||^2.0.0" - } - }, - "node_modules/@sveltejs/vite-plugin-svelte": { - "version": "1.0.0-next.10", - "resolved": "https://registry.npmjs.org/@sveltejs/vite-plugin-svelte/-/vite-plugin-svelte-1.0.0-next.10.tgz", - "integrity": "sha512-ImvxbhPePm2hWNTKBSA3LHAYGwiEjHjvvgfPLXm4R87sfZ+BMXql9jBmDpzUC/URBLT4BB3Jxos/i523qkJBHg==", - "dev": true, - "dependencies": { - "@rollup/pluginutils": "^4.1.0", - "chalk": "^4.1.1", - "debug": "^4.3.2", - "hash-sum": "^2.0.0", - "require-relative": "^0.8.7", - "slash": "^4.0.0", - "source-map": "^0.7.3", - "svelte-hmr": "^0.14.2" - }, - "engines": { - "node": ">=12.0.0" - }, - "peerDependencies": { - "svelte": "^3.37.0", - "vite": "^2.2.3" - } - }, - "node_modules/@types/node": { - "version": "15.0.3", - "resolved": "https://registry.npmjs.org/@types/node/-/node-15.0.3.tgz", - "integrity": "sha512-/WbxFeBU+0F79z9RdEOXH4CsDga+ibi5M8uEYr91u3CkT/pdWcV8MCook+4wDPnZBexRdwWS+PiVZ2xJviAzcQ==", - "dev": true - }, - "node_modules/@types/pug": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/@types/pug/-/pug-2.0.4.tgz", - "integrity": "sha1-h3L80EGOPNLMFxVV1zAHQVBR9LI=", - "dev": true - }, - "node_modules/@types/sass": { - "version": "1.16.0", - "resolved": "https://registry.npmjs.org/@types/sass/-/sass-1.16.0.tgz", - "integrity": "sha512-2XZovu4NwcqmtZtsBR5XYLw18T8cBCnU2USFHTnYLLHz9fkhnoEMoDsqShJIOFsFhn5aJHjweiUUdTrDGujegA==", - "dev": true, - "dependencies": { - "@types/node": "*" - } - }, - "node_modules/ansi-styles": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", - "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", - "dev": true, - "dependencies": { - "color-convert": "^2.0.1" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/chalk": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.1.tgz", - "integrity": "sha512-diHzdDKxcU+bAsUboHLPEDQiw0qEe0qd7SYUn3HgcFlWgbDcfLGswOHYeGrHKzG9z6UYf01d9VFMfZxPM1xZSg==", - "dev": true, - "dependencies": { - "ansi-styles": "^4.1.0", - "supports-color": "^7.1.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/chalk?sponsor=1" - } - }, - "node_modules/color-convert": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", - "integrity": 
"sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", - "dev": true, - "dependencies": { - "color-name": "~1.1.4" - }, - "engines": { - "node": ">=7.0.0" - } - }, - "node_modules/color-name": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", - "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", - "dev": true - }, - "node_modules/colorette": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/colorette/-/colorette-1.2.2.tgz", - "integrity": "sha512-MKGMzyfeuutC/ZJ1cba9NqcNpfeqMUcYmyF1ZFY6/Cn7CNSAKx6a+s48sqLqyAiZuaP2TcqMhoo+dlwFnVxT9w==", - "dev": true - }, - "node_modules/debug": { - "version": "4.3.2", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.2.tgz", - "integrity": "sha512-mOp8wKcvj7XxC78zLgw/ZA+6TSgkoE2C/ienthhRD298T7UNwAg9diBpLRxC0mOezLl4B0xV7M0cCO6P/O0Xhw==", - "dev": true, - "dependencies": { - "ms": "2.1.2" - }, - "engines": { - "node": ">=6.0" - }, - "peerDependenciesMeta": { - "supports-color": { - "optional": true - } - } - }, - "node_modules/dedent-js": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/dedent-js/-/dedent-js-1.0.1.tgz", - "integrity": "sha1-vuX7fJ5yfYXf+iRZDRDsGrElUwU=", - "dev": true - }, - "node_modules/detect-indent": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/detect-indent/-/detect-indent-6.0.0.tgz", - "integrity": "sha512-oSyFlqaTHCItVRGK5RmrmjB+CmaMOW7IaNA/kdxqhoa6d17j/5ce9O9eWXmV/KEdRwqpQA+Vqe8a8Bsybu4YnA==", - "dev": true, - "engines": { - "node": ">=8" - } - }, - "node_modules/esbuild": { - "version": "0.11.20", - "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.11.20.tgz", - "integrity": "sha512-QOZrVpN/Yz74xfat0H6euSgn3RnwLevY1mJTEXneukz1ln9qB+ieaerRMzSeETpz/UJWsBMzRVR/andBht5WKw==", - "dev": true, - "hasInstallScript": true, - "bin": { - "esbuild": "bin/esbuild" - } - }, - "node_modules/estree-walker": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-2.0.2.tgz", - "integrity": "sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w==", - "dev": true - }, - "node_modules/fsevents": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", - "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", - "dev": true, - "hasInstallScript": true, - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": "^8.16.0 || ^10.6.0 || >=11.0.0" - } - }, - "node_modules/function-bind": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.1.tgz", - "integrity": "sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A==", - "dev": true - }, - "node_modules/has": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/has/-/has-1.0.3.tgz", - "integrity": "sha512-f2dvO0VU6Oej7RkWJGrehjbzMAjFp5/VKPp5tTpWIV4JHHZK1/BxbFRtf/siA2SWTe09caDmVtYYzWEIbBS4zw==", - "dev": true, - "dependencies": { - "function-bind": "^1.1.1" - }, - "engines": { - "node": ">= 0.4.0" - } - }, - "node_modules/has-flag": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", - "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", - "dev": true, - "engines": { - "node": ">=8" - } - }, - 
"node_modules/hash-sum": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/hash-sum/-/hash-sum-2.0.0.tgz", - "integrity": "sha512-WdZTbAByD+pHfl/g9QSsBIIwy8IT+EsPiKDs0KNX+zSHhdDLFKdZu0BQHljvO+0QI/BasbMSUa8wYNCZTvhslg==", - "dev": true - }, - "node_modules/is-core-module": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.4.0.tgz", - "integrity": "sha512-6A2fkfq1rfeQZjxrZJGerpLCTHRNEBiSgnu0+obeJpEPZRUooHgsizvzv0ZjJwOz3iWIHdJtVWJ/tmPr3D21/A==", - "dev": true, - "dependencies": { - "has": "^1.0.3" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/lower-case": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/lower-case/-/lower-case-2.0.2.tgz", - "integrity": "sha512-7fm3l3NAF9WfN6W3JOmf5drwpVqX78JtoGJ3A6W0a6ZnldM41w2fV5D490psKFTpMds8TJse/eHLFFsNHHjHgg==", - "dev": true, - "dependencies": { - "tslib": "^2.0.3" - } - }, - "node_modules/min-indent": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/min-indent/-/min-indent-1.0.1.tgz", - "integrity": "sha512-I9jwMn07Sy/IwOj3zVkVik2JTvgpaykDZEigL6Rx6N9LbMywwUSMtxET+7lVoDLLd3O3IXwJwvuuns8UB/HeAg==", - "dev": true, - "engines": { - "node": ">=4" - } - }, - "node_modules/ms": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", - "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", - "dev": true - }, - "node_modules/nanoid": { - "version": "3.1.23", - "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.1.23.tgz", - "integrity": "sha512-FiB0kzdP0FFVGDKlRLEQ1BgDzU87dy5NnzjeW9YZNt+/c3+q82EQDUwniSAUxp/F0gFNI1ZhKU1FqYsMuqZVnw==", - "dev": true, - "bin": { - "nanoid": "bin/nanoid.cjs" - }, - "engines": { - "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" - } - }, - "node_modules/no-case": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/no-case/-/no-case-3.0.4.tgz", - "integrity": "sha512-fgAN3jGAh+RoxUGZHTSOLJIqUc2wmoBwGR4tbpNAKmmovFoWq0OdRkb0VkldReO2a2iBT/OEulG9XSUc10r3zg==", - "dev": true, - "dependencies": { - "lower-case": "^2.0.2", - "tslib": "^2.0.3" - } - }, - "node_modules/pascal-case": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/pascal-case/-/pascal-case-3.1.2.tgz", - "integrity": "sha512-uWlGT3YSnK9x3BQJaOdcZwrnV6hPpd8jFH1/ucpiLRPh/2zCVJKS19E4GvYHvaCcACn3foXZ0cLB9Wrx1KGe5g==", - "dev": true, - "dependencies": { - "no-case": "^3.0.4", - "tslib": "^2.0.3" - } - }, - "node_modules/path-parse": { - "version": "1.0.6", - "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.6.tgz", - "integrity": "sha512-GSmOT2EbHrINBf9SR7CDELwlJ8AENk3Qn7OikK4nFYAu3Ote2+JYNVvkpAEQm3/TLNEJFD/xZJjzyxg3KBWOzw==", - "dev": true - }, - "node_modules/picomatch": { - "version": "2.2.3", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.2.3.tgz", - "integrity": "sha512-KpELjfwcCDUb9PeigTs2mBJzXUPzAuP2oPcA989He8Rte0+YUAjw1JVedDhuTKPkHjSYzMN3npC9luThGYEKdg==", - "dev": true, - "engines": { - "node": ">=8.6" - }, - "funding": { - "url": "https://github.com/sponsors/jonschlinkert" - } - }, - "node_modules/postcss": { - "version": "8.2.15", - "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.2.15.tgz", - "integrity": "sha512-2zO3b26eJD/8rb106Qu2o7Qgg52ND5HPjcyQiK2B98O388h43A448LCslC0dI2P97wCAQRJsFvwTRcXxTKds+Q==", - "dev": true, - "dependencies": { - "colorette": "^1.2.2", - "nanoid": "^3.1.23", - "source-map": "^0.6.1" - }, - "engines": { - "node": "^10 || ^12 || >=14" 
- }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/postcss/" - } - }, - "node_modules/postcss/node_modules/source-map": { - "version": "0.6.1", - "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", - "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", - "dev": true, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/require-relative": { - "version": "0.8.7", - "resolved": "https://registry.npmjs.org/require-relative/-/require-relative-0.8.7.tgz", - "integrity": "sha1-eZlTn8ngR6N5KPoZb44VY9q9Nt4=", - "dev": true - }, - "node_modules/resolve": { - "version": "1.20.0", - "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.20.0.tgz", - "integrity": "sha512-wENBPt4ySzg4ybFQW2TT1zMQucPK95HSh/nq2CFTZVOGut2+pQvSsgtda4d26YrYcr067wjbmzOG8byDPBX63A==", - "dev": true, - "dependencies": { - "is-core-module": "^2.2.0", - "path-parse": "^1.0.6" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/rollup": { - "version": "2.47.0", - "resolved": "https://registry.npmjs.org/rollup/-/rollup-2.47.0.tgz", - "integrity": "sha512-rqBjgq9hQfW0vRmz+0S062ORRNJXvwRpzxhFXORvar/maZqY6za3rgQ/p1Glg+j1hnc1GtYyQCPiAei95uTElg==", - "dev": true, - "dependencies": { - "fsevents": "~2.3.1" - }, - "bin": { - "rollup": "dist/bin/rollup" - }, - "engines": { - "node": ">=10.0.0" - }, - "optionalDependencies": { - "fsevents": "~2.3.1" - } - }, - "node_modules/slash": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/slash/-/slash-4.0.0.tgz", - "integrity": "sha512-3dOsAHXXUkQTpOYcoAxLIorMTp4gIQr5IW3iVb7A7lFIp0VHhnynm9izx6TssdrIcVIESAlVjtnO2K8bg+Coew==", - "dev": true, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/source-map": { - "version": "0.7.3", - "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.7.3.tgz", - "integrity": "sha512-CkCj6giN3S+n9qrYiBTX5gystlENnRW5jZeNLHpe6aue+SrHcG5VYwujhW9s4dY31mEGsxBDrHR6oI69fTXsaQ==", - "dev": true, - "engines": { - "node": ">= 8" - } - }, - "node_modules/strip-indent": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/strip-indent/-/strip-indent-3.0.0.tgz", - "integrity": "sha512-laJTa3Jb+VQpaC6DseHhF7dXVqHTfJPCRDaEbid/drOhgitgYku/letMUqOXFoWV0zIIUbjpdH2t+tYj4bQMRQ==", - "dev": true, - "dependencies": { - "min-indent": "^1.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/supports-color": { - "version": "7.2.0", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", - "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", - "dev": true, - "dependencies": { - "has-flag": "^4.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/svelte": { - "version": "3.38.2", - "resolved": "https://registry.npmjs.org/svelte/-/svelte-3.38.2.tgz", - "integrity": "sha512-q5Dq0/QHh4BLJyEVWGe7Cej5NWs040LWjMbicBGZ+3qpFWJ1YObRmUDZKbbovddLC9WW7THTj3kYbTOFmU9fbg==", - "dev": true, - "engines": { - "node": ">= 8" - } - }, - "node_modules/svelte-hmr": { - "version": "0.14.3", - "resolved": "https://registry.npmjs.org/svelte-hmr/-/svelte-hmr-0.14.3.tgz", - "integrity": "sha512-N56xX405zLMw2tpGHKRx5h+kmdeZwxI21pvyC6OyBHJDCF6DlwWBm9TifdQmSD4dloWSmpDPzHWYa3CSjfopUg==", - "dev": true, - "peerDependencies": { - "svelte": ">=3.19.0" - } - }, - "node_modules/svelte-navigator": { - "version": "3.1.5", - "resolved": 
"https://registry.npmjs.org/svelte-navigator/-/svelte-navigator-3.1.5.tgz", - "integrity": "sha512-CGTaexasSLpUaTSN2AlYqii0JeisIgg7uZbm8XCLKlpM9Qv3IltlJ7Nvh90Xw9ND97KqtGOjNJ3LNwMN1ABV0w==", - "dev": true, - "dependencies": { - "svelte2tsx": "^0.1.151" - }, - "peerDependencies": { - "svelte": "3.x" - } - }, - "node_modules/svelte-preprocess": { - "version": "4.7.3", - "resolved": "https://registry.npmjs.org/svelte-preprocess/-/svelte-preprocess-4.7.3.tgz", - "integrity": "sha512-Zx1/xLeGOIBlZMGPRCaXtlMe4ZA0faato5Dc3CosEqwu75MIEPuOstdkH6cy+RYTUYynoxzNaDxkPX4DbrPwRA==", - "dev": true, - "hasInstallScript": true, - "dependencies": { - "@types/pug": "^2.0.4", - "@types/sass": "^1.16.0", - "detect-indent": "^6.0.0", - "strip-indent": "^3.0.0" - }, - "engines": { - "node": ">= 9.11.2" - }, - "peerDependencies": { - "@babel/core": "^7.10.2", - "coffeescript": "^2.5.1", - "less": "^3.11.3", - "postcss": "^7 || ^8", - "postcss-load-config": "^2.1.0 || ^3.0.0", - "pug": "^3.0.0", - "sass": "^1.26.8", - "stylus": "^0.54.7", - "sugarss": "^2.0.0", - "svelte": "^3.23.0", - "typescript": "^3.9.5 || ^4.0.0" - }, - "peerDependenciesMeta": { - "@babel/core": { - "optional": true - }, - "coffeescript": { - "optional": true - }, - "less": { - "optional": true - }, - "node-sass": { - "optional": true - }, - "postcss": { - "optional": true - }, - "postcss-load-config": { - "optional": true - }, - "pug": { - "optional": true - }, - "sass": { - "optional": true - }, - "stylus": { - "optional": true - }, - "sugarss": { - "optional": true - }, - "typescript": { - "optional": true - } - } - }, - "node_modules/svelte2tsx": { - "version": "0.1.191", - "resolved": "https://registry.npmjs.org/svelte2tsx/-/svelte2tsx-0.1.191.tgz", - "integrity": "sha512-pSciOMX9LAmvirtCilFRqrtHJxml4pdyMKq7Tjpac4EaWsO89Cfsjl04wBZ7YZKzW8UotbPOjCaJEoC/XDc/QQ==", - "dev": true, - "dependencies": { - "dedent-js": "^1.0.1", - "pascal-case": "^3.1.1" - }, - "peerDependencies": { - "svelte": "^3.24", - "typescript": "^4.1.2" - } - }, - "node_modules/tslib": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.2.0.tgz", - "integrity": "sha512-gS9GVHRU+RGn5KQM2rllAlR3dU6m7AcpJKdtH8gFvQiC4Otgk98XnmMU+nZenHt/+VhnBPWwgrJsyrdcw6i23w==", - "dev": true - }, - "node_modules/typescript": { - "version": "4.2.4", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.2.4.tgz", - "integrity": "sha512-V+evlYHZnQkaz8TRBuxTA92yZBPotr5H+WhQ7bD3hZUndx5tGOa1fuCgeSjxAzM1RiN5IzvadIXTVefuuwZCRg==", - "dev": true, - "bin": { - "tsc": "bin/tsc", - "tsserver": "bin/tsserver" - }, - "engines": { - "node": ">=4.2.0" - } - }, - "node_modules/vite": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/vite/-/vite-2.3.2.tgz", - "integrity": "sha512-QhLdOompDrfkyryCNTts9HE+eJhvhN9ibKNJ5Q8DpQai+6nOsuIlaveZNg67e1O/2QaWqXeBo82eHnAs1De2bQ==", - "dev": true, - "dependencies": { - "esbuild": "^0.11.20", - "fsevents": "~2.3.1", - "postcss": "^8.2.10", - "resolve": "^1.19.0", - "rollup": "^2.38.5" - }, - "bin": { - "vite": "bin/vite.js" - }, - "engines": { - "node": ">=12.0.0" - }, - "optionalDependencies": { - "fsevents": "~2.3.1" - } - } - }, - "dependencies": { - "@rollup/pluginutils": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/@rollup/pluginutils/-/pluginutils-4.1.0.tgz", - "integrity": "sha512-TrBhfJkFxA+ER+ew2U2/fHbebhLT/l/2pRk0hfj9KusXUuRXd2v0R58AfaZK9VXDQ4TogOSEmICVrQAA3zFnHQ==", - "dev": true, - "requires": { - "estree-walker": "^2.0.1", - "picomatch": "^2.2.2" - } - }, - 
"@sveltejs/vite-plugin-svelte": { - "version": "1.0.0-next.10", - "resolved": "https://registry.npmjs.org/@sveltejs/vite-plugin-svelte/-/vite-plugin-svelte-1.0.0-next.10.tgz", - "integrity": "sha512-ImvxbhPePm2hWNTKBSA3LHAYGwiEjHjvvgfPLXm4R87sfZ+BMXql9jBmDpzUC/URBLT4BB3Jxos/i523qkJBHg==", - "dev": true, - "requires": { - "@rollup/pluginutils": "^4.1.0", - "chalk": "^4.1.1", - "debug": "^4.3.2", - "hash-sum": "^2.0.0", - "require-relative": "^0.8.7", - "slash": "^4.0.0", - "source-map": "^0.7.3", - "svelte-hmr": "^0.14.2" - } - }, - "@types/node": { - "version": "15.0.3", - "resolved": "https://registry.npmjs.org/@types/node/-/node-15.0.3.tgz", - "integrity": "sha512-/WbxFeBU+0F79z9RdEOXH4CsDga+ibi5M8uEYr91u3CkT/pdWcV8MCook+4wDPnZBexRdwWS+PiVZ2xJviAzcQ==", - "dev": true - }, - "@types/pug": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/@types/pug/-/pug-2.0.4.tgz", - "integrity": "sha1-h3L80EGOPNLMFxVV1zAHQVBR9LI=", - "dev": true - }, - "@types/sass": { - "version": "1.16.0", - "resolved": "https://registry.npmjs.org/@types/sass/-/sass-1.16.0.tgz", - "integrity": "sha512-2XZovu4NwcqmtZtsBR5XYLw18T8cBCnU2USFHTnYLLHz9fkhnoEMoDsqShJIOFsFhn5aJHjweiUUdTrDGujegA==", - "dev": true, - "requires": { - "@types/node": "*" - } - }, - "ansi-styles": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", - "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", - "dev": true, - "requires": { - "color-convert": "^2.0.1" - } - }, - "chalk": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.1.tgz", - "integrity": "sha512-diHzdDKxcU+bAsUboHLPEDQiw0qEe0qd7SYUn3HgcFlWgbDcfLGswOHYeGrHKzG9z6UYf01d9VFMfZxPM1xZSg==", - "dev": true, - "requires": { - "ansi-styles": "^4.1.0", - "supports-color": "^7.1.0" - } - }, - "color-convert": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", - "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", - "dev": true, - "requires": { - "color-name": "~1.1.4" - } - }, - "color-name": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", - "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", - "dev": true - }, - "colorette": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/colorette/-/colorette-1.2.2.tgz", - "integrity": "sha512-MKGMzyfeuutC/ZJ1cba9NqcNpfeqMUcYmyF1ZFY6/Cn7CNSAKx6a+s48sqLqyAiZuaP2TcqMhoo+dlwFnVxT9w==", - "dev": true - }, - "debug": { - "version": "4.3.2", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.2.tgz", - "integrity": "sha512-mOp8wKcvj7XxC78zLgw/ZA+6TSgkoE2C/ienthhRD298T7UNwAg9diBpLRxC0mOezLl4B0xV7M0cCO6P/O0Xhw==", - "dev": true, - "requires": { - "ms": "2.1.2" - } - }, - "dedent-js": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/dedent-js/-/dedent-js-1.0.1.tgz", - "integrity": "sha1-vuX7fJ5yfYXf+iRZDRDsGrElUwU=", - "dev": true - }, - "detect-indent": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/detect-indent/-/detect-indent-6.0.0.tgz", - "integrity": "sha512-oSyFlqaTHCItVRGK5RmrmjB+CmaMOW7IaNA/kdxqhoa6d17j/5ce9O9eWXmV/KEdRwqpQA+Vqe8a8Bsybu4YnA==", - "dev": true - }, - "esbuild": { - "version": "0.11.20", - "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.11.20.tgz", - "integrity": 
"sha512-QOZrVpN/Yz74xfat0H6euSgn3RnwLevY1mJTEXneukz1ln9qB+ieaerRMzSeETpz/UJWsBMzRVR/andBht5WKw==", - "dev": true - }, - "estree-walker": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-2.0.2.tgz", - "integrity": "sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w==", - "dev": true - }, - "fsevents": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", - "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", - "dev": true, - "optional": true - }, - "function-bind": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.1.tgz", - "integrity": "sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A==", - "dev": true - }, - "has": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/has/-/has-1.0.3.tgz", - "integrity": "sha512-f2dvO0VU6Oej7RkWJGrehjbzMAjFp5/VKPp5tTpWIV4JHHZK1/BxbFRtf/siA2SWTe09caDmVtYYzWEIbBS4zw==", - "dev": true, - "requires": { - "function-bind": "^1.1.1" - } - }, - "has-flag": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", - "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", - "dev": true - }, - "hash-sum": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/hash-sum/-/hash-sum-2.0.0.tgz", - "integrity": "sha512-WdZTbAByD+pHfl/g9QSsBIIwy8IT+EsPiKDs0KNX+zSHhdDLFKdZu0BQHljvO+0QI/BasbMSUa8wYNCZTvhslg==", - "dev": true - }, - "is-core-module": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.4.0.tgz", - "integrity": "sha512-6A2fkfq1rfeQZjxrZJGerpLCTHRNEBiSgnu0+obeJpEPZRUooHgsizvzv0ZjJwOz3iWIHdJtVWJ/tmPr3D21/A==", - "dev": true, - "requires": { - "has": "^1.0.3" - } - }, - "lower-case": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/lower-case/-/lower-case-2.0.2.tgz", - "integrity": "sha512-7fm3l3NAF9WfN6W3JOmf5drwpVqX78JtoGJ3A6W0a6ZnldM41w2fV5D490psKFTpMds8TJse/eHLFFsNHHjHgg==", - "dev": true, - "requires": { - "tslib": "^2.0.3" - } - }, - "min-indent": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/min-indent/-/min-indent-1.0.1.tgz", - "integrity": "sha512-I9jwMn07Sy/IwOj3zVkVik2JTvgpaykDZEigL6Rx6N9LbMywwUSMtxET+7lVoDLLd3O3IXwJwvuuns8UB/HeAg==", - "dev": true - }, - "ms": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", - "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", - "dev": true - }, - "nanoid": { - "version": "3.1.23", - "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.1.23.tgz", - "integrity": "sha512-FiB0kzdP0FFVGDKlRLEQ1BgDzU87dy5NnzjeW9YZNt+/c3+q82EQDUwniSAUxp/F0gFNI1ZhKU1FqYsMuqZVnw==", - "dev": true - }, - "no-case": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/no-case/-/no-case-3.0.4.tgz", - "integrity": "sha512-fgAN3jGAh+RoxUGZHTSOLJIqUc2wmoBwGR4tbpNAKmmovFoWq0OdRkb0VkldReO2a2iBT/OEulG9XSUc10r3zg==", - "dev": true, - "requires": { - "lower-case": "^2.0.2", - "tslib": "^2.0.3" - } - }, - "pascal-case": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/pascal-case/-/pascal-case-3.1.2.tgz", - "integrity": "sha512-uWlGT3YSnK9x3BQJaOdcZwrnV6hPpd8jFH1/ucpiLRPh/2zCVJKS19E4GvYHvaCcACn3foXZ0cLB9Wrx1KGe5g==", - "dev": true, - 
"requires": { - "no-case": "^3.0.4", - "tslib": "^2.0.3" - } - }, - "path-parse": { - "version": "1.0.6", - "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.6.tgz", - "integrity": "sha512-GSmOT2EbHrINBf9SR7CDELwlJ8AENk3Qn7OikK4nFYAu3Ote2+JYNVvkpAEQm3/TLNEJFD/xZJjzyxg3KBWOzw==", - "dev": true - }, - "picomatch": { - "version": "2.2.3", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.2.3.tgz", - "integrity": "sha512-KpELjfwcCDUb9PeigTs2mBJzXUPzAuP2oPcA989He8Rte0+YUAjw1JVedDhuTKPkHjSYzMN3npC9luThGYEKdg==", - "dev": true - }, - "postcss": { - "version": "8.2.15", - "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.2.15.tgz", - "integrity": "sha512-2zO3b26eJD/8rb106Qu2o7Qgg52ND5HPjcyQiK2B98O388h43A448LCslC0dI2P97wCAQRJsFvwTRcXxTKds+Q==", - "dev": true, - "requires": { - "colorette": "^1.2.2", - "nanoid": "^3.1.23", - "source-map": "^0.6.1" - }, - "dependencies": { - "source-map": { - "version": "0.6.1", - "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", - "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", - "dev": true - } - } - }, - "require-relative": { - "version": "0.8.7", - "resolved": "https://registry.npmjs.org/require-relative/-/require-relative-0.8.7.tgz", - "integrity": "sha1-eZlTn8ngR6N5KPoZb44VY9q9Nt4=", - "dev": true - }, - "resolve": { - "version": "1.20.0", - "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.20.0.tgz", - "integrity": "sha512-wENBPt4ySzg4ybFQW2TT1zMQucPK95HSh/nq2CFTZVOGut2+pQvSsgtda4d26YrYcr067wjbmzOG8byDPBX63A==", - "dev": true, - "requires": { - "is-core-module": "^2.2.0", - "path-parse": "^1.0.6" - } - }, - "rollup": { - "version": "2.47.0", - "resolved": "https://registry.npmjs.org/rollup/-/rollup-2.47.0.tgz", - "integrity": "sha512-rqBjgq9hQfW0vRmz+0S062ORRNJXvwRpzxhFXORvar/maZqY6za3rgQ/p1Glg+j1hnc1GtYyQCPiAei95uTElg==", - "dev": true, - "requires": { - "fsevents": "~2.3.1" - } - }, - "slash": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/slash/-/slash-4.0.0.tgz", - "integrity": "sha512-3dOsAHXXUkQTpOYcoAxLIorMTp4gIQr5IW3iVb7A7lFIp0VHhnynm9izx6TssdrIcVIESAlVjtnO2K8bg+Coew==", - "dev": true - }, - "source-map": { - "version": "0.7.3", - "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.7.3.tgz", - "integrity": "sha512-CkCj6giN3S+n9qrYiBTX5gystlENnRW5jZeNLHpe6aue+SrHcG5VYwujhW9s4dY31mEGsxBDrHR6oI69fTXsaQ==", - "dev": true - }, - "strip-indent": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/strip-indent/-/strip-indent-3.0.0.tgz", - "integrity": "sha512-laJTa3Jb+VQpaC6DseHhF7dXVqHTfJPCRDaEbid/drOhgitgYku/letMUqOXFoWV0zIIUbjpdH2t+tYj4bQMRQ==", - "dev": true, - "requires": { - "min-indent": "^1.0.0" - } - }, - "supports-color": { - "version": "7.2.0", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", - "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", - "dev": true, - "requires": { - "has-flag": "^4.0.0" - } - }, - "svelte": { - "version": "3.38.2", - "resolved": "https://registry.npmjs.org/svelte/-/svelte-3.38.2.tgz", - "integrity": "sha512-q5Dq0/QHh4BLJyEVWGe7Cej5NWs040LWjMbicBGZ+3qpFWJ1YObRmUDZKbbovddLC9WW7THTj3kYbTOFmU9fbg==", - "dev": true - }, - "svelte-hmr": { - "version": "0.14.3", - "resolved": "https://registry.npmjs.org/svelte-hmr/-/svelte-hmr-0.14.3.tgz", - "integrity": 
"sha512-N56xX405zLMw2tpGHKRx5h+kmdeZwxI21pvyC6OyBHJDCF6DlwWBm9TifdQmSD4dloWSmpDPzHWYa3CSjfopUg==", - "dev": true, - "requires": {} - }, - "svelte-navigator": { - "version": "3.1.5", - "resolved": "https://registry.npmjs.org/svelte-navigator/-/svelte-navigator-3.1.5.tgz", - "integrity": "sha512-CGTaexasSLpUaTSN2AlYqii0JeisIgg7uZbm8XCLKlpM9Qv3IltlJ7Nvh90Xw9ND97KqtGOjNJ3LNwMN1ABV0w==", - "dev": true, - "requires": { - "svelte2tsx": "^0.1.151" - } - }, - "svelte-preprocess": { - "version": "4.7.3", - "resolved": "https://registry.npmjs.org/svelte-preprocess/-/svelte-preprocess-4.7.3.tgz", - "integrity": "sha512-Zx1/xLeGOIBlZMGPRCaXtlMe4ZA0faato5Dc3CosEqwu75MIEPuOstdkH6cy+RYTUYynoxzNaDxkPX4DbrPwRA==", - "dev": true, - "requires": { - "@types/pug": "^2.0.4", - "@types/sass": "^1.16.0", - "detect-indent": "^6.0.0", - "strip-indent": "^3.0.0" - } - }, - "svelte2tsx": { - "version": "0.1.191", - "resolved": "https://registry.npmjs.org/svelte2tsx/-/svelte2tsx-0.1.191.tgz", - "integrity": "sha512-pSciOMX9LAmvirtCilFRqrtHJxml4pdyMKq7Tjpac4EaWsO89Cfsjl04wBZ7YZKzW8UotbPOjCaJEoC/XDc/QQ==", - "dev": true, - "requires": { - "dedent-js": "^1.0.1", - "pascal-case": "^3.1.1" - } - }, - "tslib": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.2.0.tgz", - "integrity": "sha512-gS9GVHRU+RGn5KQM2rllAlR3dU6m7AcpJKdtH8gFvQiC4Otgk98XnmMU+nZenHt/+VhnBPWwgrJsyrdcw6i23w==", - "dev": true - }, - "typescript": { - "version": "4.2.4", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.2.4.tgz", - "integrity": "sha512-V+evlYHZnQkaz8TRBuxTA92yZBPotr5H+WhQ7bD3hZUndx5tGOa1fuCgeSjxAzM1RiN5IzvadIXTVefuuwZCRg==", - "dev": true - }, - "vite": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/vite/-/vite-2.3.2.tgz", - "integrity": "sha512-QhLdOompDrfkyryCNTts9HE+eJhvhN9ibKNJ5Q8DpQai+6nOsuIlaveZNg67e1O/2QaWqXeBo82eHnAs1De2bQ==", - "dev": true, - "requires": { - "esbuild": "^0.11.20", - "fsevents": "~2.3.1", - "postcss": "^8.2.10", - "resolve": "^1.19.0", - "rollup": "^2.38.5" - } - } - } -} diff --git a/batch2/svelte-batch/package.json b/batch2/svelte-batch/package.json deleted file mode 100644 index e256d65e9f4..00000000000 --- a/batch2/svelte-batch/package.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "name": "svelte-batch", - "version": "0.0.0", - "scripts": { - "dev": "vite", - "build": "vite build", - "serve": "vite preview" - }, - "devDependencies": { - "@sveltejs/vite-plugin-svelte": "^1.0.0-next.7", - "svelte": "^3.37.0", - "svelte-navigator": "^3.1.5", - "svelte-preprocess": "^4.7.2", - "typescript": "^4.2.4", - "vite": "^2.2.3" - } -} diff --git a/batch2/svelte-batch/public/favicon.ico b/batch2/svelte-batch/public/favicon.ico deleted file mode 100644 index d75d248ef0b15096a95054643a4d97f5d9b60846..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1150 zcmaKqSxA*Z7>4KMS_;~x;8i!JU{-`tpyx!I(n2NFMU)w2L{WiMS3#Lcp@vrpA*5Yp zSy6|`AkYfDD(e{`n8yX0pLf20X1@3RmKkGw`Vte3=0)aUq%ldx zm^49K+Hw0b#^`KboP)QXJOwbuVUFxlAs{RfqJ+twGylWfOp{Hc$s#253LlN1nsFVc zKa>40?h5(7PTC6ltDx)(Y&Ze2xggCq(kK? 
zTA`;gAfKD!+uFjpxc_A3+Ma(L28W=z4Gvs@r*ECk`;c45=S#;=oA|abt`f&j5&uJO z3Dn+&^gZ%h4JidsaTR{{!_Y8PUx(-%PosPy2gi@qIvBMMYz;e3L1{f~mrd9RdB>pZ zD}4R|sk_C`;=cT&r)c=8u>7h9)u32*SbL`xiq3(pq5C^5-sSOw;<|fv@nfXfl&U`2 z81K5ExDp;bf#DISW%IY%k&2-noShOoz-;kb(u?5RFX-ro?87j3GZdCXrFc8bTx}jd zz_n@djWnxc*TbbCjEq80FPyG}1zQwvjq7R6ZSWuQ@_#A*LN5n<3$BI?X}q%iD!B-s zdSFcNp!EgpJr6CAK?klug4>=)Tv z+F#{yt>6EK)3NU=L&y_W3UNaC?Tg=6YE0)^V;(0Mb0$WJ7>7@Lg0~+3x9d)!Pd - import { Router, Route } from 'svelte-navigator' - import '@hail/common/hail.css' - import BatchesPage from './pages/BatchesPage.svelte' - import BatchPage from './pages/BatchPage.svelte' - - -
- -
- - - - - - - -

Uh oh! That page does not exist...

-
-
-
-
diff --git a/batch2/svelte-batch/src/assets/svelte.png b/batch2/svelte-batch/src/assets/svelte.png deleted file mode 100644 index e673c91c7bcb0e3a1be737745af56990613a641e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5185 zcmb7Ic{r5c+ka+Z#*(pQFEeFJ2}O~Z8EF@d5F#cN<(nC6vWyuCh3ZR|LZ*_XWJ_TN zGm@>cg|g2KDNlX&N=se&V8QiJoo*%Kj+*cI2_v~tsxBn zz@`(&51#=5sJICQkT7x8Ql!%%u6zlnoR5ga1q=NDo|E#Tvlx+&i6{s!iYADXh@uR# zsDLVZaZglB7qwK1df1}TUeOF!w7eiTzrmZlAzn^C?2LmGAdfM@6NqH$J$fa(m%HH1 zEfIf;QtXMtHqMbFKSx~OKEuf3c~rB^bdVIWHs`$YVN>_&XMCrPgxJLYDO?fn5VAhz zS{B*|nZ)foWa$5LZCB%jF2cAcUORK-k8ut2urUfK=zcD`G@zLOQwDqfy#AkE*PAJx z4GhXXimv`pa!)O#G7HtL5)-th2wK70>Ye}Gbc4OQY3E&j(YUf>x;${qk(kCxEbmWI zRa1Ok9w9+fDE)D8K*t0V9-I9LPEuhSu@$-e+FCf5be=t#I@-)=37iq+*2{ba2H2FWiIdr6?Kc=LDOLd-zI-=UBUAUEa*oP{^!lu7LH2;!g18V=DQ5^+iQ!k z_q?5*CAo2QiN^^sS&F$uuNyr&q(NB8NGlnH{spB704y!@*#_GxnoJ8qq88l_0H z+N{Dd%ic8-6zjYP(|CfMwWz_vgae*Bibc6^4}Og8iZd$Yf4Repz2wpP>3;iml^>LE z`w;i4F4)5cz@2j~(2rZE^7n+Zt|0ArFaOnDB?vsW`og-;ZCkJ^5x)HRA?fCWgW)zB zg1~Q;P$%t_;4=ablxaey+KXQ#Xw*;6TBXLuGrh`S!3$3}W!F+Ez<6C=C$36`#$<4o z2Aq=F0bzwdNlU@mYD4k}PCy`=ROKjuMP9x;^KGmGwMRYm8*QDRWTM^$Gyh8QP44y# zw7$mydNNyM=`F6N=&QmP3(t%#k5_LV-qq&p!=wBhv8E=5kjvE3$O+~yx7&~UyC8_ zdv9csIBh?UT&>PkUg{VHHzZYoe}Xg?@|i;L__UJe=IPTwWY0%%dk#LMf0}Ac5k#XfN13Ts3vSg+4s*G0A2*i-!;o3ErBBhw2|*>K@EQww znf^f!xTE_@s7_PkuJ)~8rI}A;&6ld&a}7i3?1U)Pp-(-9EcnGvwz|YS&0_(h0e;dA zbBSOC`|;P9$%`iGmcT>9E6uKAPw4|J&SX)_6gE+>4gyy-1TB~UZUyw+;Zu=gr(wiZ z3HoBGc;BZ{)UPu5>~4^37zY%30f`CxB&WtPibuS|Y;D{aNIqr05-Z7eA%3ip5Su`- zSb#;)f^dqDc*mX?iLbEYa6E2NXN!=vFjGqjlm0fb%^zS;P-09~OdLn5d+7u9B8sZt zDL|(kE>dqXUPu>ov_Zx%jiZV+&c1+Ihn#>UE$`-B&VaOxE62#Es?vlP)aJgZDTVj= zYWcOyQ@GP-k72ie-G*$-V4@$%xbXoC=>+XyTwdF5t6j@^whHV|O!P*{YaUiQ5{b8; zr>x}Uo|yQW(=2Dw$3$c2=-K9-L`0=H1X&@y9nn@R*QmES;KDVBhKA1kI0RX&@Q&U( zZEv*fLeDCmj&40dS7Jl!^`ReE>(J!YL1Z|NP~R#`4!ZbzK&cLf6f*H`{#?q+dWJ)Z zE;le*hCP6kdU-5@x~nDj9$bd1to2-K2-4KyL^Xm5TB`CJJ|M13oBU>apA(C+IN+xc z{dvi-b$)i1jKBt;$rAG9&0t))j(N&03`^cbiCIttM9R5|C-^kg6(HsYK|Ho@j{1s$ zZhJ*9hkd?v%zE*6SFHZW=R#Uch#l2#bgAofCx}fDgHC-23)O2VYAEIdr&Iz4L6eh9 zvvdbLoEqmVgbVAi^EtCGjvb&p!z#3t`l%xw9*8i%i6)oV+COulKRG@iqiD17y!;yP zd!+y9?X@j{zP;Sg%Zxbl9Cy&Jl7X z1#?Mo4FtI~z0*VQWA%&DgYK2Z||2J*(0x8`gi> zxV0QcKX>)4YA2SUC3fkQyFdLjogxe(wgSJUofsu5w57^ z3+#?&yX#h36xC^deink;;{E+nyg};Nmpb9Ix4HJ?(rwoZ)#Odo$G|gtq~7YPqRh4( zh1ZA?z7enrUBo~5d>1fHwEuL8Y`nQ(^KeV-eyUKR7$WdAqkGklSBG49RabVZ@|_$U z5(RUUylOpjFk=d%4o#g01a`M7_MU_p8+dQZ^FB(UhhLaWUAB#1G$h2hB~+O%As$lX z;5DnxFjV|J1k)ejZQoz><{B+wxYAp$#rsZK%cH90XTbV+rNK`HD^$aDIy~$`kL=1V z`DjIA%#f)v6T$5{CSbt*co0r72lYjlUKk|PVo%7XI_b4T#PSd=@}MpzD6m6YMqxmg zog14%H-elu+8&v4tu$t6kCV{}wmPe-@$`>V=~P>Td7p3i__?d2W?didI7KO0`AtDS zNkYFh{fi?q_87+Zuy(-sy>bf*vYQb2Zu$O-%G;w6LaQy~^@6 zi%!2m+^_dUu`8tYw+hDBoVCb>vvT?YvVi1wJd0XA;TNQDu?xVxPSOf7n?0s5$TrhD4#!Ej8RWHotCK$T>pJr<6W}ft zs2=&E!~c=f`Z4B`3$P}ftU2Efp@%slfc-J;xRRfVU{RNDpRBms=jB%j5mx;R-|v;vEX+_-hII!_*f};KVAN?G&KRX2GAP z@M-P#1(Lu}Vf%(uI#n;@WUr&j6T9yeKm(vc3$0bvQVrP+0>Gj(#Mx=P07kC*HFfwN zL@_McO}h|6=EYg>1Wid!yHn^8@{Wrac4o6d;9D$$eI)Dq^iw7pk3j;75`Y_=EP$1W zV@}mQsr#6i*6kMpfC>Qgw};`VlrIpn0(C`5t*y2QT|UXZ83+LaJPXTFRLcbf&;$?? 
z*o01LS#cm2mpPaQt^Q6K4)<7a_aXez;t12qY*}+D5Y(;1-=Wkwzuh}`7!Jd@I*TP< z{kaqVyWCNRCgT21z|n_T{krVdCM4`SutmqRNR#5u{Qmfb-+6{vSI7Eyw!BMVJ_^_V z=e)8FLDBy9)HQtG^Qy*B9zxH2=uOs+Fi7E~92GST6s^KC-+fiaTdfwdNsskFo15Aw z>Y0)goNAwX{kFLGl+yEV)Wm3qF_(yxO)113`bU1q^?tmduw|-0m;uYduI4Y_u*6%Q zD_HN#Ir9SFY2xda>Rz&Y!FC)~sCq?a{nIB@6U;;a8yAD{C0-UVtlm}gpx(Jv#iCS5 za~|tC=IwX7Ce%$se?DYzGp13*Dcw74EzW6C4fnsgQ1_ftW(glh zYR`vEVWs!4#3U~BlYDPlNkkH3?^}zBVx;XO=;oPdo>SK>Wmc7%E)<{7oEXQ)P_97y zW^Mys9}K7)M**F*?y+#TLcw6>1W3pOwun;-HlY$c!d|P?OP0jdwL{H#Ju41xj#=wQ zK1%#&e@95andgyN3Xp->QqM`sS$Hr$>(OL$g~x$7q;xwy^sp4bD$|?g$X<~}&jbCG z!mwp&N@N864PGXd{FIENON#LY4&g3Hb68}-^3p7<7|&i7!qYv82c zWzcl^2op_+0jl*Z)ll9|^7uIEu}Vo`l`?kH>gC>=20o%p1Sho>_*hqbcTI!%!uka) zm37F1BxUAQlmHfdlujuuchBZ$u^?W4Db}C;@aS>HzF2dqzyMOy*Sh z(5Wv}OKL;O7>XObV}F;DhLVKI!>&4SlHa~ZNj{@va7%gk!tN9yH)f`)Y>BNNee-wqA@-P7 zmo+fE1fDFDy5jJ;Xx%Vphi<8q*sE+o6j#svA+b8COA9Tb>VG}kVH{;4npU-WV@SN> z7h5iYHXpu;bW`YCjvKbdZ+RuWyp}W%apAIAI#7XabEo}8k*lC(H12@_m>L8(PF&v^ zaNz#Z{+A36u5PQePx%t|DWl-{b)%94C(3iFnQCKqB@UdvUJ&t}uRrZ-(~}LzHt>s? zI4^1WJ-_da&#$`sHM;;m#u)`M=-XB+@(Dr3e1V1XFj+N$#+uG$EhjA+$Y(InEUE1| zzr;{K2u|<}LNm zeA;QzyA%d`Y%7x3CQmytPLj~7MjBV}+Y1oeosBMhsAZtpM^q-K2SK$1RuY)*r>Ac) zyx&D(@M4P!OS?bxb&=*qsLrp#$aL5l~B@cgqSn$l)9a+Ej#0$9I`r}~GR>lgGJLL0AYHaiMz z57?PKj3e0X-KfnMGScNGpI}CopnjI306}!4=8YMK!NNC_o5B*XvJ~Q7gN|s#j?BxH z&pqp-7!uE}Lf;N#&_OrAd-W3Ju4q6>@mIUVW8H-gbD950f3-t{IF#cVf1gTT#;Fi% zL3ztx?fKh2{6f@fl5oybzmlxNPrT}|$H{0{B)$ED+1bc(~OSM{-l{1dmLsMzh(PL+# z^-QYsfRKLw0CxvyusMaFRAGzu=X-Ta&i1yewRWmEXKzr^arb{88cLjS{NPaL18a*Igysgcdvt!TEjakV5xkVE<*{Q0J4)t!~JyB2ikK)7;hr{KEi1Gggj~dWS diff --git a/batch2/svelte-batch/src/components/BatchTable.svelte b/batch2/svelte-batch/src/components/BatchTable.svelte deleted file mode 100644 index eb60c9391e1..00000000000 --- a/batch2/svelte-batch/src/components/BatchTable.svelte +++ /dev/null @@ -1,15 +0,0 @@ - - -

Batches

-
    - {#each batches as batch} -
  • - {batch.id} - {batch.state} -
  • - {/each} -
diff --git a/batch2/svelte-batch/src/components/JobTable.svelte b/batch2/svelte-batch/src/components/JobTable.svelte deleted file mode 100644 index 32ec0e8f6ba..00000000000 --- a/batch2/svelte-batch/src/components/JobTable.svelte +++ /dev/null @@ -1,14 +0,0 @@ - - -

Batch #{batchId}

-
    - {#each jobs as job} -
  1. {JSON.stringify(job, null, 2)}
  2. - {/each} -
diff --git a/batch2/svelte-batch/src/global.d.ts b/batch2/svelte-batch/src/global.d.ts deleted file mode 100644 index 4078e7476a2..00000000000 --- a/batch2/svelte-batch/src/global.d.ts +++ /dev/null @@ -1,2 +0,0 @@ -/// -/// diff --git a/batch2/svelte-batch/src/main.ts b/batch2/svelte-batch/src/main.ts deleted file mode 100644 index d8200ac4fe3..00000000000 --- a/batch2/svelte-batch/src/main.ts +++ /dev/null @@ -1,7 +0,0 @@ -import App from './App.svelte' - -const app = new App({ - target: document.getElementById('app') -}) - -export default app diff --git a/batch2/svelte-batch/src/pages/BatchPage.svelte b/batch2/svelte-batch/src/pages/BatchPage.svelte deleted file mode 100644 index 4af01ffeb68..00000000000 --- a/batch2/svelte-batch/src/pages/BatchPage.svelte +++ /dev/null @@ -1,25 +0,0 @@ - - -{#if jobs} - -{:else} -
Loading...
-{/if} diff --git a/batch2/svelte-batch/src/pages/BatchesPage.svelte b/batch2/svelte-batch/src/pages/BatchesPage.svelte deleted file mode 100644 index 99fd07e791f..00000000000 --- a/batch2/svelte-batch/src/pages/BatchesPage.svelte +++ /dev/null @@ -1,24 +0,0 @@ - - -{#if batches} - -{:else} -
Loading...
-{/if} diff --git a/batch2/svelte-batch/svelte.config.cjs b/batch2/svelte-batch/svelte.config.cjs deleted file mode 100644 index 0b32783a0c9..00000000000 --- a/batch2/svelte-batch/svelte.config.cjs +++ /dev/null @@ -1,7 +0,0 @@ -const sveltePreprocess = require('svelte-preprocess') - -module.exports = { - // Consult https://github.com/sveltejs/svelte-preprocess - // for more information about preprocessors - preprocess: sveltePreprocess() -} diff --git a/batch2/svelte-batch/tsconfig.json b/batch2/svelte-batch/tsconfig.json deleted file mode 100644 index 15d55f64d58..00000000000 --- a/batch2/svelte-batch/tsconfig.json +++ /dev/null @@ -1,37 +0,0 @@ -{ - "compilerOptions": { - "moduleResolution": "node", - "target": "esnext", - "module": "esnext", - /** - * svelte-preprocess cannot figure out whether you have - * a value or a type, so tell TypeScript to enforce using - * `import type` instead of `import` for Types. - */ - "importsNotUsedAsValues": "error", - "isolatedModules": true, - "resolveJsonModule": true, - /** - * To have warnings / errors of the Svelte compiler at the - * correct position, enable source maps by default. - */ - "sourceMap": true, - "esModuleInterop": true, - "skipLibCheck": true, - "forceConsistentCasingInFileNames": true, - "baseUrl": ".", - /** - * Typecheck JS in `.svelte` and `.js` files by default. - * Disable checkJs if you'd like to use dynamic types in JS. - * Note that setting allowJs false does not prevent the use - * of JS in `.svelte` files. - */ - "allowJs": true, - "checkJs": true - }, - /** - * Use global.d.ts instead of compilerOptions.types - * to avoid limiting type declarations. - */ - "include": ["src/**/*.d.ts", "src/**/*.ts", "src/**/*.js", "src/**/*.svelte"] -} diff --git a/batch2/svelte-batch/vite.config.js b/batch2/svelte-batch/vite.config.js deleted file mode 100644 index bc4d2e689af..00000000000 --- a/batch2/svelte-batch/vite.config.js +++ /dev/null @@ -1,13 +0,0 @@ -import { defineConfig } from 'vite' -import svelte from '@sveltejs/vite-plugin-svelte' - -// https://vitejs.dev/config/ -export default defineConfig({ - plugins: [svelte()], - server: { - proxy: { - '/api': 'http://localhost:5050' - }, - cors: true, - } -}) diff --git a/benchmark-service/Makefile b/benchmark-service/Makefile index ca075066cea..6098045eff5 100644 --- a/benchmark-service/Makefile +++ b/benchmark-service/Makefile @@ -1,8 +1,7 @@ include ../config.mk -TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) - -BENCHMARK_IMAGE := $(DOCKER_PREFIX)/benchmark:$(TOKEN) +BENCHMARK_LATEST = $(DOCKER_PREFIX)/benchmark:latest +BENCHMARK_IMAGE = $(DOCKER_PREFIX)/benchmark:$(shell docker images -q --no-trunc benchmark:latest | sed -e 's,[^:]*:,,') EXTRA_PYTHONPATH := ../hail/python:../gear:../web_common PYTHON := PYTHONPATH=$${PYTHONPATH:+$${PYTHONPATH}:}$(EXTRA_PYTHONPATH) python3 @@ -15,12 +14,20 @@ check: .PHONY: build build: - $(MAKE) -C ../docker service-base - python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"'$$(cat ../docker/service-base-image-ref)'"}}' Dockerfile Dockerfile.out - ../docker-build.sh .. benchmark/Dockerfile.out $(BENCHMARK_IMAGE) + $(MAKE) -C ../docker build + -docker pull $(BENCHMARK_LATEST) + python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"service-base"}}' Dockerfile Dockerfile.out + docker build -f Dockerfile.out -t benchmark --cache-from benchmark,$(BENCHMARK_LATEST),service-base .. 
+ +.PHONY: push +push: build + docker tag benchmark $(BENCHMARK_LATEST) + docker push $(BENCHMARK_LATEST) + docker tag benchmark $(BENCHMARK_IMAGE) + docker push $(BENCHMARK_IMAGE) .PHONY: deploy -deploy: build +deploy: push ! [ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default python3 ../ci/jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"default_ns":{"name":"$(NAMESPACE)"}, "benchmark_image":{"image":"$(BENCHMARK_IMAGE)"},"benchmark_database":{"user_secret_name":"sql-benchmark-user-config"},"global":{"project":"$(PROJECT)","zone":"$(ZONE)","domain":"$(DOMAIN)"}}' deployment.yaml deployment.yaml.out kubectl -n $(NAMESPACE) apply -f deployment.yaml.out diff --git a/benchmark/python/benchmark_hail/compare/compare.py b/benchmark/python/benchmark_hail/compare/compare.py index b099400d93a..2484f7c2d8c 100644 --- a/benchmark/python/benchmark_hail/compare/compare.py +++ b/benchmark/python/benchmark_hail/compare/compare.py @@ -113,12 +113,10 @@ def format(name, ratio, t1, t2, memory_ratio, mem1, mem2): print(format('Benchmark Name', 'Ratio', 'Time 1', 'Time 2', 'Mem Ratio', 'Mem 1 (MB)', 'Mem 2 (MB)')) print(format('--------------', '-----', '------', '------', '---------', '----------', '----------')) for name, r1, r2, m1, m2 in comparison: + comps.append(r2 / r1) print(format(name, fmt_diff(r2 / r1), fmt_time(r1, 8), fmt_time(r2, 8), fmt_mem_ratio(m2, m1), fmt_mem(m1), fmt_mem(m2))) - if name.startswith('sentinel'): - continue - comps.append(r2 / r1) print('----------------------') print(f'Harmonic mean: {fmt_diff(hmean(comps))}') diff --git a/benchmark/python/benchmark_hail/run/__init__.py b/benchmark/python/benchmark_hail/run/__init__.py index 5e6846830ec..b7f5f6cea92 100644 --- a/benchmark/python/benchmark_hail/run/__init__.py +++ b/benchmark/python/benchmark_hail/run/__init__.py @@ -5,7 +5,6 @@ from . import linalg_benchmarks from . import shuffle_benchmarks from . import combiner_benchmarks -from . 
import sentinel_benchmarks __all__ = [ 'run_all', @@ -17,5 +16,4 @@ 'linalg_benchmarks', 'methods_benchmarks', 'shuffle_benchmarks', - 'combiner_benchmarks', - 'sentinel_benchmarks'] + 'combiner_benchmarks'] diff --git a/benchmark/python/benchmark_hail/run/methods_benchmarks.py b/benchmark/python/benchmark_hail/run/methods_benchmarks.py index 8b33a95546e..10a81b0414a 100644 --- a/benchmark/python/benchmark_hail/run/methods_benchmarks.py +++ b/benchmark/python/benchmark_hail/run/methods_benchmarks.py @@ -212,36 +212,4 @@ def linear_regression_rows_nd(mt_path): res = hl._linear_regression_rows_nd(y=[mt[key] for key in pheno_dict.keys()], x=mt.x, covariates=[mt[key] for key in cov_dict.keys()]) - res._force_count() - -@benchmark(args=random_doubles.handle('mt')) -def logistic_regression_rows_wald(mt_path): - mt = hl.read_matrix_table(mt_path) - mt = mt.head(2000) - num_phenos = 5 - num_covs = 2 - pheno_dict = {f"pheno_{i}": hl.rand_bool(.5, seed=i) for i in range(num_phenos)} - cov_dict = {f"cov_{i}": hl.rand_unif(0, 1, seed=i) for i in range(num_covs)} - mt = mt.annotate_cols(**pheno_dict) - mt = mt.annotate_cols(**cov_dict) - res = hl.logistic_regression_rows(test='wald', - y=[mt[key] for key in pheno_dict.keys()], - x=mt.x, - covariates=[mt[key] for key in cov_dict.keys()]) - res._force_count() - -@benchmark(args=random_doubles.handle('mt')) -def logistic_regression_rows_wald_nd(mt_path): - mt = hl.read_matrix_table(mt_path) - mt = mt.head(2000) - num_phenos = 5 - num_covs = 2 - pheno_dict = {f"pheno_{i}": hl.rand_bool(.5, seed=i) for i in range(num_phenos)} - cov_dict = {f"cov_{i}": hl.rand_unif(0, 1, seed=i) for i in range(num_covs)} - mt = mt.annotate_cols(**pheno_dict) - mt = mt.annotate_cols(**cov_dict) - res = hl._logistic_regression_rows_nd(test='wald', - y=[mt[key] for key in pheno_dict.keys()], - x=mt.x, - covariates=[mt[key] for key in cov_dict.keys()]) - res._force_count() + res._force_count() \ No newline at end of file diff --git a/benchmark/python/benchmark_hail/run/sentinel_benchmarks.py b/benchmark/python/benchmark_hail/run/sentinel_benchmarks.py deleted file mode 100644 index 296ec4e6104..00000000000 --- a/benchmark/python/benchmark_hail/run/sentinel_benchmarks.py +++ /dev/null @@ -1,50 +0,0 @@ -import hail as hl - -from .resources import * -from .utils import benchmark -import gzip - - -def read_gunzip(path): - with gzip.open(path) as f: - for line in f: - pass - - -@benchmark(args=many_ints_table.handle('tsv')) -def sentinel_read_gunzip_1(path): - read_gunzip(path) - - -@benchmark(args=many_ints_table.handle('tsv')) -def sentinel_read_gunzip_2(path): - read_gunzip(path) - - -@benchmark(args=many_ints_table.handle('tsv')) -def sentinel_read_gunzip_3(path): - read_gunzip(path) - - -def iter_hash(m, n): - x = 0 - for i in range(m): - y = 0 - for j in range(n): - y = hash(y + j) - x += y - - -@benchmark() -def sentinel_cpu_hash_1(): - iter_hash(10000, 25000) - - -@benchmark() -def sentinel_cpu_hash_2(): - iter_hash(10000, 25000) - - -@benchmark() -def sentinel_cpu_hash_3(): - iter_hash(10000, 25000) diff --git a/benchmark/scripts/benchmark_in_batch.py b/benchmark/scripts/benchmark_in_batch.py index 2ccd900ccb8..c3e023924a5 100644 --- a/benchmark/scripts/benchmark_in_batch.py +++ b/benchmark/scripts/benchmark_in_batch.py @@ -1,8 +1,7 @@ import os import random -import re import sys -import time +import re from benchmark_hail.run.resources import all_resources from benchmark_hail.run.utils import list_benchmarks @@ -19,10 +18,9 @@ labeled_sha = SHA label = 
os.environ.get('BENCHMARK_LABEL') - timestamp = time.strftime('%Y-%m-%d') if label: labeled_sha = f'{labeled_sha}-{label}' - output_file = os.path.join(BUCKET_BASE, f'{timestamp}-{labeled_sha}.json') + output_file = os.path.join(BUCKET_BASE, f'{labeled_sha}.json') permissions_test_file = os.path.join(BUCKET_BASE, f'permissions-test') b = hb.Batch( diff --git a/bootstrap-gateway/Makefile b/bootstrap-gateway/Makefile index c535ccd9800..dbb9616a73a 100644 --- a/bootstrap-gateway/Makefile +++ b/bootstrap-gateway/Makefile @@ -1,17 +1,23 @@ include ../config.mk -.PHONY: build deploy +.PHONY: build push deploy -TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) - -GATEWAY_IMAGE := $(DOCKER_PREFIX)/gateway:$(TOKEN) +GATEWAY_LATEST = $(DOCKER_PREFIX)/gateway:latest +GATEWAY_IMAGE = $(DOCKER_PREFIX)/gateway:$(shell docker images -q --no-trunc gateway | sed -e 's,[^:]*:,,') build: $(MAKE) -C ../docker hail-ubuntu - python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"'$$(cat ../docker/hail-ubuntu-image-ref)'"}}' Dockerfile Dockerfile.out - ../docker-build.sh . Dockerfile.out $(GATEWAY_IMAGE) + -docker pull $(GATEWAY_LATEST) + python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"hail-ubuntu"}}' Dockerfile Dockerfile.out + docker build -t gateway -f Dockerfile.out --cache-from gateway,$(GATEWAY_LATEST),hail-ubuntu . + +push: build + docker tag gateway $(GATEWAY_LATEST) + docker push $(GATEWAY_LATEST) + docker tag gateway $(GATEWAY_IMAGE) + docker push $(GATEWAY_IMAGE) -deploy: build +deploy: push python3 ../ci/jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":true,"global":{"ip":"$(IP)"}}' service.yaml service.yaml.out kubectl -n default apply -f service.yaml.out python3 ../ci/jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":true,"gateway_image":{"image":"$(GATEWAY_IMAGE)"},"global":{"project":"$(PROJECT)"}}' deployment.yaml deployment.yaml.out diff --git a/build.yaml b/build.yaml index 8c7c9b0cf84..53e84ab088f 100644 --- a/build.yaml +++ b/build.yaml @@ -46,7 +46,6 @@ steps: - gcr-pull-key - gcr-push-service-account-key - test-gsa-key - - test-aws-key - auth-oauth2-client-secret - benchmark-gsa-key - kind: buildImage2 @@ -157,7 +156,7 @@ steps: - from: /repo/tls to: /io/tls dependsOn: - - hail_ubuntu_image + - service_base_image - merge_code - kind: runImage name: create_ssl_config_hail_root @@ -628,10 +627,10 @@ steps: - service_base_image - merge_code - kind: buildImage2 - name: hail_buildkit_image - dockerFile: /io/repo/ci/buildkit/Dockerfile + name: hail_kaniko_image + dockerFile: /io/repo/ci/kaniko/Dockerfile contextPath: /io/repo/ci - publishAs: hail-buildkit + publishAs: hail-kaniko inputs: - from: /repo/ci to: /io/repo/ci @@ -1253,10 +1252,8 @@ steps: python3 -m pip install --no-dependencies hail-*-py3-none-any.whl export HAIL_TEST_RESOURCES_DIR=./resources export HAIL_DOCTEST_DATA_DIR=./data - export HAIL_TEST_GCS_BUCKET=cpg-hail-test + export HAIL_TEST_BUCKET=cpg-hail-test export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json - export HAIL_TEST_S3_BUCKET=hail-test-dy5rg - export AWS_SHARED_CREDENTIALS_FILE=/test-aws-key/credentials export PYSPARK_SUBMIT_ARGS="--driver-memory 6g pyspark-shell" export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=0 @@ -1275,10 +1272,6 @@ steps: namespace: valueFrom: default_ns.name mountPath: /test-gsa-key - - name: test-aws-key - namespace: - valueFrom: default_ns.name - mountPath: /test-aws-key dependsOn: - default_ns - hail_run_tests_image @@ 
-1300,10 +1293,8 @@ steps: python3 -m pip install --no-dependencies hail-*-py3-none-any.whl export HAIL_TEST_RESOURCES_DIR=./resources export HAIL_DOCTEST_DATA_DIR=./data - export HAIL_TEST_GCS_BUCKET=cpg-hail-test + export HAIL_TEST_BUCKET=cpg-hail-test export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json - export HAIL_TEST_S3_BUCKET=hail-test-dy5rg - export AWS_SHARED_CREDENTIALS_FILE=/test-aws-key/credentials export PYSPARK_SUBMIT_ARGS="--driver-memory 6g pyspark-shell" export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=1 @@ -1322,10 +1313,6 @@ steps: namespace: valueFrom: default_ns.name mountPath: /test-gsa-key - - name: test-aws-key - namespace: - valueFrom: default_ns.name - mountPath: /test-aws-key dependsOn: - default_ns - hail_run_tests_image @@ -1347,10 +1334,8 @@ steps: python3 -m pip install --no-dependencies hail-*-py3-none-any.whl export HAIL_TEST_RESOURCES_DIR=./resources export HAIL_DOCTEST_DATA_DIR=./data - export HAIL_TEST_GCS_BUCKET=cpg-hail-test + export HAIL_TEST_BUCKET=cpg-hail-test export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json - export HAIL_TEST_S3_BUCKET=hail-test-dy5rg - export AWS_SHARED_CREDENTIALS_FILE=/test-aws-key/credentials export PYSPARK_SUBMIT_ARGS="--driver-memory 6g pyspark-shell" export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=2 @@ -1369,10 +1354,6 @@ steps: namespace: valueFrom: default_ns.name mountPath: /test-gsa-key - - name: test-aws-key - namespace: - valueFrom: default_ns.name - mountPath: /test-aws-key dependsOn: - default_ns - hail_run_tests_image @@ -1394,10 +1375,8 @@ steps: python3 -m pip install --no-dependencies hail-*-py3-none-any.whl export HAIL_TEST_RESOURCES_DIR=./resources export HAIL_DOCTEST_DATA_DIR=./data - export HAIL_TEST_GCS_BUCKET=cpg-hail-test + export HAIL_TEST_BUCKET=cpg-hail-test export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json - export HAIL_TEST_S3_BUCKET=hail-test-dy5rg - export AWS_SHARED_CREDENTIALS_FILE=/test-aws-key/credentials export PYSPARK_SUBMIT_ARGS="--driver-memory 6g pyspark-shell" export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=3 @@ -1416,10 +1395,6 @@ steps: namespace: valueFrom: default_ns.name mountPath: /test-gsa-key - - name: test-aws-key - namespace: - valueFrom: default_ns.name - mountPath: /test-aws-key dependsOn: - default_ns - hail_run_tests_image @@ -1441,10 +1416,8 @@ steps: python3 -m pip install --no-dependencies hail-*-py3-none-any.whl export HAIL_TEST_RESOURCES_DIR=./resources export HAIL_DOCTEST_DATA_DIR=./data - export HAIL_TEST_GCS_BUCKET=cpg-hail-test + export HAIL_TEST_BUCKET=cpg-hail-test export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json - export HAIL_TEST_S3_BUCKET=hail-test-dy5rg - export AWS_SHARED_CREDENTIALS_FILE=/test-aws-key/credentials export PYSPARK_SUBMIT_ARGS="--driver-memory 6g pyspark-shell" export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=4 @@ -1463,10 +1436,6 @@ steps: namespace: valueFrom: default_ns.name mountPath: /test-gsa-key - - name: test-aws-key - namespace: - valueFrom: default_ns.name - mountPath: /test-aws-key dependsOn: - default_ns - hail_run_tests_image @@ -1484,10 +1453,8 @@ steps: tar xzf test.tar.gz tar xvf debug-wheel-container.tar python3 -m pip install --no-dependencies hail-*-py3-none-any.whl - export HAIL_TEST_GCS_BUCKET=cpg-hail-test + export HAIL_TEST_BUCKET=cpg-hail-test export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json - export HAIL_TEST_S3_BUCKET=hail-test-dy5rg - export AWS_SHARED_CREDENTIALS_FILE=/test-aws-key/credentials python3 -m pytest --log-cli-level=INFO -s -vv --instafail 
--durations=50 -n 10 test/hailtop/aiotools/test_copy.py inputs: - from: /debug-wheel-container.tar @@ -1499,10 +1466,6 @@ steps: namespace: valueFrom: default_ns.name mountPath: /test-gsa-key - - name: test-aws-key - namespace: - valueFrom: default_ns.name - mountPath: /test-aws-key dependsOn: - default_ns - hail_run_image @@ -1524,7 +1487,7 @@ steps: python3 -m pip install --no-dependencies hail-*-py3-none-any.whl export HAIL_TEST_RESOURCES_DIR=./resources export HAIL_DOCTEST_DATA_DIR=./data - export HAIL_TEST_GCS_BUCKET=cpg-hail-test + export HAIL_TEST_BUCKET=cpg-hail-test export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export PYSPARK_SUBMIT_ARGS="--driver-memory 6g pyspark-shell" python3 -m pytest -m unchecked_allocator --ignore=test/hailtop/batch/ --log-cli-level=INFO -s -vv --instafail --durations=50 test @@ -1641,7 +1604,7 @@ steps: python3 -m pip install --no-dependencies hail-*-py3-none-any.whl export HAIL_TEST_RESOURCES_DIR=./resources export HAIL_DOCTEST_DATA_DIR=./data - export HAIL_TEST_GCS_BUCKET=cpg-hail-test + export HAIL_TEST_BUCKET=cpg-hail-test export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=2 export HAIL_QUERY_BACKEND=local @@ -1763,7 +1726,7 @@ steps: script: | set -ex export HAIL_SHORT_VERSION='0.2' - export SPHINXOPTS='-tgenerate_notebook_outputs' + export SPHINXOPTS='-tchecktutorial' mkdir -p {{ token }}/python cd {{ token }} @@ -2501,7 +2464,6 @@ steps: - base_image - build_hail_jar_only - merge_code - - create_test_gsa_keys - kind: deploy name: deploy_query namespace: @@ -3071,7 +3033,7 @@ steps: - deploy_batch - deploy_ci_agent - create_certs - - hail_buildkit_image + - hail_kaniko_image - kind: runImage name: test_ci image: @@ -3119,7 +3081,6 @@ steps: cd /io/hailtop set -ex export HAIL_GSA_KEY_FILE=/test-gsa-key/key.json - export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=0 export DOCKER_PREFIX="{{ global.docker_prefix }}" @@ -3172,7 +3133,6 @@ steps: cd /io/hailtop set -ex export HAIL_GSA_KEY_FILE=/test-gsa-key/key.json - export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=1 export DOCKER_PREFIX="{{ global.docker_prefix }}" @@ -3225,7 +3185,6 @@ steps: cd /io/hailtop set -ex export HAIL_GSA_KEY_FILE=/test-gsa-key/key.json - export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=2 export DOCKER_PREFIX="{{ global.docker_prefix }}" @@ -3278,7 +3237,6 @@ steps: cd /io/hailtop set -ex export HAIL_GSA_KEY_FILE=/test-gsa-key/key.json - export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=3 export DOCKER_PREFIX="{{ global.docker_prefix }}" @@ -3331,7 +3289,6 @@ steps: cd /io/hailtop set -ex export HAIL_GSA_KEY_FILE=/test-gsa-key/key.json - export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=4 export DOCKER_PREFIX="{{ global.docker_prefix }}" @@ -3383,7 +3340,6 @@ steps: script: | set -ex export HAIL_GSA_KEY_FILE=/test-gsa-key/key.json - export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json cd /io/hailtop/batch hailctl config set batch/billing_project test hailctl config set batch/bucket cpg-hail-test diff --git a/ci/Dockerfile.ci-utils b/ci/Dockerfile.ci-utils index f8cf1c7b6ca..557a30c0dbc 100644 --- a/ci/Dockerfile.ci-utils +++ b/ci/Dockerfile.ci-utils @@ -1,5 +1,6 @@ FROM {{ service_base_image.image }} +RUN hail-apt-get-install docker.io RUN hail-pip-install 
twine COPY jinja2_render.py . COPY wait-for.py . diff --git a/ci/Makefile b/ci/Makefile index fc155da4a7e..40c2067d193 100644 --- a/ci/Makefile +++ b/ci/Makefile @@ -1,10 +1,13 @@ include ../config.mk -TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) +CI_UTILS_LATEST = $(DOCKER_PREFIX)/ci-utils:latest +CI_UTILS_IMAGE = $(DOCKER_PREFIX)/ci-utils:$(shell docker images -q --no-trunc ci-utils | sed -e 's,[^:]*:,,') -CI_UTILS_IMAGE := $(DOCKER_PREFIX)/ci-utils:$(TOKEN) -CI_IMAGE := $(DOCKER_PREFIX)/ci:$(TOKEN) -HAIL_BUILDKIT_IMAGE := $(DOCKER_PREFIX)/hail-buildkit:$(TOKEN) +CI_LATEST = $(DOCKER_PREFIX)/ci:latest +CI_IMAGE = $(DOCKER_PREFIX)/ci:$(shell docker images -q --no-trunc ci | sed -e 's,[^:]*:,,') + +HAIL_KANIKO_LATEST = $(DOCKER_PREFIX)/hail_kaniko:latest +HAIL_KANIKO_IMAGE = $(DOCKER_PREFIX)/hail_kaniko:$(shell docker images -q --no-trunc hail_kaniko | sed -e 's,[^:]*:,,') EXTRA_PYTHONPATH := ../batch:../hail/python:../gear:../web_common PYTHON := PYTHONPATH=$${PYTHONPATH:+$${PYTHONPATH}:}$(EXTRA_PYTHONPATH) python3 @@ -23,27 +26,48 @@ check: blacken: $(BLACK) -.PHONY: service-base -service-base: - $(MAKE) -C ../docker service-base - .PHONY: build-ci-utils -build-ci-utils: service-base - python3 jinja2_render.py '{"service_base_image":{"image":"'$$(cat ../docker/service-base-image-ref)'"}}' Dockerfile.ci-utils Dockerfile.ci-utils.out - ../docker-build.sh . Dockerfile.ci-utils.out $(CI_UTILS_IMAGE) +build-ci-utils: + $(MAKE) -C ../docker build + -docker pull $(CI_UTILS_LATEST) + python3 jinja2_render.py '{"service_base_image":{"image":"service-base"}}' Dockerfile.ci-utils Dockerfile.ci-utils.out + docker build -t ci-utils -f Dockerfile.ci-utils.out --cache-from ci-utils,$(CI_UTILS_LATEST),service-base . + +.PHONY: push-ci-utils +push-ci-utils: build-ci-utils + docker tag ci-utils $(CI_UTILS_LATEST) + docker push $(CI_UTILS_LATEST) + docker tag ci-utils $(CI_UTILS_IMAGE) + docker push $(CI_UTILS_IMAGE) -.PHONY: build-hail-buildkit -build-hail-buildkit: - ../docker-build.sh . buildkit/Dockerfile $(HAIL_BUILDKIT_IMAGE) +.PHONY: hail-kaniko-build +hail-kaniko-build: + -docker pull $(HAIL_KANIKO_LATEST) + docker build -t hail_kaniko -f kaniko/Dockerfile --cache-from hail_kaniko,$(HAIL_KANIKO_LATEST),service-base . + +.PHONY: push +hail-kaniko-push: hail-kaniko-build + docker tag hail_kaniko $(HAIL_KANIKO_LATEST) + docker push $(HAIL_KANIKO_LATEST) + docker tag hail_kaniko $(HAIL_KANIKO_IMAGE) + docker push $(HAIL_KANIKO_IMAGE) .PHONY: build -build: service-base - python3 jinja2_render.py '{"service_base_image":{"image":"'$$(cat ../docker/service-base-image-ref)'"}}' Dockerfile Dockerfile.out - ../docker-build.sh .. ci/Dockerfile.out $(CI_IMAGE) +build: + $(MAKE) -C ../docker build + -docker pull $(CI_LATEST) + python3 jinja2_render.py '{"service_base_image":{"image":"service-base"}}' Dockerfile Dockerfile.out + docker build -t ci -f Dockerfile.out --cache-from ci,$(CI_LATEST),service-base .. + +.PHONY: push +push: build + docker tag ci $(CI_LATEST) + docker push $(CI_LATEST) + docker tag ci $(CI_IMAGE) + docker push $(CI_IMAGE) .PHONY: deploy -deploy: build build-ci-utils build-hail-buildkit +deploy: push push-ci-utils hail-kaniko-push ! [ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default - ! 
[ -z "$(CI_DEVELOPER_TEST_REPO_TOKEN)" -a $(NAMESPACE) != "default" ] # for dev namespaces, you must specify a github repo by its token, check your currently running CI's value for HAIL_WATCHED_BRANCHES - python3 jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"default_ns":{"name":"$(NAMESPACE)"},"ci_image":{"image":"$(CI_IMAGE)"},"global":{"project":"$(PROJECT)","zone":"$(ZONE)","ip":"$(IP)","domain":"$(DOMAIN)", "k8s_server_url":"$(KUBERNETES_SERVER_URL)"},"ci_utils_image":{"image":"$(CI_UTILS_IMAGE)"},"ci_database":{"user_secret_name":"sql-ci-user-config"},"hail_buildkit_image":{"image":"$(HAIL_BUILDKIT_IMAGE)"},"create_ci_test_repo":{"token":"$(CI_DEVELOPER_TEST_REPO_TOKEN)"}}' deployment.yaml deployment.yaml.out + python3 jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"default_ns":{"name":"$(NAMESPACE)"},"ci_image":{"image":"$(CI_IMAGE)"},"global":{"project":"$(PROJECT)","zone":"$(ZONE)","ip":"$(IP)","domain":"$(DOMAIN)", "k8s_server_url":"$(KUBERNETES_SERVER_URL)"},"ci_utils_image":{"image":"$(CI_UTILS_IMAGE)"},"ci_database":{"user_secret_name":"sql-ci-user-config"},"hail_kaniko_image":{"image":"$(HAIL_KANIKO_IMAGE)"}}' deployment.yaml deployment.yaml.out kubectl -n $(NAMESPACE) apply -f deployment.yaml.out diff --git a/ci/buildkit/Dockerfile b/ci/buildkit/Dockerfile deleted file mode 100644 index 85a346933c0..00000000000 --- a/ci/buildkit/Dockerfile +++ /dev/null @@ -1,6 +0,0 @@ -FROM moby/buildkit:v0.8.3-rootless -USER root -RUN apk add python3 py-pip && pip3 install jinja2 -USER user -COPY --chown=user:user jinja2_render.py /home/user/jinja2_render.py -COPY --chown=user:user buildkit/convert-google-application-credentials-to-docker-auth-config /home/user/convert-google-application-credentials-to-docker-auth-config diff --git a/ci/ci/build.py b/ci/ci/build.py index e07dcc6526f..cd711108320 100644 --- a/ci/ci/build.py +++ b/ci/ci/build.py @@ -1,4 +1,5 @@ import abc +import os.path import json import logging from collections import defaultdict, Counter @@ -16,7 +17,7 @@ DOMAIN, IP, CI_UTILS_IMAGE, - BUILDKIT_IMAGE, + KANIKO_IMAGE, DEFAULT_NAMESPACE, KUBERNETES_SERVER_URL, BUCKET, @@ -247,18 +248,11 @@ def __init__( self.publish_as = publish_as self.inputs = inputs self.resources = resources - self.extra_cache_repository = None - if publish_as: - self.extra_cache_repository = f'{DOCKER_PREFIX}/{self.publish_as}' if params.scope == 'deploy' and publish_as and not is_test_deployment: self.base_image = f'{DOCKER_PREFIX}/{self.publish_as}' else: self.base_image = f'{DOCKER_PREFIX}/ci-intermediate' self.image = f'{self.base_image}:{self.token}' - if publish_as: - self.cache_repository = f'{DOCKER_PREFIX}/{self.publish_as}:cache' - else: - self.cache_repository = f'{DOCKER_PREFIX}/ci-intermediate:cache' self.job = None def wrapped_job(self): @@ -297,48 +291,43 @@ def build(self, batch, code, scope): if isinstance(self.dockerfile, dict): assert ['inline'] == list(self.dockerfile.keys()) - unrendered_dockerfile = f'/home/user/Dockerfile.in.{self.token}' + unrendered_dockerfile = f'/io/Dockerfile.in.{self.token}' create_inline_dockerfile_if_present = f'echo {shq(self.dockerfile["inline"])} > {unrendered_dockerfile};\n' else: assert isinstance(self.dockerfile, str) unrendered_dockerfile = self.dockerfile create_inline_dockerfile_if_present = '' + dockerfile_in_context = os.path.join(context, 'Dockerfile.' 
+ self.token) + cache_repo = DOCKER_PREFIX + '/cache' script = f''' set -ex {create_inline_dockerfile_if_present} -time python3 \ - ~/jinja2_render.py \ +cp {unrendered_dockerfile} /python3.7-slim-stretch/Dockerfile.in + +time chroot /python3.7-slim-stretch /usr/local/bin/python3 \ + jinja2_render.py \ {shq(json.dumps(config))} \ - {unrendered_dockerfile} \ - /home/user/Dockerfile + /Dockerfile.in \ + /Dockerfile.out -set +x -/bin/sh /home/user/convert-google-application-credentials-to-docker-auth-config -set -x +mv /python3.7-slim-stretch/Dockerfile.out {shq(dockerfile_in_context)} -export BUILDKITD_FLAGS=--oci-worker-no-process-sandbox -export BUILDCTL_CONNECT_RETRIES_MAX=100 # https://github.com/moby/buildkit/issues/1423 -buildctl-daemonless.sh \ - build \ - --frontend dockerfile.v0 \ - --local context={shq(context)} \ - --local dockerfile=/home/user \ - --output 'type=image,"name={shq(self.image)},{shq(self.cache_repository)}",push=true' \ - --export-cache type=inline \ - --import-cache type=registry,ref={shq(self.cache_repository)} \ - --trace=/home/user/trace -cat /home/user/trace -''' +set +e +/busybox/sh /convert-google-application-credentials-to-kaniko-auth-config +set -e + +exec /kaniko/executor --dockerfile={shq(dockerfile_in_context)} --context=dir://{shq(context)} --destination={shq(self.image)} --cache=true --cache-repo={shq(cache_repo)} --snapshotMode=redo --use-new-run''' log.info(f'step {self.name}, script:\n{script}') docker_registry = DOCKER_PREFIX.split('/')[0] + self.job = batch.create_job( - BUILDKIT_IMAGE, - command=['/bin/sh', '-c', script], + KANIKO_IMAGE, + command=['/busybox/sh', '-c', script], secrets=[ { 'namespace': DEFAULT_NAMESPACE, @@ -354,7 +343,6 @@ def build(self, batch, code, scope): resources=self.resources, input_files=input_files, parents=self.deps_parents(), - unconfined=True, ) def cleanup(self, batch, scope, parents): diff --git a/ci/ci/constants.py b/ci/ci/constants.py index 54907db995e..08e33cad8cd 100644 --- a/ci/ci/constants.py +++ b/ci/ci/constants.py @@ -32,5 +32,4 @@ def __init__(self, gh_username: str, hail_username: Optional[str] = None, teams: User('lgruen'), User('CDiaz96', 'carolin', [SERVICES_TEAM]), User('daniel-goldstein', 'dgoldste', [SERVICES_TEAM]), - User('ammekk', 'ammekk'), ] diff --git a/ci/ci/environment.py b/ci/ci/environment.py index bc69229efdb..d94a6fbd0dd 100644 --- a/ci/ci/environment.py +++ b/ci/ci/environment.py @@ -13,7 +13,7 @@ assert DOMAIN != '' IP = os.environ.get('HAIL_IP') CI_UTILS_IMAGE = os.environ.get('HAIL_CI_UTILS_IMAGE', f'{DOCKER_PREFIX}/ci-utils:latest') -BUILDKIT_IMAGE = os.environ['HAIL_BUILDKIT_IMAGE'] +KANIKO_IMAGE = os.environ['HAIL_KANIKO_IMAGE'] DEFAULT_NAMESPACE = os.environ['HAIL_DEFAULT_NAMESPACE'] KUBERNETES_SERVER_URL = os.environ['KUBERNETES_SERVER_URL'] BUCKET = os.environ['HAIL_CI_BUCKET_NAME'] diff --git a/ci/ci/templates/pr-table.html b/ci/ci/templates/pr-table.html index e4bc4624487..405392a45db 100644 --- a/ci/ci/templates/pr-table.html +++ b/ci/ci/templates/pr-table.html @@ -6,7 +6,6 @@
 PR Build State Labels Review State Author
- {{ pr.labels|join(", ") }} - {% if pr.review_state %} {{ pr.review_state }} diff --git a/ci/ci/templates/pr.html b/ci/ci/templates/pr.html index c18191aed86..5b6f34f8e8a 100644 --- a/ci/ci/templates/pr.html +++ b/ci/ci/templates/pr.html @@ -17,7 +17,6 @@

{{ pr.title }} {% for name, value in batch['attributes'].items() %}
{{ name }}: {{ value }}
{% endfor %} -
labels: {{ pr.labels|join(", ") }}
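
The CI build step now authenticates Kaniko against the registry by converting the mounted GCP service-account key into a Docker-style auth config (see the convert-google-application-credentials-to-kaniko-auth-config script further down in this patch). A minimal Python sketch of that same conversion, assuming the REGISTRY and GOOGLE_APPLICATION_CREDENTIALS environment variables used by the shell script; the function name and fallback values are illustrative only, not part of the patch:

import base64
import json
import os

def kaniko_auth_config(registry: str, key_path: str) -> str:
    # Read the GCP service-account key mounted into the build job.
    with open(key_path) as f:
        key_json = f.read()
    # Registries accept "_json_key:<key contents>" as user:password, base64-encoded.
    token = base64.b64encode(f'_json_key:{key_json}'.encode()).decode()
    return json.dumps({'auths': {registry: {'auth': token}}})

# Hypothetical usage; the shell script in this patch writes the result to /kaniko/.docker/config.json.
config = kaniko_auth_config(
    os.environ.get('REGISTRY', 'gcr.io'),
    os.environ.get('GOOGLE_APPLICATION_CREDENTIALS', '/gsa-key/key.json'))
print(config)
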
diff --git a/ci/deployment.yaml b/ci/deployment.yaml index c12b07aeff1..602ed75dab6 100644 --- a/ci/deployment.yaml +++ b/ci/deployment.yaml @@ -60,8 +60,8 @@ spec: value: "{{ global.zone }}" - name: HAIL_CI_UTILS_IMAGE value: "{{ ci_utils_image.image }}" - - name: HAIL_BUILDKIT_IMAGE - value: "{{ hail_buildkit_image.image }}" + - name: HAIL_KANIKO_IMAGE + value: "{{ hail_kaniko_image.image }}" - name: HAIL_IP value: "{{ global.ip }}" - name: HAIL_DOMAIN diff --git a/ci/kaniko/Dockerfile b/ci/kaniko/Dockerfile new file mode 100644 index 00000000000..5f06cd43e50 --- /dev/null +++ b/ci/kaniko/Dockerfile @@ -0,0 +1,29 @@ +FROM python:3.7-slim-stretch +RUN pip3 install jinja2 + +FROM gcr.io/kaniko-project/executor:debug + +COPY --from=0 /bin/ /python3.7-slim-stretch/bin +# boot is empty +# cannot copy dev in kaniko +# etc is too big in kaniko, ld is necessary for python to dynamically link +COPY --from=0 /etc/ld.so.cache /python3.7-slim-stretch/etc/ld.so.cache +COPY --from=0 /etc/ld.so.conf /python3.7-slim-stretch/etc/ld.so.conf +# home is empty +COPY --from=0 /lib/ /python3.7-slim-stretch/lib +COPY --from=0 /lib64/ /python3.7-slim-stretch/lib64 +# media is empty +# mnt is empty +# opt is empty +# cannot copy proc in kaniko +COPY --from=0 /root/ /python3.7-slim-stretch/root +COPY --from=0 /run/ /python3.7-slim-stretch/run +COPY --from=0 /sbin/ /python3.7-slim-stretch/sbin +# srv is empty +# cannot copy sys in kaniko +# ignore tmp +COPY --from=0 /usr/ /python3.7-slim-stretch/usr +COPY --from=0 /var/ /python3.7-slim-stretch/var + +COPY jinja2_render.py /python3.7-slim-stretch/jinja2_render.py +COPY kaniko/convert-google-application-credentials-to-kaniko-auth-config /convert-google-application-credentials-to-kaniko-auth-config diff --git a/ci/buildkit/convert-google-application-credentials-to-docker-auth-config b/ci/kaniko/convert-google-application-credentials-to-kaniko-auth-config similarity index 71% rename from ci/buildkit/convert-google-application-credentials-to-docker-auth-config rename to ci/kaniko/convert-google-application-credentials-to-kaniko-auth-config index e75b628e800..e5f976696a0 100644 --- a/ci/buildkit/convert-google-application-credentials-to-docker-auth-config +++ b/ci/kaniko/convert-google-application-credentials-to-kaniko-auth-config @@ -1,6 +1,4 @@ set +e -mkdir -p $HOME/.docker - echo '{"auths": { "'$REGISTRY'": { "auth": "'$(echo -n "_json_key:$(cat $GOOGLE_APPLICATION_CREDENTIALS)" | base64 | tr -d \\n )'"}}}' \ - > $HOME/.docker/config.json + > /kaniko/.docker/config.json diff --git a/ci/test/resources/build.yaml b/ci/test/resources/build.yaml index bf8f1b377f9..ce45331b4c7 100644 --- a/ci/test/resources/build.yaml +++ b/ci/test/resources/build.yaml @@ -111,7 +111,7 @@ steps: - from: /repo/ci/test/resources to: /io/ci/test/resources dependsOn: - - hail_ubuntu_image + - service_base_image - merge_code - kind: runImage name: create_certs @@ -137,10 +137,10 @@ steps: - default_ns - create_certs_image - kind: buildImage2 - name: hail_buildkit_image - dockerFile: /io/repo/ci/buildkit/Dockerfile + name: hail_kaniko_image + dockerFile: /io/repo/ci/kaniko/Dockerfile contextPath: /io/repo/ci - publishAs: hail-buildkit + publishAs: hail-kaniko inputs: - from: /repo/ci to: /io/repo/ci diff --git a/datasets/extract/extract_1000_Genomes_30x_GRCh38_samples.sh b/datasets/extract/extract_1000_Genomes_30x_GRCh38_samples.sh deleted file mode 100644 index 3c3687f416d..00000000000 --- a/datasets/extract/extract_1000_Genomes_30x_GRCh38_samples.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash 
- -wget -c -O - "ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/20130606_g1k_3202_samples_ped_population.txt" | -bgzip -c | -gsutil cp - gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/1000_Genomes_NYGC_30x_samples_ped_population.txt.bgz diff --git a/datasets/extract/extract_1000_Genomes_NYGC_30x_GRCh38.py b/datasets/extract/extract_1000_Genomes_NYGC_30x_GRCh38.py deleted file mode 100644 index e121e1388f7..00000000000 --- a/datasets/extract/extract_1000_Genomes_NYGC_30x_GRCh38.py +++ /dev/null @@ -1,32 +0,0 @@ -import hailtop.batch as hb - -phased_url_root = "ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/" \ - "1000G_2504_high_coverage/working/20201028_3202_phased" -gt_url_root = "ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/" \ - "1000G_2504_high_coverage/working/20201028_3202_raw_GT_with_annot" - -backend = hb.ServiceBackend(billing_project="hail-datasets-api") -batch = hb.Batch(backend=backend, name="1kg-highcov") -for i in [str(x) for x in range(1,23)]: - j = batch.new_job(name=i) - j.image("gcr.io/broad-ctsa/datasets:041421") - j.command(f"wget -c -O - {phased_url_root}/CCDG_14151_B01_GRM_WGS_2020-08-05_chr{i}.filtered.shapeit2-duohmm-phased.vcf.gz | " - f"zcat | " - f"bgzip -c | " - f"gsutil cp - gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/1000_Genomes_NYGC_30x_phased_chr{i}_GRCh38.vcf.bgz") -for i in ["X"]: - j = batch.new_job(name=i) - j.image("gcr.io/broad-ctsa/datasets:041421") - j.command(f"wget -c -O - {phased_url_root}/CCDG_14151_B01_GRM_WGS_2020-08-05_chr{i}.filtered.eagle2-phased.vcf.gz | " - f"zcat | " - f"bgzip -c | " - f"gsutil cp - gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/1000_Genomes_NYGC_30x_phased_chr{i}_GRCh38.vcf.bgz") -for i in ["Y"]: - j = batch.new_job(name=i) - j.image("gcr.io/broad-ctsa/datasets:041421") - j.command(f"wget -c -O - {gt_url_root}/20201028_CCDG_14151_B01_GRM_WGS_2020-08-05_chr{i}.recalibrated_variants.vcf.gz | " - f"zcat | " - f"bgzip -c | " - f"gsutil cp - gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/1000_Genomes_NYGC_30x_chr{i}_GRCh38.vcf.bgz") -batch.run(open=True, wait=False) -backend.close() diff --git a/datasets/extract/extract_CADD.py b/datasets/extract/extract_CADD.py deleted file mode 100644 index c8d74c373fa..00000000000 --- a/datasets/extract/extract_CADD.py +++ /dev/null @@ -1,35 +0,0 @@ -import hailtop.batch as hb - -name = "CADD" -tmp_bucket = "gs://hail-datasets-tmp" -builds = { - "GRCh37": { - "snvs_url": "https://krishna.gs.washington.edu/download/CADD/v1.6/GRCh37/whole_genome_SNVs.tsv.gz", - "indels_url": "https://krishna.gs.washington.edu/download/CADD/v1.6/GRCh37/InDels.tsv.gz", - "version": "v1.6" - }, - "GRCh38": { - "snvs_url": "https://krishna.gs.washington.edu/download/CADD/v1.6/GRCh38/whole_genome_SNVs.tsv.gz", - "indels_url": "https://krishna.gs.washington.edu/download/CADD/v1.6/GRCh38/gnomad.genomes.r3.0.indel.tsv.gz", - "version": "v1.6" - } -} - -backend = hb.ServiceBackend(billing_project="hail-datasets-api") -batch = hb.Batch(backend=backend, name=name) -for build in ["GRCh37", "GRCh38"]: - snvs_url = builds[build]["snvs_url"] - indels_url = builds[build]["indels_url"] - version = builds[build]["version"] - - j = batch.new_job(name=f"{name}_{version}_{build}") - j.image("gcr.io/broad-ctsa/datasets:050521") - j.command("gcloud -q auth activate-service-account --key-file=/gsa-key/key.json") - j.command(f"wget -c -O - {snvs_url} {indels_url} | " - "zcat | " - "grep -v '^#' | " - """awk -v FS=$'\t' -v OFS=$'\t' 'BEGIN {print 
"chromosome","position","ref","alt","raw_score","PHRED_score"} {print $0}' | """ - "bgzip -c | " - f"gsutil cp - {tmp_bucket}/{name}/{name}_{version}_{build}.tsv.bgz") -batch.run(open=True, wait=False) -backend.close() diff --git a/datasets/extract/extract_dbSNP.py b/datasets/extract/extract_dbSNP.py deleted file mode 100644 index c1401ebe029..00000000000 --- a/datasets/extract/extract_dbSNP.py +++ /dev/null @@ -1,29 +0,0 @@ -import hailtop.batch as hb - -name = "dbSNP" -tmp_bucket = "gs://hail-datasets-tmp" -builds = { - "GRCh37": { - "url": "https://ftp.ncbi.nih.gov/snp/latest_release/VCF/GCF_000001405.25.gz", - "version": "154" - }, - "GRCh38": { - "url": "https://ftp.ncbi.nih.gov/snp/latest_release/VCF/GCF_000001405.38.gz", - "version": "154" - } -} - -backend = hb.ServiceBackend(billing_project="hail-datasets-api") -batch = hb.Batch(backend=backend, name=name) -for build in ["GRCh37", "GRCh38"]: - vcf = builds[build]["url"] - version = builds[build]["version"] - j = batch.new_job(name=f"{name}_{version}_{build}") - j.image("gcr.io/broad-ctsa/datasets:050521") - j.command("gcloud -q auth activate-service-account --key-file=/gsa-key/key.json") - j.command(f"wget -c -O - {vcf} | " - "zcat | " - "bgzip -c | " - f"gsutil cp - {tmp_bucket}/{name}/{name}_{version}_{build}.vcf.bgz") -batch.run(open=True, wait=False) -backend.close() diff --git a/datasets/notebooks/1kg_NYGC_30x_datasets.ipynb b/datasets/notebooks/1kg_NYGC_30x_datasets.ipynb deleted file mode 100644 index 1a13c9e4854..00000000000 --- a/datasets/notebooks/1kg_NYGC_30x_datasets.ipynb +++ /dev/null @@ -1,814 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "social-parcel", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "import hail as hl\n", - "hl.init()" - ] - }, - { - "cell_type": "markdown", - "id": "portuguese-enclosure", - "metadata": {}, - "source": [ - "NYGC 30x HighCov samples Hail Table:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "hollywood-princess", - "metadata": {}, - "outputs": [], - "source": [ - "ht_samples = hl.import_table(\n", - " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/1000_Genomes_NYGC_30x_samples_ped_population.txt.bgz\", \n", - " delimiter=\"\\s+\",\n", - " impute=True\n", - ")\n", - "\n", - "ht_samples = ht_samples.annotate(\n", - " FatherID = hl.if_else(ht_samples.FatherID == \"0\", \n", - " hl.missing(hl.tstr), \n", - " ht_samples.FatherID), \n", - " MotherID = hl.if_else(ht_samples.MotherID == \"0\", \n", - " hl.missing(hl.tstr), \n", - " ht_samples.MotherID),\n", - " Sex = hl.if_else(ht_samples.Sex == 1, \"male\", \"female\")\n", - ")\n", - "ht_samples = ht_samples.key_by(\"SampleID\")\n", - "\n", - "n_rows = ht_samples.count()\n", - "n_partitions = ht_samples.n_partitions()\n", - "\n", - "ht_samples = ht_samples.annotate_globals(\n", - " metadata=hl.struct(\n", - " name=\"1000_Genomes_HighCov_samples\",\n", - " n_rows=n_rows,\n", - " n_partitions=n_partitions)\n", - ")\n", - "\n", - "ht_samples.write(\"gs://hail-datasets-us/1000_Genomes_NYGC_30x_HighCov_samples.ht\", overwrite=False)\n", - "ht_samples = hl.read_table(\"gs://hail-datasets-us/1000_Genomes_NYGC_30x_HighCov_samples.ht\")\n", - "ht_samples.describe()" - ] - }, - { - "cell_type": "markdown", - "id": "ruled-processor", - "metadata": {}, - "source": [ - "### Phased genotypes" - ] - }, - { - "cell_type": "markdown", - "id": "elegant-maker", - "metadata": {}, - "source": [ - "Creating MTs for the phased data is straightforward, as multiallelic 
variants were split during phasing." - ] - }, - { - "cell_type": "markdown", - "id": "increasing-component", - "metadata": {}, - "source": [ - "#### Autosomes (phased):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "excessive-library", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "mt = hl.import_vcf(\n", - " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/1000_Genomes_NYGC_30x_phased_chr{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22}_GRCh38.vcf.bgz\",\n", - " reference_genome=\"GRCh38\"\n", - ")\n", - "\n", - "n_rows, n_cols = mt.count()\n", - "n_partitions = mt.n_partitions()\n", - "\n", - "mt = mt.annotate_globals(\n", - " metadata=hl.struct(\n", - " name=\"1000_Genomes_HighCov_autosomes\",\n", - " reference_genome=\"GRCh38\",\n", - " n_rows=n_rows,\n", - " n_cols=n_cols,\n", - " n_partitions=n_partitions\n", - " )\n", - ")\n", - "\n", - "# Get list of INFO fields that are arrays\n", - "known_keys = [x[0] for x in list(mt.row.info.items()) if \"array\" in str(x[1])]\n", - "\n", - "# Extract value from INFO array fields (all arrays are length 1)\n", - "mt = mt.annotate_rows(\n", - " info = mt.info.annotate(\n", - " **{k: hl.or_missing(hl.is_defined(mt.info[k]),\n", - " mt.info[k][0])\n", - " for k in known_keys}\n", - " )\n", - ")\n", - "\n", - "mt = mt.checkpoint(\n", - " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/autosomes_phased_GRCh38.mt\",\n", - " overwrite=False,\n", - " _read_if_exists=True\n", - ")\n", - "\n", - "mt = mt.annotate_cols(**ht_samples[mt.s])\n", - "mt = hl.sample_qc(mt)\n", - "mt = hl.variant_qc(mt)\n", - "\n", - "mt.write(\"gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/autosomes_phased.mt\", overwrite=False)\n", - "mt = hl.read_matrix_table(\"gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/autosomes_phased.mt\")\n", - "mt.describe()" - ] - }, - { - "cell_type": "markdown", - "id": "leading-directory", - "metadata": {}, - "source": [ - "#### ChrX (phased):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "alien-medicaid", - "metadata": {}, - "outputs": [], - "source": [ - "mt = hl.import_vcf(\n", - " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/1000_Genomes_NYGC_30x_phased_chrX_GRCh38.vcf.bgz\",\n", - " reference_genome=\"GRCh38\"\n", - ")\n", - "\n", - "n_rows, n_cols = mt.count()\n", - "n_partitions = mt.n_partitions()\n", - "\n", - "mt = mt.annotate_globals(\n", - " metadata=hl.struct(\n", - " name=\"1000_Genomes_HighCov_chrX\",\n", - " reference_genome=\"GRCh38\",\n", - " n_rows=n_rows,\n", - " n_cols=n_cols,\n", - " n_partitions=n_partitions\n", - " )\n", - ")\n", - "\n", - "# Get list of INFO fields that are arrays\n", - "known_keys = [x[0] for x in list(mt.row.info.items()) if \"array\" in str(x[1])]\n", - "\n", - "# Extract appropriate value from INFO array fields (all arrays are length 1)\n", - "mt = mt.annotate_rows(\n", - " info = mt.info.annotate(\n", - " **{k: hl.or_missing(hl.is_defined(mt.info[k]),\n", - " mt.info[k][0])\n", - " for k in known_keys}\n", - " )\n", - ")\n", - "\n", - "mt = mt.checkpoint(\n", - " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrX_phased_GRCh38.mt\",\n", - " overwrite=False,\n", - " _read_if_exists=True\n", - ")\n", - "\n", - "mt = mt.annotate_cols(**ht_samples[mt.s])\n", - "mt = hl.sample_qc(mt)\n", - "mt = hl.variant_qc(mt)\n", - "\n", - "mt.write(\"gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/chrX_phased.mt\", overwrite=False)\n", - "mt = 
hl.read_matrix_table(\"gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/chrX_phased.mt\")\n", - "mt.describe()" - ] - }, - { - "cell_type": "markdown", - "id": "ideal-change", - "metadata": {}, - "source": [ - "### Unphased genotypes" - ] - }, - { - "cell_type": "markdown", - "id": "statutory-karaoke", - "metadata": {}, - "source": [ - "#### Autosomes (unphased):" - ] - }, - { - "cell_type": "markdown", - "id": "above-wales", - "metadata": {}, - "source": [ - "Import chr1-chr22 VCF to `MatrixTable` and checkpoint:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "painful-virtue", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "mt = hl.import_vcf(\n", - " (\"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/1000_Genomes_NYGC_30x_\"\n", - " \"chr{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22}_\"\n", - " \"GRCh38.vcf.bgz\"),\n", - " reference_genome=\"GRCh38\",\n", - " array_elements_required=False\n", - ")\n", - "mt = mt.annotate_entries(\n", - " PL = hl.if_else(mt.PL.contains(hl.missing(hl.tint32)), \n", - " hl.missing(mt.PL.dtype), \n", - " mt.PL)\n", - ")\n", - "mt = mt.checkpoint(\n", - " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/autosomes_unphased_GRCh38_imported_vcf.mt\", \n", - " overwrite=False, \n", - " _read_if_exists=True\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "original-admission", - "metadata": {}, - "source": [ - "Separate biallelic and multiallelic variants, split multiallelic variants with `split_multi_hts`, and then `union_rows` the split multiallelic MT back to the biallelic MT. \n", - "\n", - "For multiallelic variants we will just set `PL` to be missing, to avoid running into index out of bounds errors in `split_multi_hts`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "diverse-march", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "mt = hl.read_matrix_table(\n", - " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/autosomes_unphased_GRCh38_imported_vcf.mt\"\n", - ")\n", - "\n", - "bi = mt.filter_rows(hl.len(mt.alleles) == 2)\n", - "bi = bi.annotate_rows(a_index=1, was_split=False)\n", - "bi = bi.checkpoint(\n", - " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/autosomes_unphased_GRCh38_biallelic.mt\", \n", - " overwrite=False, \n", - " _read_if_exists=True\n", - ")\n", - "\n", - "multi = mt.filter_rows(hl.len(mt.alleles) > 2)\n", - "multi = multi.annotate_entries(PL = hl.missing(multi.PL.dtype))\n", - "multi = multi.checkpoint(\n", - " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/autosomes_unphased_GRCh38_multiallelic.mt\", \n", - " overwrite=False,\n", - " _read_if_exists=True\n", - ")\n", - "\n", - "split = hl.split_multi_hts(multi, keep_star=True, permit_shuffle=True)\n", - "split = split.checkpoint(\n", - " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/autosomes_unphased_GRCh38_multiallelic_split.mt\", \n", - " overwrite=False, \n", - " _read_if_exists=True\n", - ")\n", - "\n", - "unioned = split.union_rows(bi)\n", - "unioned = unioned.checkpoint(\n", - " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/autosomes_unphased_GRCh38_unioned.mt\", \n", - " overwrite=False, \n", - " _read_if_exists=True\n", - ")\n", - "\n", - "unioned = unioned.repartition(12000, shuffle=True)\n", - "unioned = unioned.checkpoint(\n", - " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/autosomes_unphased_GRCh38_unioned_repart.mt\", \n", - " overwrite=False, \n", - " 
_read_if_exists=True\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "criminal-terry", - "metadata": {}, - "source": [ - "After splitting multiallelic variants, we need to extract the appropriate values from the `INFO` array fields with `a_index`. \n", - "\n", - "Then annotate globals with metadata, annotate columns with sample relationships, perform `sample_qc` and `variant_qc`, and write final MT to `hail-datasets-us`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "conscious-society", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "unioned = hl.read_matrix_table(\n", - " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/autosomes_unphased_GRCh38_unioned_repart.mt\"\n", - ")\n", - "\n", - "# Get list of INFO fields that are arrays\n", - "known_keys = [x[0] for x in list(unioned.row.info.items()) if \"array\" in str(x[1])]\n", - "\n", - "# Extract appropriate values from INFO array fields after splitting\n", - "mt = unioned.annotate_rows(\n", - " info = unioned.info.annotate(\n", - " **{k: hl.or_missing(hl.is_defined(unioned.info[k]), \n", - " unioned.info[k][unioned.a_index - 1]) \n", - " for k in known_keys}\n", - " )\n", - ")\n", - "\n", - "n_rows, n_cols = mt.count()\n", - "n_partitions = mt.n_partitions()\n", - "\n", - "mt = mt.annotate_globals(\n", - " metadata=hl.struct(\n", - " name=\"1000_Genomes_HighCov_autosomes\",\n", - " reference_genome=\"GRCh38\",\n", - " n_rows=n_rows,\n", - " n_cols=n_cols,\n", - " n_partitions=n_partitions\n", - " )\n", - ")\n", - "\n", - "ht_samples = hl.read_table(\"gs://hail-datasets-us/1000_Genomes/NYGC_30x/samples.ht\")\n", - "mt = mt.annotate_cols(**ht_samples[mt.s])\n", - "mt = hl.sample_qc(mt)\n", - "mt = hl.variant_qc(mt)\n", - "\n", - "mt.write(\"gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/autosomes_unphased.mt\", overwrite=False)\n", - "mt = hl.read_matrix_table(\"gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/autosomes_unphased.mt\")\n", - "mt.describe()" - ] - }, - { - "cell_type": "markdown", - "id": "blank-dance", - "metadata": {}, - "source": [ - "#### ChrX (unphased):" - ] - }, - { - "cell_type": "markdown", - "id": "distributed-numbers", - "metadata": {}, - "source": [ - "Import chrX VCF to `MatrixTable` and checkpoint:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "organized-bunny", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "mt = hl.import_vcf(\n", - " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/1000_Genomes_NYGC_30x_chrX_GRCh38.vcf.bgz\",\n", - " reference_genome=\"GRCh38\", \n", - " array_elements_required=False\n", - ")\n", - "mt = mt.annotate_entries(\n", - " PL = hl.if_else(mt.PL.contains(hl.missing(hl.tint32)), \n", - " hl.missing(mt.PL.dtype), \n", - " mt.PL)\n", - ")\n", - "mt = mt.checkpoint(\n", - " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrX_unphased_GRCh38_imported_vcf.mt\", \n", - " overwrite=False, \n", - " _read_if_exists=True\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "official-doubt", - "metadata": {}, - "source": [ - "Separate biallelic and multiallelic variants, split multiallelic variants with `split_multi_hts`, and then `union_rows` the split multiallelic MT back to the biallelic MT. \n", - "\n", - "For multiallelic variants we will just set `PL` to be missing, to avoid running into index out of bounds errors in `split_multi_hts`." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "convertible-distribution", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "mt = hl.read_matrix_table(\n", - " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrX_unphased_GRCh38_imported_vcf.mt\"\n", - ")\n", - "\n", - "bi = mt.filter_rows(hl.len(mt.alleles) == 2)\n", - "bi = bi.annotate_rows(a_index=1, was_split=False)\n", - "bi = bi.checkpoint(\n", - " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrX_unphased_GRCh38_biallelic.mt\", \n", - " overwrite=False, \n", - " _read_if_exists=True\n", - ")\n", - "\n", - "multi = mt.filter_rows(hl.len(mt.alleles) > 2)\n", - "multi = multi.annotate_entries(PL = hl.missing(multi.PL.dtype))\n", - "multi = multi.checkpoint(\n", - " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrX_unphased_GRCh38_multiallelic.mt\", \n", - " overwrite=False,\n", - " _read_if_exists=True\n", - ")\n", - "\n", - "split = hl.split_multi_hts(multi, keep_star=True, permit_shuffle=True)\n", - "split = split.checkpoint(\n", - " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrX_unphased_GRCh38_multiallelic_split.mt\", \n", - " overwrite=False, \n", - " _read_if_exists=True\n", - ")\n", - "\n", - "unioned = split.union_rows(bi)\n", - "unioned = unioned.checkpoint(\n", - " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrX_unphased_GRCh38_unioned.mt\", \n", - " overwrite=False, \n", - " _read_if_exists=True\n", - ")\n", - "\n", - "unioned = unioned.repartition(512, shuffle=True)\n", - "unioned = unioned.checkpoint(\n", - " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrX_unphased_GRCh38_unioned_repart.mt\", \n", - " overwrite=False, \n", - " _read_if_exists=True\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "lonely-storm", - "metadata": {}, - "source": [ - "After splitting multiallelic variants, we need to extract the appropriate values from the `INFO` array fields with `a_index`. \n", - "\n", - "Then annotate globals with metadata, annotate columns with sample relationships, perform `sample_qc` and `variant_qc`, and write final MT to `hail-datasets-us`." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "beginning-outline", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "unioned = hl.read_matrix_table(\n", - " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrX_unphased_GRCh38_unioned_repart.mt\"\n", - ")\n", - "\n", - "# Get list of INFO fields that are arrays\n", - "known_keys = [x[0] for x in list(unioned.row.info.items()) if \"array\" in str(x[1])]\n", - "\n", - "# Extract appropriate values from INFO array fields after splitting\n", - "mt = unioned.annotate_rows(\n", - " info = unioned.info.annotate(\n", - " **{k: hl.or_missing(hl.is_defined(unioned.info[k]), \n", - " unioned.info[k][unioned.a_index - 1]) \n", - " for k in known_keys}\n", - " )\n", - ")\n", - "\n", - "n_rows, n_cols = mt.count()\n", - "n_partitions = mt.n_partitions()\n", - "\n", - "mt = mt.annotate_globals(\n", - " metadata=hl.struct(\n", - " name=\"1000_Genomes_HighCov_chrX\",\n", - " reference_genome=\"GRCh38\",\n", - " n_rows=n_rows,\n", - " n_cols=n_cols,\n", - " n_partitions=n_partitions\n", - " )\n", - ")\n", - "\n", - "ht_samples = hl.read_table(\"gs://hail-datasets-us/1000_Genomes/NYGC_30x/samples.ht\")\n", - "mt = mt.annotate_cols(**ht_samples[mt.s])\n", - "mt = hl.sample_qc(mt)\n", - "mt = hl.variant_qc(mt)\n", - "\n", - "mt.write(\"gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/chrX_unphased.mt\", overwrite=False)\n", - "mt = hl.read_matrix_table(\"gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/chrX_unphased.mt\")\n", - "mt.describe()" - ] - }, - { - "cell_type": "markdown", - "id": "existing-helping", - "metadata": {}, - "source": [ - "#### ChrY (unphased):" - ] - }, - { - "cell_type": "markdown", - "id": "suspected-savannah", - "metadata": {}, - "source": [ - "Import chrY VCF to `MatrixTable` and checkpoint:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "distinguished-smooth", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "mt = hl.import_vcf(\n", - " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/1000_Genomes_NYGC_30x_chrY_GRCh38.vcf.bgz\",\n", - " reference_genome=\"GRCh38\", \n", - " array_elements_required=False\n", - ")\n", - "mt = mt.annotate_entries(\n", - " PL = hl.if_else(mt.PL.contains(hl.missing(hl.tint32)), \n", - " hl.missing(mt.PL.dtype), \n", - " mt.PL)\n", - ")\n", - "mt = mt.checkpoint(\n", - " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrY_unphased_GRCh38_imported_vcf.mt\", \n", - " overwrite=False, \n", - " _read_if_exists=True\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "suspected-locator", - "metadata": {}, - "source": [ - "Separate biallelic and multiallelic variants, split multiallelic variants with `split_multi_hts`, and then `union_rows` the split multiallelic MT back to the biallelic MT. \n", - "\n", - "For multiallelic variants we will just set `PL` to be missing, to avoid running into index out of bounds errors in `split_multi_hts`." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "answering-nitrogen", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "mt = hl.read_matrix_table(\n", - " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrY_unphased_GRCh38_imported_vcf.mt\"\n", - ")\n", - "\n", - "bi = mt.filter_rows(hl.len(mt.alleles) == 2)\n", - "bi = bi.annotate_rows(a_index=1, was_split=False)\n", - "bi = bi.checkpoint(\n", - " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrY_unphased_GRCh38_biallelic.mt\", \n", - " overwrite=False, \n", - " _read_if_exists=True\n", - ")\n", - "\n", - "multi = mt.filter_rows(hl.len(mt.alleles) > 2)\n", - "multi = multi.annotate_entries(PL = hl.missing(multi.PL.dtype))\n", - "multi = multi.checkpoint(\n", - " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrY_unphased_GRCh38_multiallelic.mt\", \n", - " overwrite=False,\n", - " _read_if_exists=True\n", - ")\n", - "\n", - "split = hl.split_multi_hts(multi, keep_star=True, permit_shuffle=True)\n", - "split = split.checkpoint(\n", - " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrY_unphased_GRCh38_multiallelic_split.mt\", \n", - " overwrite=False, \n", - " _read_if_exists=True\n", - ")\n", - "\n", - "unioned = split.union_rows(bi)\n", - "unioned = unioned.checkpoint(\n", - " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrY_unphased_GRCh38_unioned.mt\", \n", - " overwrite=False, \n", - " _read_if_exists=True\n", - ")\n", - "\n", - "unioned = unioned.repartition(8, shuffle=True)\n", - "unioned = unioned.checkpoint(\n", - " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrY_unphased_GRCh38_unioned_repart.mt\", \n", - " overwrite=False, \n", - " _read_if_exists=True\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "professional-cleaning", - "metadata": {}, - "source": [ - "After splitting multiallelic variants, we need to extract the appropriate values from the `INFO` array fields with `a_index`. \n", - "\n", - "Then annotate globals with metadata, annotate columns with sample relationships, perform `sample_qc` and `variant_qc`, and write final MT to `hail-datasets-us`." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "alternate-motor", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "unioned = hl.read_matrix_table(\n", - " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrY_unphased_GRCh38_unioned_repart.mt\"\n", - ")\n", - "\n", - "# Get list of INFO fields that are arrays\n", - "known_keys = [x[0] for x in list(unioned.row.info.items()) if \"array\" in str(x[1])]\n", - "\n", - "# Extract appropriate values from INFO array fields after splitting\n", - "mt = unioned.annotate_rows(\n", - " info = unioned.info.annotate(\n", - " **{k: hl.or_missing(hl.is_defined(unioned.info[k]), \n", - " unioned.info[k][unioned.a_index - 1]) \n", - " for k in known_keys}\n", - " )\n", - ")\n", - "\n", - "n_rows, n_cols = mt.count()\n", - "n_partitions = mt.n_partitions()\n", - "\n", - "mt = mt.annotate_globals(\n", - " metadata=hl.struct(\n", - " name=\"1000_Genomes_HighCov_chrY\",\n", - " reference_genome=\"GRCh38\",\n", - " n_rows=n_rows,\n", - " n_cols=n_cols,\n", - " n_partitions=n_partitions\n", - " )\n", - ")\n", - "\n", - "ht_samples = hl.read_table(\"gs://hail-datasets-us/1000_Genomes/NYGC_30x/samples.ht\")\n", - "mt = mt.annotate_cols(**ht_samples[mt.s])\n", - "mt = hl.sample_qc(mt)\n", - "mt = hl.variant_qc(mt)\n", - "\n", - "mt.write(\"gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/chrY_unphased.mt\", overwrite=False)\n", - "mt = hl.read_matrix_table(\"gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/chrY_unphased.mt\")\n", - "mt.describe()" - ] - }, - { - "cell_type": "markdown", - "source": [ - "### Create/update schemas" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%% md\n" - } - } - }, - { - "cell_type": "code", - "execution_count": null, - "id": "exposed-ivory", - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "import json\n", - "import os\n", - "import textwrap\n", - "\n", - "output_dir = os.path.abspath(\"../../hail/python/hail/docs/datasets/schemas\")\n", - "datasets_path = os.path.abspath(\"../../hail/python/hail/experimental/datasets.json\")\n", - "with open(datasets_path, \"r\") as f:\n", - " datasets = json.load(f)\n", - "\n", - "names = datasets.keys()\n", - "for name in [name for name in names if \"1000_Genomes_HighCov\" in name]:\n", - " versions = sorted(set(dataset[\"version\"] for dataset in datasets[name][\"versions\"]))\n", - " if not versions:\n", - " versions = [None]\n", - " reference_genomes = sorted(set(dataset[\"reference_genome\"] for dataset in datasets[name][\"versions\"]))\n", - " if not reference_genomes:\n", - " reference_genomes = [None]\n", - "\n", - " print(name)\n", - " # Create schemas for unphased versions, since phased entries only have GT\n", - " if name == \"1000_Genomes_HighCov_chrY\":\n", - " v = versions[0]\n", - " else:\n", - " v = versions[1]\n", - " print(v)\n", - " print(reference_genomes[0] + \"\\n\")\n", - "\n", - " path = [dataset[\"url\"][\"gcp\"][\"us\"]\n", - " for dataset in datasets[name][\"versions\"]\n", - " if all([dataset[\"version\"] == v,\n", - " dataset[\"reference_genome\"] == reference_genomes[0]])]\n", - " assert len(path) == 1\n", - " path = path[0]\n", - " if path.endswith(\".ht\"):\n", - " table = hl.methods.read_table(path)\n", - " table_class = \"hail.Table\"\n", - " else:\n", - " table = hl.methods.read_matrix_table(path)\n", - " table_class = \"hail.MatrixTable\"\n", - "\n", - " description = table.describe(handler=lambda x: str(x)).split(\"\\n\")\n", - " 
description = \"\\n\".join([line.rstrip() for line in description])\n", - "\n", - " template = \"\"\".. _{dataset}:\n", - "\n", - "{dataset}\n", - "{underline1}\n", - "\n", - "* **Versions:** {versions}\n", - "* **Reference genome builds:** {ref_genomes}\n", - "* **Type:** :class:`{class}`\n", - "\n", - "Schema ({version0}, {ref_genome0})\n", - "{underline2}\n", - "\n", - ".. code-block:: text\n", - "\n", - "{schema}\n", - "\n", - "\"\"\"\n", - " context = {\n", - " \"dataset\": name,\n", - " \"underline1\": len(name) * \"=\",\n", - " \"version0\": v,\n", - " \"ref_genome0\": reference_genomes[0],\n", - " \"versions\": \", \".join([str(version) for version in versions]),\n", - " \"ref_genomes\": \", \".join([str(reference_genome) for reference_genome in reference_genomes]),\n", - " \"underline2\": len(\"\".join([\"Schema (\", str(v), \", \", str(reference_genomes[0]), \")\"])) * \"~\",\n", - " \"schema\": textwrap.indent(description, \" \"),\n", - " \"class\": table_class\n", - " }\n", - " with open(output_dir + f\"/{name}.rst\", \"w\") as f:\n", - " f.write(template.format(**context).strip())" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} \ No newline at end of file diff --git a/datasets/notebooks/CADD_datasets.ipynb b/datasets/notebooks/CADD_datasets.ipynb deleted file mode 100644 index 1499df66e60..00000000000 --- a/datasets/notebooks/CADD_datasets.ipynb +++ /dev/null @@ -1,124 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### CADD\n", - "\n", - "Use to create CADD Hail Tables after downloading raw data from https://cadd.gs.washington.edu/ with Hail Batch (see `datasets/extract/extract_CADD.py`)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "import hail as hl\n", - "hl.init()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "input_root = \"gs://hail-datasets-tmp\"\n", - "output_root = \"gs://hail-datasets-us\"\n", - "\n", - "name = \"CADD\"\n", - "version = \"v1.6\"\n", - "builds = [\"GRCh37\", \"GRCh38\"]\n", - "\n", - "for build in builds:\n", - " ht = hl.import_table(f\"{input_root}/{name}/{name}_{version}_{build}.tsv.bgz\",\n", - " min_partitions=2048,\n", - " types={\"position\": hl.tint,\n", - " \"raw_score\": hl.tfloat,\n", - " \"PHRED_score\": hl.tfloat})\n", - "\n", - " if build == \"GRCh37\":\n", - " ht = ht.annotate(locus = hl.locus(ht.chromosome, ht.position, build))\n", - " else:\n", - " ht = ht.annotate(locus = hl.locus(\"chr\" + ht.chromosome, ht.position, build))\n", - "\n", - " ht = ht.annotate(alleles = [ht.ref, ht.alt])\n", - " ht = ht.select(\"locus\", \"alleles\", \"raw_score\", \"PHRED_score\")\n", - " ht = ht.key_by(\"locus\", \"alleles\")\n", - " \n", - " n_rows = ht.count()\n", - " n_partitions = ht.n_partitions()\n", - " \n", - " ht = ht.annotate_globals(\n", - " metadata=hl.struct(\n", - " name=name,\n", - " version=version,\n", - " reference_genome=build,\n", - " n_rows=n_rows,\n", - " n_partitions=n_partitions\n", - " )\n", - " )\n", - " \n", - " ht.write(f\"{output_root}/{name}_{version}_{build}.ht\")\n", - " ht = hl.read_table(f\"{output_root}/{name}_{version}_{build}.ht\")\n", - " ht.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ht37 = hl.read_table(\"gs://hail-datasets-us/CADD_v1.6_GRCh37.ht\")\n", - "ht37.describe()\n", - "print(f\"GRCh37: {str(hl.eval(ht37.metadata))}\")\n", - "ht37.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ht38 = hl.read_table(\"gs://hail-datasets-us/CADD_v1.6_GRCh38.ht\")\n", - "ht38.describe()\n", - "print(f\"GRCh38: {str(hl.eval(ht38.metadata))}\")\n", - "ht38.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/datasets/notebooks/dbSNP_datasets.ipynb b/datasets/notebooks/dbSNP_datasets.ipynb deleted file mode 100644 index 7d8c07602d0..00000000000 --- a/datasets/notebooks/dbSNP_datasets.ipynb +++ /dev/null @@ -1,685 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## dbSNP\n", - "\n", - "Use to create Hail Tables for dbSNP, after downloading raw data from https://ftp.ncbi.nih.gov/snp/. \n", - "\n", - "Raw data downloaded with Hail Batch, see `hail/datasets/extract/extract_dbSNP.py`." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import hail as hl\n", - "hl.init()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create Hail Tables from GRCh37 and GRCh38 assembly reports\n", - "\n", - "The contigs in the VCFs are [RefSeq](https://www.ncbi.nlm.nih.gov/refseq/) accession numbers, and need to be mapped back to the appropriate chromosome for each reference genome.\n", - "\n", - "The GRCh37 assembly can be found [here](https://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.25), and the assembly report mapping chromosomes to RefSeq sequences can be found [here](https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.25_GRCh37.p13/GCF_000001405.25_GRCh37.p13_assembly_report.txt).\n", - "\n", - "The GRCh38 assembly can be found [here](https://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.39), and the assembly report mapping chromosomes to RefSeq sequences can be found [here](https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.39_GRCh38.p13/GCF_000001405.39_GRCh38.p13_assembly_report.txt)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### GRCh37" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "ht = hl.import_table(\"gs://hail-datasets-tmp/dbSNP/GCF_000001405.25_GRCh37.p13_assembly_report.txt\", \n", - " no_header=True, \n", - " comment=\"#\",\n", - " delimiter=\"\\t\", \n", - " missing=\"na\")\n", - "\n", - "field_names = ['sequence_name','sequence_role','assigned_molecule',\n", - " 'assigned_molecule_location/type', 'genbank_accn', 'relationship', \n", - " 'refseq_accn', 'assembly_unit', 'sequence_length', 'ucsc_style_name']\n", - "\n", - "name = \"dbSNP\"\n", - "version = \"154\"\n", - "build = \"GRCh37\"\n", - "n_rows = ht.count()\n", - "n_partitions = ht.n_partitions()\n", - "\n", - "ht = ht.annotate_globals(\n", - " metadata=hl.struct(\n", - " name=name,\n", - " version=version,\n", - " reference_genome=build,\n", - " n_rows=n_rows,\n", - " n_partitions=n_partitions\n", - " )\n", - ")\n", - "ht = ht.rename(dict(zip([f\"f{i}\" for i in range(10)], field_names)))\n", - "ht = ht.drop(\"relationship\").key_by(\"refseq_accn\")\n", - "\n", - "ht.write(\"gs://hail-datasets-us/NCBI_assembly_report_p13_GRCh37.ht\")\n", - "ht = hl.read_table(\"gs://hail-datasets-us/NCBI_assembly_report_p13_GRCh37.ht\")\n", - "ht.describe()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### GRCh38" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "ht = hl.import_table(\"gs://hail-datasets-tmp/dbSNP/GCF_000001405.39_GRCh38.p13_assembly_report.txt\", \n", - " no_header=True, \n", - " comment=\"#\",\n", - " delimiter=\"\\t\", \n", - " missing=\"na\")\n", - "\n", - "field_names = ['sequence_name','sequence_role','assigned_molecule',\n", - " 'assigned_molecule_location/type', 'genbank_accn', 'relationship', \n", - " 'refseq_accn', 'assembly_unit', 'sequence_length', 'ucsc_style_name']\n", - "\n", - "name = \"dbSNP\"\n", - "version = \"154\"\n", - "build = \"GRCh38\"\n", - "n_rows = ht.count()\n", - "n_partitions = ht.n_partitions()\n", - "\n", - "ht = ht.annotate_globals(\n", - " metadata=hl.struct(\n", - " name=name,\n", - " version=version,\n", - " reference_genome=build,\n", - " n_rows=n_rows,\n", - " n_partitions=n_partitions\n", - " )\n", - 
")\n", - "ht = ht.rename(dict(zip([f\"f{i}\" for i in range(10)], field_names)))\n", - "ht = ht.drop(\"relationship\").key_by(\"refseq_accn\")\n", - "\n", - "ht.write(\"gs://hail-datasets-us/NCBI_assembly_report_p13_GRCh38.ht\")\n", - "ht = hl.read_table(\"gs://hail-datasets-us/NCBI_assembly_report_p13_GRCh38.ht\")\n", - "ht.describe()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create Hail Tables for dbSNP\n", - "\n", - "Now we can use the assembly report for each reference genome build to map from RefSeq accession numbers to chromosomes, and create Hail Tables. There are no samples or entries in the dbSNP VCFs. Some helpful information about the dbSNP VCFs is available [here](https://www.ncbi.nlm.nih.gov/snp/docs/products/vcf/redesign/).\n", - "\n", - "We will create two Hail Tables for each reference genome build, both keyed by `[\"locus\", \"alleles\"]`:\n", - "\n", - " - Table with all fields from the imported VCF (e.g. `gs://hail-datasets-us/dbSNP_154_GRCh37.ht`)\n", - " - Table with only the rsID field (e.g. `gs://hail-datasets-us/dbSNP_rsid_154_GRCh37.ht`)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "First load VCFs to get all the contigs present in each dataset so we can create a mapping to used to recode contigs from RefSeq accession numbers to GRCh37/38 builds. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt37 = hl.import_vcf(f\"gs://hail-datasets-tmp/dbSNP/dbSNP_154_GRCh37.vcf.bgz\", \n", - " header_file=f\"gs://hail-datasets-tmp/dbSNP/dbSNP_154_GRCh37_header_only.vcf.txt\", \n", - " reference_genome=None, \n", - " min_partitions=512)\n", - "\n", - "mt38 = hl.import_vcf(f\"gs://hail-datasets-tmp/dbSNP/dbSNP_154_GRCh38.vcf.bgz\", \n", - " header_file=f\"gs://hail-datasets-tmp/dbSNP/dbSNP_154_GRCh38_header_only.vcf.txt\", \n", - " reference_genome=None, \n", - " min_partitions=512)\n", - "\n", - "mt37 = mt37.checkpoint(f\"gs://hail-datasets-tmp/checkpoints/dbSNP_154_GRCh37_no_coding.mt\", \n", - " _read_if_exists=True, \n", - " overwrite=False)\n", - "\n", - "mt38 = mt38.checkpoint(f\"gs://hail-datasets-tmp/checkpoints/dbSNP_154_GRCh38_no_coding.mt\", \n", - " _read_if_exists=True, \n", - " overwrite=False)\n", - "\n", - "# To get all contigs present for recoding to correct reference genome mapping\n", - "contigs_present37 = mt37.aggregate_rows(hl.agg.collect_as_set(mt37.locus.contig))\n", - "contigs_present38 = mt38.aggregate_rows(hl.agg.collect_as_set(mt38.locus.contig))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "# Load NCBI assembly reports with RefSeq mappings\n", - "assembly37_ht = hl.read_table(\"gs://hail-datasets-us/NCBI_assembly_report_p13_GRCh37.ht\")\n", - "assembly37_ht = assembly37_ht.annotate(\n", - " contig = hl.if_else(assembly37_ht.sequence_role == \"unlocalized-scaffold\", \n", - " assembly37_ht.genbank_accn, \n", - " assembly37_ht.assigned_molecule)\n", - ")\n", - "assembly38_ht = hl.read_table(\"gs://hail-datasets-us/NCBI_assembly_report_p13_GRCh38.ht\")\n", - "\n", - "# Map RefSeq identifiers to chromosomes for GRCh37 using the \"contig\" field we created in assembly report\n", - "rg37 = hl.get_reference(\"GRCh37\")\n", - "refseq_to_chr37 = dict(zip(assembly37_ht.refseq_accn.collect(), assembly37_ht.contig.collect()))\n", - "refseq_to_chr37 = {k: v for k, v in refseq_to_chr37.items() if k in contigs_present37 and v in 
rg37.contigs}\n", - "\n", - "# Map RefSeq identifiers to chromosomes for GRCh38 using the \"ucsc_style_name\" field in assembly report\n", - "rg38 = hl.get_reference(\"GRCh38\")\n", - "refseq_to_chr38 = dict(zip(assembly38_ht.refseq_accn.collect(), assembly38_ht.ucsc_style_name.collect()))\n", - "refseq_to_chr38 = {k: v for k, v in refseq_to_chr38.items() if k in contigs_present38 and v in rg38.contigs}\n", - "\n", - "recodings = {\n", - " \"GRCh37\": refseq_to_chr37, \n", - " \"GRCh38\": refseq_to_chr38\n", - "}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Use the function and known keys below to convert allele frequency arrays to structs:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Convert array of strings like hl.array([\"GnomAD:.,1,3.187e-05\",\"TOPMED:.,1,2.389e-05\"]) to a struct\n", - "def arr_str_to_struct(hl_array, known_keys):\n", - " _dict = hl.dict(\n", - " hl_array.map(\n", - " lambda x: (\"_\" + x.split(\":\")[0], \n", - " x.split(\":\")[1].split(\",\").map(lambda x: hl.if_else(x == \".\", hl.missing(hl.tfloat), hl.float(x))))\n", - " )\n", - " )\n", - " _struct = hl.rbind(_dict, lambda d: hl.struct(**{k: _dict.get(k) for k in known_keys}))\n", - " return _struct\n", - "\n", - "# To get all possible keys for allele frequency arrays after loading VCF as MatrixTable\n", - "# known_keys_FREQ = mt.aggregate_rows(\n", - "# hl.agg.explode(\n", - "# lambda x: hl.agg.collect_as_set(x), mt.info.FREQ.split(\"\\\\|\").map(lambda x: x.split(\":\")[0])\n", - "# )\n", - "# )\n", - "\n", - "known_keys = ['GENOME_DK','TWINSUK','dbGaP_PopFreq','Siberian','Chileans',\n", - " 'FINRISK','HapMap','Estonian','ALSPAC','GoESP',\n", - " 'TOPMED','PAGE_STUDY','1000Genomes','Korea1K','ChromosomeY',\n", - " 'ExAC','Qatari','GoNL','MGP','GnomAD',\n", - " 'Vietnamese','GnomAD_exomes','PharmGKB','KOREAN','Daghestan',\n", - " 'HGDP_Stanford','NorthernSweden','SGDP_PRJ']\n", - "known_keys_FREQ = list(map(lambda x: f\"_{x}\", known_keys))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now can read in VCF files again as MatrixTables with the correct contig recodings, and reformat the allele frequency information in `info.FREQ` and the clinical attributes in `info`.\n", - "\n", - "Note that we are specifying a separate header file in the `hl.import_vcf` calls in the cell below. \n", - "\n", - "To make parsing strings easier, the following INFO fields in the VCF headers were changed from `Number=.` to `Number=1`: FREQ, CLNHGVS, CLNVI, CLNORIGIN, CLNSIG, CLNDISB, CLNDN, CLNREVSTAT, CLNACC. \n", - "\n", - "The modified VCF headers used are available in `gs://hail-datasets-tmp/dbSNP`." 
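Before the import cell below applies it row by row, a tiny worked example of arr_str_to_struct (as defined in the earlier cell) may make the FREQ conversion concrete: evaluating it on a two-source literal yields one float array per source, with "." mapped to missing, under the same "_<source>" key convention used for known_keys_FREQ.

import hail as hl

# Hypothetical FREQ entry with two sources, already split on "|".
freq = hl.array(["GnomAD:.,1,3.187e-05", "TOPMED:.,1,2.389e-05"])
result = hl.eval(arr_str_to_struct(freq, ["_GnomAD", "_TOPMED"]))
print(result)
# Expected shape (given the function defined above):
# Struct(_GnomAD=[None, 1.0, 3.187e-05], _TOPMED=[None, 1.0, 2.389e-05])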
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "name = \"dbSNP\"\n", - "version = \"154\"\n", - "builds = [\"GRCh37\", \"GRCh38\"]\n", - "\n", - "for build in builds:\n", - " mt = hl.import_vcf(f\"gs://hail-datasets-tmp/{name}/{name}_{version}_{build}.vcf.bgz\", \n", - " header_file=f\"gs://hail-datasets-tmp/{name}/{name}_{version}_{build}_header_only.vcf.txt\", \n", - " contig_recoding=recodings[build], \n", - " skip_invalid_loci=True, \n", - " reference_genome=build, \n", - " min_partitions=512)\n", - "\n", - " # First annotation, go from str to array for FREQ\n", - " mt = mt.annotate_rows(\n", - " info = mt.info.annotate(\n", - " FREQ = hl.or_missing(hl.is_defined(mt.info.FREQ), mt.info.FREQ.split(\"\\\\|\"))\n", - " )\n", - " )\n", - " # Second annotation, turn array into a struct for FREQ\n", - " mt = mt.annotate_rows(\n", - " info = mt.info.annotate(\n", - " FREQ = hl.or_missing(hl.is_defined(mt.info.FREQ), \n", - " arr_str_to_struct(mt.info.FREQ, known_keys_FREQ))\n", - " )\n", - " )\n", - " # Reformat clinical attributes from str to array\n", - " mt = mt.annotate_rows(\n", - " info = mt.info.annotate(\n", - " CLNHGVS = hl.or_missing(\n", - " hl.is_defined(mt.info.CLNHGVS), \n", - " mt.info.CLNHGVS.split(\"(?:(\\|)|(\\,))\")).map(lambda x: hl.if_else((x == \".\"), hl.missing(hl.tstr), x)),\n", - " CLNVI = hl.or_missing(\n", - " hl.is_defined(mt.info.CLNVI), \n", - " mt.info.CLNVI.split(\"(?:(\\|)|(\\,))\")).filter(lambda x: x != \".\"),\n", - " CLNORIGIN = hl.or_missing(\n", - " hl.is_defined(mt.info.CLNORIGIN), \n", - " mt.info.CLNORIGIN.split(\"(?:(\\|)|(\\,))\")).filter(lambda x: x != \".\"),\n", - " CLNSIG = hl.or_missing(\n", - " hl.is_defined(mt.info.CLNSIG), \n", - " mt.info.CLNSIG.split(\"(?:(\\|)|(\\,))\")).filter(lambda x: x != \".\"),\n", - " CLNDISDB = hl.or_missing(\n", - " hl.is_defined(mt.info.CLNDISDB), \n", - " mt.info.CLNDISDB.split(\"(?:(\\|)|(\\,))\")).filter(lambda x: x != \".\"),\n", - " CLNDN = hl.or_missing(\n", - " hl.is_defined(mt.info.CLNDN), \n", - " mt.info.CLNDN.split(\"(?:(\\|)|(\\,))\")).filter(lambda x: x != \".\"),\n", - " CLNREVSTAT = hl.or_missing(\n", - " hl.is_defined(mt.info.CLNREVSTAT), \n", - " mt.info.CLNREVSTAT.split(\"(?:(\\|)|(\\,))\")).filter(lambda x: x != \".\"),\n", - " CLNACC = hl.or_missing(\n", - " hl.is_defined(mt.info.CLNACC), \n", - " mt.info.CLNACC.split(\"(?:(\\|)|(\\,))\")).filter(lambda x: x != \".\")\n", - " )\n", - " )\n", - " \n", - " mt = mt.checkpoint(f\"gs://hail-datasets-tmp/checkpoints/{name}_{version}_{build}.mt\", \n", - " _read_if_exists=True, \n", - " overwrite=False)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Then we can just grab the `rows` table since we have no sample or entry information in the MatrixTable. \n", - "\n", - "From there, we need to filter the biallelic and multiallelic variants into separate tables, split the multiallelic variants, and then union the split multiallelic table rows back with the biallelic table rows.\n", - "\n", - "The allele frequency arrays start with the reference allele which is then followed by alternate alleles as ordered in the ALT column (from the VCF). So after splitting we can index the array with `a_index` to pull out the relevant allele frequency." 
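To make the a_index indexing above concrete: the FREQ arrays keep the reference allele frequency at index 0 and a_index is 1-based over the alternates, so FREQ[k][a_index] selects the frequency of the allele that the split row represents. A minimal illustration with a made-up array:

import hail as hl

# Frequencies ordered [REF, ALT1, ALT2]; a row split out for ALT2 carries a_index == 2.
freqs = hl.array([0.90, 0.07, 0.03])
a_index = 2
print(hl.eval(freqs[a_index]))  # 0.03, the second alternate's frequency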
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "name = \"dbSNP\"\n", - "version = \"154\"\n", - "builds = [\"GRCh37\", \"GRCh38\"]\n", - "\n", - "for build in builds:\n", - " # No samples or entries in MT, just grab table with the rows\n", - " mt = hl.read_matrix_table(f\"gs://hail-datasets-tmp/checkpoints/{name}_{version}_{build}.mt\")\n", - " ht = mt.rows()\n", - " \n", - " ht_ba = ht.filter(hl.len(ht.alleles) <= 2)\n", - " ht_ba = ht_ba.checkpoint(f\"gs://hail-datasets-tmp/checkpoints/{name}_{version}_{build}_biallelic.ht\", \n", - " _read_if_exists=True, \n", - " overwrite=False)\n", - "\n", - " ht_ma = ht.filter(hl.len(ht.alleles) > 2)\n", - " ht_ma = ht_ma.checkpoint(f\"gs://hail-datasets-tmp/checkpoints/{name}_{version}_{build}_multiallelic.ht\", \n", - " _read_if_exists=True, \n", - " overwrite=False)\n", - "\n", - " ht_split = hl.split_multi(ht_ma, keep_star=True, permit_shuffle=True)\n", - " ht_split = ht_split.repartition(64, shuffle=False)\n", - " ht_split = ht_split.checkpoint(f\"gs://hail-datasets-tmp/checkpoints/{name}_{version}_{build}_split_multiallelic.ht\", \n", - " _read_if_exists=True, \n", - " overwrite=False)\n", - " \n", - " # Next, have to fix indices and union ht_split with ht_ba\n", - " ht_union = ht_ba.union(ht_split, unify=True)\n", - " ht_union = ht_union.annotate(\n", - " a_index = hl.if_else(hl.is_missing(ht_union.a_index), 1, ht_union.a_index),\n", - " was_split = hl.if_else(hl.is_missing(ht_union.was_split), False, ht_union.was_split),\n", - " old_locus = hl.if_else(hl.is_missing(ht_union.old_locus), ht_union.locus, ht_union.old_locus),\n", - " old_alleles = hl.if_else(hl.is_missing(ht_union.old_alleles), ht_union.alleles, ht_union.old_alleles)\n", - " )\n", - " ht_union = ht_union.checkpoint(f\"gs://hail-datasets-tmp/checkpoints/{name}_{version}_{build}_unioned.ht\", \n", - " _read_if_exists=True, \n", - " overwrite=False)\n", - " \n", - " # Arrays for AFs start w/ ref allele in index 0, so just use a_index to get alternate AFs\n", - " ht = ht_union.annotate(\n", - " info = ht_union.info.annotate(\n", - " FREQ = ht_union.info.FREQ.annotate(\n", - " **{k: hl.or_missing(hl.is_defined(ht_union.info.FREQ[k]), \n", - " ht_union.info.FREQ[k][ht_union.a_index]) \n", - " for k in known_keys_FREQ}\n", - " )\n", - " )\n", - " )\n", - " ht = ht.repartition(512, shuffle=True)\n", - " ht = ht.checkpoint(f\"gs://hail-datasets-tmp/checkpoints/{name}_{version}_{build}.ht\", \n", - " _read_if_exists=True, \n", - " overwrite=False)\n", - "\n", - " n_rows = ht.count()\n", - " n_partitions = ht.n_partitions()\n", - "\n", - " ht = ht.annotate_globals(\n", - " metadata=hl.struct(\n", - " name=name,\n", - " version=version,\n", - " reference_genome=build,\n", - " n_rows=n_rows,\n", - " n_partitions=n_partitions\n", - " )\n", - " )\n", - " ht.write(f\"gs://hail-datasets-us/{name}_{version}_{build}.ht\")\n", - " ht = hl.read_table(f\"gs://hail-datasets-us/{name}_{version}_{build}.ht\")\n", - " ht.describe()\n", - " print(str(hl.eval(ht.metadata)) + \"\\n\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Also write tables with only the rsID field, for smaller tables that just map `[locus, alleles]` to `rsID`." 
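One natural use of the rsID-only tables is annotating another variant-keyed dataset via a key join; the sketch below is an assumption-laden illustration rather than part of the notebook. The path is the GRCh37 output written in the next cell, and the Balding-Nichols dataset is only a stand-in for any MatrixTable keyed by ["locus", "alleles"] (its random variants will mostly get missing rsIDs).

import hail as hl

# rsID-only table written by the cell below (GRCh37 flavor).
dbsnp_rsid = hl.read_table("gs://hail-datasets-us/dbSNP_rsid_154_GRCh37.ht")

# Stand-in variant-keyed dataset; any MatrixTable keyed by ["locus", "alleles"] joins the same way.
mt = hl.balding_nichols_model(n_populations=3, n_samples=10, n_variants=100)

# Join on the shared [locus, alleles] key to pull in rsIDs.
mt = mt.annotate_rows(rsid=dbsnp_rsid[mt.row_key].rsid)
mt.rows().select("rsid").show()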
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "name = \"dbSNP\"\n", - "version = \"154\"\n", - "builds = [\"GRCh37\", \"GRCh38\"]\n", - "\n", - "for build in builds:\n", - " # Write table with only rsid's\n", - " ht_rsid = hl.read_table(f\"gs://hail-datasets-us/{name}_{version}_{build}.ht\")\n", - " ht_rsid = ht_rsid.select(\"rsid\")\n", - "\n", - " n_rows = ht_rsid.count()\n", - " n_partitions = ht_rsid.n_partitions()\n", - "\n", - " ht_rsid = ht_rsid.annotate_globals(\n", - " metadata=hl.struct(\n", - " name=f\"{name}_rsid\",\n", - " version=version,\n", - " reference_genome=build,\n", - " n_rows=n_rows,\n", - " n_partitions=n_partitions\n", - " )\n", - " )\n", - " ht_rsid.write(f\"gs://hail-datasets-us/{name}_rsid_{version}_{build}.ht\")\n", - " ht_rsid = hl.read_table(f\"gs://hail-datasets-us/{name}_rsid_{version}_{build}.ht\")\n", - " ht_rsid.describe()\n", - " print(str(hl.eval(ht_rsid.metadata)) + \"\\n\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# To check uniqueness of keys\n", - "tables = [\"gs://hail-datasets-us/dbSNP_rsid_154_GRCh37.ht\", \"gs://hail-datasets-us/dbSNP_rsid_154_GRCh38.ht\"]\n", - "for table in tables:\n", - " ht = hl.read_table(table)\n", - " \n", - " ht_count = ht.count()\n", - " print(f\"n = {ht_count}\")\n", - " ht_distinct_count = ht.distinct().count()\n", - " print(f\"n_distinct = {ht_distinct_count}\")\n", - " \n", - " if ht_count == ht_distinct_count:\n", - " print(f\"{table} rows unique\\n\")\n", - " else:\n", - " print(f\"{table} rows NOT unique\\n\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Add dbSNP to datasets API and annotation DB\n", - "\n", - "Now we can add the tables we created to `hail/python/hail/experimental/datasets.json`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import json\n", - "\n", - "datasets_path = os.path.abspath(\"../../hail/python/hail/experimental/datasets.json\")\n", - "with open(datasets_path, \"r\") as f:\n", - " datasets = json.load(f)\n", - "\n", - "names = [\"dbSNP\", \"dbSNP_rsid\"]\n", - "version = \"154\"\n", - "builds = [\"GRCh37\", \"GRCh38\"]\n", - "\n", - "gcs_us_url_root = \"gs://hail-datasets-us\"\n", - "gcs_eu_url_root = \"gs://hail-datasets-eu\"\n", - "aws_us_url_root = \"s3://hail-datasets-us-east-1\"\n", - "\n", - "for name in names:\n", - " json_entry = {\n", - " \"annotation_db\": {\n", - " \"key_properties\": []\n", - " },\n", - " \"description\": \"dbSNP: Reference SNP (rs or RefSNP) Hail Table. 
The database includes both common and rare single-base nucleotide variation (SNV), short (=< 50bp) deletion/insertion polymorphisms, and other classes of small genetic variations.\",\n", - " \"url\": \"https://www.ncbi.nlm.nih.gov/snp/docs/RefSNP_about/\",\n", - " \"versions\": [\n", - " {\n", - " \"reference_genome\": builds[0],\n", - " \"url\": {\n", - " \"aws\": {\n", - " \"us\": f\"{aws_us_url_root}/{name}_{version}_{builds[0]}.ht\"\n", - " },\n", - " \"gcp\": {\n", - " \"eu\": f\"{gcs_eu_url_root}/{name}_{version}_{builds[0]}.ht\",\n", - " \"us\": f\"{gcs_us_url_root}/{name}_{version}_{builds[0]}.ht\"\n", - " }\n", - " },\n", - " \"version\": version\n", - " },\n", - " {\n", - " \"reference_genome\": builds[1],\n", - " \"url\": {\n", - " \"aws\": {\n", - " \"us\": f\"{aws_us_url_root}/{name}_{version}_{builds[1]}.ht\"\n", - " },\n", - " \"gcp\": {\n", - " \"eu\": f\"{gcs_eu_url_root}/{name}_{version}_{builds[1]}.ht\",\n", - " \"us\": f\"{gcs_us_url_root}/{name}_{version}_{builds[1]}.ht\"\n", - " }\n", - " },\n", - " \"version\": version\n", - " } \n", - " ]\n", - " }\n", - " \n", - " if name == \"dbSNP_rsid\":\n", - " json_entry[\"description\"] = \"dbSNP: This Hail Table contains a mapping from locus/allele pairs to Reference SNP IDs (rsID). For the full dataset, see dbSNP.\"\n", - " \n", - " datasets[name] = json_entry\n", - "\n", - "# Write new entries back to datasets.json config:\n", - "with open(datasets_path, \"w\") as f:\n", - " json.dump(datasets, f, sort_keys=True, ensure_ascii=False, indent=2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "After adding tables to `datasets.json`, create .rst schema files for docs:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# Create/update schema .rst file\n", - "import textwrap\n", - "\n", - "output_dir = os.path.abspath(\"../../hail/python/hail/docs/datasets/schemas\")\n", - "datasets_path = os.path.abspath(\"../../hail/python/hail/experimental/datasets.json\")\n", - "with open(datasets_path, \"r\") as f:\n", - " datasets = json.load(f)\n", - "\n", - "names = [\"dbSNP\", \"dbSNP_rsid\"]\n", - "for name in names:\n", - " versions = sorted(set(dataset[\"version\"] for dataset in datasets[name][\"versions\"]))\n", - " if not versions:\n", - " versions = [None]\n", - " reference_genomes = sorted(set(dataset[\"reference_genome\"] for dataset in datasets[name][\"versions\"]))\n", - " if not reference_genomes:\n", - " reference_genomes = [None]\n", - "\n", - " print(name)\n", - " print(versions[0])\n", - " print(reference_genomes[0] + \"\\n\")\n", - "\n", - " path = [dataset[\"url\"][\"gcp\"][\"us\"]\n", - " for dataset in datasets[name][\"versions\"]\n", - " if all([dataset[\"version\"] == versions[0],\n", - " dataset[\"reference_genome\"] == reference_genomes[0]])]\n", - " assert len(path) == 1\n", - " path = path[0]\n", - " if path.endswith(\".ht\"):\n", - " table = hl.methods.read_table(path)\n", - " table_class = \"hail.Table\"\n", - " else:\n", - " table = hl.methods.read_matrix_table(path)\n", - " table_class = \"hail.MatrixTable\"\n", - "\n", - " description = table.describe(handler=lambda x: str(x)).split(\"\\n\")\n", - " description = \"\\n\".join([line.rstrip() for line in description])\n", - "\n", - " template = \"\"\".. 
_{dataset}:\n", - "\n", - "{dataset}\n", - "{underline1}\n", - "\n", - "* **Versions:** {versions}\n", - "* **Reference genome builds:** {ref_genomes}\n", - "* **Type:** :class:`{class}`\n", - "\n", - "Schema ({version0}, {ref_genome0})\n", - "{underline2}\n", - "\n", - ".. code-block:: text\n", - "\n", - "{schema}\n", - "\n", - "\"\"\"\n", - " context = {\n", - " \"dataset\": name,\n", - " \"underline1\": len(name) * \"=\",\n", - " \"version0\": versions[0],\n", - " \"ref_genome0\": reference_genomes[0],\n", - " \"versions\": \", \".join([str(version) for version in versions]),\n", - " \"ref_genomes\": \", \".join([str(reference_genome) for reference_genome in reference_genomes]),\n", - " \"underline2\": len(\"\".join([\"Schema (\", str(versions[0]), \", \", str(reference_genomes[0]), \")\"])) * \"~\",\n", - " \"schema\": textwrap.indent(description, \" \"),\n", - " \"class\": table_class\n", - " }\n", - " with open(output_dir + f\"/{name}.rst\", \"w\") as f:\n", - " f.write(template.format(**context).strip())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.8" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": {}, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/datasets/notebooks/reformat_buckets.ipynb b/datasets/notebooks/reformat_buckets.ipynb deleted file mode 100644 index 74bfbf1e864..00000000000 --- a/datasets/notebooks/reformat_buckets.ipynb +++ /dev/null @@ -1,195 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "import os\n", - "import json" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "# Create list with each line containing old path followed by new path\n", - "lines = []\n", - "with open(\"reformat_buckets.txt\", \"r\") as f:\n", - " for line in f:\n", - " line = line.replace(\"gsutil -u broad-ctsa -m cp -r \", \"\").replace(\"*\", \"\")\n", - " lines.append(line)" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "# Create dict mapping current urls to new urls\n", - "new_mappings = {}\n", - "\n", - "# GCS\n", - "# Mappings from old_name: new_name for hail-datasets-us\n", - "for line in lines:\n", - " line = line.strip().split(\" \")\n", - " line = [x.rstrip(\"/\") for x in line]\n", - " new_mappings[line[0]] = line[1]\n", - "# Mappings from old_name: new_name for hail-datasets-eu\n", - "for line in lines:\n", - " line = line.replace(\"hail-datasets-us/\", \"hail-datasets-eu/\")\n", - " line = line.strip().split(\" \")\n", - " line = [x.rstrip(\"/\") for x in line]\n", - " new_mappings[line[0]] = line[1]\n", - "# AWS\n", - "# Mappings from old_name: new_name for hail-datasets-us-east-1\n", - "for line in lines:\n", - " line = line.replace(\"gs://hail-datasets-us/\", \"s3://hail-datasets-us-east-1/\")\n", - " line = line.strip().split(\" \")\n", - " line = [x.rstrip(\"/\") for x in 
line]\n", - " new_mappings[line[0]] = line[1]\n", - "\n", - "with open(\"reformat_buckets_mappings.json\", \"w\") as f:\n", - " json.dump(new_mappings, f, sort_keys=True, ensure_ascii=False, indent=2)" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "# Load config file\n", - "datasets_json_path = os.path.abspath(\"../../hail/python/hail/experimental/datasets.json\")\n", - "with open(datasets_json_path) as f:\n", - " datasets_json = json.load(f)\n", - "\n", - "# Update urls for all datasets according to new mappings\n", - "dataset_names = [name for name in datasets_json]\n", - "for name in dataset_names:\n", - " versions = datasets_json[name][\"versions\"]\n", - " for version in versions:\n", - " if \"aws\" in version[\"url\"]:\n", - " if version[\"url\"][\"aws\"][\"us\"] in new_mappings.keys():\n", - " version[\"url\"][\"aws\"][\"us\"] = new_mappings[version[\"url\"][\"aws\"][\"us\"]]\n", - " if \"gcp\" in version[\"url\"]:\n", - " if \"us\" in version[\"url\"][\"gcp\"]:\n", - " if version[\"url\"][\"gcp\"][\"us\"] in new_mappings.keys():\n", - " version[\"url\"][\"gcp\"][\"us\"] = new_mappings[version[\"url\"][\"gcp\"][\"us\"]]\n", - " if \"eu\" in version[\"url\"][\"gcp\"]:\n", - " if version[\"url\"][\"gcp\"][\"eu\"] in new_mappings.keys():\n", - " version[\"url\"][\"gcp\"][\"eu\"] = new_mappings[version[\"url\"][\"gcp\"][\"eu\"]]\n", - " # Update GTEx names while we're at it\n", - " if \"GTEx_eQTL\" in name or \"GTEx_sQTL\" in name:\n", - " tissue = name.split(\"_\", 3)[-1]\n", - " qtl = name.split(\"_\", 3)[1]\n", - " updated_name = f\"GTEx_{qtl}_{tissue}_all_snp_gene_associations\"\n", - " datasets_json[updated_name] = datasets_json[name]\n", - " del datasets_json[name]\n", - "\n", - "# Write new entries to config file\n", - "with open(datasets_json_path, \"w\") as f:\n", - " json.dump(datasets_json, f, sort_keys=True, ensure_ascii=False, indent=2)" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "import os\n", - "import json\n", - "import hail as hl\n", - "hl.init(spark_conf={\"spark.hadoop.fs.s3a.aws.credentials.provider\":\n", - " \"org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider\"})\n", - "\n", - "# Test that we can load datasets from GCS and AWS\n", - "datasets_json_path = os.path.abspath(\"../../hail/python/hail/experimental/datasets.json\")\n", - "with open(datasets_json_path) as f:\n", - " datasets_json = json.load(f)\n", - "\n", - "dataset_names = [name for name in datasets_json]\n", - "for name in dataset_names:\n", - " print(name)\n", - " versions = datasets_json[name][\"versions\"]\n", - " for version in versions:\n", - " if \"gcp\" in version[\"url\"]:\n", - " if \"us\" in version[\"url\"][\"gcp\"]:\n", - " url = version[\"url\"][\"gcp\"][\"us\"]\n", - " print(url)\n", - " if url.endswith(\".ht\"):\n", - " hl.read_table(url)\n", - " elif url.endswith(\".mt\"):\n", - " hl.read_matrix_table(url)\n", - " else:\n", - " hl.linalg.BlockMatrix.read(url)\n", - " if \"eu\" in version[\"url\"][\"gcp\"]:\n", - " url = version[\"url\"][\"gcp\"][\"eu\"]\n", - " print(url)\n", - " if url.endswith(\".ht\"):\n", - " hl.read_table(url)\n", - " elif url.endswith(\".mt\"):\n", - " hl.read_matrix_table(url)\n", - " else:\n", - " hl.linalg.BlockMatrix.read(url)\n", - " if \"aws\" in version[\"url\"]:\n", - " url = 
version[\"url\"][\"aws\"][\"us\"].replace(\"s3://\", \"s3a://\")\n", - " print(url)\n", - " if url.endswith(\".ht\"):\n", - " hl.read_table(url)\n", - " elif url.endswith(\".mt\"):\n", - " hl.read_matrix_table(url)\n", - " else:\n", - " hl.linalg.BlockMatrix.read(url)" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/datasets/notebooks/reformat_buckets.txt b/datasets/notebooks/reformat_buckets.txt deleted file mode 100644 index 1d4e644223d..00000000000 --- a/datasets/notebooks/reformat_buckets.txt +++ /dev/null @@ -1,201 +0,0 @@ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_autosomes.phase_3.GRCh37.mt/* gs://hail-datasets-us/1000_Genomes/phase_3/GRCh37/autosomes.mt/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_autosomes.phase_3.GRCh38.mt/* gs://hail-datasets-us/1000_Genomes/phase_3/GRCh38/autosomes.mt/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_chrX.phase_3.GRCh37.mt/* gs://hail-datasets-us/1000_Genomes/phase_3/GRCh37/chrX.mt/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_chrX.phase_3.GRCh38.mt/* gs://hail-datasets-us/1000_Genomes/phase_3/GRCh38/chrX.mt/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_chrY.phase_3.GRCh37.mt/* gs://hail-datasets-us/1000_Genomes/phase_3/GRCh37/chrY.mt/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_chrY.phase_3.GRCh38.mt/* gs://hail-datasets-us/1000_Genomes/phase_3/GRCh38/chrY.mt/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_chrMT.phase_3.GRCh37.mt/* gs://hail-datasets-us/1000_Genomes/phase_3/GRCh37/chrMT.mt/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_phase3_samples.ht/* gs://hail-datasets-us/1000_Genomes/phase_3/samples.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_phase3_sample_relationships.ht/* gs://hail-datasets-us/1000_Genomes/phase_3/sample_relationships.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_autosomes_NYGC_30x_HighCov_GRCh38.mt/* gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/autosomes.mt/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_chrX_NYGC_30x_HighCov_GRCh38.mt/* gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/chrX.mt/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_chrY_NYGC_30x_HighCov_GRCh38.mt/* gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/chrY.mt/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_NYGC_30x_HighCov_samples.ht/* gs://hail-datasets-us/1000_Genomes/NYGC_30x/samples.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_phase3_European_autosomes_maf_gt_001.bm/* gs://hail-datasets-us/1000_Genomes/phase_3/European_autosomes_maf_gt_001.bm/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_phase3_European_autosomes_maf_gt_001.metadata.he/* gs://hail-datasets-us/1000_Genomes/phase_3/European_autosomes_maf_gt_001.metadata.he/ -gsutil -u broad-ctsa -m cp -r 
gs://hail-datasets-us/1000_Genomes_phase3_European_autosomes_maf_gt_001_standardized.bm/* gs://hail-datasets-us/1000_Genomes/phase_3/European_autosomes_maf_gt_001_standardized.bm/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_phase3_European_autosomes_maf_gt_001_standardized.metadata.he/* gs://hail-datasets-us/1000_Genomes/phase_3/European_autosomes_maf_gt_001_standardized.metadata.he/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/CADD.v1.4.GRCh37.ht/* gs://hail-datasets-us/CADD/v1.4/GRCh37/table.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/CADD.v1.4.GRCh38.ht/* gs://hail-datasets-us/CADD/v1.4/GRCh38/table.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/CADD_v1.6_GRCh37.ht/* gs://hail-datasets-us/CADD/v1.6/GRCh37/table.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/CADD_v1.6_GRCh38.ht/* gs://hail-datasets-us/CADD/v1.6/GRCh38/table.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/DANN.GRCh37.ht/* gs://hail-datasets-us/DANN/GRCh37/table.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/DANN.GRCh38.ht/* gs://hail-datasets-us/DANN/GRCh38/table.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh37.ht/* gs://hail-datasets-us/Ensembl/release_95/GRCh37/homo_sapiens_low_complexity_regions.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh38.ht/* gs://hail-datasets-us/Ensembl/release_95/GRCh38/homo_sapiens_low_complexity_regions.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/Ensembl_homo_sapiens_reference_genome.release_95.GRCh37.ht/* gs://hail-datasets-us/Ensembl/release_95/GRCh37/homo_sapiens_reference_genome.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/Ensembl_homo_sapiens_reference_genome.release_95.GRCh38.ht/* gs://hail-datasets-us/Ensembl/release_95/GRCh38/homo_sapiens_reference_genome.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GERP_elements.GERP++.GRCh37.ht/* gs://hail-datasets-us/GERP/GERP++/GRCh37/elements.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GERP_elements.GERP++.GRCh38.ht/* gs://hail-datasets-us/GERP/GERP++/GRCh38/elements.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GERP_scores.GERP++.GRCh37.ht/* gs://hail-datasets-us/GERP/GERP++/GRCh37/scores.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GERP_scores.GERP++.GRCh38.ht/* gs://hail-datasets-us/GERP/GERP++/GRCh38/scores.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_RNA_seq_gene_TPMs.v7.GRCh37.mt/* gs://hail-datasets-us/GTEx/v7/GRCh37/RNA_seq_gene_TPMs.mt/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_RNA_seq_gene_read_counts.v7.GRCh37.mt/* gs://hail-datasets-us/GTEx/v7/GRCh37/RNA_seq_gene_read_counts.mt/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_RNA_seq_junction_read_counts.v7.GRCh37.mt/* gs://hail-datasets-us/GTEx/v7/GRCh37/RNA_seq_junction_read_counts.mt/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Nerve_Tibial_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Nerve_Tibial_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Nerve_Tibial_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Nerve_Tibial_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Heart_Left_Ventricle_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Heart_Left_Ventricle_all_snp_gene_associations.ht/ 
-gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Heart_Left_Ventricle_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Heart_Left_Ventricle_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Putamen_basal_ganglia_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Putamen_basal_ganglia_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Putamen_basal_ganglia_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Putamen_basal_ganglia_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Vagina_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Vagina_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Vagina_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Vagina_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Colon_Sigmoid_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Colon_Sigmoid_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Colon_Sigmoid_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Colon_Sigmoid_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Small_Intestine_Terminal_Ileum_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Small_Intestine_Terminal_Ileum_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Small_Intestine_Terminal_Ileum_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Small_Intestine_Terminal_Ileum_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Amygdala_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Amygdala_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Amygdala_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Amygdala_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Muscle_Skeletal_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Muscle_Skeletal_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Muscle_Skeletal_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Muscle_Skeletal_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Adipose_Visceral_Omentum_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Adipose_Visceral_Omentum_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Adipose_Visceral_Omentum_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Adipose_Visceral_Omentum_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Pancreas_v8_GRCh38.ht/* 
gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Pancreas_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Pancreas_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Pancreas_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Caudate_basal_ganglia_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Caudate_basal_ganglia_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Caudate_basal_ganglia_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Caudate_basal_ganglia_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Hippocampus_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Hippocampus_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Hippocampus_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Hippocampus_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Hypothalamus_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Hypothalamus_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Hypothalamus_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Hypothalamus_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Spleen_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Spleen_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Spleen_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Spleen_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Adipose_Subcutaneous_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Adipose_Subcutaneous_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Adipose_Subcutaneous_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Adipose_Subcutaneous_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Lung_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Lung_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Lung_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Lung_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Cerebellum_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Cerebellum_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Cerebellum_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Cerebellum_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Esophagus_Gastroesophageal_Junction_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Esophagus_Gastroesophageal_Junction_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Thyroid_v8_GRCh38.ht/* 
gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Thyroid_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Thyroid_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Thyroid_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Adrenal_Gland_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Adrenal_Gland_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Adrenal_Gland_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Adrenal_Gland_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Cerebellar_Hemisphere_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Cerebellar_Hemisphere_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Cerebellar_Hemisphere_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Cerebellar_Hemisphere_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Minor_Salivary_Gland_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Minor_Salivary_Gland_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Minor_Salivary_Gland_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Minor_Salivary_Gland_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Esophagus_Muscularis_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Esophagus_Muscularis_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Esophagus_Muscularis_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Esophagus_Muscularis_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Uterus_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Uterus_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Uterus_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Uterus_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Prostate_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Prostate_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Prostate_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Prostate_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r 
gs://hail-datasets-us/GTEx_eQTL_allpairs_Liver_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Liver_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Liver_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Liver_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Artery_Aorta_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Artery_Aorta_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Artery_Aorta_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Artery_Aorta_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Colon_Transverse_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Colon_Transverse_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Colon_Transverse_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Colon_Transverse_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Heart_Atrial_Appendage_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Heart_Atrial_Appendage_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Heart_Atrial_Appendage_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Heart_Atrial_Appendage_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Cortex_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Cortex_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Cortex_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Cortex_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Kidney_Cortex_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Kidney_Cortex_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Kidney_Cortex_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Kidney_Cortex_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Skin_Sun_Exposed_Lower_leg_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Skin_Sun_Exposed_Lower_leg_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Cells_EBV-transformed_lymphocytes_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Cells_EBV-transformed_lymphocytes_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Esophagus_Mucosa_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Esophagus_Mucosa_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Esophagus_Mucosa_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Esophagus_Mucosa_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r 
gs://hail-datasets-us/GTEx_eQTL_allpairs_Testis_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Testis_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Testis_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Testis_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Substantia_nigra_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Substantia_nigra_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Substantia_nigra_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Substantia_nigra_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Whole_Blood_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Whole_Blood_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Whole_Blood_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Whole_Blood_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Frontal_Cortex_BA9_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Frontal_Cortex_BA9_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Frontal_Cortex_BA9_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Frontal_Cortex_BA9_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Artery_Tibial_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Artery_Tibial_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Artery_Tibial_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Artery_Tibial_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Spinal_cord_cervical_c-1_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Spinal_cord_cervical_c-1_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Stomach_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Stomach_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Stomach_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Stomach_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Breast_Mammary_Tissue_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Breast_Mammary_Tissue_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Breast_Mammary_Tissue_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Breast_Mammary_Tissue_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Ovary_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Ovary_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Ovary_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Ovary_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Pituitary_v8_GRCh38.ht/* 
gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Pituitary_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Pituitary_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Pituitary_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Cells_Cultured_fibroblasts_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Cells_Cultured_fibroblasts_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Cells_Cultured_fibroblasts_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Cells_Cultured_fibroblasts_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Artery_Coronary_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Artery_Coronary_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Artery_Coronary_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Artery_Coronary_all_snp_gene_associations.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_all_snp_gene_associations_v8_GRCh38.mt/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/all_snp_gene_associations.mt/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_all_snp_gene_associations_v8_GRCh38.mt/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/all_snp_gene_associations.mt/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/LDSC_baselineLD_v2.2_ld_scores.GRCh37.ht/* gs://hail-datasets-us/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/LDSC_baselineLD_v2.2_ld_scores.GRCh37.mt/* gs://hail-datasets-us/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.mt/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/LDSC_baseline_v1.1_ld_scores.GRCh37.mt/* gs://hail-datasets-us/LDSC/baseline_v1.1/GRCh37/ld_scores.mt/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/dbSNP_154_GRCh37.ht/* gs://hail-datasets-us/dbSNP/build_154/GRCh37/full_table.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/dbSNP_154_GRCh38.ht/* gs://hail-datasets-us/dbSNP/build_154/GRCh38/full_table.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/dbSNP_rsid_154_GRCh37.ht/* gs://hail-datasets-us/dbSNP/build_154/GRCh37/rsid_only_table.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/dbSNP_rsid_154_GRCh38.ht/* gs://hail-datasets-us/dbSNP/build_154/GRCh38/rsid_only_table.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/NCBI_assembly_report_p13_GRCh37.ht/* gs://hail-datasets-us/dbSNP/assembly_reports/GRCh37/p13.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/NCBI_assembly_report_p13_GRCh38.ht/* gs://hail-datasets-us/dbSNP/assembly_reports/GRCh38/p13.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/UK_Biobank_Rapid_GWAS_both_sexes.v2.GRCh37.mt/* gs://hail-datasets-us/UK_Biobank/Rapid_GWAS/v2/GRCh37/both_sexes.mt/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/UK_Biobank_Rapid_GWAS_female.v2.GRCh37.mt/* gs://hail-datasets-us/UK_Biobank/Rapid_GWAS/v2/GRCh37/female.mt/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/UK_Biobank_Rapid_GWAS_male.v2.GRCh37.mt/* gs://hail-datasets-us/UK_Biobank/Rapid_GWAS/v2/GRCh37/male.mt/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/dbnsfp/dbNSFP4.0_gene.complete.bgz.ht/* gs://hail-datasets-us/dbNSFP/v4.0a/gene_complete.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/dbnsfp4.0a.GRCh37.ht/* gs://hail-datasets-us/dbNSFP/v4.0a/GRCh37/variant.ht/ -gsutil -u broad-ctsa -m cp 
-r gs://hail-datasets-us/dbnsfp4.0a.GRCh38.ht/* gs://hail-datasets-us/dbNSFP/v4.0a/GRCh38/variant.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/gencode.v19.annotation.GRCh37.ht/* gs://hail-datasets-us/GENCODE/v19/GRCh37/annotation.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/gencode.v31.annotation.GRCh38.ht/* gs://hail-datasets-us/GENCODE/v31/GRCh38/annotation.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/gene_specific_summary_2019-07.txt.gz.ht/* gs://hail-datasets-us/ClinVar/2019-07/gene_specific_summary.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/variant_summary_2019-07.GRCh37.txt.gz.ht/* gs://hail-datasets-us/ClinVar/2019-07/GRCh37/variant_summary.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/variant_summary_2019-07.GRCh38.txt.gz.ht/* gs://hail-datasets-us/ClinVar/2019-07/GRCh38/variant_summary.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/gnomad_v2.1.1_lof_metrics_by_gene.ht/* gs://hail-datasets-us/gnomAD/v2.1.1/lof_metrics_by_gene.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_bmi_exome_AFR_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_AFR.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_bmi_exome_ALL_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_ALL.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_bmi_exome_AMR_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_AMR.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_bmi_exome_EAS_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_EAS.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_bmi_exome_EUR_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_EUR.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_bmi_exome_SAS_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_SAS.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_height_exome_AFR_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_AFR.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_height_exome_ALL_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_ALL.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_height_exome_AMR_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_AMR.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_height_exome_EAS_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_EAS.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_height_exome_EUR_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_EUR.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_height_exome_SAS_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_SAS.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_whr_exome_C_ALL_Add_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_C_ALL_Add.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_whr_exome_C_ALL_Rec_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_C_ALL_Rec.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_whr_exome_C_EUR_Add_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_C_EUR_Add.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_whr_exome_C_EUR_Rec_2018_GRCh37.ht/* 
gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_C_EUR_Rec.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_whr_exome_M_ALL_Add_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_M_ALL_Add.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_whr_exome_M_ALL_Rec_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_M_ALL_Rec.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_whr_exome_M_EUR_Add_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_M_EUR_Add.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_whr_exome_M_EUR_Rec_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_M_EUR_Rec.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_whr_exome_W_ALL_Add_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_W_ALL_Add.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_whr_exome_W_ALL_Rec_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_W_ALL_Rec.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_whr_exome_W_EUR_Add_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_W_EUR_Add.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_whr_exome_W_EUR_Rec_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_W_EUR_Rec.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/CADD.v1.4.GRCh37.ht/* gs://hail-datasets-us/CADD/v1.4/GRCh37/table.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/CADD.v1.4.GRCh38.ht/* gs://hail-datasets-us/CADD/v1.4/GRCh38/table.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/DANN.GRCh37.ht/* gs://hail-datasets-us/DANN/GRCh37/table.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/DANN.GRCh38.ht/* gs://hail-datasets-us/DANN/GRCh38/table.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh37.ht/* gs://hail-datasets-us/Ensembl/release_95/GRCh37/homo_sapiens_low_complexity_regions.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh38.ht/* gs://hail-datasets-us/Ensembl/release_95/GRCh38/homo_sapiens_low_complexity_regions.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/Ensembl_homo_sapiens_reference_genome.release_95.GRCh37.ht/* gs://hail-datasets-us/Ensembl/release_95/GRCh37/homo_sapiens_reference_genome.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/Ensembl_homo_sapiens_reference_genome.release_95.GRCh38.ht/* gs://hail-datasets-us/Ensembl/release_95/GRCh38/homo_sapiens_reference_genome.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/GERP_elements.GERP++.GRCh37.ht/* gs://hail-datasets-us/GERP/GERP++/GRCh37/elements.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/GERP_elements.GERP++.GRCh38.ht/* gs://hail-datasets-us/GERP/GERP++/GRCh38/elements.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/GERP_scores.GERP++.GRCh37.ht/* gs://hail-datasets-us/GERP/GERP++/GRCh37/scores.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/GERP_scores.GERP++.GRCh38.ht/* gs://hail-datasets-us/GERP/GERP++/GRCh38/scores.ht/ -gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/LDSC_baselineLD_v2.2_ld_scores.GRCh37.ht/* gs://hail-datasets-us/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.ht/ 
-gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/dbnsfp/dbNSFP4.0_gene.complete.bgz.ht/* gs://hail-datasets-us/dbNSFP/v4.0a/gene_complete.ht/
-gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/dbnsfp4.0a.GRCh37.ht/* gs://hail-datasets-us/dbNSFP/v4.0a/GRCh37/variant.ht/
-gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/dbnsfp4.0a.GRCh38.ht/* gs://hail-datasets-us/dbNSFP/v4.0a/GRCh38/variant.ht/
-gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/gencode.v19.annotation.GRCh37.ht/* gs://hail-datasets-us/GENCODE/v19/GRCh37/annotation.ht/
-gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/gencode.v31.annotation.GRCh38.ht/* gs://hail-datasets-us/GENCODE/v31/GRCh38/annotation.ht/
-gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/gene_specific_summary_2019-07.txt.gz.ht/* gs://hail-datasets-us/ClinVar/2019-07/gene_specific_summary.ht/
-gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/variant_summary_2019-07.GRCh37.txt.gz.ht/* gs://hail-datasets-us/ClinVar/2019-07/GRCh37/variant_summary.ht/
-gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/variant_summary_2019-07.GRCh38.txt.gz.ht/* gs://hail-datasets-us/ClinVar/2019-07/GRCh38/variant_summary.ht/
-gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/gnomad_v2.1.1_lof_metrics_by_gene.ht/* gs://hail-datasets-us/gnomAD/v2.1.1/lof_metrics_by_gene.ht/
diff --git a/datasets/notebooks/reformat_buckets_mappings.json b/datasets/notebooks/reformat_buckets_mappings.json
deleted file mode 100644
index 97a696cd943..00000000000
--- a/datasets/notebooks/reformat_buckets_mappings.json
+++ /dev/null
@@ -1,605 +0,0 @@
-{
- "gs://hail-datasets-eu/1000_Genomes_NYGC_30x_HighCov_samples.ht": "gs://hail-datasets-eu/1000_Genomes/NYGC_30x/samples.ht",
- "gs://hail-datasets-eu/1000_Genomes_autosomes.phase_3.GRCh37.mt": "gs://hail-datasets-eu/1000_Genomes/phase_3/GRCh37/autosomes.mt",
- "gs://hail-datasets-eu/1000_Genomes_autosomes.phase_3.GRCh38.mt": "gs://hail-datasets-eu/1000_Genomes/phase_3/GRCh38/autosomes.mt",
- "gs://hail-datasets-eu/1000_Genomes_autosomes_NYGC_30x_HighCov_GRCh38.mt": "gs://hail-datasets-eu/1000_Genomes/NYGC_30x/GRCh38/autosomes.mt",
- "gs://hail-datasets-eu/1000_Genomes_chrMT.phase_3.GRCh37.mt": "gs://hail-datasets-eu/1000_Genomes/phase_3/GRCh37/chrMT.mt",
- "gs://hail-datasets-eu/1000_Genomes_chrX.phase_3.GRCh37.mt": "gs://hail-datasets-eu/1000_Genomes/phase_3/GRCh37/chrX.mt",
- "gs://hail-datasets-eu/1000_Genomes_chrX.phase_3.GRCh38.mt": "gs://hail-datasets-eu/1000_Genomes/phase_3/GRCh38/chrX.mt",
- "gs://hail-datasets-eu/1000_Genomes_chrX_NYGC_30x_HighCov_GRCh38.mt": "gs://hail-datasets-eu/1000_Genomes/NYGC_30x/GRCh38/chrX.mt",
- "gs://hail-datasets-eu/1000_Genomes_chrY.phase_3.GRCh37.mt": "gs://hail-datasets-eu/1000_Genomes/phase_3/GRCh37/chrY.mt",
- "gs://hail-datasets-eu/1000_Genomes_chrY.phase_3.GRCh38.mt": "gs://hail-datasets-eu/1000_Genomes/phase_3/GRCh38/chrY.mt",
- "gs://hail-datasets-eu/1000_Genomes_chrY_NYGC_30x_HighCov_GRCh38.mt": "gs://hail-datasets-eu/1000_Genomes/NYGC_30x/GRCh38/chrY.mt",
- "gs://hail-datasets-eu/1000_Genomes_phase3_European_autosomes_maf_gt_001.bm": "gs://hail-datasets-eu/1000_Genomes/phase_3/European_autosomes_maf_gt_001.bm",
- "gs://hail-datasets-eu/1000_Genomes_phase3_European_autosomes_maf_gt_001.metadata.he": "gs://hail-datasets-eu/1000_Genomes/phase_3/European_autosomes_maf_gt_001.metadata.he",
- "gs://hail-datasets-eu/1000_Genomes_phase3_European_autosomes_maf_gt_001_standardized.bm":
"gs://hail-datasets-eu/1000_Genomes/phase_3/European_autosomes_maf_gt_001_standardized.bm", - "gs://hail-datasets-eu/1000_Genomes_phase3_European_autosomes_maf_gt_001_standardized.metadata.he": "gs://hail-datasets-eu/1000_Genomes/phase_3/European_autosomes_maf_gt_001_standardized.metadata.he", - "gs://hail-datasets-eu/1000_Genomes_phase3_sample_relationships.ht": "gs://hail-datasets-eu/1000_Genomes/phase_3/sample_relationships.ht", - "gs://hail-datasets-eu/1000_Genomes_phase3_samples.ht": "gs://hail-datasets-eu/1000_Genomes/phase_3/samples.ht", - "gs://hail-datasets-eu/CADD.v1.4.GRCh37.ht": "gs://hail-datasets-eu/CADD/v1.4/GRCh37/table.ht", - "gs://hail-datasets-eu/CADD.v1.4.GRCh38.ht": "gs://hail-datasets-eu/CADD/v1.4/GRCh38/table.ht", - "gs://hail-datasets-eu/CADD_v1.6_GRCh37.ht": "gs://hail-datasets-eu/CADD/v1.6/GRCh37/table.ht", - "gs://hail-datasets-eu/CADD_v1.6_GRCh38.ht": "gs://hail-datasets-eu/CADD/v1.6/GRCh38/table.ht", - "gs://hail-datasets-eu/DANN.GRCh37.ht": "gs://hail-datasets-eu/DANN/GRCh37/table.ht", - "gs://hail-datasets-eu/DANN.GRCh38.ht": "gs://hail-datasets-eu/DANN/GRCh38/table.ht", - "gs://hail-datasets-eu/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh37.ht": "gs://hail-datasets-eu/Ensembl/release_95/GRCh37/homo_sapiens_low_complexity_regions.ht", - "gs://hail-datasets-eu/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh38.ht": "gs://hail-datasets-eu/Ensembl/release_95/GRCh38/homo_sapiens_low_complexity_regions.ht", - "gs://hail-datasets-eu/Ensembl_homo_sapiens_reference_genome.release_95.GRCh37.ht": "gs://hail-datasets-eu/Ensembl/release_95/GRCh37/homo_sapiens_reference_genome.ht", - "gs://hail-datasets-eu/Ensembl_homo_sapiens_reference_genome.release_95.GRCh38.ht": "gs://hail-datasets-eu/Ensembl/release_95/GRCh38/homo_sapiens_reference_genome.ht", - "gs://hail-datasets-eu/GERP_elements.GERP++.GRCh37.ht": "gs://hail-datasets-eu/GERP/GERP++/GRCh37/elements.ht", - "gs://hail-datasets-eu/GERP_elements.GERP++.GRCh38.ht": "gs://hail-datasets-eu/GERP/GERP++/GRCh38/elements.ht", - "gs://hail-datasets-eu/GERP_scores.GERP++.GRCh37.ht": "gs://hail-datasets-eu/GERP/GERP++/GRCh37/scores.ht", - "gs://hail-datasets-eu/GERP_scores.GERP++.GRCh38.ht": "gs://hail-datasets-eu/GERP/GERP++/GRCh38/scores.ht", - "gs://hail-datasets-eu/GTEx_RNA_seq_gene_TPMs.v7.GRCh37.mt": "gs://hail-datasets-eu/GTEx/v7/GRCh37/RNA_seq_gene_TPMs.mt", - "gs://hail-datasets-eu/GTEx_RNA_seq_gene_read_counts.v7.GRCh37.mt": "gs://hail-datasets-eu/GTEx/v7/GRCh37/RNA_seq_gene_read_counts.mt", - "gs://hail-datasets-eu/GTEx_RNA_seq_junction_read_counts.v7.GRCh37.mt": "gs://hail-datasets-eu/GTEx/v7/GRCh37/RNA_seq_junction_read_counts.mt", - "gs://hail-datasets-eu/GTEx_eQTL_all_snp_gene_associations_v8_GRCh38.mt": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/all_snp_gene_associations.mt", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Adipose_Subcutaneous_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Adipose_Subcutaneous_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Adipose_Visceral_Omentum_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Adipose_Visceral_Omentum_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Adrenal_Gland_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Adrenal_Gland_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Artery_Aorta_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Artery_Aorta_all_snp_gene_associations.ht", - 
"gs://hail-datasets-eu/GTEx_eQTL_allpairs_Artery_Coronary_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Artery_Coronary_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Artery_Tibial_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Artery_Tibial_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Amygdala_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Amygdala_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Caudate_basal_ganglia_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Caudate_basal_ganglia_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Cerebellar_Hemisphere_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Cerebellar_Hemisphere_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Cerebellum_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Cerebellum_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Cortex_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Cortex_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Frontal_Cortex_BA9_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Frontal_Cortex_BA9_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Hippocampus_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Hippocampus_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Hypothalamus_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Hypothalamus_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Putamen_basal_ganglia_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Putamen_basal_ganglia_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Spinal_cord_cervical_c-1_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Substantia_nigra_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Substantia_nigra_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Breast_Mammary_Tissue_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Breast_Mammary_Tissue_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Cells_Cultured_fibroblasts_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Cells_Cultured_fibroblasts_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Cells_EBV-transformed_lymphocytes_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Colon_Sigmoid_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Colon_Sigmoid_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Colon_Transverse_v8_GRCh38.ht": 
"gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Colon_Transverse_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Esophagus_Gastroesophageal_Junction_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Esophagus_Mucosa_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Esophagus_Mucosa_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Esophagus_Muscularis_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Esophagus_Muscularis_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Heart_Atrial_Appendage_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Heart_Atrial_Appendage_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Heart_Left_Ventricle_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Heart_Left_Ventricle_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Kidney_Cortex_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Kidney_Cortex_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Liver_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Liver_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Lung_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Lung_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Minor_Salivary_Gland_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Minor_Salivary_Gland_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Muscle_Skeletal_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Muscle_Skeletal_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Nerve_Tibial_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Nerve_Tibial_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Ovary_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Ovary_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Pancreas_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Pancreas_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Pituitary_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Pituitary_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Prostate_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Prostate_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Skin_Sun_Exposed_Lower_leg_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Small_Intestine_Terminal_Ileum_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Small_Intestine_Terminal_Ileum_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Spleen_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Spleen_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Stomach_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Stomach_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Testis_v8_GRCh38.ht": 
"gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Testis_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Thyroid_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Thyroid_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Uterus_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Uterus_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Vagina_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Vagina_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Whole_Blood_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Whole_Blood_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_all_snp_gene_associations_v8_GRCh38.mt": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/all_snp_gene_associations.mt", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Adipose_Subcutaneous_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Adipose_Subcutaneous_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Adipose_Visceral_Omentum_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Adipose_Visceral_Omentum_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Adrenal_Gland_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Adrenal_Gland_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Artery_Aorta_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Artery_Aorta_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Artery_Coronary_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Artery_Coronary_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Artery_Tibial_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Artery_Tibial_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Amygdala_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Amygdala_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Caudate_basal_ganglia_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Caudate_basal_ganglia_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Cerebellar_Hemisphere_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Cerebellar_Hemisphere_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Cerebellum_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Cerebellum_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Cortex_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Cortex_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Frontal_Cortex_BA9_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Frontal_Cortex_BA9_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Hippocampus_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Hippocampus_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Hypothalamus_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Hypothalamus_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia_v8_GRCh38.ht": 
"gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Putamen_basal_ganglia_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Putamen_basal_ganglia_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Spinal_cord_cervical_c-1_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Substantia_nigra_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Substantia_nigra_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Breast_Mammary_Tissue_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Breast_Mammary_Tissue_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Cells_Cultured_fibroblasts_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Cells_Cultured_fibroblasts_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Cells_EBV-transformed_lymphocytes_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Colon_Sigmoid_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Colon_Sigmoid_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Colon_Transverse_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Colon_Transverse_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Esophagus_Gastroesophageal_Junction_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Esophagus_Mucosa_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Esophagus_Mucosa_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Esophagus_Muscularis_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Esophagus_Muscularis_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Heart_Atrial_Appendage_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Heart_Atrial_Appendage_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Heart_Left_Ventricle_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Heart_Left_Ventricle_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Kidney_Cortex_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Kidney_Cortex_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Liver_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Liver_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Lung_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Lung_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Minor_Salivary_Gland_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Minor_Salivary_Gland_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Muscle_Skeletal_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Muscle_Skeletal_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Nerve_Tibial_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Nerve_Tibial_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Ovary_v8_GRCh38.ht": 
"gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Ovary_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Pancreas_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Pancreas_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Pituitary_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Pituitary_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Prostate_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Prostate_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Skin_Sun_Exposed_Lower_leg_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Small_Intestine_Terminal_Ileum_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Small_Intestine_Terminal_Ileum_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Spleen_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Spleen_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Stomach_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Stomach_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Testis_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Testis_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Thyroid_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Thyroid_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Uterus_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Uterus_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Vagina_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Vagina_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Whole_Blood_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Whole_Blood_all_snp_gene_associations.ht", - "gs://hail-datasets-eu/LDSC_baselineLD_v2.2_ld_scores.GRCh37.ht": "gs://hail-datasets-eu/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.ht", - "gs://hail-datasets-eu/LDSC_baselineLD_v2.2_ld_scores.GRCh37.mt": "gs://hail-datasets-eu/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.mt", - "gs://hail-datasets-eu/LDSC_baseline_v1.1_ld_scores.GRCh37.mt": "gs://hail-datasets-eu/LDSC/baseline_v1.1/GRCh37/ld_scores.mt", - "gs://hail-datasets-eu/NCBI_assembly_report_p13_GRCh37.ht": "gs://hail-datasets-eu/dbSNP/assembly_reports/GRCh37/p13.ht", - "gs://hail-datasets-eu/NCBI_assembly_report_p13_GRCh38.ht": "gs://hail-datasets-eu/dbSNP/assembly_reports/GRCh38/p13.ht", - "gs://hail-datasets-eu/UK_Biobank_Rapid_GWAS_both_sexes.v2.GRCh37.mt": "gs://hail-datasets-eu/UK_Biobank/Rapid_GWAS/v2/GRCh37/both_sexes.mt", - "gs://hail-datasets-eu/UK_Biobank_Rapid_GWAS_female.v2.GRCh37.mt": "gs://hail-datasets-eu/UK_Biobank/Rapid_GWAS/v2/GRCh37/female.mt", - "gs://hail-datasets-eu/UK_Biobank_Rapid_GWAS_male.v2.GRCh37.mt": "gs://hail-datasets-eu/UK_Biobank/Rapid_GWAS/v2/GRCh37/male.mt", - "gs://hail-datasets-eu/annotations/CADD.v1.4.GRCh37.ht": "gs://hail-datasets-eu/CADD/v1.4/GRCh37/table.ht", - "gs://hail-datasets-eu/annotations/CADD.v1.4.GRCh38.ht": "gs://hail-datasets-eu/CADD/v1.4/GRCh38/table.ht", - "gs://hail-datasets-eu/annotations/DANN.GRCh37.ht": "gs://hail-datasets-eu/DANN/GRCh37/table.ht", - 
"gs://hail-datasets-eu/annotations/DANN.GRCh38.ht": "gs://hail-datasets-eu/DANN/GRCh38/table.ht", - "gs://hail-datasets-eu/annotations/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh37.ht": "gs://hail-datasets-eu/Ensembl/release_95/GRCh37/homo_sapiens_low_complexity_regions.ht", - "gs://hail-datasets-eu/annotations/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh38.ht": "gs://hail-datasets-eu/Ensembl/release_95/GRCh38/homo_sapiens_low_complexity_regions.ht", - "gs://hail-datasets-eu/annotations/Ensembl_homo_sapiens_reference_genome.release_95.GRCh37.ht": "gs://hail-datasets-eu/Ensembl/release_95/GRCh37/homo_sapiens_reference_genome.ht", - "gs://hail-datasets-eu/annotations/Ensembl_homo_sapiens_reference_genome.release_95.GRCh38.ht": "gs://hail-datasets-eu/Ensembl/release_95/GRCh38/homo_sapiens_reference_genome.ht", - "gs://hail-datasets-eu/annotations/GERP_elements.GERP++.GRCh37.ht": "gs://hail-datasets-eu/GERP/GERP++/GRCh37/elements.ht", - "gs://hail-datasets-eu/annotations/GERP_elements.GERP++.GRCh38.ht": "gs://hail-datasets-eu/GERP/GERP++/GRCh38/elements.ht", - "gs://hail-datasets-eu/annotations/GERP_scores.GERP++.GRCh37.ht": "gs://hail-datasets-eu/GERP/GERP++/GRCh37/scores.ht", - "gs://hail-datasets-eu/annotations/GERP_scores.GERP++.GRCh38.ht": "gs://hail-datasets-eu/GERP/GERP++/GRCh38/scores.ht", - "gs://hail-datasets-eu/annotations/LDSC_baselineLD_v2.2_ld_scores.GRCh37.ht": "gs://hail-datasets-eu/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.ht", - "gs://hail-datasets-eu/annotations/dbnsfp/dbNSFP4.0_gene.complete.bgz.ht": "gs://hail-datasets-eu/dbNSFP/v4.0a/gene_complete.ht", - "gs://hail-datasets-eu/annotations/dbnsfp4.0a.GRCh37.ht": "gs://hail-datasets-eu/dbNSFP/v4.0a/GRCh37/variant.ht", - "gs://hail-datasets-eu/annotations/dbnsfp4.0a.GRCh38.ht": "gs://hail-datasets-eu/dbNSFP/v4.0a/GRCh38/variant.ht", - "gs://hail-datasets-eu/annotations/gencode.v19.annotation.GRCh37.ht": "gs://hail-datasets-eu/GENCODE/v19/GRCh37/annotation.ht", - "gs://hail-datasets-eu/annotations/gencode.v31.annotation.GRCh38.ht": "gs://hail-datasets-eu/GENCODE/v31/GRCh38/annotation.ht", - "gs://hail-datasets-eu/annotations/gene_specific_summary_2019-07.txt.gz.ht": "gs://hail-datasets-eu/ClinVar/2019-07/gene_specific_summary.ht", - "gs://hail-datasets-eu/annotations/gnomad_v2.1.1_lof_metrics_by_gene.ht": "gs://hail-datasets-eu/gnomAD/v2.1.1/lof_metrics_by_gene.ht", - "gs://hail-datasets-eu/annotations/variant_summary_2019-07.GRCh37.txt.gz.ht": "gs://hail-datasets-eu/ClinVar/2019-07/GRCh37/variant_summary.ht", - "gs://hail-datasets-eu/annotations/variant_summary_2019-07.GRCh38.txt.gz.ht": "gs://hail-datasets-eu/ClinVar/2019-07/GRCh38/variant_summary.ht", - "gs://hail-datasets-eu/dbSNP_154_GRCh37.ht": "gs://hail-datasets-eu/dbSNP/build_154/GRCh37/full_table.ht", - "gs://hail-datasets-eu/dbSNP_154_GRCh38.ht": "gs://hail-datasets-eu/dbSNP/build_154/GRCh38/full_table.ht", - "gs://hail-datasets-eu/dbSNP_rsid_154_GRCh37.ht": "gs://hail-datasets-eu/dbSNP/build_154/GRCh37/rsid_only_table.ht", - "gs://hail-datasets-eu/dbSNP_rsid_154_GRCh38.ht": "gs://hail-datasets-eu/dbSNP/build_154/GRCh38/rsid_only_table.ht", - "gs://hail-datasets-eu/dbnsfp/dbNSFP4.0_gene.complete.bgz.ht": "gs://hail-datasets-eu/dbNSFP/v4.0a/gene_complete.ht", - "gs://hail-datasets-eu/dbnsfp4.0a.GRCh37.ht": "gs://hail-datasets-eu/dbNSFP/v4.0a/GRCh37/variant.ht", - "gs://hail-datasets-eu/dbnsfp4.0a.GRCh38.ht": "gs://hail-datasets-eu/dbNSFP/v4.0a/GRCh38/variant.ht", - "gs://hail-datasets-eu/gencode.v19.annotation.GRCh37.ht": 
"gs://hail-datasets-eu/GENCODE/v19/GRCh37/annotation.ht", - "gs://hail-datasets-eu/gencode.v31.annotation.GRCh38.ht": "gs://hail-datasets-eu/GENCODE/v31/GRCh38/annotation.ht", - "gs://hail-datasets-eu/gene_specific_summary_2019-07.txt.gz.ht": "gs://hail-datasets-eu/ClinVar/2019-07/gene_specific_summary.ht", - "gs://hail-datasets-eu/giant_bmi_exome_AFR_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/bmi_AFR.ht", - "gs://hail-datasets-eu/giant_bmi_exome_ALL_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/bmi_ALL.ht", - "gs://hail-datasets-eu/giant_bmi_exome_AMR_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/bmi_AMR.ht", - "gs://hail-datasets-eu/giant_bmi_exome_EAS_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/bmi_EAS.ht", - "gs://hail-datasets-eu/giant_bmi_exome_EUR_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/bmi_EUR.ht", - "gs://hail-datasets-eu/giant_bmi_exome_SAS_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/bmi_SAS.ht", - "gs://hail-datasets-eu/giant_height_exome_AFR_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/height_AFR.ht", - "gs://hail-datasets-eu/giant_height_exome_ALL_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/height_ALL.ht", - "gs://hail-datasets-eu/giant_height_exome_AMR_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/height_AMR.ht", - "gs://hail-datasets-eu/giant_height_exome_EAS_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/height_EAS.ht", - "gs://hail-datasets-eu/giant_height_exome_EUR_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/height_EUR.ht", - "gs://hail-datasets-eu/giant_height_exome_SAS_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/height_SAS.ht", - "gs://hail-datasets-eu/giant_whr_exome_C_ALL_Add_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_C_ALL_Add.ht", - "gs://hail-datasets-eu/giant_whr_exome_C_ALL_Rec_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_C_ALL_Rec.ht", - "gs://hail-datasets-eu/giant_whr_exome_C_EUR_Add_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_C_EUR_Add.ht", - "gs://hail-datasets-eu/giant_whr_exome_C_EUR_Rec_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_C_EUR_Rec.ht", - "gs://hail-datasets-eu/giant_whr_exome_M_ALL_Add_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_M_ALL_Add.ht", - "gs://hail-datasets-eu/giant_whr_exome_M_ALL_Rec_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_M_ALL_Rec.ht", - "gs://hail-datasets-eu/giant_whr_exome_M_EUR_Add_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_M_EUR_Add.ht", - "gs://hail-datasets-eu/giant_whr_exome_M_EUR_Rec_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_M_EUR_Rec.ht", - "gs://hail-datasets-eu/giant_whr_exome_W_ALL_Add_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_W_ALL_Add.ht", - "gs://hail-datasets-eu/giant_whr_exome_W_ALL_Rec_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_W_ALL_Rec.ht", - "gs://hail-datasets-eu/giant_whr_exome_W_EUR_Add_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_W_EUR_Add.ht", - "gs://hail-datasets-eu/giant_whr_exome_W_EUR_Rec_2018_GRCh37.ht": 
"gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_W_EUR_Rec.ht", - "gs://hail-datasets-eu/gnomad_v2.1.1_lof_metrics_by_gene.ht": "gs://hail-datasets-eu/gnomAD/v2.1.1/lof_metrics_by_gene.ht", - "gs://hail-datasets-eu/variant_summary_2019-07.GRCh37.txt.gz.ht": "gs://hail-datasets-eu/ClinVar/2019-07/GRCh37/variant_summary.ht", - "gs://hail-datasets-eu/variant_summary_2019-07.GRCh38.txt.gz.ht": "gs://hail-datasets-eu/ClinVar/2019-07/GRCh38/variant_summary.ht", - "gs://hail-datasets-us/1000_Genomes_NYGC_30x_HighCov_samples.ht": "gs://hail-datasets-us/1000_Genomes/NYGC_30x/samples.ht", - "gs://hail-datasets-us/1000_Genomes_autosomes.phase_3.GRCh37.mt": "gs://hail-datasets-us/1000_Genomes/phase_3/GRCh37/autosomes.mt", - "gs://hail-datasets-us/1000_Genomes_autosomes.phase_3.GRCh38.mt": "gs://hail-datasets-us/1000_Genomes/phase_3/GRCh38/autosomes.mt", - "gs://hail-datasets-us/1000_Genomes_autosomes_NYGC_30x_HighCov_GRCh38.mt": "gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/autosomes.mt", - "gs://hail-datasets-us/1000_Genomes_chrMT.phase_3.GRCh37.mt": "gs://hail-datasets-us/1000_Genomes/phase_3/GRCh37/chrMT.mt", - "gs://hail-datasets-us/1000_Genomes_chrX.phase_3.GRCh37.mt": "gs://hail-datasets-us/1000_Genomes/phase_3/GRCh37/chrX.mt", - "gs://hail-datasets-us/1000_Genomes_chrX.phase_3.GRCh38.mt": "gs://hail-datasets-us/1000_Genomes/phase_3/GRCh38/chrX.mt", - "gs://hail-datasets-us/1000_Genomes_chrX_NYGC_30x_HighCov_GRCh38.mt": "gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/chrX.mt", - "gs://hail-datasets-us/1000_Genomes_chrY.phase_3.GRCh37.mt": "gs://hail-datasets-us/1000_Genomes/phase_3/GRCh37/chrY.mt", - "gs://hail-datasets-us/1000_Genomes_chrY.phase_3.GRCh38.mt": "gs://hail-datasets-us/1000_Genomes/phase_3/GRCh38/chrY.mt", - "gs://hail-datasets-us/1000_Genomes_chrY_NYGC_30x_HighCov_GRCh38.mt": "gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/chrY.mt", - "gs://hail-datasets-us/1000_Genomes_phase3_European_autosomes_maf_gt_001.bm": "gs://hail-datasets-us/1000_Genomes/phase_3/European_autosomes_maf_gt_001.bm", - "gs://hail-datasets-us/1000_Genomes_phase3_European_autosomes_maf_gt_001.metadata.he": "gs://hail-datasets-us/1000_Genomes/phase_3/European_autosomes_maf_gt_001.metadata.he", - "gs://hail-datasets-us/1000_Genomes_phase3_European_autosomes_maf_gt_001_standardized.bm": "gs://hail-datasets-us/1000_Genomes/phase_3/European_autosomes_maf_gt_001_standardized.bm", - "gs://hail-datasets-us/1000_Genomes_phase3_European_autosomes_maf_gt_001_standardized.metadata.he": "gs://hail-datasets-us/1000_Genomes/phase_3/European_autosomes_maf_gt_001_standardized.metadata.he", - "gs://hail-datasets-us/1000_Genomes_phase3_sample_relationships.ht": "gs://hail-datasets-us/1000_Genomes/phase_3/sample_relationships.ht", - "gs://hail-datasets-us/1000_Genomes_phase3_samples.ht": "gs://hail-datasets-us/1000_Genomes/phase_3/samples.ht", - "gs://hail-datasets-us/CADD.v1.4.GRCh37.ht": "gs://hail-datasets-us/CADD/v1.4/GRCh37/table.ht", - "gs://hail-datasets-us/CADD.v1.4.GRCh38.ht": "gs://hail-datasets-us/CADD/v1.4/GRCh38/table.ht", - "gs://hail-datasets-us/CADD_v1.6_GRCh37.ht": "gs://hail-datasets-us/CADD/v1.6/GRCh37/table.ht", - "gs://hail-datasets-us/CADD_v1.6_GRCh38.ht": "gs://hail-datasets-us/CADD/v1.6/GRCh38/table.ht", - "gs://hail-datasets-us/DANN.GRCh37.ht": "gs://hail-datasets-us/DANN/GRCh37/table.ht", - "gs://hail-datasets-us/DANN.GRCh38.ht": "gs://hail-datasets-us/DANN/GRCh38/table.ht", - "gs://hail-datasets-us/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh37.ht": 
"gs://hail-datasets-us/Ensembl/release_95/GRCh37/homo_sapiens_low_complexity_regions.ht", - "gs://hail-datasets-us/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh38.ht": "gs://hail-datasets-us/Ensembl/release_95/GRCh38/homo_sapiens_low_complexity_regions.ht", - "gs://hail-datasets-us/Ensembl_homo_sapiens_reference_genome.release_95.GRCh37.ht": "gs://hail-datasets-us/Ensembl/release_95/GRCh37/homo_sapiens_reference_genome.ht", - "gs://hail-datasets-us/Ensembl_homo_sapiens_reference_genome.release_95.GRCh38.ht": "gs://hail-datasets-us/Ensembl/release_95/GRCh38/homo_sapiens_reference_genome.ht", - "gs://hail-datasets-us/GERP_elements.GERP++.GRCh37.ht": "gs://hail-datasets-us/GERP/GERP++/GRCh37/elements.ht", - "gs://hail-datasets-us/GERP_elements.GERP++.GRCh38.ht": "gs://hail-datasets-us/GERP/GERP++/GRCh38/elements.ht", - "gs://hail-datasets-us/GERP_scores.GERP++.GRCh37.ht": "gs://hail-datasets-us/GERP/GERP++/GRCh37/scores.ht", - "gs://hail-datasets-us/GERP_scores.GERP++.GRCh38.ht": "gs://hail-datasets-us/GERP/GERP++/GRCh38/scores.ht", - "gs://hail-datasets-us/GTEx_RNA_seq_gene_TPMs.v7.GRCh37.mt": "gs://hail-datasets-us/GTEx/v7/GRCh37/RNA_seq_gene_TPMs.mt", - "gs://hail-datasets-us/GTEx_RNA_seq_gene_read_counts.v7.GRCh37.mt": "gs://hail-datasets-us/GTEx/v7/GRCh37/RNA_seq_gene_read_counts.mt", - "gs://hail-datasets-us/GTEx_RNA_seq_junction_read_counts.v7.GRCh37.mt": "gs://hail-datasets-us/GTEx/v7/GRCh37/RNA_seq_junction_read_counts.mt", - "gs://hail-datasets-us/GTEx_eQTL_all_snp_gene_associations_v8_GRCh38.mt": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/all_snp_gene_associations.mt", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Adipose_Subcutaneous_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Adipose_Subcutaneous_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Adipose_Visceral_Omentum_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Adipose_Visceral_Omentum_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Adrenal_Gland_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Adrenal_Gland_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Artery_Aorta_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Artery_Aorta_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Artery_Coronary_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Artery_Coronary_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Artery_Tibial_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Artery_Tibial_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Amygdala_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Amygdala_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Caudate_basal_ganglia_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Caudate_basal_ganglia_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Cerebellar_Hemisphere_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Cerebellar_Hemisphere_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Cerebellum_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Cerebellum_all_snp_gene_associations.ht", - 
"gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Cortex_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Cortex_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Frontal_Cortex_BA9_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Frontal_Cortex_BA9_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Hippocampus_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Hippocampus_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Hypothalamus_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Hypothalamus_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Putamen_basal_ganglia_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Putamen_basal_ganglia_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Spinal_cord_cervical_c-1_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Substantia_nigra_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Substantia_nigra_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Breast_Mammary_Tissue_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Breast_Mammary_Tissue_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Cells_Cultured_fibroblasts_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Cells_Cultured_fibroblasts_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Cells_EBV-transformed_lymphocytes_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Colon_Sigmoid_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Colon_Sigmoid_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Colon_Transverse_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Colon_Transverse_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Esophagus_Gastroesophageal_Junction_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Esophagus_Mucosa_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Esophagus_Mucosa_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Esophagus_Muscularis_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Esophagus_Muscularis_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Heart_Atrial_Appendage_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Heart_Atrial_Appendage_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Heart_Left_Ventricle_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Heart_Left_Ventricle_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Kidney_Cortex_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Kidney_Cortex_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Liver_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Liver_all_snp_gene_associations.ht", - 
"gs://hail-datasets-us/GTEx_eQTL_allpairs_Lung_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Lung_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Minor_Salivary_Gland_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Minor_Salivary_Gland_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Muscle_Skeletal_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Muscle_Skeletal_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Nerve_Tibial_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Nerve_Tibial_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Ovary_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Ovary_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Pancreas_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Pancreas_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Pituitary_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Pituitary_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Prostate_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Prostate_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Skin_Sun_Exposed_Lower_leg_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Small_Intestine_Terminal_Ileum_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Small_Intestine_Terminal_Ileum_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Spleen_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Spleen_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Stomach_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Stomach_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Testis_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Testis_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Thyroid_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Thyroid_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Uterus_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Uterus_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Vagina_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Vagina_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_eQTL_allpairs_Whole_Blood_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Whole_Blood_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_all_snp_gene_associations_v8_GRCh38.mt": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/all_snp_gene_associations.mt", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Adipose_Subcutaneous_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Adipose_Subcutaneous_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Adipose_Visceral_Omentum_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Adipose_Visceral_Omentum_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Adrenal_Gland_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Adrenal_Gland_all_snp_gene_associations.ht", - 
"gs://hail-datasets-us/GTEx_sQTL_allpairs_Artery_Aorta_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Artery_Aorta_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Artery_Coronary_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Artery_Coronary_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Artery_Tibial_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Artery_Tibial_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Amygdala_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Amygdala_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Caudate_basal_ganglia_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Caudate_basal_ganglia_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Cerebellar_Hemisphere_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Cerebellar_Hemisphere_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Cerebellum_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Cerebellum_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Cortex_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Cortex_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Frontal_Cortex_BA9_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Frontal_Cortex_BA9_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Hippocampus_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Hippocampus_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Hypothalamus_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Hypothalamus_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Putamen_basal_ganglia_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Putamen_basal_ganglia_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Spinal_cord_cervical_c-1_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Substantia_nigra_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Substantia_nigra_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Breast_Mammary_Tissue_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Breast_Mammary_Tissue_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Cells_Cultured_fibroblasts_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Cells_Cultured_fibroblasts_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Cells_EBV-transformed_lymphocytes_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Colon_Sigmoid_v8_GRCh38.ht": 
"gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Colon_Sigmoid_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Colon_Transverse_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Colon_Transverse_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Esophagus_Gastroesophageal_Junction_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Esophagus_Mucosa_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Esophagus_Mucosa_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Esophagus_Muscularis_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Esophagus_Muscularis_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Heart_Atrial_Appendage_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Heart_Atrial_Appendage_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Heart_Left_Ventricle_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Heart_Left_Ventricle_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Kidney_Cortex_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Kidney_Cortex_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Liver_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Liver_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Lung_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Lung_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Minor_Salivary_Gland_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Minor_Salivary_Gland_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Muscle_Skeletal_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Muscle_Skeletal_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Nerve_Tibial_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Nerve_Tibial_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Ovary_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Ovary_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Pancreas_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Pancreas_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Pituitary_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Pituitary_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Prostate_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Prostate_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Skin_Sun_Exposed_Lower_leg_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Small_Intestine_Terminal_Ileum_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Small_Intestine_Terminal_Ileum_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Spleen_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Spleen_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Stomach_v8_GRCh38.ht": 
"gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Stomach_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Testis_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Testis_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Thyroid_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Thyroid_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Uterus_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Uterus_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Vagina_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Vagina_all_snp_gene_associations.ht", - "gs://hail-datasets-us/GTEx_sQTL_allpairs_Whole_Blood_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Whole_Blood_all_snp_gene_associations.ht", - "gs://hail-datasets-us/LDSC_baselineLD_v2.2_ld_scores.GRCh37.ht": "gs://hail-datasets-us/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.ht", - "gs://hail-datasets-us/LDSC_baselineLD_v2.2_ld_scores.GRCh37.mt": "gs://hail-datasets-us/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.mt", - "gs://hail-datasets-us/LDSC_baseline_v1.1_ld_scores.GRCh37.mt": "gs://hail-datasets-us/LDSC/baseline_v1.1/GRCh37/ld_scores.mt", - "gs://hail-datasets-us/NCBI_assembly_report_p13_GRCh37.ht": "gs://hail-datasets-us/dbSNP/assembly_reports/GRCh37/p13.ht", - "gs://hail-datasets-us/NCBI_assembly_report_p13_GRCh38.ht": "gs://hail-datasets-us/dbSNP/assembly_reports/GRCh38/p13.ht", - "gs://hail-datasets-us/UK_Biobank_Rapid_GWAS_both_sexes.v2.GRCh37.mt": "gs://hail-datasets-us/UK_Biobank/Rapid_GWAS/v2/GRCh37/both_sexes.mt", - "gs://hail-datasets-us/UK_Biobank_Rapid_GWAS_female.v2.GRCh37.mt": "gs://hail-datasets-us/UK_Biobank/Rapid_GWAS/v2/GRCh37/female.mt", - "gs://hail-datasets-us/UK_Biobank_Rapid_GWAS_male.v2.GRCh37.mt": "gs://hail-datasets-us/UK_Biobank/Rapid_GWAS/v2/GRCh37/male.mt", - "gs://hail-datasets-us/annotations/CADD.v1.4.GRCh37.ht": "gs://hail-datasets-us/CADD/v1.4/GRCh37/table.ht", - "gs://hail-datasets-us/annotations/CADD.v1.4.GRCh38.ht": "gs://hail-datasets-us/CADD/v1.4/GRCh38/table.ht", - "gs://hail-datasets-us/annotations/DANN.GRCh37.ht": "gs://hail-datasets-us/DANN/GRCh37/table.ht", - "gs://hail-datasets-us/annotations/DANN.GRCh38.ht": "gs://hail-datasets-us/DANN/GRCh38/table.ht", - "gs://hail-datasets-us/annotations/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh37.ht": "gs://hail-datasets-us/Ensembl/release_95/GRCh37/homo_sapiens_low_complexity_regions.ht", - "gs://hail-datasets-us/annotations/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh38.ht": "gs://hail-datasets-us/Ensembl/release_95/GRCh38/homo_sapiens_low_complexity_regions.ht", - "gs://hail-datasets-us/annotations/Ensembl_homo_sapiens_reference_genome.release_95.GRCh37.ht": "gs://hail-datasets-us/Ensembl/release_95/GRCh37/homo_sapiens_reference_genome.ht", - "gs://hail-datasets-us/annotations/Ensembl_homo_sapiens_reference_genome.release_95.GRCh38.ht": "gs://hail-datasets-us/Ensembl/release_95/GRCh38/homo_sapiens_reference_genome.ht", - "gs://hail-datasets-us/annotations/GERP_elements.GERP++.GRCh37.ht": "gs://hail-datasets-us/GERP/GERP++/GRCh37/elements.ht", - "gs://hail-datasets-us/annotations/GERP_elements.GERP++.GRCh38.ht": "gs://hail-datasets-us/GERP/GERP++/GRCh38/elements.ht", - "gs://hail-datasets-us/annotations/GERP_scores.GERP++.GRCh37.ht": "gs://hail-datasets-us/GERP/GERP++/GRCh37/scores.ht", - "gs://hail-datasets-us/annotations/GERP_scores.GERP++.GRCh38.ht": 
"gs://hail-datasets-us/GERP/GERP++/GRCh38/scores.ht", - "gs://hail-datasets-us/annotations/LDSC_baselineLD_v2.2_ld_scores.GRCh37.ht": "gs://hail-datasets-us/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.ht", - "gs://hail-datasets-us/annotations/dbnsfp/dbNSFP4.0_gene.complete.bgz.ht": "gs://hail-datasets-us/dbNSFP/v4.0a/gene_complete.ht", - "gs://hail-datasets-us/annotations/dbnsfp4.0a.GRCh37.ht": "gs://hail-datasets-us/dbNSFP/v4.0a/GRCh37/variant.ht", - "gs://hail-datasets-us/annotations/dbnsfp4.0a.GRCh38.ht": "gs://hail-datasets-us/dbNSFP/v4.0a/GRCh38/variant.ht", - "gs://hail-datasets-us/annotations/gencode.v19.annotation.GRCh37.ht": "gs://hail-datasets-us/GENCODE/v19/GRCh37/annotation.ht", - "gs://hail-datasets-us/annotations/gencode.v31.annotation.GRCh38.ht": "gs://hail-datasets-us/GENCODE/v31/GRCh38/annotation.ht", - "gs://hail-datasets-us/annotations/gene_specific_summary_2019-07.txt.gz.ht": "gs://hail-datasets-us/ClinVar/2019-07/gene_specific_summary.ht", - "gs://hail-datasets-us/annotations/gnomad_v2.1.1_lof_metrics_by_gene.ht": "gs://hail-datasets-us/gnomAD/v2.1.1/lof_metrics_by_gene.ht", - "gs://hail-datasets-us/annotations/variant_summary_2019-07.GRCh37.txt.gz.ht": "gs://hail-datasets-us/ClinVar/2019-07/GRCh37/variant_summary.ht", - "gs://hail-datasets-us/annotations/variant_summary_2019-07.GRCh38.txt.gz.ht": "gs://hail-datasets-us/ClinVar/2019-07/GRCh38/variant_summary.ht", - "gs://hail-datasets-us/dbSNP_154_GRCh37.ht": "gs://hail-datasets-us/dbSNP/build_154/GRCh37/full_table.ht", - "gs://hail-datasets-us/dbSNP_154_GRCh38.ht": "gs://hail-datasets-us/dbSNP/build_154/GRCh38/full_table.ht", - "gs://hail-datasets-us/dbSNP_rsid_154_GRCh37.ht": "gs://hail-datasets-us/dbSNP/build_154/GRCh37/rsid_only_table.ht", - "gs://hail-datasets-us/dbSNP_rsid_154_GRCh38.ht": "gs://hail-datasets-us/dbSNP/build_154/GRCh38/rsid_only_table.ht", - "gs://hail-datasets-us/dbnsfp/dbNSFP4.0_gene.complete.bgz.ht": "gs://hail-datasets-us/dbNSFP/v4.0a/gene_complete.ht", - "gs://hail-datasets-us/dbnsfp4.0a.GRCh37.ht": "gs://hail-datasets-us/dbNSFP/v4.0a/GRCh37/variant.ht", - "gs://hail-datasets-us/dbnsfp4.0a.GRCh38.ht": "gs://hail-datasets-us/dbNSFP/v4.0a/GRCh38/variant.ht", - "gs://hail-datasets-us/gencode.v19.annotation.GRCh37.ht": "gs://hail-datasets-us/GENCODE/v19/GRCh37/annotation.ht", - "gs://hail-datasets-us/gencode.v31.annotation.GRCh38.ht": "gs://hail-datasets-us/GENCODE/v31/GRCh38/annotation.ht", - "gs://hail-datasets-us/gene_specific_summary_2019-07.txt.gz.ht": "gs://hail-datasets-us/ClinVar/2019-07/gene_specific_summary.ht", - "gs://hail-datasets-us/giant_bmi_exome_AFR_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_AFR.ht", - "gs://hail-datasets-us/giant_bmi_exome_ALL_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_ALL.ht", - "gs://hail-datasets-us/giant_bmi_exome_AMR_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_AMR.ht", - "gs://hail-datasets-us/giant_bmi_exome_EAS_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_EAS.ht", - "gs://hail-datasets-us/giant_bmi_exome_EUR_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_EUR.ht", - "gs://hail-datasets-us/giant_bmi_exome_SAS_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_SAS.ht", - "gs://hail-datasets-us/giant_height_exome_AFR_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_AFR.ht", - "gs://hail-datasets-us/giant_height_exome_ALL_2018_GRCh37.ht": 
"gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_ALL.ht", - "gs://hail-datasets-us/giant_height_exome_AMR_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_AMR.ht", - "gs://hail-datasets-us/giant_height_exome_EAS_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_EAS.ht", - "gs://hail-datasets-us/giant_height_exome_EUR_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_EUR.ht", - "gs://hail-datasets-us/giant_height_exome_SAS_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_SAS.ht", - "gs://hail-datasets-us/giant_whr_exome_C_ALL_Add_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_C_ALL_Add.ht", - "gs://hail-datasets-us/giant_whr_exome_C_ALL_Rec_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_C_ALL_Rec.ht", - "gs://hail-datasets-us/giant_whr_exome_C_EUR_Add_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_C_EUR_Add.ht", - "gs://hail-datasets-us/giant_whr_exome_C_EUR_Rec_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_C_EUR_Rec.ht", - "gs://hail-datasets-us/giant_whr_exome_M_ALL_Add_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_M_ALL_Add.ht", - "gs://hail-datasets-us/giant_whr_exome_M_ALL_Rec_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_M_ALL_Rec.ht", - "gs://hail-datasets-us/giant_whr_exome_M_EUR_Add_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_M_EUR_Add.ht", - "gs://hail-datasets-us/giant_whr_exome_M_EUR_Rec_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_M_EUR_Rec.ht", - "gs://hail-datasets-us/giant_whr_exome_W_ALL_Add_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_W_ALL_Add.ht", - "gs://hail-datasets-us/giant_whr_exome_W_ALL_Rec_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_W_ALL_Rec.ht", - "gs://hail-datasets-us/giant_whr_exome_W_EUR_Add_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_W_EUR_Add.ht", - "gs://hail-datasets-us/giant_whr_exome_W_EUR_Rec_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_W_EUR_Rec.ht", - "gs://hail-datasets-us/gnomad_v2.1.1_lof_metrics_by_gene.ht": "gs://hail-datasets-us/gnomAD/v2.1.1/lof_metrics_by_gene.ht", - "gs://hail-datasets-us/variant_summary_2019-07.GRCh37.txt.gz.ht": "gs://hail-datasets-us/ClinVar/2019-07/GRCh37/variant_summary.ht", - "gs://hail-datasets-us/variant_summary_2019-07.GRCh38.txt.gz.ht": "gs://hail-datasets-us/ClinVar/2019-07/GRCh38/variant_summary.ht", - "s3://hail-datasets-us-east-1/1000_Genomes_NYGC_30x_HighCov_samples.ht": "s3://hail-datasets-us-east-1/1000_Genomes/NYGC_30x/samples.ht", - "s3://hail-datasets-us-east-1/1000_Genomes_autosomes.phase_3.GRCh37.mt": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/GRCh37/autosomes.mt", - "s3://hail-datasets-us-east-1/1000_Genomes_autosomes.phase_3.GRCh38.mt": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/GRCh38/autosomes.mt", - "s3://hail-datasets-us-east-1/1000_Genomes_autosomes_NYGC_30x_HighCov_GRCh38.mt": "s3://hail-datasets-us-east-1/1000_Genomes/NYGC_30x/GRCh38/autosomes.mt", - "s3://hail-datasets-us-east-1/1000_Genomes_chrMT.phase_3.GRCh37.mt": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/GRCh37/chrMT.mt", - "s3://hail-datasets-us-east-1/1000_Genomes_chrX.phase_3.GRCh37.mt": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/GRCh37/chrX.mt", - 
"s3://hail-datasets-us-east-1/1000_Genomes_chrX.phase_3.GRCh38.mt": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/GRCh38/chrX.mt", - "s3://hail-datasets-us-east-1/1000_Genomes_chrX_NYGC_30x_HighCov_GRCh38.mt": "s3://hail-datasets-us-east-1/1000_Genomes/NYGC_30x/GRCh38/chrX.mt", - "s3://hail-datasets-us-east-1/1000_Genomes_chrY.phase_3.GRCh37.mt": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/GRCh37/chrY.mt", - "s3://hail-datasets-us-east-1/1000_Genomes_chrY.phase_3.GRCh38.mt": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/GRCh38/chrY.mt", - "s3://hail-datasets-us-east-1/1000_Genomes_chrY_NYGC_30x_HighCov_GRCh38.mt": "s3://hail-datasets-us-east-1/1000_Genomes/NYGC_30x/GRCh38/chrY.mt", - "s3://hail-datasets-us-east-1/1000_Genomes_phase3_European_autosomes_maf_gt_001.bm": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/European_autosomes_maf_gt_001.bm", - "s3://hail-datasets-us-east-1/1000_Genomes_phase3_European_autosomes_maf_gt_001.metadata.he": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/European_autosomes_maf_gt_001.metadata.he", - "s3://hail-datasets-us-east-1/1000_Genomes_phase3_European_autosomes_maf_gt_001_standardized.bm": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/European_autosomes_maf_gt_001_standardized.bm", - "s3://hail-datasets-us-east-1/1000_Genomes_phase3_European_autosomes_maf_gt_001_standardized.metadata.he": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/European_autosomes_maf_gt_001_standardized.metadata.he", - "s3://hail-datasets-us-east-1/1000_Genomes_phase3_sample_relationships.ht": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/sample_relationships.ht", - "s3://hail-datasets-us-east-1/1000_Genomes_phase3_samples.ht": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/samples.ht", - "s3://hail-datasets-us-east-1/CADD.v1.4.GRCh37.ht": "s3://hail-datasets-us-east-1/CADD/v1.4/GRCh37/table.ht", - "s3://hail-datasets-us-east-1/CADD.v1.4.GRCh38.ht": "s3://hail-datasets-us-east-1/CADD/v1.4/GRCh38/table.ht", - "s3://hail-datasets-us-east-1/CADD_v1.6_GRCh37.ht": "s3://hail-datasets-us-east-1/CADD/v1.6/GRCh37/table.ht", - "s3://hail-datasets-us-east-1/CADD_v1.6_GRCh38.ht": "s3://hail-datasets-us-east-1/CADD/v1.6/GRCh38/table.ht", - "s3://hail-datasets-us-east-1/DANN.GRCh37.ht": "s3://hail-datasets-us-east-1/DANN/GRCh37/table.ht", - "s3://hail-datasets-us-east-1/DANN.GRCh38.ht": "s3://hail-datasets-us-east-1/DANN/GRCh38/table.ht", - "s3://hail-datasets-us-east-1/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh37.ht": "s3://hail-datasets-us-east-1/Ensembl/release_95/GRCh37/homo_sapiens_low_complexity_regions.ht", - "s3://hail-datasets-us-east-1/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh38.ht": "s3://hail-datasets-us-east-1/Ensembl/release_95/GRCh38/homo_sapiens_low_complexity_regions.ht", - "s3://hail-datasets-us-east-1/Ensembl_homo_sapiens_reference_genome.release_95.GRCh37.ht": "s3://hail-datasets-us-east-1/Ensembl/release_95/GRCh37/homo_sapiens_reference_genome.ht", - "s3://hail-datasets-us-east-1/Ensembl_homo_sapiens_reference_genome.release_95.GRCh38.ht": "s3://hail-datasets-us-east-1/Ensembl/release_95/GRCh38/homo_sapiens_reference_genome.ht", - "s3://hail-datasets-us-east-1/GERP_elements.GERP++.GRCh37.ht": "s3://hail-datasets-us-east-1/GERP/GERP++/GRCh37/elements.ht", - "s3://hail-datasets-us-east-1/GERP_elements.GERP++.GRCh38.ht": "s3://hail-datasets-us-east-1/GERP/GERP++/GRCh38/elements.ht", - "s3://hail-datasets-us-east-1/GERP_scores.GERP++.GRCh37.ht": 
"s3://hail-datasets-us-east-1/GERP/GERP++/GRCh37/scores.ht", - "s3://hail-datasets-us-east-1/GERP_scores.GERP++.GRCh38.ht": "s3://hail-datasets-us-east-1/GERP/GERP++/GRCh38/scores.ht", - "s3://hail-datasets-us-east-1/GTEx_RNA_seq_gene_TPMs.v7.GRCh37.mt": "s3://hail-datasets-us-east-1/GTEx/v7/GRCh37/RNA_seq_gene_TPMs.mt", - "s3://hail-datasets-us-east-1/GTEx_RNA_seq_gene_read_counts.v7.GRCh37.mt": "s3://hail-datasets-us-east-1/GTEx/v7/GRCh37/RNA_seq_gene_read_counts.mt", - "s3://hail-datasets-us-east-1/GTEx_RNA_seq_junction_read_counts.v7.GRCh37.mt": "s3://hail-datasets-us-east-1/GTEx/v7/GRCh37/RNA_seq_junction_read_counts.mt", - "s3://hail-datasets-us-east-1/GTEx_eQTL_all_snp_gene_associations_v8_GRCh38.mt": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/all_snp_gene_associations.mt", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Adipose_Subcutaneous_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Adipose_Subcutaneous_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Adipose_Visceral_Omentum_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Adipose_Visceral_Omentum_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Adrenal_Gland_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Adrenal_Gland_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Artery_Aorta_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Artery_Aorta_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Artery_Coronary_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Artery_Coronary_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Artery_Tibial_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Artery_Tibial_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Amygdala_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Amygdala_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Caudate_basal_ganglia_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Caudate_basal_ganglia_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Cerebellar_Hemisphere_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Cerebellar_Hemisphere_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Cerebellum_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Cerebellum_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Cortex_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Cortex_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Frontal_Cortex_BA9_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Frontal_Cortex_BA9_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Hippocampus_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Hippocampus_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Hypothalamus_v8_GRCh38.ht": 
"s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Hypothalamus_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Putamen_basal_ganglia_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Putamen_basal_ganglia_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Spinal_cord_cervical_c-1_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Substantia_nigra_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Substantia_nigra_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Breast_Mammary_Tissue_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Breast_Mammary_Tissue_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Cells_Cultured_fibroblasts_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Cells_Cultured_fibroblasts_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Cells_EBV-transformed_lymphocytes_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Colon_Sigmoid_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Colon_Sigmoid_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Colon_Transverse_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Colon_Transverse_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Esophagus_Gastroesophageal_Junction_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Esophagus_Mucosa_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Esophagus_Mucosa_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Esophagus_Muscularis_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Esophagus_Muscularis_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Heart_Atrial_Appendage_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Heart_Atrial_Appendage_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Heart_Left_Ventricle_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Heart_Left_Ventricle_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Kidney_Cortex_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Kidney_Cortex_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Liver_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Liver_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Lung_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Lung_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Minor_Salivary_Gland_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Minor_Salivary_Gland_all_snp_gene_associations.ht", - 
"s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Muscle_Skeletal_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Muscle_Skeletal_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Nerve_Tibial_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Nerve_Tibial_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Ovary_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Ovary_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Pancreas_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Pancreas_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Pituitary_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Pituitary_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Prostate_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Prostate_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Skin_Sun_Exposed_Lower_leg_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Small_Intestine_Terminal_Ileum_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Small_Intestine_Terminal_Ileum_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Spleen_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Spleen_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Stomach_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Stomach_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Testis_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Testis_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Thyroid_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Thyroid_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Uterus_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Uterus_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Vagina_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Vagina_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Whole_Blood_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Whole_Blood_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_all_snp_gene_associations_v8_GRCh38.mt": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/all_snp_gene_associations.mt", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Adipose_Subcutaneous_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Adipose_Subcutaneous_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Adipose_Visceral_Omentum_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Adipose_Visceral_Omentum_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Adrenal_Gland_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Adrenal_Gland_all_snp_gene_associations.ht", - 
"s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Artery_Aorta_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Artery_Aorta_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Artery_Coronary_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Artery_Coronary_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Artery_Tibial_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Artery_Tibial_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Amygdala_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Amygdala_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Caudate_basal_ganglia_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Caudate_basal_ganglia_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Cerebellar_Hemisphere_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Cerebellar_Hemisphere_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Cerebellum_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Cerebellum_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Cortex_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Cortex_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Frontal_Cortex_BA9_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Frontal_Cortex_BA9_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Hippocampus_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Hippocampus_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Hypothalamus_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Hypothalamus_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Putamen_basal_ganglia_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Putamen_basal_ganglia_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Spinal_cord_cervical_c-1_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Substantia_nigra_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Substantia_nigra_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Breast_Mammary_Tissue_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Breast_Mammary_Tissue_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Cells_Cultured_fibroblasts_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Cells_Cultured_fibroblasts_all_snp_gene_associations.ht", - 
"s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Cells_EBV-transformed_lymphocytes_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Colon_Sigmoid_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Colon_Sigmoid_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Colon_Transverse_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Colon_Transverse_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Esophagus_Gastroesophageal_Junction_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Esophagus_Mucosa_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Esophagus_Mucosa_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Esophagus_Muscularis_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Esophagus_Muscularis_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Heart_Atrial_Appendage_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Heart_Atrial_Appendage_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Heart_Left_Ventricle_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Heart_Left_Ventricle_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Kidney_Cortex_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Kidney_Cortex_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Liver_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Liver_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Lung_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Lung_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Minor_Salivary_Gland_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Minor_Salivary_Gland_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Muscle_Skeletal_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Muscle_Skeletal_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Nerve_Tibial_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Nerve_Tibial_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Ovary_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Ovary_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Pancreas_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Pancreas_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Pituitary_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Pituitary_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Prostate_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Prostate_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Skin_Sun_Exposed_Lower_leg_v8_GRCh38.ht": 
"s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Small_Intestine_Terminal_Ileum_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Small_Intestine_Terminal_Ileum_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Spleen_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Spleen_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Stomach_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Stomach_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Testis_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Testis_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Thyroid_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Thyroid_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Uterus_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Uterus_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Vagina_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Vagina_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Whole_Blood_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Whole_Blood_all_snp_gene_associations.ht", - "s3://hail-datasets-us-east-1/LDSC_baselineLD_v2.2_ld_scores.GRCh37.ht": "s3://hail-datasets-us-east-1/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.ht", - "s3://hail-datasets-us-east-1/LDSC_baselineLD_v2.2_ld_scores.GRCh37.mt": "s3://hail-datasets-us-east-1/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.mt", - "s3://hail-datasets-us-east-1/LDSC_baseline_v1.1_ld_scores.GRCh37.mt": "s3://hail-datasets-us-east-1/LDSC/baseline_v1.1/GRCh37/ld_scores.mt", - "s3://hail-datasets-us-east-1/NCBI_assembly_report_p13_GRCh37.ht": "s3://hail-datasets-us-east-1/dbSNP/assembly_reports/GRCh37/p13.ht", - "s3://hail-datasets-us-east-1/NCBI_assembly_report_p13_GRCh38.ht": "s3://hail-datasets-us-east-1/dbSNP/assembly_reports/GRCh38/p13.ht", - "s3://hail-datasets-us-east-1/UK_Biobank_Rapid_GWAS_both_sexes.v2.GRCh37.mt": "s3://hail-datasets-us-east-1/UK_Biobank/Rapid_GWAS/v2/GRCh37/both_sexes.mt", - "s3://hail-datasets-us-east-1/UK_Biobank_Rapid_GWAS_female.v2.GRCh37.mt": "s3://hail-datasets-us-east-1/UK_Biobank/Rapid_GWAS/v2/GRCh37/female.mt", - "s3://hail-datasets-us-east-1/UK_Biobank_Rapid_GWAS_male.v2.GRCh37.mt": "s3://hail-datasets-us-east-1/UK_Biobank/Rapid_GWAS/v2/GRCh37/male.mt", - "s3://hail-datasets-us-east-1/annotations/CADD.v1.4.GRCh37.ht": "s3://hail-datasets-us-east-1/CADD/v1.4/GRCh37/table.ht", - "s3://hail-datasets-us-east-1/annotations/CADD.v1.4.GRCh38.ht": "s3://hail-datasets-us-east-1/CADD/v1.4/GRCh38/table.ht", - "s3://hail-datasets-us-east-1/annotations/DANN.GRCh37.ht": "s3://hail-datasets-us-east-1/DANN/GRCh37/table.ht", - "s3://hail-datasets-us-east-1/annotations/DANN.GRCh38.ht": "s3://hail-datasets-us-east-1/DANN/GRCh38/table.ht", - "s3://hail-datasets-us-east-1/annotations/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh37.ht": "s3://hail-datasets-us-east-1/Ensembl/release_95/GRCh37/homo_sapiens_low_complexity_regions.ht", - "s3://hail-datasets-us-east-1/annotations/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh38.ht": "s3://hail-datasets-us-east-1/Ensembl/release_95/GRCh38/homo_sapiens_low_complexity_regions.ht", - 
"s3://hail-datasets-us-east-1/annotations/Ensembl_homo_sapiens_reference_genome.release_95.GRCh37.ht": "s3://hail-datasets-us-east-1/Ensembl/release_95/GRCh37/homo_sapiens_reference_genome.ht", - "s3://hail-datasets-us-east-1/annotations/Ensembl_homo_sapiens_reference_genome.release_95.GRCh38.ht": "s3://hail-datasets-us-east-1/Ensembl/release_95/GRCh38/homo_sapiens_reference_genome.ht", - "s3://hail-datasets-us-east-1/annotations/GERP_elements.GERP++.GRCh37.ht": "s3://hail-datasets-us-east-1/GERP/GERP++/GRCh37/elements.ht", - "s3://hail-datasets-us-east-1/annotations/GERP_elements.GERP++.GRCh38.ht": "s3://hail-datasets-us-east-1/GERP/GERP++/GRCh38/elements.ht", - "s3://hail-datasets-us-east-1/annotations/GERP_scores.GERP++.GRCh37.ht": "s3://hail-datasets-us-east-1/GERP/GERP++/GRCh37/scores.ht", - "s3://hail-datasets-us-east-1/annotations/GERP_scores.GERP++.GRCh38.ht": "s3://hail-datasets-us-east-1/GERP/GERP++/GRCh38/scores.ht", - "s3://hail-datasets-us-east-1/annotations/LDSC_baselineLD_v2.2_ld_scores.GRCh37.ht": "s3://hail-datasets-us-east-1/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.ht", - "s3://hail-datasets-us-east-1/annotations/dbnsfp/dbNSFP4.0_gene.complete.bgz.ht": "s3://hail-datasets-us-east-1/dbNSFP/v4.0a/gene_complete.ht", - "s3://hail-datasets-us-east-1/annotations/dbnsfp4.0a.GRCh37.ht": "s3://hail-datasets-us-east-1/dbNSFP/v4.0a/GRCh37/variant.ht", - "s3://hail-datasets-us-east-1/annotations/dbnsfp4.0a.GRCh38.ht": "s3://hail-datasets-us-east-1/dbNSFP/v4.0a/GRCh38/variant.ht", - "s3://hail-datasets-us-east-1/annotations/gencode.v19.annotation.GRCh37.ht": "s3://hail-datasets-us-east-1/GENCODE/v19/GRCh37/annotation.ht", - "s3://hail-datasets-us-east-1/annotations/gencode.v31.annotation.GRCh38.ht": "s3://hail-datasets-us-east-1/GENCODE/v31/GRCh38/annotation.ht", - "s3://hail-datasets-us-east-1/annotations/gene_specific_summary_2019-07.txt.gz.ht": "s3://hail-datasets-us-east-1/ClinVar/2019-07/gene_specific_summary.ht", - "s3://hail-datasets-us-east-1/annotations/gnomad_v2.1.1_lof_metrics_by_gene.ht": "s3://hail-datasets-us-east-1/gnomAD/v2.1.1/lof_metrics_by_gene.ht", - "s3://hail-datasets-us-east-1/annotations/variant_summary_2019-07.GRCh37.txt.gz.ht": "s3://hail-datasets-us-east-1/ClinVar/2019-07/GRCh37/variant_summary.ht", - "s3://hail-datasets-us-east-1/annotations/variant_summary_2019-07.GRCh38.txt.gz.ht": "s3://hail-datasets-us-east-1/ClinVar/2019-07/GRCh38/variant_summary.ht", - "s3://hail-datasets-us-east-1/dbSNP_154_GRCh37.ht": "s3://hail-datasets-us-east-1/dbSNP/build_154/GRCh37/full_table.ht", - "s3://hail-datasets-us-east-1/dbSNP_154_GRCh38.ht": "s3://hail-datasets-us-east-1/dbSNP/build_154/GRCh38/full_table.ht", - "s3://hail-datasets-us-east-1/dbSNP_rsid_154_GRCh37.ht": "s3://hail-datasets-us-east-1/dbSNP/build_154/GRCh37/rsid_only_table.ht", - "s3://hail-datasets-us-east-1/dbSNP_rsid_154_GRCh38.ht": "s3://hail-datasets-us-east-1/dbSNP/build_154/GRCh38/rsid_only_table.ht", - "s3://hail-datasets-us-east-1/dbnsfp/dbNSFP4.0_gene.complete.bgz.ht": "s3://hail-datasets-us-east-1/dbNSFP/v4.0a/gene_complete.ht", - "s3://hail-datasets-us-east-1/dbnsfp4.0a.GRCh37.ht": "s3://hail-datasets-us-east-1/dbNSFP/v4.0a/GRCh37/variant.ht", - "s3://hail-datasets-us-east-1/dbnsfp4.0a.GRCh38.ht": "s3://hail-datasets-us-east-1/dbNSFP/v4.0a/GRCh38/variant.ht", - "s3://hail-datasets-us-east-1/gencode.v19.annotation.GRCh37.ht": "s3://hail-datasets-us-east-1/GENCODE/v19/GRCh37/annotation.ht", - "s3://hail-datasets-us-east-1/gencode.v31.annotation.GRCh38.ht": 
"s3://hail-datasets-us-east-1/GENCODE/v31/GRCh38/annotation.ht", - "s3://hail-datasets-us-east-1/gene_specific_summary_2019-07.txt.gz.ht": "s3://hail-datasets-us-east-1/ClinVar/2019-07/gene_specific_summary.ht", - "s3://hail-datasets-us-east-1/giant_bmi_exome_AFR_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/bmi_AFR.ht", - "s3://hail-datasets-us-east-1/giant_bmi_exome_ALL_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/bmi_ALL.ht", - "s3://hail-datasets-us-east-1/giant_bmi_exome_AMR_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/bmi_AMR.ht", - "s3://hail-datasets-us-east-1/giant_bmi_exome_EAS_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/bmi_EAS.ht", - "s3://hail-datasets-us-east-1/giant_bmi_exome_EUR_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/bmi_EUR.ht", - "s3://hail-datasets-us-east-1/giant_bmi_exome_SAS_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/bmi_SAS.ht", - "s3://hail-datasets-us-east-1/giant_height_exome_AFR_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/height_AFR.ht", - "s3://hail-datasets-us-east-1/giant_height_exome_ALL_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/height_ALL.ht", - "s3://hail-datasets-us-east-1/giant_height_exome_AMR_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/height_AMR.ht", - "s3://hail-datasets-us-east-1/giant_height_exome_EAS_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/height_EAS.ht", - "s3://hail-datasets-us-east-1/giant_height_exome_EUR_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/height_EUR.ht", - "s3://hail-datasets-us-east-1/giant_height_exome_SAS_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/height_SAS.ht", - "s3://hail-datasets-us-east-1/giant_whr_exome_C_ALL_Add_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_C_ALL_Add.ht", - "s3://hail-datasets-us-east-1/giant_whr_exome_C_ALL_Rec_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_C_ALL_Rec.ht", - "s3://hail-datasets-us-east-1/giant_whr_exome_C_EUR_Add_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_C_EUR_Add.ht", - "s3://hail-datasets-us-east-1/giant_whr_exome_C_EUR_Rec_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_C_EUR_Rec.ht", - "s3://hail-datasets-us-east-1/giant_whr_exome_M_ALL_Add_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_M_ALL_Add.ht", - "s3://hail-datasets-us-east-1/giant_whr_exome_M_ALL_Rec_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_M_ALL_Rec.ht", - "s3://hail-datasets-us-east-1/giant_whr_exome_M_EUR_Add_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_M_EUR_Add.ht", - "s3://hail-datasets-us-east-1/giant_whr_exome_M_EUR_Rec_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_M_EUR_Rec.ht", - "s3://hail-datasets-us-east-1/giant_whr_exome_W_ALL_Add_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_W_ALL_Add.ht", - "s3://hail-datasets-us-east-1/giant_whr_exome_W_ALL_Rec_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_W_ALL_Rec.ht", - 
"s3://hail-datasets-us-east-1/giant_whr_exome_W_EUR_Add_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_W_EUR_Add.ht", - "s3://hail-datasets-us-east-1/giant_whr_exome_W_EUR_Rec_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_W_EUR_Rec.ht", - "s3://hail-datasets-us-east-1/gnomad_v2.1.1_lof_metrics_by_gene.ht": "s3://hail-datasets-us-east-1/gnomAD/v2.1.1/lof_metrics_by_gene.ht", - "s3://hail-datasets-us-east-1/variant_summary_2019-07.GRCh37.txt.gz.ht": "s3://hail-datasets-us-east-1/ClinVar/2019-07/GRCh37/variant_summary.ht", - "s3://hail-datasets-us-east-1/variant_summary_2019-07.GRCh38.txt.gz.ht": "s3://hail-datasets-us-east-1/ClinVar/2019-07/GRCh38/variant_summary.ht" -} diff --git a/dev-docs/batch-operation.md b/dev-docs/batch-operation.md index 752851cdd5f..224d56e6f20 100644 --- a/dev-docs/batch-operation.md +++ b/dev-docs/batch-operation.md @@ -10,19 +10,14 @@ kubectl top pods -l app=batch-driver Less readily available metrics include the request latency (which might naturally vary by request type) and scheduling latency. The goal is to operate Batch around 95% CPU. When Batch becomes overwhelmed, CPU is saturated and request latency increases. New requests inject work into the system, but time out and are retried due to latency, creating a bad feedback loop. -The incoming request rate from workers is controlled by the `internal-gateway`. The `internal-gateway` is fixed at 3 replicates and imposes a per-instance, per-namespace request limit: +The incoming request rate from workers is controlled by the `internal-gateway`. The `internal-gateway` is fixed at 3 replicates and imposes a per-instance request limit: ``` -map $service $batch_driver_limit_key { - "batch-driver" "$namespace"; - default ""; # no key => no limit -} - -limit_req_zone $batch_driver_limit_key zone=batch_driver:1m rate=10r/s; +limit_req_zone global zone=limit:1m rate=45r/s; server { location { - limit_req zone=batch_driver burst=20 nodelay; + limit_req zone=limit burst=20 nodelay; } } ``` @@ -38,30 +33,3 @@ Watch the behavior under maximum load. Normally the allocated cores increases f You can inspect the `internal-gateway` logs to determine if the request rate is maximized. When the maximum request rate is exceeded, `internal-gateway` nginx returns 503 and logs a message. To determine if the cluster size is at the maximum, check the CPU and `internal-gateway` request rate when the cluster is not growing, but just replacing preempted nodes. The CPU should not be pegged, and `internal-gateway` should reject requests at most in transient bursts. In general, the load will be much lower at equilibrium because filling an empty node requires many operations. - -## Quotas - -When using Local SSDs on preemptible machines there are only two quotas that matter: "Preemptible -Local SSD (GB)" (`PREEMPTIBLE_LOCAL_SSD_GB`) and "Preemptible CPUs" (`PREEMPTIBLE_CPUS`). The former -is measured in GB, so you'll need 375 GB of quota for every machine with a Local SSD. The latter is -measured in cores. For example, if you are using a mix of n1 and n2 machines with 8 cores and 1 -Local SSD, a 5000 machine (40,000 core) cluster will need: - -- 1,875,000 GB of Preemptible Local SSD quota, and - -- 40,000 cores of Preemptible CPUs quota. - -In practice, we use Local SSD quota much faster than CPU quota. Google will freely gives us a 5,000 -core quota in any given zone. We've also received quotas as high as 300,000 cores. 
Google is -hesitant to grant a quota of more than 400,000 GB in a zone. The largest Preemptible Local SSD quota -we have been granted in one zone is 640,000 GB. - -We recommend requesting double your quota when you're using 80-90% of the current quota. Repeating -this process will generally allow you to quickly scale the cluster. - -A higher or lower quota is requested by going to the "Quota Metric Details" page for a specific -quota (e.g. `PREEMPTIBLE_CPUS`), selecting the regions or zones of interest, and clicking "Edit -Quotas". - -Quota requests during Eastern Time business hours appear to be approved faster. We presume this is -because our Technical Account Managers work in the Cambridge, MA Google office. diff --git a/dev-docs/compiler-team/development_tools.md b/dev-docs/compiler-team/development_tools.md deleted file mode 100644 index 8aeb9bb5d1d..00000000000 --- a/dev-docs/compiler-team/development_tools.md +++ /dev/null @@ -1,79 +0,0 @@ -# Hail development tools - -This document describes and links tools used by the Hail compiler team. -The document is written for the most common operating system used by -the team, macOS. - -## General tools - -##### Homebrew - macOS package manager - -Homebrew is hard to live without. Use it to install many of the other tools -used by the team. - -https://brew.sh/ - -##### git - version control - -It's nice to have a relatively recent version of git. Install this with -brew: - - brew install git - -It will probably be necessary to change system paths so that the -installed git is available before system git, as [described here](https://ajahne.github.io/blog/tools/2018/06/11/how-to-upgrade-git-mac.html). - -Once this is working, you should fork the hail-is/hail repository into -your own user space, then clone the repository locally: - - git clone https://github.com/username/hail.git - -Then add a remote for the main repository to pull in changes: - - git remote add hi https://github.com/hail-is/hail.git - - -##### Zulip - dev / user chat - -We use Zulip for development discussion and conversations with users -(though not typically for user support). - -Get it here: - -https://zulip.com/ - -Our Zulip server is https://hail.zulipchat.com - -##### Anaconda - manage Python installations and packages - -https://www.anaconda.com/download/#macos - -After installing Anaconda, you should create a new dev environment -for Hail with: - - conda create --name hail python=3.7 - -and - - conda activate hail - -(put the latter in a shell .rc file so this is done on shell startup) - -##### IntelliJ IDEA - IDE for java/scala/python - -https://www.jetbrains.com/idea/ - -Configuration is hard to document here, get help by asking the team. - -##### iTerm2 - terminal replacement - -iTerm2 is (subjectively) nicer to use and objectively more customizable -than the built-in macOS terminal. - -https://iterm2.com/ - -##### Google cloud utilities - -We primarily use Google Cloud for development. Get the SDK here: - -https://cloud.google.com/sdk/docs/install \ No newline at end of file diff --git a/dev-docs/development_process.md b/dev-docs/development_process.md index 066e4be23d7..7f34f116fac 100644 --- a/dev-docs/development_process.md +++ b/dev-docs/development_process.md @@ -117,7 +117,7 @@ Your namespace name is the same as your username. Submitting a dev deploy with hailctl will give you the link to a UI where you can monitor the progress of everything deploying and get the logs for any steps that fail. 
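As a sanity check on the capacity figures in the batch-operation notes above, here is a minimal back-of-the-envelope sketch. All numbers are the examples from that text (3 `internal-gateway` replicas capped at 45 r/s each, and a 5,000-machine cluster of 8-core workers with one 375 GB Local SSD apiece); they are illustrations, not fixed values.

```bash
#!/bin/bash
# Back-of-the-envelope capacity numbers from the batch-operation notes above.

# internal-gateway: 3 replicas, each capped at 45 requests/second (burst 20).
replicas=3
per_instance_rate=45
echo "aggregate request ceiling: ~$(( replicas * per_instance_rate )) r/s"

# Quotas for a 5,000-machine cluster of 8-core workers with one 375 GB Local SSD each.
machines=5000
echo "PREEMPTIBLE_CPUS needed:         $(( machines * 8 )) cores"
echo "PREEMPTIBLE_LOCAL_SSD_GB needed: $(( machines * 375 )) GB"
```

Requesting roughly double the current quota once usage reaches 80-90% of it, as recommended above, keeps that headroom ahead of cluster growth.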
You can also see a recent history of your dev deploys at -[ci.hail.is/me](https://ci.hail.is/me). +[ci.hail.is/me](ci.hail.is/me). If the tests fail, you can then examine the Kubernetes logs for the service @@ -159,9 +159,9 @@ PRs will not have any labels. For the PR title, start the title with the name of the service(s) the changes impact. For example, if it’s a Benchmark change, then you’d write -`[benchmark]`. If it’s a Hail Query change, then it would be `[query]`. We also want +[benchmark]. If it’s a Hail Query change, then it would be [query]. We also want the title to be descriptive enough to know what the change is without being too -verbose. An example is “`[batch]` Added read_only option for gcsfuse”. +verbose. An example is “[batch] Added read_only option for gcsfuse”. For the PR commit message, we want the message to be descriptive of the complete set of changes that occurred, especially if it’s a complicated set of diff --git a/dev-docs/google-cloud-cookbook.md b/dev-docs/google-cloud-cookbook.md deleted file mode 100644 index 815c8c2fd2c..00000000000 --- a/dev-docs/google-cloud-cookbook.md +++ /dev/null @@ -1,30 +0,0 @@ -# Google Cloud Cookbook - -Executable snippets for certain operational tasks. These snippets will likely bitrot and otherwise -be unreliable. The reader is recommended to test the snippet before use. - -## Delete All Detached Disks - -There were approximately 240,000 disks accidentally left unattached in a project. This script was -used to delete them. We only delete 990 at a time because there is a "queries per second" quota that -limits how many queries we can make every 100 seconds. We empirically determined that deleting 990 -instances at a time did not exceed the quota. - -``` -for i in $(seq 0 280) -do - gcloud compute disks list \ - --filter='LAST_ATTACH_TIMESTAMP < LAST_DETATCH_TIMESTAMP and name ~ "^batch-disk" and STATUS=Ready' \ - --limit=990 \ - | tail -n +2 \ - > /tmp/disks - for zone in $(cat /tmp/disks | awk '{print $2}' | uniq ) - do - echo $zone - awk '$2=="'$zone'" {print $1}' /tmp/disks \ - | grep -Ee '^batch-disk' \ - | xargs /bin/bash -c 'yes | gcloud compute disks delete '--zone=$zone' $*' % & - done - wait -done -``` diff --git a/docker-build.sh b/docker-build.sh deleted file mode 100755 index 73367a15d9f..00000000000 --- a/docker-build.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash - -CONTEXT="$(cd $1 && pwd)" -DOCKERFILE="$CONTEXT/$2" -REMOTE_IMAGE_NAME=$3 -EXTRA_CACHE=$4 - -CACHE_NAME=$(echo $REMOTE_IMAGE_NAME | sed -E 's/(:[^:]+)(@[^@]+)?$//'):cache - -DOCKER_BUILDKIT=1 docker build \ - --file ${DOCKERFILE} \ - --cache-from ${CACHE_NAME} \ - ${EXTRA_CACHE:+--cache-from ${EXTRA_CACHE}} \ - --build-arg BUILDKIT_INLINE_CACHE=1 \ - --tag ${REMOTE_IMAGE_NAME} \ - --tag ${CACHE_NAME} \ - ${CONTEXT} - -time DOCKER_BUILDKIT=1 docker push ${REMOTE_IMAGE_NAME} -time DOCKER_BUILDKIT=1 docker push ${CACHE_NAME} diff --git a/docker/Dockerfile.base b/docker/Dockerfile.base index e1d969d3d64..b1fa1fc5200 100644 --- a/docker/Dockerfile.base +++ b/docker/Dockerfile.base @@ -1,17 +1,5 @@ FROM {{ hail_ubuntu_image.image }} -RUN hail-pip-install pyspark==3.1.1 -ENV SPARK_HOME /usr/local/lib/python3.7/dist-packages/pyspark -ENV PATH "$PATH:$SPARK_HOME/sbin:$SPARK_HOME/bin" -ENV PYSPARK_PYTHON python3 - -# source: https://cloud.google.com/storage/docs/gsutil_install#linux -RUN curl -sSLO https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-334.0.0-linux-x86_64.tar.gz && \ - tar -xf google-cloud-sdk-334.0.0-linux-x86_64.tar.gz && \ 
- curl -sSLO https://dl.k8s.io/release/v1.19.7/bin/linux/amd64/kubectl && \ - install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl -ENV PATH $PATH:/google-cloud-sdk/bin - RUN hail-apt-get-install \ git \ htop \ @@ -26,23 +14,35 @@ RUN hail-apt-get-install \ liblapack3 \ g++-10 \ gcc-10 \ - cmake \ - && \ - update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 10 && \ + cmake + +RUN update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 10 && \ update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 10 +# source: https://cloud.google.com/storage/docs/gsutil_install#linux +RUN curl -sSLO https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-334.0.0-linux-x86_64.tar.gz && \ + tar -xf google-cloud-sdk-334.0.0-linux-x86_64.tar.gz && \ + curl -sSLO https://dl.k8s.io/release/v1.19.7/bin/linux/amd64/kubectl && \ + install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl +ENV PATH $PATH:/google-cloud-sdk/bin +RUN gcloud version && kubectl version --client=true + +COPY docker/requirements.txt . +RUN hail-pip-install -r requirements.txt pyspark==3.1.1 + +ENV SPARK_HOME /usr/local/lib/python3.7/dist-packages/pyspark +ENV PATH "$PATH:$SPARK_HOME/sbin:$SPARK_HOME/bin" +ENV PYSPARK_PYTHON python3 + # Regarding explicitly selecting 2.0.1: https://github.com/hail-is/hail/issues/8343 RUN wget -nv -O ${SPARK_HOME}/jars/gcs-connector-hadoop2-2.0.1.jar https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop2-2.0.1.jar COPY docker/core-site.xml ${SPARK_HOME}/conf/core-site.xml +COPY pylintrc setup.cfg / + RUN git clone https://github.com/catchorg/Catch2.git --depth 1 --branch v2.13.3 && \ cd Catch2 && \ cmake -Bbuild -H. -DBUILD_TESTING=OFF && \ cmake --build build/ --target install && \ cd .. && \ rm -rf Catch2 - -COPY docker/requirements.txt . -RUN hail-pip-install -r requirements.txt - -COPY pylintrc setup.cfg / diff --git a/docker/Dockerfile.service-base b/docker/Dockerfile.service-base index 4c53181ed61..c4c9fbfb6ca 100644 --- a/docker/Dockerfile.service-base +++ b/docker/Dockerfile.service-base @@ -1,8 +1,16 @@ FROM {{ base_image.image }} +RUN hail-apt-get-install build-essential python3-dev + COPY docker/service-base-requirements.txt . 
RUN hail-pip-install -r service-base-requirements.txt +COPY hail/python/setup-hailtop.py /hailtop/setup.py +COPY hail/python/hailtop /hailtop/hailtop/ +COPY /hail_version /hailtop/hailtop/hail_version +COPY hail/python/MANIFEST.in /hailtop/MANIFEST.in +RUN hail-pip-install /hailtop && rm -rf /hailtop + COPY gear/setup.py /gear/setup.py COPY gear/gear /gear/gear/ RUN hail-pip-install /gear && rm -rf /gear @@ -11,9 +19,3 @@ COPY web_common/setup.py web_common/MANIFEST.in /web_common/ COPY web_common/web_common /web_common/web_common/ RUN hail-pip-install /web_common && rm -rf /web_common RUN echo 'from hailtop import version; print(version());' | python3 - -COPY hail/python/setup-hailtop.py /hailtop/setup.py -COPY hail/python/hailtop /hailtop/hailtop/ -COPY /hail_version /hailtop/hailtop/hail_version -COPY hail/python/MANIFEST.in /hailtop/MANIFEST.in -RUN hail-pip-install /hailtop && rm -rf /hailtop diff --git a/docker/Makefile b/docker/Makefile index 484b74fa332..72996926c38 100644 --- a/docker/Makefile +++ b/docker/Makefile @@ -1,58 +1,96 @@ include ../config.mk .PHONY: build -build: base service-base service-java-run-base hail-ubuntu +build: base-stmp service-base hail-ubuntu -TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) +HAIL_UBUNTU_LATEST = $(DOCKER_PREFIX)/hail-ubuntu:latest +HAIL_UBUNTU_IMAGE = $(DOCKER_PREFIX)/hail-ubuntu:$(shell docker images -q --no-trunc hail-ubuntu:latest | sed -e 's,[^:]*:,,') -HAIL_UBUNTU_IMAGE := $(DOCKER_PREFIX)/hail-ubuntu:$(TOKEN) -BASE_IMAGE := $(DOCKER_PREFIX)/base:$(TOKEN) -SERVICE_BASE_IMAGE := $(DOCKER_PREFIX)/service-base:$(TOKEN) -SERVICE_JAVA_RUN_BASE_IMAGE := $(DOCKER_PREFIX)/service-java-run-base:$(TOKEN) -HAIL_PUBLIC_IMAGE := $(DOCKER_PREFIX)/hail-public:$(TOKEN) -GENETICS_PUBLIC_IMAGE := $(DOCKER_PREFIX)/genetics-public:$(TOKEN) +BASE_LATEST = $(DOCKER_PREFIX)/base:latest +BASE_IMAGE = $(DOCKER_PREFIX)/base:$(shell docker images -q --no-trunc base:latest | sed -e 's,[^:]*:,,') -.PHONY: hail-ubuntu hail-ubuntu-image-ref -hail-ubuntu: hail-ubuntu-image-ref -hail-ubuntu-image-ref: hail-ubuntu/Dockerfile hail-ubuntu/hail-pip-install hail-ubuntu/pip.conf hail-ubuntu/hail-apt-get-install +SERVICE_BASE_LATEST = $(DOCKER_PREFIX)/service-base:latest +SERVICE_BASE_IMAGE = $(DOCKER_PREFIX)/service-base:$(shell docker images -q --no-trunc service-base:latest | sed -e 's,[^:]*:,,') + +SERVICE_JAVA_RUN_BASE_LATEST = $(DOCKER_PREFIX)/service-java-run-base:latest +SERVICE_JAVA_RUN_BASE_IMAGE = $(DOCKER_PREFIX)/service-java-run-base:$(shell docker images -q --no-trunc service-java-run-base:latest | sed -e 's,[^:]*:,,') + +HAIL_PUBLIC_LATEST = $(DOCKER_PREFIX)/hail-public:latest +HAIL_PUBLIC_IMAGE = $(DOCKER_PREFIX)/hail-public:$(shell docker images -q --no-trunc hail-public:latest | sed -e 's,[^:]*:,,') + +GENETICS_PUBLIC_LATEST = $(DOCKER_PREFIX)/genetics-public:latest +GENETICS_PUBLIC_IMAGE = $(DOCKER_PREFIX)/genetics-public:$(shell docker images -q --no-trunc genetics-public:latest | sed -e 's,[^:]*:,,') + +.PHONY: hail-ubuntu +hail-ubuntu: hail-ubuntu-stmp + +hail-ubuntu-stmp: hail-ubuntu/Dockerfile hail-ubuntu/hail-pip-install hail-ubuntu/pip.conf hail-ubuntu/hail-apt-get-install + -docker pull $(DOCKER_PREFIX)/ubuntu:bionic-20200921 + -docker pull $(HAIL_UBUNTU_LATEST) python3 ../ci/jinja2_render.py '{"global":{"docker_prefix":"$(DOCKER_PREFIX)"}}' hail-ubuntu/Dockerfile hail-ubuntu/Dockerfile.out - ../docker-build.sh hail-ubuntu Dockerfile.out $(HAIL_UBUNTU_IMAGE) - echo $(HAIL_UBUNTU_IMAGE) > $@ - -.PHONY: base base-image-ref 
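The Makefile variables introduced above (`BASE_IMAGE` and friends) tag the pushed image with the ID of the locally built `:latest` image rather than with a random token. As a rough illustration of what that `docker images -q --no-trunc ... | sed` pipeline yields, here is the plain-shell equivalent; `base` and `DOCKER_PREFIX` are simply the names from the Makefile, and the image must already have been built locally.

```bash
#!/bin/bash
# Sketch of the content-derived tag used by BASE_IMAGE and friends above.
# Assumes an image tagged base:latest exists locally and DOCKER_PREFIX is set.
id=$(docker images -q --no-trunc base:latest)   # e.g. sha256:0f3a9c...
tag=${id#*:}                                    # strip up to the first ':', as the sed expression does
echo "${DOCKER_PREFIX}/base:${tag}"
```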
-base: base-image-ref -base-image-ref: hail-ubuntu-image-ref Dockerfile.base core-site.xml requirements.txt ../pylintrc ../setup.cfg - python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"'$$(cat hail-ubuntu-image-ref)'"}}' Dockerfile.base Dockerfile.base.out - ../docker-build.sh .. docker/Dockerfile.base.out $(BASE_IMAGE) - echo $(BASE_IMAGE) > $@ - -.PHONY: hail_version -hail_version: + docker build -t hail-ubuntu -f hail-ubuntu/Dockerfile.out --cache-from hail-ubuntu,$(HAIL_UBUNTU_LATEST),ubuntu:bionic-20200921 hail-ubuntu + touch hail-ubuntu-stmp + +.PHONY: base +base: base-stmp + +base-stmp: hail-ubuntu-stmp Dockerfile.base core-site.xml requirements.txt ../pylintrc ../setup.cfg + -docker pull $(BASE_LATEST) + python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"hail-ubuntu"}}' Dockerfile.base Dockerfile.base.out + [ "$(shell bash stat-permissions.sh Dockerfile.base)" = "644" ] + [ "$(shell bash stat-permissions.sh core-site.xml)" = "644" ] + [ "$(shell bash stat-permissions.sh requirements.txt)" = "644" ] + [ "$(shell bash stat-permissions.sh ../pylintrc)" = "644" ] + [ "$(shell bash stat-permissions.sh ../setup.cfg)" = "644" ] + docker build -t base -f Dockerfile.base.out --cache-from base,$(BASE_LATEST),hail-ubuntu .. + touch base-stmp + +.PHONY: service-base +service-base: base-stmp + -docker pull $(SERVICE_BASE_LATEST) $(MAKE) -C ../hail python/hailtop/hail_version cp ../hail/python/hailtop/hail_version ../hail_version + python3 ../ci/jinja2_render.py '{"base_image":{"image":"base"}}' Dockerfile.service-base Dockerfile.service-base.out + [ "$(shell bash stat-permissions.sh Dockerfile.service-base.out)" = "644" ] + [ "$(shell bash stat-permissions.sh service-base-requirements.txt)" = "644" ] + docker build -t service-base -f Dockerfile.service-base.out --cache-from service-base,$(SERVICE_BASE_LATEST),base,hail-ubuntu .. -.PHONY: service-base service-base-image-ref -service-base: service-base-image-ref -service-base-image-ref: base hail_version - python3 ../ci/jinja2_render.py '{"base_image":{"image":"'$$(cat base-image-ref)'"}}' Dockerfile.service-base Dockerfile.service-base.out - ../docker-build.sh .. docker/Dockerfile.service-base.out $(SERVICE_BASE_IMAGE) - echo $(SERVICE_BASE_IMAGE) > $@ +.PHONY: service-java-run-base +service-java-run-base: hail-ubuntu-stmp + -docker pull $(SERVICE_JAVA_RUN_BASE_LATEST) + $(MAKE) -C ../hail python/hailtop/hail_version + cp ../hail/python/hailtop/hail_version ../hail_version + python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"hail-ubuntu"}}' Dockerfile.service-java-run-base Dockerfile.service-java-run-base.out + [ "$(shell bash stat-permissions.sh Dockerfile.service-java-run-base.out)" = "644" ] + docker build -t service-java-run-base -f Dockerfile.service-java-run-base.out --cache-from service-java-run-base,$(SERVICE_JAVA_RUN_BASE_LATEST),base,hail-ubuntu .. -.PHONY: service-java-run-base service-java-run-base-image-ref -service-java-run-base: service-java-run-base-image-ref -service-java-run-base-image-ref: hail-ubuntu-image-ref hail_version - python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"'$$(cat hail-ubuntu-image-ref)'"}}' Dockerfile.service-java-run-base Dockerfile.service-java-run-base.out - ../docker-build.sh .. 
docker/Dockerfile.service-java-run-base.out $(SERVICE_JAVA_RUN_BASE_IMAGE) - echo $(SERVICE_JAVA_RUN_BASE_IMAGE) > $@ +.PHONY: push +push: build + docker tag hail-ubuntu $(HAIL_UBUNTU_LATEST) + docker push $(HAIL_UBUNTU_LATEST) + docker tag hail-ubuntu $(HAIL_UBUNTU_IMAGE) + docker push $(HAIL_UBUNTU_IMAGE) + docker tag base $(BASE_LATEST) + docker push $(BASE_LATEST) + docker tag base $(BASE_IMAGE) + docker push $(BASE_IMAGE) + docker tag service-base $(SERVICE_BASE_LATEST) + docker push $(SERVICE_BASE_LATEST) + docker tag service-base $(SERVICE_BASE_IMAGE) + docker push $(SERVICE_BASE_IMAGE) + docker tag service-java-run-base $(SERVICE_JAVA_RUN_BASE_LATEST) + docker push $(SERVICE_JAVA_RUN_BASE_LATEST) + docker tag service-java-run-base $(SERVICE_JAVA_RUN_BASE_IMAGE) + docker push $(SERVICE_JAVA_RUN_BASE_IMAGE) .PHONY: deploy -deploy: build +deploy: push .PHONY: clean clean: - rm -f base hail-ubuntu-image-ref base service-base service-java-run-base + rm -f base-stmp hail-ubuntu-stmp ## Public Images ############################################################### -publish-public-images: hail-ubuntu-image-ref +publish-public-images: bash publish-public-images.sh $(DOCKER_PREFIX) diff --git a/docker/publish-public-images.sh b/docker/publish-public-images.sh index 5626e4a2e78..b7e8d829b09 100644 --- a/docker/publish-public-images.sh +++ b/docker/publish-public-images.sh @@ -10,27 +10,27 @@ hail_pip_version=$(cat hail/hail_pip_version) build_and_push() { name=$1 + base=$2 versioned_short=hailgenetics/$name:$hail_pip_version versioned_full=$docker_prefix/$versioned_short - cache=$docker_prefix/hailgenetics/$name:cache - - DOCKER_BUILDKIT=1 docker build \ - --file $name/Dockerfile.out \ - --cache-from ${cache} \ - --build-arg BUILDKIT_INLINE_CACHE=1 \ - --tag $versioned_short \ - --tag $versioned_full \ - --tag $cache \ - ${name} - - time DOCKER_BUILDKIT=1 docker push ${versioned_short} - time DOCKER_BUILDKIT=1 docker push ${versioned_full} - time DOCKER_BUILDKIT=1 docker push ${cache} + latest_full=$docker_prefix/hailgenetics/$name:latest + + docker pull $latest || true + docker build \ + $name/ \ + -f $name/Dockerfile.out \ + -t $versioned_short \ + -t $versioned_full \ + -t $latest_full \ + --cache-from $latest_full,$base + docker push $versioned_short + docker push $versioned_full + docker push $latest_full } -python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"'$(cat hail-ubuntu-image-ref)'"}}' hail/Dockerfile hail/Dockerfile.out -build_and_push hail +python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"hail-ubuntu"}}' hail/Dockerfile hail/Dockerfile.out +build_and_push hail hail-ubuntu python3 ../ci/jinja2_render.py '{"hail_public_image":{"image":"'hailgenetics/hail:$hail_pip_version'"}}' genetics/Dockerfile genetics/Dockerfile.out -build_and_push genetics +build_and_push genetics hailgenetics/hail:${hail_pip_version} diff --git a/docker/python-dill/push.sh b/docker/python-dill/push.sh index 82522eff021..4f6bfbdaad9 100644 --- a/docker/python-dill/push.sh +++ b/docker/python-dill/push.sh @@ -3,21 +3,8 @@ for version in 3.6 3.6-slim 3.7 3.7-slim 3.8 3.8-slim do sed "s/@PYTHON_VERSION@/$version/g" Dockerfile > Dockerfile.out - - public=hailgenetics/python-dill:$version - private=${DOCKER_PREFIX}/python-dill:$version - cache=${DOCKER_PREFIX}/python-dill:cache - - DOCKER_BUILDKIT=1 docker build \ - --file Dockerfile.out \ - --cache-from ${cache} \ - --build-arg BUILDKIT_INLINE_CACHE=1 \ - --tag ${public} \ - --tag ${private} \ - --tag ${cache} \ - . 
- - time DOCKER_BUILDKIT=1 docker push ${public} - time DOCKER_BUILDKIT=1 docker push ${private} - time DOCKER_BUILDKIT=1 docker push ${cache} + docker build --tag hailgenetics/python-dill:$version - 3.1: https://bugs.launchpad.net/usd-importer/+bug/1794041/comments/6 keyrings.alt>=3.1 @@ -51,13 +48,13 @@ pytest-instafail==0.4.2 pytest-xdist==2.2.1 python-dateutil==2.8.1 python-json-logger==0.1.11 -requests==2.25.1 +requests==2.22.0 setuptools>=38.6.0 sortedcontainers==2.1.0 tabulate==0.8.3 tqdm==4.42.1 twine>=1.11.0 -urllib3==1.26.5 +urllib3==1.24.3 uvloop==0.14.0 Werkzeug==0.15.4 wheel>=0.31.0 diff --git a/gateway/Makefile b/gateway/Makefile index 46e1dde6661..6ee3f846e9d 100644 --- a/gateway/Makefile +++ b/gateway/Makefile @@ -2,17 +2,23 @@ include ../config.mk .PHONY: build push deploy -TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) - -GATEWAY_IMAGE := $(DOCKER_PREFIX)/gateway:$(TOKEN) +GATEWAY_LATEST = $(DOCKER_PREFIX)/gateway:latest +GATEWAY_IMAGE = $(DOCKER_PREFIX)/gateway:$(shell docker images -q --no-trunc gateway | sed -e 's,[^:]*:,,') build: $(MAKE) -C ../docker hail-ubuntu - python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"'$$(cat ../docker/hail-ubuntu-image-ref)'"}}' Dockerfile Dockerfile.out + -docker pull $(GATEWAY_LATEST) + python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"hail-ubuntu"}}' Dockerfile Dockerfile.out python3 ../ci/jinja2_render.py '{"domain": "$(DOMAIN)","subdomains":[$(shell sed -e 's/\(.*\)/"\1"/' ../letsencrypt/subdomains.txt | paste -s -d, -)]}' gateway.nginx.conf gateway.nginx.conf.out - ../docker-build.sh . Dockerfile.out $(GATEWAY_IMAGE) + docker build -t gateway -f Dockerfile.out --cache-from gateway,$(GATEWAY_LATEST),hail-ubuntu . + +push: build + docker tag gateway $(GATEWAY_LATEST) + docker push $(GATEWAY_LATEST) + docker tag gateway $(GATEWAY_IMAGE) + docker push $(GATEWAY_IMAGE) -deploy: build +deploy: push python3 ../ci/jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"global":{"ip":"$(IP)"}}' service.yaml service.yaml.out kubectl -n default apply -f service.yaml.out python3 ../ci/jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"gateway_image":{"image":"$(GATEWAY_IMAGE)"},"global":{"project":"$(PROJECT)"}}' deployment.yaml deployment.yaml.out diff --git a/grafana/Makefile b/grafana/Makefile index 182474f8112..cee6e4c0c32 100644 --- a/grafana/Makefile +++ b/grafana/Makefile @@ -2,17 +2,23 @@ include ../config.mk .PHONY: build push deploy -TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) - -GRAFANA_NGINX_IMAGE := $(DOCKER_PREFIX)/grafana_nginx:$(TOKEN) +GRAFANA_NGINX_LATEST = $(DOCKER_PREFIX)/grafana_nginx:latest +GRAFANA_NGINX_IMAGE = $(DOCKER_PREFIX)/grafana_nginx:$(shell docker images -q --no-trunc grafana_nginx | sed -e 's,[^:]*:,,') build: $(MAKE) -C ../docker hail-ubuntu - python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"'$$(cat ../docker/hail-ubuntu-image-ref)'"}}' Dockerfile.nginx Dockerfile.nginx.out + -docker pull $(GRAFANA_NGINX_LATEST) + python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"hail-ubuntu"}}' Dockerfile.nginx Dockerfile.nginx.out python3 ../ci/jinja2_render.py '{"deploy": $(DEPLOY), "default_ns": {"name": "$(NAMESPACE)"}}' nginx.conf nginx.conf.out - ../docker-build.sh . Dockerfile.nginx.out $(GRAFANA_NGINX_IMAGE) + docker build -t grafana_nginx -f Dockerfile.nginx.out --cache-from grafana_nginx,$(GRAFANA_NGINX_LATEST),hail-ubuntu . 
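Note: the gateway, grafana, and docker/ image targets now share the same warm-cache build/push flow. A minimal shell sketch of that flow (illustrative only; the gateway names are used as the example, and in the Makefiles the pull failure is ignored with a leading "-" rather than "|| true"):

    # Pull the previously pushed :latest so its layers can seed --cache-from,
    # build under a bare local name, then retag and push :latest (the image-ID
    # tag is pushed the same way in the push target).
    docker pull "$DOCKER_PREFIX/gateway:latest" || true
    docker build -t gateway -f Dockerfile.out --cache-from gateway,"$DOCKER_PREFIX/gateway:latest",hail-ubuntu .
    docker tag gateway "$DOCKER_PREFIX/gateway:latest"
    docker push "$DOCKER_PREFIX/gateway:latest"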
+ +push: build + docker tag grafana_nginx $(GRAFANA_NGINX_LATEST) + docker push $(GRAFANA_NGINX_LATEST) + docker tag grafana_nginx $(GRAFANA_NGINX_IMAGE) + docker push $(GRAFANA_NGINX_IMAGE) -deploy: build +deploy: push ! [ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default python3 ../ci/jinja2_render.py '{"deploy":$(DEPLOY),"default_ns":{"name":"$(NAMESPACE)"}, "grafana_nginx_image": {"image": "$(GRAFANA_NGINX_IMAGE)"}}' deployment.yaml deployment.yaml.out kubectl -n $(NAMESPACE) apply -f deployment.yaml.out diff --git a/grafana/deployment.yaml b/grafana/deployment.yaml index 292a68ea942..99b065fe484 100644 --- a/grafana/deployment.yaml +++ b/grafana/deployment.yaml @@ -29,7 +29,7 @@ spec: secretName: ssl-config-grafana containers: - name: grafana - image: grafana/grafana:8.0.2 + image: grafana/grafana:7.3.7 env: {% if deploy %} - name: GF_SERVER_DOMAIN diff --git a/hail/Makefile b/hail/Makefile index d1da1b43436..5595addebc1 100644 --- a/hail/Makefile +++ b/hail/Makefile @@ -14,7 +14,7 @@ BRANCH := $(shell git rev-parse --abbrev-ref HEAD) SCALA_VERSION ?= 2.12.13 SPARK_VERSION ?= 3.1.1 HAIL_MAJOR_MINOR_VERSION := 0.2 -HAIL_PATCH_VERSION := 70 +HAIL_PATCH_VERSION := 67 HAIL_PIP_VERSION := $(HAIL_MAJOR_MINOR_VERSION).$(HAIL_PATCH_VERSION) HAIL_VERSION := $(HAIL_PIP_VERSION)-$(SHORT_REVISION) ELASTIC_MAJOR_VERSION ?= 7 @@ -192,7 +192,7 @@ copy-py-files: $(PYTHON_VERSION_INFO) $(SHADOW_JAR) $(INIT_SCRIPTS) $(PY_FILES) mkdir -p build/deploy mkdir -p build/deploy/src cp ../README.md build/deploy/ - rsync -r \ + rsync -rv \ --exclude '.eggs/' \ --exclude '.pytest_cache/' \ --exclude '__pycache__/' \ @@ -336,7 +336,7 @@ HAIL_CACHE_VERSION = $(shell cat python/hail/hail_version) .PHONY: hail-docs hail-docs: $(PYTHON_VERSION_INFO) python/hail/docs/change_log.rst $(MAKE) -C python/hail/docs \ - SPHINXOPTS='-tgenerate_notebook_outputs' \ + SPHINXOPTS='-tchecktutorial' \ BUILDDIR=$(HAIL_DIR)/build/docs/hail \ html mkdir -p build/www/docs diff --git a/hail/build.gradle b/hail/build.gradle index 439d4dca774..a40f9381576 100644 --- a/hail/build.gradle +++ b/hail/build.gradle @@ -27,6 +27,9 @@ repositories { mavenCentral() jcenter() maven { url "https://repository.cloudera.com/artifactory/cloudera-repos/" } + maven { url "https://repo.spring.io/plugins-release/" } + // Necessary for elasticsearch spark 3.0.1 snapshot. 
+ maven { url "https://oss.sonatype.org/content/repositories/snapshots/"} } sourceSets.main.scala.srcDir "src/main/java" @@ -192,7 +195,7 @@ dependencies { } else if (sparkVersion.startsWith("3.0.")) { assert(scalaMajorVersion == "2.12") - bundled 'org.elasticsearch:elasticsearch-spark-30_2.12:7.13.1' + bundled 'org.elasticsearch:elasticsearch-spark-30_2.12:7.13.0-SNAPSHOT' } else if (sparkVersion.startsWith("2.4.")) { assert(scalaMajorVersion == "2.11") diff --git a/hail/python/MANIFEST.in b/hail/python/MANIFEST.in index aa401baf489..77493686b12 100644 --- a/hail/python/MANIFEST.in +++ b/hail/python/MANIFEST.in @@ -5,4 +5,3 @@ include hail/backend/hail-all-spark.jar include hailtop/hail_version include hailtop/hailctl/deploy.yaml include hailtop/py.typed -include requirements.txt diff --git a/hail/python/hail/__init__.py b/hail/python/hail/__init__.py index b62b32d6d59..7634d7d5109 100644 --- a/hail/python/hail/__init__.py +++ b/hail/python/hail/__init__.py @@ -59,7 +59,7 @@ from hail.expr import aggregators as agg # noqa: E402 from hail.utils import (Struct, Interval, hadoop_copy, hadoop_open, hadoop_ls, # noqa: E402 hadoop_stat, hadoop_exists, hadoop_is_file, - hadoop_is_dir, hadoop_scheme_supported, copy_log) + hadoop_is_dir, copy_log) from .context import (init, init_local, stop, spark_context, tmp_dir, default_reference, # noqa: E402 get_reference, set_global_seed, _set_flags, _get_flags, current_backend, @@ -94,7 +94,6 @@ 'hadoop_stat', 'hadoop_exists', 'hadoop_ls', - 'hadoop_scheme_supported', 'copy_log', 'Struct', 'Interval', diff --git a/hail/python/hail/backend/spark_backend.py b/hail/python/hail/backend/spark_backend.py index d0ff75dd327..fff59fd5a29 100644 --- a/hail/python/hail/backend/spark_backend.py +++ b/hail/python/hail/backend/spark_backend.py @@ -8,9 +8,6 @@ import py4j import pyspark -from typing import List - -import hail as hl from hail.utils.java import Env, scala_package_object, scala_object from hail.expr.types import dtype from hail.expr.table_type import ttable @@ -24,6 +21,7 @@ from .py4j_backend import Py4JBackend, handle_java_exception from ..hail_logging import Logger + if pyspark.__version__ < '3' and sys.version_info > (3, 8): raise EnvironmentError('Hail with spark {} requires Python 3.6 or 3.7, found {}.{}'.format( pyspark.__version__, sys.version_info.major, sys.version_info.minor)) @@ -310,8 +308,7 @@ def to_spark(self, t, flatten): t = t.expand_types() if flatten: t = t.flatten() - return pyspark.sql.DataFrame(self._jbackend.pyToDF(self._to_java_table_ir(t._tir)), - Env.spark_session()._wrapped) + return pyspark.sql.DataFrame(self._jbackend.pyToDF(self._to_java_table_ir(t._tir)), Env.spark_session()._wrapped) def to_pandas(self, t, flatten): return self.to_spark(t, flatten).toPandas() @@ -372,13 +369,3 @@ def register_ir_function(self, name, type_parameters, argument_names, argument_t def persist_ir(self, ir): return JavaIR(self._jhc.backend().executeLiteral(self._to_java_value_ir(ir))) - - def read_multiple_matrix_tables(self, paths: 'List[str]', intervals: 'List[hl.Interval]', intervals_type): - json_repr = { - 'paths': paths, - 'intervals': intervals_type._convert_to_json(intervals), - 'intervalPointType': intervals_type.element_type.point_type._parsable_string(), - } - - results = self._jhc.backend().pyReadMultipleMatrixTables(json.dumps(json_repr)) - return [MatrixTable._from_java(jm) for jm in results] diff --git a/hail/python/hail/context.py b/hail/python/hail/context.py index 8746e22ca69..41e83b20ffb 100644 --- a/hail/python/hail/context.py 
+++ b/hail/python/hail/context.py @@ -35,10 +35,7 @@ def _get_local_tmpdir(local_tmpdir): def _get_log(log): if log is None: py_version = version() - log_dir = os.environ.get('HAIL_LOG_DIR') - if log_dir is None: - log_dir = os.getcwd() - log = hail.utils.timestamp_path(os.path.join(log_dir, 'hail'), + log = hail.utils.timestamp_path(os.path.join(os.getcwd(), 'hail'), suffix=f'-{py_version}.log') return log diff --git a/hail/python/hail/docs/change_log.md b/hail/python/hail/docs/change_log.md index 223e52edb02..257df758294 100644 --- a/hail/python/hail/docs/change_log.md +++ b/hail/python/hail/docs/change_log.md @@ -24,46 +24,13 @@ an earlier version of Hail to read files written in a later version. --- -## Version 0.2.70 - -Released 2021-06-21 - ---- - -## Version 0.2.69 - -Released 2021-06-14 - -### New Features - -- (hail#10592) Added `hl.get_hgdp` function. -- (hail#10555) Added `hl.hadoop_scheme_supported` function. -- (hail#10551) Indexing ndarrays now supports ellipses. - -### Bug fixes - -- (hail#10553) Dividing two integers now returns a `float64`, not a `float32`. -- (hail#10595) Don't include nans in `lambda_gc_agg`. - -### hailctl dataproc - -- (hail#10574) Hail logs will now be stored in `/home/hail` by default. - ---- - -## Version 0.2.68 - -Released 2021-05-27 - ---- - ## Version 0.2.67 ### Critical performance fix Released 2021-05-06 -- (hail#10451) Fixed a memory leak / performance bug triggered by `hl.literal(...).contains(...)` +- (hail#10451) Fixed a memory leak / performance bug triggered by `hl.literal(...).contains(...) --- diff --git a/hail/python/hail/docs/conf.py b/hail/python/hail/docs/conf.py index 0a906b4db67..170977131c7 100644 --- a/hail/python/hail/docs/conf.py +++ b/hail/python/hail/docs/conf.py @@ -65,9 +65,7 @@ nbsphinx_timeout = 300 nbsphinx_allow_errors = False # F821 undefined name 'tags' -if not tags.has('checktutorial') and not tags.has('generate_notebook_outputs'): # noqa: F821 - # these flags have the same effect: they run the notebook and save the output in the generated - # rST file. +if not tags.has('checktutorial'): # noqa: F821 nbsphinx_execute = 'never' autosummary_generate = True diff --git a/hail/python/hail/docs/datasets/schemas/1000_Genomes_HighCov_autosomes.rst b/hail/python/hail/docs/datasets/schemas/1000_Genomes_HighCov_autosomes.rst deleted file mode 100644 index 60b039de6de..00000000000 --- a/hail/python/hail/docs/datasets/schemas/1000_Genomes_HighCov_autosomes.rst +++ /dev/null @@ -1,214 +0,0 @@ -.. _1000_Genomes_HighCov_autosomes: - -1000_Genomes_HighCov_autosomes -============================== - -* **Versions:** NYGC_30x_phased, NYGC_30x_unphased -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.MatrixTable` - -Schema (NYGC_30x_unphased, GRCh38) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - reference_genome: str, - n_rows: int32, - n_cols: int32, - n_partitions: int32 - } - ---------------------------------------- - Column fields: - 's': str - 'FamilyID': str - 'FatherID': str - 'MotherID': str - 'Sex': str - 'Population': str - 'Superpopulation': str - 'sample_qc': struct { - dp_stats: struct { - mean: float64, - stdev: float64, - min: float64, - max: float64 - }, - gq_stats: struct { - mean: float64, - stdev: float64, - min: float64, - max: float64 - }, - call_rate: float64, - n_called: int64, - n_not_called: int64, - n_filtered: int64, - n_hom_ref: int64, - n_het: int64, - n_hom_var: int64, - n_non_ref: int64, - n_singleton: int64, - n_snp: int64, - n_insertion: int64, - n_deletion: int64, - n_transition: int64, - n_transversion: int64, - n_star: int64, - r_ti_tv: float64, - r_het_hom_var: float64, - r_insertion_deletion: float64 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'rsid': str - 'qual': float64 - 'filters': set - 'info': struct { - AC: int32, - AF: float64, - AN: int32, - BaseQRankSum: float64, - ClippingRankSum: float64, - DP: int32, - DS: bool, - END: int32, - FS: float64, - HaplotypeScore: float64, - InbreedingCoeff: float64, - MLEAC: int32, - MLEAF: float64, - MQ: float64, - MQ0: int32, - MQRankSum: float64, - NEGATIVE_TRAIN_SITE: bool, - POSITIVE_TRAIN_SITE: bool, - QD: float64, - RAW_MQ: float64, - ReadPosRankSum: float64, - SOR: float64, - VQSLOD: float64, - VariantType: str, - culprit: str, - AN_EUR: int32, - AN_EAS: int32, - AN_AMR: int32, - AN_SAS: int32, - AN_AFR: int32, - AC_EUR: int32, - AC_EAS: int32, - AC_AMR: int32, - AC_SAS: int32, - AC_AFR: int32, - AC_Hom_EUR: int32, - AC_Hom_EAS: int32, - AC_Hom_AMR: int32, - AC_Hom_SAS: int32, - AC_Hom_AFR: int32, - AC_Hom: int32, - AC_Het_EUR: int32, - AC_Het_EAS: int32, - AC_Het_AMR: int32, - AC_Het_SAS: int32, - AC_Het_AFR: int32, - AC_Het: int32, - AF_EUR: float64, - AF_EAS: float64, - AF_AMR: float64, - AF_SAS: float64, - AF_AFR: float64, - HWE_EUR: float64, - HWE_EAS: float64, - HWE_AMR: float64, - HWE_SAS: float64, - HWE_AFR: float64, - HWE: float64, - ExcHet_EUR: float64, - ExcHet_EAS: float64, - ExcHet_AMR: float64, - ExcHet_SAS: float64, - ExcHet_AFR: float64, - ExcHet: float64, - ME: float64, - AN_EUR_unrel: int32, - AN_EAS_unrel: int32, - AN_AMR_unrel: int32, - AN_SAS_unrel: int32, - AN_AFR_unrel: int32, - AC_EUR_unrel: int32, - AC_EAS_unrel: int32, - AC_AMR_unrel: int32, - AC_SAS_unrel: int32, - AC_AFR_unrel: int32, - AC_Hom_EUR_unrel: int32, - AC_Hom_EAS_unrel: int32, - AC_Hom_AMR_unrel: int32, - AC_Hom_SAS_unrel: int32, - AC_Hom_AFR_unrel: int32, - AC_Het_EUR_unrel: int32, - AC_Het_EAS_unrel: int32, - AC_Het_AMR_unrel: int32, - AC_Het_SAS_unrel: int32, - AC_Het_AFR_unrel: int32, - AF_EUR_unrel: float64, - AF_EAS_unrel: float64, - AF_AMR_unrel: float64, - AF_SAS_unrel: float64, - AF_AFR_unrel: float64, - HWE_EUR_unrel: float64, - HWE_EAS_unrel: float64, - HWE_AMR_unrel: float64, - HWE_SAS_unrel: float64, - HWE_AFR_unrel: float64 - } - 'a_index': int32 - 'was_split': bool - 'variant_qc': struct { - dp_stats: struct { - mean: float64, - stdev: float64, - min: float64, - max: float64 - }, - gq_stats: struct { - mean: float64, - stdev: float64, - min: float64, - max: float64 - }, - AC: array, - AF: array, - AN: int32, - homozygote_count: array, - call_rate: float64, - n_called: int64, - n_not_called: int64, - n_filtered: int64, - 
n_het: int64, - n_non_ref: int64, - het_freq_hwe: float64, - p_value_hwe: float64 - } - ---------------------------------------- - Entry fields: - 'AB': float64 - 'AD': array - 'DP': int32 - 'GQ': int32 - 'GT': call - 'MIN_DP': int32 - 'MQ0': int32 - 'PGT': call - 'PID': str - 'PL': array - 'RGQ': int32 - 'SB': array - ---------------------------------------- - Column key: ['s'] - Row key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/1000_Genomes_HighCov_chrX.rst b/hail/python/hail/docs/datasets/schemas/1000_Genomes_HighCov_chrX.rst deleted file mode 100644 index 7f1b80a2acb..00000000000 --- a/hail/python/hail/docs/datasets/schemas/1000_Genomes_HighCov_chrX.rst +++ /dev/null @@ -1,214 +0,0 @@ -.. _1000_Genomes_HighCov_chrX: - -1000_Genomes_HighCov_chrX -========================= - -* **Versions:** NYGC_30x_phased, NYGC_30x_unphased -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.MatrixTable` - -Schema (NYGC_30x_unphased, GRCh38) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - reference_genome: str, - n_rows: int32, - n_cols: int32, - n_partitions: int32 - } - ---------------------------------------- - Column fields: - 's': str - 'FamilyID': str - 'FatherID': str - 'MotherID': str - 'Sex': str - 'Population': str - 'Superpopulation': str - 'sample_qc': struct { - dp_stats: struct { - mean: float64, - stdev: float64, - min: float64, - max: float64 - }, - gq_stats: struct { - mean: float64, - stdev: float64, - min: float64, - max: float64 - }, - call_rate: float64, - n_called: int64, - n_not_called: int64, - n_filtered: int64, - n_hom_ref: int64, - n_het: int64, - n_hom_var: int64, - n_non_ref: int64, - n_singleton: int64, - n_snp: int64, - n_insertion: int64, - n_deletion: int64, - n_transition: int64, - n_transversion: int64, - n_star: int64, - r_ti_tv: float64, - r_het_hom_var: float64, - r_insertion_deletion: float64 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'rsid': str - 'qual': float64 - 'filters': set - 'info': struct { - AC: int32, - AF: float64, - AN: int32, - BaseQRankSum: float64, - ClippingRankSum: float64, - DP: int32, - DS: bool, - END: int32, - FS: float64, - HaplotypeScore: float64, - InbreedingCoeff: float64, - MLEAC: int32, - MLEAF: float64, - MQ: float64, - MQ0: int32, - MQRankSum: float64, - NEGATIVE_TRAIN_SITE: bool, - POSITIVE_TRAIN_SITE: bool, - QD: float64, - RAW_MQ: float64, - ReadPosRankSum: float64, - SOR: float64, - VQSLOD: float64, - VariantType: str, - culprit: str, - AN_EUR: int32, - AN_EAS: int32, - AN_AMR: int32, - AN_SAS: int32, - AN_AFR: int32, - AC_EUR: int32, - AC_EAS: int32, - AC_AMR: int32, - AC_SAS: int32, - AC_AFR: int32, - AC_Hom_EUR: int32, - AC_Hom_EAS: int32, - AC_Hom_AMR: int32, - AC_Hom_SAS: int32, - AC_Hom_AFR: int32, - AC_Hom: int32, - AC_Het_EUR: int32, - AC_Het_EAS: int32, - AC_Het_AMR: int32, - AC_Het_SAS: int32, - AC_Het_AFR: int32, - AC_Het: int32, - AF_EUR: float64, - AF_EAS: float64, - AF_AMR: float64, - AF_SAS: float64, - AF_AFR: float64, - HWE_EUR: float64, - HWE_EAS: float64, - HWE_AMR: float64, - HWE_SAS: float64, - HWE_AFR: float64, - HWE: float64, - ExcHet_EUR: float64, - ExcHet_EAS: float64, - ExcHet_AMR: float64, - ExcHet_SAS: float64, - ExcHet_AFR: float64, - ExcHet: float64, - ME: float64, - AN_EUR_unrel: int32, - AN_EAS_unrel: int32, - AN_AMR_unrel: int32, - AN_SAS_unrel: 
int32, - AN_AFR_unrel: int32, - AC_EUR_unrel: int32, - AC_EAS_unrel: int32, - AC_AMR_unrel: int32, - AC_SAS_unrel: int32, - AC_AFR_unrel: int32, - AC_Hom_EUR_unrel: int32, - AC_Hom_EAS_unrel: int32, - AC_Hom_AMR_unrel: int32, - AC_Hom_SAS_unrel: int32, - AC_Hom_AFR_unrel: int32, - AC_Het_EUR_unrel: int32, - AC_Het_EAS_unrel: int32, - AC_Het_AMR_unrel: int32, - AC_Het_SAS_unrel: int32, - AC_Het_AFR_unrel: int32, - AF_EUR_unrel: float64, - AF_EAS_unrel: float64, - AF_AMR_unrel: float64, - AF_SAS_unrel: float64, - AF_AFR_unrel: float64, - HWE_EUR_unrel: float64, - HWE_EAS_unrel: float64, - HWE_AMR_unrel: float64, - HWE_SAS_unrel: float64, - HWE_AFR_unrel: float64 - } - 'a_index': int32 - 'was_split': bool - 'variant_qc': struct { - dp_stats: struct { - mean: float64, - stdev: float64, - min: float64, - max: float64 - }, - gq_stats: struct { - mean: float64, - stdev: float64, - min: float64, - max: float64 - }, - AC: array, - AF: array, - AN: int32, - homozygote_count: array, - call_rate: float64, - n_called: int64, - n_not_called: int64, - n_filtered: int64, - n_het: int64, - n_non_ref: int64, - het_freq_hwe: float64, - p_value_hwe: float64 - } - ---------------------------------------- - Entry fields: - 'AB': float64 - 'AD': array - 'DP': int32 - 'GQ': int32 - 'GT': call - 'MIN_DP': int32 - 'MQ0': int32 - 'PGT': call - 'PID': str - 'PL': array - 'RGQ': int32 - 'SB': array - ---------------------------------------- - Column key: ['s'] - Row key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/1000_Genomes_HighCov_chrY.rst b/hail/python/hail/docs/datasets/schemas/1000_Genomes_HighCov_chrY.rst deleted file mode 100644 index 27c4e92d74f..00000000000 --- a/hail/python/hail/docs/datasets/schemas/1000_Genomes_HighCov_chrY.rst +++ /dev/null @@ -1,175 +0,0 @@ -.. _1000_Genomes_HighCov_chrY: - -1000_Genomes_HighCov_chrY -========================= - -* **Versions:** NYGC_30x_unphased -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.MatrixTable` - -Schema (NYGC_30x_unphased, GRCh38) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - reference_genome: str, - n_rows: int32, - n_cols: int32, - n_partitions: int32 - } - ---------------------------------------- - Column fields: - 's': str - 'FamilyID': str - 'FatherID': str - 'MotherID': str - 'Sex': str - 'Population': str - 'Superpopulation': str - 'sample_qc': struct { - dp_stats: struct { - mean: float64, - stdev: float64, - min: float64, - max: float64 - }, - gq_stats: struct { - mean: float64, - stdev: float64, - min: float64, - max: float64 - }, - call_rate: float64, - n_called: int64, - n_not_called: int64, - n_filtered: int64, - n_hom_ref: int64, - n_het: int64, - n_hom_var: int64, - n_non_ref: int64, - n_singleton: int64, - n_snp: int64, - n_insertion: int64, - n_deletion: int64, - n_transition: int64, - n_transversion: int64, - n_star: int64, - r_ti_tv: float64, - r_het_hom_var: float64, - r_insertion_deletion: float64 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'rsid': str - 'qual': float64 - 'filters': set - 'info': struct { - AC: int32, - AF: float64, - AN: int32, - BaseQRankSum: float64, - ClippingRankSum: float64, - DP: int32, - DS: bool, - END: int32, - ExcessHet: float64, - FS: float64, - HaplotypeScore: float64, - InbreedingCoeff: float64, - MLEAC: int32, - MLEAF: float64, - MQ: float64, - MQ0: int32, - MQRankSum: float64, - NEGATIVE_TRAIN_SITE: bool, - POSITIVE_TRAIN_SITE: bool, - QD: float64, - RAW_MQ: float64, - ReadPosRankSum: float64, - SOR: float64, - VQSLOD: float64, - VariantType: str, - culprit: str, - AN_EAS: int32, - AN_AMR: int32, - AN_EUR: int32, - AN_AFR: int32, - AN_SAS: int32, - AN_EUR_unrel: int32, - AN_EAS_unrel: int32, - AN_AMR_unrel: int32, - AN_SAS_unrel: int32, - AN_AFR_unrel: int32, - AC_EAS: int32, - AC_AMR: int32, - AC_EUR: int32, - AC_AFR: int32, - AC_SAS: int32, - AC_EUR_unrel: int32, - AC_EAS_unrel: int32, - AC_AMR_unrel: int32, - AC_SAS_unrel: int32, - AC_AFR_unrel: int32, - AF_EAS: float64, - AF_AMR: float64, - AF_EUR: float64, - AF_AFR: float64, - AF_SAS: float64, - AF_EUR_unrel: float64, - AF_EAS_unrel: float64, - AF_AMR_unrel: float64, - AF_SAS_unrel: float64, - AF_AFR_unrel: float64 - } - 'a_index': int32 - 'was_split': bool - 'variant_qc': struct { - dp_stats: struct { - mean: float64, - stdev: float64, - min: float64, - max: float64 - }, - gq_stats: struct { - mean: float64, - stdev: float64, - min: float64, - max: float64 - }, - AC: array, - AF: array, - AN: int32, - homozygote_count: array, - call_rate: float64, - n_called: int64, - n_not_called: int64, - n_filtered: int64, - n_het: int64, - n_non_ref: int64, - het_freq_hwe: float64, - p_value_hwe: float64 - } - ---------------------------------------- - Entry fields: - 'AB': float64 - 'AD': array - 'DP': int32 - 'GQ': int32 - 'GT': call - 'MIN_DP': int32 - 'MQ0': int32 - 'PGT': call - 'PID': str - 'PL': array - 'RGQ': int32 - 'SB': array - ---------------------------------------- - Column key: ['s'] - Row key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/1000_Genomes_Retracted_autosomes.rst b/hail/python/hail/docs/datasets/schemas/1000_Genomes_Retracted_autosomes.rst deleted file mode 100644 index 37f2a7384b8..00000000000 --- a/hail/python/hail/docs/datasets/schemas/1000_Genomes_Retracted_autosomes.rst +++ /dev/null @@ -1,128 +0,0 @@ -.. 
_1000_Genomes_Retracted_autosomes: - -1000_Genomes_Retracted_autosomes -================================ - -* **Versions:** phase_3 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.MatrixTable` - -Schema (phase_3, GRCh38) -~~~~~~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_cols: int32, - n_partitions: int32 - } - ---------------------------------------- - Column fields: - 's': str - 'population': str - 'super_population': str - 'is_female': bool - 'family_id': str - 'relationship_role': str - 'maternal_id': str - 'paternal_id': str - 'children_ids': array - 'sibling_ids': array - 'second_order_relationship_ids': array - 'third_order_relationship_ids': array - 'sample_qc': struct { - call_rate: float64, - n_called: int64, - n_not_called: int64, - n_hom_ref: int64, - n_het: int64, - n_hom_var: int64, - n_non_ref: int64, - n_singleton: int64, - n_snp: int64, - n_insertion: int64, - n_deletion: int64, - n_transition: int64, - n_transversion: int64, - n_star: int64, - r_ti_tv: float64, - r_het_hom_var: float64, - r_insertion_deletion: float64 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'rsid': str - 'qual': float64 - 'filters': set - 'info': struct { - CIEND: int32, - CIPOS: int32, - CS: str, - END: int32, - IMPRECISE: bool, - MC: array, - MEINFO: array, - MEND: int32, - MLEN: int32, - MSTART: int32, - SVLEN: array, - SVTYPE: str, - TSD: str, - AC: int32, - AF: float64, - NS: int32, - AN: int32, - EAS_AF: float64, - EUR_AF: float64, - AFR_AF: float64, - AMR_AF: float64, - SAS_AF: float64, - DP: int32, - AA: str, - VT: str, - EX_TARGET: bool, - MULTI_ALLELIC: bool, - STRAND_FLIP: bool, - REF_SWITCH: bool, - DEPRECATED_RSID: array, - RSID_REMOVED: array, - GRCH37_38_REF_STRING_MATCH: bool, - NOT_ALL_RSIDS_STRAND_CHANGE_OR_REF_SWITCH: bool, - GRCH37_POS: int32, - GRCH37_REF: str, - ALLELE_TRANSFORM: bool, - REF_NEW_ALLELE: bool, - CHROM_CHANGE_BETWEEN_ASSEMBLIES: str - } - 'a_index': int32 - 'was_split': bool - 'old_locus': locus - 'old_alleles': array - 'variant_qc': struct { - AC: array, - AF: array, - AN: int32, - homozygote_count: array, - n_called: int64, - n_not_called: int64, - call_rate: float32, - n_het: int64, - n_non_ref: int64, - het_freq_hwe: float64, - p_value_hwe: float64 - } - ---------------------------------------- - Entry fields: - 'GT': call - ---------------------------------------- - Column key: ['s'] - Row key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/1000_Genomes_Retracted_chrX.rst b/hail/python/hail/docs/datasets/schemas/1000_Genomes_Retracted_chrX.rst deleted file mode 100644 index c2f90a8592b..00000000000 --- a/hail/python/hail/docs/datasets/schemas/1000_Genomes_Retracted_chrX.rst +++ /dev/null @@ -1,128 +0,0 @@ -.. _1000_Genomes_Retracted_chrX: - -1000_Genomes_Retracted_chrX -=========================== - -* **Versions:** phase_3 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.MatrixTable` - -Schema (phase_3, GRCh38) -~~~~~~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_cols: int32, - n_partitions: int32 - } - ---------------------------------------- - Column fields: - 's': str - 'population': str - 'super_population': str - 'is_female': bool - 'family_id': str - 'relationship_role': str - 'maternal_id': str - 'paternal_id': str - 'children_ids': array - 'sibling_ids': array - 'second_order_relationship_ids': array - 'third_order_relationship_ids': array - 'sample_qc': struct { - call_rate: float64, - n_called: int64, - n_not_called: int64, - n_hom_ref: int64, - n_het: int64, - n_hom_var: int64, - n_non_ref: int64, - n_singleton: int64, - n_snp: int64, - n_insertion: int64, - n_deletion: int64, - n_transition: int64, - n_transversion: int64, - n_star: int64, - r_ti_tv: float64, - r_het_hom_var: float64, - r_insertion_deletion: float64 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'rsid': str - 'qual': float64 - 'filters': set - 'info': struct { - CIEND: int32, - CIPOS: int32, - CS: str, - END: int32, - IMPRECISE: bool, - MC: array, - MEINFO: array, - MEND: int32, - MLEN: int32, - MSTART: int32, - SVLEN: array, - SVTYPE: str, - TSD: str, - AC: int32, - AF: float64, - NS: int32, - AN: int32, - EAS_AF: float64, - EUR_AF: float64, - AFR_AF: float64, - AMR_AF: float64, - SAS_AF: float64, - DP: int32, - AA: str, - VT: str, - EX_TARGET: bool, - MULTI_ALLELIC: bool, - STRAND_FLIP: bool, - REF_SWITCH: bool, - DEPRECATED_RSID: array, - RSID_REMOVED: array, - GRCH37_38_REF_STRING_MATCH: bool, - NOT_ALL_RSIDS_STRAND_CHANGE_OR_REF_SWITCH: bool, - GRCH37_POS: int32, - GRCH37_REF: str, - ALLELE_TRANSFORM: bool, - REF_NEW_ALLELE: bool, - CHROM_CHANGE_BETWEEN_ASSEMBLIES: str - } - 'a_index': int32 - 'was_split': bool - 'old_locus': locus - 'old_alleles': array - 'variant_qc': struct { - AC: array, - AF: array, - AN: int32, - homozygote_count: array, - n_called: int64, - n_not_called: int64, - call_rate: float32, - n_het: int64, - n_non_ref: int64, - het_freq_hwe: float64, - p_value_hwe: float64 - } - ---------------------------------------- - Entry fields: - 'GT': call - ---------------------------------------- - Column key: ['s'] - Row key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/1000_Genomes_Retracted_chrY.rst b/hail/python/hail/docs/datasets/schemas/1000_Genomes_Retracted_chrY.rst deleted file mode 100644 index 03e258facd1..00000000000 --- a/hail/python/hail/docs/datasets/schemas/1000_Genomes_Retracted_chrY.rst +++ /dev/null @@ -1,117 +0,0 @@ -.. _1000_Genomes_Retracted_chrY: - -1000_Genomes_Retracted_chrY -=========================== - -* **Versions:** phase_3 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.MatrixTable` - -Schema (phase_3, GRCh38) -~~~~~~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_cols: int32, - n_partitions: int32 - } - ---------------------------------------- - Column fields: - 's': str - 'population': str - 'super_population': str - 'is_female': bool - 'family_id': str - 'relationship_role': str - 'maternal_id': str - 'paternal_id': str - 'children_ids': array - 'sibling_ids': array - 'second_order_relationship_ids': array - 'third_order_relationship_ids': array - 'sample_qc': struct { - call_rate: float64, - n_called: int64, - n_not_called: int64, - n_hom_ref: int64, - n_het: int64, - n_hom_var: int64, - n_non_ref: int64, - n_singleton: int64, - n_snp: int64, - n_insertion: int64, - n_deletion: int64, - n_transition: int64, - n_transversion: int64, - n_star: int64, - r_ti_tv: float64, - r_het_hom_var: float64, - r_insertion_deletion: float64 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'rsid': str - 'qual': float64 - 'filters': set - 'info': struct { - DP: int32, - END: int32, - SVTYPE: str, - AA: str, - AC: int32, - AF: float64, - NS: int32, - AN: int32, - EAS_AF: float64, - EUR_AF: float64, - AFR_AF: float64, - AMR_AF: float64, - SAS_AF: float64, - VT: str, - EX_TARGET: bool, - MULTI_ALLELIC: bool, - STRAND_FLIP: bool, - REF_SWITCH: bool, - DEPRECATED_RSID: str, - RSID_REMOVED: str, - GRCH37_38_REF_STRING_MATCH: bool, - NOT_ALL_RSIDS_STRAND_CHANGE_OR_REF_SWITCH: bool, - GRCH37_POS: int32, - GRCH37_REF: str, - ALLELE_TRANSFORM: bool, - REF_NEW_ALLELE: bool, - CHROM_CHANGE_BETWEEN_ASSEMBLIES: str - } - 'a_index': int32 - 'was_split': bool - 'old_locus': locus - 'old_alleles': array - 'variant_qc': struct { - AC: array, - AF: array, - AN: int32, - homozygote_count: array, - n_called: int64, - n_not_called: int64, - call_rate: float32, - n_het: int64, - n_non_ref: int64, - het_freq_hwe: float64, - p_value_hwe: float64 - } - ---------------------------------------- - Entry fields: - 'GT': call - ---------------------------------------- - Column key: ['s'] - Row key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/1000_Genomes_autosomes.rst b/hail/python/hail/docs/datasets/schemas/1000_Genomes_autosomes.rst index aa891dd30cb..d4676aaea39 100644 --- a/hail/python/hail/docs/datasets/schemas/1000_Genomes_autosomes.rst +++ b/hail/python/hail/docs/datasets/schemas/1000_Genomes_autosomes.rst @@ -4,7 +4,7 @@ ====================== * **Versions:** phase_3 -* **Reference genome builds:** GRCh37 +* **Reference genome builds:** GRCh37, GRCh38 * **Type:** :class:`hail.MatrixTable` Schema (phase_3, GRCh37) @@ -115,3 +115,4 @@ Schema (phase_3, GRCh37) Column key: ['s'] Row key: ['locus', 'alleles'] ---------------------------------------- + diff --git a/hail/python/hail/docs/datasets/schemas/1000_Genomes_chrX.rst b/hail/python/hail/docs/datasets/schemas/1000_Genomes_chrX.rst index 5ebcbb7647e..8a058234af1 100644 --- a/hail/python/hail/docs/datasets/schemas/1000_Genomes_chrX.rst +++ b/hail/python/hail/docs/datasets/schemas/1000_Genomes_chrX.rst @@ -4,7 +4,7 @@ ================= * **Versions:** phase_3 -* **Reference genome builds:** GRCh37 +* **Reference genome builds:** GRCh37, GRCh38 * **Type:** :class:`hail.MatrixTable` Schema (phase_3, GRCh37) @@ -115,3 +115,4 @@ Schema (phase_3, GRCh37) Column key: ['s'] Row key: ['locus', 'alleles'] ---------------------------------------- + diff 
--git a/hail/python/hail/docs/datasets/schemas/1000_Genomes_chrY.rst b/hail/python/hail/docs/datasets/schemas/1000_Genomes_chrY.rst index 663e4910596..ee27a256f0d 100644 --- a/hail/python/hail/docs/datasets/schemas/1000_Genomes_chrY.rst +++ b/hail/python/hail/docs/datasets/schemas/1000_Genomes_chrY.rst @@ -4,7 +4,7 @@ ================= * **Versions:** phase_3 -* **Reference genome builds:** GRCh37 +* **Reference genome builds:** GRCh37, GRCh38 * **Type:** :class:`hail.MatrixTable` Schema (phase_3, GRCh37) @@ -104,3 +104,4 @@ Schema (phase_3, GRCh37) Column key: ['s'] Row key: ['locus', 'alleles'] ---------------------------------------- + diff --git a/hail/python/hail/docs/datasets/schemas/CADD.rst b/hail/python/hail/docs/datasets/schemas/CADD.rst index 90f72cac06b..f9f1b075a28 100644 --- a/hail/python/hail/docs/datasets/schemas/CADD.rst +++ b/hail/python/hail/docs/datasets/schemas/CADD.rst @@ -3,7 +3,7 @@ CADD ==== -* **Versions:** 1.4, 1.6 +* **Versions:** 1.4 * **Reference genome builds:** GRCh37, GRCh38 * **Type:** :class:`hail.Table` @@ -30,3 +30,4 @@ Schema (1.4, GRCh37) ---------------------------------------- Key: ['locus', 'alleles'] ---------------------------------------- + diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Adipose_Subcutaneous_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Adipose_Subcutaneous_all_snp_gene_associations.rst deleted file mode 100644 index 1ecce309f93..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Adipose_Subcutaneous_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Adipose_Subcutaneous_all_snp_gene_associations: - -GTEx_eQTL_Adipose_Subcutaneous_all_snp_gene_associations -======================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Adipose_Visceral_Omentum_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Adipose_Visceral_Omentum_all_snp_gene_associations.rst deleted file mode 100644 index 1fae6e47cc0..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Adipose_Visceral_Omentum_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Adipose_Visceral_Omentum_all_snp_gene_associations: - -GTEx_eQTL_Adipose_Visceral_Omentum_all_snp_gene_associations -============================================================ - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Artery_Coronary_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Artery_Coronary_all_snp_gene_associations.rst deleted file mode 100644 index 2da0160aec6..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Artery_Coronary_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Artery_Coronary_all_snp_gene_associations: - -GTEx_eQTL_Artery_Coronary_all_snp_gene_associations -=================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Artery_Tibial_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Artery_Tibial_all_snp_gene_associations.rst deleted file mode 100644 index 28597a4b4b7..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Artery_Tibial_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Artery_Tibial_all_snp_gene_associations: - -GTEx_eQTL_Artery_Tibial_all_snp_gene_associations -================================================= - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.rst deleted file mode 100644 index 3dd286b6fa8..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations: - -GTEx_eQTL_Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations -======================================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Caudate_basal_ganglia_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Caudate_basal_ganglia_all_snp_gene_associations.rst deleted file mode 100644 index 2727bc4a840..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Caudate_basal_ganglia_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Brain_Caudate_basal_ganglia_all_snp_gene_associations: - -GTEx_eQTL_Brain_Caudate_basal_ganglia_all_snp_gene_associations -=============================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Cerebellar_Hemisphere_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Cerebellar_Hemisphere_all_snp_gene_associations.rst deleted file mode 100644 index 8a86950ca5f..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Cerebellar_Hemisphere_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Brain_Cerebellar_Hemisphere_all_snp_gene_associations: - -GTEx_eQTL_Brain_Cerebellar_Hemisphere_all_snp_gene_associations -=============================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Cortex_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Cortex_all_snp_gene_associations.rst deleted file mode 100644 index 058174375af..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Cortex_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Brain_Cortex_all_snp_gene_associations: - -GTEx_eQTL_Brain_Cortex_all_snp_gene_associations -================================================ - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Frontal_Cortex_BA9_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Frontal_Cortex_BA9_all_snp_gene_associations.rst deleted file mode 100644 index 53f4b85dd00..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Frontal_Cortex_BA9_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Brain_Frontal_Cortex_BA9_all_snp_gene_associations: - -GTEx_eQTL_Brain_Frontal_Cortex_BA9_all_snp_gene_associations -============================================================ - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Hippocampus_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Hippocampus_all_snp_gene_associations.rst deleted file mode 100644 index df9fe4cf3df..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Hippocampus_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Brain_Hippocampus_all_snp_gene_associations: - -GTEx_eQTL_Brain_Hippocampus_all_snp_gene_associations -===================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.rst deleted file mode 100644 index 92cf844108d..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations: - -GTEx_eQTL_Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations -========================================================================= - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Putamen_basal_ganglia_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Putamen_basal_ganglia_all_snp_gene_associations.rst deleted file mode 100644 index 0b2a7b33d69..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Putamen_basal_ganglia_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Brain_Putamen_basal_ganglia_all_snp_gene_associations: - -GTEx_eQTL_Brain_Putamen_basal_ganglia_all_snp_gene_associations -=============================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.rst deleted file mode 100644 index 98a3c320511..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations: - -GTEx_eQTL_Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations -================================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Substantia_nigra_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Substantia_nigra_all_snp_gene_associations.rst deleted file mode 100644 index e328efdf574..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Substantia_nigra_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Brain_Substantia_nigra_all_snp_gene_associations: - -GTEx_eQTL_Brain_Substantia_nigra_all_snp_gene_associations -========================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Breast_Mammary_Tissue_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Breast_Mammary_Tissue_all_snp_gene_associations.rst deleted file mode 100644 index d6dc9d7be81..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Breast_Mammary_Tissue_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Breast_Mammary_Tissue_all_snp_gene_associations: - -GTEx_eQTL_Breast_Mammary_Tissue_all_snp_gene_associations -========================================================= - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Cells_Cultured_fibroblasts_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Cells_Cultured_fibroblasts_all_snp_gene_associations.rst deleted file mode 100644 index 401490b52cb..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Cells_Cultured_fibroblasts_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Cells_Cultured_fibroblasts_all_snp_gene_associations: - -GTEx_eQTL_Cells_Cultured_fibroblasts_all_snp_gene_associations -============================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.rst deleted file mode 100644 index 9092d0265d1..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Cells_EBV-transformed_lymphocytes_all_snp_gene_associations: - -GTEx_eQTL_Cells_EBV-transformed_lymphocytes_all_snp_gene_associations -===================================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Colon_Transverse_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Colon_Transverse_all_snp_gene_associations.rst deleted file mode 100644 index 5d978076af4..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Colon_Transverse_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Colon_Transverse_all_snp_gene_associations: - -GTEx_eQTL_Colon_Transverse_all_snp_gene_associations -==================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.rst deleted file mode 100644 index bd4621ed006..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Esophagus_Gastroesophageal_Junction_all_snp_gene_associations: - -GTEx_eQTL_Esophagus_Gastroesophageal_Junction_all_snp_gene_associations -======================================================================= - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Esophagus_Mucosa_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Esophagus_Mucosa_all_snp_gene_associations.rst deleted file mode 100644 index a7bdadd47a4..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Esophagus_Mucosa_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Esophagus_Mucosa_all_snp_gene_associations: - -GTEx_eQTL_Esophagus_Mucosa_all_snp_gene_associations -==================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Heart_Atrial_Appendage_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Heart_Atrial_Appendage_all_snp_gene_associations.rst deleted file mode 100644 index 7f2e6694b9a..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Heart_Atrial_Appendage_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Heart_Atrial_Appendage_all_snp_gene_associations: - -GTEx_eQTL_Heart_Atrial_Appendage_all_snp_gene_associations -========================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Heart_Left_Ventricle_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Heart_Left_Ventricle_all_snp_gene_associations.rst deleted file mode 100644 index 2c80cd1b735..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Heart_Left_Ventricle_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Heart_Left_Ventricle_all_snp_gene_associations: - -GTEx_eQTL_Heart_Left_Ventricle_all_snp_gene_associations -======================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Kidney_Cortex_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Kidney_Cortex_all_snp_gene_associations.rst deleted file mode 100644 index 3fdd01e06bd..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Kidney_Cortex_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Kidney_Cortex_all_snp_gene_associations: - -GTEx_eQTL_Kidney_Cortex_all_snp_gene_associations -================================================= - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Minor_Salivary_Gland_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Minor_Salivary_Gland_all_snp_gene_associations.rst deleted file mode 100644 index ef26366b9e4..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Minor_Salivary_Gland_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Minor_Salivary_Gland_all_snp_gene_associations: - -GTEx_eQTL_Minor_Salivary_Gland_all_snp_gene_associations -======================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Muscle_Skeletal_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Muscle_Skeletal_all_snp_gene_associations.rst deleted file mode 100644 index e7244d44cb1..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Muscle_Skeletal_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Muscle_Skeletal_all_snp_gene_associations: - -GTEx_eQTL_Muscle_Skeletal_all_snp_gene_associations -=================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Nerve_Tibial_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Nerve_Tibial_all_snp_gene_associations.rst deleted file mode 100644 index 3798c114bf2..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Nerve_Tibial_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Nerve_Tibial_all_snp_gene_associations: - -GTEx_eQTL_Nerve_Tibial_all_snp_gene_associations -================================================ - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Prostate_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Prostate_all_snp_gene_associations.rst deleted file mode 100644 index 490d25cb499..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Prostate_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. 
_GTEx_eQTL_Prostate_all_snp_gene_associations: - -GTEx_eQTL_Prostate_all_snp_gene_associations -============================================ - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.rst deleted file mode 100644 index fe06b6395a7..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations: - -GTEx_eQTL_Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations -=================================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.rst deleted file mode 100644 index 9fa7ddc5ae1..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations: - -GTEx_eQTL_Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations -============================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Small_Intestine_Terminal_Ileum_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Small_Intestine_Terminal_Ileum_all_snp_gene_associations.rst deleted file mode 100644 index cf0cb983784..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Small_Intestine_Terminal_Ileum_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Small_Intestine_Terminal_Ileum_all_snp_gene_associations: - -GTEx_eQTL_Small_Intestine_Terminal_Ileum_all_snp_gene_associations -================================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Testis_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Testis_all_snp_gene_associations.rst deleted file mode 100644 index 8b9e89728ad..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Testis_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Testis_all_snp_gene_associations: - -GTEx_eQTL_Testis_all_snp_gene_associations -========================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Uterus_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Uterus_all_snp_gene_associations.rst deleted file mode 100644 index 64dfdc1c721..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Uterus_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Uterus_all_snp_gene_associations: - -GTEx_eQTL_Uterus_all_snp_gene_associations -========================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Vagina_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Vagina_all_snp_gene_associations.rst deleted file mode 100644 index 35abd532099..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Vagina_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_Vagina_all_snp_gene_associations: - -GTEx_eQTL_Vagina_all_snp_gene_associations -========================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Whole_Blood_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Whole_Blood_all_snp_gene_associations.rst deleted file mode 100644 index 862600b473a..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Whole_Blood_all_snp_gene_associations.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. 
_GTEx_eQTL_Whole_Blood_all_snp_gene_associations: - -GTEx_eQTL_Whole_Blood_all_snp_gene_associations -=============================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Spleen_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Adipose_Subcutaneous.rst similarity index 86% rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Spleen_all_snp_gene_associations.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Adipose_Subcutaneous.rst index 61149cb9651..a051200fc19 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Spleen_all_snp_gene_associations.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Adipose_Subcutaneous.rst @@ -1,7 +1,7 @@ -.. _GTEx_eQTL_Spleen_all_snp_gene_associations: +.. _GTEx_eQTL_allpairs_Adipose_Subcutaneous: -GTEx_eQTL_Spleen_all_snp_gene_associations -========================================== +GTEx_eQTL_allpairs_Adipose_Subcutaneous +======================================= * **Versions:** v8 * **Reference genome builds:** GRCh38 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Stomach_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Adipose_Visceral_Omentum.rst similarity index 90% rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Stomach_all_snp_gene_associations.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Adipose_Visceral_Omentum.rst index 53afd786e96..c75671298bc 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Stomach_all_snp_gene_associations.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Adipose_Visceral_Omentum.rst @@ -1,6 +1,6 @@ -.. _GTEx_eQTL_Stomach_all_snp_gene_associations: +.. _GTEx_eQTL_allpairs_Adipose_Visceral_Omentum: -GTEx_eQTL_Stomach_all_snp_gene_associations +GTEx_eQTL_allpairs_Adipose_Visceral_Omentum =========================================== * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Adrenal_Gland.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Adrenal_Gland.rst new file mode 100644 index 00000000000..22c8c78368f --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Adrenal_Gland.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_allpairs_Adrenal_Gland: + +GTEx_eQTL_allpairs_Adrenal_Gland +================================ + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Artery_Aorta.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Artery_Aorta.rst new file mode 100644 index 00000000000..e11155d1e5d --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Artery_Aorta.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_allpairs_Artery_Aorta: + +GTEx_eQTL_allpairs_Artery_Aorta +=============================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Artery_Coronary.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Artery_Coronary.rst new file mode 100644 index 00000000000..bcc6c605321 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Artery_Coronary.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_allpairs_Artery_Coronary: + +GTEx_eQTL_allpairs_Artery_Coronary +================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Artery_Tibial.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Artery_Tibial.rst new file mode 100644 index 00000000000..16d156b7085 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Artery_Tibial.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_allpairs_Artery_Tibial: + +GTEx_eQTL_allpairs_Artery_Tibial +================================ + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Amygdala.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Amygdala.rst new file mode 100644 index 00000000000..82d5617f4c8 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Amygdala.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_allpairs_Brain_Amygdala: + +GTEx_eQTL_allpairs_Brain_Amygdala +================================= + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Artery_Aorta_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24.rst similarity index 83% rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Artery_Aorta_all_snp_gene_associations.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24.rst index bf52a3f05ab..e0b8994e344 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Artery_Aorta_all_snp_gene_associations.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24.rst @@ -1,7 +1,7 @@ -.. _GTEx_eQTL_Artery_Aorta_all_snp_gene_associations: +.. _GTEx_eQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24: -GTEx_eQTL_Artery_Aorta_all_snp_gene_associations -================================================ +GTEx_eQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24 +======================================================= * **Versions:** v8 * **Reference genome builds:** GRCh38 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Caudate_basal_ganglia.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Caudate_basal_ganglia.rst new file mode 100644 index 00000000000..63902160116 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Caudate_basal_ganglia.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_allpairs_Brain_Caudate_basal_ganglia: + +GTEx_eQTL_allpairs_Brain_Caudate_basal_ganglia +============================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Cerebellar_Hemisphere.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Cerebellar_Hemisphere.rst new file mode 100644 index 00000000000..5d2b2af3f1c --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Cerebellar_Hemisphere.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_allpairs_Brain_Cerebellar_Hemisphere: + +GTEx_eQTL_allpairs_Brain_Cerebellar_Hemisphere +============================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Cerebellum.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Cerebellum.rst new file mode 100644 index 00000000000..be35ca19066 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Cerebellum.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_allpairs_Brain_Cerebellum: + +GTEx_eQTL_allpairs_Brain_Cerebellum +=================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Cortex.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Cortex.rst new file mode 100644 index 00000000000..268c1976bab --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Cortex.rst @@ -0,0 +1,39 @@ +.. 
_GTEx_eQTL_allpairs_Brain_Cortex: + +GTEx_eQTL_allpairs_Brain_Cortex +=============================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Thyroid_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Frontal_Cortex_BA9.rst similarity index 90% rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Thyroid_all_snp_gene_associations.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Frontal_Cortex_BA9.rst index 34d6c4701da..e4cd83a2f62 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Thyroid_all_snp_gene_associations.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Frontal_Cortex_BA9.rst @@ -1,6 +1,6 @@ -.. _GTEx_eQTL_Thyroid_all_snp_gene_associations: +.. _GTEx_eQTL_allpairs_Brain_Frontal_Cortex_BA9: -GTEx_eQTL_Thyroid_all_snp_gene_associations +GTEx_eQTL_allpairs_Brain_Frontal_Cortex_BA9 =========================================== * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Hippocampus.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Hippocampus.rst new file mode 100644 index 00000000000..665692560d1 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Hippocampus.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_allpairs_Brain_Hippocampus: + +GTEx_eQTL_allpairs_Brain_Hippocampus +==================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Hypothalamus.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Hypothalamus.rst new file mode 100644 index 00000000000..d312030fcb6 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Hypothalamus.rst @@ -0,0 +1,39 @@ +.. 
_GTEx_eQTL_allpairs_Brain_Hypothalamus: + +GTEx_eQTL_allpairs_Brain_Hypothalamus +===================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Esophagus_Muscularis_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia.rst similarity index 88% rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Esophagus_Muscularis_all_snp_gene_associations.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia.rst index d7cae1acb66..21cc80548bb 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Esophagus_Muscularis_all_snp_gene_associations.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia.rst @@ -1,6 +1,6 @@ -.. _GTEx_eQTL_Esophagus_Muscularis_all_snp_gene_associations: +.. _GTEx_eQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia: -GTEx_eQTL_Esophagus_Muscularis_all_snp_gene_associations +GTEx_eQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia ======================================================== * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Putamen_basal_ganglia.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Putamen_basal_ganglia.rst new file mode 100644 index 00000000000..38c4c2820ed --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Putamen_basal_ganglia.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_allpairs_Brain_Putamen_basal_ganglia: + +GTEx_eQTL_allpairs_Brain_Putamen_basal_ganglia +============================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Colon_Sigmoid_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Spinal_cord_cervical_c-1.rst similarity index 89% rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Colon_Sigmoid_all_snp_gene_associations.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Spinal_cord_cervical_c-1.rst index f74f4e4bba4..94fba0afb96 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Colon_Sigmoid_all_snp_gene_associations.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Spinal_cord_cervical_c-1.rst @@ -1,6 +1,6 @@ -.. _GTEx_eQTL_Colon_Sigmoid_all_snp_gene_associations: +.. _GTEx_eQTL_allpairs_Brain_Spinal_cord_cervical_c-1: -GTEx_eQTL_Colon_Sigmoid_all_snp_gene_associations +GTEx_eQTL_allpairs_Brain_Spinal_cord_cervical_c-1 ================================================= * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Ovary_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Substantia_nigra.rst similarity index 91% rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Ovary_all_snp_gene_associations.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Substantia_nigra.rst index 7208baa2c76..69fbe08ecf6 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Ovary_all_snp_gene_associations.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Substantia_nigra.rst @@ -1,6 +1,6 @@ -.. _GTEx_eQTL_Ovary_all_snp_gene_associations: +.. _GTEx_eQTL_allpairs_Brain_Substantia_nigra: -GTEx_eQTL_Ovary_all_snp_gene_associations +GTEx_eQTL_allpairs_Brain_Substantia_nigra ========================================= * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Lung_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Breast_Mammary_Tissue.rst similarity index 91% rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Lung_all_snp_gene_associations.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Breast_Mammary_Tissue.rst index b9937d7d3ae..f0af835ef21 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Lung_all_snp_gene_associations.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Breast_Mammary_Tissue.rst @@ -1,6 +1,6 @@ -.. _GTEx_eQTL_Lung_all_snp_gene_associations: +.. 
_GTEx_eQTL_allpairs_Breast_Mammary_Tissue:
 
-GTEx_eQTL_Lung_all_snp_gene_associations
+GTEx_eQTL_allpairs_Breast_Mammary_Tissue
 ========================================
 
 * **Versions:** v8
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Pituitary_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Cells_Cultured_fibroblasts.rst
similarity index 90%
rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Pituitary_all_snp_gene_associations.rst
rename to hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Cells_Cultured_fibroblasts.rst
index 0bbdbfcb526..7a1ea5953a4 100644
--- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Pituitary_all_snp_gene_associations.rst
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Cells_Cultured_fibroblasts.rst
@@ -1,6 +1,6 @@
-.. _GTEx_eQTL_Pituitary_all_snp_gene_associations:
+.. _GTEx_eQTL_allpairs_Cells_Cultured_fibroblasts:
 
-GTEx_eQTL_Pituitary_all_snp_gene_associations
+GTEx_eQTL_allpairs_Cells_Cultured_fibroblasts
 =============================================
 
 * **Versions:** v8
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Cerebellum_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Cells_EBV-transformed_lymphocytes.rst
similarity index 89%
rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Cerebellum_all_snp_gene_associations.rst
rename to hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Cells_EBV-transformed_lymphocytes.rst
index d3f1c83a6c8..22d91eca4c5 100644
--- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Cerebellum_all_snp_gene_associations.rst
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Cells_EBV-transformed_lymphocytes.rst
@@ -1,6 +1,6 @@
-.. _GTEx_eQTL_Brain_Cerebellum_all_snp_gene_associations:
+.. _GTEx_eQTL_allpairs_Cells_EBV-transformed_lymphocytes:
 
-GTEx_eQTL_Brain_Cerebellum_all_snp_gene_associations
+GTEx_eQTL_allpairs_Cells_EBV-transformed_lymphocytes
 ====================================================
 
 * **Versions:** v8
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Colon_Sigmoid.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Colon_Sigmoid.rst
new file mode 100644
index 00000000000..d64cfda1dd5
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Colon_Sigmoid.rst
@@ -0,0 +1,39 @@
+.. _GTEx_eQTL_allpairs_Colon_Sigmoid:
+
+GTEx_eQTL_allpairs_Colon_Sigmoid
+================================
+
+* **Versions:** v8
+* **Reference genome builds:** GRCh38
+* **Type:** :class:`hail.Table`
+
+Schema (v8, GRCh38)
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+    ----------------------------------------
+    Global fields:
+        'metadata': struct {
+            name: str,
+            version: str,
+            reference_genome: str,
+            n_rows: int32,
+            n_partitions: int32
+        }
+    ----------------------------------------
+    Row fields:
+        'locus': locus<GRCh38>
+        'alleles': array<str>
+        'gene_id': str
+        'variant_id': str
+        'tss_distance': int32
+        'ma_samples': int32
+        'ma_count': int32
+        'maf': float64
+        'pval_nominal': float64
+        'slope': float64
+        'slope_se': float64
+    ----------------------------------------
+    Key: ['locus', 'alleles']
+    ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Colon_Transverse.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Colon_Transverse.rst
new file mode 100644
index 00000000000..ee809ed97a1
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Colon_Transverse.rst
@@ -0,0 +1,39 @@
+.. _GTEx_eQTL_allpairs_Colon_Transverse:
+
+GTEx_eQTL_allpairs_Colon_Transverse
+===================================
+
+* **Versions:** v8
+* **Reference genome builds:** GRCh38
+* **Type:** :class:`hail.Table`
+
+Schema (v8, GRCh38)
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+    ----------------------------------------
+    Global fields:
+        'metadata': struct {
+            name: str,
+            version: str,
+            reference_genome: str,
+            n_rows: int32,
+            n_partitions: int32
+        }
+    ----------------------------------------
+    Row fields:
+        'locus': locus<GRCh38>
+        'alleles': array<str>
+        'gene_id': str
+        'variant_id': str
+        'tss_distance': int32
+        'ma_samples': int32
+        'ma_count': int32
+        'maf': float64
+        'pval_nominal': float64
+        'slope': float64
+        'slope_se': float64
+    ----------------------------------------
+    Key: ['locus', 'alleles']
+    ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Hypothalamus_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Esophagus_Gastroesophageal_Junction.rst
similarity index 89%
rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Hypothalamus_all_snp_gene_associations.rst
rename to hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Esophagus_Gastroesophageal_Junction.rst
index 98d59c12132..74f28cce57f 100644
--- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Hypothalamus_all_snp_gene_associations.rst
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Esophagus_Gastroesophageal_Junction.rst
@@ -1,6 +1,6 @@
-.. _GTEx_eQTL_Brain_Hypothalamus_all_snp_gene_associations:
+.. _GTEx_eQTL_allpairs_Esophagus_Gastroesophageal_Junction:
 
-GTEx_eQTL_Brain_Hypothalamus_all_snp_gene_associations
+GTEx_eQTL_allpairs_Esophagus_Gastroesophageal_Junction
 ======================================================
 
 * **Versions:** v8
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Esophagus_Mucosa.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Esophagus_Mucosa.rst
new file mode 100644
index 00000000000..8593aa7ad1d
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Esophagus_Mucosa.rst
@@ -0,0 +1,39 @@
+.. _GTEx_eQTL_allpairs_Esophagus_Mucosa:
+
+GTEx_eQTL_allpairs_Esophagus_Mucosa
+===================================
+
+* **Versions:** v8
+* **Reference genome builds:** GRCh38
+* **Type:** :class:`hail.Table`
+
+Schema (v8, GRCh38)
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+    ----------------------------------------
+    Global fields:
+        'metadata': struct {
+            name: str,
+            version: str,
+            reference_genome: str,
+            n_rows: int32,
+            n_partitions: int32
+        }
+    ----------------------------------------
+    Row fields:
+        'locus': locus<GRCh38>
+        'alleles': array<str>
+        'gene_id': str
+        'variant_id': str
+        'tss_distance': int32
+        'ma_samples': int32
+        'ma_count': int32
+        'maf': float64
+        'pval_nominal': float64
+        'slope': float64
+        'slope_se': float64
+    ----------------------------------------
+    Key: ['locus', 'alleles']
+    ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Esophagus_Muscularis.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Esophagus_Muscularis.rst
new file mode 100644
index 00000000000..0ed95e808cd
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Esophagus_Muscularis.rst
@@ -0,0 +1,39 @@
+.. _GTEx_eQTL_allpairs_Esophagus_Muscularis:
+
+GTEx_eQTL_allpairs_Esophagus_Muscularis
+=======================================
+
+* **Versions:** v8
+* **Reference genome builds:** GRCh38
+* **Type:** :class:`hail.Table`
+
+Schema (v8, GRCh38)
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+    ----------------------------------------
+    Global fields:
+        'metadata': struct {
+            name: str,
+            version: str,
+            reference_genome: str,
+            n_rows: int32,
+            n_partitions: int32
+        }
+    ----------------------------------------
+    Row fields:
+        'locus': locus<GRCh38>
+        'alleles': array<str>
+        'gene_id': str
+        'variant_id': str
+        'tss_distance': int32
+        'ma_samples': int32
+        'ma_count': int32
+        'maf': float64
+        'pval_nominal': float64
+        'slope': float64
+        'slope_se': float64
+    ----------------------------------------
+    Key: ['locus', 'alleles']
+    ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Liver_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Heart_Atrial_Appendage.rst
similarity index 91%
rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Liver_all_snp_gene_associations.rst
rename to hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Heart_Atrial_Appendage.rst
index 99cb6080806..d9a18af9156 100644
--- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Liver_all_snp_gene_associations.rst
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Heart_Atrial_Appendage.rst
@@ -1,6 +1,6 @@
-.. _GTEx_eQTL_Liver_all_snp_gene_associations:
+.. _GTEx_eQTL_allpairs_Heart_Atrial_Appendage:
 
-GTEx_eQTL_Liver_all_snp_gene_associations
+GTEx_eQTL_allpairs_Heart_Atrial_Appendage
 =========================================
 
 * **Versions:** v8
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Heart_Left_Ventricle.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Heart_Left_Ventricle.rst
new file mode 100644
index 00000000000..a7fb139ff02
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Heart_Left_Ventricle.rst
@@ -0,0 +1,39 @@
+.. _GTEx_eQTL_allpairs_Heart_Left_Ventricle:
+
+GTEx_eQTL_allpairs_Heart_Left_Ventricle
+=======================================
+
+* **Versions:** v8
+* **Reference genome builds:** GRCh38
+* **Type:** :class:`hail.Table`
+
+Schema (v8, GRCh38)
+~~~~~~~~~~~~~~~~~~~
+
+.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Kidney_Cortex.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Kidney_Cortex.rst new file mode 100644 index 00000000000..f592163b795 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Kidney_Cortex.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_allpairs_Kidney_Cortex: + +GTEx_eQTL_allpairs_Kidney_Cortex +================================ + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Liver.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Liver.rst new file mode 100644 index 00000000000..35ed2ea186c --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Liver.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_allpairs_Liver: + +GTEx_eQTL_allpairs_Liver +======================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Lung.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Lung.rst new file mode 100644 index 00000000000..7edbd47598b --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Lung.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_allpairs_Lung: + +GTEx_eQTL_allpairs_Lung +======================= + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Minor_Salivary_Gland.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Minor_Salivary_Gland.rst new file mode 100644 index 00000000000..d1f7d7f4e91 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Minor_Salivary_Gland.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_allpairs_Minor_Salivary_Gland: + +GTEx_eQTL_allpairs_Minor_Salivary_Gland +======================================= + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Muscle_Skeletal.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Muscle_Skeletal.rst new file mode 100644 index 00000000000..43066dfce22 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Muscle_Skeletal.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_allpairs_Muscle_Skeletal: + +GTEx_eQTL_allpairs_Muscle_Skeletal +================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Nerve_Tibial.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Nerve_Tibial.rst new file mode 100644 index 00000000000..685788c3bd1 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Nerve_Tibial.rst @@ -0,0 +1,39 @@ +.. 
_GTEx_eQTL_allpairs_Nerve_Tibial: + +GTEx_eQTL_allpairs_Nerve_Tibial +=============================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Ovary.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Ovary.rst new file mode 100644 index 00000000000..68847a52817 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Ovary.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_allpairs_Ovary: + +GTEx_eQTL_allpairs_Ovary +======================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Pancreas.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Pancreas.rst new file mode 100644 index 00000000000..936d3a97d10 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Pancreas.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_allpairs_Pancreas: + +GTEx_eQTL_allpairs_Pancreas +=========================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Pituitary.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Pituitary.rst new file mode 100644 index 00000000000..95afebfd9d5 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Pituitary.rst @@ -0,0 +1,39 @@ +.. 
_GTEx_eQTL_allpairs_Pituitary: + +GTEx_eQTL_allpairs_Pituitary +============================ + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Prostate.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Prostate.rst new file mode 100644 index 00000000000..e252471e957 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Prostate.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_allpairs_Prostate: + +GTEx_eQTL_allpairs_Prostate +=========================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Amygdala_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic.rst similarity index 89% rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Amygdala_all_snp_gene_associations.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic.rst index fe0f91b29b4..fe93b201cdc 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Amygdala_all_snp_gene_associations.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic.rst @@ -1,6 +1,6 @@ -.. _GTEx_eQTL_Brain_Amygdala_all_snp_gene_associations: +.. 
_GTEx_eQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic: -GTEx_eQTL_Brain_Amygdala_all_snp_gene_associations +GTEx_eQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic ================================================== * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Pancreas_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Skin_Sun_Exposed_Lower_leg.rst similarity index 86% rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Pancreas_all_snp_gene_associations.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Skin_Sun_Exposed_Lower_leg.rst index 3f38c933b46..514a58a69e0 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Pancreas_all_snp_gene_associations.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Skin_Sun_Exposed_Lower_leg.rst @@ -1,7 +1,7 @@ -.. _GTEx_eQTL_Pancreas_all_snp_gene_associations: +.. _GTEx_eQTL_allpairs_Skin_Sun_Exposed_Lower_leg: -GTEx_eQTL_Pancreas_all_snp_gene_associations -============================================ +GTEx_eQTL_allpairs_Skin_Sun_Exposed_Lower_leg +============================================= * **Versions:** v8 * **Reference genome builds:** GRCh38 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Adrenal_Gland_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Small_Intestine_Terminal_Ileum.rst similarity index 89% rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Adrenal_Gland_all_snp_gene_associations.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Small_Intestine_Terminal_Ileum.rst index 33f7c4a9e8e..7bee21efdf7 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Adrenal_Gland_all_snp_gene_associations.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Small_Intestine_Terminal_Ileum.rst @@ -1,6 +1,6 @@ -.. _GTEx_eQTL_Adrenal_Gland_all_snp_gene_associations: +.. _GTEx_eQTL_allpairs_Small_Intestine_Terminal_Ileum: -GTEx_eQTL_Adrenal_Gland_all_snp_gene_associations +GTEx_eQTL_allpairs_Small_Intestine_Terminal_Ileum ================================================= * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Spleen.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Spleen.rst new file mode 100644 index 00000000000..47134d85953 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Spleen.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_allpairs_Spleen: + +GTEx_eQTL_allpairs_Spleen +========================= + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Stomach.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Stomach.rst new file mode 100644 index 00000000000..7b2e575b99d --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Stomach.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_allpairs_Stomach: + +GTEx_eQTL_allpairs_Stomach +========================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Testis.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Testis.rst new file mode 100644 index 00000000000..88f97eb38cb --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Testis.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_allpairs_Testis: + +GTEx_eQTL_allpairs_Testis +========================= + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Thyroid.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Thyroid.rst new file mode 100644 index 00000000000..743fdcb7d3d --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Thyroid.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_allpairs_Thyroid: + +GTEx_eQTL_allpairs_Thyroid +========================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Uterus.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Uterus.rst new file mode 100644 index 00000000000..ef2cf2654c1 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Uterus.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_allpairs_Uterus: + +GTEx_eQTL_allpairs_Uterus +========================= + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Vagina.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Vagina.rst new file mode 100644 index 00000000000..82487f0024c --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Vagina.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_allpairs_Vagina: + +GTEx_eQTL_allpairs_Vagina +========================= + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Whole_Blood.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Whole_Blood.rst new file mode 100644 index 00000000000..31c04731633 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Whole_Blood.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_allpairs_Whole_Blood: + +GTEx_eQTL_allpairs_Whole_Blood +============================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Adipose_Visceral_Omentum_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Adipose_Visceral_Omentum_all_snp_gene_associations.rst deleted file mode 100644 index 093fe6bf0e3..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Adipose_Visceral_Omentum_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Adipose_Visceral_Omentum_all_snp_gene_associations: - -GTEx_sQTL_Adipose_Visceral_Omentum_all_snp_gene_associations -============================================================ - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Adrenal_Gland_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Adrenal_Gland_all_snp_gene_associations.rst deleted file mode 100644 index 187c8e160d3..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Adrenal_Gland_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Adrenal_Gland_all_snp_gene_associations: - -GTEx_sQTL_Adrenal_Gland_all_snp_gene_associations -================================================= - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Artery_Aorta_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Artery_Aorta_all_snp_gene_associations.rst deleted file mode 100644 index 0c8d81edeed..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Artery_Aorta_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Artery_Aorta_all_snp_gene_associations: - -GTEx_sQTL_Artery_Aorta_all_snp_gene_associations -================================================ - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.rst deleted file mode 100644 index c6dd450788c..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations: - -GTEx_sQTL_Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations -======================================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Caudate_basal_ganglia_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Caudate_basal_ganglia_all_snp_gene_associations.rst deleted file mode 100644 index 4ed7ba6fb53..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Caudate_basal_ganglia_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Brain_Caudate_basal_ganglia_all_snp_gene_associations: - -GTEx_sQTL_Brain_Caudate_basal_ganglia_all_snp_gene_associations -=============================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Cerebellar_Hemisphere_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Cerebellar_Hemisphere_all_snp_gene_associations.rst deleted file mode 100644 index b7b1adbb0e5..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Cerebellar_Hemisphere_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Brain_Cerebellar_Hemisphere_all_snp_gene_associations: - -GTEx_sQTL_Brain_Cerebellar_Hemisphere_all_snp_gene_associations -=============================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Cortex_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Cortex_all_snp_gene_associations.rst deleted file mode 100644 index 8e62af681a6..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Cortex_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Brain_Cortex_all_snp_gene_associations: - -GTEx_sQTL_Brain_Cortex_all_snp_gene_associations -================================================ - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Frontal_Cortex_BA9_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Frontal_Cortex_BA9_all_snp_gene_associations.rst deleted file mode 100644 index 1ba6f881e77..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Frontal_Cortex_BA9_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Brain_Frontal_Cortex_BA9_all_snp_gene_associations: - -GTEx_sQTL_Brain_Frontal_Cortex_BA9_all_snp_gene_associations -============================================================ - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Hippocampus_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Hippocampus_all_snp_gene_associations.rst deleted file mode 100644 index 8ab36bd1461..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Hippocampus_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Brain_Hippocampus_all_snp_gene_associations: - -GTEx_sQTL_Brain_Hippocampus_all_snp_gene_associations -===================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.rst deleted file mode 100644 index b31b6fbac15..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations: - -GTEx_sQTL_Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations -========================================================================= - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Putamen_basal_ganglia_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Putamen_basal_ganglia_all_snp_gene_associations.rst deleted file mode 100644 index 6e3e63fc703..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Putamen_basal_ganglia_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Brain_Putamen_basal_ganglia_all_snp_gene_associations: - -GTEx_sQTL_Brain_Putamen_basal_ganglia_all_snp_gene_associations -=============================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.rst deleted file mode 100644 index acda830ef64..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations: - -GTEx_sQTL_Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations -================================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Substantia_nigra_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Substantia_nigra_all_snp_gene_associations.rst deleted file mode 100644 index e51d8f62902..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Substantia_nigra_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Brain_Substantia_nigra_all_snp_gene_associations: - -GTEx_sQTL_Brain_Substantia_nigra_all_snp_gene_associations -========================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Breast_Mammary_Tissue_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Breast_Mammary_Tissue_all_snp_gene_associations.rst deleted file mode 100644 index 5efb8007818..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Breast_Mammary_Tissue_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Breast_Mammary_Tissue_all_snp_gene_associations: - -GTEx_sQTL_Breast_Mammary_Tissue_all_snp_gene_associations -========================================================= - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Cells_Cultured_fibroblasts_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Cells_Cultured_fibroblasts_all_snp_gene_associations.rst deleted file mode 100644 index 3172e2c974d..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Cells_Cultured_fibroblasts_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Cells_Cultured_fibroblasts_all_snp_gene_associations: - -GTEx_sQTL_Cells_Cultured_fibroblasts_all_snp_gene_associations -============================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.rst deleted file mode 100644 index 9517b8766a3..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Cells_EBV-transformed_lymphocytes_all_snp_gene_associations: - -GTEx_sQTL_Cells_EBV-transformed_lymphocytes_all_snp_gene_associations -===================================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Colon_Transverse_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Colon_Transverse_all_snp_gene_associations.rst deleted file mode 100644 index f129ce9bcce..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Colon_Transverse_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Colon_Transverse_all_snp_gene_associations: - -GTEx_sQTL_Colon_Transverse_all_snp_gene_associations -==================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.rst deleted file mode 100644 index 5fb54441f2e..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Esophagus_Gastroesophageal_Junction_all_snp_gene_associations: - -GTEx_sQTL_Esophagus_Gastroesophageal_Junction_all_snp_gene_associations -======================================================================= - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Esophagus_Mucosa_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Esophagus_Mucosa_all_snp_gene_associations.rst deleted file mode 100644 index f92d2ef1265..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Esophagus_Mucosa_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Esophagus_Mucosa_all_snp_gene_associations: - -GTEx_sQTL_Esophagus_Mucosa_all_snp_gene_associations -==================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Esophagus_Muscularis_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Esophagus_Muscularis_all_snp_gene_associations.rst deleted file mode 100644 index d60e6b4a246..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Esophagus_Muscularis_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Esophagus_Muscularis_all_snp_gene_associations: - -GTEx_sQTL_Esophagus_Muscularis_all_snp_gene_associations -======================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Heart_Atrial_Appendage_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Heart_Atrial_Appendage_all_snp_gene_associations.rst deleted file mode 100644 index f7c8ceba38a..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Heart_Atrial_Appendage_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Heart_Atrial_Appendage_all_snp_gene_associations: - -GTEx_sQTL_Heart_Atrial_Appendage_all_snp_gene_associations -========================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Heart_Left_Ventricle_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Heart_Left_Ventricle_all_snp_gene_associations.rst deleted file mode 100644 index 17c994fb956..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Heart_Left_Ventricle_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Heart_Left_Ventricle_all_snp_gene_associations: - -GTEx_sQTL_Heart_Left_Ventricle_all_snp_gene_associations -======================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Kidney_Cortex_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Kidney_Cortex_all_snp_gene_associations.rst deleted file mode 100644 index 3cff963512b..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Kidney_Cortex_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Kidney_Cortex_all_snp_gene_associations: - -GTEx_sQTL_Kidney_Cortex_all_snp_gene_associations -================================================= - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Minor_Salivary_Gland_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Minor_Salivary_Gland_all_snp_gene_associations.rst deleted file mode 100644 index 12ae74f2ebb..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Minor_Salivary_Gland_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Minor_Salivary_Gland_all_snp_gene_associations: - -GTEx_sQTL_Minor_Salivary_Gland_all_snp_gene_associations -======================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Muscle_Skeletal_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Muscle_Skeletal_all_snp_gene_associations.rst deleted file mode 100644 index 6d9c90164b2..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Muscle_Skeletal_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Muscle_Skeletal_all_snp_gene_associations: - -GTEx_sQTL_Muscle_Skeletal_all_snp_gene_associations -=================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Nerve_Tibial_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Nerve_Tibial_all_snp_gene_associations.rst deleted file mode 100644 index a8ac97dd802..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Nerve_Tibial_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Nerve_Tibial_all_snp_gene_associations: - -GTEx_sQTL_Nerve_Tibial_all_snp_gene_associations -================================================ - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Prostate_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Prostate_all_snp_gene_associations.rst deleted file mode 100644 index 5fd8ee897ae..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Prostate_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Prostate_all_snp_gene_associations: - -GTEx_sQTL_Prostate_all_snp_gene_associations -============================================ - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.rst deleted file mode 100644 index 80f95bfc44e..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations: - -GTEx_sQTL_Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations -=================================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.rst deleted file mode 100644 index 184172d66a8..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations: - -GTEx_sQTL_Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations -============================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Small_Intestine_Terminal_Ileum_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Small_Intestine_Terminal_Ileum_all_snp_gene_associations.rst deleted file mode 100644 index b6e96ee8016..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Small_Intestine_Terminal_Ileum_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Small_Intestine_Terminal_Ileum_all_snp_gene_associations: - -GTEx_sQTL_Small_Intestine_Terminal_Ileum_all_snp_gene_associations -================================================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Testis_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Testis_all_snp_gene_associations.rst deleted file mode 100644 index a854831eb0f..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Testis_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Testis_all_snp_gene_associations: - -GTEx_sQTL_Testis_all_snp_gene_associations -========================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Uterus_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Uterus_all_snp_gene_associations.rst deleted file mode 100644 index 69e6c711f78..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Uterus_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Uterus_all_snp_gene_associations: - -GTEx_sQTL_Uterus_all_snp_gene_associations -========================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Vagina_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Vagina_all_snp_gene_associations.rst deleted file mode 100644 index db3f8ba27da..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Vagina_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Vagina_all_snp_gene_associations: - -GTEx_sQTL_Vagina_all_snp_gene_associations -========================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Whole_Blood_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Whole_Blood_all_snp_gene_associations.rst deleted file mode 100644 index 367a833e7e6..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Whole_Blood_all_snp_gene_associations.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_Whole_Blood_all_snp_gene_associations: - -GTEx_sQTL_Whole_Blood_all_snp_gene_associations -=============================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus<GRCh38> - 'alleles': array<str> - 'phenotype_id': struct { - intron: interval<locus<GRCh38>>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Spleen_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Adipose_Subcutaneous.rst similarity index 87% rename from hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Spleen_all_snp_gene_associations.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Adipose_Subcutaneous.rst index c258702aa3a..4403302cb9f 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Spleen_all_snp_gene_associations.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Adipose_Subcutaneous.rst @@ -1,7 +1,7 @@ -.. _GTEx_sQTL_Spleen_all_snp_gene_associations: +.. _GTEx_sQTL_allpairs_Adipose_Subcutaneous: -GTEx_sQTL_Spleen_all_snp_gene_associations -========================================== +GTEx_sQTL_allpairs_Adipose_Subcutaneous +======================================= * **Versions:** v8 * **Reference genome builds:** GRCh38 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Stomach_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Adipose_Visceral_Omentum.rst similarity index 91% rename from hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Stomach_all_snp_gene_associations.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Adipose_Visceral_Omentum.rst index 5b777f812f0..b0eff871a7e 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Stomach_all_snp_gene_associations.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Adipose_Visceral_Omentum.rst @@ -1,6 +1,6 @@ -.. _GTEx_sQTL_Stomach_all_snp_gene_associations: +.. _GTEx_sQTL_allpairs_Adipose_Visceral_Omentum: -GTEx_sQTL_Stomach_all_snp_gene_associations +GTEx_sQTL_allpairs_Adipose_Visceral_Omentum =========================================== * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Adrenal_Gland.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Adrenal_Gland.rst new file mode 100644 index 00000000000..436f78f73fc --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Adrenal_Gland.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_allpairs_Adrenal_Gland: + +GTEx_sQTL_allpairs_Adrenal_Gland +================================ + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Artery_Aorta.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Artery_Aorta.rst new file mode 100644 index 00000000000..a417c60633a --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Artery_Aorta.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_allpairs_Artery_Aorta: + +GTEx_sQTL_allpairs_Artery_Aorta +=============================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Artery_Coronary.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Artery_Coronary.rst new file mode 100644 index 00000000000..82957434abb --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Artery_Coronary.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_allpairs_Artery_Coronary: + +GTEx_sQTL_allpairs_Artery_Coronary +================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Artery_Tibial.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Artery_Tibial.rst new file mode 100644 index 00000000000..4818985ebe0 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Artery_Tibial.rst @@ -0,0 +1,42 @@ +.. 
_GTEx_sQTL_allpairs_Artery_Tibial: + +GTEx_sQTL_allpairs_Artery_Tibial +================================ + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Amygdala.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Amygdala.rst new file mode 100644 index 00000000000..fe07981a273 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Amygdala.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_allpairs_Brain_Amygdala: + +GTEx_sQTL_allpairs_Brain_Amygdala +================================= + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Artery_Coronary_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24.rst similarity index 84% rename from hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Artery_Coronary_all_snp_gene_associations.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24.rst index 1d4fb614395..ad57111f8c6 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Artery_Coronary_all_snp_gene_associations.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24.rst @@ -1,7 +1,7 @@ -.. _GTEx_sQTL_Artery_Coronary_all_snp_gene_associations: +.. 
_GTEx_sQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24: -GTEx_sQTL_Artery_Coronary_all_snp_gene_associations -=================================================== +GTEx_sQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24 +======================================================= * **Versions:** v8 * **Reference genome builds:** GRCh38 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Caudate_basal_ganglia.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Caudate_basal_ganglia.rst new file mode 100644 index 00000000000..fbe7b6d8334 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Caudate_basal_ganglia.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_allpairs_Brain_Caudate_basal_ganglia: + +GTEx_sQTL_allpairs_Brain_Caudate_basal_ganglia +============================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Cerebellar_Hemisphere.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Cerebellar_Hemisphere.rst new file mode 100644 index 00000000000..f9e382c3b05 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Cerebellar_Hemisphere.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_allpairs_Brain_Cerebellar_Hemisphere: + +GTEx_sQTL_allpairs_Brain_Cerebellar_Hemisphere +============================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Cerebellum.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Cerebellum.rst new file mode 100644 index 00000000000..e3637045be4 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Cerebellum.rst @@ -0,0 +1,42 @@ +.. 
_GTEx_sQTL_allpairs_Brain_Cerebellum: + +GTEx_sQTL_allpairs_Brain_Cerebellum +=================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Cortex.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Cortex.rst new file mode 100644 index 00000000000..e71b58449be --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Cortex.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_allpairs_Brain_Cortex: + +GTEx_sQTL_allpairs_Brain_Cortex +=============================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Thyroid_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Frontal_Cortex_BA9.rst similarity index 91% rename from hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Thyroid_all_snp_gene_associations.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Frontal_Cortex_BA9.rst index f56a112099f..38589f54a13 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Thyroid_all_snp_gene_associations.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Frontal_Cortex_BA9.rst @@ -1,6 +1,6 @@ -.. _GTEx_sQTL_Thyroid_all_snp_gene_associations: +.. _GTEx_sQTL_allpairs_Brain_Frontal_Cortex_BA9: -GTEx_sQTL_Thyroid_all_snp_gene_associations +GTEx_sQTL_allpairs_Brain_Frontal_Cortex_BA9 =========================================== * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Hippocampus.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Hippocampus.rst new file mode 100644 index 00000000000..aa9c72c87b9 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Hippocampus.rst @@ -0,0 +1,42 @@ +.. 
_GTEx_sQTL_allpairs_Brain_Hippocampus: + +GTEx_sQTL_allpairs_Brain_Hippocampus +==================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Hypothalamus.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Hypothalamus.rst new file mode 100644 index 00000000000..f2f59f5945f --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Hypothalamus.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_allpairs_Brain_Hypothalamus: + +GTEx_sQTL_allpairs_Brain_Hypothalamus +===================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Adipose_Subcutaneous_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia.rst similarity index 89% rename from hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Adipose_Subcutaneous_all_snp_gene_associations.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia.rst index 8ada5364461..66f21944ef2 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Adipose_Subcutaneous_all_snp_gene_associations.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia.rst @@ -1,6 +1,6 @@ -.. _GTEx_sQTL_Adipose_Subcutaneous_all_snp_gene_associations: +.. 
_GTEx_sQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia: -GTEx_sQTL_Adipose_Subcutaneous_all_snp_gene_associations +GTEx_sQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia ======================================================== * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Putamen_basal_ganglia.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Putamen_basal_ganglia.rst new file mode 100644 index 00000000000..6ba79533e93 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Putamen_basal_ganglia.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_allpairs_Brain_Putamen_basal_ganglia: + +GTEx_sQTL_allpairs_Brain_Putamen_basal_ganglia +============================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus<GRCh38> + 'alleles': array<str> + 'phenotype_id': struct { + intron: interval<locus<GRCh38>>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Artery_Tibial_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Spinal_cord_cervical_c-1.rst similarity index 90% rename from hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Artery_Tibial_all_snp_gene_associations.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Spinal_cord_cervical_c-1.rst index 70420933e8d..5ed0b6b3ed7 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Artery_Tibial_all_snp_gene_associations.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Spinal_cord_cervical_c-1.rst @@ -1,6 +1,6 @@ -.. _GTEx_sQTL_Artery_Tibial_all_snp_gene_associations: +.. _GTEx_sQTL_allpairs_Brain_Spinal_cord_cervical_c-1: -GTEx_sQTL_Artery_Tibial_all_snp_gene_associations +GTEx_sQTL_allpairs_Brain_Spinal_cord_cervical_c-1 ================================================= * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Liver_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Substantia_nigra.rst similarity index 91% rename from hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Liver_all_snp_gene_associations.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Substantia_nigra.rst index b31fb33dd72..cd4511b5d25 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Liver_all_snp_gene_associations.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Substantia_nigra.rst @@ -1,6 +1,6 @@ -.. _GTEx_sQTL_Liver_all_snp_gene_associations: +.. 
_GTEx_sQTL_allpairs_Brain_Substantia_nigra: -GTEx_sQTL_Liver_all_snp_gene_associations +GTEx_sQTL_allpairs_Brain_Substantia_nigra ========================================= * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Lung_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Breast_Mammary_Tissue.rst similarity index 92% rename from hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Lung_all_snp_gene_associations.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Breast_Mammary_Tissue.rst index 8cff1e193ae..93b2e173ad9 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Lung_all_snp_gene_associations.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Breast_Mammary_Tissue.rst @@ -1,6 +1,6 @@ -.. _GTEx_sQTL_Lung_all_snp_gene_associations: +.. _GTEx_sQTL_allpairs_Breast_Mammary_Tissue: -GTEx_sQTL_Lung_all_snp_gene_associations +GTEx_sQTL_allpairs_Breast_Mammary_Tissue ======================================== * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Pituitary_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Cells_Cultured_fibroblasts.rst similarity index 91% rename from hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Pituitary_all_snp_gene_associations.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Cells_Cultured_fibroblasts.rst index c6511b9404c..32475df3201 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Pituitary_all_snp_gene_associations.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Cells_Cultured_fibroblasts.rst @@ -1,6 +1,6 @@ -.. _GTEx_sQTL_Pituitary_all_snp_gene_associations: +.. _GTEx_sQTL_allpairs_Cells_Cultured_fibroblasts: -GTEx_sQTL_Pituitary_all_snp_gene_associations +GTEx_sQTL_allpairs_Cells_Cultured_fibroblasts ============================================= * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Cerebellum_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Cells_EBV-transformed_lymphocytes.rst similarity index 90% rename from hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Cerebellum_all_snp_gene_associations.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Cells_EBV-transformed_lymphocytes.rst index 4cd86a145fa..eef0af0b28c 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Cerebellum_all_snp_gene_associations.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Cells_EBV-transformed_lymphocytes.rst @@ -1,6 +1,6 @@ -.. _GTEx_sQTL_Brain_Cerebellum_all_snp_gene_associations: +.. _GTEx_sQTL_allpairs_Cells_EBV-transformed_lymphocytes: -GTEx_sQTL_Brain_Cerebellum_all_snp_gene_associations +GTEx_sQTL_allpairs_Cells_EBV-transformed_lymphocytes ==================================================== * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Colon_Sigmoid.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Colon_Sigmoid.rst new file mode 100644 index 00000000000..52255749085 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Colon_Sigmoid.rst @@ -0,0 +1,42 @@ +.. 
_GTEx_sQTL_allpairs_Colon_Sigmoid: + +GTEx_sQTL_allpairs_Colon_Sigmoid +================================ + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Colon_Transverse.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Colon_Transverse.rst new file mode 100644 index 00000000000..4b9ac30ec31 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Colon_Transverse.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_allpairs_Colon_Transverse: + +GTEx_sQTL_allpairs_Colon_Transverse +=================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Hypothalamus_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Esophagus_Gastroesophageal_Junction.rst similarity index 89% rename from hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Hypothalamus_all_snp_gene_associations.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Esophagus_Gastroesophageal_Junction.rst index 2865572a1a9..b1292d43d20 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Hypothalamus_all_snp_gene_associations.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Esophagus_Gastroesophageal_Junction.rst @@ -1,6 +1,6 @@ -.. _GTEx_sQTL_Brain_Hypothalamus_all_snp_gene_associations: +.. _GTEx_sQTL_allpairs_Esophagus_Gastroesophageal_Junction: -GTEx_sQTL_Brain_Hypothalamus_all_snp_gene_associations +GTEx_sQTL_allpairs_Esophagus_Gastroesophageal_Junction ====================================================== * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Esophagus_Mucosa.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Esophagus_Mucosa.rst new file mode 100644 index 00000000000..5bc94f4278f --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Esophagus_Mucosa.rst @@ -0,0 +1,42 @@ +.. 
_GTEx_sQTL_allpairs_Esophagus_Mucosa: + +GTEx_sQTL_allpairs_Esophagus_Mucosa +=================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Esophagus_Muscularis.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Esophagus_Muscularis.rst new file mode 100644 index 00000000000..b51e24620f9 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Esophagus_Muscularis.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_allpairs_Esophagus_Muscularis: + +GTEx_sQTL_allpairs_Esophagus_Muscularis +======================================= + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Ovary_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Heart_Atrial_Appendage.rst similarity index 91% rename from hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Ovary_all_snp_gene_associations.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Heart_Atrial_Appendage.rst index b2a7403e39a..78708f62780 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Ovary_all_snp_gene_associations.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Heart_Atrial_Appendage.rst @@ -1,6 +1,6 @@ -.. _GTEx_sQTL_Ovary_all_snp_gene_associations: +.. _GTEx_sQTL_allpairs_Heart_Atrial_Appendage: -GTEx_sQTL_Ovary_all_snp_gene_associations +GTEx_sQTL_allpairs_Heart_Atrial_Appendage ========================================= * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Heart_Left_Ventricle.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Heart_Left_Ventricle.rst new file mode 100644 index 00000000000..cb586153e85 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Heart_Left_Ventricle.rst @@ -0,0 +1,42 @@ +.. 
_GTEx_sQTL_allpairs_Heart_Left_Ventricle: + +GTEx_sQTL_allpairs_Heart_Left_Ventricle +======================================= + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Kidney_Cortex.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Kidney_Cortex.rst new file mode 100644 index 00000000000..cbc5df23483 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Kidney_Cortex.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_allpairs_Kidney_Cortex: + +GTEx_sQTL_allpairs_Kidney_Cortex +================================ + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Liver.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Liver.rst new file mode 100644 index 00000000000..5de66bab179 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Liver.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_allpairs_Liver: + +GTEx_sQTL_allpairs_Liver +======================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Lung.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Lung.rst new file mode 100644 index 00000000000..8bd1658b4de --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Lung.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_allpairs_Lung: + +GTEx_sQTL_allpairs_Lung +======================= + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Minor_Salivary_Gland.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Minor_Salivary_Gland.rst new file mode 100644 index 00000000000..ad8bb02e277 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Minor_Salivary_Gland.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_allpairs_Minor_Salivary_Gland: + +GTEx_sQTL_allpairs_Minor_Salivary_Gland +======================================= + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Muscle_Skeletal.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Muscle_Skeletal.rst new file mode 100644 index 00000000000..00ffa3f6066 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Muscle_Skeletal.rst @@ -0,0 +1,42 @@ +.. 
_GTEx_sQTL_allpairs_Muscle_Skeletal:
+
+GTEx_sQTL_allpairs_Muscle_Skeletal
+==================================
+
+* **Versions:** v8
+* **Reference genome builds:** GRCh38
+* **Type:** :class:`hail.Table`
+
+Schema (v8, GRCh38)
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+    ----------------------------------------
+    Global fields:
+        'metadata': struct {
+            name: str,
+            version: str,
+            reference_genome: str,
+            n_rows: int32,
+            n_partitions: int32
+        }
+    ----------------------------------------
+    Row fields:
+        'locus': locus<GRCh38>
+        'alleles': array<str>
+        'phenotype_id': struct {
+            intron: interval<locus<GRCh38>>,
+            cluster: str,
+            gene_id: str
+        }
+        'tss_distance': int32
+        'ma_samples': int32
+        'ma_count': int32
+        'maf': float64
+        'pval_nominal': float64
+        'slope': float64
+        'slope_se': float64
+    ----------------------------------------
+    Key: ['locus', 'alleles']
+    ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Nerve_Tibial.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Nerve_Tibial.rst
new file mode 100644
index 00000000000..477b8d5c4c1
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Nerve_Tibial.rst
@@ -0,0 +1,42 @@
+.. _GTEx_sQTL_allpairs_Nerve_Tibial:
+
+GTEx_sQTL_allpairs_Nerve_Tibial
+===============================
+
+* **Versions:** v8
+* **Reference genome builds:** GRCh38
+* **Type:** :class:`hail.Table`
+
+Schema (v8, GRCh38)
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+    ----------------------------------------
+    Global fields:
+        'metadata': struct {
+            name: str,
+            version: str,
+            reference_genome: str,
+            n_rows: int32,
+            n_partitions: int32
+        }
+    ----------------------------------------
+    Row fields:
+        'locus': locus<GRCh38>
+        'alleles': array<str>
+        'phenotype_id': struct {
+            intron: interval<locus<GRCh38>>,
+            cluster: str,
+            gene_id: str
+        }
+        'tss_distance': int32
+        'ma_samples': int32
+        'ma_count': int32
+        'maf': float64
+        'pval_nominal': float64
+        'slope': float64
+        'slope_se': float64
+    ----------------------------------------
+    Key: ['locus', 'alleles']
+    ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Ovary.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Ovary.rst
new file mode 100644
index 00000000000..67b13839c84
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Ovary.rst
@@ -0,0 +1,42 @@
+.. _GTEx_sQTL_allpairs_Ovary:
+
+GTEx_sQTL_allpairs_Ovary
+========================
+
+* **Versions:** v8
+* **Reference genome builds:** GRCh38
+* **Type:** :class:`hail.Table`
+
+Schema (v8, GRCh38)
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+    ----------------------------------------
+    Global fields:
+        'metadata': struct {
+            name: str,
+            version: str,
+            reference_genome: str,
+            n_rows: int32,
+            n_partitions: int32
+        }
+    ----------------------------------------
+    Row fields:
+        'locus': locus<GRCh38>
+        'alleles': array<str>
+        'phenotype_id': struct {
+            intron: interval<locus<GRCh38>>,
+            cluster: str,
+            gene_id: str
+        }
+        'tss_distance': int32
+        'ma_samples': int32
+        'ma_count': int32
+        'maf': float64
+        'pval_nominal': float64
+        'slope': float64
+        'slope_se': float64
+    ----------------------------------------
+    Key: ['locus', 'alleles']
+    ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Pancreas.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Pancreas.rst
new file mode 100644
index 00000000000..dcc44f3a5b4
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Pancreas.rst
@@ -0,0 +1,42 @@
+.. _GTEx_sQTL_allpairs_Pancreas:
+
+GTEx_sQTL_allpairs_Pancreas
+===========================
+
+* **Versions:** v8
+* **Reference genome builds:** GRCh38
+* **Type:** :class:`hail.Table`
+
+Schema (v8, GRCh38)
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+    ----------------------------------------
+    Global fields:
+        'metadata': struct {
+            name: str,
+            version: str,
+            reference_genome: str,
+            n_rows: int32,
+            n_partitions: int32
+        }
+    ----------------------------------------
+    Row fields:
+        'locus': locus<GRCh38>
+        'alleles': array<str>
+        'phenotype_id': struct {
+            intron: interval<locus<GRCh38>>,
+            cluster: str,
+            gene_id: str
+        }
+        'tss_distance': int32
+        'ma_samples': int32
+        'ma_count': int32
+        'maf': float64
+        'pval_nominal': float64
+        'slope': float64
+        'slope_se': float64
+    ----------------------------------------
+    Key: ['locus', 'alleles']
+    ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Pituitary.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Pituitary.rst
new file mode 100644
index 00000000000..71ea6b4db6e
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Pituitary.rst
@@ -0,0 +1,42 @@
+.. _GTEx_sQTL_allpairs_Pituitary:
+
+GTEx_sQTL_allpairs_Pituitary
+============================
+
+* **Versions:** v8
+* **Reference genome builds:** GRCh38
+* **Type:** :class:`hail.Table`
+
+Schema (v8, GRCh38)
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+    ----------------------------------------
+    Global fields:
+        'metadata': struct {
+            name: str,
+            version: str,
+            reference_genome: str,
+            n_rows: int32,
+            n_partitions: int32
+        }
+    ----------------------------------------
+    Row fields:
+        'locus': locus<GRCh38>
+        'alleles': array<str>
+        'phenotype_id': struct {
+            intron: interval<locus<GRCh38>>,
+            cluster: str,
+            gene_id: str
+        }
+        'tss_distance': int32
+        'ma_samples': int32
+        'ma_count': int32
+        'maf': float64
+        'pval_nominal': float64
+        'slope': float64
+        'slope_se': float64
+    ----------------------------------------
+    Key: ['locus', 'alleles']
+    ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Prostate.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Prostate.rst
new file mode 100644
index 00000000000..baeeff2e1bf
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Prostate.rst
@@ -0,0 +1,42 @@
+.. _GTEx_sQTL_allpairs_Prostate:
+
+GTEx_sQTL_allpairs_Prostate
+===========================
+
+* **Versions:** v8
+* **Reference genome builds:** GRCh38
+* **Type:** :class:`hail.Table`
+
+Schema (v8, GRCh38)
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+    ----------------------------------------
+    Global fields:
+        'metadata': struct {
+            name: str,
+            version: str,
+            reference_genome: str,
+            n_rows: int32,
+            n_partitions: int32
+        }
+    ----------------------------------------
+    Row fields:
+        'locus': locus<GRCh38>
+        'alleles': array<str>
+        'phenotype_id': struct {
+            intron: interval<locus<GRCh38>>,
+            cluster: str,
+            gene_id: str
+        }
+        'tss_distance': int32
+        'ma_samples': int32
+        'ma_count': int32
+        'maf': float64
+        'pval_nominal': float64
+        'slope': float64
+        'slope_se': float64
+    ----------------------------------------
+    Key: ['locus', 'alleles']
+    ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Amygdala_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic.rst
similarity index 90%
rename from hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Amygdala_all_snp_gene_associations.rst
rename to hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic.rst
index 176d54c9890..4bcb41d6c74 100644
--- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Amygdala_all_snp_gene_associations.rst
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic.rst
@@ -1,6 +1,6 @@
-.. _GTEx_sQTL_Brain_Amygdala_all_snp_gene_associations:
+.. _GTEx_sQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic:
 
-GTEx_sQTL_Brain_Amygdala_all_snp_gene_associations
+GTEx_sQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic
 ==================================================
 
 * **Versions:** v8
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Pancreas_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Skin_Sun_Exposed_Lower_leg.rst
similarity index 87%
rename from hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Pancreas_all_snp_gene_associations.rst
rename to hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Skin_Sun_Exposed_Lower_leg.rst
index ac85e8c57ea..f47fd638828 100644
--- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Pancreas_all_snp_gene_associations.rst
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Skin_Sun_Exposed_Lower_leg.rst
@@ -1,7 +1,7 @@
-.. _GTEx_sQTL_Pancreas_all_snp_gene_associations:
+.. _GTEx_sQTL_allpairs_Skin_Sun_Exposed_Lower_leg:
 
-GTEx_sQTL_Pancreas_all_snp_gene_associations
-============================================
+GTEx_sQTL_allpairs_Skin_Sun_Exposed_Lower_leg
+=============================================
 
 * **Versions:** v8
 * **Reference genome builds:** GRCh38
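All of the ``GTEx_sQTL_allpairs_*`` tables added and renamed in this patch share the schema shown above, keyed by ``['locus', 'alleles']``. The following is a minimal sketch of how one of them might be queried once loaded; it assumes the ``hl.experimental.load_dataset`` entry point together with the dataset names registered in ``datasets.json`` later in this patch, and the ``region``/``cloud`` arguments may not exist in every Hail version:

.. code-block:: python

    import hail as hl

    hl.init()

    # Hypothetical load of one of the sQTL all-pairs tables documented above;
    # 'region' and 'cloud' are assumptions about the load_dataset signature.
    ht = hl.experimental.load_dataset(
        name='GTEx_sQTL_allpairs_Nerve_Tibial',
        version='v8',
        reference_genome='GRCh38',
        region='us',
        cloud='gcp')

    # Keep nominally significant variant-intron pairs close to the TSS,
    # using the row fields listed in the schema.
    hits = ht.filter((ht.pval_nominal < 1e-6) & (hl.abs(ht.tss_distance) < 10_000))
    hits.select('maf', 'slope', 'pval_nominal',
                gene_id=hits.phenotype_id.gene_id).show(5)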
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Colon_Sigmoid_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Small_Intestine_Terminal_Ileum.rst
similarity index 90%
rename from hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Colon_Sigmoid_all_snp_gene_associations.rst
rename to hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Small_Intestine_Terminal_Ileum.rst
index ce243ff04f6..a0c0e060f84 100644
--- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Colon_Sigmoid_all_snp_gene_associations.rst
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Small_Intestine_Terminal_Ileum.rst
@@ -1,6 +1,6 @@
-.. _GTEx_sQTL_Colon_Sigmoid_all_snp_gene_associations:
+.. _GTEx_sQTL_allpairs_Small_Intestine_Terminal_Ileum:
 
-GTEx_sQTL_Colon_Sigmoid_all_snp_gene_associations
+GTEx_sQTL_allpairs_Small_Intestine_Terminal_Ileum
 =================================================
 
 * **Versions:** v8
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Spleen.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Spleen.rst
new file mode 100644
index 00000000000..66a842b49cf
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Spleen.rst
@@ -0,0 +1,42 @@
+.. _GTEx_sQTL_allpairs_Spleen:
+
+GTEx_sQTL_allpairs_Spleen
+=========================
+
+* **Versions:** v8
+* **Reference genome builds:** GRCh38
+* **Type:** :class:`hail.Table`
+
+Schema (v8, GRCh38)
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+    ----------------------------------------
+    Global fields:
+        'metadata': struct {
+            name: str,
+            version: str,
+            reference_genome: str,
+            n_rows: int32,
+            n_partitions: int32
+        }
+    ----------------------------------------
+    Row fields:
+        'locus': locus<GRCh38>
+        'alleles': array<str>
+        'phenotype_id': struct {
+            intron: interval<locus<GRCh38>>,
+            cluster: str,
+            gene_id: str
+        }
+        'tss_distance': int32
+        'ma_samples': int32
+        'ma_count': int32
+        'maf': float64
+        'pval_nominal': float64
+        'slope': float64
+        'slope_se': float64
+    ----------------------------------------
+    Key: ['locus', 'alleles']
+    ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Stomach.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Stomach.rst
new file mode 100644
index 00000000000..a16502ee211
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Stomach.rst
@@ -0,0 +1,42 @@
+.. _GTEx_sQTL_allpairs_Stomach:
+
+GTEx_sQTL_allpairs_Stomach
+==========================
+
+* **Versions:** v8
+* **Reference genome builds:** GRCh38
+* **Type:** :class:`hail.Table`
+
+Schema (v8, GRCh38)
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+    ----------------------------------------
+    Global fields:
+        'metadata': struct {
+            name: str,
+            version: str,
+            reference_genome: str,
+            n_rows: int32,
+            n_partitions: int32
+        }
+    ----------------------------------------
+    Row fields:
+        'locus': locus<GRCh38>
+        'alleles': array<str>
+        'phenotype_id': struct {
+            intron: interval<locus<GRCh38>>,
+            cluster: str,
+            gene_id: str
+        }
+        'tss_distance': int32
+        'ma_samples': int32
+        'ma_count': int32
+        'maf': float64
+        'pval_nominal': float64
+        'slope': float64
+        'slope_se': float64
+    ----------------------------------------
+    Key: ['locus', 'alleles']
+    ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Testis.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Testis.rst
new file mode 100644
index 00000000000..2120ff1f433
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Testis.rst
@@ -0,0 +1,42 @@
+.. _GTEx_sQTL_allpairs_Testis:
+
+GTEx_sQTL_allpairs_Testis
+=========================
+
+* **Versions:** v8
+* **Reference genome builds:** GRCh38
+* **Type:** :class:`hail.Table`
+
+Schema (v8, GRCh38)
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+    ----------------------------------------
+    Global fields:
+        'metadata': struct {
+            name: str,
+            version: str,
+            reference_genome: str,
+            n_rows: int32,
+            n_partitions: int32
+        }
+    ----------------------------------------
+    Row fields:
+        'locus': locus<GRCh38>
+        'alleles': array<str>
+        'phenotype_id': struct {
+            intron: interval<locus<GRCh38>>,
+            cluster: str,
+            gene_id: str
+        }
+        'tss_distance': int32
+        'ma_samples': int32
+        'ma_count': int32
+        'maf': float64
+        'pval_nominal': float64
+        'slope': float64
+        'slope_se': float64
+    ----------------------------------------
+    Key: ['locus', 'alleles']
+    ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Thyroid.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Thyroid.rst
new file mode 100644
index 00000000000..5c5d1bec7b1
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Thyroid.rst
@@ -0,0 +1,42 @@
+.. _GTEx_sQTL_allpairs_Thyroid:
+
+GTEx_sQTL_allpairs_Thyroid
+==========================
+
+* **Versions:** v8
+* **Reference genome builds:** GRCh38
+* **Type:** :class:`hail.Table`
+
+Schema (v8, GRCh38)
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+    ----------------------------------------
+    Global fields:
+        'metadata': struct {
+            name: str,
+            version: str,
+            reference_genome: str,
+            n_rows: int32,
+            n_partitions: int32
+        }
+    ----------------------------------------
+    Row fields:
+        'locus': locus<GRCh38>
+        'alleles': array<str>
+        'phenotype_id': struct {
+            intron: interval<locus<GRCh38>>,
+            cluster: str,
+            gene_id: str
+        }
+        'tss_distance': int32
+        'ma_samples': int32
+        'ma_count': int32
+        'maf': float64
+        'pval_nominal': float64
+        'slope': float64
+        'slope_se': float64
+    ----------------------------------------
+    Key: ['locus', 'alleles']
+    ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Uterus.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Uterus.rst
new file mode 100644
index 00000000000..8375d148fca
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Uterus.rst
@@ -0,0 +1,42 @@
+.. _GTEx_sQTL_allpairs_Uterus:
+
+GTEx_sQTL_allpairs_Uterus
+=========================
+
+* **Versions:** v8
+* **Reference genome builds:** GRCh38
+* **Type:** :class:`hail.Table`
+
+Schema (v8, GRCh38)
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+    ----------------------------------------
+    Global fields:
+        'metadata': struct {
+            name: str,
+            version: str,
+            reference_genome: str,
+            n_rows: int32,
+            n_partitions: int32
+        }
+    ----------------------------------------
+    Row fields:
+        'locus': locus<GRCh38>
+        'alleles': array<str>
+        'phenotype_id': struct {
+            intron: interval<locus<GRCh38>>,
+            cluster: str,
+            gene_id: str
+        }
+        'tss_distance': int32
+        'ma_samples': int32
+        'ma_count': int32
+        'maf': float64
+        'pval_nominal': float64
+        'slope': float64
+        'slope_se': float64
+    ----------------------------------------
+    Key: ['locus', 'alleles']
+    ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Vagina.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Vagina.rst
new file mode 100644
index 00000000000..7c583ad799e
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Vagina.rst
@@ -0,0 +1,42 @@
+.. _GTEx_sQTL_allpairs_Vagina:
+
+GTEx_sQTL_allpairs_Vagina
+=========================
+
+* **Versions:** v8
+* **Reference genome builds:** GRCh38
+* **Type:** :class:`hail.Table`
+
+Schema (v8, GRCh38)
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+    ----------------------------------------
+    Global fields:
+        'metadata': struct {
+            name: str,
+            version: str,
+            reference_genome: str,
+            n_rows: int32,
+            n_partitions: int32
+        }
+    ----------------------------------------
+    Row fields:
+        'locus': locus<GRCh38>
+        'alleles': array<str>
+        'phenotype_id': struct {
+            intron: interval<locus<GRCh38>>,
+            cluster: str,
+            gene_id: str
+        }
+        'tss_distance': int32
+        'ma_samples': int32
+        'ma_count': int32
+        'maf': float64
+        'pval_nominal': float64
+        'slope': float64
+        'slope_se': float64
+    ----------------------------------------
+    Key: ['locus', 'alleles']
+    ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Whole_Blood.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Whole_Blood.rst
new file mode 100644
index 00000000000..245107ebe49
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Whole_Blood.rst
@@ -0,0 +1,42 @@
+.. _GTEx_sQTL_allpairs_Whole_Blood:
+
+GTEx_sQTL_allpairs_Whole_Blood
+==============================
+
+* **Versions:** v8
+* **Reference genome builds:** GRCh38
+* **Type:** :class:`hail.Table`
+
+Schema (v8, GRCh38)
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+    ----------------------------------------
+    Global fields:
+        'metadata': struct {
+            name: str,
+            version: str,
+            reference_genome: str,
+            n_rows: int32,
+            n_partitions: int32
+        }
+    ----------------------------------------
+    Row fields:
+        'locus': locus<GRCh38>
+        'alleles': array<str>
+        'phenotype_id': struct {
+            intron: interval<locus<GRCh38>>,
+            cluster: str,
+            gene_id: str
+        }
+        'tss_distance': int32
+        'ma_samples': int32
+        'ma_count': int32
+        'maf': float64
+        'pval_nominal': float64
+        'slope': float64
+        'slope_se': float64
+    ----------------------------------------
+    Key: ['locus', 'alleles']
+    ----------------------------------------
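Each entry in ``hail/python/hail/experimental/datasets.json`` (rewritten later in this patch) maps a dataset name to per-cloud, per-region storage URLs for every version and reference genome. A rough, illustrative sketch of resolving such an entry by hand under that layout; in practice the packaged ``hl.experimental.load_dataset`` helper performs this lookup:

.. code-block:: python

    import json

    import hail as hl

    # Illustrative only: pick a URL out of the registry by hand, following the
    # name / versions / url[cloud][region] layout used in datasets.json.
    with open('hail/python/hail/experimental/datasets.json') as f:
        registry = json.load(f)

    entry = registry['GTEx_eQTL_allpairs_Liver']
    spec = next(v for v in entry['versions']
                if v['version'] == 'v8' and v['reference_genome'] == 'GRCh38')
    url = spec['url']['gcp']['us']  # gs:// path to a Hail Table

    ht = hl.read_table(url)
    ht.describe()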
diff --git a/hail/python/hail/docs/datasets/schemas/dbSNP.rst b/hail/python/hail/docs/datasets/schemas/dbSNP.rst
deleted file mode 100644
index 9aa89d21ce5..00000000000
--- a/hail/python/hail/docs/datasets/schemas/dbSNP.rst
+++ /dev/null
@@ -1,99 +0,0 @@
-.. _dbSNP:
-
-dbSNP
-=====
-
-* **Versions:** 154
-* **Reference genome builds:** GRCh37, GRCh38
-* **Type:** :class:`hail.Table`
-
-Schema (154, GRCh37)
-~~~~~~~~~~~~~~~~~~~~
-
-.. code-block:: text
-
-    ----------------------------------------
-    Global fields:
-        'metadata': struct {
-            name: str,
-            version: str,
-            reference_genome: str,
-            n_rows: int32,
-            n_partitions: int32
-        }
-    ----------------------------------------
-    Row fields:
-        'locus': locus<GRCh37>
-        'alleles': array<str>
-        'rsid': str
-        'qual': float64
-        'filters': set<str>
-        'info': struct {
-            RS: int32,
-            GENEINFO: str,
-            PSEUDOGENEINFO: str,
-            dbSNPBuildID: int32,
-            SAO: int32,
-            SSR: int32,
-            VC: str,
-            PM: bool,
-            NSF: bool,
-            NSM: bool,
-            NSN: bool,
-            SYN: bool,
-            U3: bool,
-            U5: bool,
-            ASS: bool,
-            DSS: bool,
-            INT: bool,
-            R3: bool,
-            R5: bool,
-            GNO: bool,
-            PUB: bool,
-            FREQ: struct {
-                _GENOME_DK: float64,
-                _TWINSUK: float64,
-                _dbGaP_PopFreq: float64,
-                _Siberian: float64,
-                _Chileans: float64,
-                _FINRISK: float64,
-                _HapMap: float64,
-                _Estonian: float64,
-                _ALSPAC: float64,
-                _GoESP: float64,
-                _TOPMED: float64,
-                _PAGE_STUDY: float64,
-                _1000Genomes: float64,
-                _Korea1K: float64,
-                _ChromosomeY: float64,
-                _ExAC: float64,
-                _Qatari: float64,
-                _GoNL: float64,
-                _MGP: float64,
-                _GnomAD: float64,
-                _Vietnamese: float64,
-                _GnomAD_exomes: float64,
-                _PharmGKB: float64,
-                _KOREAN: float64,
-                _Daghestan: float64,
-                _HGDP_Stanford: float64,
-                _NorthernSweden: float64,
-                _SGDP_PRJ: float64
-            },
-            COMMON: bool,
-            CLNHGVS: array<str>,
-            CLNVI: array<str>,
-            CLNORIGIN: array<str>,
-            CLNSIG: array<str>,
-            CLNDISDB: array<str>,
-            CLNDN: array<str>,
-            CLNREVSTAT: array<str>,
-            CLNACC: array<str>
-        }
-        'a_index': int32
-        'was_split': bool
-        'old_locus': locus<GRCh37>
-        'old_alleles': array<str>
-    ----------------------------------------
-    Key: ['locus', 'alleles']
-    ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/dbSNP_rsid.rst b/hail/python/hail/docs/datasets/schemas/dbSNP_rsid.rst
deleted file mode 100644
index 42556396bec..00000000000
--- a/hail/python/hail/docs/datasets/schemas/dbSNP_rsid.rst
+++ /dev/null
@@ -1,31 +0,0 @@
-.. _dbSNP_rsid:
-
-dbSNP_rsid
-==========
-
-* **Versions:** 154
-* **Reference genome builds:** GRCh37, GRCh38
-* **Type:** :class:`hail.Table`
-
-Schema (154, GRCh37)
-~~~~~~~~~~~~~~~~~~~~
-
-.. code-block:: text
-
-    ----------------------------------------
-    Global fields:
-        'metadata': struct {
-            name: str,
-            version: str,
-            reference_genome: str,
-            n_rows: int32,
-            n_partitions: int32
-        }
-    ----------------------------------------
-    Row fields:
-        'locus': locus<GRCh37>
-        'alleles': array<str>
-        'rsid': str
-    ----------------------------------------
-    Key: ['locus', 'alleles']
-    ----------------------------------------
diff --git a/hail/python/hail/docs/functions/collections.rst b/hail/python/hail/docs/functions/collections.rst
index 7b2e1f322bb..978db0183a1 100644
--- a/hail/python/hail/docs/functions/collections.rst
+++ b/hail/python/hail/docs/functions/collections.rst
@@ -22,7 +22,6 @@ Collection functions
     len
     map
     flatmap
-    starmap
     zip
     enumerate
     zip_with_index
@@ -40,7 +39,6 @@ Collection functions
 .. autofunction:: len
 .. autofunction:: map
 .. autofunction:: flatmap
-.. autofunction:: starmap
 .. autofunction:: zip
 .. autofunction:: enumerate
 .. autofunction:: zip_with_index
diff --git a/hail/python/hail/docs/utils/index.rst b/hail/python/hail/docs/utils/index.rst
index 04f54fad047..373eec9d97d 100644
--- a/hail/python/hail/docs/utils/index.rst
+++ b/hail/python/hail/docs/utils/index.rst
@@ -15,12 +15,10 @@ utils
     hadoop_is_dir
     hadoop_stat
     hadoop_ls
-    hadoop_scheme_supported
     copy_log
     range_table
     range_matrix_table
     get_1kg
-    get_hgdp
     get_movie_lens
 
 .. autoclass:: Interval
@@ -33,10 +31,8 @@ utils
 ..
autofunction:: hadoop_is_dir .. autofunction:: hadoop_stat .. autofunction:: hadoop_ls -.. autofunction:: hadoop_scheme_supported .. autofunction:: copy_log .. autofunction:: range_table .. autofunction:: range_matrix_table .. autofunction:: get_1kg -.. autofunction:: get_hgdp .. autofunction:: get_movie_lens diff --git a/hail/python/hail/experimental/datasets.json b/hail/python/hail/experimental/datasets.json index 89477f742d1..61bfad05512 100644 --- a/hail/python/hail/experimental/datasets.json +++ b/hail/python/hail/experimental/datasets.json @@ -1,214 +1,113 @@ { - "1000_Genomes_HighCov_autosomes": { - "description": "1000 Genomes Project: The New York Genome Center (NYGC), funded by NHGRI, has sequenced 3202 samples from the 1000 Genomes Project sample collection to 30x coverage. Initially, the 2504 unrelated samples from the phase three panel from the 1000 Genomes Project were sequenced. Thereafter, an additional 698 samples, related to samples in the 2504 panel, were also sequenced.", - "url": "https://www.internationalgenome.org/data-portal/data-collection/30x-grch38", - "versions": [ - { - "reference_genome": "GRCh38", - "url": { - "aws": { - "us": "s3://hail-datasets-us-east-1/1000_Genomes/NYGC_30x/GRCh38/autosomes_phased.mt" - }, - "gcp": { - "eu": "gs://hail-datasets-eu/1000_Genomes/NYGC_30x/GRCh38/autosomes_phased.mt", - "us": "gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/autosomes_phased.mt" - } - }, - "version": "NYGC_30x_phased" - }, - { - "reference_genome": "GRCh38", - "url": { - "aws": { - "us": "s3://hail-datasets-us-east-1/1000_Genomes/NYGC_30x/GRCh38/autosomes_unphased.mt" - }, - "gcp": { - "eu": "gs://hail-datasets-eu/1000_Genomes/NYGC_30x/GRCh38/autosomes_unphased.mt", - "us": "gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/autosomes_unphased.mt" - } - }, - "version": "NYGC_30x_unphased" - } - ] - }, - "1000_Genomes_HighCov_chrX": { - "description": "1000 Genomes Project: The New York Genome Center (NYGC), funded by NHGRI, has sequenced 3202 samples from the 1000 Genomes Project sample collection to 30x coverage. Initially, the 2504 unrelated samples from the phase three panel from the 1000 Genomes Project were sequenced. Thereafter, an additional 698 samples, related to samples in the 2504 panel, were also sequenced.", - "url": "https://www.internationalgenome.org/data-portal/data-collection/30x-grch38", - "versions": [ - { - "reference_genome": "GRCh38", - "url": { - "aws": { - "us": "s3://hail-datasets-us-east-1/1000_Genomes/NYGC_30x/GRCh38/chrX_phased.mt" - }, - "gcp": { - "eu": "gs://hail-datasets-eu/1000_Genomes/NYGC_30x/GRCh38/chrX_phased.mt", - "us": "gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/chrX_phased.mt" - } - }, - "version": "NYGC_30x_phased" - }, - { - "reference_genome": "GRCh38", - "url": { - "aws": { - "us": "s3://hail-datasets-us-east-1/1000_Genomes/NYGC_30x/GRCh38/chrX_unphased.mt" - }, - "gcp": { - "eu": "gs://hail-datasets-eu/1000_Genomes/NYGC_30x/GRCh38/chrX_unphased.mt", - "us": "gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/chrX_unphased.mt" - } - }, - "version": "NYGC_30x_unphased" - } - ] - }, - "1000_Genomes_HighCov_chrY": { - "description": "1000 Genomes Project: The New York Genome Center (NYGC), funded by NHGRI, has sequenced 3202 samples from the 1000 Genomes Project sample collection to 30x coverage. Initially, the 2504 unrelated samples from the phase three panel from the 1000 Genomes Project were sequenced. 
Thereafter, an additional 698 samples, related to samples in the 2504 panel, were also sequenced.", - "url": "https://www.internationalgenome.org/data-portal/data-collection/30x-grch38", - "versions": [ - { - "reference_genome": "GRCh38", - "url": { - "aws": { - "us": "s3://hail-datasets-us-east-1/1000_Genomes/NYGC_30x/GRCh38/chrY_unphased.mt" - }, - "gcp": { - "eu": "gs://hail-datasets-eu/1000_Genomes/NYGC_30x/GRCh38/chrY_unphased.mt", - "us": "gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/chrY_unphased.mt" - } - }, - "version": "NYGC_30x_unphased" - } - ] - }, - "1000_Genomes_Retracted_autosomes": { - "description": "1000 Genomes Project: These datasets have been retracted due to a number of known issues on GRCh38, see link for more details.", - "url": "http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/supporting/GRCh38_positions/README_GRCh38_liftover_20170504.txt", + "1000_Genomes_autosomes": { + "description": "1000 Genomes Project: the largest public catalogue of human variation and genotype data.", + "url": "https://www.internationalgenome.org/home", "versions": [ { - "reference_genome": "GRCh38", + "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/GRCh38/autosomes.mt" + "us": "s3://hail-datasets-us-east-1/1000_Genomes_autosomes.phase_3.GRCh37.mt" }, "gcp": { - "eu": "gs://hail-datasets-eu/1000_Genomes/phase_3/GRCh38/autosomes.mt", - "us": "gs://hail-datasets-us/1000_Genomes/phase_3/GRCh38/autosomes.mt" + "eu": "gs://hail-datasets-eu/1000_Genomes_autosomes.phase_3.GRCh37.mt", + "us": "gs://hail-datasets-us/1000_Genomes_autosomes.phase_3.GRCh37.mt" } }, "version": "phase_3" - } - ] - }, - "1000_Genomes_Retracted_chrX": { - "description": "1000 Genomes Project: These datasets have been retracted due to a number of known issues on GRCh38, see link for more details.", - "url": "http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/supporting/GRCh38_positions/README_GRCh38_liftover_20170504.txt", - "versions": [ + }, { "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/GRCh38/chrX.mt" + "us": "s3://hail-datasets-us-east-1/1000_Genomes_autosomes.phase_3.GRCh38.mt" }, "gcp": { - "eu": "gs://hail-datasets-eu/1000_Genomes/phase_3/GRCh38/chrX.mt", - "us": "gs://hail-datasets-us/1000_Genomes/phase_3/GRCh38/chrX.mt" + "eu": "gs://hail-datasets-eu/1000_Genomes_autosomes.phase_3.GRCh38.mt", + "us": "gs://hail-datasets-us/1000_Genomes_autosomes.phase_3.GRCh38.mt" } }, "version": "phase_3" } ] }, - "1000_Genomes_Retracted_chrY": { - "description": "1000 Genomes Project: These datasets have been retracted due to a number of known issues on GRCh38, see link for more details.", - "url": "http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/supporting/GRCh38_positions/README_GRCh38_liftover_20170504.txt", + "1000_Genomes_chrMT": { + "description": "1000 Genomes Project: the largest public catalogue of human variation and genotype data.", + "url": "https://www.internationalgenome.org/home", "versions": [ { - "reference_genome": "GRCh38", + "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/GRCh38/chrY.mt" + "us": "s3://hail-datasets-us-east-1/1000_Genomes_chrMT.phase_3.GRCh37.mt" }, "gcp": { - "eu": "gs://hail-datasets-eu/1000_Genomes/phase_3/GRCh38/chrY.mt", - "us": "gs://hail-datasets-us/1000_Genomes/phase_3/GRCh38/chrY.mt" + "eu": "gs://hail-datasets-eu/1000_Genomes_chrMT.phase_3.GRCh37.mt", + "us": 
"gs://hail-datasets-us/1000_Genomes_chrMT.phase_3.GRCh37.mt" } }, "version": "phase_3" } ] }, - "1000_Genomes_autosomes": { - "description": "1000 Genomes Project: The GRCh38 phase_3 version has been retracted, but is still available (see the 1000_Genomes_Retracted_autosomes dataset). For GRCh38, the NYGC 30x coverage autosomes phased dataset is available as 1000_Genomes_HighCov_autosomes.", + "1000_Genomes_chrX": { + "description": "1000 Genomes Project: the largest public catalogue of human variation and genotype data.", "url": "https://www.internationalgenome.org/home", "versions": [ { "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/GRCh37/autosomes.mt" + "us": "s3://hail-datasets-us-east-1/1000_Genomes_chrX.phase_3.GRCh37.mt" }, "gcp": { - "eu": "gs://hail-datasets-eu/1000_Genomes/phase_3/GRCh37/autosomes.mt", - "us": "gs://hail-datasets-us/1000_Genomes/phase_3/GRCh37/autosomes.mt" + "eu": "gs://hail-datasets-eu/1000_Genomes_chrX.phase_3.GRCh37.mt", + "us": "gs://hail-datasets-us/1000_Genomes_chrX.phase_3.GRCh37.mt" } }, "version": "phase_3" - } - ] - }, - "1000_Genomes_chrMT": { - "description": "1000 Genomes Project: Mitochondrial chromosome variants.", - "url": "https://www.internationalgenome.org/home", - "versions": [ + }, { - "reference_genome": "GRCh37", + "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/GRCh37/chrMT.mt" + "us": "s3://hail-datasets-us-east-1/1000_Genomes_chrX.phase_3.GRCh38.mt" }, "gcp": { - "eu": "gs://hail-datasets-eu/1000_Genomes/phase_3/GRCh37/chrMT.mt", - "us": "gs://hail-datasets-us/1000_Genomes/phase_3/GRCh37/chrMT.mt" + "eu": "gs://hail-datasets-eu/1000_Genomes_chrX.phase_3.GRCh38.mt", + "us": "gs://hail-datasets-us/1000_Genomes_chrX.phase_3.GRCh38.mt" } }, "version": "phase_3" } ] }, - "1000_Genomes_chrX": { - "description": "1000 Genomes Project: The GRCh38 phase_3 version has been retracted, but is still available (see the 1000_Genomes_Retracted_chrX dataset). For GRCh38, the NYGC 30x coverage chrX phased dataset is available as 1000_Genomes_HighCov_chrX.", + "1000_Genomes_chrY": { + "description": "1000 Genomes Project: the largest public catalogue of human variation and genotype data.", "url": "https://www.internationalgenome.org/home", "versions": [ { "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/GRCh37/chrX.mt" + "us": "s3://hail-datasets-us-east-1/1000_Genomes_chrY.phase_3.GRCh37.mt" }, "gcp": { - "eu": "gs://hail-datasets-eu/1000_Genomes/phase_3/GRCh37/chrX.mt", - "us": "gs://hail-datasets-us/1000_Genomes/phase_3/GRCh37/chrX.mt" + "eu": "gs://hail-datasets-eu/1000_Genomes_chrY.phase_3.GRCh37.mt", + "us": "gs://hail-datasets-us/1000_Genomes_chrY.phase_3.GRCh37.mt" } }, "version": "phase_3" - } - ] - }, - "1000_Genomes_chrY": { - "description": "1000 Genomes Project: The GRCh38 phase_3 version has been retracted, but is still available (see the 1000_Genomes_Retracted_chrY dataset). 
For GRCh38, the NYGC 30x coverage chrY dataset is available as 1000_Genomes_HighCov_chrY.", - "url": "https://www.internationalgenome.org/home", - "versions": [ + }, { - "reference_genome": "GRCh37", + "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/GRCh37/chrY.mt" + "us": "s3://hail-datasets-us-east-1/1000_Genomes_chrY.phase_3.GRCh38.mt" }, "gcp": { - "eu": "gs://hail-datasets-eu/1000_Genomes/phase_3/GRCh37/chrY.mt", - "us": "gs://hail-datasets-us/1000_Genomes/phase_3/GRCh37/chrY.mt" + "eu": "gs://hail-datasets-eu/1000_Genomes_chrY.phase_3.GRCh38.mt", + "us": "gs://hail-datasets-us/1000_Genomes_chrY.phase_3.GRCh38.mt" } }, "version": "phase_3" @@ -228,11 +127,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/CADD/v1.4/GRCh37/table.ht" + "us": "s3://hail-datasets-us-east-1/annotations/CADD.v1.4.GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/CADD/v1.4/GRCh37/table.ht", - "us": "gs://hail-datasets-us/CADD/v1.4/GRCh37/table.ht" + "eu": "gs://hail-datasets-eu/annotations/CADD.v1.4.GRCh37.ht", + "us": "gs://hail-datasets-us/annotations/CADD.v1.4.GRCh37.ht" } }, "version": "1.4" @@ -241,40 +140,14 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/CADD/v1.4/GRCh38/table.ht" + "us": "s3://hail-datasets-us-east-1/annotations/CADD.v1.4.GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/CADD/v1.4/GRCh38/table.ht", - "us": "gs://hail-datasets-us/CADD/v1.4/GRCh38/table.ht" + "eu": "gs://hail-datasets-eu/annotations/CADD.v1.4.GRCh38.ht", + "us": "gs://hail-datasets-us/annotations/CADD.v1.4.GRCh38.ht" } }, "version": "1.4" - }, - { - "reference_genome": "GRCh37", - "url": { - "aws": { - "us": "s3://hail-datasets-us-east-1/CADD/v1.6/GRCh37/table.ht" - }, - "gcp": { - "eu": "gs://hail-datasets-eu/CADD/v1.6/GRCh37/table.ht", - "us": "gs://hail-datasets-us/CADD/v1.6/GRCh37/table.ht" - } - }, - "version": "1.6" - }, - { - "reference_genome": "GRCh38", - "url": { - "aws": { - "us": "s3://hail-datasets-us-east-1/CADD/v1.6/GRCh38/table.ht" - }, - "gcp": { - "eu": "gs://hail-datasets-eu/CADD/v1.6/GRCh38/table.ht", - "us": "gs://hail-datasets-us/CADD/v1.6/GRCh38/table.ht" - } - }, - "version": "1.6" } ] }, @@ -291,11 +164,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/DANN/GRCh37/table.ht" + "us": "s3://hail-datasets-us-east-1/annotations/DANN.GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/DANN/GRCh37/table.ht", - "us": "gs://hail-datasets-us/DANN/GRCh37/table.ht" + "eu": "gs://hail-datasets-eu/annotations/DANN.GRCh37.ht", + "us": "gs://hail-datasets-us/annotations/DANN.GRCh37.ht" } }, "version": null @@ -304,11 +177,11 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/DANN/GRCh38/table.ht" + "us": "s3://hail-datasets-us-east-1/annotations/DANN.GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/DANN/GRCh38/table.ht", - "us": "gs://hail-datasets-us/DANN/GRCh38/table.ht" + "eu": "gs://hail-datasets-eu/annotations/DANN.GRCh38.ht", + "us": "gs://hail-datasets-us/annotations/DANN.GRCh38.ht" } }, "version": null @@ -328,11 +201,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/Ensembl/release_95/GRCh37/homo_sapiens_low_complexity_regions.ht" + "us": "s3://hail-datasets-us-east-1/annotations/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh37.ht" }, "gcp": { - "eu": 
"gs://hail-datasets-eu/Ensembl/release_95/GRCh37/homo_sapiens_low_complexity_regions.ht", - "us": "gs://hail-datasets-us/Ensembl/release_95/GRCh37/homo_sapiens_low_complexity_regions.ht" + "eu": "gs://hail-datasets-eu/annotations/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh37.ht", + "us": "gs://hail-datasets-us/annotations/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh37.ht" } }, "version": "release_95" @@ -341,11 +214,11 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/Ensembl/release_95/GRCh38/homo_sapiens_low_complexity_regions.ht" + "us": "s3://hail-datasets-us-east-1/annotations/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/Ensembl/release_95/GRCh38/homo_sapiens_low_complexity_regions.ht", - "us": "gs://hail-datasets-us/Ensembl/release_95/GRCh38/homo_sapiens_low_complexity_regions.ht" + "eu": "gs://hail-datasets-eu/annotations/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh38.ht", + "us": "gs://hail-datasets-us/annotations/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh38.ht" } }, "version": "release_95" @@ -365,11 +238,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/Ensembl/release_95/GRCh37/homo_sapiens_reference_genome.ht" + "us": "s3://hail-datasets-us-east-1/annotations/Ensembl_homo_sapiens_reference_genome.release_95.GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/Ensembl/release_95/GRCh37/homo_sapiens_reference_genome.ht", - "us": "gs://hail-datasets-us/Ensembl/release_95/GRCh37/homo_sapiens_reference_genome.ht" + "eu": "gs://hail-datasets-eu/annotations/Ensembl_homo_sapiens_reference_genome.release_95.GRCh37.ht", + "us": "gs://hail-datasets-us/annotations/Ensembl_homo_sapiens_reference_genome.release_95.GRCh37.ht" } }, "version": "release_95" @@ -378,11 +251,11 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/Ensembl/release_95/GRCh38/homo_sapiens_reference_genome.ht" + "us": "s3://hail-datasets-us-east-1/annotations/Ensembl_homo_sapiens_reference_genome.release_95.GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/Ensembl/release_95/GRCh38/homo_sapiens_reference_genome.ht", - "us": "gs://hail-datasets-us/Ensembl/release_95/GRCh38/homo_sapiens_reference_genome.ht" + "eu": "gs://hail-datasets-eu/annotations/Ensembl_homo_sapiens_reference_genome.release_95.GRCh38.ht", + "us": "gs://hail-datasets-us/annotations/Ensembl_homo_sapiens_reference_genome.release_95.GRCh38.ht" } }, "version": "release_95" @@ -397,11 +270,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v7/GRCh37/RNA_seq_gene_TPMs.mt" + "us": "s3://hail-datasets-us-east-1/GTEx_RNA_seq_gene_TPMs.v7.GRCh37.mt" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v7/GRCh37/RNA_seq_gene_TPMs.mt", - "us": "gs://hail-datasets-us/GTEx/v7/GRCh37/RNA_seq_gene_TPMs.mt" + "eu": "gs://hail-datasets-eu/GTEx_RNA_seq_gene_TPMs.v7.GRCh37.mt", + "us": "gs://hail-datasets-us/GTEx_RNA_seq_gene_TPMs.v7.GRCh37.mt" } }, "version": "v7" @@ -416,11 +289,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v7/GRCh37/RNA_seq_gene_read_counts.mt" + "us": "s3://hail-datasets-us-east-1/GTEx_RNA_seq_gene_read_counts.v7.GRCh37.mt" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v7/GRCh37/RNA_seq_gene_read_counts.mt", - "us": "gs://hail-datasets-us/GTEx/v7/GRCh37/RNA_seq_gene_read_counts.mt" + "eu": 
"gs://hail-datasets-eu/GTEx_RNA_seq_gene_read_counts.v7.GRCh37.mt", + "us": "gs://hail-datasets-us/GTEx_RNA_seq_gene_read_counts.v7.GRCh37.mt" } }, "version": "v7" @@ -435,18 +308,18 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v7/GRCh37/RNA_seq_junction_read_counts.mt" + "us": "s3://hail-datasets-us-east-1/GTEx_RNA_seq_junction_read_counts.v7.GRCh37.mt" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v7/GRCh37/RNA_seq_junction_read_counts.mt", - "us": "gs://hail-datasets-us/GTEx/v7/GRCh37/RNA_seq_junction_read_counts.mt" + "eu": "gs://hail-datasets-eu/GTEx_RNA_seq_junction_read_counts.v7.GRCh37.mt", + "us": "gs://hail-datasets-us/GTEx_RNA_seq_junction_read_counts.v7.GRCh37.mt" } }, "version": "v7" } ] }, - "GTEx_eQTL_Adipose_Subcutaneous_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Adipose_Subcutaneous": { "annotation_db": { "key_properties": [] }, @@ -457,18 +330,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Adipose_Subcutaneous_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Adipose_Subcutaneous_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Adipose_Subcutaneous_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Adipose_Subcutaneous_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Adipose_Subcutaneous_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Adipose_Subcutaneous_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Adipose_Visceral_Omentum_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Adipose_Visceral_Omentum": { "annotation_db": { "key_properties": [] }, @@ -479,18 +352,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Adipose_Visceral_Omentum_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Adipose_Visceral_Omentum_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Adipose_Visceral_Omentum_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Adipose_Visceral_Omentum_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Adipose_Visceral_Omentum_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Adipose_Visceral_Omentum_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Adrenal_Gland_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Adrenal_Gland": { "annotation_db": { "key_properties": [] }, @@ -501,18 +374,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Adrenal_Gland_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Adrenal_Gland_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Adrenal_Gland_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Adrenal_Gland_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Adrenal_Gland_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Adrenal_Gland_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Artery_Aorta_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Artery_Aorta": { "annotation_db": { "key_properties": [] }, @@ -523,18 +396,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": 
"s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Artery_Aorta_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Artery_Aorta_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Artery_Aorta_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Artery_Aorta_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Artery_Aorta_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Artery_Aorta_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Artery_Coronary_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Artery_Coronary": { "annotation_db": { "key_properties": [] }, @@ -545,18 +418,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Artery_Coronary_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Artery_Coronary_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Artery_Coronary_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Artery_Coronary_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Artery_Coronary_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Artery_Coronary_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Artery_Tibial_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Artery_Tibial": { "annotation_db": { "key_properties": [] }, @@ -567,18 +440,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Artery_Tibial_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Artery_Tibial_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Artery_Tibial_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Artery_Tibial_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Artery_Tibial_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Artery_Tibial_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Brain_Amygdala_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Brain_Amygdala": { "annotation_db": { "key_properties": [] }, @@ -589,18 +462,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Amygdala_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Amygdala_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Amygdala_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Amygdala_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Amygdala_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Amygdala_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24": { "annotation_db": { "key_properties": [] }, @@ -611,18 +484,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24_v8_GRCh38.ht" }, "gcp": { - "eu": 
"gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Brain_Caudate_basal_ganglia_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Brain_Caudate_basal_ganglia": { "annotation_db": { "key_properties": [] }, @@ -633,18 +506,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Caudate_basal_ganglia_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Caudate_basal_ganglia_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Caudate_basal_ganglia_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Caudate_basal_ganglia_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Caudate_basal_ganglia_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Caudate_basal_ganglia_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Brain_Cerebellar_Hemisphere_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Brain_Cerebellar_Hemisphere": { "annotation_db": { "key_properties": [] }, @@ -655,18 +528,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Cerebellar_Hemisphere_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Cerebellar_Hemisphere_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Cerebellar_Hemisphere_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Cerebellar_Hemisphere_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Cerebellar_Hemisphere_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Cerebellar_Hemisphere_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Brain_Cerebellum_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Brain_Cerebellum": { "annotation_db": { "key_properties": [] }, @@ -677,18 +550,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Cerebellum_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Cerebellum_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Cerebellum_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Cerebellum_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Cerebellum_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Cerebellum_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Brain_Cortex_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Brain_Cortex": { "annotation_db": { "key_properties": [] }, @@ -699,18 +572,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Cortex_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Cortex_v8_GRCh38.ht" }, "gcp": { - "eu": 
"gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Cortex_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Cortex_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Cortex_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Cortex_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Brain_Frontal_Cortex_BA9_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Brain_Frontal_Cortex_BA9": { "annotation_db": { "key_properties": [] }, @@ -721,18 +594,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Frontal_Cortex_BA9_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Frontal_Cortex_BA9_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Frontal_Cortex_BA9_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Frontal_Cortex_BA9_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Frontal_Cortex_BA9_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Frontal_Cortex_BA9_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Brain_Hippocampus_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Brain_Hippocampus": { "annotation_db": { "key_properties": [] }, @@ -743,18 +616,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Hippocampus_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Hippocampus_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Hippocampus_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Hippocampus_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Hippocampus_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Hippocampus_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Brain_Hypothalamus_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Brain_Hypothalamus": { "annotation_db": { "key_properties": [] }, @@ -765,18 +638,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Hypothalamus_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Hypothalamus_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Hypothalamus_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Hypothalamus_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Hypothalamus_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Hypothalamus_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia": { "annotation_db": { "key_properties": [] }, @@ -787,18 +660,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.ht", - "us": 
"gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Brain_Putamen_basal_ganglia_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Brain_Putamen_basal_ganglia": { "annotation_db": { "key_properties": [] }, @@ -809,18 +682,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Putamen_basal_ganglia_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Putamen_basal_ganglia_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Putamen_basal_ganglia_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Putamen_basal_ganglia_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Putamen_basal_ganglia_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Putamen_basal_ganglia_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Brain_Spinal_cord_cervical_c-1": { "annotation_db": { "key_properties": [] }, @@ -831,18 +704,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Spinal_cord_cervical_c-1_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Spinal_cord_cervical_c-1_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Spinal_cord_cervical_c-1_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Brain_Substantia_nigra_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Brain_Substantia_nigra": { "annotation_db": { "key_properties": [] }, @@ -853,18 +726,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Substantia_nigra_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Substantia_nigra_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Substantia_nigra_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Substantia_nigra_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Substantia_nigra_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Substantia_nigra_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Breast_Mammary_Tissue_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Breast_Mammary_Tissue": { "annotation_db": { "key_properties": [] }, @@ -875,18 +748,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Breast_Mammary_Tissue_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Breast_Mammary_Tissue_v8_GRCh38.ht" }, "gcp": { - "eu": 
"gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Breast_Mammary_Tissue_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Breast_Mammary_Tissue_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Breast_Mammary_Tissue_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Breast_Mammary_Tissue_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Cells_Cultured_fibroblasts_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Cells_Cultured_fibroblasts": { "annotation_db": { "key_properties": [] }, @@ -897,18 +770,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Cells_Cultured_fibroblasts_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Cells_Cultured_fibroblasts_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Cells_Cultured_fibroblasts_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Cells_Cultured_fibroblasts_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Cells_Cultured_fibroblasts_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Cells_Cultured_fibroblasts_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Cells_EBV-transformed_lymphocytes_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Cells_EBV-transformed_lymphocytes": { "annotation_db": { "key_properties": [] }, @@ -919,18 +792,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Cells_EBV-transformed_lymphocytes_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Cells_EBV-transformed_lymphocytes_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Cells_EBV-transformed_lymphocytes_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Colon_Sigmoid_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Colon_Sigmoid": { "annotation_db": { "key_properties": [] }, @@ -941,18 +814,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Colon_Sigmoid_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Colon_Sigmoid_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Colon_Sigmoid_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Colon_Sigmoid_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Colon_Sigmoid_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Colon_Sigmoid_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Colon_Transverse_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Colon_Transverse": { "annotation_db": { "key_properties": [] }, @@ -963,18 +836,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Colon_Transverse_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Colon_Transverse_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Colon_Transverse_all_snp_gene_associations.ht", - "us": 
"gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Colon_Transverse_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Colon_Transverse_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Colon_Transverse_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Esophagus_Gastroesophageal_Junction_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Esophagus_Gastroesophageal_Junction": { "annotation_db": { "key_properties": [] }, @@ -985,18 +858,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Esophagus_Gastroesophageal_Junction_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Esophagus_Gastroesophageal_Junction_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Esophagus_Gastroesophageal_Junction_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Esophagus_Mucosa_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Esophagus_Mucosa": { "annotation_db": { "key_properties": [] }, @@ -1007,18 +880,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Esophagus_Mucosa_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Esophagus_Mucosa_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Esophagus_Mucosa_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Esophagus_Mucosa_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Esophagus_Mucosa_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Esophagus_Mucosa_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Esophagus_Muscularis_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Esophagus_Muscularis": { "annotation_db": { "key_properties": [] }, @@ -1029,18 +902,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Esophagus_Muscularis_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Esophagus_Muscularis_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Esophagus_Muscularis_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Esophagus_Muscularis_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Esophagus_Muscularis_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Esophagus_Muscularis_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Heart_Atrial_Appendage_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Heart_Atrial_Appendage": { "annotation_db": { "key_properties": [] }, @@ -1051,18 +924,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Heart_Atrial_Appendage_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Heart_Atrial_Appendage_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Heart_Atrial_Appendage_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Heart_Atrial_Appendage_all_snp_gene_associations.ht" + 
"eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Heart_Atrial_Appendage_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Heart_Atrial_Appendage_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Heart_Left_Ventricle_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Heart_Left_Ventricle": { "annotation_db": { "key_properties": [] }, @@ -1073,18 +946,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Heart_Left_Ventricle_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Heart_Left_Ventricle_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Heart_Left_Ventricle_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Heart_Left_Ventricle_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Heart_Left_Ventricle_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Heart_Left_Ventricle_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Kidney_Cortex_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Kidney_Cortex": { "annotation_db": { "key_properties": [] }, @@ -1095,18 +968,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Kidney_Cortex_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Kidney_Cortex_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Kidney_Cortex_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Kidney_Cortex_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Kidney_Cortex_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Kidney_Cortex_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Liver_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Liver": { "annotation_db": { "key_properties": [] }, @@ -1117,18 +990,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Liver_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Liver_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Liver_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Liver_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Liver_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Liver_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Lung_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Lung": { "annotation_db": { "key_properties": [] }, @@ -1139,18 +1012,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Lung_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Lung_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Lung_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Lung_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Lung_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Lung_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Minor_Salivary_Gland_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Minor_Salivary_Gland": { "annotation_db": { "key_properties": [] }, @@ -1161,18 +1034,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": 
"s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Minor_Salivary_Gland_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Minor_Salivary_Gland_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Minor_Salivary_Gland_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Minor_Salivary_Gland_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Minor_Salivary_Gland_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Minor_Salivary_Gland_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Muscle_Skeletal_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Muscle_Skeletal": { "annotation_db": { "key_properties": [] }, @@ -1183,18 +1056,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Muscle_Skeletal_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Muscle_Skeletal_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Muscle_Skeletal_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Muscle_Skeletal_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Muscle_Skeletal_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Muscle_Skeletal_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Nerve_Tibial_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Nerve_Tibial": { "annotation_db": { "key_properties": [] }, @@ -1205,18 +1078,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Nerve_Tibial_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Nerve_Tibial_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Nerve_Tibial_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Nerve_Tibial_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Nerve_Tibial_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Nerve_Tibial_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Ovary_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Ovary": { "annotation_db": { "key_properties": [] }, @@ -1227,18 +1100,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Ovary_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Ovary_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Ovary_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Ovary_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Ovary_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Ovary_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Pancreas_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Pancreas": { "annotation_db": { "key_properties": [] }, @@ -1249,18 +1122,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Pancreas_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Pancreas_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Pancreas_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Pancreas_all_snp_gene_associations.ht" + "eu": 
"gs://hail-datasets-eu/GTEx_eQTL_allpairs_Pancreas_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Pancreas_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Pituitary_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Pituitary": { "annotation_db": { "key_properties": [] }, @@ -1271,18 +1144,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Pituitary_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Pituitary_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Pituitary_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Pituitary_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Pituitary_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Pituitary_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Prostate_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Prostate": { "annotation_db": { "key_properties": [] }, @@ -1293,18 +1166,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Prostate_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Prostate_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Prostate_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Prostate_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Prostate_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Prostate_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic": { "annotation_db": { "key_properties": [] }, @@ -1315,18 +1188,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Skin_Sun_Exposed_Lower_leg": { "annotation_db": { "key_properties": [] }, @@ -1337,18 +1210,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Skin_Sun_Exposed_Lower_leg_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Skin_Sun_Exposed_Lower_leg_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Skin_Sun_Exposed_Lower_leg_v8_GRCh38.ht" } }, "version": "v8" } ] }, - 
"GTEx_eQTL_Small_Intestine_Terminal_Ileum_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Small_Intestine_Terminal_Ileum": { "annotation_db": { "key_properties": [] }, @@ -1359,18 +1232,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Small_Intestine_Terminal_Ileum_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Small_Intestine_Terminal_Ileum_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Small_Intestine_Terminal_Ileum_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Small_Intestine_Terminal_Ileum_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Small_Intestine_Terminal_Ileum_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Small_Intestine_Terminal_Ileum_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Spleen_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Spleen": { "annotation_db": { "key_properties": [] }, @@ -1381,18 +1254,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Spleen_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Spleen_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Spleen_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Spleen_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Spleen_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Spleen_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Stomach_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Stomach": { "annotation_db": { "key_properties": [] }, @@ -1403,18 +1276,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Stomach_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Stomach_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Stomach_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Stomach_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Stomach_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Stomach_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Testis_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Testis": { "annotation_db": { "key_properties": [] }, @@ -1425,18 +1298,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Testis_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Testis_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Testis_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Testis_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Testis_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Testis_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Thyroid_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Thyroid": { "annotation_db": { "key_properties": [] }, @@ -1447,18 +1320,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Thyroid_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Thyroid_v8_GRCh38.ht" }, "gcp": { - "eu": 
"gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Thyroid_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Thyroid_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Thyroid_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Thyroid_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Uterus_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Uterus": { "annotation_db": { "key_properties": [] }, @@ -1469,18 +1342,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Uterus_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Uterus_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Uterus_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Uterus_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Uterus_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Uterus_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Vagina_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Vagina": { "annotation_db": { "key_properties": [] }, @@ -1491,18 +1364,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Vagina_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Vagina_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Vagina_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Vagina_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Vagina_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Vagina_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_Whole_Blood_all_snp_gene_associations": { + "GTEx_eQTL_allpairs_Whole_Blood": { "annotation_db": { "key_properties": [] }, @@ -1513,18 +1386,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Whole_Blood_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Whole_Blood_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Whole_Blood_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Whole_Blood_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Whole_Blood_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Whole_Blood_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Adipose_Subcutaneous_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Adipose_Subcutaneous": { "annotation_db": { "key_properties": [] }, @@ -1535,18 +1408,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Adipose_Subcutaneous_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Adipose_Subcutaneous_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Adipose_Subcutaneous_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Adipose_Subcutaneous_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Adipose_Subcutaneous_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Adipose_Subcutaneous_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Adipose_Visceral_Omentum_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Adipose_Visceral_Omentum": { 
"annotation_db": { "key_properties": [] }, @@ -1557,18 +1430,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Adipose_Visceral_Omentum_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Adipose_Visceral_Omentum_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Adipose_Visceral_Omentum_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Adipose_Visceral_Omentum_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Adipose_Visceral_Omentum_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Adipose_Visceral_Omentum_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Adrenal_Gland_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Adrenal_Gland": { "annotation_db": { "key_properties": [] }, @@ -1579,18 +1452,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Adrenal_Gland_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Adrenal_Gland_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Adrenal_Gland_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Adrenal_Gland_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Adrenal_Gland_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Adrenal_Gland_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Artery_Aorta_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Artery_Aorta": { "annotation_db": { "key_properties": [] }, @@ -1601,18 +1474,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Artery_Aorta_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Artery_Aorta_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Artery_Aorta_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Artery_Aorta_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Artery_Aorta_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Artery_Aorta_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Artery_Coronary_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Artery_Coronary": { "annotation_db": { "key_properties": [] }, @@ -1623,18 +1496,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Artery_Coronary_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Artery_Coronary_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Artery_Coronary_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Artery_Coronary_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Artery_Coronary_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Artery_Coronary_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Artery_Tibial_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Artery_Tibial": { "annotation_db": { "key_properties": [] }, @@ -1645,18 +1518,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Artery_Tibial_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Artery_Tibial_v8_GRCh38.ht" }, "gcp": { - "eu": 
"gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Artery_Tibial_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Artery_Tibial_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Artery_Tibial_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Artery_Tibial_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Brain_Amygdala_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Brain_Amygdala": { "annotation_db": { "key_properties": [] }, @@ -1667,18 +1540,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Amygdala_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Amygdala_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Amygdala_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Amygdala_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Amygdala_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Amygdala_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24": { "annotation_db": { "key_properties": [] }, @@ -1689,18 +1562,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Brain_Caudate_basal_ganglia_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Brain_Caudate_basal_ganglia": { "annotation_db": { "key_properties": [] }, @@ -1711,18 +1584,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Caudate_basal_ganglia_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Caudate_basal_ganglia_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Caudate_basal_ganglia_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Caudate_basal_ganglia_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Caudate_basal_ganglia_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Caudate_basal_ganglia_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Brain_Cerebellar_Hemisphere_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Brain_Cerebellar_Hemisphere": { "annotation_db": { "key_properties": [] }, @@ -1733,18 +1606,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Cerebellar_Hemisphere_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Cerebellar_Hemisphere_v8_GRCh38.ht" }, "gcp": { - "eu": 
"gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Cerebellar_Hemisphere_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Cerebellar_Hemisphere_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Cerebellar_Hemisphere_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Cerebellar_Hemisphere_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Brain_Cerebellum_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Brain_Cerebellum": { "annotation_db": { "key_properties": [] }, @@ -1755,18 +1628,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Cerebellum_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Cerebellum_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Cerebellum_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Cerebellum_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Cerebellum_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Cerebellum_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Brain_Cortex_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Brain_Cortex": { "annotation_db": { "key_properties": [] }, @@ -1777,18 +1650,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Cortex_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Cortex_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Cortex_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Cortex_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Cortex_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Cortex_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Brain_Frontal_Cortex_BA9_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Brain_Frontal_Cortex_BA9": { "annotation_db": { "key_properties": [] }, @@ -1799,18 +1672,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Frontal_Cortex_BA9_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Frontal_Cortex_BA9_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Frontal_Cortex_BA9_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Frontal_Cortex_BA9_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Frontal_Cortex_BA9_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Frontal_Cortex_BA9_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Brain_Hippocampus_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Brain_Hippocampus": { "annotation_db": { "key_properties": [] }, @@ -1821,18 +1694,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Hippocampus_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Hippocampus_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Hippocampus_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Hippocampus_all_snp_gene_associations.ht" + "eu": 
"gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Hippocampus_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Hippocampus_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Brain_Hypothalamus_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Brain_Hypothalamus": { "annotation_db": { "key_properties": [] }, @@ -1843,18 +1716,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Hypothalamus_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Hypothalamus_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Hypothalamus_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Hypothalamus_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Hypothalamus_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Hypothalamus_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia": { "annotation_db": { "key_properties": [] }, @@ -1865,18 +1738,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Brain_Putamen_basal_ganglia_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Brain_Putamen_basal_ganglia": { "annotation_db": { "key_properties": [] }, @@ -1887,18 +1760,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Putamen_basal_ganglia_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Putamen_basal_ganglia_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Putamen_basal_ganglia_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Putamen_basal_ganglia_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Putamen_basal_ganglia_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Putamen_basal_ganglia_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Brain_Spinal_cord_cervical_c-1": { "annotation_db": { "key_properties": [] }, @@ -1909,18 +1782,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Spinal_cord_cervical_c-1_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.ht", - "us": 
"gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Spinal_cord_cervical_c-1_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Spinal_cord_cervical_c-1_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Brain_Substantia_nigra_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Brain_Substantia_nigra": { "annotation_db": { "key_properties": [] }, @@ -1931,18 +1804,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Substantia_nigra_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Substantia_nigra_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Substantia_nigra_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Substantia_nigra_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Substantia_nigra_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Substantia_nigra_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Breast_Mammary_Tissue_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Breast_Mammary_Tissue": { "annotation_db": { "key_properties": [] }, @@ -1953,18 +1826,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Breast_Mammary_Tissue_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Breast_Mammary_Tissue_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Breast_Mammary_Tissue_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Breast_Mammary_Tissue_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Breast_Mammary_Tissue_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Breast_Mammary_Tissue_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Cells_Cultured_fibroblasts_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Cells_Cultured_fibroblasts": { "annotation_db": { "key_properties": [] }, @@ -1975,18 +1848,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Cells_Cultured_fibroblasts_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Cells_Cultured_fibroblasts_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Cells_Cultured_fibroblasts_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Cells_Cultured_fibroblasts_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Cells_Cultured_fibroblasts_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Cells_Cultured_fibroblasts_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Cells_EBV-transformed_lymphocytes_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Cells_EBV-transformed_lymphocytes": { "annotation_db": { "key_properties": [] }, @@ -1997,18 +1870,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Cells_EBV-transformed_lymphocytes_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.ht", - "us": 
"gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Cells_EBV-transformed_lymphocytes_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Cells_EBV-transformed_lymphocytes_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Colon_Sigmoid_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Colon_Sigmoid": { "annotation_db": { "key_properties": [] }, @@ -2019,18 +1892,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Colon_Sigmoid_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Colon_Sigmoid_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Colon_Sigmoid_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Colon_Sigmoid_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Colon_Sigmoid_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Colon_Sigmoid_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Colon_Transverse_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Colon_Transverse": { "annotation_db": { "key_properties": [] }, @@ -2041,18 +1914,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Colon_Transverse_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Colon_Transverse_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Colon_Transverse_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Colon_Transverse_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Colon_Transverse_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Colon_Transverse_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Esophagus_Gastroesophageal_Junction_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Esophagus_Gastroesophageal_Junction": { "annotation_db": { "key_properties": [] }, @@ -2063,18 +1936,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Esophagus_Gastroesophageal_Junction_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Esophagus_Gastroesophageal_Junction_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Esophagus_Gastroesophageal_Junction_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Esophagus_Mucosa_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Esophagus_Mucosa": { "annotation_db": { "key_properties": [] }, @@ -2085,18 +1958,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Esophagus_Mucosa_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Esophagus_Mucosa_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Esophagus_Mucosa_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Esophagus_Mucosa_all_snp_gene_associations.ht" + "eu": 
"gs://hail-datasets-eu/GTEx_sQTL_allpairs_Esophagus_Mucosa_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Esophagus_Mucosa_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Esophagus_Muscularis_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Esophagus_Muscularis": { "annotation_db": { "key_properties": [] }, @@ -2107,18 +1980,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Esophagus_Muscularis_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Esophagus_Muscularis_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Esophagus_Muscularis_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Esophagus_Muscularis_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Esophagus_Muscularis_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Esophagus_Muscularis_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Heart_Atrial_Appendage_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Heart_Atrial_Appendage": { "annotation_db": { "key_properties": [] }, @@ -2129,18 +2002,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Heart_Atrial_Appendage_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Heart_Atrial_Appendage_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Heart_Atrial_Appendage_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Heart_Atrial_Appendage_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Heart_Atrial_Appendage_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Heart_Atrial_Appendage_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Heart_Left_Ventricle_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Heart_Left_Ventricle": { "annotation_db": { "key_properties": [] }, @@ -2151,18 +2024,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Heart_Left_Ventricle_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Heart_Left_Ventricle_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Heart_Left_Ventricle_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Heart_Left_Ventricle_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Heart_Left_Ventricle_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Heart_Left_Ventricle_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Kidney_Cortex_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Kidney_Cortex": { "annotation_db": { "key_properties": [] }, @@ -2173,18 +2046,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Kidney_Cortex_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Kidney_Cortex_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Kidney_Cortex_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Kidney_Cortex_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Kidney_Cortex_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Kidney_Cortex_v8_GRCh38.ht" } }, "version": "v8" } ] }, - 
"GTEx_sQTL_Liver_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Liver": { "annotation_db": { "key_properties": [] }, @@ -2195,18 +2068,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Liver_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Liver_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Liver_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Liver_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Liver_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Liver_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Lung_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Lung": { "annotation_db": { "key_properties": [] }, @@ -2217,18 +2090,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Lung_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Lung_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Lung_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Lung_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Lung_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Lung_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Minor_Salivary_Gland_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Minor_Salivary_Gland": { "annotation_db": { "key_properties": [] }, @@ -2239,18 +2112,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Minor_Salivary_Gland_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Minor_Salivary_Gland_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Minor_Salivary_Gland_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Minor_Salivary_Gland_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Minor_Salivary_Gland_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Minor_Salivary_Gland_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Muscle_Skeletal_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Muscle_Skeletal": { "annotation_db": { "key_properties": [] }, @@ -2261,18 +2134,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Muscle_Skeletal_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Muscle_Skeletal_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Muscle_Skeletal_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Muscle_Skeletal_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Muscle_Skeletal_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Muscle_Skeletal_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Nerve_Tibial_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Nerve_Tibial": { "annotation_db": { "key_properties": [] }, @@ -2283,18 +2156,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Nerve_Tibial_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Nerve_Tibial_v8_GRCh38.ht" }, "gcp": { - "eu": 
"gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Nerve_Tibial_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Nerve_Tibial_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Nerve_Tibial_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Nerve_Tibial_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Ovary_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Ovary": { "annotation_db": { "key_properties": [] }, @@ -2305,18 +2178,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Ovary_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Ovary_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Ovary_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Ovary_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Ovary_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Ovary_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Pancreas_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Pancreas": { "annotation_db": { "key_properties": [] }, @@ -2327,18 +2200,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Pancreas_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Pancreas_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Pancreas_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Pancreas_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Pancreas_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Pancreas_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Pituitary_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Pituitary": { "annotation_db": { "key_properties": [] }, @@ -2349,18 +2222,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Pituitary_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Pituitary_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Pituitary_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Pituitary_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Pituitary_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Pituitary_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Prostate_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Prostate": { "annotation_db": { "key_properties": [] }, @@ -2371,18 +2244,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Prostate_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Prostate_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Prostate_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Prostate_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Prostate_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Prostate_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic": { "annotation_db": { "key_properties": [] }, @@ -2393,18 +2266,18 @@ 
"reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Skin_Sun_Exposed_Lower_leg": { "annotation_db": { "key_properties": [] }, @@ -2415,18 +2288,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Skin_Sun_Exposed_Lower_leg_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Skin_Sun_Exposed_Lower_leg_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Skin_Sun_Exposed_Lower_leg_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Small_Intestine_Terminal_Ileum_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Small_Intestine_Terminal_Ileum": { "annotation_db": { "key_properties": [] }, @@ -2437,18 +2310,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Small_Intestine_Terminal_Ileum_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Small_Intestine_Terminal_Ileum_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Small_Intestine_Terminal_Ileum_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Small_Intestine_Terminal_Ileum_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Small_Intestine_Terminal_Ileum_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Small_Intestine_Terminal_Ileum_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Spleen_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Spleen": { "annotation_db": { "key_properties": [] }, @@ -2459,18 +2332,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Spleen_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Spleen_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Spleen_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Spleen_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Spleen_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Spleen_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Stomach_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Stomach": { "annotation_db": { "key_properties": [] }, @@ -2481,18 +2354,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": 
"s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Stomach_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Stomach_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Stomach_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Stomach_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Stomach_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Stomach_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Testis_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Testis": { "annotation_db": { "key_properties": [] }, @@ -2503,18 +2376,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Testis_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Testis_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Testis_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Testis_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Testis_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Testis_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Thyroid_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Thyroid": { "annotation_db": { "key_properties": [] }, @@ -2525,18 +2398,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Thyroid_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Thyroid_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Thyroid_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Thyroid_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Thyroid_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Thyroid_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Uterus_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Uterus": { "annotation_db": { "key_properties": [] }, @@ -2547,18 +2420,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Uterus_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Uterus_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Uterus_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Uterus_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Uterus_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Uterus_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Vagina_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Vagina": { "annotation_db": { "key_properties": [] }, @@ -2569,18 +2442,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Vagina_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Vagina_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Vagina_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Vagina_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Vagina_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Vagina_v8_GRCh38.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_Whole_Blood_all_snp_gene_associations": { + "GTEx_sQTL_allpairs_Whole_Blood": 
{ "annotation_db": { "key_properties": [] }, @@ -2591,11 +2464,11 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Whole_Blood_all_snp_gene_associations.ht" + "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Whole_Blood_v8_GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Whole_Blood_all_snp_gene_associations.ht", - "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Whole_Blood_all_snp_gene_associations.ht" + "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Whole_Blood_v8_GRCh38.ht", + "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Whole_Blood_v8_GRCh38.ht" } }, "version": "v8" @@ -2610,11 +2483,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/UK_Biobank/Rapid_GWAS/v2/GRCh37/both_sexes.mt" + "us": "s3://hail-datasets-us-east-1/UK_Biobank_Rapid_GWAS_both_sexes.v2.GRCh37.mt" }, "gcp": { - "eu": "gs://hail-datasets-eu/UK_Biobank/Rapid_GWAS/v2/GRCh37/both_sexes.mt", - "us": "gs://hail-datasets-us/UK_Biobank/Rapid_GWAS/v2/GRCh37/both_sexes.mt" + "eu": "gs://hail-datasets-eu/UK_Biobank_Rapid_GWAS_both_sexes.v2.GRCh37.mt", + "us": "gs://hail-datasets-us/UK_Biobank_Rapid_GWAS_both_sexes.v2.GRCh37.mt" } }, "version": "v2" @@ -2629,11 +2502,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/UK_Biobank/Rapid_GWAS/v2/GRCh37/female.mt" + "us": "s3://hail-datasets-us-east-1/UK_Biobank_Rapid_GWAS_female.v2.GRCh37.mt" }, "gcp": { - "eu": "gs://hail-datasets-eu/UK_Biobank/Rapid_GWAS/v2/GRCh37/female.mt", - "us": "gs://hail-datasets-us/UK_Biobank/Rapid_GWAS/v2/GRCh37/female.mt" + "eu": "gs://hail-datasets-eu/UK_Biobank_Rapid_GWAS_female.v2.GRCh37.mt", + "us": "gs://hail-datasets-us/UK_Biobank_Rapid_GWAS_female.v2.GRCh37.mt" } }, "version": "v2" @@ -2648,11 +2521,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/UK_Biobank/Rapid_GWAS/v2/GRCh37/male.mt" + "us": "s3://hail-datasets-us-east-1/UK_Biobank_Rapid_GWAS_male.v2.GRCh37.mt" }, "gcp": { - "eu": "gs://hail-datasets-eu/UK_Biobank/Rapid_GWAS/v2/GRCh37/male.mt", - "us": "gs://hail-datasets-us/UK_Biobank/Rapid_GWAS/v2/GRCh37/male.mt" + "eu": "gs://hail-datasets-eu/UK_Biobank_Rapid_GWAS_male.v2.GRCh37.mt", + "us": "gs://hail-datasets-us/UK_Biobank_Rapid_GWAS_male.v2.GRCh37.mt" } }, "version": "v2" @@ -2673,11 +2546,11 @@ "reference_genome": null, "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/ClinVar/2019-07/gene_specific_summary.ht" + "us": "s3://hail-datasets-us-east-1/annotations/gene_specific_summary_2019-07.txt.gz.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/ClinVar/2019-07/gene_specific_summary.ht", - "us": "gs://hail-datasets-us/ClinVar/2019-07/gene_specific_summary.ht" + "eu": "gs://hail-datasets-eu/annotations/gene_specific_summary_2019-07.txt.gz.ht", + "us": "gs://hail-datasets-us/annotations/gene_specific_summary_2019-07.txt.gz.ht" } }, "version": "2019-07" @@ -2695,11 +2568,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/ClinVar/2019-07/GRCh37/variant_summary.ht" + "us": "s3://hail-datasets-us-east-1/annotations/variant_summary_2019-07.GRCh37.txt.gz.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/ClinVar/2019-07/GRCh37/variant_summary.ht", - "us": "gs://hail-datasets-us/ClinVar/2019-07/GRCh37/variant_summary.ht" + "eu": "gs://hail-datasets-eu/annotations/variant_summary_2019-07.GRCh37.txt.gz.ht", + "us": "gs://hail-datasets-us/annotations/variant_summary_2019-07.GRCh37.txt.gz.ht" } }, 
"version": "2019-07" @@ -2708,11 +2581,11 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/ClinVar/2019-07/GRCh38/variant_summary.ht" + "us": "s3://hail-datasets-us-east-1/annotations/variant_summary_2019-07.GRCh38.txt.gz.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/ClinVar/2019-07/GRCh38/variant_summary.ht", - "us": "gs://hail-datasets-us/ClinVar/2019-07/GRCh38/variant_summary.ht" + "eu": "gs://hail-datasets-eu/annotations/variant_summary_2019-07.GRCh38.txt.gz.ht", + "us": "gs://hail-datasets-us/annotations/variant_summary_2019-07.GRCh38.txt.gz.ht" } }, "version": "2019-07" @@ -2733,11 +2606,11 @@ "reference_genome": null, "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/dbNSFP/v4.0a/gene_complete.ht" + "us": "s3://hail-datasets-us-east-1/annotations/dbnsfp/dbNSFP4.0_gene.complete.bgz.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/dbNSFP/v4.0a/gene_complete.ht", - "us": "gs://hail-datasets-us/dbNSFP/v4.0a/gene_complete.ht" + "eu": "gs://hail-datasets-eu/annotations/dbnsfp/dbNSFP4.0_gene.complete.bgz.ht", + "us": "gs://hail-datasets-us/annotations/dbnsfp/dbNSFP4.0_gene.complete.bgz.ht" } }, "version": "4.0" @@ -2755,11 +2628,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/dbNSFP/v4.0a/GRCh37/variant.ht" + "us": "s3://hail-datasets-us-east-1/annotations/dbnsfp4.0a.GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/dbNSFP/v4.0a/GRCh37/variant.ht", - "us": "gs://hail-datasets-us/dbNSFP/v4.0a/GRCh37/variant.ht" + "eu": "gs://hail-datasets-eu/annotations/dbnsfp4.0a.GRCh37.ht", + "us": "gs://hail-datasets-us/annotations/dbnsfp4.0a.GRCh37.ht" } }, "version": "4.0" @@ -2768,87 +2641,17 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/dbNSFP/v4.0a/GRCh38/variant.ht" + "us": "s3://hail-datasets-us-east-1/annotations/dbnsfp4.0a.GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/dbNSFP/v4.0a/GRCh38/variant.ht", - "us": "gs://hail-datasets-us/dbNSFP/v4.0a/GRCh38/variant.ht" + "eu": "gs://hail-datasets-eu/annotations/dbnsfp4.0a.GRCh38.ht", + "us": "gs://hail-datasets-us/annotations/dbnsfp4.0a.GRCh38.ht" } }, "version": "4.0" } ] }, - "dbSNP": { - "annotation_db": { - "key_properties": [] - }, - "description": "dbSNP: Reference SNP (rs or RefSNP) Hail Table. The database includes both common and rare single-base nucleotide variation (SNV), short (=< 50bp) deletion/insertion polymorphisms, and other classes of small genetic variations.", - "url": "https://www.ncbi.nlm.nih.gov/snp/docs/RefSNP_about/", - "versions": [ - { - "reference_genome": "GRCh37", - "url": { - "aws": { - "us": "s3://hail-datasets-us-east-1/dbSNP/build_154/GRCh37/full_table.ht" - }, - "gcp": { - "eu": "gs://hail-datasets-eu/dbSNP/build_154/GRCh37/full_table.ht", - "us": "gs://hail-datasets-us/dbSNP/build_154/GRCh37/full_table.ht" - } - }, - "version": "154" - }, - { - "reference_genome": "GRCh38", - "url": { - "aws": { - "us": "s3://hail-datasets-us-east-1/dbSNP/build_154/GRCh38/full_table.ht" - }, - "gcp": { - "eu": "gs://hail-datasets-eu/dbSNP/build_154/GRCh38/full_table.ht", - "us": "gs://hail-datasets-us/dbSNP/build_154/GRCh38/full_table.ht" - } - }, - "version": "154" - } - ] - }, - "dbSNP_rsid": { - "annotation_db": { - "key_properties": [] - }, - "description": "dbSNP: This Hail Table contains a mapping from locus/allele pairs to Reference SNP IDs (rsID). 
For the full dataset, see dbSNP.", - "url": "https://www.ncbi.nlm.nih.gov/snp/docs/RefSNP_about/", - "versions": [ - { - "reference_genome": "GRCh37", - "url": { - "aws": { - "us": "s3://hail-datasets-us-east-1/dbSNP/build_154/GRCh37/rsid_only_table.ht" - }, - "gcp": { - "eu": "gs://hail-datasets-eu/dbSNP/build_154/GRCh37/rsid_only_table.ht", - "us": "gs://hail-datasets-us/dbSNP/build_154/GRCh37/rsid_only_table.ht" - } - }, - "version": "154" - }, - { - "reference_genome": "GRCh38", - "url": { - "aws": { - "us": "s3://hail-datasets-us-east-1/dbSNP/build_154/GRCh38/rsid_only_table.ht" - }, - "gcp": { - "eu": "gs://hail-datasets-eu/dbSNP/build_154/GRCh38/rsid_only_table.ht", - "us": "gs://hail-datasets-us/dbSNP/build_154/GRCh38/rsid_only_table.ht" - } - }, - "version": "154" - } - ] - }, "gencode": { "annotation_db": { "key_properties": [] @@ -2860,11 +2663,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GENCODE/v19/GRCh37/annotation.ht" + "us": "s3://hail-datasets-us-east-1/annotations/gencode.v19.annotation.GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GENCODE/v19/GRCh37/annotation.ht", - "us": "gs://hail-datasets-us/GENCODE/v19/GRCh37/annotation.ht" + "eu": "gs://hail-datasets-eu/annotations/gencode.v19.annotation.GRCh37.ht", + "us": "gs://hail-datasets-us/annotations/gencode.v19.annotation.GRCh37.ht" } }, "version": "v19" @@ -2873,11 +2676,11 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GENCODE/v31/GRCh38/annotation.ht" + "us": "s3://hail-datasets-us-east-1/annotations/gencode.v31.annotation.GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GENCODE/v31/GRCh38/annotation.ht", - "us": "gs://hail-datasets-us/GENCODE/v31/GRCh38/annotation.ht" + "eu": "gs://hail-datasets-eu/annotations/gencode.v31.annotation.GRCh38.ht", + "us": "gs://hail-datasets-us/annotations/gencode.v31.annotation.GRCh38.ht" } }, "version": "v31" @@ -2897,11 +2700,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GERP/GERP++/GRCh37/elements.ht" + "us": "s3://hail-datasets-us-east-1/annotations/GERP_elements.GERP++.GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GERP/GERP++/GRCh37/elements.ht", - "us": "gs://hail-datasets-us/GERP/GERP++/GRCh37/elements.ht" + "eu": "gs://hail-datasets-eu/annotations/GERP_elements.GERP++.GRCh37.ht", + "us": "gs://hail-datasets-us/annotations/GERP_elements.GERP++.GRCh37.ht" } }, "version": "hg19" @@ -2910,11 +2713,11 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GERP/GERP++/GRCh38/elements.ht" + "us": "s3://hail-datasets-us-east-1/annotations/GERP_elements.GERP++.GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GERP/GERP++/GRCh38/elements.ht", - "us": "gs://hail-datasets-us/GERP/GERP++/GRCh38/elements.ht" + "eu": "gs://hail-datasets-eu/annotations/GERP_elements.GERP++.GRCh38.ht", + "us": "gs://hail-datasets-us/annotations/GERP_elements.GERP++.GRCh38.ht" } }, "version": "hg19" @@ -2934,11 +2737,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GERP/GERP++/GRCh37/scores.ht" + "us": "s3://hail-datasets-us-east-1/annotations/GERP_scores.GERP++.GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GERP/GERP++/GRCh37/scores.ht", - "us": "gs://hail-datasets-us/GERP/GERP++/GRCh37/scores.ht" + "eu": "gs://hail-datasets-eu/annotations/GERP_scores.GERP++.GRCh37.ht", + "us": "gs://hail-datasets-us/annotations/GERP_scores.GERP++.GRCh37.ht" } }, "version": "hg19" @@ 
-2947,11 +2750,11 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GERP/GERP++/GRCh38/scores.ht" + "us": "s3://hail-datasets-us-east-1/annotations/GERP_scores.GERP++.GRCh38.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GERP/GERP++/GRCh38/scores.ht", - "us": "gs://hail-datasets-us/GERP/GERP++/GRCh38/scores.ht" + "eu": "gs://hail-datasets-eu/annotations/GERP_scores.GERP++.GRCh38.ht", + "us": "gs://hail-datasets-us/annotations/GERP_scores.GERP++.GRCh38.ht" } }, "version": "hg19" @@ -2971,11 +2774,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/bmi_AFR.ht" + "us": "s3://hail-datasets-us-east-1/giant_bmi_exome_AFR_2018_GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/bmi_AFR.ht", - "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_AFR.ht" + "eu": "gs://hail-datasets-eu/giant_bmi_exome_AFR_2018_GRCh37.ht", + "us": "gs://hail-datasets-us/giant_bmi_exome_AFR_2018_GRCh37.ht" } }, "version": "2018" @@ -2995,11 +2798,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/bmi_ALL.ht" + "us": "s3://hail-datasets-us-east-1/giant_bmi_exome_ALL_2018_GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/bmi_ALL.ht", - "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_ALL.ht" + "eu": "gs://hail-datasets-eu/giant_bmi_exome_ALL_2018_GRCh37.ht", + "us": "gs://hail-datasets-us/giant_bmi_exome_ALL_2018_GRCh37.ht" } }, "version": "2018" @@ -3019,11 +2822,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/bmi_AMR.ht" + "us": "s3://hail-datasets-us-east-1/giant_bmi_exome_AMR_2018_GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/bmi_AMR.ht", - "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_AMR.ht" + "eu": "gs://hail-datasets-eu/giant_bmi_exome_AMR_2018_GRCh37.ht", + "us": "gs://hail-datasets-us/giant_bmi_exome_AMR_2018_GRCh37.ht" } }, "version": "2018" @@ -3043,11 +2846,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/bmi_EAS.ht" + "us": "s3://hail-datasets-us-east-1/giant_bmi_exome_EAS_2018_GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/bmi_EAS.ht", - "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_EAS.ht" + "eu": "gs://hail-datasets-eu/giant_bmi_exome_EAS_2018_GRCh37.ht", + "us": "gs://hail-datasets-us/giant_bmi_exome_EAS_2018_GRCh37.ht" } }, "version": "2018" @@ -3067,11 +2870,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/bmi_EUR.ht" + "us": "s3://hail-datasets-us-east-1/giant_bmi_exome_EUR_2018_GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/bmi_EUR.ht", - "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_EUR.ht" + "eu": "gs://hail-datasets-eu/giant_bmi_exome_EUR_2018_GRCh37.ht", + "us": "gs://hail-datasets-us/giant_bmi_exome_EUR_2018_GRCh37.ht" } }, "version": "2018" @@ -3091,11 +2894,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/bmi_SAS.ht" + "us": "s3://hail-datasets-us-east-1/giant_bmi_exome_SAS_2018_GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/bmi_SAS.ht", - 
"us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_SAS.ht" + "eu": "gs://hail-datasets-eu/giant_bmi_exome_SAS_2018_GRCh37.ht", + "us": "gs://hail-datasets-us/giant_bmi_exome_SAS_2018_GRCh37.ht" } }, "version": "2018" @@ -3115,11 +2918,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/height_AFR.ht" + "us": "s3://hail-datasets-us-east-1/giant_height_exome_AFR_2018_GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/height_AFR.ht", - "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_AFR.ht" + "eu": "gs://hail-datasets-eu/giant_height_exome_AFR_2018_GRCh37.ht", + "us": "gs://hail-datasets-us/giant_height_exome_AFR_2018_GRCh37.ht" } }, "version": "2018" @@ -3139,11 +2942,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/height_ALL.ht" + "us": "s3://hail-datasets-us-east-1/giant_height_exome_ALL_2018_GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/height_ALL.ht", - "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_ALL.ht" + "eu": "gs://hail-datasets-eu/giant_height_exome_ALL_2018_GRCh37.ht", + "us": "gs://hail-datasets-us/giant_height_exome_ALL_2018_GRCh37.ht" } }, "version": "2018" @@ -3163,11 +2966,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/height_AMR.ht" + "us": "s3://hail-datasets-us-east-1/giant_height_exome_AMR_2018_GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/height_AMR.ht", - "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_AMR.ht" + "eu": "gs://hail-datasets-eu/giant_height_exome_AMR_2018_GRCh37.ht", + "us": "gs://hail-datasets-us/giant_height_exome_AMR_2018_GRCh37.ht" } }, "version": "2018" @@ -3187,11 +2990,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/height_EAS.ht" + "us": "s3://hail-datasets-us-east-1/giant_height_exome_EAS_2018_GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/height_EAS.ht", - "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_EAS.ht" + "eu": "gs://hail-datasets-eu/giant_height_exome_EAS_2018_GRCh37.ht", + "us": "gs://hail-datasets-us/giant_height_exome_EAS_2018_GRCh37.ht" } }, "version": "2018" @@ -3211,11 +3014,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/height_EUR.ht" + "us": "s3://hail-datasets-us-east-1/giant_height_exome_EUR_2018_GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/height_EUR.ht", - "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_EUR.ht" + "eu": "gs://hail-datasets-eu/giant_height_exome_EUR_2018_GRCh37.ht", + "us": "gs://hail-datasets-us/giant_height_exome_EUR_2018_GRCh37.ht" } }, "version": "2018" @@ -3235,11 +3038,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/height_SAS.ht" + "us": "s3://hail-datasets-us-east-1/giant_height_exome_SAS_2018_GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/height_SAS.ht", - "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_SAS.ht" + "eu": "gs://hail-datasets-eu/giant_height_exome_SAS_2018_GRCh37.ht", + "us": 
"gs://hail-datasets-us/giant_height_exome_SAS_2018_GRCh37.ht" } }, "version": "2018" @@ -3259,11 +3062,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_C_ALL_Add.ht" + "us": "s3://hail-datasets-us-east-1/giant_whr_exome_C_ALL_Add_2018_GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_C_ALL_Add.ht", - "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_C_ALL_Add.ht" + "eu": "gs://hail-datasets-eu/giant_whr_exome_C_ALL_Add_2018_GRCh37.ht", + "us": "gs://hail-datasets-us/giant_whr_exome_C_ALL_Add_2018_GRCh37.ht" } }, "version": "2018" @@ -3283,11 +3086,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_C_ALL_Rec.ht" + "us": "s3://hail-datasets-us-east-1/giant_whr_exome_C_ALL_Rec_2018_GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_C_ALL_Rec.ht", - "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_C_ALL_Rec.ht" + "eu": "gs://hail-datasets-eu/giant_whr_exome_C_ALL_Rec_2018_GRCh37.ht", + "us": "gs://hail-datasets-us/giant_whr_exome_C_ALL_Rec_2018_GRCh37.ht" } }, "version": "2018" @@ -3307,11 +3110,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_C_EUR_Add.ht" + "us": "s3://hail-datasets-us-east-1/giant_whr_exome_C_EUR_Add_2018_GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_C_EUR_Add.ht", - "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_C_EUR_Add.ht" + "eu": "gs://hail-datasets-eu/giant_whr_exome_C_EUR_Add_2018_GRCh37.ht", + "us": "gs://hail-datasets-us/giant_whr_exome_C_EUR_Add_2018_GRCh37.ht" } }, "version": "2018" @@ -3331,11 +3134,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_C_EUR_Rec.ht" + "us": "s3://hail-datasets-us-east-1/giant_whr_exome_C_EUR_Rec_2018_GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_C_EUR_Rec.ht", - "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_C_EUR_Rec.ht" + "eu": "gs://hail-datasets-eu/giant_whr_exome_C_EUR_Rec_2018_GRCh37.ht", + "us": "gs://hail-datasets-us/giant_whr_exome_C_EUR_Rec_2018_GRCh37.ht" } }, "version": "2018" @@ -3355,11 +3158,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_M_ALL_Add.ht" + "us": "s3://hail-datasets-us-east-1/giant_whr_exome_M_ALL_Add_2018_GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_M_ALL_Add.ht", - "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_M_ALL_Add.ht" + "eu": "gs://hail-datasets-eu/giant_whr_exome_M_ALL_Add_2018_GRCh37.ht", + "us": "gs://hail-datasets-us/giant_whr_exome_M_ALL_Add_2018_GRCh37.ht" } }, "version": "2018" @@ -3379,11 +3182,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_M_ALL_Rec.ht" + "us": "s3://hail-datasets-us-east-1/giant_whr_exome_M_ALL_Rec_2018_GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_M_ALL_Rec.ht", - "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_M_ALL_Rec.ht" + "eu": "gs://hail-datasets-eu/giant_whr_exome_M_ALL_Rec_2018_GRCh37.ht", + "us": "gs://hail-datasets-us/giant_whr_exome_M_ALL_Rec_2018_GRCh37.ht" } }, "version": 
"2018" @@ -3403,11 +3206,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_M_EUR_Add.ht" + "us": "s3://hail-datasets-us-east-1/giant_whr_exome_M_EUR_Add_2018_GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_M_EUR_Add.ht", - "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_M_EUR_Add.ht" + "eu": "gs://hail-datasets-eu/giant_whr_exome_M_EUR_Add_2018_GRCh37.ht", + "us": "gs://hail-datasets-us/giant_whr_exome_M_EUR_Add_2018_GRCh37.ht" } }, "version": "2018" @@ -3427,11 +3230,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_M_EUR_Rec.ht" + "us": "s3://hail-datasets-us-east-1/giant_whr_exome_M_EUR_Rec_2018_GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_M_EUR_Rec.ht", - "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_M_EUR_Rec.ht" + "eu": "gs://hail-datasets-eu/giant_whr_exome_M_EUR_Rec_2018_GRCh37.ht", + "us": "gs://hail-datasets-us/giant_whr_exome_M_EUR_Rec_2018_GRCh37.ht" } }, "version": "2018" @@ -3451,11 +3254,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_W_ALL_Add.ht" + "us": "s3://hail-datasets-us-east-1/giant_whr_exome_W_ALL_Add_2018_GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_W_ALL_Add.ht", - "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_W_ALL_Add.ht" + "eu": "gs://hail-datasets-eu/giant_whr_exome_W_ALL_Add_2018_GRCh37.ht", + "us": "gs://hail-datasets-us/giant_whr_exome_W_ALL_Add_2018_GRCh37.ht" } }, "version": "2018" @@ -3475,11 +3278,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_W_ALL_Rec.ht" + "us": "s3://hail-datasets-us-east-1/giant_whr_exome_W_ALL_Rec_2018_GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_W_ALL_Rec.ht", - "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_W_ALL_Rec.ht" + "eu": "gs://hail-datasets-eu/giant_whr_exome_W_ALL_Rec_2018_GRCh37.ht", + "us": "gs://hail-datasets-us/giant_whr_exome_W_ALL_Rec_2018_GRCh37.ht" } }, "version": "2018" @@ -3499,11 +3302,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_W_EUR_Add.ht" + "us": "s3://hail-datasets-us-east-1/giant_whr_exome_W_EUR_Add_2018_GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_W_EUR_Add.ht", - "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_W_EUR_Add.ht" + "eu": "gs://hail-datasets-eu/giant_whr_exome_W_EUR_Add_2018_GRCh37.ht", + "us": "gs://hail-datasets-us/giant_whr_exome_W_EUR_Add_2018_GRCh37.ht" } }, "version": "2018" @@ -3523,11 +3326,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_W_EUR_Rec.ht" + "us": "s3://hail-datasets-us-east-1/giant_whr_exome_W_EUR_Rec_2018_GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_W_EUR_Rec.ht", - "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_W_EUR_Rec.ht" + "eu": "gs://hail-datasets-eu/giant_whr_exome_W_EUR_Rec_2018_GRCh37.ht", + "us": "gs://hail-datasets-us/giant_whr_exome_W_EUR_Rec_2018_GRCh37.ht" } }, "version": "2018" @@ -4601,11 +4404,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - 
"us": "s3://hail-datasets-us-east-1/gnomAD/v2.1.1/lof_metrics_by_gene.ht" + "us": "s3://hail-datasets-us-east-1/annotations/gnomad_v2.1.1_lof_metrics_by_gene.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/gnomAD/v2.1.1/lof_metrics_by_gene.ht", - "us": "gs://hail-datasets-us/gnomAD/v2.1.1/lof_metrics_by_gene.ht" + "eu": "gs://hail-datasets-eu/annotations/gnomad_v2.1.1_lof_metrics_by_gene.ht", + "us": "gs://hail-datasets-us/annotations/gnomad_v2.1.1_lof_metrics_by_gene.ht" } }, "version": "2.1.1" @@ -4648,11 +4451,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.ht" + "us": "s3://hail-datasets-us-east-1/LDSC_baselineLD_v2.2_ld_scores.GRCh37.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.ht", - "us": "gs://hail-datasets-us/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.ht" + "eu": "gs://hail-datasets-eu/LDSC_baselineLD_v2.2_ld_scores.GRCh37.ht", + "us": "gs://hail-datasets-us/LDSC_baselineLD_v2.2_ld_scores.GRCh37.ht" } }, "version": "2.2" @@ -4667,11 +4470,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.mt" + "us": "s3://hail-datasets-us-east-1/LDSC_baselineLD_v2.2_ld_scores.GRCh37.mt" }, "gcp": { - "eu": "gs://hail-datasets-eu/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.mt", - "us": "gs://hail-datasets-us/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.mt" + "eu": "gs://hail-datasets-eu/LDSC_baselineLD_v2.2_ld_scores.GRCh37.mt", + "us": "gs://hail-datasets-us/LDSC_baselineLD_v2.2_ld_scores.GRCh37.mt" } }, "version": "2.2" @@ -4680,11 +4483,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/LDSC/baseline_v1.1/GRCh37/ld_scores.mt" + "us": "s3://hail-datasets-us-east-1/LDSC_baseline_v1.1_ld_scores.GRCh37.mt" }, "gcp": { - "eu": "gs://hail-datasets-eu/LDSC/baseline_v1.1/GRCh37/ld_scores.mt", - "us": "gs://hail-datasets-us/LDSC/baseline_v1.1/GRCh37/ld_scores.mt" + "eu": "gs://hail-datasets-eu/LDSC_baseline_v1.1_ld_scores.GRCh37.mt", + "us": "gs://hail-datasets-us/LDSC_baseline_v1.1_ld_scores.GRCh37.mt" } }, "version": "1.1" diff --git a/hail/python/hail/experimental/datasets.py b/hail/python/hail/experimental/datasets.py index e7182bef66a..5ac483b9791 100644 --- a/hail/python/hail/experimental/datasets.py +++ b/hail/python/hail/experimental/datasets.py @@ -6,21 +6,11 @@ import pkg_resources -def _read_dataset(path: str) -> Union[hl.Table, hl.MatrixTable, hl.linalg.BlockMatrix]: - if path.endswith('.ht'): - return hl.read_table(path) - elif path.endswith('.mt'): - return hl.read_matrix_table(path) - elif path.endswith('.bm'): - return hl.linalg.BlockMatrix.read(path) - raise ValueError(f'Invalid path: {path}. Can only load datasets with .ht, .mt, or .bm extensions.') - - def load_dataset(name: str, version: Optional[str], reference_genome: Optional[str], region: str = 'us', - cloud: str = 'gcp') -> Union[hl.Table, hl.MatrixTable, hl.linalg.BlockMatrix]: + cloud: str = 'gcp') -> Union[hl.Table, hl.MatrixTable]: """Load a genetic dataset from Hail's repository. 
Example @@ -114,11 +104,12 @@ def load_dataset(name: str, dataset['reference_genome'] == reference_genome])] assert len(path) == 1 path = path[0] - if path.startswith('s3://'): - try: - dataset = _read_dataset(path) - except hl.utils.java.FatalError: - dataset = _read_dataset(path.replace('s3://', 's3a://')) - else: - dataset = _read_dataset(path) - return dataset + + if path.endswith('.ht'): + return hl.read_table(path) + elif path.endswith('.mt'): + return hl.read_matrix_table(path) + elif path.endswith('.bm'): + return hl.linalg.BlockMatrix.read(path) + raise ValueError(f'Invalid path {repr(path)}: can only load' + f' datasets with .ht, .mt, or .bm extensions.') diff --git a/hail/python/hail/experimental/db.py b/hail/python/hail/experimental/db.py index 682d7094deb..344c11a5248 100644 --- a/hail/python/hail/experimental/db.py +++ b/hail/python/hail/experimental/db.py @@ -5,7 +5,8 @@ import hail as hl import pkg_resources -from hailtop.utils import (external_requests_client_session, retry_response_returning_functions) +from hailtop.utils import (retry_response_returning_functions, + external_requests_client_session) from .lens import MatrixRows, TableRows from ..expr import StructExpression @@ -269,13 +270,9 @@ def index_compatible_version(self, for version in self.versions) if index is not None] if len(compatible_indexed_values) == 0: - versions = [f'{(v.version, v.reference_genome)}' for v in self.versions] - raise ValueError( - f'Could not find compatible version of {self.name} for user' - f' dataset with key {key_expr.dtype}.\n' - f'This annotation dataset is available for the following' - f' versions and reference genome builds: {", ".join(versions)}.' - ) + raise ValueError(f'Could not find compatible version of' + f' {self.name} for user dataset with' + f' key {key_expr.dtype}.') assert len(compatible_indexed_values) == 1, \ f'{key_expr.dtype}, {self.name}, {compatible_indexed_values}' return compatible_indexed_values[0] diff --git a/hail/python/hail/experimental/haplotype_freq_em.py b/hail/python/hail/experimental/haplotype_freq_em.py index 363d84811e3..4dd8bd205a5 100644 --- a/hail/python/hail/experimental/haplotype_freq_em.py +++ b/hail/python/hail/experimental/haplotype_freq_em.py @@ -14,7 +14,7 @@ def haplotype_freq_em(gt_counts) -> ArrayExpression: [AABB, AABb, AAbb, AaBB, AaBb, Aabb, aaBB, aaBb, aabb] The estimated haplotype counts are returned in an array in the following order: - [AB, aB, Ab, ab] + [AB, Ab, aB, ab] Where _A_ and _a_ are the reference and non-reference alleles for the first variant, resp. And _B_ and _b_ are the reference and non-reference alleles for the second variant, resp. 
diff --git a/hail/python/hail/experimental/plots.py b/hail/python/hail/experimental/plots.py index 8f0a8537d10..b23c8bb0422 100644 --- a/hail/python/hail/experimental/plots.py +++ b/hail/python/hail/experimental/plots.py @@ -63,7 +63,7 @@ def plot_roc_curve(ht, scores, tp_label='tp', fp_label='fp', colors=None, title= tpr=hl.scan.count_where(ordered_ht[tp_label]) / total_tp, fpr=hl.scan.count_where(ordered_ht[fp_label]) / total_fp, ).key_by().drop('_score') - last_row = hl.utils.range_table(1).key_by().select(score_name=score, score=hl.float64(float('-inf')), tpr=hl.float64(1.0), fpr=hl.float64(1.0)) + last_row = hl.utils.range_table(1).key_by().select(score_name=score, score=hl.float64(float('-inf')), tpr=hl.float32(1.0), fpr=hl.float32(1.0)) ordered_ht = ordered_ht.union(last_row) ordered_ht = ordered_ht.annotate( auc_contrib=hl.or_else((ordered_ht.fpr - hl.scan.max(ordered_ht.fpr)) * ordered_ht.tpr, 0.0) diff --git a/hail/python/hail/experimental/vcf_combiner/__main__.py b/hail/python/hail/experimental/vcf_combiner/__main__.py index 150ca7fb9dc..7137dd0c197 100644 --- a/hail/python/hail/experimental/vcf_combiner/__main__.py +++ b/hail/python/hail/experimental/vcf_combiner/__main__.py @@ -30,7 +30,7 @@ def main(): 'GVCFs will be overridden by the names in sample map.', required=False) parser.add_argument('--branch-factor', type=int, default=CombinerConfig.default_branch_factor, help='Branch factor.') - parser.add_argument('--batch-size', type=int, default=CombinerConfig.default_phase1_batch_size, help='Batch size.') + parser.add_argument('--batch-size', type=int, default=CombinerConfig.default_batch_size, help='Batch size.') parser.add_argument('--target-records', type=int, default=CombinerConfig.default_target_records, help='Target records per partition.') parser.add_argument('--overwrite', help='overwrite the output path', action='store_true') parser.add_argument('--key-by-locus-and-alleles', help='Key by both locus and alleles in the final output.', action='store_true') diff --git a/hail/python/hail/experimental/vcf_combiner/vcf_combiner.py b/hail/python/hail/experimental/vcf_combiner/vcf_combiner.py index 5d8310a20cd..c2bc0c9a8fa 100644 --- a/hail/python/hail/experimental/vcf_combiner/vcf_combiner.py +++ b/hail/python/hail/experimental/vcf_combiner/vcf_combiner.py @@ -1,7 +1,6 @@ """An experimental library for combining (g)VCFS into sparse matrix tables""" # these are necessary for the diver script included at the end of this file import math -import os import uuid from typing import Optional, List, Tuple, Dict @@ -12,7 +11,7 @@ from hail.genetics.reference_genome import reference_genome_type from hail.ir import Apply, TableMapRows, MatrixKeyRowsBy, TopLevelReference from hail.typecheck import oneof, sequenceof, typecheck -from hail.utils.java import info, warning, Env +from hail.utils.java import info, warning _transform_rows_function_map = {} _merge_function_map = {} @@ -292,70 +291,55 @@ def combine_gvcfs(mts): return unlocalize(combined) -@typecheck(mt=hl.MatrixTable, desired_average_partition_size=int, tmp_path=str) -def calculate_new_intervals(mt, desired_average_partition_size: int, tmp_path: str): +@typecheck(ht=hl.Table, n=int, reference_genome=reference_genome_type) +def calculate_new_intervals(ht, n, reference_genome): """takes a table, keyed by ['locus', ...] and produces a list of intervals suitable - for repartitioning a combiner matrix table. + for repartitioning a combiner matrix table Parameters ---------- - mt : :class:`.MatrixTable` - Sparse MT intermediate. 
- desired_average_partition_size : :obj:`int` - Average target number of rows for each partition. - tmp_path : :obj:`str` - Temporary path for scan checkpointing. + ht : :class:`.Table` + Table / Rows Table to compute new intervals for + n : :obj:`int` + Number of rows each partition should have, (last partition may be smaller) + reference_genome: :class:`str` or :class:`.ReferenceGenome`, optional + Reference genome to use. Returns ------- - (:obj:`List[Interval]`, :obj:`.Type`) + :obj:`List[Interval]` """ - assert list(mt.row_key) == ['locus'] - assert isinstance(mt.locus.dtype, hl.tlocus) - reference_genome = mt.locus.dtype.reference_genome + assert list(ht.key) == ['locus'] + assert ht.locus.dtype == hl.tlocus(reference_genome=reference_genome) end = hl.Locus(reference_genome.contigs[-1], reference_genome.lengths[reference_genome.contigs[-1]], reference_genome=reference_genome) - (n_rows, n_cols) = mt.count() + n_rows = ht.count() if n_rows == 0: raise ValueError('empty table!') - # split by a weight function that takes into account the number of - # dense entries per row. However, give each row some base weight - # to prevent densify computations from becoming unbalanced (these - # scale roughly linearly with N_ROW * N_COL) - ht = mt.select_rows(weight=hl.agg.count() + (n_cols // 25) + 1).rows().checkpoint(tmp_path) - - total_weight = ht.aggregate(hl.agg.sum(ht.weight)) - partition_weight = int(total_weight / (n_rows / desired_average_partition_size)) - - ht = ht.annotate(cumulative_weight=hl.scan.sum(ht.weight), - last_weight=hl.scan._prev_nonnull(ht.weight), - row_idx=hl.scan.count()) - - def partition_bound(x): - return x - (x % hl.int64(partition_weight)) - - at_partition_bound = partition_bound(ht.cumulative_weight) != partition_bound(ht.cumulative_weight - ht.last_weight) - - ht = ht.filter(at_partition_bound | (ht.row_idx == n_rows - 1)) + ht = ht.select() + ht = ht.annotate(x=hl.scan.count()) + ht = ht.annotate(y=ht.x + 1) + ht = ht.filter((ht.x // n != ht.y // n) | (ht.x == (n_rows - 1))) + ht = ht.select() ht = ht.annotate(start=hl.or_else( hl.scan._prev_nonnull(hl.locus_from_global_position(ht.locus.global_position() + 1, reference_genome=reference_genome)), hl.locus_from_global_position(0, reference_genome=reference_genome))) - ht = ht.select( - interval=hl.interval(start=hl.struct(locus=ht.start), end=hl.struct(locus=ht.locus), includes_end=True)) + ht = ht.key_by() + ht = ht.select(interval=hl.interval(start=ht.start, end=ht.locus, includes_end=True)) - intervals_dtype = hl.tarray(ht.interval.dtype) intervals = ht.aggregate(hl.agg.collect(ht.interval)) + last_st = hl.eval( - hl.locus_from_global_position(hl.literal(intervals[-1].end.locus).global_position() + 1, + hl.locus_from_global_position(hl.literal(intervals[-1].end).global_position() + 1, reference_genome=reference_genome)) - interval = hl.Interval(start=hl.Struct(locus=last_st), end=hl.Struct(locus=end), includes_end=True) + interval = hl.Interval(start=last_st, end=end, includes_end=True) intervals.append(interval) - return intervals, intervals_dtype + return intervals @typecheck(reference_genome=reference_genome_type, interval_size=int) @@ -442,9 +426,8 @@ def __init__(self, class CombinerConfig(object): - default_max_partitions_per_job = 75_000 default_branch_factor = 100 - default_phase1_batch_size = 100 + default_batch_size = 100 default_target_records = 30_000 # These are used to calculate intervals for reading GVCFs in the combiner @@ -456,7 +439,7 @@ class CombinerConfig(object): def __init__(self, 
branch_factor: int = default_branch_factor, - batch_size: int = default_phase1_batch_size, + batch_size: int = default_batch_size, target_records: int = default_target_records): self.branch_factor: int = branch_factor self.batch_size: int = batch_size @@ -478,7 +461,6 @@ def int_ceil(x): file_size.append([1 for _ in range(n_inputs)]) while len(file_size[-1]) > 1: - batch_size_this_phase = self.batch_size if len(file_size) == 1 else 1 last_stage_files = file_size[-1] n = len(last_stage_files) i = 0 @@ -486,7 +468,7 @@ def int_ceil(x): while (i < n): job = [] job_i = 0 - while job_i < batch_size_this_phase and i < n: + while job_i < self.batch_size and i < n: merge = [] merge_i = 0 merge_size = 0 @@ -519,7 +501,7 @@ def int_ceil(x): info(f"GVCF combiner plan:\n" f" Branch factor: {self.branch_factor}\n" - f" Phase 1 batch size: {self.batch_size}\n" + f" Batch size: {self.batch_size}\n" f" Combining {n_inputs} input files in {tree_height} phases with {total_jobs} total jobs.{''.join(phase_strs)}\n") return CombinerPlan(file_size, phases) @@ -535,7 +517,7 @@ def run_combiner(sample_paths: List[str], header: Optional[str] = None, sample_names: Optional[List[str]] = None, branch_factor: int = CombinerConfig.default_branch_factor, - batch_size: int = CombinerConfig.default_phase1_batch_size, + batch_size: int = CombinerConfig.default_batch_size, target_records: int = CombinerConfig.default_target_records, overwrite: bool = False, reference_genome: str = 'default', @@ -662,10 +644,9 @@ def run_combiner(sample_paths: List[str], info(f"Starting phase {phase_i}/{n_phases}, merging {len(files_to_merge)} {merge_str} in {n_jobs} {job_str}.") if phase_i > 1: - intervals, intervals_dtype = calculate_new_intervals(hl.read_matrix_table(files_to_merge[0]), - config.target_records, - os.path.join(tmp_path, - f'phase{phase_i}_interval_checkpoint.ht')) + intervals = calculate_new_intervals(hl.read_matrix_table(files_to_merge[0]).rows(), + config.target_records, + reference_genome=reference_genome) new_files_to_merge = [] @@ -690,8 +671,7 @@ def run_combiner(sample_paths: List[str], reference_genome=reference_genome, contig_recoding=contig_recoding)] else: - mts = Env.spark_backend("vcf_combiner").read_multiple_matrix_tables(inputs, intervals, - intervals_dtype) + mts = [hl.read_matrix_table(path, _intervals=intervals) for path in inputs] merge_mts.append(combine_gvcfs(mts)) diff --git a/hail/python/hail/expr/__init__.py b/hail/python/hail/expr/__init__.py index 9f168591b0b..5cd92f149f3 100644 --- a/hail/python/hail/expr/__init__.py +++ b/hail/python/hail/expr/__init__.py @@ -36,7 +36,7 @@ is_star, is_complex, is_strand_ambiguous, allele_type, hamming, \ mendel_error_code, triangle, downcode, gq_from_pl, parse_call, \ unphased_diploid_gt_index_call, argmax, argmin, zip, _zip_func, enumerate, zip_with_index, map, \ - flatmap, starmap, flatten, any, all, filter, sorted, find, group_by, fold, \ + flatmap, flatten, any, all, filter, sorted, find, group_by, fold, \ array_scan, len, min, nanmin, max, nanmax, mean, median, product, sum, \ cumulative_sum, struct, tuple, set, empty_set, array, empty_array, \ empty_dict, delimit, abs, sign, floor, ceil, float, float32, float64, \ @@ -182,7 +182,6 @@ 'zip_with_index', 'map', 'flatmap', - 'starmap', 'flatten', 'any', 'all', diff --git a/hail/python/hail/expr/expressions/base_expression.py b/hail/python/hail/expr/expressions/base_expression.py index 7e9f6c13c38..7c342a3ea9a 100644 --- a/hail/python/hail/expr/expressions/base_expression.py +++ 
b/hail/python/hail/expr/expressions/base_expression.py @@ -4,7 +4,7 @@ import hail import hail as hl from hail.expr import expressions -from hail.expr.types import HailType, is_numeric, is_compound, is_setlike, tint32, \ +from hail.expr.types import HailType, is_numeric, is_compound, tint32, \ tint64, tfloat32, tfloat64, tstr, tbool, tarray, \ tndarray, tset, tdict, tstruct, ttuple, tinterval, \ tlocus, tcall, from_numpy @@ -162,7 +162,7 @@ def impute_type(x): raise ExpressionException("Hail does not support heterogeneous arrays: " "found list with elements of types {} ".format(list(ts))) return tarray(unified_type) - elif is_setlike(x): + elif isinstance(x, set): if len(x) == 0: raise ExpressionException("Cannot impute type of empty set. Use 'hl.empty_set' to create an empty set.") ts = {impute_type(element) for element in x} @@ -517,7 +517,7 @@ def _promote_numeric(self, typ): def _div_ret_type_f(t): assert is_numeric(t) if t == tint32 or t == tint64: - return tfloat64 + return tfloat32 else: # Float64 or Float32 return t diff --git a/hail/python/hail/expr/expressions/typed_expressions.py b/hail/python/hail/expr/expressions/typed_expressions.py index 47d8a071920..56b5e1537ef 100644 --- a/hail/python/hail/expr/expressions/typed_expressions.py +++ b/hail/python/hail/expr/expressions/typed_expressions.py @@ -14,7 +14,7 @@ tndarray, tlocus, tinterval, is_numeric import hail.ir as ir from hail.typecheck import typecheck, typecheck_method, func_spec, oneof, \ - identity, nullable, tupleof, sliceof, dictof, anyfunc + identity, nullable, tupleof, sliceof, dictof from hail.utils.java import Env, warning from hail.utils.linkedlist import LinkedList from hail.utils.misc import wrap_to_list, wrap_to_tuple, get_nice_field_error, get_nice_attr_error @@ -345,29 +345,6 @@ def transform_ir(array, name, body): assert isinstance(self._type, tarray) return array_map - @typecheck_method(f=anyfunc) - def starmap(self, f): - r"""Transform each element of a collection of tuples. - - Examples - -------- - - >>> hl.eval(hl.array([(1, 2), (2, 3)]).starmap(lambda x, y: x+y)) - [3, 5] - - Parameters - ---------- - f : function ( (\*args) -> :class:`.Expression`) - Function to transform each element of the collection. - - Returns - ------- - :class:`.CollectionExpression`. - Collection where each element has been transformed according to `f`. - """ - - return self.map(lambda e: f(*e)) - def length(self): """Returns the size of a collection. @@ -2202,10 +2179,27 @@ def __truediv__(self, other): :class:`.NumericExpression` The left number divided by the left. """ - return self._bin_op_numeric("/", other, self._div_ret_type_f) + + def ret_type_f(t): + assert is_numeric(t) + if t == tint32 or t == tint64: + return tfloat32 + else: + # Float64 or Float32 + return t + + return self._bin_op_numeric("/", other, ret_type_f) def __rtruediv__(self, other): - return self._bin_op_numeric_reverse("/", other, self._div_ret_type_f) + def ret_type_f(t): + assert is_numeric(t) + if t == tint32 or t == tint64: + return tfloat32 + else: + # float64 or float32 + return t + + return self._bin_op_numeric_reverse("/", other, ret_type_f) def __floordiv__(self, other): """Divide two numbers with floor division. 
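Following the return-type change to __truediv__ above, a quick sketch of the resulting static type; no backend evaluation is needed since only the expression dtype is inspected, and the float32 result reflects this patch rather than any particular release:

    import hail as hl

    x = hl.int32(3) / hl.int32(2)
    print(x.dtype)   # float32 after this change; float64 operands still divide to float64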
@@ -3775,41 +3769,20 @@ def shape(self): _opt_long_slice = sliceof(nullable(expr_int64), nullable(expr_int64), nullable(expr_int64)) - @typecheck_method(item=nullable(oneof(expr_int64, type(...), _opt_long_slice, tupleof(nullable(oneof(expr_int64, type(...), _opt_long_slice)))))) + @typecheck_method(item=oneof(expr_int64, _opt_long_slice, tupleof(oneof(expr_int64, _opt_long_slice)))) def __getitem__(self, item): if not isinstance(item, tuple): item = (item,) - num_ellipses = len([e for e in item if isinstance(e, type(...))]) - if num_ellipses > 1: - raise IndexError("an index can only have a single ellipsis (\'...\')") - - num_nones = len([x for x in item if x is None]) - list_item = list(item) - - if num_ellipses == 1: - list_types = [type(e) for e in list_item] - ellipsis_location = list_types.index(type(...)) - num_slices_to_add = self.ndim - (len(item) - num_nones) + 1 - no_ellipses = list_item[:ellipsis_location] + [slice(None)] * num_slices_to_add + list_item[ellipsis_location + 1:] - else: - no_ellipses = list_item - - no_nums = [x for x in no_ellipses if ((x is None) or (isinstance(x, slice)))] - indices_nones = [i for i, x in enumerate(no_nums) if x is None] - formatted_item = [x for x in no_ellipses if x is not None] + if len(item) != self.ndim: + raise ValueError(f'Must specify one index per dimension. ' + f'Expected {self.ndim} dimensions but got {len(item)}') - if len(formatted_item) > self.ndim: - raise IndexError(f'too many indices for array: array is ' - f'{self.ndim}-dimensional, but {len(item)} were indexed') - if len(formatted_item) < self.ndim: - formatted_item += [slice(None, None, None)] * (self.ndim - len(formatted_item)) - - n_sliced_dims = len([s for s in formatted_item if isinstance(s, slice)]) + n_sliced_dims = len([s for s in item if isinstance(s, slice)]) if n_sliced_dims > 0: slices = [] - for i, s in enumerate(formatted_item): + for i, s in enumerate(item): dlen = self.shape[i] if isinstance(s, slice): @@ -3821,7 +3794,6 @@ def __getitem__(self, item): max_bound = hl.if_else(step > 0, dlen, dlen - 1) min_bound = hl.if_else(step > 0, to_expr(0, tint64), to_expr(-1, tint64)) - if s.start is not None: # python treats start < -dlen as None when step < 0: [0,1][-3:0:-1] # and 0 otherwise: [0,1][-3::1] == [0,1][0::1] @@ -3851,35 +3823,15 @@ def __getitem__(self, item): hl.str("Index ") + hl.str(s) + hl.str(f" is out of bounds for axis {i} with size ") + hl.str(dlen) ) slices.append(checked_int) - product = construct_expr(ir.NDArraySlice(self._ir, hl.tuple(slices)._ir), - tndarray(self._type.element_type, n_sliced_dims), - self._indices, - self._aggregations) - - if len(indices_nones) > 0: - reshape_arg = [] - index_non_nones = 0 - for i in range(n_sliced_dims + num_nones): - if i in indices_nones: - reshape_arg.append(1) - else: - reshape_arg.append(product.shape[index_non_nones]) - index_non_nones += 1 - product = product.reshape(tuple(reshape_arg)) + return construct_expr(ir.NDArraySlice(self._ir, hl.tuple(slices)._ir), + tndarray(self._type.element_type, n_sliced_dims), + self._indices, + self._aggregations) - else: - product = construct_expr(ir.NDArrayRef(self._ir, [idx._ir for idx in formatted_item]), - self._type.element_type, - self._indices, - self._aggregations) - - if len(indices_nones) > 0: - reshape_arg = [] - for i in indices_nones: - reshape_arg.append(1) - product = hl.nd.array(product).reshape(tuple(reshape_arg)) - - return product + return construct_expr(ir.NDArrayRef(self._ir, [idx._ir for idx in item]), + self._type.element_type, + self._indices, 
+ self._aggregations) @typecheck_method(shape=oneof(expr_int64, tupleof(expr_int64), expr_tuple())) def reshape(self, *shape): diff --git a/hail/python/hail/expr/functions.py b/hail/python/hail/expr/functions.py index e44db3b3631..96048e98ff1 100644 --- a/hail/python/hail/expr/functions.py +++ b/hail/python/hail/expr/functions.py @@ -25,7 +25,7 @@ from hail.genetics.reference_genome import reference_genome_type, ReferenceGenome import hail.ir as ir from hail.typecheck import (typecheck, nullable, anytype, enumeration, tupleof, - func_spec, oneof, arg_check, args_check, anyfunc) + func_spec, oneof, arg_check, args_check) from hail.utils.java import Env, warning from hail.utils.misc import plural @@ -3614,34 +3614,6 @@ def map(f: Callable, collection): return collection.map(f) -@typecheck(f=anyfunc, - collection=expr_oneof(expr_set(), expr_array(), expr_ndarray())) -def starmap(f: Callable, collection): - r"""Transform each element of a collection of tuples. - - Examples - -------- - - >>> a = [(1, 5), (3, 2), (7, 8)] - - >>> hl.eval(hl.starmap(lambda x, y: hl.if_else(x < y, x, y), a)) - [1, 2, 7] - - Parameters - ---------- - f : function ( (\*args) -> :class:`.Expression`) - Function to transform each element of the collection. - collection : :class:`.ArrayExpression` or :class:`.SetExpression` - Collection expression. - - Returns - ------- - :class:`.ArrayExpression` or :class:`.SetExpression`. - Collection where each element has been transformed by `f`. - """ - return collection.starmap(f) - - @typecheck(x=expr_oneof(expr_set(), expr_array(), expr_dict(), expr_str, expr_tuple(), expr_struct())) def len(x) -> Int32Expression: """Returns the size of a collection or string. @@ -4510,7 +4482,7 @@ def _sort_by(collection, less_than): collection._aggregations) -@typecheck(collection=expr_oneof(expr_array(), expr_dict(), expr_set()), +@typecheck(collection=expr_array(), key=nullable(func_spec(1, expr_any)), reverse=expr_bool) def sorted(collection, @@ -4538,8 +4510,8 @@ def sorted(collection, Parameters ---------- - collection : :class:`.ArrayExpression` or :class:`.SetExpression` or :class:`.DictExpression` - Collection to sort. + collection : :class:`.ArrayExpression` + Array to sort. key: function ( (arg) -> :class:`.Expression`), optional Function to evaluate for each element to compute sort key. reverse : :class:`.BooleanExpression` @@ -4551,9 +4523,6 @@ def sorted(collection, Sorted array. 
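Since the typecheck above now admits only array expressions, a short sketch of the behavior that remains, exercising the key and reverse parameters documented above:

    import hail as hl

    names = hl.array(['Charlie', 'Alice', 'Bob'])
    print(hl.eval(hl.sorted(names)))                           # ['Alice', 'Bob', 'Charlie']
    print(hl.eval(hl.sorted(names, key=lambda x: hl.len(x))))  # shortest first: ['Bob', 'Alice', 'Charlie']
    print(hl.eval(hl.sorted(names, reverse=True)))             # ['Charlie', 'Bob', 'Alice']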
""" - if not isinstance(collection, ArrayExpression): - collection = hl.array(collection) - def comp(left, right): return (hl.case() .when(hl.is_missing(left), False) diff --git a/hail/python/hail/fs/fs.py b/hail/python/hail/fs/fs.py index 63c07c316dc..4c4efd7abc2 100644 --- a/hail/python/hail/fs/fs.py +++ b/hail/python/hail/fs/fs.py @@ -57,10 +57,6 @@ def remove(self, path: str): def rmtree(self, path: str): pass - @abc.abstractmethod - def supports_scheme(self, scheme: str) -> bool: - pass - def copy_log(self, path: str) -> None: log = Env.hc()._log try: diff --git a/hail/python/hail/fs/google_fs.py b/hail/python/hail/fs/google_fs.py index 0206ec0e065..454bad8cf82 100644 --- a/hail/python/hail/fs/google_fs.py +++ b/hail/python/hail/fs/google_fs.py @@ -139,6 +139,3 @@ def rmtree(self, path: str): if self._is_local(path): rmtree(path) self.client.rm(path, recursive=True) - - def supports_scheme(self, scheme: str) -> bool: - return scheme in ("gs", "") diff --git a/hail/python/hail/fs/hadoop_fs.py b/hail/python/hail/fs/hadoop_fs.py index 4a77aec7681..114c0a3fe1d 100644 --- a/hail/python/hail/fs/hadoop_fs.py +++ b/hail/python/hail/fs/hadoop_fs.py @@ -50,9 +50,6 @@ def remove(self, path: str): def rmtree(self, path: str): return self._jfs.rmtree(path) - def supports_scheme(self, scheme: str) -> bool: - return self._jfs.supportsScheme(scheme) - class HadoopReader(io.RawIOBase): def __init__(self, hfs, path, buffer_size): diff --git a/hail/python/hail/fs/local_fs.py b/hail/python/hail/fs/local_fs.py index ad7c5270be6..dfa03cb5ee9 100644 --- a/hail/python/hail/fs/local_fs.py +++ b/hail/python/hail/fs/local_fs.py @@ -68,6 +68,3 @@ def remove(self, path: str): def rmtree(self, path: str): rmtree(path) - - def supports_scheme(self, scheme: str) -> bool: - return scheme == "" diff --git a/hail/python/hail/ir/ir.py b/hail/python/hail/ir/ir.py index 22224c2c2d0..00ffe7c7b37 100644 --- a/hail/python/hail/ir/ir.py +++ b/hail/python/hail/ir/ir.py @@ -416,8 +416,6 @@ def bound_variables(self): return {n for n, _ in self.params} | {self.name} | super().bound_variables def _compute_type(self, env, agg_env): - for _, b in self.params: - b._compute_type(env, agg_env) self.body._compute_type(_env_bind(env, self.bindings(len(self.params))), agg_env) self._type = self.body.typ @@ -477,10 +475,7 @@ def _compute_type(self, env, agg_env): self.left._compute_type(env, agg_env) self.right._compute_type(env, agg_env) if self.op == '/': - int_types = [tint32, tint64] - if self.left.typ in int_types and self.right.typ in int_types: - self._type = tfloat64 - elif self.left.typ == tfloat64: + if self.left.typ == tfloat64: self._type = tfloat64 else: self._type = tfloat32 diff --git a/hail/python/hail/methods/__init__.py b/hail/python/hail/methods/__init__.py index efa3b770974..79557145dfb 100644 --- a/hail/python/hail/methods/__init__.py +++ b/hail/python/hail/methods/__init__.py @@ -10,7 +10,7 @@ filter_alleles, filter_alleles_hts, split_multi_hts, balding_nichols_model, ld_prune, row_correlation, ld_matrix, linear_mixed_model, linear_regression_rows, _linear_regression_rows_nd, - logistic_regression_rows, _logistic_regression_rows_nd, poisson_regression_rows, + logistic_regression_rows, poisson_regression_rows, linear_mixed_regression_rows, lambda_gc) from .qc import sample_qc, variant_qc, vep, concordance, nirvana, summarize_variants from .misc import rename_duplicates, maximal_independent_set, filter_intervals @@ -24,7 +24,6 @@ 'linear_regression_rows', '_linear_regression_rows_nd', 'logistic_regression_rows', - 
'_logistic_regression_rows_nd', 'poisson_regression_rows', 'linear_mixed_regression_rows', 'lambda_gc', diff --git a/hail/python/hail/methods/impex.py b/hail/python/hail/methods/impex.py index 37e26ebd336..acc1e89911f 100644 --- a/hail/python/hail/methods/impex.py +++ b/hail/python/hail/methods/impex.py @@ -1,23 +1,24 @@ import json import re -from typing import List -import hail as hl -from hail import ir +from hail.typecheck import typecheck, nullable, oneof, dictof, anytype, \ + sequenceof, enumeration, sized_tupleof, numeric, table_key_type, char +from hail.utils.java import Env, FatalError, jindexed_seq_args, warning +from hail.utils import wrap_to_list +from hail.matrixtable import MatrixTable +from hail.table import Table +from hail.expr.types import hail_type, tarray, tfloat64, tstr, tint32, tstruct, \ + tcall, tbool, tint64, tfloat32 from hail.expr import StructExpression, LocusExpression, \ expr_array, expr_float64, expr_str, expr_numeric, expr_call, expr_bool, \ expr_any, \ to_expr, analyze -from hail.expr.types import hail_type, tarray, tfloat64, tstr, tint32, tstruct, \ - tcall, tbool, tint64, tfloat32 +from hail import ir from hail.genetics.reference_genome import reference_genome_type -from hail.matrixtable import MatrixTable -from hail.methods.misc import require_biallelic, require_row_key_variant, require_col_key_str -from hail.table import Table -from hail.typecheck import typecheck, nullable, oneof, dictof, anytype, \ - sequenceof, enumeration, sized_tupleof, numeric, table_key_type, char -from hail.utils import wrap_to_list -from hail.utils.java import Env, FatalError, jindexed_seq_args, warning +from hail.methods.misc import require_biallelic, require_row_key_variant, require_row_key_variant_w_struct_locus, require_col_key_str +import hail as hl + +from typing import List def locus_interval_expr(contig, start, end, includes_start, includes_end, @@ -322,7 +323,8 @@ def export_plink(dataset, output, call=None, fam_id=None, ind_id=None, pat_id=No The default value is ``0.0``. The missing value is ``0.0``. 
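A minimal sketch of an export_plink call satisfying the variant-row-key requirement enforced just below; the simulated input and the output prefix are placeholders, not part of the patch:

    import hail as hl

    # Small simulated dataset (biallelic, keyed by locus and alleles) so the call is runnable.
    mt = hl.balding_nichols_model(n_populations=1, n_samples=5, n_variants=10)

    # Passing ind_id explicitly avoids the string column-key requirement mentioned above.
    hl.export_plink(mt, 'output/plink_example', ind_id=hl.str(mt.sample_idx))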
""" - require_biallelic(dataset, 'export_plink', tolerate_generic_locus=True) + require_biallelic(dataset, 'export_plink') + require_row_key_variant_w_struct_locus(dataset, 'export_plink') if ind_id is None: require_col_key_str(dataset, "export_plink") diff --git a/hail/python/hail/methods/misc.py b/hail/python/hail/methods/misc.py index 82c7e938aaa..31f945dfad1 100644 --- a/hail/python/hail/methods/misc.py +++ b/hail/python/hail/methods/misc.py @@ -230,12 +230,9 @@ def require_key(table, method): raise ValueError("Method '{}' requires a non-empty key".format(method)) -@typecheck(dataset=MatrixTable, method=str, tolerate_generic_locus=bool) -def require_biallelic(dataset, method, tolerate_generic_locus: bool = False) -> MatrixTable: - if tolerate_generic_locus: - require_row_key_variant_w_struct_locus(dataset, method) - else: - require_row_key_variant(dataset, method) +@typecheck(dataset=MatrixTable, method=str) +def require_biallelic(dataset, method) -> MatrixTable: + require_row_key_variant(dataset, method) return dataset._select_rows(method, hl.case() .when(dataset.alleles.length() == 2, dataset._rvrow) diff --git a/hail/python/hail/methods/statgen.py b/hail/python/hail/methods/statgen.py index 1fda56a868a..19a56094dc3 100644 --- a/hail/python/hail/methods/statgen.py +++ b/hail/python/hail/methods/statgen.py @@ -547,7 +547,7 @@ def process_partition(part): @typecheck(test=enumeration('wald', 'lrt', 'score', 'firth'), - y=oneof(expr_float64, sequenceof(expr_float64)), + y=oneof(expr_float64, sequenceof(expr_float64), sequenceof(sequenceof(expr_float64))), x=expr_float64, covariates=sequenceof(expr_float64), pass_through=sequenceof(oneof(str, Expression))) @@ -815,417 +815,6 @@ def logistic_regression_rows(test, y, x, covariates, pass_through=()) -> hail.Ta return result.persist() -# Helpers for logreg: -def mean_impute(hl_array): - non_missing_mean = hl.mean(hl_array, filter_missing=True) - return hl_array.map(lambda entry: hl.if_else(hl.is_defined(entry), entry, non_missing_mean)) - - -def sigmoid(hl_nd): - return hl_nd.map(lambda x: hl.if_else(x > 0, hl.rbind(hl.exp(x), lambda exped: exped / (exped + 1)), 1 / (1 + hl.exp(-x)))) - - -def nd_max(hl_nd): - return hl.max(hl_nd.reshape(-1)._data_array()) - - -def logreg_fit(X, y, null_fit=None, max_iter=25, tol=1E-6): - assert(X.ndim == 2) - assert(y.ndim == 1) - # X is samples by covs. - # y is length num samples, for one cov. - n = X.shape[0] - m = X.shape[1] - - if null_fit is None: - avg = y.sum() / n - logit_avg = hl.log(avg / (1 - avg)) - b = hl.nd.hstack([hl.nd.array([logit_avg]), hl.nd.zeros((hl.int32(m - 1)))]) - mu = sigmoid(X @ b) - score = X.T @ (y - mu) - # Reshape so we do a rowwise multiply - fisher = X.T @ (X * (mu * (1 - mu)).reshape(-1, 1)) - else: - # num covs used to fit null model. 
- m0 = null_fit.b.shape[0] - m_diff = m - m0 - - X0 = X[:, 0:m0] - X1 = X[:, m0:] - - b = hl.nd.hstack([null_fit.b, hl.nd.zeros((m_diff,))]) - mu = sigmoid(X @ b) - score = hl.nd.hstack([null_fit.score, X1.T @ (y - mu)]) - - fisher00 = null_fit.fisher - fisher01 = X0.T @ (X1 * (mu * (1 - mu)).reshape(-1, 1)) - fisher10 = fisher01.T - fisher11 = X1.T @ (X1 * (mu * (1 - mu)).reshape(-1, 1)) - - fisher = hl.nd.vstack([ - hl.nd.hstack([fisher00, fisher01]), - hl.nd.hstack([fisher10, fisher11]) - ]) - - # Useful type abbreviations - tvector64 = hl.tndarray(hl.tfloat64, 1) - tmatrix64 = hl.tndarray(hl.tfloat64, 2) - search_return_type = hl.tstruct(b=tvector64, score=tvector64, fisher=tmatrix64, num_iter=hl.tint32, log_lkhd=hl.tfloat64, converged=hl.tbool, exploded=hl.tbool) - - def na(field_name): - return hl.missing(search_return_type[field_name]) - - # Need to do looping now. - def search(recur, cur_iter, b, mu, score, fisher): - delta_b_struct = hl.nd.solve(fisher, score, no_crash=True) - - exploded = delta_b_struct.failed - delta_b = delta_b_struct.solution - max_delta_b = nd_max(delta_b.map(lambda e: hl.abs(e))) - log_lkhd = ((y * mu) + (1 - y) * (1 - mu)).map(lambda e: hl.log(e)).sum() - - def compute_next_iter(cur_iter, b, mu, score, fisher): - cur_iter = cur_iter + 1 - b = b + delta_b - mu = sigmoid(X @ b) - score = X.T @ (y - mu) - fisher = X.T @ (X * (mu * (1 - mu)).reshape(-1, 1)) - return recur(cur_iter, b, mu, score, fisher) - - return (hl.case() - .when(exploded | hl.is_nan(delta_b[0]), hl.struct(b=na('b'), score=na('score'), fisher=na('fisher'), num_iter=cur_iter, log_lkhd=log_lkhd, converged=False, exploded=True)) - .when(cur_iter > max_iter, hl.struct(b=na('b'), score=na('score'), fisher=na('fisher'), num_iter=cur_iter, log_lkhd=log_lkhd, converged=False, exploded=False)) - .when(max_delta_b < tol, hl.struct(b=b, score=score, fisher=fisher, num_iter=cur_iter, log_lkhd=log_lkhd, converged=True, exploded=False)) - .default(compute_next_iter(cur_iter, b, mu, score, fisher))) - - res_struct = hl.experimental.loop(search, search_return_type, 1, b, mu, score, fisher) - - return res_struct - - -def wald_test(X, y, null_fit, link): - assert (link == "logistic") - fit = logreg_fit(X, y, null_fit) - - se = hl.nd.diagonal(hl.nd.inv(fit.fisher)).map(lambda e: hl.sqrt(e)) - z = fit.b / se - p = z.map(lambda e: 2 * hl.pnorm(-hl.abs(e))) - return hl.struct( - beta=fit.b[X.shape[1] - 1], - standard_error=se[X.shape[1] - 1], - z_stat=z[X.shape[1] - 1], - p_value=p[X.shape[1] - 1], - fit=hl.struct(n_iterations=fit.num_iter, converged=fit.converged, exploded=fit.exploded)) - - -def lrt_test(X, y, null_fit, link): - assert (link == "logistic") - fit = logreg_fit(X, y, null_fit) - - chi_sq = hl.if_else(~fit.converged, hl.missing(hl.tfloat64), 2 * (fit.log_lkhd - null_fit.log_lkhd)) - p = hl.pchisqtail(chi_sq, X.shape[1] - null_fit.b.shape[0]) - - return hl.struct( - beta=fit.b[X.shape[1] - 1], - chi_sq_stat=chi_sq, - p_value=p, - fit=hl.struct(n_iterations=fit.num_iter, converged=fit.converged, exploded=fit.exploded)) - - -@typecheck(test=enumeration('wald', 'lrt', 'score', 'firth'), - y=oneof(expr_float64, sequenceof(expr_float64)), - x=expr_float64, - covariates=sequenceof(expr_float64), - pass_through=sequenceof(oneof(str, Expression))) -def _logistic_regression_rows_nd(test, y, x, covariates, pass_through=()) -> hail.Table: - r"""For each row, test an input variable for association with a - binary response variable using logistic regression. 
- - Examples - -------- - Run the logistic regression Wald test per variant using a Boolean - phenotype, intercept and two covariates stored in column-indexed - fields: - - >>> result_ht = hl.logistic_regression_rows( - ... test='wald', - ... y=dataset.pheno.is_case, - ... x=dataset.GT.n_alt_alleles(), - ... covariates=[1, dataset.pheno.age, dataset.pheno.is_female]) - - Run the logistic regression Wald test per variant using a list of binary (0/1) - phenotypes, intercept and two covariates stored in column-indexed - fields: - - >>> result_ht = hl.logistic_regression_rows( - ... test='wald', - ... y=[dataset.pheno.is_case, dataset.pheno.is_case], # where pheno values are 0, 1, or missing - ... x=dataset.GT.n_alt_alleles(), - ... covariates=[1, dataset.pheno.age, dataset.pheno.is_female]) - - Warning - ------- - :func:`.logistic_regression_rows` considers the same set of - columns (i.e., samples, points) for every row, namely those columns for - which **all** response variables and covariates are defined. For each row, missing values of - `x` are mean-imputed over these columns. As in the example, the - intercept covariate ``1`` must be included **explicitly** if desired. - - Notes - ----- - This method performs, for each row, a significance test of the input - variable in predicting a binary (case-control) response variable based - on the logistic regression model. The response variable type must either - be numeric (with all present values 0 or 1) or Boolean, in which case - true and false are coded as 1 and 0, respectively. - - Hail supports the Wald test ('wald'), likelihood ratio test ('lrt'), - Rao score test ('score'), and Firth test ('firth'). Hail only includes - columns for which the response variable and all covariates are defined. - For each row, Hail imputes missing input values as the mean of the - non-missing values. - - The example above considers a model of the form - - .. math:: - - \mathrm{Prob}(\mathrm{is\_case}) = - \mathrm{sigmoid}(\beta_0 + \beta_1 \, \mathrm{gt} - + \beta_2 \, \mathrm{age} - + \beta_3 \, \mathrm{is\_female} + \varepsilon), - \quad - \varepsilon \sim \mathrm{N}(0, \sigma^2) - - where :math:`\mathrm{sigmoid}` is the `sigmoid function`_, the genotype - :math:`\mathrm{gt}` is coded as 0 for HomRef, 1 for Het, and 2 for - HomVar, and the Boolean covariate :math:`\mathrm{is\_female}` is coded as - for ``True`` (female) and 0 for ``False`` (male). The null model sets - :math:`\beta_1 = 0`. - - .. _sigmoid function: https://en.wikipedia.org/wiki/Sigmoid_function - - The structure of the emitted row field depends on the test statistic as - shown in the tables below. 
- - ========== ================== ======= ============================================ - Test Field Type Value - ========== ================== ======= ============================================ - Wald `beta` float64 fit effect coefficient, - :math:`\hat\beta_1` - Wald `standard_error` float64 estimated standard error, - :math:`\widehat{\mathrm{se}}` - Wald `z_stat` float64 Wald :math:`z`-statistic, equal to - :math:`\hat\beta_1 / \widehat{\mathrm{se}}` - Wald `p_value` float64 Wald p-value testing :math:`\beta_1 = 0` - LRT, Firth `beta` float64 fit effect coefficient, - :math:`\hat\beta_1` - LRT, Firth `chi_sq_stat` float64 deviance statistic - LRT, Firth `p_value` float64 LRT / Firth p-value testing - :math:`\beta_1 = 0` - Score `chi_sq_stat` float64 score statistic - Score `p_value` float64 score p-value testing :math:`\beta_1 = 0` - ========== ================== ======= ============================================ - - For the Wald and likelihood ratio tests, Hail fits the logistic model for - each row using Newton iteration and only emits the above fields - when the maximum likelihood estimate of the coefficients converges. The - Firth test uses a modified form of Newton iteration. To help diagnose - convergence issues, Hail also emits three fields which summarize the - iterative fitting process: - - ================ =================== ======= =============================== - Test Field Type Value - ================ =================== ======= =============================== - Wald, LRT, Firth `fit.n_iterations` int32 number of iterations until - convergence, explosion, or - reaching the max (25 for - Wald, LRT; 100 for Firth) - Wald, LRT, Firth `fit.converged` bool ``True`` if iteration converged - Wald, LRT, Firth `fit.exploded` bool ``True`` if iteration exploded - ================ =================== ======= =============================== - - We consider iteration to have converged when every coordinate of - :math:`\beta` changes by less than :math:`10^{-6}`. For Wald and LRT, - up to 25 iterations are attempted; in testing we find 4 or 5 iterations - nearly always suffice. Convergence may also fail due to explosion, - which refers to low-level numerical linear algebra exceptions caused by - manipulating ill-conditioned matrices. Explosion may result from (nearly) - linearly dependent covariates or complete separation_. - - .. _separation: https://en.wikipedia.org/wiki/Separation_(statistics) - - A more common situation in genetics is quasi-complete seperation, e.g. - variants that are observed only in cases (or controls). Such variants - inevitably arise when testing millions of variants with very low minor - allele count. The maximum likelihood estimate of :math:`\beta` under - logistic regression is then undefined but convergence may still occur - after a large number of iterations due to a very flat likelihood - surface. In testing, we find that such variants produce a secondary bump - from 10 to 15 iterations in the histogram of number of iterations per - variant. We also find that this faux convergence produces large standard - errors and large (insignificant) p-values. To not miss such variants, - consider using Firth logistic regression, linear regression, or - group-based tests. - - Here's a concrete illustration of quasi-complete seperation in R. 
Suppose - we have 2010 samples distributed as follows for a particular variant: - - ======= ====== === ====== - Status HomRef Het HomVar - ======= ====== === ====== - Case 1000 10 0 - Control 1000 0 0 - ======= ====== === ====== - - The following R code fits the (standard) logistic, Firth logistic, - and linear regression models to this data, where ``x`` is genotype, - ``y`` is phenotype, and ``logistf`` is from the logistf package: - - .. code-block:: R - - x <- c(rep(0,1000), rep(1,1000), rep(1,10) - y <- c(rep(0,1000), rep(0,1000), rep(1,10)) - logfit <- glm(y ~ x, family=binomial()) - firthfit <- logistf(y ~ x) - linfit <- lm(y ~ x) - - The resulting p-values for the genotype coefficient are 0.991, 0.00085, - and 0.0016, respectively. The erroneous value 0.991 is due to - quasi-complete separation. Moving one of the 10 hets from case to control - eliminates this quasi-complete separation; the p-values from R are then - 0.0373, 0.0111, and 0.0116, respectively, as expected for a less - significant association. - - The Firth test reduces bias from small counts and resolves the issue of - separation by penalizing maximum likelihood estimation by the `Jeffrey's - invariant prior `__. This - test is slower, as both the null and full model must be fit per variant, - and convergence of the modified Newton method is linear rather than - quadratic. For Firth, 100 iterations are attempted for the null model - and, if that is successful, for the full model as well. In testing we - find 20 iterations nearly always suffices. If the null model fails to - converge, then the `logreg.fit` fields reflect the null model; - otherwise, they reflect the full model. - - See - `Recommended joint and meta-analysis strategies for case-control association testing of single low-count variants `__ - for an empirical comparison of the logistic Wald, LRT, score, and Firth - tests. The theoretical foundations of the Wald, likelihood ratio, and score - tests may be found in Chapter 3 of Gesine Reinert's notes - `Statistical Theory `__. - Firth introduced his approach in - `Bias reduction of maximum likelihood estimates, 1993 `__. - Heinze and Schemper further analyze Firth's approach in - `A solution to the problem of separation in logistic regression, 2002 `__. - - Hail's logistic regression tests correspond to the ``b.wald``, - ``b.lrt``, and ``b.score`` tests in `EPACTS`_. For each variant, Hail - imputes missing input values as the mean of non-missing input values, - whereas EPACTS subsets to those samples with called genotypes. Hence, - Hail and EPACTS results will currently only agree for variants with no - missing genotypes. - - .. _EPACTS: http://genome.sph.umich.edu/wiki/EPACTS#Single_Variant_Tests - - Note - ---- - Use the `pass_through` parameter to include additional row fields from - matrix table underlying ``x``. For example, to include an "rsid" field, set - ``pass_through=['rsid']`` or ``pass_through=[mt.rsid]``. - - Parameters - ---------- - test : {'wald', 'lrt', 'score', 'firth'} - Statistical test. - y : :class:`.Float64Expression` or :obj:`list` of :class:`.Float64Expression` - One or more column-indexed response expressions. - All non-missing values must evaluate to 0 or 1. - Note that a :class:`.BooleanExpression` will be implicitly converted to - a :class:`.Float64Expression` with this property. - x : :class:`.Float64Expression` - Entry-indexed expression for input variable. - covariates : :obj:`list` of :class:`.Float64Expression` - Non-empty list of column-indexed covariate expressions. 
- pass_through : :obj:`list` of :class:`str` or :class:`.Expression` - Additional row fields to include in the resulting table. - - Returns - ------- - :class:`.Table` - """ - if len(covariates) == 0: - raise ValueError('logistic regression requires at least one covariate expression') - - mt = matrix_table_source('logistic_regresion_rows/x', x) - check_entry_indexed('logistic_regresion_rows/x', x) - - y_is_list = isinstance(y, list) - if y_is_list and len(y) == 0: - raise ValueError("'logistic_regression_rows': found no values for 'y'") - y = wrap_to_list(y) - - for e in covariates: - analyze('logistic_regression_rows/covariates', e, mt._col_indices) - - # _warn_if_no_intercept('logistic_regression_rows', covariates) - - x_field_name = Env.get_uid() - y_field_names = [f'__y_{i}' for i in range(len(y))] - num_y_fields = len(y_field_names) - - y_dict = dict(zip(y_field_names, y)) - - cov_field_names = [f'__cov{i}' for i in range(len(covariates))] - row_fields = _get_regression_row_fields(mt, pass_through, 'logistic_regression_rows') - - # Handle filtering columns with missing values: - mt = mt.filter_cols(hl.array(y + covariates).all(hl.is_defined)) - - # FIXME: selecting an existing entry field should be emitted as a SelectFields - mt = mt._select_all(col_exprs=dict(**y_dict, - **dict(zip(cov_field_names, covariates))), - row_exprs=row_fields, - col_key=[], - entry_exprs={x_field_name: x}) - - sample_field_name = "samples" - ht = mt._localize_entries("entries", sample_field_name) - - # cov_nd rows are samples, columns are the different covariates - if covariates: - ht = ht.annotate_globals(cov_nd=hl.nd.array(ht[sample_field_name].map(lambda sample_struct: [sample_struct[cov_name] for cov_name in cov_field_names]))) - else: - ht = ht.annotate_globals(cov_nd=hl.nd.array(ht[sample_field_name].map(lambda sample_struct: hl.empty_array(hl.tfloat64)))) - - # y_nd rows are samples, columns are the various dependent variables. - ht = ht.annotate_globals(y_nd=hl.nd.array(ht[sample_field_name].map(lambda sample_struct: [sample_struct[y_name] for y_name in y_field_names]))) - - # Fit null models, which means doing a logreg fit with just the covariates for each phenotype. - null_models = hl.range(num_y_fields).map(lambda idx: logreg_fit(ht.cov_nd, ht.y_nd[:, idx])) - ht = ht.annotate_globals(nulls=null_models) - ht = ht.transmute(x=hl.nd.array(mean_impute(ht.entries[x_field_name]))) - - if test == "wald": - # For each y vector, need to do wald test. 
- covs_and_x = hl.nd.hstack([ht.cov_nd, ht.x.reshape((-1, 1))]) - wald_structs = hl.range(num_y_fields).map(lambda idx: wald_test(covs_and_x, ht.y_nd[:, idx], ht.nulls[idx], "logistic")) - ht = ht.annotate(logistic_regression=wald_structs) - elif test == "lrt": - covs_and_x = hl.nd.hstack([ht.cov_nd, ht.x.reshape((-1, 1))]) - lrt_structs = hl.range(num_y_fields).map(lambda idx: lrt_test(covs_and_x, ht.y_nd[:, idx], ht.nulls[idx], "logistic")) - ht = ht.annotate(logistic_regression=lrt_structs) - - else: - raise ValueError("Only support wald and lrt so far") - - if not y_is_list: - ht = ht.transmute(**ht.logistic_regression[0]) - - ht = ht.drop("x") - - return ht - - @typecheck(test=enumeration('wald', 'lrt', 'score'), y=expr_float64, x=expr_float64, @@ -1868,9 +1457,9 @@ def lambda_gc(p_value, approximate=True): def _lambda_gc_agg(p_value, approximate=True): chisq = hl.qchisqtail(p_value, 1) if approximate: - med_chisq = hl.agg.filter(~hl.is_nan(p_value), hl.agg.approx_quantiles(chisq, 0.5)) + med_chisq = hl.agg.approx_quantiles(chisq, 0.5) else: - med_chisq = hl.agg.filter(~hl.is_nan(p_value), hl.median(hl.agg.collect(chisq))) + med_chisq = hl.median(hl.agg.collect(chisq)) return med_chisq / hl.qchisqtail(0.5, 1) diff --git a/hail/python/hail/nd/__init__.py b/hail/python/hail/nd/__init__.py index 711ce6da441..e68467071d8 100644 --- a/hail/python/hail/nd/__init__.py +++ b/hail/python/hail/nd/__init__.py @@ -1,9 +1,7 @@ from .nd import array, from_column_major, arange, full, zeros, ones, svd, qr, solve, diagonal, inv, concatenate, \ eye, identity, vstack, hstack -newaxis = None - __all__ = [ 'array', 'from_column_major', 'arange', 'full', 'zeros', 'ones', 'qr', 'solve', 'svd', 'diagonal', 'inv', - 'concatenate', 'eye', 'identity', 'vstack', 'hstack', 'newaxis' + 'concatenate', 'eye', 'identity', 'vstack', 'hstack' ] diff --git a/hail/python/hail/nd/nd.py b/hail/python/hail/nd/nd.py index 1d20486b505..cca8da0002b 100644 --- a/hail/python/hail/nd/nd.py +++ b/hail/python/hail/nd/nd.py @@ -221,8 +221,8 @@ def diagonal(nd): return hl.nd.array(hl.range(hl.int32(shape_min)).map(lambda i: nd[i, i])) -@typecheck(a=expr_ndarray(), b=expr_ndarray(), no_crash=bool) -def solve(a, b, no_crash=False): +@typecheck(a=expr_ndarray(), b=expr_ndarray()) +def solve(a, b): """Solve a linear system. 
Parameters @@ -251,21 +251,11 @@ def solve(a, b, no_crash=False): if b.dtype.element_type != hl.tfloat64: b = b.map(lambda e: hl.float64(e)) - if no_crash: - name = "linear_solve_no_crash" - return_type = hl.tstruct(solution=hl.tndarray(hl.tfloat64, 2), failed=hl.tbool) - else: - name = "linear_solve" - return_type = hl.tndarray(hl.tfloat64, 2) - - ir = Apply(name, return_type, a._ir, b._ir) - result = construct_expr(ir, return_type, a._indices, a._aggregations) + ir = Apply("linear_solve", hl.tndarray(hl.tfloat64, 2), a._ir, b._ir) + result = construct_expr(ir, hl.tndarray(hl.tfloat64, 2), a._indices, a._aggregations) if b_ndim_orig == 1: - if no_crash: - result = hl.struct(solution=result.solution.reshape((-1)), failed=result.failed) - else: - result = result.reshape((-1)) + result = result.reshape((-1)) return result diff --git a/hail/python/hail/table.py b/hail/python/hail/table.py index 4085ba0939c..9dfa42acc67 100644 --- a/hail/python/hail/table.py +++ b/hail/python/hail/table.py @@ -3281,7 +3281,7 @@ def _same(self, other, tolerance=1e-6, absolute=False): from hail.expr.functions import _values_similar if self._type != other._type: - print(f'Table._same: types differ:\n {self._type}\n {other._type}') + print(f'Table._same: types differ: {self._type}, {other._type}') return False left_global_value = Env.get_uid() @@ -3470,10 +3470,6 @@ def multi_way_zip_join(tables, data_field_name, global_field_name) -> 'Table': is exactly one entry in some `data_field_name` array for every row in the inputs. - The :meth:`multi_way_zip_join` method assumes that inputs have distinct - keys. If any input has duplicate keys, the row value that is included - in the result array for that key is undefined. - Parameters ---------- tables : :class:`list` of :class:`Table` diff --git a/hail/python/hail/utils/__init__.py b/hail/python/hail/utils/__init__.py index 3156b8c9924..0f6b102dcd9 100644 --- a/hail/python/hail/utils/__init__.py +++ b/hail/python/hail/utils/__init__.py @@ -1,11 +1,11 @@ from .misc import wrap_to_list, get_env_or_default, uri_path, local_path_uri, new_temp_file, new_local_temp_dir, new_local_temp_file, with_local_temp_file, storage_level, range_matrix_table, range_table, run_command, HailSeedGenerator, timestamp_path, _dumps_partitions, default_handler -from .hadoop_utils import hadoop_copy, hadoop_open, hadoop_exists, hadoop_is_dir, hadoop_is_file, hadoop_ls, hadoop_scheme_supported, hadoop_stat, copy_log +from .hadoop_utils import hadoop_copy, hadoop_open, hadoop_exists, hadoop_is_dir, hadoop_is_file, hadoop_ls, hadoop_stat, copy_log from .struct import Struct from .linkedlist import LinkedList from .interval import Interval from .frozendict import frozendict from .java import error, warning, info, FatalError, HailUserError -from .tutorial import get_1kg, get_hgdp, get_movie_lens +from .tutorial import get_1kg, get_movie_lens from .deduplicate import deduplicate __all__ = ['hadoop_open', @@ -15,7 +15,6 @@ 'hadoop_is_file', 'hadoop_stat', 'hadoop_ls', - 'hadoop_scheme_supported', 'copy_log', 'wrap_to_list', 'new_local_temp_dir', @@ -39,7 +38,6 @@ 'HailSeedGenerator', 'LinkedList', 'get_1kg', - 'get_hgdp', 'get_movie_lens', 'timestamp_path', '_dumps_partitions', diff --git a/hail/python/hail/utils/hadoop_utils.py b/hail/python/hail/utils/hadoop_utils.py index 232b22ccca5..1a7742377da 100644 --- a/hail/python/hail/utils/hadoop_utils.py +++ b/hail/python/hail/utils/hadoop_utils.py @@ -214,26 +214,6 @@ def hadoop_ls(path: str) -> List[Dict]: return Env.fs().ls(path) -def 
hadoop_scheme_supported(scheme: str) -> bool: - """Returns ``True`` if the Hadoop filesystem supports URLs with the given - scheme. - - Examples - -------- - - >>> hadoop_scheme_supported('gs') # doctest: +SKIP - - Parameters - ---------- - scheme : :class:`str` - - Returns - ------- - :obj:`.bool` - """ - return Env.fs().supports_scheme(scheme) - - def copy_log(path: str) -> None: """Attempt to copy the session log to a hadoop-API-compatible location. diff --git a/hail/python/hail/utils/misc.py b/hail/python/hail/utils/misc.py index 21a9088e2e8..e22c4b70c3f 100644 --- a/hail/python/hail/utils/misc.py +++ b/hail/python/hail/utils/misc.py @@ -262,9 +262,9 @@ def fmt_field(field): elif isinstance(obj, StructExpression): return 'StructExpression', StructExpression, struct_error(obj), True elif isinstance(obj, ArrayStructExpression): - return 'ArrayStructExpression', ArrayStructExpression, struct_error(obj), True + return 'ArrayStructExpression', StructExpression, struct_error(obj), True elif isinstance(obj, SetStructExpression): - return 'SetStructExpression', SetStructExpression, struct_error(obj), True + return 'SetStructExpression', StructExpression, struct_error(obj), True else: raise NotImplementedError(obj) diff --git a/hail/python/hail/utils/tutorial.py b/hail/python/hail/utils/tutorial.py index 41afab13e01..27bb819d523 100644 --- a/hail/python/hail/utils/tutorial.py +++ b/hail/python/hail/utils/tutorial.py @@ -8,17 +8,13 @@ __all__ = [ 'get_1kg', - 'get_hgdp', 'get_movie_lens' ] resources = { '1kg_annotations': 'https://storage.googleapis.com/hail-tutorial/1kg_annotations.txt', '1kg_matrix_table': 'https://storage.googleapis.com/hail-tutorial/1kg.vcf.bgz', - '1kg_ensembl_gene_annotations': 'https://storage.googleapis.com/hail-tutorial/ensembl_gene_annotations.txt', - 'HGDP_annotations': 'https://storage.googleapis.com/hail-tutorial/hgdp/hgdp_pop_and_sex_annotations.tsv', - 'HGDP_matrix_table': 'https://storage.googleapis.com/hail-tutorial/hgdp/hgdp_subset.vcf.bgz', - 'HGDP_ensembl_gene_annotations': 'https://storage.googleapis.com/hail-tutorial/hgdp/hgdp_gene_annotations.tsv', + 'ensembl_gene_annotations': 'https://storage.googleapis.com/hail-tutorial/ensembl_gene_annotations.txt', 'movie_lens_100k': 'http://files.grouplens.org/datasets/movielens/ml-100k.zip', } @@ -92,7 +88,7 @@ def get_1kg(output_dir, overwrite: bool = False): sync_retry_transient_errors(urlretrieve, source, tmp_sample_annot) tmp_gene_annot = os.path.join(tmp_dir, 'ensembl_gene_annotations.txt') - source = resources['1kg_ensembl_gene_annotations'] + source = resources['ensembl_gene_annotations'] info(f'downloading Ensembl gene annotations ...\n' f' Source: {source}') sync_retry_transient_errors(urlretrieve, source, tmp_gene_annot) @@ -105,67 +101,6 @@ def get_1kg(output_dir, overwrite: bool = False): info('1KG files found') -def get_hgdp(output_dir, overwrite: bool = False): - """Download subset of the `Human Genome Diversity Panel - `__ - dataset and sample annotations. - - Notes - ----- - The download is about 30MB. - - Parameters - ---------- - output_dir - Directory in which to write data. - overwrite - If ``True``, overwrite any existing files/directories at `output_dir`. 
- """ - fs = Env.fs() - - if not _dir_exists(fs, output_dir): - fs.mkdir(output_dir) - - matrix_table_path = os.path.join(output_dir, 'HGDP.mt') - vcf_path = os.path.join(output_dir, 'HGDP.vcf.bgz') - sample_annotations_path = os.path.join(output_dir, 'HGDP_annotations.txt') - gene_annotations_path = os.path.join(output_dir, 'ensembl_gene_annotations.txt') - - if (overwrite - or not _dir_exists(fs, matrix_table_path) - or not _file_exists(fs, sample_annotations_path) - or not _file_exists(fs, vcf_path) - or not _file_exists(fs, gene_annotations_path)): - init_temp_dir() - tmp_vcf = os.path.join(tmp_dir, 'HGDP.vcf.bgz') - source = resources['HGDP_matrix_table'] - info(f'downloading HGDP VCF ...\n' - f' Source: {source}') - sync_retry_transient_errors(urlretrieve, resources['HGDP_matrix_table'], tmp_vcf) - cluster_readable_vcf = _copy_to_tmp(fs, local_path_uri(tmp_vcf), extension='vcf.bgz') - info('importing VCF and writing to matrix table...') - hl.import_vcf(cluster_readable_vcf, min_partitions=16, reference_genome='GRCh38').write(matrix_table_path, overwrite=True) - - tmp_sample_annot = os.path.join(tmp_dir, 'HGDP_annotations.txt') - source = resources['HGDP_annotations'] - info(f'downloading HGDP annotations ...\n' - f' Source: {source}') - sync_retry_transient_errors(urlretrieve, source, tmp_sample_annot) - - tmp_gene_annot = os.path.join(tmp_dir, 'ensembl_gene_annotations.txt') - source = resources['HGDP_ensembl_gene_annotations'] - info(f'downloading Ensembl gene annotations ...\n' - f' Source: {source}') - sync_retry_transient_errors(urlretrieve, source, tmp_gene_annot) - - hl.hadoop_copy(local_path_uri(tmp_sample_annot), sample_annotations_path) - hl.hadoop_copy(local_path_uri(tmp_gene_annot), gene_annotations_path) - hl.hadoop_copy(local_path_uri(tmp_vcf), vcf_path) - info('Done!') - else: - info('HGDP files found') - - def get_movie_lens(output_dir, overwrite: bool = False): """Download public Movie Lens dataset. 
diff --git a/hail/python/hailtop/aiogoogle/auth/session.py b/hail/python/hailtop/aiogoogle/auth/session.py index 8818bb591df..e94b80fd8b0 100644 --- a/hail/python/hailtop/aiogoogle/auth/session.py +++ b/hail/python/hailtop/aiogoogle/auth/session.py @@ -2,7 +2,6 @@ from typing import Optional, Type, TypeVar, Mapping import abc import aiohttp -import hailtop.httpx from hailtop.utils import request_retry_transient_errors, RateLimit, RateLimiter from .credentials import Credentials from .access_token import AccessToken @@ -67,10 +66,11 @@ class Session(BaseSession): def __init__(self, *, credentials: Credentials = None, params: Optional[Mapping[str, str]] = None, **kwargs): if credentials is None: credentials = Credentials.default_credentials() + if 'raise_for_status' not in kwargs: kwargs['raise_for_status'] = True self._params = params - self._session = hailtop.httpx.ClientSession(**kwargs) + self._session = aiohttp.ClientSession(**kwargs) self._access_token = AccessToken(credentials) async def request(self, method: str, url: str, **kwargs): @@ -97,7 +97,7 @@ async def request(self, method: str, url: str, **kwargs): return await self._session.request(method, url, **kwargs) async def close(self) -> None: - if hasattr(self, '_session'): + if hasattr(self._session, '_session'): await self._session.close() del self._session del self._access_token diff --git a/hail/python/hailtop/aiogoogle/client/compute_client.py b/hail/python/hailtop/aiogoogle/client/compute_client.py index d0b8b25cfa3..ffe33daf818 100644 --- a/hail/python/hailtop/aiogoogle/client/compute_client.py +++ b/hail/python/hailtop/aiogoogle/client/compute_client.py @@ -1,12 +1,9 @@ import uuid from typing import Mapping, Any, Optional, MutableMapping -import logging from .base_client import BaseClient from hailtop.utils import sleep_and_backoff -log = logging.getLogger('compute_client') - async def request_with_wait_for_done(request_f, path, params: MutableMapping[str, Any] = None, **kwargs): assert 'params' not in kwargs diff --git a/hail/python/hailtop/aiogoogle/client/storage_client.py b/hail/python/hailtop/aiogoogle/client/storage_client.py index 6d9ad04f871..de35f3d836a 100644 --- a/hail/python/hailtop/aiogoogle/client/storage_client.py +++ b/hail/python/hailtop/aiogoogle/client/storage_client.py @@ -363,7 +363,6 @@ async def insert_object(self, bucket: str, name: str, **kwargs) -> WritableStrea # https://cloud.google.com/storage/docs/performing-resumable-uploads assert upload_type == 'resumable' chunk_size = kwargs.get('bufsize', 256 * 1024) - resp = await self._session.post( f'https://storage.googleapis.com/upload/storage/v1/b/{bucket}/o', **kwargs) @@ -425,6 +424,7 @@ async def __getitem__(self, key: str) -> str: class GoogleStorageFileListEntry(FileListEntry): def __init__(self, url: str, items: Optional[Dict[str, Any]]): + assert url.endswith('/') == (items is None), f'{url} {items}' self._url = url self._items = items self._status: Optional[GetObjectFileStatus] = None @@ -448,7 +448,7 @@ async def is_dir(self) -> bool: async def status(self) -> FileStatus: if self._status is None: if self._items is None: - raise IsADirectoryError(self._url) + raise ValueError("directory has no file status") self._status = GetObjectFileStatus(self._items) return self._status @@ -478,10 +478,10 @@ def _tmp_name(self, filename: str) -> str: def _part_name(self, number: int) -> str: return self._tmp_name(f'part-{number}') - async def create_part(self, number: int, start: int) -> WritableStream: + async def create_part(self, number: int, start: 
int, *, retry_writes: bool = True) -> WritableStream: part_name = self._part_name(number) params = { - 'uploadType': 'media' + 'uploadType': 'resumable' if retry_writes else 'media' } return await self._fs._storage_client.insert_object(self._bucket, part_name, params=params) @@ -546,13 +546,8 @@ async def tree_compose(names, dest_name): class GoogleStorageAsyncFS(AsyncFS): def __init__(self, *, storage_client: Optional[StorageClient] = None, - project: Optional[str] = None, **kwargs): if not storage_client: - if project is not None: - if 'params' not in kwargs: - kwargs['params'] = {} - kwargs['params']['userProject'] = project storage_client = StorageClient(**kwargs) self._storage_client = storage_client @@ -581,7 +576,7 @@ async def open_from(self, url: str, start: int) -> ReadableStream: return await self._storage_client.get_object( bucket, name, headers={'Range': f'bytes={start}-'}) - async def create(self, url: str, *, retry_writes: bool = True) -> WritableStream: + async def create(self, url: str, retry_writes: bool = True) -> WritableStream: bucket, name = self._get_bucket_name(url) params = { 'uploadType': 'resumable' if retry_writes else 'media' @@ -663,7 +658,8 @@ async def _listfiles_flat(self, bucket: str, name: str) -> AsyncIterator[FileLis if prefixes: for prefix in prefixes: assert prefix.endswith('/') - yield GoogleStorageFileListEntry(f'gs://{bucket}/{prefix}', None) + url = f'gs://{bucket}/{prefix}' + yield GoogleStorageFileListEntry(url, None) items = page.get('items') if items: @@ -686,23 +682,11 @@ async def listfiles(self, url: str, recursive: bool = False) -> AsyncIterator[Fi except StopAsyncIteration: raise FileNotFoundError(url) # pylint: disable=raise-missing-from - async def should_yield(entry): - url = await entry.url() - if url.endswith('/') and await entry.is_file(): - stat = await entry.status() - if await stat.size() != 0: - raise FileAndDirectoryError(url) - return False - return True - async def cons(first_entry, it): - if await should_yield(first_entry): - yield first_entry + yield first_entry try: while True: - next_entry = await it.__anext__() - if await should_yield(next_entry): - yield next_entry + yield await it.__anext__() except StopAsyncIteration: pass @@ -724,7 +708,7 @@ async def isfile(self, url: str) -> bool: async def isdir(self, url: str) -> bool: bucket, name = self._get_bucket_name(url) - assert not name or name.endswith('/'), name + assert not name or name.endswith('/') params = { 'prefix': name, 'delimiter': '/', @@ -739,31 +723,27 @@ async def isdir(self, url: str) -> bool: async def remove(self, url: str) -> None: bucket, name = self._get_bucket_name(url) + await self._storage_client.delete_object(bucket, name) + + async def _remove_doesnt_exist_ok(self, url: str) -> None: try: + bucket, name = self._get_bucket_name(url) await self._storage_client.delete_object(bucket, name) + except FileNotFoundError: + pass except aiohttp.ClientResponseError as e: - if e.status == 404: - raise FileNotFoundError(url) from e - raise + if e.status != 404: + raise - async def _rmtree(self, sema: asyncio.Semaphore, url: str) -> None: + async def rmtree(self, sema: asyncio.Semaphore, url: str) -> None: async with OnlineBoundedGather2(sema) as pool: - bucket, name = self._get_bucket_name(url) - if name and not name.endswith('/'): - name = f'{name}/' - it = self._listfiles_recursive(bucket, name) + try: + it = await self.listfiles(url, recursive=True) + except FileNotFoundError: + return async for entry in it: await pool.call(self._remove_doesnt_exist_ok, 
await entry.url()) - async def rmtree(self, sema: Optional[asyncio.Semaphore], url: str) -> None: - if sema is None: - sema = asyncio.Semaphore(50) - async with sema: - return await self._rmtree(sema, url) - - return await self._rmtree(sema, url) - async def close(self) -> None: - if hasattr(self, '_storage_client'): - await self._storage_client.close() - del self._storage_client + await self._storage_client.close() + del self._storage_client diff --git a/hail/python/hailtop/aiotools/fs.py b/hail/python/hailtop/aiotools/fs.py index 5c0de3d5033..eb9f0290218 100644 --- a/hail/python/hailtop/aiotools/fs.py +++ b/hail/python/hailtop/aiotools/fs.py @@ -1,4 +1,4 @@ -from typing import Any, AsyncContextManager, Optional, List, Type, BinaryIO, cast, Set, AsyncIterator, Union, Dict +from typing import Any, Optional, List, Type, BinaryIO, cast, Set, AsyncIterator, Union, Dict from types import TracebackType import abc import os @@ -9,11 +9,10 @@ import asyncio from concurrent.futures import ThreadPoolExecutor import urllib.parse -import functools import humanize from hailtop.utils import ( retry_transient_errors, blocking_to_async, url_basename, url_join, bounded_gather2, - time_msecs, humanize_timedelta_msecs, OnlineBoundedGather2) + time_msecs, humanize_timedelta_msecs) from .stream import ReadableStream, WritableStream, blocking_readable_stream_to_async, blocking_writable_stream_to_async @@ -55,7 +54,7 @@ async def status(self) -> FileStatus: class MultiPartCreate(abc.ABC): @abc.abstractmethod - async def create_part(self, number: int, start: int) -> AsyncContextManager[WritableStream]: + async def create_part(self, number: int, start: int, *, retry_writes: bool = True): pass @abc.abstractmethod @@ -87,7 +86,7 @@ async def open_from(self, url: str, start: int) -> ReadableStream: pass @abc.abstractmethod - async def create(self, url: str, *, retry_writes: bool = True) -> AsyncContextManager[WritableStream]: + async def create(self, url: str, *, retry_writes: bool = True) -> WritableStream: pass @abc.abstractmethod @@ -118,33 +117,6 @@ async def listfiles(self, url: str, recursive: bool = False) -> AsyncIterator[Fi async def staturl(self, url: str) -> str: pass - async def _staturl_parallel_isfile_isdir(self, url: str) -> str: - assert not url.endswith('/') - - async def with_exception(f, *args, **kwargs): - try: - return (await f(*args, **kwargs)), None - except Exception as e: - return None, e - - [(is_file, isfile_exc), (is_dir, isdir_exc)] = await asyncio.gather( - with_exception(self.isfile, url), with_exception(self.isdir, url + '/')) - # raise exception deterministically - if isfile_exc: - raise isfile_exc - if isdir_exc: - raise isdir_exc - - if is_file: - if is_dir: - raise FileAndDirectoryError(url) - return AsyncFS.FILE - - if is_dir: - return AsyncFS.DIR - - raise FileNotFoundError(url) - @abc.abstractmethod async def isfile(self, url: str) -> bool: pass @@ -157,48 +129,14 @@ async def isdir(self, url: str) -> bool: async def remove(self, url: str) -> None: pass - async def _remove_doesnt_exist_ok(self, url): - try: - await self.remove(url) - except FileNotFoundError: - pass - @abc.abstractmethod - async def rmtree(self, sema: Optional[asyncio.Semaphore], url: str) -> None: + async def rmtree(self, sema: asyncio.Semaphore, url: str) -> None: pass - async def _rmtree_with_recursive_listfiles(self, sema: asyncio.Semaphore, url: str) -> None: - async with OnlineBoundedGather2(sema) as pool: - try: - it = await self.listfiles(url, recursive=True) - except FileNotFoundError: - return - async 
for entry in it: - await pool.call(self._remove_doesnt_exist_ok, await entry.url()) - async def touch(self, url: str) -> None: async with await self.create(url): pass - async def read(self, url: str) -> bytes: - async with await self.open(url) as f: - return await f.read() - - async def read_from(self, url: str, start: int) -> bytes: - async with await self.open_from(url, start) as f: - return await f.read() - - async def read_range(self, url: str, start: int, end: int) -> bytes: - n = (end - start) + 1 - async with await self.open_from(url, start) as f: - return await f.read(n) - - async def write(self, url: str, data: bytes) -> None: - async def _write() -> None: - async with await self.create(url, retry_writes=False) as f: - await f.write(data) - await retry_transient_errors(_write) - async def close(self) -> None: pass @@ -253,7 +191,7 @@ async def is_dir(self) -> bool: async def status(self) -> LocalStatFileStatus: if self._status is None: if await self.is_dir(): - raise IsADirectoryError() + raise ValueError("directory has no file status") self._status = LocalStatFileStatus(await blocking_to_async(self._thread_pool, self._entry.stat)) return self._status @@ -264,7 +202,7 @@ def __init__(self, fs: 'LocalAsyncFS', path: str, num_parts: int): self._path = path self._num_parts = num_parts - async def create_part(self, number: int, start: int): # pylint: disable=unused-argument + async def create_part(self, number: int, start: int, *, retry_writes: bool = True): # pylint: disable=unused-argument assert 0 <= number < self._num_parts f = await blocking_to_async(self._fs._thread_pool, open, self._path, 'r+b') f.seek(start) @@ -398,9 +336,9 @@ async def isdir(self, url: str) -> bool: async def remove(self, url: str) -> None: path = self._get_path(url) - return await blocking_to_async(self._thread_pool, os.remove, path) + return os.remove(path) - async def rmtree(self, sema: Optional[asyncio.Semaphore], url: str) -> None: + async def rmtree(self, sema: asyncio.Semaphore, url: str) -> None: path = self._get_path(url) await blocking_to_async(self._thread_pool, shutil.rmtree, path) @@ -557,12 +495,12 @@ async def _copy_file(self, srcfile: str, destfile: str) -> None: async with await self.router_fs.open(srcfile) as srcf: try: - dest_cm = await self.router_fs.create(destfile, retry_writes=False) + destf = await self.router_fs.create(destfile, retry_writes=False) except FileNotFoundError: await self.router_fs.makedirs(os.path.dirname(destfile), exist_ok=True) - dest_cm = await self.router_fs.create(destfile) + destf = await self.router_fs.create(destfile) - async with dest_cm as destf: + async with destf: while True: b = await srcf.read(Copier.BUFFER_SIZE) if not b: @@ -573,7 +511,7 @@ async def _copy_file(self, srcfile: str, destfile: str) -> None: async def _copy_part(self, source_report, srcfile, part_number, part_creator, return_exceptions): try: async with await self.router_fs.open_from(srcfile, part_number * self.PART_SIZE) as srcf: - async with await part_creator.create_part(part_number, part_number * self.PART_SIZE) as destf: + async with await part_creator.create_part(part_number, part_number * self.PART_SIZE, retry_writes=False) as destf: n = self.PART_SIZE while n > 0: b = await srcf.read(min(Copier.BUFFER_SIZE, n)) @@ -612,7 +550,7 @@ async def _copy_file_multi_part_main( async with part_creator: await bounded_gather2(sema, *[ - functools.partial(retry_transient_errors, self._copy_part, source_report, srcfile, i, part_creator, return_exceptions) + retry_transient_errors(self._copy_part, 
source_report, srcfile, i, part_creator, return_exceptions) for i in range(n_parts) ], cancel_on_error=True) @@ -694,7 +632,6 @@ async def copy_as_dir(self, sema: asyncio.Semaphore, source_report: SourceReport src = self.src if not src.endswith('/'): src = src + '/' - try: srcentries = await self.router_fs.listfiles(src, recursive=True) except (NotADirectoryError, FileNotFoundError): @@ -729,7 +666,7 @@ async def copy_source(srcentry): await self._copy_file_multi_part(sema, source_report, srcfile, await srcentry.status(), url_join(full_dest, relsrcfile), return_exceptions) await bounded_gather2(sema, *[ - functools.partial(copy_source, srcentry) + copy_source(srcentry) async for srcentry in srcentries], cancel_on_error=True) async def copy(self, sema: asyncio.Semaphore, source_report: SourceReport, return_exceptions: bool): @@ -813,7 +750,7 @@ async def _copy_one_transfer(self, sema: asyncio.Semaphore, transfer_report: Tra raise NotADirectoryError(transfer.dest) await bounded_gather2(sema, *[ - functools.partial(self.copy_source, sema, transfer, r, s, dest_type_task, return_exceptions) + self.copy_source(sema, transfer, r, s, dest_type_task, return_exceptions) for r, s in zip(src_report, src) ], cancel_on_error=True) @@ -839,7 +776,7 @@ async def copy(self, sema: asyncio.Semaphore, copy_report: CopyReport, transfer: assert isinstance(transfer_report, list) await bounded_gather2(sema, *[ - functools.partial(self._copy_one_transfer, sema, r, t, return_exceptions) + self._copy_one_transfer(sema, r, t, return_exceptions) for r, t in zip(transfer_report, transfer) ], return_exceptions=return_exceptions, cancel_on_error=True) except Exception as e: @@ -893,7 +830,7 @@ async def open_from(self, url: str, start: int) -> ReadableStream: fs = self._get_fs(url) return await fs.open_from(url, start) - async def create(self, url: str, retry_writes: bool = True) -> WritableStream: + async def create(self, url: str, *, retry_writes: bool = True) -> WritableStream: fs = self._get_fs(url) return await fs.create(url, retry_writes=retry_writes) @@ -937,7 +874,7 @@ async def remove(self, url: str) -> None: fs = self._get_fs(url) return await fs.remove(url) - async def rmtree(self, sema: Optional[asyncio.Semaphore], url: str) -> None: + async def rmtree(self, sema: asyncio.Semaphore, url: str) -> None: fs = self._get_fs(url) return await fs.rmtree(sema, url) diff --git a/hail/python/hailtop/aiotools/s3asyncfs.py b/hail/python/hailtop/aiotools/s3asyncfs.py deleted file mode 100644 index 9916afd1926..00000000000 --- a/hail/python/hailtop/aiotools/s3asyncfs.py +++ /dev/null @@ -1,423 +0,0 @@ -from typing import Any, AsyncIterator, BinaryIO, cast, AsyncContextManager, Dict, List, Optional, Set, Tuple, Type -from types import TracebackType -from concurrent.futures import ThreadPoolExecutor -import os.path -import urllib -import asyncio -import botocore.exceptions -import boto3 -from hailtop.utils import blocking_to_async -from hailtop.aiotools import ( - FileStatus, FileListEntry, ReadableStream, WritableStream, AsyncFS, - MultiPartCreate) -from .stream import ( - AsyncQueueWritableStream, - async_writable_blocking_readable_stream_pair, - blocking_readable_stream_to_async) - - -class PageIterator: - def __init__(self, fs: 'S3AsyncFS', bucket: str, prefix: str, delimiter: Optional[str] = None): - self._fs = fs - self._bucket = bucket - self._prefix = prefix - self._kwargs = {} - if delimiter is not None: - self._kwargs['Delimiter'] = delimiter - self._page = None - - def __aiter__(self) -> 'PageIterator': - return 
self - - async def __anext__(self): - if self._page is None: - self._page = await blocking_to_async(self._fs._thread_pool, self._fs._s3.list_objects_v2, - Bucket=self._bucket, - Prefix=self._prefix, - **self._kwargs) - return self._page - - next_continuation_token = self._page.get('NextContinuationToken') - if next_continuation_token is not None: - self._page = await blocking_to_async(self._fs._thread_pool, self._fs._s3.list_objects_v2, - Bucket=self._bucket, - Prefix=self._prefix, - ContinuationToken=next_continuation_token, - **self._kwargs) - return self._page - - raise StopAsyncIteration - - -class S3HeadObjectFileStatus(FileStatus): - def __init__(self, head_object_resp): - self.head_object_resp = head_object_resp - - async def size(self) -> int: - return self.head_object_resp['ContentLength'] - - async def __getitem__(self, key: str) -> Any: - return self.head_object_resp[key] - - -class S3ListFilesFileStatus(FileStatus): - def __init__(self, item: Dict[str, Any]): - self._item = item - - async def size(self) -> int: - return self._item['Size'] - - async def __getitem__(self, key: str) -> Any: - return self._item[key] - - -class S3CreateManager(AsyncContextManager[WritableStream]): - def __init__(self, fs: 'S3AsyncFS', bucket: str, name: str): - self.fs: S3AsyncFS = fs - self.bucket: str = bucket - self.name: str = name - self.async_writable: Optional[AsyncQueueWritableStream] = None - self.put_task: Optional[asyncio.Task] = None - self._value: Any = None - - async def __aenter__(self) -> WritableStream: - async_writable, blocking_readable = async_writable_blocking_readable_stream_pair() - self.async_writable = async_writable - self.put_task = asyncio.create_task( - blocking_to_async(self.fs._thread_pool, self.fs._s3.upload_fileobj, - blocking_readable, - Bucket=self.bucket, - Key=self.name)) - return async_writable - - async def __aexit__( - self, exc_type: Optional[Type[BaseException]] = None, - exc_value: Optional[BaseException] = None, - exc_traceback: Optional[TracebackType] = None) -> None: - assert self.async_writable - await self.async_writable.wait_closed() - assert self.put_task - self._value = await self.put_task - - -class S3FileListEntry(FileListEntry): - def __init__(self, bucket: str, key: str, item: Optional[Dict[str, Any]]): - assert key.endswith('/') == (item is None) - self._bucket = bucket - self._key = key - self._item = item - self._status: Optional[S3ListFilesFileStatus] = None - - def name(self) -> str: - return os.path.basename(self._key) - - async def url(self) -> str: - return f's3://{self._bucket}/{self._key}' - - def url_maybe_trailing_slash(self) -> str: - return f's3://{self._bucket}/{self._key}' - - async def is_file(self) -> bool: - return self._item is not None - - async def is_dir(self) -> bool: - return self._item is None - - async def status(self) -> FileStatus: - if self._status is None: - if self._item is None: - raise IsADirectoryError(f's3://{self._bucket}/{self._key}') - self._status = S3ListFilesFileStatus(self._item) - return self._status - - -def _upload_part(s3, bucket, key, number, f, upload_id): - b = f.read() - resp = s3.upload_part( - Bucket=bucket, - Key=key, - PartNumber=number + 1, - UploadId=upload_id, - Body=b) - return resp['ETag'] - - -class S3CreatePartManager(AsyncContextManager[WritableStream]): - def __init__(self, mpc, number: int): - self._mpc = mpc - self._number = number - self._async_writable: Optional[AsyncQueueWritableStream] = None - self._put_task: Optional[asyncio.Task] = None - - async def __aenter__(self) -> 
WritableStream: - async_writable, blocking_readable = async_writable_blocking_readable_stream_pair() - self._async_writable = async_writable - self._put_task = asyncio.create_task( - blocking_to_async(self._mpc._fs._thread_pool, _upload_part, - self._mpc._fs._s3, - self._mpc._bucket, - self._mpc._name, - self._number, - blocking_readable, - self._mpc._upload_id)) - return async_writable - - async def __aexit__( - self, exc_type: Optional[Type[BaseException]] = None, - exc_value: Optional[BaseException] = None, - exc_traceback: Optional[TracebackType] = None) -> None: - assert self._async_writable is not None - assert self._put_task is not None - try: - await self._async_writable.wait_closed() - finally: - self._mpc._etags[self._number] = await self._put_task - - -class S3MultiPartCreate(MultiPartCreate): - def __init__(self, sema: asyncio.Semaphore, fs: 'S3AsyncFS', bucket: str, name: str, num_parts: int): - self._sema = sema - self._fs = fs - self._bucket = bucket - self._name = name - self._num_parts = num_parts - self._upload_id = None - self._etags: List[Optional[str]] = [None] * num_parts - - async def __aenter__(self) -> 'S3MultiPartCreate': - resp = await blocking_to_async(self._fs._thread_pool, self._fs._s3.create_multipart_upload, - Bucket=self._bucket, - Key=self._name) - self._upload_id = resp['UploadId'] - return self - - async def __aexit__( - self, exc_type: Optional[Type[BaseException]] = None, - exc_value: Optional[BaseException] = None, - exc_traceback: Optional[TracebackType] = None) -> None: - if exc_value is not None: - await blocking_to_async(self._fs._thread_pool, self._fs._s3.abort_multipart_upload, - Bucket=self._bucket, - Key=self._name, - UploadId=self._upload_id) - return - - parts = [] - part_number = 1 - for etag in self._etags: - assert etag is not None - parts.append({ - 'ETag': etag, - 'PartNumber': part_number - }) - part_number += 1 - - await blocking_to_async(self._fs._thread_pool, self._fs._s3.complete_multipart_upload, - Bucket=self._bucket, - Key=self._name, - MultipartUpload={'Parts': parts}, - UploadId=self._upload_id) - - async def create_part(self, number: int, start: int) -> S3CreatePartManager: # pylint: disable=unused-argument - return S3CreatePartManager(self, number) - - -class S3AsyncFS(AsyncFS): - def __init__(self, thread_pool: ThreadPoolExecutor, max_workers=None): - if not thread_pool: - thread_pool = ThreadPoolExecutor(max_workers=max_workers) - self._thread_pool = thread_pool - self._s3 = boto3.client('s3') - - def schemes(self) -> Set[str]: - return {'s3'} - - @staticmethod - def _get_bucket_name(url: str) -> Tuple[str, str]: - parsed = urllib.parse.urlparse(url) - if parsed.scheme != 's3': - raise ValueError(f"invalid scheme, expected s3: {parsed.scheme}") - - name = parsed.path - if name: - assert name[0] == '/' - name = name[1:] - - return (parsed.netloc, name) - - async def open(self, url: str) -> ReadableStream: - bucket, name = self._get_bucket_name(url) - resp = await blocking_to_async(self._thread_pool, self._s3.get_object, - Bucket=bucket, - Key=name) - return blocking_readable_stream_to_async(self._thread_pool, cast(BinaryIO, resp['Body'])) - - async def open_from(self, url: str, start: int) -> ReadableStream: - bucket, name = self._get_bucket_name(url) - resp = await blocking_to_async(self._thread_pool, self._s3.get_object, - Bucket=bucket, - Key=name, - Range=f'bytes={start}-') - return blocking_readable_stream_to_async(self._thread_pool, cast(BinaryIO, resp['Body'])) - - async def create(self, url: str, *, retry_writes: 
bool = True) -> S3CreateManager: # pylint: disable=unused-argument - # It may be possible to write a more efficient version of this - # that takes advantage of retry_writes=False. Here's the - # background information: - # - # There are essentially three options for implementing writes. - # The first two handle retries: - # - # 1. Use some form of multipart uploads (which, in the case - # of GCS, we implement by writing temporary objects and - # then calling compose). - # - # 2. Use resumable uploads. This is what the GCS backend - # does, although the performance is must worse than - # non-resumable uploads so in fact it may always be better - # to always use multipart uploads (1). - # - # The third does not handle failures: - # - # 3. Don't be failure/retry safe. Just write the object, and - # if the API call fails, fail. This is useful when you can - # retry at a higher level (this is what the copy code does). - # - # Unfortunately, I don't see how to do (3) with boto3, since - # AWS APIs require a header that includes a hash of the - # request body, and that needs to be computed up front. In - # terms of the boto3 interface, this contraint translates into - # calls like `put_object` require bytes or a seekable stream - # (so it can make two passes over the data, one to compute the - # checksome, and the other to send the data). - # - # Here, we use S3CreateManager, which in turn uses boto3 - # `upload_fileobj` which is implemented in terms of multipart - # uploads. - # - # Another possibility is to make an alternate `create` call - # that takes bytes instead of returning a file-like object, - # and then using `put_object`, and make copy use that - # interface. This has the disadvantage that the read must - # complete before the write can begin (unlike the current - # code, that copies 128MB parts in 256KB chunks). 
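As a rough sketch of the "alternate create call that takes bytes" idea floated in the comment above (nothing this patch adds; write_object_bytes and its signature are hypothetical): with the whole body already in memory, boto3 can compute the request hash up front, so a single put_object call suffices and no multipart or resumable machinery is needed.

    from concurrent.futures import ThreadPoolExecutor

    import boto3
    from hailtop.utils import blocking_to_async

    async def write_object_bytes(thread_pool: ThreadPoolExecutor,
                                  bucket: str, name: str, data: bytes) -> None:
        """Hypothetical non-streaming write (option 3 in the comment above):
        the full body is available, so boto3 can hash it and send it in one
        request; retrying on failure is left to the caller."""
        s3 = boto3.client('s3')
        await blocking_to_async(thread_pool, s3.put_object,
                                Bucket=bucket, Key=name, Body=data)
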
- bucket, name = self._get_bucket_name(url) - return S3CreateManager(self, bucket, name) - - async def multi_part_create( - self, - sema: asyncio.Semaphore, - url: str, - num_parts: int) -> MultiPartCreate: - bucket, name = self._get_bucket_name(url) - return S3MultiPartCreate(sema, self, bucket, name, num_parts) - - async def mkdir(self, url: str) -> None: - pass - - async def makedirs(self, url: str, exist_ok: bool = False) -> None: - pass - - async def statfile(self, url: str) -> FileStatus: - bucket, name = self._get_bucket_name(url) - try: - resp = await blocking_to_async(self._thread_pool, self._s3.head_object, - Bucket=bucket, - Key=name) - return S3HeadObjectFileStatus(resp) - except botocore.exceptions.ClientError as e: - if e.response['ResponseMetadata']['HTTPStatusCode'] == 404: - raise FileNotFoundError(url) from e - raise e - - async def _listfiles_recursive(self, bucket: str, name: str) -> AsyncIterator[FileListEntry]: - assert not name or name.endswith('/') - async for page in PageIterator(self, bucket, name): - assert 'CommonPrefixes' not in page - contents = page.get('Contents') - if contents: - for item in contents: - yield S3FileListEntry(bucket, item['Key'], item) - - async def _listfiles_flat(self, bucket: str, name: str) -> AsyncIterator[FileListEntry]: - assert not name or name.endswith('/') - async for page in PageIterator(self, bucket, name, delimiter='/'): - prefixes = page.get('CommonPrefixes') - if prefixes is not None: - for prefix in prefixes: - yield S3FileListEntry(bucket, prefix['Prefix'], None) - contents = page.get('Contents') - if contents: - for item in contents: - yield S3FileListEntry(bucket, item['Key'], item) - - async def listfiles(self, url: str, recursive: bool = False) -> AsyncIterator[FileListEntry]: - bucket, name = self._get_bucket_name(url) - if name and not name.endswith('/'): - name += '/' - if recursive: - it = self._listfiles_recursive(bucket, name) - else: - it = self._listfiles_flat(bucket, name) - - it = it.__aiter__() - try: - first_entry = await it.__anext__() - except StopAsyncIteration: - raise FileNotFoundError(url) # pylint: disable=raise-missing-from - - async def cons(first_entry, it): - yield first_entry - try: - while True: - yield await it.__anext__() - except StopAsyncIteration: - pass - - return cons(first_entry, it) - - async def staturl(self, url: str) -> str: - return await self._staturl_parallel_isfile_isdir(url) - - async def isfile(self, url: str) -> bool: - try: - bucket, name = self._get_bucket_name(url) - await blocking_to_async(self._thread_pool, self._s3.head_object, - Bucket=bucket, - Key=name) - return True - except botocore.exceptions.ClientError as e: - if e.response['ResponseMetadata']['HTTPStatusCode'] == 404: - return False - raise e - - async def isdir(self, url: str) -> bool: - try: - async for _ in await self.listfiles(url, recursive=True): - return True - assert False # unreachable - except FileNotFoundError: - return False - - async def remove(self, url: str) -> None: - try: - bucket, name = self._get_bucket_name(url) - await blocking_to_async(self._thread_pool, self._s3.delete_object, - Bucket=bucket, - Key=name) - except self._s3.exceptions.NoSuchKey as e: - raise FileNotFoundError(url) from e - - async def _rmtree(self, sema: asyncio.Semaphore, url: str) -> None: - await self._rmtree_with_recursive_listfiles(sema, url) - - async def rmtree(self, sema: Optional[asyncio.Semaphore], url: str) -> None: - if sema is None: - sema = asyncio.Semaphore(50) - async with sema: - return await 
self._rmtree(sema, url) - - return await self._rmtree(sema, url) - - async def close(self) -> None: - pass diff --git a/hail/python/hailtop/aiotools/stream.py b/hail/python/hailtop/aiotools/stream.py index 7e222444e35..b1cf2e56f55 100644 --- a/hail/python/hailtop/aiotools/stream.py +++ b/hail/python/hailtop/aiotools/stream.py @@ -1,10 +1,7 @@ -from typing import BinaryIO, Optional, Tuple, Type +from typing import Optional, Type, BinaryIO from types import TracebackType import abc -import io -import os from concurrent.futures import ThreadPoolExecutor -import janus from hailtop.utils import blocking_to_async @@ -32,7 +29,7 @@ async def wait_closed(self) -> None: self._waited_closed = True @property - def closed(self) -> bool: + def closed(self) -> None: return self._closed async def __aenter__(self) -> 'ReadableStream': @@ -72,7 +69,7 @@ async def wait_closed(self) -> None: self._waited_closed = True @property - def closed(self) -> bool: + def closed(self) -> None: return self._closed async def __aenter__(self) -> 'WritableStream': @@ -95,8 +92,6 @@ def __init__(self, thread_pool: ThreadPoolExecutor, f: BinaryIO): self._f = f async def read(self, n: int = -1) -> bytes: - if n == -1: - return await blocking_to_async(self._thread_pool, self._f.read) return await blocking_to_async(self._thread_pool, self._f.read, n) async def _wait_closed(self) -> None: @@ -120,8 +115,6 @@ async def write(self, b: bytes) -> int: return await blocking_to_async(self._thread_pool, self._f.write, b) async def _wait_closed(self) -> None: - await blocking_to_async(self._thread_pool, self._f.flush) - await blocking_to_async(self._thread_pool, os.fsync, self._f.fileno()) await blocking_to_async(self._thread_pool, self._f.close) del self._f @@ -132,74 +125,3 @@ def blocking_readable_stream_to_async(thread_pool: ThreadPoolExecutor, f: Binary def blocking_writable_stream_to_async(thread_pool: ThreadPoolExecutor, f: BinaryIO) -> _WritableStreamFromBlocking: return _WritableStreamFromBlocking(thread_pool, f) - - -class BlockingQueueReadableStream(io.RawIOBase): - # self.closed and self.close() must be multithread safe, because - # they can be accessed by both the stream reader and writer which - # are in different threads. 
- def __init__(self, q: janus.Queue): - super().__init__() - self._q = q - self._saw_eos = False - self._closed = False - self._unread = b'' - - def readable(self) -> bool: - return True - - def readinto(self, b: bytearray) -> int: - if self._closed: - raise ValueError('read on closed stream') - if self._saw_eos: - return 0 - - if not self._unread: - self._unread = self._q.sync_q.get() - if self._unread is None: - self._saw_eos = True - return 0 - assert self._unread - - n = min(len(self._unread), len(b)) - b[:n] = self._unread[:n] - self._unread = self._unread[n:] - return n - - def close(self): - self._closed = True - # drain the q so the writer doesn't deadlock - while not self._saw_eos: - c = self._q.sync_q.get() - if c is None: - self._saw_eos = True - - -class AsyncQueueWritableStream(WritableStream): - def __init__(self, q: janus.Queue, blocking_readable: BlockingQueueReadableStream): - super().__init__() - self._sent_eos = False - self._q = q - self._blocking_readable = blocking_readable - - async def write(self, b: bytes) -> int: - if self._blocking_readable._closed: - if not self._sent_eos: - await self._q.async_q.put(None) - self._sent_eos = True - raise ValueError('reader closed') - if b: - await self._q.async_q.put(b) - return len(b) - - async def _wait_closed(self) -> None: - if not self._sent_eos: - await self._q.async_q.put(None) - self._sent_eos = True - - -def async_writable_blocking_readable_stream_pair() -> Tuple[AsyncQueueWritableStream, BlockingQueueReadableStream]: - q: janus.Queue = janus.Queue(maxsize=1) - blocking_readable = BlockingQueueReadableStream(q) - async_writable = AsyncQueueWritableStream(q, blocking_readable) - return async_writable, blocking_readable diff --git a/hail/python/hailtop/batch/backend.py b/hail/python/hailtop/batch/backend.py index 5d7bbffd709..6155c6f0dbb 100644 --- a/hail/python/hailtop/batch/backend.py +++ b/hail/python/hailtop/batch/backend.py @@ -1,47 +1,34 @@ -from typing import Optional, Dict, Any, TypeVar, Generic +from typing import Optional, Dict, Any import sys import abc import os import subprocess as sp import uuid import time -import functools import copy from shlex import quote as shq import webbrowser import warnings -from concurrent.futures import ThreadPoolExecutor from hailtop.config import get_deploy_config, get_user_config -from hailtop.utils import is_google_registry_domain, parse_docker_image_reference, async_to_blocking, bounded_gather, tqdm +from hailtop.utils import is_google_registry_domain, parse_docker_image_reference from hailtop.batch.hail_genetics_images import HAIL_GENETICS_IMAGES from hailtop.batch_client.parse import parse_cpu_in_mcpu import hailtop.batch_client.client as bc from hailtop.batch_client.client import BatchClient -from hailtop.aiotools import RouterAsyncFS, LocalAsyncFS, AsyncFS -from hailtop.aiogoogle import GoogleStorageAsyncFS from . import resource, batch, job as _job # pylint: disable=unused-import from .exceptions import BatchException -RunningBatchType = TypeVar('RunningBatchType') -""" -The type of value returned by :py:meth:`.Backend._run`. The value returned by some backends -enables the user to monitor the asynchronous execution of a Batch. -""" - -SelfType = TypeVar('SelfType') - - -class Backend(abc.ABC, Generic[RunningBatchType]): +class Backend(abc.ABC): """ Abstract class for backends. 
""" _DEFAULT_SHELL = '/bin/bash' @abc.abstractmethod - def _run(self, batch, dry_run, verbose, delete_scratch_on_exit, **backend_kwargs) -> RunningBatchType: + def _run(self, batch, dry_run, verbose, delete_scratch_on_exit, **backend_kwargs): """ Execute a batch. @@ -49,12 +36,7 @@ def _run(self, batch, dry_run, verbose, delete_scratch_on_exit, **backend_kwargs ------- This method should not be called directly. Instead, use :meth:`.batch.Batch.run`. """ - raise NotImplementedError() - - @property - @abc.abstractmethod - def _fs(self) -> AsyncFS: - raise NotImplementedError() + return # pylint: disable=R0201 def close(self): @@ -63,14 +45,14 @@ def close(self): """ return - def __enter__(self: SelfType) -> SelfType: + def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): self.close() -class LocalBackend(Backend[None]): +class LocalBackend(Backend): """ Backend that executes batches on a local computer. @@ -113,18 +95,13 @@ def __init__(self, flags += f' -v {gsa_key_file}:/gsa-key/key.json' self._extra_docker_run_flags = flags - self.__fs: AsyncFS = LocalAsyncFS(ThreadPoolExecutor()) - - @property - def _fs(self): - return self.__fs def _run(self, batch: 'batch.Batch', dry_run: bool, verbose: bool, delete_scratch_on_exit: bool, - **backend_kwargs) -> None: # pylint: disable=R0915 + **backend_kwargs): # pylint: disable=R0915 """ Execute a batch. @@ -149,24 +126,11 @@ def _run(self, tmpdir = self._get_scratch_dir() - def new_code_block(): - return ['set -e' + ('x' if verbose else ''), - '\n', - '# change cd to tmp directory', - f"cd {tmpdir}", - '\n'] - - def run_code(code): - code = '\n'.join(code) - if dry_run: - print(code) - else: - try: - sp.check_call(code, shell=True) - except sp.CalledProcessError as e: - print(e) - print(e.output) - raise + lines = ['set -e' + ('x' if verbose else ''), + '\n', + '# change cd to tmp directory', + f"cd {tmpdir}", + '\n'] copied_input_resource_files = set() os.makedirs(tmpdir + '/inputs/', exist_ok=True) @@ -182,7 +146,7 @@ def copy_input(job, r): copied_input_resource_files.add(r) if r._input_path.startswith('gs://'): - return [f'gsutil {requester_pays_project} cp -r {shq(r._input_path)} {shq(r._get_path(tmpdir))}'] + return [f'gsutil {requester_pays_project} cp {shq(r._input_path)} {shq(r._get_path(tmpdir))}'] absolute_input_path = os.path.realpath(r._input_path) @@ -207,7 +171,7 @@ def _cp(dest): directory = os.path.dirname(dest) os.makedirs(directory, exist_ok=True) return 'cp' - return f'gsutil {requester_pays_project} cp -r' + return f'gsutil {requester_pays_project} cp' if isinstance(r, resource.InputResourceFile): return [f'{_cp(dest)} {shq(r._input_path)} {shq(dest)}' @@ -226,78 +190,81 @@ def symlink_input_resource_group(r): symlinks.append(f'ln -sf {shq(src)} {shq(dest)}') return symlinks - try: - write_inputs = [x for r in batch._input_resources for x in copy_external_output(r)] - if write_inputs: - code = new_code_block() - code += ["# Write input resources to output destinations"] - code += write_inputs - code += ['\n'] - run_code(code) + write_inputs = [x for r in batch._input_resources for x in copy_external_output(r)] + if write_inputs: + lines += ["# Write input resources to output destinations"] + lines += write_inputs + lines += ['\n'] - for job in batch._jobs: - if isinstance(job, _job.PythonJob): - async_to_blocking(job._compile(tmpdir, tmpdir)) + for job in batch._jobs: + if isinstance(job, _job.PythonJob): + job._compile(tmpdir, tmpdir) - os.makedirs(f'{tmpdir}/{job._job_id}/', exist_ok=True) + 
os.makedirs(f'{tmpdir}/{job._job_id}/', exist_ok=True) - code = new_code_block() + lines.append(f"# {job._job_id}: {job.name if job.name else ''}") - code.append(f"# {job._job_id}: {job.name if job.name else ''}") + lines += [x for r in job._inputs for x in copy_input(job, r)] + lines += [x for r in job._mentioned for x in symlink_input_resource_group(r)] - code += [x for r in job._inputs for x in copy_input(job, r)] - code += [x for r in job._mentioned for x in symlink_input_resource_group(r)] + resource_defs = [r._declare(tmpdir) for r in job._mentioned] + env = [f'export {k}={v}' for k, v in job._env.items()] - resource_defs = [r._declare(tmpdir) for r in job._mentioned] - env = [f'export {k}={v}' for k, v in job._env.items()] + job_shell = job._shell if job._shell else self._DEFAULT_SHELL - job_shell = job._shell if job._shell else self._DEFAULT_SHELL + defs = '; '.join(resource_defs) + '; ' if resource_defs else '' + joined_env = '; '.join(env) + '; ' if env else '' - defs = '; '.join(resource_defs) + '; ' if resource_defs else '' - joined_env = '; '.join(env) + '; ' if env else '' + cmd = " && ".join(f'{{\n{x}\n}}' for x in job._command) - cmd = " && ".join(f'{{\n{x}\n}}' for x in job._command) + quoted_job_script = shq(joined_env + defs + cmd) - quoted_job_script = shq(joined_env + defs + cmd) + if job._image: + cpu = f'--cpus={job._cpu}' if job._cpu else '' - if job._image: - cpu = f'--cpus={job._cpu}' if job._cpu else '' - - memory = job._memory - if memory is not None: - memory_ratios = {'lowmem': 1024**3, 'standard': 4 * 1024**3, 'highmem': 7 * 1024**3} - if memory in memory_ratios: - if job._cpu is not None: - mcpu = parse_cpu_in_mcpu(job._cpu) - if mcpu is not None: - memory = str(int(memory_ratios[memory] * (mcpu / 1000))) - else: - raise BatchException(f'invalid value for cpu: {job._cpu}') + memory = job._memory + if memory is not None: + memory_ratios = {'lowmem': 1024**3, 'standard': 4 * 1024**3, 'highmem': 7 * 1024**3} + if memory in memory_ratios: + if job._cpu is not None: + mcpu = parse_cpu_in_mcpu(job._cpu) + if mcpu is not None: + memory = str(int(memory_ratios[memory] * (mcpu / 1000))) else: - raise BatchException(f'must specify cpu when using {memory} to specify the memory') - memory = f'-m {memory}' if memory else '' - else: - memory = '' - - code.append(f"docker run " - "--entrypoint=''" - f"{self._extra_docker_run_flags} " - f"-v {tmpdir}:{tmpdir} " - f"-w {tmpdir} " - f"{memory} " - f"{cpu} " - f"{job._image} " - f"{job_shell} -c {quoted_job_script}") - else: - code.append(f"{job_shell} -c {quoted_job_script}") - - code += [x for r in job._external_outputs for x in copy_external_output(r)] - code += ['\n'] - - run_code(code) - finally: - if delete_scratch_on_exit: - sp.run(f'rm -rf {tmpdir}', shell=True, check=False) + raise BatchException(f'invalid value for cpu: {job._cpu}') + else: + raise BatchException(f'must specify cpu when using {memory} to specify the memory') + memory = f'-m {memory}' if memory else '' + + lines.append(f"docker run " + "--entrypoint=''" + f"{self._extra_docker_run_flags} " + f"-v {tmpdir}:{tmpdir} " + f"-w {tmpdir} " + f"{memory} " + f"{cpu} " + f"{job._image} " + f"{job_shell} -c {quoted_job_script}") + else: + lines.append(f"{job_shell} -c {quoted_job_script}") + + lines += [x for r in job._external_outputs for x in copy_external_output(r)] + lines += ['\n'] + + script = "\n".join(lines) + + if dry_run: + print(lines) + else: + try: + sp.check_call(script, shell=True) + except sp.CalledProcessError as e: + print(e) + print(e.output) 
+ raise + finally: + if delete_scratch_on_exit: + sp.run(f'rm -rf {tmpdir}', shell=True, check=False) print('Batch completed successfully!') @@ -311,17 +278,14 @@ def _get_random_name(): return _get_random_name() - def close(self): - async_to_blocking(self._fs.close()) - -class ServiceBackend(Backend[bc.Batch]): +class ServiceBackend(Backend): """Backend that executes batches on Hail's Batch Service on Google Cloud. Examples -------- - >>> service_backend = ServiceBackend('my-billing-account', bucket='my-bucket') # doctest: +SKIP + >>> service_backend = ServiceBackend('my-billing-account', 'my-bucket') # doctest: +SKIP >>> b = Batch(backend=service_backend) # doctest: +SKIP >>> b.run() # doctest: +SKIP >>> service_backend.close() # doctest: +SKIP @@ -335,58 +299,22 @@ class ServiceBackend(Backend[bc.Batch]): >>> b.run() # doctest: +SKIP >>> service_backend.close() - Instead of a bucket, a full path may be specified for the remote temporary directory: - - >>> service_backend = ServiceBackend('my-billing-account', - ... remote_tmpdir='gs://my-bucket/temporary-files/') - >>> b = Batch(backend=service_backend) - >>> b.run() # doctest: +SKIP - >>> service_backend.close() - Parameters ---------- billing_project: Name of billing project to use. bucket: - Name of bucket to use. Should not include the ``gs://`` prefix. Cannot be used with - remote_tmpdir. Temporary data will be stored in the "/batch" folder of this - bucket. Using this parameter as a positional argument is deprecated. - remote_tmpdir: - Temporary data will be stored in this google cloud storage folder. Cannot be used with - bucket. - google_project: - If specified, the project to use when authenticating with Google - Storage. Google Storage is used to transfer serialized values between - this computer and the cloud machines that execute Python jobs. + Name of bucket to use. Should not include the ``gs://`` + prefix. token: The authorization token to pass to the batch client. Should only be set for user delegation purposes. """ def __init__(self, - *args, - billing_project: Optional[str] = None, - bucket: Optional[str] = None, - remote_tmpdir: Optional[str] = None, - google_project: Optional[str] = None, - token: str = None - ): - if len(args) > 2: - raise TypeError(f'ServiceBackend() takes 2 positional arguments but {len(args)} were given') - if len(args) >= 1: - if billing_project is not None: - raise TypeError('ServiceBackend() got multiple values for argument \'billing_project\'') - warnings.warn('Use of deprecated positional argument \'billing_project\' in ServiceBackend(). Specify \'billing_project\' as a keyword argument instead.') - billing_project = args[0] - if len(args) >= 2: - if bucket is not None: - raise TypeError('ServiceBackend() got multiple values for argument \'bucket\'') - warnings.warn('Use of deprecated positional argument \'bucket\' in ServiceBackend(). 
Specify \'bucket\' as a keyword argument instead.') - bucket = args[1] - - if remote_tmpdir is not None and bucket is not None: - raise ValueError('Cannot specify both remote_tmpdir and bucket in ServiceBackend()') - + billing_project: str = None, + bucket: str = None, + token: str = None): if billing_project is None: billing_project = get_user_config().get('batch', 'billing_project', fallback=None) if billing_project is None: @@ -395,31 +323,15 @@ def __init__(self, 'or run `hailctl config set batch/billing_project ' 'MY_BILLING_PROJECT`') self._batch_client = BatchClient(billing_project, _token=token) - self.__fs: AsyncFS = RouterAsyncFS('file', [LocalAsyncFS(ThreadPoolExecutor()), - GoogleStorageAsyncFS(project=google_project)]) - if remote_tmpdir is None: - if bucket is None: - bucket = get_user_config().get('batch', 'bucket', fallback=None) - if bucket is None: - raise ValueError( - 'either the bucket or remote_tmpdir parameter of ServiceBackend ' - 'must be set or run `hailctl config set batch/bucket MY_BUCKET`') - if 'gs://' in bucket: - raise ValueError( - 'The bucket parameter to ServiceBackend() should be a bucket name, not a path. ' - 'Use the remote_tmpdir parameter to specify a path.') - remote_tmpdir = f'gs://{bucket}/batch' - else: - if not remote_tmpdir.startswith('gs://'): - raise ValueError( - 'remote_tmpdir must be a google storage path like gs://bucket/folder') - if remote_tmpdir[-1] != '/': - remote_tmpdir += '/' - self.remote_tmpdir = remote_tmpdir - @property - def _fs(self): - return self.__fs + if bucket is None: + bucket = get_user_config().get('batch', 'bucket', fallback=None) + if bucket is None: + raise ValueError( + 'the bucket parameter of ServiceBackend must be set ' + 'or run `hailctl config set batch/bucket ' + 'MY_BUCKET`') + self._bucket_name = bucket def close(self): """ @@ -431,7 +343,6 @@ def close(self): end of your script. """ self._batch_client.close() - async_to_blocking(self._fs.close()) def _run(self, batch: 'batch.Batch', @@ -443,7 +354,7 @@ def _run(self, disable_progress_bar: bool = False, callback: Optional[str] = None, token: Optional[str] = None, - **backend_kwargs) -> bc.Batch: # pylint: disable-msg=too-many-statements + **backend_kwargs): # pylint: disable-msg=too-many-statements """Execute a batch. Warning @@ -473,27 +384,14 @@ def _run(self, token: If not `None`, a string used for idempotency of batch submission. 
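The run parameters documented above are normally reached through Batch.run, which forwards extra keyword arguments to this method. A small illustrative sketch follows; the billing project, bucket, job command, and the top-level hailtop.batch re-exports are placeholders and assumptions, not part of this patch:

    import hailtop.batch as hb  # top-level re-exports assumed

    backend = hb.ServiceBackend('my-billing-account', 'my-bucket')
    b = hb.Batch(backend=backend, name='example')
    j = b.new_job(name='hello')
    j.command('echo hello')
    b.run(wait=False,                 # submit, then return without blocking on completion
          disable_progress_bar=True)  # quiet output for non-interactive runs
    backend.close()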
""" - return async_to_blocking( - self._async_run(batch, dry_run, verbose, delete_scratch_on_exit, wait, open, disable_progress_bar, callback, token, **backend_kwargs)) - - async def _async_run(self, - batch: 'batch.Batch', - dry_run: bool, - verbose: bool, - delete_scratch_on_exit: bool, - wait: bool = True, - open: bool = False, - disable_progress_bar: bool = False, - callback: Optional[str] = None, - token: Optional[str] = None, - **backend_kwargs): # pylint: disable-msg=too-many-statements + if backend_kwargs: raise ValueError(f'ServiceBackend does not support any of these keywords: {backend_kwargs}') build_dag_start = time.time() uid = uuid.uuid4().hex[:6] - batch_remote_tmpdir = f'{self.remote_tmpdir}{uid}' + remote_tmpdir = f'gs://{self._bucket_name}/batch/{uid}' local_tmpdir = f'/io/batch/{uid}' default_image = 'ubuntu:18.04' @@ -521,11 +419,11 @@ def copy_input(r): if isinstance(r, resource.InputResourceFile): return [(r._input_path, r._get_path(local_tmpdir))] assert isinstance(r, (resource.JobResourceFile, resource.PythonResult)) - return [(r._get_path(batch_remote_tmpdir), r._get_path(local_tmpdir))] + return [(r._get_path(remote_tmpdir), r._get_path(local_tmpdir))] def copy_internal_output(r): assert isinstance(r, (resource.JobResourceFile, resource.PythonResult)) - return [(r._get_path(local_tmpdir), r._get_path(batch_remote_tmpdir))] + return [(r._get_path(local_tmpdir), r._get_path(remote_tmpdir))] def copy_external_output(r): if isinstance(r, resource.InputResourceFile): @@ -562,22 +460,16 @@ def _cp(src, dst): jobs_to_command[j] = write_cmd n_jobs_submitted += 1 - pyjobs = [j for j in batch._jobs if isinstance(j, _job.PythonJob)] - for job in pyjobs: - if job._image is None: - version = sys.version_info - if version.major != 3 or version.minor not in (6, 7, 8): - raise BatchException( - f"You must specify 'image' for Python jobs if you are using a Python version other than 3.6, 3.7, or 3.8 (you are using {version})") - job._image = f'hailgenetics/python-dill:{version.major}.{version.minor}-slim' - - with tqdm(total=len(pyjobs), desc='upload python functions', disable=disable_progress_bar) as pbar: - async def compile_job(job): - await job._compile(local_tmpdir, batch_remote_tmpdir) - pbar.update(1) - await bounded_gather(*[functools.partial(compile_job, j) for j in pyjobs], parallelism=150) - - for job in tqdm(batch._jobs, desc='create job objects', disable=disable_progress_bar): + for job in batch._jobs: + if isinstance(job, _job.PythonJob): + if job._image is None: + version = sys.version_info + if version.major != 3 or version.minor not in (6, 7, 8): + raise BatchException( + f"You must specify 'image' for Python jobs if you are using a Python version other than 3.6, 3.7, or 3.8 (you are using {version})") + job._image = f'hailgenetics/python-dill:{version.major}.{version.minor}-slim' + job._compile(local_tmpdir, remote_tmpdir) + inputs = [x for r in job._inputs for x in copy_input(r)] outputs = [x for r in job._internal_outputs for x in copy_internal_output(r)] @@ -660,7 +552,7 @@ async def compile_job(job): if delete_scratch_on_exit and used_remote_tmpdir: parents = list(jobs_to_command.keys()) - rm_cmd = f'gsutil -m rm -r {batch_remote_tmpdir}' + rm_cmd = f'gsutil -m rm -r {remote_tmpdir}' cmd = f''' {bash_flags} {activate_service_account} diff --git a/hail/python/hailtop/batch/batch.py b/hail/python/hailtop/batch/batch.py index d4edf3f7688..e6945026e54 100644 --- a/hail/python/hailtop/batch/batch.py +++ b/hail/python/hailtop/batch/batch.py @@ -1,16 +1,15 @@ import 
os -import warnings import re -from concurrent.futures import ThreadPoolExecutor +import concurrent from typing import Optional, Dict, Union, List, Any, Set from hailtop.utils import secret_alnum_string -from hailtop.aiotools import AsyncFS, RouterAsyncFS, LocalAsyncFS -from hailtop.aiogoogle import GoogleStorageAsyncFS from . import backend as _backend, job, resource as _resource # pylint: disable=cyclic-import from .exceptions import BatchException +from ..google_storage import GCS + class Batch: """ @@ -79,15 +78,13 @@ class Batch: `dill` pre-installed will automatically be used if the current Python version is 3.6, 3.7, or 3.8. project: - DEPRECATED: please specify `google_project` on the ServiceBackend instead. If specified, - the project to use when authenticating with Google Storage. Google Storage is used to - transfer serialized values between this computer and the cloud machines that execute Python - jobs. + If specified, the project to use when authenticating with Google + Storage. Google Storage is used to transfer serialized values between + this computer and the cloud machines that execute Python jobs. cancel_after_n_failures: Automatically cancel the batch after N failures have occurred. The default behavior is there is no limit on the number of failures. Only applicable for the :class:`.ServiceBackend`. Must be greater than 0. - """ _counter = 0 @@ -140,23 +137,17 @@ def __init__(self, self._default_shell = default_shell self._default_python_image = default_python_image - if project is not None: - warnings.warn( - 'The project argument to Batch is deprecated, please instead use the google_project argument to ' - 'ServiceBackend. Use of this argument may trigger warnings from aiohttp about unclosed objects.') - self._DEPRECATED_project = project - self._DEPRECATED_fs: Optional[RouterAsyncFS] = None + self._project = project + self.__gcs: Optional[GCS] = None self._cancel_after_n_failures = cancel_after_n_failures @property - def _fs(self) -> AsyncFS: - if self._DEPRECATED_project is not None: - if self._DEPRECATED_fs is None: - self._DEPRECATED_fs = RouterAsyncFS('file', [LocalAsyncFS(ThreadPoolExecutor()), - GoogleStorageAsyncFS(project=self._DEPRECATED_project)]) - return self._DEPRECATED_fs - return self._backend._fs + def _gcs(self): + if self.__gcs is None: + self.__gcs = GCS(blocking_pool=concurrent.futures.ThreadPoolExecutor(), + project=self._project) + return self.__gcs def new_job(self, name: Optional[str] = None, @@ -562,12 +553,7 @@ def schedule_job(j): raise BatchException("cycle detected in dependency graph") self._jobs = ordered_jobs - run_result = self._backend._run(self, dry_run, verbose, delete_scratch_on_exit, **backend_kwargs) # pylint: disable=assignment-from-no-return - if self._DEPRECATED_fs is not None: - # best effort only because this is deprecated - self._DEPRECATED_fs.close() - self._DEPRECATED_fs = None - return run_result + return self._backend._run(self, dry_run, verbose, delete_scratch_on_exit, **backend_kwargs) def __str__(self): return self._uid diff --git a/hail/python/hailtop/batch/batch_pool_executor.py b/hail/python/hailtop/batch/batch_pool_executor.py index 44855035d87..9aa4b5ea5b5 100644 --- a/hail/python/hailtop/batch/batch_pool_executor.py +++ b/hail/python/hailtop/batch/batch_pool_executor.py @@ -128,7 +128,8 @@ def __init__(self, *, if not isinstance(self.backend, ServiceBackend): raise ValueError(f'BatchPoolExecutor is not compatible with {type(backend)}') self.batches: List[Batch] = [] - self.directory = 
self.backend.remote_tmpdir + f'batch-pool-executor/{self.name}/' + bucket: str = self.backend._bucket_name + self.directory = f'gs://{bucket}/batch-pool-executor/{self.name}/' self.inputs = self.directory + 'inputs/' self.outputs = self.directory + 'outputs/' self.gcs = GCS(blocking_pool=concurrent.futures.ThreadPoolExecutor(), diff --git a/hail/python/hailtop/batch/docs/api.rst b/hail/python/hailtop/batch/docs/api.rst index 5a773be52fd..a6a752c27b6 100644 --- a/hail/python/hailtop/batch/docs/api.rst +++ b/hail/python/hailtop/batch/docs/api.rst @@ -96,7 +96,6 @@ at ``__. :nosignatures: :template: class.rst - backend.RunningBatchType backend.Backend backend.LocalBackend backend.ServiceBackend diff --git a/hail/python/hailtop/batch/docs/change_log.rst b/hail/python/hailtop/batch/docs/change_log.rst index 1b8e258b35e..2c5e6222c0b 100644 --- a/hail/python/hailtop/batch/docs/change_log.rst +++ b/hail/python/hailtop/batch/docs/change_log.rst @@ -3,21 +3,6 @@ Change Log ========== -**Version 0.2.70** - -- Made submitting ``PythonJob`` faster when using the ``ServiceBackend`` - -**Version 0.2.69** - -- Added the option to specify either `remote_tmpdir` or `bucket` when using the ``ServiceBackend`` - -**Version 0.2.68** - -- Fixed copying a directory from GCS when using the ``LocalBackend`` -- Fixed writing files to GCS when the bucket name starts with a "g" or an "s" -- Fixed the error "Argument list too long" when using the ``LocalBackend`` -- Fixed an error where memory is set to None when using the ``LocalBackend`` - **Version 0.2.66** - Removed the need for the ``project`` argument in ``Batch()`` unless you are creating a PythonJob diff --git a/hail/python/hailtop/batch/docs/conf.py b/hail/python/hailtop/batch/docs/conf.py index a876394f22a..3e51ed4aaf4 100644 --- a/hail/python/hailtop/batch/docs/conf.py +++ b/hail/python/hailtop/batch/docs/conf.py @@ -29,7 +29,6 @@ # The full version, including alpha/beta/rc tags release = '' nitpicky = True -nitpick_ignore = [('py:class', 'hailtop.batch_client.client.Batch')] # -- General configuration --------------------------------------------------- diff --git a/hail/python/hailtop/batch/job.py b/hail/python/hailtop/batch/job.py index f2d6ce87767..d574cb82ac8 100644 --- a/hail/python/hailtop/batch/job.py +++ b/hail/python/hailtop/batch/job.py @@ -379,11 +379,6 @@ def gcsfuse(self, bucket, mount_point, read_only=True): if not isinstance(self._batch._backend, backend.ServiceBackend): raise NotImplementedError("A ServiceBackend is required to use the 'gcsfuse' option") - if bucket == '': - raise BatchException('bucket cannot be the empty string') - if mount_point == '': - raise BatchException('mount_point cannot be the empty string') - self._gcsfuse.append((bucket, mount_point, read_only)) return self @@ -876,7 +871,7 @@ def handle_arg(r): return result - async def _compile(self, local_tmpdir, remote_tmpdir): + def _compile(self, local_tmpdir, remote_tmpdir): for i, (result, unapplied, args, kwargs) in enumerate(self._functions): def prepare_argument_for_serialization(arg): if isinstance(arg, _resource.PythonResult): @@ -915,8 +910,13 @@ def wrapped(*args, **kwargs): job_path = os.path.dirname(result._get_path(remote_tmpdir)) code_path = f'{job_path}/code{i}.p' - await self._batch._fs.makedirs(os.path.dirname(code_path), exist_ok=True) - await self._batch._fs.write(code_path, pipe.getvalue()) + if isinstance(self._batch._backend, backend.LocalBackend): + os.makedirs(os.path.dirname(code_path), exist_ok=True) + with open(code_path, 'wb') as f: + 
f.write(pipe.getvalue()) + else: + assert isinstance(self._batch._backend, backend.ServiceBackend) + self._batch._gcs._write_gs_file_from_file_like_object(code_path, pipe) code = self._batch.read_input(code_path) self._add_inputs(code) diff --git a/hail/python/hailtop/batch_client/aioclient.py b/hail/python/hailtop/batch_client/aioclient.py index 54274fe43e1..72c11bfe6aa 100644 --- a/hail/python/hailtop/batch_client/aioclient.py +++ b/hail/python/hailtop/batch_client/aioclient.py @@ -425,8 +425,7 @@ def create_job(self, image, command, env=None, mount_docker_socket=False, service_account=None, attributes=None, parents=None, input_files=None, output_files=None, always_run=False, timeout=None, gcsfuse=None, requester_pays_project=None, - mount_tokens=False, network: Optional[str] = None, - unconfined: bool = False): + mount_tokens=False, network: Optional[str] = None): if self._submitted: raise ValueError("cannot create a job in an already submitted batch") @@ -500,8 +499,6 @@ def create_job(self, image, command, env=None, mount_docker_socket=False, job_spec['mount_tokens'] = mount_tokens if network: job_spec['network'] = network - if unconfined: - job_spec['unconfined'] = unconfined self._job_specs.append(job_spec) diff --git a/hail/python/hailtop/batch_client/client.py b/hail/python/hailtop/batch_client/client.py index 19c6ebd2e2b..ddeab7438ca 100644 --- a/hail/python/hailtop/batch_client/client.py +++ b/hail/python/hailtop/batch_client/client.py @@ -202,8 +202,7 @@ def create_job(self, image, command, env=None, mount_docker_socket=False, service_account=None, attributes=None, parents=None, input_files=None, output_files=None, always_run=False, timeout=None, gcsfuse=None, requester_pays_project=None, - mount_tokens=False, network: Optional[str] = None, - unconfined: bool = False) -> Job: + mount_tokens=False, network: Optional[str] = None) -> Job: if parents: parents = [parent._async_job for parent in parents] @@ -215,7 +214,7 @@ def create_job(self, image, command, env=None, mount_docker_socket=False, input_files=input_files, output_files=output_files, always_run=always_run, timeout=timeout, gcsfuse=gcsfuse, requester_pays_project=requester_pays_project, mount_tokens=mount_tokens, - network=network, unconfined=unconfined) + network=network) return Job.from_async_job(async_job) diff --git a/hail/python/hailtop/google_storage.py b/hail/python/hailtop/google_storage.py index 645a21a9f5e..00d7581efb6 100644 --- a/hail/python/hailtop/google_storage.py +++ b/hail/python/hailtop/google_storage.py @@ -18,7 +18,7 @@ class GCS: @staticmethod def _parse_uri(uri: str): assert uri.startswith('gs://'), uri - uri_parts = uri[5:].split('/') + uri_parts = uri.lstrip('gs://').split('/') bucket = uri_parts[0] path = '/'.join(uri_parts[1:]) return bucket, path diff --git a/hail/python/hailtop/hailctl/dataproc/resources/init_notebook.py b/hail/python/hailtop/hailctl/dataproc/resources/init_notebook.py index 871ce513546..072b20c1371 100644 --- a/hail/python/hailtop/hailctl/dataproc/resources/init_notebook.py +++ b/hail/python/hailtop/hailctl/dataproc/resources/init_notebook.py @@ -82,7 +82,6 @@ def mkdir_if_not_exists(path): 'SPARK_HOME': '/usr/lib/spark/', 'PYSPARK_PYTHON': '/opt/conda/default/bin/python', 'PYSPARK_DRIVER_PYTHON': '/opt/conda/default/bin/python', - 'HAIL_LOG_DIR': '/home/hail', } # VEP ENV diff --git a/hail/python/hailtop/httpx.py b/hail/python/hailtop/httpx.py index bb28ad0906b..cb9f051a08f 100644 --- a/hail/python/hailtop/httpx.py +++ b/hail/python/hailtop/httpx.py @@ -7,62 +7,6 @@ from 
.config.deploy_config import get_deploy_config -class ClientResponseError(aiohttp.ClientResponseError): - def __init__(self, - request_info: aiohttp.RequestInfo, - history: Tuple[aiohttp.ClientResponse, ...], - body: str = "", - **kwargs): - super().__init__(request_info, history, **kwargs) - self.body = body - - def __str__(self) -> str: - return "{}, message={!r}, url={!r} body={!r}".format( - self.status, - self.message, - self.request_info.real_url, - self.body - ) - - def __repr__(self) -> str: - args = f"{self.request_info!r}, {self.history!r}" - if self.status != 0: - args += f", status={self.status!r}" - if self.message != "": - args += f", message={self.message!r}" - if self.headers is not None: - args += f", headers={self.headers!r}" - if self.body is not None: - args += f", body={self.body!r}" - return "{}({})".format(type(self).__name__, args) - - -class ClientSession(aiohttp.ClientSession): - async def _request( - self, - method: str, - str_or_url: aiohttp.client.StrOrURL, - **kwargs - ): - raise_for_status = kwargs.pop('raise_for_status', self._raise_for_status) - resp = await super()._request(method, str_or_url, raise_for_status=False, **kwargs) - if raise_for_status: - if resp.status >= 400: - # reason should always be not None for a started response - assert resp.reason is not None - body = (await resp.read()).decode() - resp.release() - raise ClientResponseError( - resp.request_info, - resp.history, - status=resp.status, - message=resp.reason, - headers=resp.headers, - body=body - ) - return resp - - def client_session(*args, raise_for_status: bool = True, timeout: Union[aiohttp.ClientTimeout, float] = None, @@ -86,7 +30,7 @@ def client_session(*args, timeout = aiohttp.ClientTimeout(total=5) kwargs['timeout'] = timeout - return ClientSession(*args, **kwargs) + return aiohttp.ClientSession(*args, **kwargs) def blocking_client_session(*args, **kwargs) -> 'BlockingClientSession': diff --git a/hail/python/hailtop/utils/__init__.py b/hail/python/hailtop/utils/__init__.py index dd94f1f81bd..11e146fdbe4 100644 --- a/hail/python/hailtop/utils/__init__.py +++ b/hail/python/hailtop/utils/__init__.py @@ -10,7 +10,7 @@ flatten, partition, cost_str, external_requests_client_session, url_basename, url_join, is_google_registry_domain, parse_docker_image_reference, url_scheme, Notice, periodically_call, dump_all_stacktraces, find_spark_home, TransientError, - bounded_gather2, OnlineBoundedGather2, unpack_comma_delimited_inputs, retry_all_errors_n_times) + bounded_gather2, OnlineBoundedGather2, unpack_comma_delimited_inputs) from .process import ( CalledProcessError, check_shell, check_shell_output, sync_check_shell, sync_check_shell_output) @@ -81,6 +81,5 @@ 'OnlineBoundedGather2', 'unpack_comma_delimited_inputs', 'is_google_registry_domain', - 'parse_docker_image_reference', - 'retry_all_errors_n_times' + 'parse_docker_image_reference' ] diff --git a/hail/python/hailtop/utils/utils.py b/hail/python/hailtop/utils/utils.py index 16ecf752e9f..0ad2d1f2adf 100644 --- a/hail/python/hailtop/utils/utils.py +++ b/hail/python/hailtop/utils/utils.py @@ -23,11 +23,8 @@ from requests.adapters import HTTPAdapter from urllib3.poolmanager import PoolManager -import hailtop - from .time import time_msecs - log = logging.getLogger('hailtop.utils') @@ -271,19 +268,62 @@ async def __aexit__(self, await self.wait() -class WithoutSemaphore: - def __init__(self, sema): +class Subsemaphore: + def __init__(self, sema: asyncio.Semaphore): self._sema = sema + self._borrowed = 0 + self._lent = False + 
self._pending: List[Callable[[], None]] = [] + + async def acquire(self): + if not self._lent: + self._lent = True + return self + + acquired = asyncio.Event() + + async def borrow(): + await self._sema.acquire() + if acquired.is_set(): + self._sema.release() + return + self._borrowed += 1 + acquired.set() + + def on_return(): + assert not self._lent + if acquired.is_set(): + return + self._lent = True + acquired.set() + + asyncio.create_task(borrow()) + self._pending.append(on_return) + + await acquired.wait() + + return self - async def __aenter__(self) -> 'WithoutSemaphore': - self._sema.release() + def release(self): + if self._borrowed > 0: + self._sema.release() + self._borrowed -= 1 + else: + assert self._lent + self._lent = False + while self._pending and not self._lent: + f = self._pending.pop() + f() + + async def __aenter__(self) -> 'Subsemaphore': + await self.acquire() return self async def __aexit__(self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]) -> None: - await self._sema.acquire() + self.release() class PoolShutdownError(Exception): @@ -314,7 +354,7 @@ class OnlineBoundedGather2: def __init__(self, sema: asyncio.Semaphore): self._counter = 0 - self._sema = sema + self._subsema = Subsemaphore(sema) self._pending: Optional[Dict[int, asyncio.Task]] = {} # done if there are no pending tasks (the tasks are all # complete), or if we've shutdown and the cancelled tasks are @@ -364,7 +404,7 @@ async def call(self, f, *args, **kwargs) -> asyncio.Task: async def run_and_cleanup(): try: - async with self._sema: + async with self._subsema: await f(*args, **kwargs) except asyncio.CancelledError: pass @@ -372,7 +412,7 @@ async def run_and_cleanup(): if self._exception is None: _, exc, _ = sys.exc_info() self._exception = exc - await asyncio.shield(self._shutdown()) + await self._shutdown() else: log.info('discarding exception', exc_info=True) @@ -397,16 +437,22 @@ async def wait(self, tasks: List[asyncio.Task]) -> None: pool after waiting. ''' - async with WithoutSemaphore(self._sema): + self._subsema.release() + try: await asyncio.wait(tasks) + finally: + await self._subsema.acquire() async def __aenter__(self) -> 'OnlineBoundedGather2': + await self._subsema.acquire() return self async def __aexit__(self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]) -> None: + self._subsema.release() + if exc_val: if self._exception is None: self._exception = exc_val @@ -417,68 +463,64 @@ async def __aexit__(self, # wait for done and not pending _done_event.wait can return # when when there are pending jobs if the last job completed # (setting _done_event) and then more tasks were submitted - async with WithoutSemaphore(self._sema): - await self._done_event.wait() + await self._done_event.wait() while self._pending: assert not self._done_event.is_set() - async with WithoutSemaphore(self._sema): - await self._done_event.wait() + await self._done_event.wait() if self._exception: raise self._exception -async def bounded_gather2_return_exceptions(sema: asyncio.Semaphore, *pfs): - '''Run the partial functions `pfs` as tasks with parallelism bounded - by `sema`, which should be `asyncio.Semaphore` whose initial value - is the desired level of parallelism. - - The return value is the list of partial function results as pairs: - the pair `(value, None)` if the partial function returned value or - `(None, exc)` if the partial function raised the exception `exc`. 
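The Subsemaphore added above hands its first acquirer a "lent" slot without touching the parent semaphore and has any further concurrent acquirers borrow real slots from the parent; releasing the lent slot wakes the next pending acquirer. A minimal usage sketch under that reading (the module path in the import is an assumption, not part of the patch):

    import asyncio
    from hailtop.utils.utils import Subsemaphore  # module path assumed from this hunk

    async def main():
        parent = asyncio.Semaphore(2)   # overall budget shared across components
        subsema = Subsemaphore(parent)  # one component's view of that budget

        async def worker(i: int) -> int:
            async with subsema:           # first acquirer is lent a slot for free;
                await asyncio.sleep(0.1)  # later acquirers borrow from `parent`
                return i

        print(await asyncio.gather(*[worker(i) for i in range(5)]))

    asyncio.run(main())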
+async def bounded_gather2_return_exceptions(sema: asyncio.Semaphore, *aws): + '''Run the awaitables aws as tasks with parallelism bounded by sema, + which should be asyncio.Semaphore whose initial value is the level + of parallelism. + The return value is the list of awaitable results as pairs: the + pair (value, None) if the awaitable returned value or (None, exc) + if the awaitable raised the exception exc. ''' - async def run_with_sema_return_exceptions(pf): + subsema = Subsemaphore(sema) + + async def run_with_sema_return_exceptions(aw): try: - async with sema: - return (await pf(), None) + async with subsema: + return (await aw, None) except: _, exc, _ = sys.exc_info() return (None, exc) - tasks = [asyncio.create_task(run_with_sema_return_exceptions(pf)) for pf in pfs] - async with WithoutSemaphore(sema): - return await asyncio.gather(*tasks) + return await asyncio.gather(*[asyncio.create_task(run_with_sema_return_exceptions(aw)) for aw in aws]) -async def bounded_gather2_raise_exceptions(sema: asyncio.Semaphore, *pfs, cancel_on_error: bool = False): - '''Run the partial functions `pfs` as tasks with parallelism bounded - by `sema`, which should be `asyncio.Semaphore` whose initial value - is the level of parallelism. +async def bounded_gather2_raise_exceptions(sema: asyncio.Semaphore, *aws, cancel_on_error: bool = False): + '''Run the awaitables aws as tasks with parallelism bounded by sema, + which should be asyncio.Semaphore whose initial value is the level + of parallelism. - The return value is the list of partial function results. + The return value is the list of awaitable results. - The first exception raised by a partial function is raised by + The first exception raised by an awaitable is raised by bounded_gather2_raise_exceptions. - If cancel_on_error is False (the default), the remaining partial - functions continue to run with bounded parallelism. If + If cancel_on_error is False (the default), the remaining + awaitables continue to run with bounded parallelism. If cancel_on_error is True, the unfinished tasks are all cancelled. 
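Because these helpers now take awaitables rather than zero-argument partial functions, here is a brief sketch of how bounded_gather2 might be driven under the signature introduced in this hunk; the fetch coroutine and the numbers are illustrative only:

    import asyncio
    from hailtop.utils import bounded_gather2  # re-exported by utils/__init__.py above

    async def fetch(i: int) -> int:
        # stand-in for real work such as an HTTP request
        await asyncio.sleep(0.01)
        return i * i

    async def main():
        sema = asyncio.Semaphore(10)  # bounds how many awaitables run at once
        # raise the first exception and cancel whatever is still unfinished
        results = await bounded_gather2(sema, *[fetch(i) for i in range(100)],
                                        cancel_on_error=True)
        print(sum(results))

    asyncio.run(main())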
- ''' - async def run_with_sema(pf): - async with sema: - return await pf() + subsema = Subsemaphore(sema) + + async def run_with_subsema(aw): + async with subsema: + return await aw - tasks = [asyncio.create_task(run_with_sema(pf)) for pf in pfs] + tasks = [asyncio.create_task(run_with_subsema(aw)) for aw in aws] if not cancel_on_error: - async with WithoutSemaphore(sema): - return await asyncio.gather(*tasks) + return await asyncio.gather(*tasks) try: - async with WithoutSemaphore(sema): - return await asyncio.gather(*tasks) + return await asyncio.gather(*tasks) finally: _, exc, _ = sys.exc_info() if exc is not None: @@ -486,14 +528,13 @@ async def run_with_sema(pf): if not task.done(): task.cancel() if tasks: - async with WithoutSemaphore(sema): - await asyncio.wait(tasks) + await asyncio.wait(tasks) -async def bounded_gather2(sema: asyncio.Semaphore, *pfs, return_exceptions: bool = False, cancel_on_error: bool = False): +async def bounded_gather2(sema: asyncio.Semaphore, *aws, return_exceptions: bool = False, cancel_on_error: bool = False): if return_exceptions: - return await bounded_gather2_return_exceptions(sema, *pfs) - return await bounded_gather2_raise_exceptions(sema, *pfs, cancel_on_error=cancel_on_error) + return await bounded_gather2_return_exceptions(sema, *aws) + return await bounded_gather2_raise_exceptions(sema, *aws, cancel_on_error=cancel_on_error) RETRYABLE_HTTP_STATUS_CODES = {408, 500, 502, 503, 504} @@ -558,9 +599,6 @@ def is_transient_error(e): # 408 request timeout, 500 internal server error, 502 bad gateway # 503 service unavailable, 504 gateway timeout return True - if isinstance(e, hailtop.httpx.ClientResponseError) and ( - e.status == 403 and 'rateLimitExceeded' in e.body): - return True if isinstance(e, aiohttp.ServerTimeoutError): return True if isinstance(e, aiohttp.ServerDisconnectedError): @@ -569,6 +607,9 @@ def is_transient_error(e): return True if isinstance(e, aiohttp.client_exceptions.ClientConnectorError): return hasattr(e, 'os_error') and is_transient_error(e.os_error) + if isinstance(e, aiohttp.ClientOSError): + # aiohttp/client_reqrep.py wraps all OSError instances with a ClientOSError + return is_transient_error(e.__cause__) # appears to happen when the connection is lost prematurely, see: # https://github.com/aio-libs/aiohttp/issues/4581 # https://github.com/aio-libs/aiohttp/blob/v3.7.4/aiohttp/client_proto.py#L85 @@ -584,9 +625,6 @@ def is_transient_error(e): errno.EPIPE )): return True - if isinstance(e, aiohttp.ClientOSError): - # aiohttp/client_reqrep.py wraps all OSError instances with a ClientOSError - return is_transient_error(e.__cause__) if isinstance(e, urllib3.exceptions.ReadTimeoutError): return True if isinstance(e, requests.exceptions.ReadTimeout): @@ -642,25 +680,6 @@ async def _wrapper(f, *args, **kwargs): return _wrapper -def retry_all_errors_n_times(max_errors=10, msg=None, error_logging_interval=10): - async def _wrapper(f, *args, **kwargs): - delay = 0.1 - errors = 0 - while True: - try: - return await f(*args, **kwargs) - except asyncio.CancelledError: # pylint: disable=try-except-raise - raise - except Exception: - errors += 1 - if msg and errors % error_logging_interval == 0: - log.exception(msg, stack_info=True) - if errors >= max_errors: - raise - delay = await sleep_and_backoff(delay) - return _wrapper - - T = TypeVar('T') # pylint: disable=invalid-name diff --git a/hail/python/hailtop/utils/validate/__init__.py b/hail/python/hailtop/utils/validate/__init__.py index 0af209db9ef..f77c23691fd 100644 --- 
a/hail/python/hailtop/utils/validate/__init__.py +++ b/hail/python/hailtop/utils/validate/__init__.py @@ -1,5 +1,5 @@ from .validate import anyof, bool_type, dictof, keyed, listof, int_type, nullable, \ - numeric, oneof, regex, required, str_type, non_empty_str_type, switch, ValidationError + numeric, oneof, regex, required, str_type, switch, ValidationError __all__ = [ 'anyof', @@ -14,7 +14,6 @@ 'regex', 'required', 'str_type', - 'non_empty_str_type', 'switch', 'ValidationError' ] diff --git a/hail/python/hailtop/utils/validate/validate.py b/hail/python/hailtop/utils/validate/validate.py index acf4f8ad62b..fcfc2df5b19 100644 --- a/hail/python/hailtop/utils/validate/validate.py +++ b/hail/python/hailtop/utils/validate/validate.py @@ -140,12 +140,6 @@ def validate(self, name: str, obj): self.checker.validate(name, obj) -class TruthyValidator: - def validate(self, name: str, obj): # pylint: disable=no-self-use - if not obj: - raise ValidationError(f'{name} cannot be {obj}') - - class MultipleValidator: def __init__(self, checkers: List['Validator']): self.checkers = checkers @@ -168,11 +162,10 @@ def required(key: str): str_type = TypedValidator(str) -non_empty_str_type = MultipleValidator([str_type, TruthyValidator()]) bool_type = TypedValidator(bool) int_type = TypedValidator(int) -Validator = Union[TypedValidator, NumericValidator, NullableValidator, TruthyValidator, SetValidator] +Validator = Union[TypedValidator, NumericValidator, NullableValidator, SetValidator] def dictof(vchecker: Validator): diff --git a/hail/python/requirements.txt b/hail/python/requirements.txt index 5407945cc8e..34aa6f95fa8 100644 --- a/hail/python/requirements.txt +++ b/hail/python/requirements.txt @@ -2,8 +2,6 @@ aiohttp==3.7.4 aiohttp_session>=2.7,<2.8 asyncinit>=0.2.4,<0.3 bokeh>1.3,<2.0 -boto3>=1.17,<2.0 -botocore>=1.20,<2.0 decorator<5 Deprecated>=1.2.10,<1.3 dill>=0.3.1.1,<0.4 @@ -13,7 +11,6 @@ gcsfs==0.8.0 fsspec==0.9.0 humanize==1.0.0 hurry.filesize==0.9 -janus>=0.6,<0.7 nest_asyncio numpy<2 pandas>=1.1.0,<1.1.5 @@ -21,7 +18,7 @@ parsimonious<0.9 PyJWT pyspark>=3.1.1,<3.2.0 python-json-logger==0.1.11 -requests==2.25.1 +requests==2.22.0 scipy>1.2,<1.7 tabulate==0.8.3 tqdm==4.42.1 diff --git a/hail/python/setup.py b/hail/python/setup.py index 64bcd1789b4..c7141145641 100755 --- a/hail/python/setup.py +++ b/hail/python/setup.py @@ -38,7 +38,6 @@ project_urls={ 'Documentation': 'https://hail.is/docs/0.2/', 'Repository': 'https://github.com/hail-is/hail', - 'Change Log': 'https://hail.is/docs/0.2/change_log.html', }, packages=find_packages('.'), package_dir={ diff --git a/hail/python/test/hail/expr/test_expr.py b/hail/python/test/hail/expr/test_expr.py index 11b7c364f69..660dc2765ed 100644 --- a/hail/python/test/hail/expr/test_expr.py +++ b/hail/python/test/hail/expr/test_expr.py @@ -1466,35 +1466,40 @@ def test_str_ops(self): self.assertFalse(hl.eval(s_whitespace.endswith('a'))) def test_str_parsing(self): - assert_all_eval_to(*[(hl.bool(x), True) for x in ('true', 'True', 'TRUE')]) - assert_all_eval_to(*[(hl.bool(x), False) for x in ('false', 'False', 'FALSE')]) + for x in ('true', 'True', 'TRUE'): + self.assertTrue(hl.eval(hl.bool(x))) + + for x in ('false', 'False', 'FALSE'): + self.assertFalse(hl.eval(hl.bool(x))) for x in ('nan', 'Nan', 'naN', 'NaN'): for f in (hl.float, hl.float32, hl.float64, hl.parse_float32, hl.parse_float64): - assert_all_eval_to( - (hl.is_nan(f(x)), True), - (hl.is_nan(f('+' + x)), True), - (hl.is_nan(f('-' + x)), True) - ) + self.assertTrue(hl.eval(hl.is_nan(f(x)))) + 
self.assertTrue(hl.eval(hl.is_nan(f('+' + x)))) + self.assertTrue(hl.eval(hl.is_nan(f('-' + x)))) + for x in ('inf', 'Inf', 'iNf', 'InF', 'infinity', 'InfiNitY', 'INFINITY'): for f in (hl.float, hl.float32, hl.float64, hl.parse_float32, hl.parse_float64): - assert_all_eval_to( - (hl.is_infinite(f(x)), True), - (hl.is_infinite(f('+' + x)), True), - (hl.is_infinite(f('-' + x)), True), - (f('-' + x) < 0.0, True) - ) + self.assertTrue(hl.eval(hl.is_infinite(f(x)))) + self.assertTrue(hl.eval(hl.is_infinite(f('+' + x)))) + self.assertTrue(hl.eval(hl.is_infinite(f('-' + x)))) + self.assertTrue(hl.eval(f('-' + x) < 0.0)) for x in ('0', '1', '-5', '12382421'): - assert_all_eval_to(*[(f(hl.literal(x)), int(x)) for f in (hl.int32, hl.int64, hl.parse_int32, hl.parse_int64)]) - assert_all_eval_to(*[(f(hl.literal(x)), float(x)) for f in (hl.float32, hl.float64, hl.parse_float32, hl.parse_float64)]) + for f in (hl.int32, hl.int64, hl.parse_int32, hl.parse_int64): + self.assertEqual(hl.eval(f(hl.literal(x))), int(x)) + for f in (hl.float32, hl.float64, hl.parse_float32, hl.parse_float64): + self.assertEqual(hl.eval(f(hl.literal(x))), float(x)) for x in ('-1.5', '0.0', '2.5'): - assert_all_eval_to(*[(f(hl.literal(x)), float(x)) for f in (hl.float32, hl.float64, hl.parse_float32, hl.parse_float64)]) - assert_all_eval_to(*[(f(hl.literal(x)), None) for f in (hl.parse_int32, hl.parse_int64)]) + for f in (hl.float32, hl.float64, hl.parse_float32, hl.parse_float64): + self.assertEqual(hl.eval(f(hl.literal(x))), float(x)) + for f in (hl.parse_int32, hl.parse_int64): + self.assertEqual(hl.eval(f(hl.literal(x))), None) for x in ('abc', '1abc', ''): - assert_all_eval_to(*[(f(hl.literal(x)), None) for f in (hl.parse_float32, hl.parse_float64, hl.parse_int32, hl.parse_int64)]) + for f in (hl.parse_float32, hl.parse_float64, hl.parse_int32, hl.parse_int64): + self.assertEqual(hl.eval(f(hl.literal(x))), None) def test_str_missingness(self): self.assertEqual(hl.eval(hl.str(1)), '1') @@ -1521,33 +1526,33 @@ def test_division(self): expected = [0.5, 1.0, 2.0, 4.0, None] expected_inv = [2.0, 1.0, 0.5, 0.25, None] - self.check_expr(a_int32 / 4, expected, tarray(tfloat64)) - self.check_expr(a_int64 / 4, expected, tarray(tfloat64)) + self.check_expr(a_int32 / 4, expected, tarray(tfloat32)) + self.check_expr(a_int64 / 4, expected, tarray(tfloat32)) self.check_expr(a_float32 / 4, expected, tarray(tfloat32)) self.check_expr(a_float64 / 4, expected, tarray(tfloat64)) - self.check_expr(int32_4s / a_int32, expected_inv, tarray(tfloat64)) - self.check_expr(int32_4s / a_int64, expected_inv, tarray(tfloat64)) + self.check_expr(int32_4s / a_int32, expected_inv, tarray(tfloat32)) + self.check_expr(int32_4s / a_int64, expected_inv, tarray(tfloat32)) self.check_expr(int32_4s / a_float32, expected_inv, tarray(tfloat32)) self.check_expr(int32_4s / a_float64, expected_inv, tarray(tfloat64)) - self.check_expr(a_int32 / int32_4s, expected, tarray(tfloat64)) - self.check_expr(a_int64 / int32_4s, expected, tarray(tfloat64)) + self.check_expr(a_int32 / int32_4s, expected, tarray(tfloat32)) + self.check_expr(a_int64 / int32_4s, expected, tarray(tfloat32)) self.check_expr(a_float32 / int32_4s, expected, tarray(tfloat32)) self.check_expr(a_float64 / int32_4s, expected, tarray(tfloat64)) - self.check_expr(a_int32 / int64_4, expected, tarray(tfloat64)) - self.check_expr(a_int64 / int64_4, expected, tarray(tfloat64)) + self.check_expr(a_int32 / int64_4, expected, tarray(tfloat32)) + self.check_expr(a_int64 / int64_4, expected, tarray(tfloat32)) 
self.check_expr(a_float32 / int64_4, expected, tarray(tfloat32)) self.check_expr(a_float64 / int64_4, expected, tarray(tfloat64)) - self.check_expr(int64_4 / a_int32, expected_inv, tarray(tfloat64)) - self.check_expr(int64_4 / a_int64, expected_inv, tarray(tfloat64)) + self.check_expr(int64_4 / a_int32, expected_inv, tarray(tfloat32)) + self.check_expr(int64_4 / a_int64, expected_inv, tarray(tfloat32)) self.check_expr(int64_4 / a_float32, expected_inv, tarray(tfloat32)) self.check_expr(int64_4 / a_float64, expected_inv, tarray(tfloat64)) - self.check_expr(a_int32 / int64_4s, expected, tarray(tfloat64)) - self.check_expr(a_int64 / int64_4s, expected, tarray(tfloat64)) + self.check_expr(a_int32 / int64_4s, expected, tarray(tfloat32)) + self.check_expr(a_int64 / int64_4s, expected, tarray(tfloat32)) self.check_expr(a_float32 / int64_4s, expected, tarray(tfloat32)) self.check_expr(a_float64 / int64_4s, expected, tarray(tfloat64)) @@ -2429,9 +2434,6 @@ def test_array_methods(self): fm = hl.flatmap(lambda x: hl.set(hl.range(0, x.length()).map(lambda i: x[i])), {"ABC", "AAa", "BD"}) self.assertEqual(hl.eval(fm), {'A', 'a', 'B', 'C', 'D'}) - def test_starmap(self): - self.assertEqual(hl.eval(hl.array([(1, 2), (2, 3)]).starmap(lambda x,y: x+y)), [3, 5]) - def test_array_corr(self): x1 = [random.uniform(-10, 10) for x in range(10)] x2 = [random.uniform(-10, 10) for x in range(10)] @@ -2470,10 +2472,6 @@ def test_sorted(self): self.assertEqual(hl.sorted([0, 1, 4, hl.missing(tint), 3, 2], lambda x: x, reverse=True).collect()[0], [4, 3, 2, 1, 0, None]) self.assertEqual(hl.eval(hl.sorted([0, 1, 4, hl.missing(tint), 3, 2], lambda x: x, reverse=True)), [4, 3, 2, 1, 0, None]) - self.assertEqual(hl.eval(hl.sorted({0, 1, 4, 3, 2})), [0, 1, 2, 3, 4]) - - self.assertEqual(hl.eval(hl.sorted({"foo": 1, "bar": 2})), [("bar", 2), ("foo", 1)]) - def test_sort_by(self): self.assertEqual(hl.eval(hl._sort_by(["c", "aaa", "bb", hl.missing(hl.tstr)], lambda l, r: hl.len(l) < hl.len(r))), ["c", "bb", "aaa", None]) self.assertEqual(hl.eval(hl._sort_by([hl.Struct(x=i, y="foo", z=5.5) for i in [5, 3, 8, 2, 5]], lambda l, r: l.x < r.x)), @@ -3060,7 +3058,6 @@ def test_set_functions(self): t = hl.set([3, 8]) self.assert_evals_to(s, set([1, 3, 7])) - self.assert_evals_to(hl.set(frozenset([1, 2, 3])), set([1, 2, 3])) self.assert_evals_to(s.add(3), set([1, 3, 7])) self.assert_evals_to(s.add(4), set([1, 3, 4, 7])) diff --git a/hail/python/test/hail/expr/test_ndarrays.py b/hail/python/test/hail/expr/test_ndarrays.py index 3192a9f4c0e..da727c40b50 100644 --- a/hail/python/test/hail/expr/test_ndarrays.py +++ b/hail/python/test/hail/expr/test_ndarrays.py @@ -73,8 +73,7 @@ def test_ndarray_slice(): a = [0, 1] an = np.array(a) ah = hl.nd.array(a) - ae_np = np.arange(4*4*5*6*5*4).reshape((4, 4, 5, 6, 5, 4)) - ae = hl.nd.array(ae_np) + assert_ndarrays_eq( (rect_prism[:, :, :], np_rect_prism[:, :, :]), (rect_prism[:, :, 1], np_rect_prism[:, :, 1]), @@ -88,26 +87,7 @@ def test_ndarray_slice(): np_rect_prism[0:, :, 1:4:2] + np_rect_prism[:, :1, 1:4:2]), (rect_prism[0, 0, -3:-1], np_rect_prism[0, 0, -3:-1]), (rect_prism[-1, 0:1, 3:0:-1], np_rect_prism[-1, 0:1, 3:0:-1]), - # partial indexing - (rect_prism[1], np_rect_prism[1]), - (rect_prism[1:2], np_rect_prism[1:2]), - (rect_prism[1:2:2], np_rect_prism[1:2:2]), - (rect_prism[1, 2], np_rect_prism[1, 2]), - (rect_prism[-1, 1:2:2], np_rect_prism[-1, 1:2:2]), - # ellipses inclusion - (rect_prism[...], np_rect_prism[...]), - (rect_prism[1, ...], np_rect_prism[1, ...]), - (rect_prism[..., 1], 
np_rect_prism[..., 1]), - # np.newaxis inclusion - (rect_prism[hl.nd.newaxis, :, :], np_rect_prism[np.newaxis, :, :]), - (rect_prism[hl.nd.newaxis], np_rect_prism[np.newaxis]), - (rect_prism[hl.nd.newaxis, np.newaxis, np.newaxis], np_rect_prism[np.newaxis, np.newaxis, np.newaxis]), - (rect_prism[hl.nd.newaxis, np.newaxis, 1:4:2], np_rect_prism[np.newaxis, np.newaxis, 1:4:2]), - (rect_prism[1, :, hl.nd.newaxis], np_rect_prism[1, :, np.newaxis]), - (rect_prism[1, hl.nd.newaxis, 1], np_rect_prism[1, np.newaxis, 1]), - (rect_prism[..., hl.nd.newaxis, 1], np_rect_prism[..., np.newaxis, 1]), - ) - assert_ndarrays_eq( + (flat[15:5:-1], np_flat[15:5:-1]), (flat[::-1], np_flat[::-1]), (flat[::22], np_flat[::22]), @@ -118,9 +98,6 @@ def test_ndarray_slice(): (flat[4:1:-2], np_flat[4:1:-2]), (flat[0:0:1], np_flat[0:0:1]), (flat[-4:-1:2], np_flat[-4:-1:2]), - # ellipses inclusion - (flat[...], np_flat[...]), - (mat[::-1, :], np_mat[::-1, :]), (mat[0, 1:4:2] + mat[:, 1:4:2], np_mat[0, 1:4:2] + np_mat[:, 1:4:2]), @@ -151,24 +128,11 @@ def test_ndarray_slice(): (mat[:-5:-1, 0], np_mat[:-5:-1, 0]), (mat[0:-5, 0], np_mat[0:-5, 0]), (mat[0:-5:-1, 0], np_mat[0:-5:-1, 0]), - # partial indexing - (mat[1], np_mat[1]), - (mat[0:1], np_mat[0:1]), - # ellipses inclusion - (mat[...], np_mat[...]), (ah[:-3:1], an[:-3:1]), (ah[:-3:-1], an[:-3:-1]), (ah[-3::-1], an[-3::-1]), - (ah[-3::1], an[-3::1]), - - # ellipses inclusion - (ae[..., 3], ae_np[..., 3]), - (ae[3, ...], ae_np[3, ...]), - (ae[2, 3, 1:2:2, ...], ae_np[2, 3, 1:2:2, ...]), - (ae[3, 2, 3, ..., 2], ae_np[3, 2, 3, ..., 2]), - (ae[3, 2, 2, ..., 2, 1:2:2], ae_np[3, 2, 2, ..., 2, 1:2:2]), - (ae[3, :, hl.nd.newaxis, ..., :, hl.nd.newaxis, 2], ae_np[3, :, np.newaxis, ..., :, np.newaxis, 2]) + (ah[-3::1], an[-3::1]) ) assert hl.eval(flat[hl.missing(hl.tint32):4:1]) is None @@ -186,12 +150,6 @@ def test_ndarray_slice(): with pytest.raises(HailUserError, match="Index -4 is out of bounds for axis 0 with size 2"): hl.eval(mat[-4, 0:3]) - with pytest.raises(IndexError, match="an index can only have a single ellipsis"): - hl.eval(rect_prism[..., ...]) - - with pytest.raises(IndexError, match="too many indices for array: array is 3-dimensional, but 4 were indexed"): - hl.eval(rect_prism[1, 1, 1, 1]) - def test_ndarray_transposed_slice(): a = hl.nd.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]) @@ -367,7 +325,7 @@ def test_ndarray_reshape(): a.reshape(hl.tuple(['4', '5'])) -def test_ndarray_map1(): +def test_ndarray_map(): a = hl.nd.array([[2, 3, 4], [5, 6, 7]]) b = hl.map(lambda x: -x, a) b2 = b.map(lambda x: x * x) @@ -381,10 +339,9 @@ def test_ndarray_map1(): assert hl.eval(hl.missing(hl.tndarray(hl.tfloat, 1)).map(lambda x: x * 2)) is None - # NDArrays don't correctly support elements that contain pointers at the moment. 
- # s = hl.nd.array(["hail", "is", "great"]) - # s_lens = s.map(lambda e: hl.len(e)) - # assert np.array_equal(hl.eval(s_lens), np.array([4, 2, 5])) + s = hl.nd.array(["hail", "is", "great"]) + s_lens = s.map(lambda e: hl.len(e)) + assert np.array_equal(hl.eval(s_lens), np.array([4, 2, 5])) structs = hl.nd.array([hl.struct(x=5, y=True), hl.struct(x=9, y=False)]) assert np.array_equal(hl.eval(structs.map(lambda e: e.y)), np.array([True, False])) @@ -470,8 +427,7 @@ def test_ndarray_map2(): (ncube1 // ny, cube1 // y), (ny // ncube1, y // cube1), (ncube1 // nrow_vec, cube1 // row_vec), - (nrow_vec // ncube1, row_vec // cube1) - ) + (nrow_vec // ncube1, row_vec // cube1)) # Division assert_ndarrays_almost_eq( @@ -1090,11 +1046,13 @@ def assert_table(a, b): def test_eye(): for i in range(13): - assert_ndarrays_eq(*[(hl.nd.eye(i, y), np.eye(i, y)) for y in range(13)]) + for y in range(13): + assert(np.array_equal(hl.eval(hl.nd.eye(i, y)), np.eye(i, y))) def test_identity(): - assert_ndarrays_eq(*[(hl.nd.identity(i), np.identity(i)) for i in range(13)]) + for i in range(13): + assert(np.array_equal(hl.eval(hl.nd.identity(i)), np.identity(i))) def test_agg_ndarray_sum(): diff --git a/hail/python/test/hail/helpers.py b/hail/python/test/hail/helpers.py index 1e43bb020dd..62e5b21c9c4 100644 --- a/hail/python/test/hail/helpers.py +++ b/hail/python/test/hail/helpers.py @@ -130,6 +130,7 @@ def wrapper(func, *args, **kwargs): return wrapper + def skip_when_service_backend(message='does not work on ServiceBackend'): from hail.backend.service_backend import ServiceBackend @decorator @@ -153,15 +154,6 @@ def wrapper(func, *args, **kwargs): reason="doesn't yet work on service backend", strict=True) -def check_spark(): - backend_name = os.environ.get('HAIL_QUERY_BACKEND', 'spark') - return backend_name == 'spark' - -fails_spark_backend = pytest.mark.xfail( - check_spark(), - reason="doesn't yet work on spark backend", - strict=True) - def run_with_cxx_compile(): @decorator diff --git a/hail/python/test/hail/linalg/test_linalg.py b/hail/python/test/hail/linalg/test_linalg.py index 16afdff748a..4f7dbc12959 100644 --- a/hail/python/test/hail/linalg/test_linalg.py +++ b/hail/python/test/hail/linalg/test_linalg.py @@ -71,22 +71,26 @@ def assert_sums_agree(self, bm, nd): self._assert_close(bm.sum(axis=0), np.sum(nd, axis=0, keepdims=True)) self._assert_close(bm.sum(axis=1), np.sum(nd, axis=1, keepdims=True)) - def test_from_entry_expr_simple(self): + @fails_service_backend() + @fails_local_backend() + def test_from_entry_expr(self): mt = get_dataset() mt = mt.annotate_entries(x=hl.or_else(mt.GT.n_alt_alleles(), 0)).cache() - a1 = hl.eval(BlockMatrix.from_entry_expr(hl.or_else(mt.GT.n_alt_alleles(), 0), block_size=32).to_ndarray()) - a2 = hl.eval(BlockMatrix.from_entry_expr(mt.x, block_size=32).to_ndarray()) - a3 = hl.eval(BlockMatrix.from_entry_expr(hl.float64(mt.x), block_size=32).to_ndarray()) + a1 = BlockMatrix.from_entry_expr(hl.or_else(mt.GT.n_alt_alleles(), 0), block_size=32).to_numpy() + a2 = BlockMatrix.from_entry_expr(mt.x, block_size=32).to_numpy() + a3 = BlockMatrix.from_entry_expr(hl.float64(mt.x), block_size=32).to_numpy() self._assert_eq(a1, a2) self._assert_eq(a1, a3) with hl.TemporaryDirectory(ensure_exists=False) as path: BlockMatrix.write_from_entry_expr(mt.x, path, block_size=32) - a4 = hl.eval(BlockMatrix.read(path).to_ndarray()) + a4 = BlockMatrix.read(path).to_numpy() self._assert_eq(a1, a4) + @fails_service_backend() + @fails_local_backend() def test_from_entry_expr_options(self): def 
build_mt(a): data = [{'v': 0, 's': 0, 'x': a[0]}, @@ -98,10 +102,10 @@ def build_mt(a): return mt.choose_cols([ids.index(0), ids.index(1), ids.index(2)]) def check(expr, mean_impute, center, normalize, expected): - actual = np.squeeze(hl.eval(BlockMatrix.from_entry_expr(expr, + actual = np.squeeze(BlockMatrix.from_entry_expr(expr, mean_impute=mean_impute, center=center, - normalize=normalize).to_ndarray())) + normalize=normalize).to_numpy()) assert np.allclose(actual, expected) a = np.array([0.0, 1.0, 2.0]) @@ -121,6 +125,8 @@ def check(expr, mean_impute, center, normalize, expected): with self.assertRaises(Exception): BlockMatrix.from_entry_expr(mt.x) + @fails_service_backend() + @fails_local_backend() def test_write_from_entry_expr_overwrite(self): mt = hl.balding_nichols_model(1, 1, 1) mt = mt.select_entries(x=mt.GT.n_alt_alleles()) @@ -250,20 +256,19 @@ def test_to_matrix_table(self): mt_round_trip = BlockMatrix.from_entry_expr(mt.element).to_matrix_table_row_major() assert mt._same(mt_round_trip) - def test_paired_elementwise_ops(self): + @fails_service_backend() + @fails_local_backend() + def test_elementwise_ops(self): nx = np.array([[2.0]]) nc = np.array([[1.0], [2.0]]) nr = np.array([[1.0, 2.0, 3.0]]) nm = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) e = 2.0 - # BlockMatrixMap requires very simple IRs on the SparkBackend. If I use - # `from_ndarray` here, it generates an `NDArrayRef` expression that it can't handle. - # Will be fixed by improving FoldConstants handling of ndarrays or fully lowering BlockMatrix. - x = BlockMatrix._create(1, 1, [2.0], block_size=8) - c = BlockMatrix.from_ndarray(hl.literal(nc), block_size=8) - r = BlockMatrix.from_ndarray(hl.literal(nr), block_size=8) - m = BlockMatrix.from_ndarray(hl.literal(nm), block_size=8) + x = BlockMatrix.from_numpy(nx, block_size=8) + c = BlockMatrix.from_numpy(nc, block_size=8) + r = BlockMatrix.from_numpy(nr, block_size=8) + m = BlockMatrix.from_numpy(nm, block_size=8) self.assertRaises(TypeError, lambda: x + np.array(['one'], dtype=str)) @@ -572,9 +577,11 @@ def test_sum_with_sparsify(self): self.assert_sums_agree(bm3, nd) self.assert_sums_agree(bm4, nd4) + @fails_service_backend() + @fails_local_backend() def test_slicing(self): nd = np.array(np.arange(0, 80, dtype=float)).reshape(8, 10) - bm = BlockMatrix.from_ndarray(hl.literal(nd), block_size=3) + bm = BlockMatrix.from_numpy(nd, block_size=3) for indices in [(0, 0), (5, 7), (-3, 9), (-8, -10)]: self._assert_eq(bm[indices], nd[indices]) @@ -595,16 +602,14 @@ def test_slicing(self): self._assert_eq(bm[indices] - bm, nd[indices] - nd) self._assert_eq(bm - bm[indices], nd - nd[indices]) - for indices in [ - (slice(0, 8), slice(0, 10)), - (slice(0, 8, 2), slice(0, 10, 2)), - (slice(2, 4), slice(5, 7)), - (slice(-8, -1), slice(-10, -1)), - (slice(-8, -1, 2), slice(-10, -1, 2)), - (slice(None, 4, 1), slice(None, 4, 1)), - (slice(4, None), slice(4, None)), - (slice(None, None), slice(None, None)) - ]: + for indices in [(slice(0, 8), slice(0, 10)), + (slice(0, 8, 2), slice(0, 10, 2)), + (slice(2, 4), slice(5, 7)), + (slice(-8, -1), slice(-10, -1)), + (slice(-8, -1, 2), slice(-10, -1, 2)), + (slice(None, 4, 1), slice(None, 4, 1)), + (slice(4, None), slice(4, None)), + (slice(None, None), slice(None, None))]: self._assert_eq(bm[indices], nd[indices]) self._assert_eq(bm[indices][:, :2], nd[indices][:, :2]) self._assert_eq(bm[indices][:2, :], nd[indices][:2, :]) @@ -876,6 +881,8 @@ def test_to_ndarray(self): sparsed = BlockMatrix.from_ndarray(hl.nd.array(sparsed_numpy), 
block_size=4)._sparsify_blocks(blocks_to_sparsify).to_ndarray() self.assertTrue(np.array_equal(sparsed_numpy, hl.eval(sparsed))) + @fails_service_backend() + @fails_local_backend() def test_block_matrix_entries(self): n_rows, n_cols = 5, 3 rows = [{'i': i, 'j': j, 'entry': float(i + j)} for i in range(n_rows) for j in range(n_cols)] @@ -887,12 +894,14 @@ def test_block_matrix_entries(self): ndarray = np.reshape(list(map(lambda row: row['entry'], rows)), (n_rows, n_cols)) for block_size in [1, 2, 1024]: - block_matrix = BlockMatrix.from_ndarray(hl.literal(ndarray), block_size) + block_matrix = BlockMatrix.from_numpy(ndarray, block_size) entries_table = block_matrix.entries() self.assertEqual(entries_table.count(), n_cols * n_rows) self.assertEqual(len(entries_table.row), 3) self.assertTrue(table._same(entries_table)) + @fails_service_backend() + @fails_local_backend() def test_from_entry_expr_filtered(self): mt = hl.utils.range_matrix_table(1, 1).filter_entries(False) bm = hl.linalg.BlockMatrix.from_entry_expr(mt.row_idx + mt.col_idx, mean_impute=True) # should run without error @@ -1192,17 +1201,3 @@ def test_row_blockmatrix_sum(self): # Summing horizontally along a column vector to make sure nothing changes f = col.sum(axis=1) assert f.to_numpy().shape == (10, 1) - - - @fails_spark_backend() - def test_map(self): - np_mat = np.arange(20, dtype=np.float64).reshape((4, 5)) - bm = BlockMatrix.from_ndarray(hl.nd.array(np_mat)) - bm_mapped_arith = bm._map_dense(lambda x: (x * x) + 5) - self._assert_eq(bm_mapped_arith, np_mat * np_mat + 5) - - bm_mapped_if = bm._map_dense(lambda x: hl.if_else(x >= 1, x, -8.0)) - np_if = np_mat.copy() - np_if[0, 0] = -8.0 - self._assert_eq(bm_mapped_if, np_if) - diff --git a/hail/python/test/hail/matrixtable/test_matrix_table.py b/hail/python/test/hail/matrixtable/test_matrix_table.py index 01cfa5536f9..208c9476405 100644 --- a/hail/python/test/hail/matrixtable/test_matrix_table.py +++ b/hail/python/test/hail/matrixtable/test_matrix_table.py @@ -165,6 +165,7 @@ def test_filter(self): mt.count_rows() @fails_service_backend() + @fails_local_backend() def test_aggregate(self): mt = self.get_mt() @@ -430,6 +431,7 @@ def test_semi_anti_join_cols(self): assert mt.anti_join_cols(ht).count() == (3, 7) @fails_service_backend() + @fails_local_backend() def test_joins(self): mt = self.get_mt().select_rows(x1=1, y1=1) mt2 = mt.select_rows(x2=1, y2=2) @@ -449,6 +451,7 @@ def test_joins(self): self.assertTrue(ct.all(ct.c2 == 2)) @fails_service_backend() + @fails_local_backend() def test_joins_with_key_structs(self): mt = self.get_mt() @@ -560,7 +563,6 @@ def test_union_cols_distinct(self): mt = mt.key_rows_by(x = mt.row_idx // 2) assert mt.union_cols(mt).count_rows() == 5 - @skip_when_service_backend('flaky https://hail.zulipchat.com/#narrow/stream/127527-team/topic/CI.20Deploy.20Failure/near/237593731') def test_union_cols_outer(self): r, c = 10, 10 mt = hl.utils.range_matrix_table(2*r, c) @@ -842,13 +844,14 @@ def test_sample_rows(self): self.assertTrue(ds_small.count_rows() < ds.count_rows()) @fails_service_backend() + @fails_local_backend() def test_read_stored_cols(self): ds = self.get_mt() ds = ds.annotate_globals(x='foo') f = new_temp_file(extension='mt') ds.write(f) t = hl.read_table(f + '/cols') - self.assertTrue(ds.cols().key_by()._same(t)) + self.assertTrue(ds.cols()._same(t)) @skip_when_service_backend('Shuffler encoding/decoding is broken.') def test_read_stored_rows(self): @@ -868,6 +871,7 @@ def test_read_stored_globals(self): 
self.assertTrue(ds.globals_table()._same(t)) @fails_service_backend() + @fails_local_backend() def test_indexed_read(self): mt = hl.utils.range_matrix_table(2000, 100, 10) f = new_temp_file(extension='mt') @@ -887,6 +891,7 @@ def test_indexed_read(self): self.assertTrue(mt.filter_rows((mt.row_idx >= 150) & (mt.row_idx < 500))._same(mt2)) @fails_service_backend() + @fails_local_backend() def test_indexed_read_vcf(self): vcf = self.get_mt(10) f = new_temp_file(extension='mt') diff --git a/hail/python/test/hail/methods/test_impex.py b/hail/python/test/hail/methods/test_impex.py index 6a4fe7b4425..a4fa0a49286 100644 --- a/hail/python/test/hail/methods/test_impex.py +++ b/hail/python/test/hail/methods/test_impex.py @@ -75,10 +75,6 @@ def test_undeclared_info(self): self.assertFalse('undeclared' in info_type) self.assertFalse('undeclaredFlag' in info_type) - @fails_service_backend() - def test_can_import_bad_number_flag(self): - hl.import_vcf(resource('bad_flag_number.vcf')).rows()._force_count() - @fails_service_backend() def test_malformed(self): with self.assertRaisesRegex(FatalError, "invalid character"): @@ -956,24 +952,6 @@ def test_contig_recoding_defaults(self): resource('sex_mt_contigs.fam'), reference_genome='random') - @fails_service_backend() - @fails_local_backend() - def test_export_plink_struct_locus(self): - mt = hl.utils.range_matrix_table(10, 10) - mt = mt.key_rows_by(locus=hl.struct(contig=hl.str(mt.row_idx), position=mt.row_idx), alleles=['A', 'T']).select_rows() - mt = mt.key_cols_by(s=hl.str(mt.col_idx)).select_cols() - mt = mt.annotate_entries(GT=hl.call(0, 0)) - - out = new_temp_file() - - hl.export_plink(mt, out) - mt2 = hl.import_plink( - bed=out + '.bed', - bim=out + '.bim', - fam=out + '.fam', - reference_genome=None).select_rows().select_cols() - assert mt._same(mt2) - # this routine was used to generate resources random.gen, random.sample # random.bgen was generated with qctool v2.0rc9: @@ -1880,7 +1858,7 @@ def test_key_by_after_empty_key_import(self): @fails_service_backend() @fails_local_backend() - def test_devilish_nine_separated_eight_missing_file(self): + def test_devlish_nine_separated_eight_missing_file(self): fields = {'chr': hl.tstr, '': hl.tint32, 'ref': hl.tstr, @@ -2069,6 +2047,7 @@ def test_grep_show_false(self): @fails_service_backend() +@fails_local_backend() def test_matrix_and_table_read_intervals_with_hidden_key(): f1 = new_temp_file() f2 = new_temp_file() diff --git a/hail/python/test/hail/methods/test_misc.py b/hail/python/test/hail/methods/test_misc.py index 99b2f8577fc..54a8102763d 100644 --- a/hail/python/test/hail/methods/test_misc.py +++ b/hail/python/test/hail/methods/test_misc.py @@ -212,9 +212,3 @@ def test_lambda_gc(self): lgc2 = hl.lambda_gc(ht.x2) self.assertAlmostEqual(lgc, 1, places=1) # approximate, 1 place is safe self.assertAlmostEqual(lgc2, 1.89, places=1) # approximate, 1 place is safe - - def test_lambda_gc_nans(self): - N = 5000000 - ht = hl.utils.range_table(N).annotate(x = hl.scan.count() / N, is_even=hl.scan.count() % 2 == 0) - lgc_nan = hl.lambda_gc(hl.case().when(ht.is_even, hl.float('nan')).default(ht.x)) - self.assertAlmostEqual(lgc_nan, 1, places=1) # approximate, 1 place is safe diff --git a/hail/python/test/hail/methods/test_pca.py b/hail/python/test/hail/methods/test_pca.py index 83216629c55..25fb27a6e8c 100644 --- a/hail/python/test/hail/methods/test_pca.py +++ b/hail/python/test/hail/methods/test_pca.py @@ -128,52 +128,27 @@ def bound(vs, us): # equation 12 from https://www.ncbi.nlm.nih.gov/pmc/articles 
np.testing.assert_allclose(eigens, np_eigenvalues, rtol=0.05) assert bound(np_loadings, loadings) > 0.9 -def make_spectral_matrix(index_func, k, m, n): - sigma_dim = min(m, n) - answer = np.zeros((m, n)) - for j in range(sigma_dim): - answer[j, j] = index_func(j + 1, k) - return answer - -def matrix_table_from_numpy(np_mat): - rows, cols = np_mat.shape - mt = hl.utils.range_matrix_table(rows, cols) - mt = mt.annotate_globals(entries_global = np_mat) - mt = mt.annotate_entries(ent = mt.entries_global[mt.row_idx, mt.col_idx]) - return mt - -# k, m, n -dim_triplets = [(20, 1000, 1000), (10, 100, 200)] - -def spectra_helper(spec_func): - - for triplet in dim_triplets: - k, m, n = triplet - sigma = make_spectral_matrix(spec_func, k, m, n) - seed = 1025 - np.random.seed(seed) - U = np.linalg.qr(np.random.normal(0, 1, (m, m)))[0] - V = np.linalg.qr(np.random.normal(0, 1, (n, n)))[0] - A = U @ sigma @ V - mt_A = matrix_table_from_numpy(A) - - eigenvalues, scores, loadings = hl._blanczos_pca(mt_A.ent, k=k, oversampling_param=k, compute_loadings=True, q_iterations=4) - singulars = np.sqrt(eigenvalues) - hail_V = (np.array(scores.scores.collect()) / singulars).T - hail_U = np.array(loadings.loadings.collect()) - approx_A = hail_U @ np.diag(singulars) @ hail_V - norm_of_diff = np.linalg.norm(A - approx_A, 2) - np.testing.assert_allclose(norm_of_diff, spec_func(k + 1, k), rtol=1e-02, err_msg=f"Norm test failed on triplet {triplet} ") - np.testing.assert_allclose(singulars, np.diag(sigma)[:k], rtol=1e-01, err_msg=f"Failed on triplet {triplet}") @fails_service_backend(reason='persist_ir') -def test_spectra_1(): +def test_spectra(): + def make_spectral_matrix(index_func, k, m, n): + sigma_dim = min(m, n) + answer = np.zeros((m, n)) + for j in range(sigma_dim): + answer[j, j] = index_func(j + 1, k) + return answer + + def matrix_table_from_numpy(np_mat): + rows, cols = np_mat.shape + mt = hl.utils.range_matrix_table(rows, cols) + mt = mt.annotate_globals(entries_global = np_mat) + mt = mt.annotate_entries(ent = mt.entries_global[mt.row_idx, mt.col_idx]) + return mt + + # Defined for j >= 1 def spec1(j, k): return 1/j - spectra_helper(spec1) -@fails_service_backend(reason='persist_ir') -def test_spectra_2(): def spec2(j, k): if j == 1: return 1 @@ -181,19 +156,13 @@ def spec2(j, k): return 2 * 10**-5 else: return (10**-5) * (k + 1)/j - spectra_helper(spec2) -@fails_service_backend(reason='persist_ir') -def test_spectra_3(): def spec3(j, k): if j <= k: return 10**(-5*(j-1)/(k-1)) else: return (10**-5)*(k+1)/j - spectra_helper(spec3) -@fails_service_backend(reason='persist_ir') -def test_spectra_4(): def spec4(j, k): if j <= k: return 10**(-5*(j-1)/(k-1)) @@ -201,13 +170,35 @@ def spec4(j, k): return 10**-5 else: return 0 - spectra_helper(spec4) -@fails_service_backend(reason='persist_ir') -def test_spectra_5(): def spec5(j, k): if j <= k: return 10**-5 + (1 - 10**-5)*(k - j)/(k - 1) else: return 10**-5 * math.sqrt((k + 1)/j) - spectra_helper(spec5) + + spectral_functions = [spec1, spec2, spec3, spec4, spec5] + + # k, m, n + dim_triplets = [(10, 1000, 1000), (20, 1000, 1000), (10, 100, 200)] + + for triplet in dim_triplets: + k, m, n = triplet + for idx, spec_func in enumerate(spectral_functions): + sigma = make_spectral_matrix(spec_func, k, m, n) + seed = 1025 + np.random.seed(seed) + U = np.linalg.qr(np.random.normal(0, 1, (m, m)))[0] + V = np.linalg.qr(np.random.normal(0, 1, (n, n)))[0] + A = U @ sigma @ V + mt_A = matrix_table_from_numpy(A) + + eigenvalues, scores, loadings = hl._blanczos_pca(mt_A.ent, 
k=k, oversampling_param=k, compute_loadings=True, q_iterations=4) + singulars = np.sqrt(eigenvalues) + hail_V = (np.array(scores.scores.collect()) / singulars).T + hail_U = np.array(loadings.loadings.collect()) + approx_A = hail_U @ np.diag(singulars) @ hail_V + norm_of_diff = np.linalg.norm(A - approx_A, 2) + np.testing.assert_allclose(norm_of_diff, spec_func(k + 1, k), rtol=1e-02, err_msg=f"Norm test failed on triplet {triplet} on spec{idx + 1}") + np.testing.assert_allclose(singulars, np.diag(sigma)[:k], rtol=1e-01, err_msg=f"Failed on triplet {triplet} on spec{idx + 1}") + diff --git a/hail/python/test/hail/methods/test_statgen.py b/hail/python/test/hail/methods/test_statgen.py index 02525e206c8..5c146f1fad2 100644 --- a/hail/python/test/hail/methods/test_statgen.py +++ b/hail/python/test/hail/methods/test_statgen.py @@ -459,8 +459,6 @@ def eq(x1, x2): eq(combined.p_value, combined.multi.p_value[0]) & eq(combined.multi.p_value[0], combined.multi.p_value[1])))) - logreg_functions = [hl.logistic_regression_rows, hl._logistic_regression_rows_nd] if backend_name == "spark" else [hl._logistic_regression_rows_nd] - # comparing to R: # x = c(0, 1, 0, 0, 0, 1, 0, 0, 0, 0) # y = c(0, 0, 1, 1, 1, 1, 0, 0, 1, 1) @@ -472,6 +470,8 @@ def eq(x1, x2): # se <- waldtest["x", "Std. Error"] # zstat <- waldtest["x", "z value"] # pval <- waldtest["x", "Pr(>|z|)"] + @fails_service_backend() + @fails_local_backend() def test_logistic_regression_wald_test(self): covariates = hl.import_table(resource('regressionLogistic.cov'), key='Sample', @@ -481,35 +481,35 @@ def test_logistic_regression_wald_test(self): missing='0', types={'isCase': hl.tbool}) mt = hl.import_vcf(resource('regressionLogistic.vcf')) + ht = hl.logistic_regression_rows('wald', + y=pheno[mt.s].isCase, + x=mt.GT.n_alt_alleles(), + covariates=[1.0, covariates[mt.s].Cov1, covariates[mt.s].Cov2]) - for logistic_regression_function in self.logreg_functions: - ht = logistic_regression_function('wald', - y=pheno[mt.s].isCase, - x=mt.GT.n_alt_alleles(), - covariates=[1.0, covariates[mt.s].Cov1, covariates[mt.s].Cov2]) - - results = dict(hl.tuple([ht.locus.position, ht.row]).collect()) + results = dict(hl.tuple([ht.locus.position, ht.row]).collect()) - self.assertAlmostEqual(results[1].beta, -0.81226793796, places=6) - self.assertAlmostEqual(results[1].standard_error, 2.1085483421, places=6) - self.assertAlmostEqual(results[1].z_stat, -0.3852261396, places=6) - self.assertAlmostEqual(results[1].p_value, 0.7000698784, places=6) + self.assertAlmostEqual(results[1].beta, -0.81226793796, places=6) + self.assertAlmostEqual(results[1].standard_error, 2.1085483421, places=6) + self.assertAlmostEqual(results[1].z_stat, -0.3852261396, places=6) + self.assertAlmostEqual(results[1].p_value, 0.7000698784, places=6) - self.assertAlmostEqual(results[2].beta, -0.43659460858, places=6) - self.assertAlmostEqual(results[2].standard_error, 1.0296902941, places=6) - self.assertAlmostEqual(results[2].z_stat, -0.4240057531, places=6) - self.assertAlmostEqual(results[2].p_value, 0.6715616176, places=6) + self.assertAlmostEqual(results[2].beta, -0.43659460858, places=6) + self.assertAlmostEqual(results[2].standard_error, 1.0296902941, places=6) + self.assertAlmostEqual(results[2].z_stat, -0.4240057531, places=6) + self.assertAlmostEqual(results[2].p_value, 0.6715616176, places=6) - def is_constant(r): - return (not r.fit.converged) or np.isnan(r.p_value) or abs(r.p_value - 1) < 1e-4 + def is_constant(r): + return (not r.fit.converged) or np.isnan(r.p_value) or abs(r.p_value - 
1) < 1e-4 - self.assertFalse(results[3].fit.converged) # separable - self.assertTrue(is_constant(results[6])) - self.assertTrue(is_constant(results[7])) - self.assertTrue(is_constant(results[8])) - self.assertTrue(is_constant(results[9])) - self.assertTrue(is_constant(results[10])) + self.assertFalse(results[3].fit.converged) # separable + self.assertTrue(is_constant(results[6])) + self.assertTrue(is_constant(results[7])) + self.assertTrue(is_constant(results[8])) + self.assertTrue(is_constant(results[9])) + self.assertTrue(is_constant(results[10])) + @fails_service_backend() + @fails_local_backend() def test_logistic_regression_wald_test_apply_multi_pheno(self): covariates = hl.import_table(resource('regressionLogistic.cov'), key='Sample', @@ -519,38 +519,35 @@ def test_logistic_regression_wald_test_apply_multi_pheno(self): missing='0', types={'isCase': hl.tbool}) mt = hl.import_vcf(resource('regressionLogistic.vcf')) + ht = hl.logistic_regression_rows('wald', + y=[pheno[mt.s].isCase], + x=mt.GT.n_alt_alleles(), + covariates=[1.0, covariates[mt.s].Cov1, covariates[mt.s].Cov2]) - for logistic_regression_function in self.logreg_functions: + results = dict(hl.tuple([ht.locus.position, ht.row]).collect()) - ht = logistic_regression_function('wald', - y=[pheno[mt.s].isCase], - x=mt.GT.n_alt_alleles(), - covariates=[1.0, covariates[mt.s].Cov1, covariates[mt.s].Cov2]) + self.assertEqual(len(results[1].logistic_regression),1) + self.assertAlmostEqual(results[1].logistic_regression[0].beta, -0.81226793796, places=6) + self.assertAlmostEqual(results[1].logistic_regression[0].standard_error, 2.1085483421, places=6) + self.assertAlmostEqual(results[1].logistic_regression[0].z_stat, -0.3852261396, places=6) + self.assertAlmostEqual(results[1].logistic_regression[0].p_value, 0.7000698784, places=6) - results = dict(hl.tuple([ht.locus.position, ht.row]).collect()) + self.assertEqual(len(results[2].logistic_regression),1) + self.assertAlmostEqual(results[2].logistic_regression[0].beta, -0.43659460858, places=6) + self.assertAlmostEqual(results[2].logistic_regression[0].standard_error, 1.0296902941, places=6) + self.assertAlmostEqual(results[2].logistic_regression[0].z_stat, -0.4240057531, places=6) + self.assertAlmostEqual(results[2].logistic_regression[0].p_value, 0.6715616176, places=6) - self.assertEqual(len(results[1].logistic_regression),1) - self.assertAlmostEqual(results[1].logistic_regression[0].beta, -0.81226793796, places=6) - self.assertAlmostEqual(results[1].logistic_regression[0].standard_error, 2.1085483421, places=6) - self.assertAlmostEqual(results[1].logistic_regression[0].z_stat, -0.3852261396, places=6) - self.assertAlmostEqual(results[1].logistic_regression[0].p_value, 0.7000698784, places=6) - - self.assertEqual(len(results[2].logistic_regression),1) - self.assertAlmostEqual(results[2].logistic_regression[0].beta, -0.43659460858, places=6) - self.assertAlmostEqual(results[2].logistic_regression[0].standard_error, 1.0296902941, places=6) - self.assertAlmostEqual(results[2].logistic_regression[0].z_stat, -0.4240057531, places=6) - self.assertAlmostEqual(results[2].logistic_regression[0].p_value, 0.6715616176, places=6) - - def is_constant(r): - return (not r.logistic_regression[0].fit.converged) or np.isnan(r.logistic_regression[0].p_value) or abs(r.logistic_regression[0].p_value - 1) < 1e-4 - - self.assertEqual(len(results[3].logistic_regression),1) - self.assertFalse(results[3].logistic_regression[0].fit.converged) # separable - self.assertTrue(is_constant(results[6])) - 
self.assertTrue(is_constant(results[7])) - self.assertTrue(is_constant(results[8])) - self.assertTrue(is_constant(results[9])) - self.assertTrue(is_constant(results[10])) + def is_constant(r): + return (not r.logistic_regression[0].fit.converged) or np.isnan(r.logistic_regression[0].p_value) or abs(r.logistic_regression[0].p_value - 1) < 1e-4 + + self.assertEqual(len(results[3].logistic_regression),1) + self.assertFalse(results[3].logistic_regression[0].fit.converged) # separable + self.assertTrue(is_constant(results[6])) + self.assertTrue(is_constant(results[7])) + self.assertTrue(is_constant(results[8])) + self.assertTrue(is_constant(results[9])) + self.assertTrue(is_constant(results[10])) @fails_service_backend() @fails_local_backend() @@ -565,28 +562,28 @@ def test_logistic_regression_wald_test_multi_pheno_bgen_dosage(self): mt = hl.import_bgen(resource('example.8bits.bgen'), entry_fields=['dosage']).cache() - for logistic_regression_function in self.logreg_functions: - - ht_single_pheno = logistic_regression_function('wald', - y=pheno[mt.s].Pheno1, - x=mt.dosage, - covariates=[1.0, covariates[mt.s].Cov1, covariates[mt.s].Cov2]) + ht_single_pheno = hl.logistic_regression_rows('wald', + y=pheno[mt.s].Pheno1, + x=mt.dosage, + covariates=[1.0, covariates[mt.s].Cov1, covariates[mt.s].Cov2]) - ht_multi_pheno = logistic_regression_function('wald', - y=[pheno[mt.s].Pheno1, pheno[mt.s].Pheno2], - x=mt.dosage, - covariates=[1.0, covariates[mt.s].Cov1, covariates[mt.s].Cov2]) + ht_multi_pheno = hl.logistic_regression_rows('wald', + y=[pheno[mt.s].Pheno1, pheno[mt.s].Pheno2], + x=mt.dosage, + covariates=[1.0, covariates[mt.s].Cov1, covariates[mt.s].Cov2]) - single_results = dict(hl.tuple([ht_single_pheno.locus.position, ht_single_pheno.row]).collect()) - multi_results = dict(hl.tuple([ht_multi_pheno.locus.position, ht_multi_pheno.row]).collect()) - self.assertEqual(len(multi_results[1001].logistic_regression),2) - self.assertAlmostEqual(multi_results[1001].logistic_regression[0].beta, single_results[1001].beta, places=6) - self.assertAlmostEqual(multi_results[1001].logistic_regression[0].standard_error,single_results[1001].standard_error, places=6) - self.assertAlmostEqual(multi_results[1001].logistic_regression[0].z_stat, single_results[1001].z_stat, places=6) - self.assertAlmostEqual(multi_results[1001].logistic_regression[0].p_value,single_results[1001].p_value, places=6) - #TODO test handling of missingness + single_results = dict(hl.tuple([ht_single_pheno.locus.position, ht_single_pheno.row]).collect()) + multi_results = dict(hl.tuple([ht_multi_pheno.locus.position, ht_multi_pheno.row]).collect()) + self.assertEqual(len(multi_results[1001].logistic_regression),2) + self.assertAlmostEqual(multi_results[1001].logistic_regression[0].beta, single_results[1001].beta, places=6) + self.assertAlmostEqual(multi_results[1001].logistic_regression[0].standard_error,single_results[1001].standard_error, places=6) + self.assertAlmostEqual(multi_results[1001].logistic_regression[0].z_stat, single_results[1001].z_stat, places=6) + self.assertAlmostEqual(multi_results[1001].logistic_regression[0].p_value,single_results[1001].p_value, places=6) + #TODO test handling of missingness + @fails_service_backend() + @fails_local_backend() def test_logistic_regression_wald_test_pl(self): covariates = hl.import_table(resource('regressionLogistic.cov'), key='Sample', @@ -596,36 +593,33 @@ def test_logistic_regression_wald_test_pl(self): missing='0', types={'isCase': hl.tbool}) mt = 
hl.import_vcf(resource('regressionLogistic.vcf')) + ht = hl.logistic_regression_rows( + test='wald', + y=pheno[mt.s].isCase, + x=hl.pl_dosage(mt.PL), + covariates=[1.0, covariates[mt.s].Cov1, covariates[mt.s].Cov2]) - for logistic_regression_function in self.logreg_functions: - - ht = logistic_regression_function( - test='wald', - y=pheno[mt.s].isCase, - x=hl.pl_dosage(mt.PL), - covariates=[1.0, covariates[mt.s].Cov1, covariates[mt.s].Cov2]) - - results = dict(hl.tuple([ht.locus.position, ht.row]).collect()) + results = dict(hl.tuple([ht.locus.position, ht.row]).collect()) - self.assertAlmostEqual(results[1].beta, -0.8286774, places=6) - self.assertAlmostEqual(results[1].standard_error, 2.151145, places=6) - self.assertAlmostEqual(results[1].z_stat, -0.3852261, places=6) - self.assertAlmostEqual(results[1].p_value, 0.7000699, places=6) + self.assertAlmostEqual(results[1].beta, -0.8286774, places=6) + self.assertAlmostEqual(results[1].standard_error, 2.151145, places=6) + self.assertAlmostEqual(results[1].z_stat, -0.3852261, places=6) + self.assertAlmostEqual(results[1].p_value, 0.7000699, places=6) - self.assertAlmostEqual(results[2].beta, -0.4431764, places=6) - self.assertAlmostEqual(results[2].standard_error, 1.045213, places=6) - self.assertAlmostEqual(results[2].z_stat, -0.4240058, places=6) - self.assertAlmostEqual(results[2].p_value, 0.6715616, places=6) + self.assertAlmostEqual(results[2].beta, -0.4431764, places=6) + self.assertAlmostEqual(results[2].standard_error, 1.045213, places=6) + self.assertAlmostEqual(results[2].z_stat, -0.4240058, places=6) + self.assertAlmostEqual(results[2].p_value, 0.6715616, places=6) - def is_constant(r): - return (not r.fit.converged) or np.isnan(r.p_value) or abs(r.p_value - 1) < 1e-4 + def is_constant(r): + return (not r.fit.converged) or np.isnan(r.p_value) or abs(r.p_value - 1) < 1e-4 - self.assertFalse(results[3].fit.converged) # separable - self.assertTrue(is_constant(results[6])) - self.assertTrue(is_constant(results[7])) - self.assertTrue(is_constant(results[8])) - self.assertTrue(is_constant(results[9])) - self.assertTrue(is_constant(results[10])) + self.assertFalse(results[3].fit.converged) # separable + self.assertTrue(is_constant(results[6])) + self.assertTrue(is_constant(results[7])) + self.assertTrue(is_constant(results[8])) + self.assertTrue(is_constant(results[9])) + self.assertTrue(is_constant(results[10])) @fails_service_backend() @fails_local_backend() @@ -639,36 +633,33 @@ def test_logistic_regression_wald_dosage(self): types={'isCase': hl.tbool}) mt = hl.import_gen(resource('regressionLogistic.gen'), sample_file=resource('regressionLogistic.sample')) + ht = hl.logistic_regression_rows( + test='wald', + y=pheno[mt.s].isCase, + x=hl.gp_dosage(mt.GP), + covariates=[1.0, covariates[mt.s].Cov1, covariates[mt.s].Cov2]) - for logistic_regression_function in self.logreg_functions: - - ht = logistic_regression_function( - test='wald', - y=pheno[mt.s].isCase, - x=hl.gp_dosage(mt.GP), - covariates=[1.0, covariates[mt.s].Cov1, covariates[mt.s].Cov2]) - - results = dict(hl.tuple([ht.locus.position, ht.row]).collect()) + results = dict(hl.tuple([ht.locus.position, ht.row]).collect()) - self.assertAlmostEqual(results[1].beta, -0.8286774, places=4) - self.assertAlmostEqual(results[1].standard_error, 2.151145, places=4) - self.assertAlmostEqual(results[1].z_stat, -0.3852261, places=4) - self.assertAlmostEqual(results[1].p_value, 0.7000699, places=4) + self.assertAlmostEqual(results[1].beta, -0.8286774, places=4) + 
self.assertAlmostEqual(results[1].standard_error, 2.151145, places=4) + self.assertAlmostEqual(results[1].z_stat, -0.3852261, places=4) + self.assertAlmostEqual(results[1].p_value, 0.7000699, places=4) - self.assertAlmostEqual(results[2].beta, -0.4431764, places=4) - self.assertAlmostEqual(results[2].standard_error, 1.045213, places=4) - self.assertAlmostEqual(results[2].z_stat, -0.4240058, places=4) - self.assertAlmostEqual(results[2].p_value, 0.6715616, places=4) + self.assertAlmostEqual(results[2].beta, -0.4431764, places=4) + self.assertAlmostEqual(results[2].standard_error, 1.045213, places=4) + self.assertAlmostEqual(results[2].z_stat, -0.4240058, places=4) + self.assertAlmostEqual(results[2].p_value, 0.6715616, places=4) - def is_constant(r): - return (not r.fit.converged) or np.isnan(r.p_value) or abs(r.p_value - 1) < 1e-4 + def is_constant(r): + return (not r.fit.converged) or np.isnan(r.p_value) or abs(r.p_value - 1) < 1e-4 - self.assertFalse(results[3].fit.converged) # separable - self.assertTrue(is_constant(results[6])) - self.assertTrue(is_constant(results[7])) - self.assertTrue(is_constant(results[8])) - self.assertTrue(is_constant(results[9])) - self.assertTrue(is_constant(results[10])) + self.assertFalse(results[3].fit.converged) # separable + self.assertTrue(is_constant(results[6])) + self.assertTrue(is_constant(results[7])) + self.assertTrue(is_constant(results[8])) + self.assertTrue(is_constant(results[9])) + self.assertTrue(is_constant(results[10])) # comparing to output of R code: # x = c(0, 1, 0, 0, 0, 1, 0, 0, 0, 0) @@ -681,6 +672,8 @@ def is_constant(r): # lrtest <- anova(logfitnull, logfit, test="LRT") # chi2 <- lrtest[["Deviance"]][2] # pval <- lrtest[["Pr(>Chi)"]][2] + @fails_service_backend() + @fails_local_backend() def test_logistic_regression_lrt(self): covariates = hl.import_table(resource('regressionLogistic.cov'), key='Sample', @@ -690,33 +683,31 @@ def test_logistic_regression_lrt(self): missing='0', types={'isCase': hl.tbool}) mt = hl.import_vcf(resource('regressionLogistic.vcf')) + ht = hl.logistic_regression_rows( + test='lrt', + y=pheno[mt.s].isCase, + x=mt.GT.n_alt_alleles(), + covariates=[1.0, covariates[mt.s].Cov1, covariates[mt.s].Cov2]) - for logistic_regression_function in self.logreg_functions: - ht = logistic_regression_function( - test='lrt', - y=pheno[mt.s].isCase, - x=mt.GT.n_alt_alleles(), - covariates=[1.0, covariates[mt.s].Cov1, covariates[mt.s].Cov2]) - - results = dict(hl.tuple([ht.locus.position, ht.row]).collect()) + results = dict(hl.tuple([ht.locus.position, ht.row]).collect()) - self.assertAlmostEqual(results[1].beta, -0.81226793796, places=6) - self.assertAlmostEqual(results[1].chi_sq_stat, 0.1503349167, places=6) - self.assertAlmostEqual(results[1].p_value, 0.6982155052, places=6) + self.assertAlmostEqual(results[1].beta, -0.81226793796, places=6) + self.assertAlmostEqual(results[1].chi_sq_stat, 0.1503349167, places=6) + self.assertAlmostEqual(results[1].p_value, 0.6982155052, places=6) - self.assertAlmostEqual(results[2].beta, -0.43659460858, places=6) - self.assertAlmostEqual(results[2].chi_sq_stat, 0.1813968574, places=6) - self.assertAlmostEqual(results[2].p_value, 0.6701755415, places=6) + self.assertAlmostEqual(results[2].beta, -0.43659460858, places=6) + self.assertAlmostEqual(results[2].chi_sq_stat, 0.1813968574, places=6) + self.assertAlmostEqual(results[2].p_value, 0.6701755415, places=6) - def is_constant(r): - return (not r.fit.converged) or np.isnan(r.p_value) or abs(r.p_value - 1) < 1e-4 + def is_constant(r): + 
return (not r.fit.converged) or np.isnan(r.p_value) or abs(r.p_value - 1) < 1e-4 - self.assertFalse(results[3].fit.converged) # separable - self.assertTrue(is_constant(results[6])) - self.assertTrue(is_constant(results[7])) - self.assertTrue(is_constant(results[8])) - self.assertTrue(is_constant(results[9])) - self.assertTrue(is_constant(results[10])) + self.assertFalse(results[3].fit.converged) # separable + self.assertTrue(is_constant(results[6])) + self.assertTrue(is_constant(results[7])) + self.assertTrue(is_constant(results[8])) + self.assertTrue(is_constant(results[9])) + self.assertTrue(is_constant(results[10])) # comparing to output of R code: # x = c(0, 1, 0, 0, 0, 1, 0, 0, 0, 0) diff --git a/hail/python/test/hail/table/test_table.py b/hail/python/test/hail/table/test_table.py index 641639c3b3b..97be3a580d6 100644 --- a/hail/python/test/hail/table/test_table.py +++ b/hail/python/test/hail/table/test_table.py @@ -479,11 +479,13 @@ def test_multiple_entry_joins(self): b=mt2[mt.row_idx, mt.col_idx].x) @fails_service_backend() + @fails_local_backend() def test_multi_way_zip_join(self): d1 = [{"id": 0, "name": "a", "data": 0.0}, {"id": 1, "name": "b", "data": 3.14}, {"id": 2, "name": "c", "data": 2.78}] d2 = [{"id": 0, "name": "d", "data": 1.1}, + {"id": 0, "name": "x", "data": 2.2}, {"id": 2, "name": "v", "data": 7.89}] d3 = [{"id": 1, "name": "f", "data": 9.99}, {"id": 2, "name": "g", "data": -1.0}, @@ -494,6 +496,9 @@ def test_multi_way_zip_join(self): dexpected = [{"id": 0, "__data": [{"name": "a", "data": 0.0}, {"name": "d", "data": 1.1}, None]}, + {"id": 0, "__data": [None, + {"name": "x", "data": 2.2}, + None]}, {"id": 1, "__data": [{"name": "b", "data": 3.14}, None, {"name": "f", "data": 9.99}]}, @@ -514,7 +519,7 @@ def test_multi_way_zip_join(self): self.assertTrue(expected2._same(joined_same_name)) joined_nothing = hl.Table.multi_way_zip_join(ts, 'data', 'globals').drop('data', 'globals') - self.assertEqual(joined_nothing._force_count(), 4) + self.assertEqual(joined_nothing._force_count(), 5) def test_multi_way_zip_join_globals(self): t1 = hl.utils.range_table(1).annotate_globals(x=hl.missing(hl.tint32)) diff --git a/hail/python/test/hailtop/aiotools/test_copy.py b/hail/python/test/hailtop/aiotools/test_copy.py index 77585e6837b..e7a8bea3646 100644 --- a/hail/python/test/hailtop/aiotools/test_copy.py +++ b/hail/python/test/hailtop/aiotools/test_copy.py @@ -2,13 +2,10 @@ import secrets from concurrent.futures import ThreadPoolExecutor import asyncio -import functools import pytest from hailtop.utils import url_scheme, bounded_gather2 from hailtop.aiotools import LocalAsyncFS, RouterAsyncFS, Transfer, FileAndDirectoryError -from hailtop.aiogoogle import GoogleStorageAsyncFS -from hailtop.aiotools.s3asyncfs import S3AsyncFS - +from hailtop.aiogoogle import StorageClient, GoogleStorageAsyncFS from .generate_copy_test_specs import ( run_test_spec, create_test_file, create_test_dir) @@ -42,36 +39,27 @@ async def router_filesystem(request): with ThreadPoolExecutor() as thread_pool: async with RouterAsyncFS( - 'file', [LocalAsyncFS(thread_pool), - GoogleStorageAsyncFS(), - S3AsyncFS(thread_pool)]) as fs: + 'file', [LocalAsyncFS(thread_pool), GoogleStorageAsyncFS()]) as fs: file_base = f'/tmp/{token}/' await fs.mkdir(file_base) - gs_bucket = os.environ['HAIL_TEST_GCS_BUCKET'] - gs_base = f'gs://{gs_bucket}/tmp/{token}/' - - s3_bucket = os.environ['HAIL_TEST_S3_BUCKET'] - s3_base = f's3://{s3_bucket}/tmp/{token}/' + bucket = os.environ['HAIL_TEST_BUCKET'] + gs_base = 
f'gs://{bucket}/tmp/{token}/' bases = { 'file': file_base, - 'gs': gs_base, - 's3': s3_base + 'gs': gs_base } sema = asyncio.Semaphore(50) async with sema: yield (sema, fs, bases) await bounded_gather2(sema, - functools.partial(fs.rmtree, sema, file_base), - functools.partial(fs.rmtree, sema, gs_base), - functools.partial(fs.rmtree, sema, s3_base)) + fs.rmtree(sema, file_base), + fs.rmtree(sema, gs_base)) assert not await fs.isdir(file_base) assert not await fs.isdir(gs_base) - assert not await fs.isdir(s3_base) - async def fresh_dir(fs, bases, scheme): token = secrets.token_hex(16) @@ -80,9 +68,7 @@ async def fresh_dir(fs, bases, scheme): return dir -@pytest.fixture(params=['file/file', 'file/gs', 'file/s3', - 'gs/file', 'gs/gs', 'gs/s3', - 's3/file', 's3/gs', 's3/s3']) +@pytest.fixture(params=['file/file', 'file/gs', 'gs/file', 'gs/gs']) async def copy_test_context(request, router_filesystem): sema, fs, bases = router_filesystem @@ -107,7 +93,7 @@ async def test_copy_behavior(copy_test_context, test_spec): expected = test_spec['result'] dest_scheme = url_scheme(dest_base) - if ((dest_scheme == 'gs' or dest_scheme == 's3') + if (dest_scheme == 'gs' and 'files' in result and expected.get('exception') in ('IsADirectoryError', 'NotADirectoryError')): return @@ -132,19 +118,18 @@ class RaisedWrongExceptionError(Exception): pass -class RaisesOrObjectStore: +class RaisesOrGS: def __init__(self, dest_base, expected_type): - scheme = url_scheme(dest_base) - self._object_store = (scheme == 'gs' or scheme == 's3') + self._gs = url_scheme(dest_base) == 'gs' self._expected_type = expected_type def __enter__(self): return self def __exit__(self, type, value, traceback): - # object stores can succeed or throw + # gs can succeed or throw if type is None: - if not self._object_store: + if not self._gs: raise DidNotRaiseError() elif type != self._expected_type: raise RaisedWrongExceptionError(type) @@ -152,7 +137,6 @@ def __exit__(self, type, value, traceback): # suppress exception return True - @pytest.mark.asyncio async def test_copy_doesnt_exist(copy_test_context): sema, fs, src_base, dest_base = copy_test_context @@ -310,7 +294,7 @@ async def test_copy_dest_target_file_is_dir(copy_test_context): await create_test_file(fs, 'src', src_base, 'a') - with RaisesOrObjectStore(dest_base, IsADirectoryError): + with RaisesOrGS(dest_base, IsADirectoryError): await fs.copy(sema, Transfer(f'{src_base}a', dest_base.rstrip('/'), treat_dest_as=Transfer.DEST_IS_TARGET)) @@ -382,7 +366,7 @@ async def test_copy_multiple_dest_target_file(copy_test_context): await create_test_file(fs, 'src', src_base, 'a') await create_test_file(fs, 'src', src_base, 'b') - with RaisesOrObjectStore(dest_base, NotADirectoryError): + with RaisesOrGS(dest_base, NotADirectoryError): await fs.copy(sema, Transfer([f'{src_base}a', f'{src_base}b'], dest_base.rstrip('/'), treat_dest_as=Transfer.DEST_IS_TARGET)) @@ -394,7 +378,7 @@ async def test_copy_multiple_dest_file(copy_test_context): await create_test_file(fs, 'src', src_base, 'b') await create_test_file(fs, 'dest', dest_base, 'x') - with RaisesOrObjectStore(dest_base, NotADirectoryError): + with RaisesOrGS(dest_base, NotADirectoryError): await fs.copy(sema, Transfer([f'{src_base}a', f'{src_base}b'], f'{dest_base}x')) @@ -404,7 +388,7 @@ async def test_file_overwrite_dir(copy_test_context): await create_test_file(fs, 'src', src_base, 'a') - with RaisesOrObjectStore(dest_base, IsADirectoryError): + with RaisesOrGS(dest_base, IsADirectoryError): await fs.copy(sema, Transfer(f'{src_base}a', 
dest_base.rstrip('/'), treat_dest_as=Transfer.DEST_IS_TARGET)) @@ -432,152 +416,3 @@ async def test_copy_src_parts(copy_test_context): await expect_file(fs, f'{dest_base}file1', 'src/a/file1') await expect_file(fs, f'{dest_base}subdir/file2', 'src/a/subdir/file2') - - -async def write_file(fs, url, data): - async with await fs.create(url) as f: - await f.write(data) - - -async def collect_files(it): - return [await x.url() async for x in it] - - -@pytest.mark.asyncio -async def test_file_and_directory_error_with_slash_empty_file(router_filesystem): - sema, fs, bases = router_filesystem - - src_base = await fresh_dir(fs, bases, 'gs') - - await write_file(fs, f'{src_base}empty/', '') - await write_file(fs, f'{src_base}empty/foo', b'foo') - - await collect_files(await fs.listfiles(f'{src_base}')) - await collect_files(await fs.listfiles(f'{src_base}', recursive=True)) - await collect_files(await fs.listfiles(f'{src_base}empty/')) - await collect_files(await fs.listfiles(f'{src_base}empty/', recursive=True)) - - for transfer_type in (Transfer.DEST_IS_TARGET, Transfer.DEST_DIR, Transfer.INFER_DEST): - dest_base = await fresh_dir(fs, bases, 'gs') - - await fs.copy(sema, Transfer(f'{src_base}', dest_base.rstrip('/'), treat_dest_as=transfer_type)) - - dest_base = await fresh_dir(fs, bases, 'gs') - - await fs.copy(sema, Transfer(f'{src_base}empty/', dest_base.rstrip('/'), treat_dest_as=transfer_type)) - - await collect_files(await fs.listfiles(f'{dest_base}')) - await collect_files(await fs.listfiles(f'{dest_base}', recursive=True)) - - if transfer_type == Transfer.DEST_DIR: - exp_dest = f'{dest_base}empty/foo' - await expect_file(fs, exp_dest, 'foo') - assert not await fs.isfile(f'{dest_base}empty/') - assert await fs.isdir(f'{dest_base}empty/') - await collect_files(await fs.listfiles(f'{dest_base}empty/')) - await collect_files(await fs.listfiles(f'{dest_base}empty/', recursive=True)) - else: - exp_dest = f'{dest_base}foo' - await expect_file(fs, exp_dest, 'foo') - - -@pytest.mark.asyncio -async def test_file_and_directory_error_with_slash_non_empty_file(router_filesystem): - sema, fs, bases = router_filesystem - - src_base = await fresh_dir(fs, bases, 'gs') - - await write_file(fs, f'{src_base}not-empty/', b'not-empty') - await write_file(fs, f'{src_base}not-empty/bar', b'bar') - - with pytest.raises(FileAndDirectoryError): - await collect_files(await fs.listfiles(f'{src_base}')) - - with pytest.raises(FileAndDirectoryError): - await collect_files(await fs.listfiles(f'{src_base}', recursive=True)) - - with pytest.raises(FileAndDirectoryError): - await collect_files(await fs.listfiles(f'{src_base}not-empty/')) - - with pytest.raises(FileAndDirectoryError): - await collect_files(await fs.listfiles(f'{src_base}not-empty/', recursive=True)) - - for transfer_type in (Transfer.DEST_IS_TARGET, Transfer.DEST_DIR, Transfer.INFER_DEST): - dest_base = await fresh_dir(fs, bases, 'gs') - - await fs.copy(sema, Transfer(f'{src_base}not-empty/bar', dest_base.rstrip('/'), treat_dest_as=transfer_type)) - if transfer_type == Transfer.DEST_DIR: - exp_dest = f'{dest_base}bar' - await expect_file(fs, exp_dest, 'bar') - assert not await fs.isfile(f'{dest_base}not-empty/') - assert not await fs.isdir(f'{dest_base}not-empty/') - x = await collect_files(await fs.listfiles(f'{dest_base}')) - assert x == [f'{dest_base}bar'], x - else: - await expect_file(fs, dest_base.rstrip('/'), 'bar') - - with pytest.raises(FileAndDirectoryError): - dest_base = await fresh_dir(fs, bases, 'gs') - await fs.copy(sema, 
Transfer(f'{src_base}not-empty/', dest_base.rstrip('/'), treat_dest_as=transfer_type)) - - with pytest.raises(FileAndDirectoryError): - dest_base = await fresh_dir(fs, bases, 'gs') - await fs.copy(sema, Transfer(f'{src_base}', dest_base.rstrip('/'), treat_dest_as=transfer_type)) - - -@pytest.mark.asyncio -async def test_file_and_directory_error_with_slash_empty_file_only(router_filesystem): - sema, fs, bases = router_filesystem - - src_base = await fresh_dir(fs, bases, 'gs') - - await write_file(fs, f'{src_base}empty-only/', '') - - await collect_files(await fs.listfiles(f'{src_base}')) - await collect_files(await fs.listfiles(f'{src_base}', recursive=True)) - await collect_files(await fs.listfiles(f'{src_base}empty-only/')) - await collect_files(await fs.listfiles(f'{src_base}empty-only/', recursive=True)) - - for transfer_type in (Transfer.DEST_IS_TARGET, Transfer.DEST_DIR, Transfer.INFER_DEST): - dest_base = await fresh_dir(fs, bases, 'gs') - await fs.copy(sema, Transfer(f'{src_base}empty-only/', dest_base.rstrip('/'), treat_dest_as=transfer_type)) - - # We ignore empty directories when copying - with pytest.raises(FileNotFoundError): - await collect_files(await fs.listfiles(f'{dest_base}empty-only/')) - - with pytest.raises(FileNotFoundError): - await collect_files(await fs.listfiles(f'{dest_base}empty-only/', recursive=True)) - - dest_base = await fresh_dir(fs, bases, 'gs') - await fs.copy(sema, Transfer(f'{src_base}', dest_base.rstrip('/'), treat_dest_as=transfer_type)) - - -@pytest.mark.asyncio -async def test_file_and_directory_error_with_slash_non_empty_file_only(router_filesystem): - sema, fs, bases = router_filesystem - - src_base = await fresh_dir(fs, bases, 'gs') - - await write_file(fs, f'{src_base}not-empty-file-w-slash/', b'not-empty') - - with pytest.raises(FileAndDirectoryError): - await collect_files(await fs.listfiles(f'{src_base}')) - - with pytest.raises(FileAndDirectoryError): - await collect_files(await fs.listfiles(f'{src_base}', recursive=True)) - - with pytest.raises(FileAndDirectoryError): - await collect_files(await fs.listfiles(f'{src_base}not-empty-file-w-slash/')) - - with pytest.raises(FileAndDirectoryError): - await collect_files(await fs.listfiles(f'{src_base}not-empty-file-w-slash/', recursive=True)) - - for transfer_type in (Transfer.DEST_IS_TARGET, Transfer.DEST_DIR, Transfer.INFER_DEST): - with pytest.raises(FileAndDirectoryError): - dest_base = await fresh_dir(fs, bases, 'gs') - await fs.copy(sema, Transfer(f'{src_base}not-empty-file-w-slash/', dest_base.rstrip('/'), treat_dest_as=transfer_type)) - - with pytest.raises(FileAndDirectoryError): - dest_base = await fresh_dir(fs, bases, 'gs') - await fs.copy(sema, Transfer(f'{src_base}', dest_base.rstrip('/'), treat_dest_as=transfer_type)) diff --git a/hail/python/test/hailtop/batch/test_batch.py b/hail/python/test/hailtop/batch/test_batch.py index 9f692de8f09..3b3ccac34a6 100644 --- a/hail/python/test/hailtop/batch/test_batch.py +++ b/hail/python/test/hailtop/batch/test_batch.py @@ -8,7 +8,6 @@ import google.cloud.storage from hailtop.batch import Batch, ServiceBackend, LocalBackend -from hailtop.batch.exceptions import BatchException from hailtop.batch.globals import arg_max from hailtop.utils import grouped from hailtop.config import get_user_config @@ -610,14 +609,6 @@ def test_gcsfuse_implicit_dirs(self): res = b.run() assert res.status()['state'] == 'success', debug_info(res) - def test_gcsfuse_empty_string_bucket_fails(self): - b = self.batch() - j = b.new_job() - with 
self.assertRaises(BatchException): - j.gcsfuse('', '/empty_bucket') - with self.assertRaises(BatchException): - j.gcsfuse(self.bucket_name, '') - def test_requester_pays(self): b = self.batch(requester_pays_project='hail-vdc') input = b.read_input('gs://hail-services-requester-pays/hello') @@ -796,7 +787,7 @@ def test_fail_fast(self): b = self.batch(cancel_after_n_failures=1) j1 = b.new_job() - j1.command('false') + j1.command(f'false') j2 = b.new_job() j2.command('sleep 300') @@ -804,30 +795,3 @@ def test_fail_fast(self): res = b.run() job_status = res.get_job(2).status() assert job_status['state'] == 'Cancelled', str(job_status) - - def test_service_backend_bucket_parameter(self): - backend = ServiceBackend(bucket='hail-test-dmk9z') - b = Batch(backend=backend) - j1 = b.new_job() - j1.command(f'echo hello > {j1.ofile}') - j2 = b.new_job() - j2.command(f'cat {j1.ofile}') - b.run() - - def test_service_backend_remote_tempdir_with_trailing_slash(self): - backend = ServiceBackend(remote_tmpdir='gs://hail-test-dmk9z/temporary-files/') - b = Batch(backend=backend) - j1 = b.new_job() - j1.command(f'echo hello > {j1.ofile}') - j2 = b.new_job() - j2.command(f'cat {j1.ofile}') - b.run() - - def test_service_backend_remote_tempdir_with_no_trailing_slash(self): - backend = ServiceBackend(remote_tmpdir='gs://hail-test-dmk9z/temporary-files') - b = Batch(backend=backend) - j1 = b.new_job() - j1.command(f'echo hello > {j1.ofile}') - j2 = b.new_job() - j2.command(f'cat {j1.ofile}') - b.run() diff --git a/hail/python/test/hailtop/test_aiogoogle.py b/hail/python/test/hailtop/test_aiogoogle.py index c2c83460374..c10d0307910 100644 --- a/hail/python/test/hailtop/test_aiogoogle.py +++ b/hail/python/test/hailtop/test_aiogoogle.py @@ -1,4 +1,3 @@ -from typing import Optional import os import secrets import shutil @@ -7,28 +6,30 @@ import asyncio import pytest import concurrent -import urllib.parse -import functools from hailtop.utils import secret_alnum_string, bounded_gather2 from hailtop.aiotools import LocalAsyncFS, RouterAsyncFS from hailtop.aiogoogle import StorageClient, GoogleStorageAsyncFS -@pytest.fixture(params=['gs', 'router/gs']) -async def gs_filesystem(request): +@pytest.fixture(params=['file', 'gs', 'router/file', 'router/gs']) +async def filesystem(request): token = secret_alnum_string() with ThreadPoolExecutor() as thread_pool: if request.param.startswith('router/'): fs = RouterAsyncFS( - 'file', [LocalAsyncFS(thread_pool), - GoogleStorageAsyncFS()]) + 'file', [LocalAsyncFS(thread_pool), GoogleStorageAsyncFS()]) + elif request.param == 'file': + fs = LocalAsyncFS(thread_pool) else: - assert request.param.endswith('gs') fs = GoogleStorageAsyncFS() async with fs: - bucket = os.environ['HAIL_TEST_GCS_BUCKET'] - base = f'gs://{bucket}/tmp/{token}/' + if request.param.endswith('file'): + base = f'/tmp/{token}/' + else: + assert request.param.endswith('gs') + bucket = os.environ['HAIL_TEST_BUCKET'] + base = f'gs://{bucket}/tmp/{token}/' await fs.mkdir(base) sema = asyncio.Semaphore(50) @@ -38,9 +39,150 @@ async def gs_filesystem(request): assert not await fs.isdir(base) +@pytest.fixture +async def local_filesystem(request): + token = secret_alnum_string() + + with ThreadPoolExecutor() as thread_pool: + async with LocalAsyncFS(thread_pool) as fs: + base = f'/tmp/{token}/' + await fs.mkdir(base) + sema = asyncio.Semaphore(50) + async with sema: + yield (sema, fs, base) + await fs.rmtree(sema, base) + assert not await fs.isdir(base) + + +@pytest.fixture(params=['small', 'multipart', 'large']) +async 
def file_data(request): + if request.param == 'small': + return [b'foo'] + elif request.param == 'multipart': + return [b'foo', b'bar', b'baz'] + else: + assert request.param == 'large' + return [secrets.token_bytes(1_000_000)] + + +@pytest.mark.asyncio +async def test_write_read(filesystem, file_data): + sema, fs, base = filesystem + + file = f'{base}foo' + + async with await fs.create(file) as f: + for b in file_data: + await f.write(b) + + expected = b''.join(file_data) + async with await fs.open(file) as f: + actual = await f.read() + + assert expected == actual + + +@pytest.mark.asyncio +async def test_open_from(filesystem): + sema, fs, base = filesystem + + file = f'{base}foo' + + async with await fs.create(file) as f: + await f.write(b'abcde') + + async with await fs.open_from(file, 2) as f: + r = await f.read() + assert r == b'cde' + + +@pytest.mark.asyncio +async def test_isfile(filesystem): + sema, fs, base = filesystem + + file = f'{base}foo' + + # doesn't exist yet + assert not await fs.isfile(file) + + await fs.touch(file) + + assert await fs.isfile(file) + + +@pytest.mark.asyncio +async def test_isdir(filesystem): + sema, fs, base = filesystem + + # mkdir with trailing slash + dir = f'{base}dir/' + await fs.mkdir(dir) + + await fs.touch(f'{dir}foo') + + # can't test this until after creating foo + assert await fs.isdir(dir) + + # mkdir without trailing slash + dir2 = f'{base}dir2' + await fs.mkdir(dir2) + + await fs.touch(f'{dir2}/foo') + + assert await fs.isdir(dir) + + +@pytest.mark.asyncio +async def test_isdir_subdir_only(filesystem): + sema, fs, base = filesystem + + dir = f'{base}dir/' + await fs.mkdir(dir) + + subdir = f'{dir}subdir/' + await fs.mkdir(subdir) + + await fs.touch(f'{subdir}foo') + + # can't test this until after creating foo + assert await fs.isdir(dir) + assert await fs.isdir(subdir) + + +@pytest.mark.asyncio +async def test_remove(filesystem): + sema, fs, base = filesystem + + file = f'{base}foo' + + await fs.touch(file) + assert await fs.isfile(file) + + await fs.remove(file) + + assert not await fs.isfile(file) + + +@pytest.mark.asyncio +async def test_rmtree(filesystem): + sema, fs, base = filesystem + + dir = f'{base}foo/' + + await fs.mkdir(dir) + await fs.touch(f'{dir}a') + await fs.touch(f'{dir}b') + + assert await fs.isdir(dir) + + await fs.rmtree(sema, dir) + + assert not await fs.isdir(dir) + + @pytest.mark.asyncio async def test_get_object_metadata(): - bucket = os.environ['HAIL_TEST_GCS_BUCKET'] + bucket = os.environ['HAIL_TEST_BUCKET'] file = secrets.token_hex(16) async with StorageClient() as client: @@ -55,7 +197,7 @@ async def test_get_object_metadata(): @pytest.mark.asyncio async def test_get_object_headers(): - bucket = os.environ['HAIL_TEST_GCS_BUCKET'] + bucket = os.environ['HAIL_TEST_BUCKET'] file = secrets.token_hex(16) async with StorageClient() as client: @@ -70,7 +212,7 @@ async def test_get_object_headers(): @pytest.mark.asyncio async def test_compose(): - bucket = os.environ['HAIL_TEST_GCS_BUCKET'] + bucket = os.environ['HAIL_TEST_BUCKET'] token = secret_alnum_string() part_data = [b'a', b'bb', b'ccc'] @@ -88,16 +230,126 @@ async def test_compose(): @pytest.mark.asyncio -async def test_multi_part_create_many_two_level_merge(gs_filesystem): - # This is a white-box test. compose has a maximum of 32 inputs, - # so if we're composing more than 32 parts, the - # GoogleStorageAsyncFS does a multi-level hierarhical merge. 
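# A minimal illustration of the hierarchical merge mentioned in the comment
# above, assuming only the documented GCS limit that objects.compose accepts
# at most 32 source objects per request: stitching more than 32 uploaded
# parts together therefore needs more than one round of composition.
# plan_compose_rounds and its intermediate object names are hypothetical
# helpers for illustration only; they are not part of GoogleStorageAsyncFS.
def plan_compose_rounds(part_names, max_sources=32):
    # Group the current objects into batches of at most max_sources; each
    # batch stands in for one compose call that would produce an intermediate
    # object.  Repeat until a single object remains.
    rounds = []
    while len(part_names) > 1:
        batches = [part_names[i:i + max_sources]
                   for i in range(0, len(part_names), max_sources)]
        rounds.append(batches)
        part_names = [f'intermediate-{len(rounds)}-{i}' for i in range(len(batches))]
    return rounds

# For the 80 parts used in test_multi_part_create_many this yields two rounds:
# three composes of at most 32 parts each, then one compose of the three
# intermediate objects.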
+async def test_statfile_nonexistent_file(filesystem): + sema, fs, base = filesystem + + with pytest.raises(FileNotFoundError): + await fs.statfile(f'{base}foo') + + +@pytest.mark.asyncio +async def test_statfile_directory(filesystem): + sema, fs, base = filesystem + + await fs.mkdir(f'{base}dir/') + await fs.touch(f'{base}dir/foo') + + with pytest.raises(FileNotFoundError): + # statfile raises FileNotFound on directories + await fs.statfile(f'{base}dir') + + +@pytest.mark.asyncio +async def test_statfile(filesystem): + sema, fs, base = filesystem + + n = 37 + file = f'{base}bar' + async with await fs.create(file) as f: + await f.write(secrets.token_bytes(n)) + + status = await fs.statfile(file) + assert await status.size() == n + +@pytest.mark.asyncio +async def test_listfiles(filesystem): + sema, fs, base = filesystem + + with pytest.raises(FileNotFoundError): + await fs.listfiles(f'{base}does/not/exist') + + with pytest.raises(FileNotFoundError): + await fs.listfiles(f'{base}does/not/exist', recursive=True) + + # create the following directory structure in base: + # foobar + # foo/a + # foo/b/c + a = f'{base}foo/a' + b = f'{base}foo/b/' + c = f'{base}foo/b/c' + await fs.touch(f'{base}foobar') + await fs.mkdir(f'{base}foo/') + await fs.touch(a) + await fs.mkdir(b) + await fs.touch(c) + + async def listfiles(dir, recursive): + return {(await entry.url(), await entry.is_file()) async for entry in await fs.listfiles(dir, recursive)} + + assert await listfiles(f'{base}foo/', recursive=True) == {(a, True), (c, True)} + assert await listfiles(f'{base}foo/', recursive=False) == {(a, True), (b, False)} + + # without trailing slash + assert await listfiles(f'{base}foo', recursive=True) == {(a, True), (c, True)} + assert await listfiles(f'{base}foo', recursive=False) == {(a, True), (b, False)} + + # test FileListEntry.status raises on directory + async for entry in await fs.listfiles(f'{base}foo/', recursive=False): + if await entry.is_dir(): + with pytest.raises(ValueError): + await entry.status() + else: + stat = await entry.status() + assert await stat.size() == 0 + +@pytest.mark.asyncio +@pytest.mark.parametrize("permutation", [ + None, + [0, 1, 2], + [0, 2, 1], + [1, 2, 0], + [2, 1, 0] +]) +async def test_multi_part_create(filesystem, permutation): + sema, fs, base = filesystem + + part_data = [secrets.token_bytes(s) for s in [8192, 600, 20000]] + + s = 0 + part_start = [] + for b in part_data: + part_start.append(s) + s += len(b) + + path = f'{base}a' + async with await fs.multi_part_create(sema, path, len(part_data)) as c: + async def create_part(i): + async with await c.create_part(i, part_start[i]) as f: + await f.write(part_data[i]) + + if permutation: + # do it in a fixed order + for i in permutation: + await create_part(i) + else: + # do in parallel + await asyncio.gather(*[ + create_part(i) for i in range(len(part_data))]) + + expected = b''.join(part_data) + async with await fs.open(path) as f: + actual = await f.read() + assert expected == actual + + +@pytest.mark.asyncio +async def test_multi_part_create_many(filesystem): try: - sema, fs, base = gs_filesystem + sema, fs, base = filesystem - # > 32 so we perform at least 2 levels of merging - part_data_size = [100 for _ in range(40)] - part_data = [secrets.token_bytes(s) for s in part_data_size] + # > 32 so we perform at least 2 layers of merging + part_data = [secrets.token_bytes(100) for _ in range(80)] s = 0 part_start = [] @@ -113,10 +365,11 @@ async def create_part(i): # do in parallel await bounded_gather2(sema, *[ - 
functools.partial(create_part, i) for i in range(len(part_data))]) + create_part(i) for i in range(len(part_data))]) expected = b''.join(part_data) - actual = await fs.read(path) + async with await fs.open(path) as f: + actual = await f.read() assert expected == actual except (concurrent.futures._base.CancelledError, asyncio.CancelledError) as err: raise AssertionError('uncaught cancelled error') from err diff --git a/hail/python/test/hailtop/test_fs.py b/hail/python/test/hailtop/test_fs.py deleted file mode 100644 index c106cfcb929..00000000000 --- a/hail/python/test/hailtop/test_fs.py +++ /dev/null @@ -1,338 +0,0 @@ -from typing import Optional -import os -import secrets -import shutil -from itertools import accumulate -from concurrent.futures import ThreadPoolExecutor -import asyncio -import pytest -import concurrent -import urllib.parse -from hailtop.utils import secret_alnum_string -from hailtop.aiotools import LocalAsyncFS, RouterAsyncFS -from hailtop.aiotools.s3asyncfs import S3AsyncFS -from hailtop.aiogoogle import GoogleStorageAsyncFS - - -@pytest.fixture(params=['file', 'gs', 's3', 'router/file', 'router/gs', 'router/s3']) -async def filesystem(request): - token = secret_alnum_string() - - with ThreadPoolExecutor() as thread_pool: - if request.param.startswith('router/'): - fs = RouterAsyncFS( - 'file', [LocalAsyncFS(thread_pool), - GoogleStorageAsyncFS(), - S3AsyncFS(thread_pool)]) - elif request.param == 'file': - fs = LocalAsyncFS(thread_pool) - elif request.param.endswith('gs'): - fs = GoogleStorageAsyncFS() - else: - assert request.param.endswith('s3') - fs = S3AsyncFS(thread_pool) - async with fs: - if request.param.endswith('file'): - base = f'/tmp/{token}/' - elif request.param.endswith('gs'): - bucket = os.environ['HAIL_TEST_GCS_BUCKET'] - base = f'gs://{bucket}/tmp/{token}/' - else: - assert request.param.endswith('s3') - bucket = os.environ['HAIL_TEST_S3_BUCKET'] - base = f's3://{bucket}/tmp/{token}/' - - await fs.mkdir(base) - sema = asyncio.Semaphore(50) - async with sema: - yield (sema, fs, base) - await fs.rmtree(sema, base) - assert not await fs.isdir(base) - - -@pytest.fixture -async def local_filesystem(request): - token = secret_alnum_string() - - with ThreadPoolExecutor() as thread_pool: - async with LocalAsyncFS(thread_pool) as fs: - base = f'/tmp/{token}/' - await fs.mkdir(base) - sema = asyncio.Semaphore(50) - async with sema: - yield (sema, fs, base) - await fs.rmtree(sema, base) - assert not await fs.isdir(base) - - -@pytest.fixture(params=['small', 'multipart', 'large']) -async def file_data(request): - if request.param == 'small': - return [b'foo'] - elif request.param == 'multipart': - return [b'foo', b'bar', b'baz'] - else: - assert request.param == 'large' - return [secrets.token_bytes(1_000_000)] - - -@pytest.mark.asyncio -async def test_write_read(filesystem, file_data): - sema, fs, base = filesystem - - file = f'{base}foo' - - async with await fs.create(file) as f: - for b in file_data: - await f.write(b) - - expected = b''.join(file_data) - async with await fs.open(file) as f: - actual = await f.read() - - assert expected == actual - - -@pytest.mark.asyncio -async def test_open_from(filesystem): - sema, fs, base = filesystem - - file = f'{base}foo' - - async with await fs.create(file) as f: - await f.write(b'abcde') - - async with await fs.open_from(file, 2) as f: - r = await f.read() - assert r == b'cde' - - -@pytest.mark.asyncio -async def test_read_from(filesystem): - sema, fs, base = filesystem - - file = f'{base}foo' - - await fs.write(file, 
b'abcde') - r = await fs.read_from(file, 2) - assert r == b'cde' - - -@pytest.mark.asyncio -async def test_read_range(filesystem): - sema, fs, base = filesystem - - file = f'{base}foo' - - await fs.write(file, b'abcde') - - r = await fs.read_range(file, 2, 2) - assert r == b'c' - - r = await fs.read_range(file, 2, 4) - assert r == b'cde' - - r = await fs.read_range(file, 2, 10) - assert r == b'cde' - - -@pytest.mark.asyncio -async def test_isfile(filesystem): - sema, fs, base = filesystem - - file = f'{base}foo' - - # doesn't exist yet - assert not await fs.isfile(file) - - await fs.touch(file) - - assert await fs.isfile(file) - - -@pytest.mark.asyncio -async def test_isdir(filesystem): - sema, fs, base = filesystem - - # mkdir with trailing slash - dir = f'{base}dir/' - await fs.mkdir(dir) - - await fs.touch(f'{dir}foo') - - # can't test this until after creating foo - assert await fs.isdir(dir) - - # mkdir without trailing slash - dir2 = f'{base}dir2' - await fs.mkdir(dir2) - - await fs.touch(f'{dir2}/foo') - - assert await fs.isdir(dir) - - -@pytest.mark.asyncio -async def test_isdir_subdir_only(filesystem): - sema, fs, base = filesystem - - dir = f'{base}dir/' - await fs.mkdir(dir) - - subdir = f'{dir}subdir/' - await fs.mkdir(subdir) - - await fs.touch(f'{subdir}foo') - - # can't test this until after creating foo - assert await fs.isdir(dir) - assert await fs.isdir(subdir) - - -@pytest.mark.asyncio -async def test_remove(filesystem): - sema, fs, base = filesystem - - file = f'{base}foo' - - await fs.touch(file) - assert await fs.isfile(file) - - await fs.remove(file) - - assert not await fs.isfile(file) - - -@pytest.mark.asyncio -async def test_rmtree(filesystem): - sema, fs, base = filesystem - - dir = f'{base}foo/' - - await fs.mkdir(dir) - await fs.touch(f'{dir}a') - await fs.touch(f'{dir}b') - - assert await fs.isdir(dir) - - await fs.rmtree(sema, dir) - - assert not await fs.isdir(dir) - - -@pytest.mark.asyncio -async def test_statfile_nonexistent_file(filesystem): - sema, fs, base = filesystem - - with pytest.raises(FileNotFoundError): - await fs.statfile(f'{base}foo') - - -@pytest.mark.asyncio -async def test_statfile_directory(filesystem): - sema, fs, base = filesystem - - await fs.mkdir(f'{base}dir/') - await fs.touch(f'{base}dir/foo') - - with pytest.raises(FileNotFoundError): - # statfile raises FileNotFound on directories - await fs.statfile(f'{base}dir') - - -@pytest.mark.asyncio -async def test_statfile(filesystem): - sema, fs, base = filesystem - - n = 37 - file = f'{base}bar' - await fs.write(file, secrets.token_bytes(n)) - status = await fs.statfile(file) - assert await status.size() == n - -@pytest.mark.asyncio -async def test_listfiles(filesystem): - sema, fs, base = filesystem - - with pytest.raises(FileNotFoundError): - await fs.listfiles(f'{base}does/not/exist') - - with pytest.raises(FileNotFoundError): - await fs.listfiles(f'{base}does/not/exist', recursive=True) - - # create the following directory structure in base: - # foobar - # foo/a - # foo/b/c - a = f'{base}foo/a' - b = f'{base}foo/b/' - c = f'{base}foo/b/c' - await fs.touch(f'{base}foobar') - await fs.mkdir(f'{base}foo/') - await fs.touch(a) - await fs.mkdir(b) - await fs.touch(c) - - async def listfiles(dir, recursive): - return {(await entry.url(), await entry.is_file()) async for entry in await fs.listfiles(dir, recursive)} - - assert await listfiles(f'{base}foo/', recursive=True) == {(a, True), (c, True)} - assert await listfiles(f'{base}foo/', recursive=False) == {(a, True), (b, False)} - - # 
without trailing slash - assert await listfiles(f'{base}foo', recursive=True) == {(a, True), (c, True)} - assert await listfiles(f'{base}foo', recursive=False) == {(a, True), (b, False)} - - # test FileListEntry.status raises on directory - async for entry in await fs.listfiles(f'{base}foo/', recursive=False): - if await entry.is_dir(): - with pytest.raises(IsADirectoryError): - await entry.status() - else: - stat = await entry.status() - assert await stat.size() == 0 - -@pytest.mark.asyncio -@pytest.mark.parametrize("permutation", [ - None, - [0, 1, 2], - [0, 2, 1], - [1, 2, 0], - [2, 1, 0] -]) -async def test_multi_part_create(filesystem, permutation): - sema, fs, base = filesystem - - # S3 has a minimum part size (except for the last part) of 5GiB - if base.startswith('s3'): - min_part_size = 5 * 1024 * 1024 - part_data_size = [min_part_size, min_part_size, min_part_size] - else: - part_data_size = [8192, 600, 20000] - part_data = [secrets.token_bytes(s) for s in part_data_size] - - s = 0 - part_start = [] - for b in part_data: - part_start.append(s) - s += len(b) - - path = f'{base}a' - async with await fs.multi_part_create(sema, path, len(part_data)) as c: - async def create_part(i): - async with await c.create_part(i, part_start[i]) as f: - await f.write(part_data[i]) - - if permutation: - # do it in a fixed order - for i in permutation: - await create_part(i) - else: - # do in parallel - await asyncio.gather(*[ - create_part(i) for i in range(len(part_data))]) - - expected = b''.join(part_data) - async with await fs.open(path) as f: - actual = await f.read() - assert expected == actual diff --git a/hail/src/main/scala/is/hail/HailContext.scala b/hail/src/main/scala/is/hail/HailContext.scala index 5846d92c09b..d6f1a6d8773 100644 --- a/hail/src/main/scala/is/hail/HailContext.scala +++ b/hail/src/main/scala/is/hail/HailContext.scala @@ -477,8 +477,7 @@ object HailFeatureFlags { ("use_spicy_ptypes", ("HAIL_USE_SPICY_PTYPES" -> null)), ("log_service_timing", ("HAIL_DEV_LOG_SERVICE_TIMING" -> null)), ("cache_service_input", ("HAIL_DEV_CACHE_SERVICE_INPUT" -> null)), - ("write_ir_files", ("HAIL_WRITE_IR_FILES" -> null)), - ("method_split_ir_limit", ("HAIL_DEV_METHOD_SPLIT_LIMIT" -> "16")) + ("write_ir_files", ("HAIL_WRITE_IR_FILES" -> null)) ) } diff --git a/hail/src/main/scala/is/hail/asm4s/ClassBuilder.scala b/hail/src/main/scala/is/hail/asm4s/ClassBuilder.scala index 375bf850ca7..8bde2e6a2a8 100644 --- a/hail/src/main/scala/is/hail/asm4s/ClassBuilder.scala +++ b/hail/src/main/scala/is/hail/asm4s/ClassBuilder.scala @@ -76,8 +76,6 @@ class ClassesBytes(classesBytes: Array[(String, Array[Byte])]) extends Serializa } class AsmTuple[C](val cb: ClassBuilder[C], val fields: IndexedSeq[Field[_]], val ctor: MethodBuilder[C]) { - val ti: TypeInfo[_] = cb.ti - def newTuple(elems: IndexedSeq[Code[_]]): Code[C] = Code.newInstance(cb, ctor, elems) def loadElementsAny(t: Value[_]): IndexedSeq[Code[_]] = fields.map(_.get(coerce[C](t) )) @@ -110,29 +108,18 @@ class ModuleBuilder() { def tupleClass(fieldTypes: IndexedSeq[TypeInfo[_]]): AsmTuple[_] = { tuples.getOrElseUpdate(fieldTypes, { - val kb = genClass[Unit](s"Tuple${fieldTypes.length}") + val cb = genClass[AnyRef]("Tuple") val fields = fieldTypes.zipWithIndex.map { case (ti, i) => - kb.newField(s"_$i")(ti) + cb.newField(s"_$i")(ti) } - val ctor = kb.newMethod("", fieldTypes, UnitInfo) + val ctor = cb.newMethod("", fieldTypes, UnitInfo) ctor.emitWithBuilder { cb => - // FIXME, maybe a more elegant way to do this? 
- val L = new lir.Block() - L.append( - lir.methodStmt(INVOKESPECIAL, - "java/lang/Object", - "", - "()V", - false, - UnitInfo, - FastIndexedSeq(lir.load(ctor._this.asInstanceOf[LocalRef[_]].l)))) - cb += new VCode(L, L, null) fields.zipWithIndex.foreach { case (f, i) => cb += f.putAny(ctor._this, ctor.getArg(i + 1)(f.ti).get) } Code._empty } - new AsmTuple(kb, fields, ctor) + new AsmTuple(cb, fields, ctor) }) } @@ -229,7 +216,7 @@ class ClassBuilder[C]( val sourceFile: Option[String] ) extends WrappedModuleBuilder { - val ti: ClassInfo[C] = new ClassInfo[C](className) + val ti: TypeInfo[C] = new ClassInfo[C](className) val lclass = new lir.Classx[C](className, "java/lang/Object", sourceFile) @@ -304,6 +291,13 @@ class ClassBuilder[C]( mb } + def genDependentFunction[A1 : TypeInfo, R : TypeInfo](baseName: String): DependentFunctionBuilder[AsmFunction1[A1, R]] = { + val depCB = modb.genClass[AsmFunction1[A1, R]](baseName) + val apply = depCB.newMethod("apply", Array(GenericTypeInfo[A1]), GenericTypeInfo[R]) + val dep_apply_method = new DependentMethodBuilder(apply) + new DependentFunctionBuilder[AsmFunction1[A1, R]](dep_apply_method) + } + def newField[T: TypeInfo](name: String): Field[T] = new Field[T](this, name) def newStaticField[T: TypeInfo](name: String): StaticField[T] = new StaticField[T](this, name) @@ -500,8 +494,7 @@ class MethodBuilder[C]( assert(ti == cb.ti, s"$ti != ${ cb.ti }") else { val static = (!isStatic).toInt - assert(ti == parameterTypeInfo(i - static), - s"$ti != ${ parameterTypeInfo(i - static) }\n params: $parameterTypeInfo") + assert(ti == parameterTypeInfo(i - static), s"$ti != ${ parameterTypeInfo(i - static) }") } new LocalRef(lmethod.getParam(i)) } @@ -557,6 +550,58 @@ class MethodBuilder[C]( } } +class DependentMethodBuilder[C](val mb: MethodBuilder[C]) extends WrappedMethodBuilder[C] { + var setFields: mutable.ArrayBuffer[(lir.ValueX) => Code[Unit]] = new mutable.ArrayBuffer() + + def newDepField[T : TypeInfo](value: Code[T]): Value[T] = { + val cfr = genFieldThisRef[T]() + setFields += { (obj: lir.ValueX) => + value.end.append(lir.putField(cb.className, cfr.name, typeInfo[T], obj, value.v)) + val newC = new VCode(value.start, value.end, null) + value.clear() + newC + } + cfr + } + + def newDepFieldAny[T: TypeInfo](value: Code[_]): Value[T] = + newDepField(value.asInstanceOf[Code[T]]) + + def newInstance(mb: MethodBuilder[_]): Code[C] = { + val L = new lir.Block() + + val obj = new lir.Local(null, "new_dep_fun", cb.ti) + L.append(lir.store(obj, lir.newInstance(cb.ti, cb.lInit, FastIndexedSeq.empty[lir.ValueX]))) + + var end = L + setFields.foreach { f => + val c = f(lir.load(obj)) + end.append(lir.goto(c.start)) + end = c.end + } + new VCode(L, end, lir.load(obj)) + } + + override def result(pw: Option[PrintWriter]): () => C = + throw new UnsupportedOperationException("cannot call result() on a dependent function") +} + +trait WrappedDependentMethodBuilder[C] extends WrappedMethodBuilder[C] { + def dmb: DependentMethodBuilder[C] + + def mb: MethodBuilder[C] = dmb.mb + + def newDepField[T : TypeInfo](value: Code[T]): Value[T] = dmb.newDepField(value) + + def newDepFieldAny[T: TypeInfo](value: Code[_]): Value[T] = dmb.newDepFieldAny[T](value) + + def newInstance(mb: MethodBuilder[_]): Code[C] = dmb.newInstance(mb) +} + +class DependentFunctionBuilder[F](apply_method: DependentMethodBuilder[F]) extends WrappedDependentMethodBuilder[F] { + def dmb: DependentMethodBuilder[F] = apply_method +} + class FunctionBuilder[F]( val apply_method: MethodBuilder[F] ) extends 
WrappedMethodBuilder[F] { diff --git a/hail/src/main/scala/is/hail/asm4s/Code.scala b/hail/src/main/scala/is/hail/asm4s/Code.scala index e322912a823..a166aa42060 100644 --- a/hail/src/main/scala/is/hail/asm4s/Code.scala +++ b/hail/src/main/scala/is/hail/asm4s/Code.scala @@ -315,7 +315,6 @@ object Code { invokeStatic[S](tct.runtimeClass, method, Array[Class[_]](a1ct.runtimeClass, a2ct.runtimeClass, a3ct.runtimeClass, a4ct.runtimeClass, a5ct.runtimeClass), Array[Code[_]](a1, a2, a3, a4, a5))(sct) def _null[T >: Null](implicit tti: TypeInfo[T]): Code[T] = Code(lir.insn0(ACONST_NULL, tti)) - def _uncheckednull(tti: TypeInfo[_]): Code[_] = Code(lir.insn0(ACONST_NULL, tti)) def _empty: Code[Unit] = Code[Unit](null: lir.ValueX) @@ -1052,25 +1051,6 @@ class CodeArray[T](val lhs: Code[Array[T]])(implicit tti: TypeInfo[T]) { Code(lhs, lir.insn1(ARRAYLENGTH)) } -class UntypedCodeArray(val lhs: Code[_], tti: TypeInfo[_]) { - def apply(i: Code[Int]): Code[_] = - Code(lhs, i, lir.insn2(tti.aloadOp)) - - def update(i: Code[Int], x: Code[_]): Code[Unit] = { - lhs.start.append(lir.goto(i.end)) - i.start.append(lir.goto(x.start)) - x.end.append(lir.stmtOp(tti.astoreOp, lhs.v, i.v, x.v)) - val newC = new VCode(lhs.start, x.end, null) - lhs.clear() - i.clear() - x.clear() - newC - } - - def length(): Code[Int] = - Code(lhs, lir.insn1(ARRAYLENGTH)) -} - object CodeLabel { def apply(): CodeLabel = { val L = new lir.Block() diff --git a/hail/src/main/scala/is/hail/asm4s/package.scala b/hail/src/main/scala/is/hail/asm4s/package.scala index 0268b2b4165..a15d094ef77 100644 --- a/hail/src/main/scala/is/hail/asm4s/package.scala +++ b/hail/src/main/scala/is/hail/asm4s/package.scala @@ -28,8 +28,6 @@ package asm4s { } override def toString: String = desc - - def uninitializedValue: Code[_] } class ClassInfo[C](className: String) extends TypeInfo[C] { @@ -42,8 +40,6 @@ package asm4s { val returnOp = ARETURN def newArray(): AbstractInsnNode = new TypeInsnNode(ANEWARRAY, iname) - - override def uninitializedValue: Code[_] = Code._uncheckednull(this) } class ArrayInfo[T](implicit val tti: TypeInfo[T]) extends TypeInfo[Array[T]] { @@ -56,8 +52,6 @@ package asm4s { val returnOp = ARETURN def newArray() = new TypeInsnNode(ANEWARRAY, iname) - - override def uninitializedValue: Code[_] = Code._null[Array[T]](this) } } @@ -111,14 +105,12 @@ package object asm4s { val desc = "Z" val loadOp = ILOAD val storeOp = ISTORE - val aloadOp = BALOAD - val astoreOp = BASTORE + val aloadOp = IALOAD + val astoreOp = IASTORE val returnOp = IRETURN val newarrayOp = NEWARRAY def newArray() = new IntInsnNode(NEWARRAY, T_BOOLEAN) - - override def uninitializedValue: Code[_] = const(false) } implicit object ByteInfo extends TypeInfo[Byte] { @@ -131,8 +123,6 @@ package object asm4s { val newarrayOp = NEWARRAY def newArray() = new IntInsnNode(NEWARRAY, T_BYTE) - - override def uninitializedValue: Code[_] = const(0.toByte) } implicit object ShortInfo extends TypeInfo[Short] { @@ -145,8 +135,6 @@ package object asm4s { val newarrayOp = NEWARRAY def newArray() = new IntInsnNode(NEWARRAY, T_SHORT) - - override def uninitializedValue: Code[_] = const(0.toShort) } implicit object IntInfo extends TypeInfo[Int] { @@ -158,8 +146,6 @@ package object asm4s { val returnOp = IRETURN def newArray() = new IntInsnNode(NEWARRAY, T_INT) - - override def uninitializedValue: Code[_] = const(0) } implicit object LongInfo extends TypeInfo[Long] { @@ -172,8 +158,6 @@ package object asm4s { override val slots = 2 def newArray() = new IntInsnNode(NEWARRAY, T_LONG) - - 
override def uninitializedValue: Code[_] = const(0L) } implicit object FloatInfo extends TypeInfo[Float] { @@ -186,8 +170,6 @@ package object asm4s { def newArray() = new IntInsnNode(NEWARRAY, T_FLOAT) - - override def uninitializedValue: Code[_] = const(0f) } implicit object DoubleInfo extends TypeInfo[Double] { @@ -200,8 +182,6 @@ package object asm4s { override val slots = 2 def newArray() = new IntInsnNode(NEWARRAY, T_DOUBLE) - - override def uninitializedValue: Code[_] = const(0d) } implicit object CharInfo extends TypeInfo[Char] { @@ -214,8 +194,6 @@ package object asm4s { override val slots = 2 def newArray() = new IntInsnNode(NEWARRAY, T_CHAR) - - override def uninitializedValue: Code[_] = const(0.toChar) } implicit object UnitInfo extends TypeInfo[Unit] { @@ -228,8 +206,6 @@ package object asm4s { override def slots = ??? def newArray() = ??? - - override def uninitializedValue: Code[_] = Code._empty } def classInfoFromClass[C](c: Class[C]): ClassInfo[C] = { diff --git a/hail/src/main/scala/is/hail/backend/local/LocalBackend.scala b/hail/src/main/scala/is/hail/backend/local/LocalBackend.scala index 182cb1b87b8..e872ac6d40c 100644 --- a/hail/src/main/scala/is/hail/backend/local/LocalBackend.scala +++ b/hail/src/main/scala/is/hail/backend/local/LocalBackend.scala @@ -15,8 +15,7 @@ import is.hail.io.{BufferSpec, TypedCodecSpec} import is.hail.linalg.BlockMatrix import is.hail.types._ import is.hail.types.encoded.EType -import is.hail.types.physical.stypes.{PTypeReferenceSingleCodeType, SingleCodeType} -import is.hail.types.physical.{PTuple, PType, PVoid} +import is.hail.types.physical.{PTuple, PType, PTypeReferenceSingleCodeType, PVoid, SingleCodeType} import is.hail.types.virtual.TVoid import is.hail.utils._ import is.hail.variant.ReferenceGenome diff --git a/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala b/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala index 57eba44f1c9..f1820f28aff 100644 --- a/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala +++ b/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala @@ -22,7 +22,6 @@ import is.hail.services.shuffler.ShuffleClient import is.hail.types._ import is.hail.types.encoded._ import is.hail.types.physical._ -import is.hail.types.physical.stypes.PTypeReferenceSingleCodeType import is.hail.types.virtual._ import is.hail.utils._ import is.hail.variant.ReferenceGenome diff --git a/hail/src/main/scala/is/hail/backend/spark/SparkBackend.scala b/hail/src/main/scala/is/hail/backend/spark/SparkBackend.scala index f8c57cba6cd..ed28cb96b92 100644 --- a/hail/src/main/scala/is/hail/backend/spark/SparkBackend.scala +++ b/hail/src/main/scala/is/hail/backend/spark/SparkBackend.scala @@ -9,10 +9,9 @@ import is.hail.HailContext import is.hail.expr.{JSONAnnotationImpex, SparkAnnotationImpex, Validate} import is.hail.expr.ir.lowering._ import is.hail.expr.ir._ -import is.hail.types.physical.{PStruct, PTuple, PType} -import is.hail.types.virtual.{TArray, TInterval, TStruct, TVoid, Type} +import is.hail.types.physical.{PStruct, PTuple, PType, PTypeReferenceSingleCodeType} +import is.hail.types.virtual.{TStruct, TVoid, Type} import is.hail.backend.{Backend, BackendContext, BroadcastValue, HailTaskContext} -import is.hail.expr.ir.IRParser.parseType import is.hail.io.fs.{FS, HadoopFS} import is.hail.utils._ import is.hail.io.bgen.IndexBgen @@ -32,12 +31,10 @@ import is.hail.linalg.{BlockMatrix, RowMatrix} import is.hail.rvd.RVD import is.hail.stats.LinearMixedModel import is.hail.types._ -import 
is.hail.types.physical.stypes.PTypeReferenceSingleCodeType import is.hail.variant.ReferenceGenome import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel import org.apache.spark.util.TaskCompletionListener -import org.json4s import org.json4s.JsonAST.{JInt, JObject} @@ -552,28 +549,6 @@ class SparkBackend( } } - def pyReadMultipleMatrixTables(jsonQuery: String): java.util.List[MatrixIR] = { - log.info("pyReadMultipleMatrixTables: got query") - val kvs = JsonMethods.parse(jsonQuery) match { - case json4s.JObject(values) => values.toMap - } - - val paths = kvs("paths").asInstanceOf[json4s.JArray].arr.toArray.map { case json4s.JString(s) => s } - - val intervalPointType = parseType(kvs("intervalPointType").asInstanceOf[json4s.JString].s) - val intervalObjects = JSONAnnotationImpex.importAnnotation(kvs("intervals"), TArray(TInterval(intervalPointType))) - .asInstanceOf[IndexedSeq[Interval]] - - val opts = NativeReaderOptions(intervalObjects, intervalPointType, filterIntervals = false) - val matrixReaders: IndexedSeq[MatrixIR] = paths.map { p => - log.info(s"creating MatrixRead node for $p") - val mnr = MatrixNativeReader(fs, p, Some(opts)) - MatrixRead(mnr.fullMatrixType, false, false, mnr): MatrixIR - } - log.info("pyReadMultipleMatrixTables: returning N matrix tables") - matrixReaders.asJava - } - def pyReferenceAddLiftover(name: String, chainFile: String, destRGName: String): Unit = { ExecutionTimer.logTime("SparkBackend.pyReferenceAddLiftover") { timer => withExecuteContext(timer) { ctx => diff --git a/hail/src/main/scala/is/hail/experimental/ExperimentalFunctions.scala b/hail/src/main/scala/is/hail/experimental/ExperimentalFunctions.scala index b93be8b0d41..41b4476815f 100644 --- a/hail/src/main/scala/is/hail/experimental/ExperimentalFunctions.scala +++ b/hail/src/main/scala/is/hail/experimental/ExperimentalFunctions.scala @@ -1,8 +1,6 @@ package is.hail.experimental import is.hail.expr.ir.functions._ -import is.hail.types.physical.stypes.SType -import is.hail.types.physical.stypes.concrete.SIndexablePointer import is.hail.types.physical.{PCanonicalArray, PFloat64, PType} import is.hail.types.virtual.{TArray, TFloat64, TInt32, Type} @@ -11,7 +9,7 @@ object ExperimentalFunctions extends RegistryFunctions { def registerAll() { val experimentalPackageClass = Class.forName("is.hail.experimental.package$") - registerScalaFunction("filtering_allele_frequency", Array(TInt32, TInt32, TFloat64), TFloat64, null)(experimentalPackageClass, "calcFilterAlleleFreq") - registerWrappedScalaFunction1("haplotype_freq_em", TArray(TInt32), TArray(TFloat64), (_: Type, pt: SType) => SIndexablePointer(PCanonicalArray(PFloat64(true))))(experimentalPackageClass, "haplotypeFreqEM") + registerScalaFunction("filtering_allele_frequency", Array(TInt32, TInt32, TFloat64), TFloat64, (_: Type, pt: Seq[PType]) => PFloat64())(experimentalPackageClass, "calcFilterAlleleFreq") + registerWrappedScalaFunction1("haplotype_freq_em", TArray(TInt32), TArray(TFloat64), (_: Type, pt: PType) => PCanonicalArray(PFloat64(true)))(experimentalPackageClass, "haplotypeFreqEM") } } \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/expr/ir/ArraySorter.scala b/hail/src/main/scala/is/hail/expr/ir/ArraySorter.scala index 3e60eec73eb..72be5592af3 100644 --- a/hail/src/main/scala/is/hail/expr/ir/ArraySorter.scala +++ b/hail/src/main/scala/is/hail/expr/ir/ArraySorter.scala @@ -2,208 +2,75 @@ package is.hail.expr.ir import is.hail.annotations.Region import is.hail.asm4s._ -import 
is.hail.types.physical.stypes.interfaces.SIndexableCode -import is.hail.types.physical.{PCanonicalArray, PCanonicalDict, PCanonicalSet} -import is.hail.types.virtual.{TArray, TDict, TSet, Type} -import is.hail.utils.FastIndexedSeq - -import scala.language.existentials +import is.hail.types.physical.{PCanonicalArray, PCanonicalDict, PCanonicalSet, PCode, PIndexableCode, PType, typeToTypeInfo} class ArraySorter(r: EmitRegion, array: StagedArrayBuilder) { - val ti: TypeInfo[_] = array.elt.ti + val typ: PType = array.elt + val ti: TypeInfo[_] = typeToTypeInfo(typ) val mb: EmitMethodBuilder[_] = r.mb - private[this] var prunedMissing: Boolean = false - - private[this] val workingArrayInfo = arrayInfo(array.ti) - private[this] val workingArray1 = mb.genFieldThisRef("sorter_working_array")(workingArrayInfo) - private[this] val workingArray2 = mb.genFieldThisRef("sorter_working_array")(workingArrayInfo) - - private[this] def arrayRef(workingArray: Code[Array[_]]): UntypedCodeArray = new UntypedCodeArray(workingArray, array.ti) - - def sort(cb: EmitCodeBuilder, region: Value[Region], comparesLessThan: (EmitCodeBuilder, Value[Region], Code[_], Code[_]) => Code[Boolean]): Unit = { - - val sortMB = cb.emb.ecb.genEmitMethod("arraySorter_outer", FastIndexedSeq[ParamType](classInfo[Region]), UnitInfo) - sortMB.voidWithBuilder { cb => - - val newEnd = cb.newLocal[Int]("newEnd", 0) - val i = cb.newLocal[Int]("i", 0) - val size = cb.newLocal[Int]("size", array.size) - - cb.whileLoop(i < size, { - cb.ifx(!array.isMissing(i), { - cb.ifx(newEnd.cne(i), cb += array.update(newEnd, array.apply(i))) - cb.assign(newEnd, newEnd + 1) - }) - cb.assign(i, i + 1) - }) - cb.assign(i, newEnd) - cb.whileLoop(i < size, { - cb += array.setMissing(i, true) - cb.assign(i, i + 1) - }) - - // sort elements in [0, newEnd] - - // merging into B - val mergeMB = cb.emb.ecb.genEmitMethod("arraySorter_merge", FastIndexedSeq[ParamType](classInfo[Region], IntInfo, IntInfo, IntInfo, workingArrayInfo, workingArrayInfo), UnitInfo) - mergeMB.voidWithBuilder { cb => - val r = mergeMB.getCodeParam[Region](1) - val begin = mergeMB.getCodeParam[Int](2) - val mid = mergeMB.getCodeParam[Int](3) - val end = mergeMB.getCodeParam[Int](4) - - def arrayA = new UntypedCodeArray(mergeMB.getCodeParam(5)(workingArrayInfo), array.ti) - - def arrayB = new UntypedCodeArray(mergeMB.getCodeParam(6)(workingArrayInfo), array.ti) - - val i = cb.newLocal[Int]("mergemb_i", begin) - val j = cb.newLocal[Int]("mergemb_j", mid) - - val k = cb.newLocal[Int]("mergemb_k", i) - cb.whileLoop(k < end, { - - val LtakeFromLeft = CodeLabel() - val LtakeFromRight = CodeLabel() - val Ldone = CodeLabel() - - cb.ifx(j < end, { - cb.ifx(i >= mid, cb.goto(LtakeFromRight)) - cb.ifx(comparesLessThan(cb, r, arrayA(j), arrayA(i)), cb.goto(LtakeFromRight), cb.goto(LtakeFromLeft)) - }, cb.goto(LtakeFromLeft)) - - cb.define(LtakeFromLeft) - cb += arrayB.update(k, arrayA(i)) - cb.assign(i, i + 1) - cb.goto(Ldone) - - cb.define(LtakeFromRight) - cb += arrayB.update(k, arrayA(j)) - cb.assign(j, j + 1) - cb.goto(Ldone) - - cb.define(Ldone) - cb.assign(k, k + 1) - }) - } - - val splitMergeMB = cb.emb.ecb.genEmitMethod("arraySorter_splitMerge", FastIndexedSeq[ParamType](classInfo[Region], IntInfo, IntInfo, workingArrayInfo, workingArrayInfo), UnitInfo) - splitMergeMB.voidWithBuilder { cb => - val r = splitMergeMB.getCodeParam[Region](1) - val begin = splitMergeMB.getCodeParam[Int](2) - val end = splitMergeMB.getCodeParam[Int](3) - - val arrayB = splitMergeMB.getCodeParam(4)(workingArrayInfo) - 
val arrayA = splitMergeMB.getCodeParam(5)(workingArrayInfo) - - cb.ifx(end - begin > 1, { - val mid = cb.newLocal[Int]("splitMerge_mid", (begin + end) / 2) - - cb.invokeVoid(splitMergeMB, r, begin, mid, arrayA, arrayB) - cb.invokeVoid(splitMergeMB, r, mid, end, arrayA, arrayB) - - // result goes in A - cb.invokeVoid(mergeMB, r, begin, mid, end, arrayB, arrayA) - }) - } - - // these arrays should be allocated once and reused - cb.ifx(workingArray1.isNull || arrayRef(workingArray1).length() < newEnd, { - cb.assignAny(workingArray1, Code.newArray(newEnd)(array.ti)) - cb.assignAny(workingArray2, Code.newArray(newEnd)(array.ti)) - }) - - cb.assign(i, 0) - cb.whileLoop(i < newEnd, { - cb += arrayRef(workingArray1).update(i, array(i)) - cb += arrayRef(workingArray2).update(i, array(i)) - cb.assign(i, i + 1) - }) - - // elements are sorted in workingArray2 after calling splitMergeMB - cb.invokeVoid(splitMergeMB, sortMB.getCodeParam[Region](1), const(0), newEnd, workingArray1, workingArray2) - - cb.assign(i, 0) - cb.whileLoop(i < newEnd, { - cb += array.update(i, arrayRef(workingArray2)(i)) - cb.assign(i, i + 1) - }) - + def sort(sorter: DependentEmitFunctionBuilder[_]): Code[Unit] = { + val localF = ti match { + case BooleanInfo => mb.genFieldThisRef[AsmFunction2[Boolean, Boolean, Boolean]]() + case IntInfo => mb.genFieldThisRef[AsmFunction2[Int, Int, Boolean]]() + case LongInfo => mb.genFieldThisRef[AsmFunction2[Int, Int, Boolean]]() + case FloatInfo => mb.genFieldThisRef[AsmFunction2[Long, Long, Boolean]]() + case DoubleInfo => mb.genFieldThisRef[AsmFunction2[Double, Double, Boolean]]() } - cb.invokeVoid(sortMB, region) - - + Code(localF.storeAny(Code.checkcast(sorter.newInstance(mb))(localF.ti)), array.sort(localF)) } - def toRegion(cb: EmitCodeBuilder, t: Type): SIndexableCode = { + def toRegion(cb: EmitCodeBuilder, t: PType): PIndexableCode = { t match { - case pca: TArray => + case pca: PCanonicalArray => val len = cb.newLocal[Int]("arraysorter_to_region_len", array.size) - // fixme element requiredness should be set here - val arrayType = PCanonicalArray(array.elt.loadedSType.canonicalPType().setRequired(this.prunedMissing || array.eltRequired)) - - arrayType.constructFromElements(cb, r.region, len, deepCopy = false) { (cb, idx) => - array.loadFromIndex(cb, r.region, idx) + pca.constructFromElements(cb, r.region, len, deepCopy = false) { (cb, idx) => + IEmitCode(cb, array.isMissing(idx), PCode(typ, array(idx))) } - case td: TDict => - PCanonicalDict.coerceArrayCode(toRegion(cb, TArray(td.elementType))) - case ts: TSet => - PCanonicalSet.coerceArrayCode(toRegion(cb, TArray(ts.elementType))) + case td: PCanonicalDict => + td.construct(toRegion(cb, td.arrayRep)) + case ts: PCanonicalSet => + ts.construct(toRegion(cb, ts.arrayRep)) } } - def pruneMissing(cb: EmitCodeBuilder): Unit = { - this.prunedMissing = true - - val i = cb.newLocal[Int]("i", 0) - val n = cb.newLocal[Int]("n", 0) - val size = cb.newLocal[Int]("size", array.size) - cb.whileLoop(i < size, { - cb.ifx(!array.isMissing(i), { - cb.ifx(i.cne(n), - cb += array.update(n, array(i))) - cb.assign(n, n + 1) - }) - cb.assign(i, i + 1) - }) - cb += array.setSize(n) + def pruneMissing: Code[Unit] = { + val i = mb.newLocal[Int]() + val n = mb.newLocal[Int]() + + Code( + n := 0, + i := 0, + Code.whileLoop(i < array.size, + Code( + array.isMissing(i).mux( + Code._empty, + i.ceq(n).mux( + n += 1, + Code(array.setMissing(n, false), array.update(n, array(i)), n += 1))), + i += 1)), + array.setSize(n)) } - def distinctFromSorted(cb: 
EmitCodeBuilder, region: Value[Region], discardNext: (EmitCodeBuilder, Value[Region], EmitCode, EmitCode) => Code[Boolean]): Unit = { - - val distinctMB = cb.emb.genEmitMethod("distinctFromSorted", FastIndexedSeq[ParamType](classInfo[Region]), UnitInfo) - distinctMB.voidWithBuilder { cb => - val region = distinctMB.getCodeParam[Region](1) - val i = cb.newLocal[Int]("i", 0) - val n = cb.newLocal[Int]("n", 0) - val size = cb.newLocal[Int]("size", array.size) - cb.whileLoop(i < size, { - cb.assign(i, i + 1) - - val LskipLoopBegin = CodeLabel() - val LskipLoopEnd = CodeLabel() - cb.define(LskipLoopBegin) - cb.ifx(i >= size, cb.goto(LskipLoopEnd)) - cb.ifx(!discardNext(cb, region, - EmitCode.fromI(distinctMB)(cb => array.loadFromIndex(cb, region, n)), - EmitCode.fromI(distinctMB)(cb => array.loadFromIndex(cb, region, i))), - cb.goto(LskipLoopEnd)) - cb.assign(i, i + 1) - cb.goto(LskipLoopBegin) - - cb.define(LskipLoopEnd) - - cb.assign(n, n + 1) - - cb.ifx(i < size && i.cne(n), { - cb += array.setMissing(n, array.isMissing(i)) - cb.ifx(!array.isMissing(n), cb += array.update(n, array(i))) - }) - - }) - cb += array.setSize(n) - } - - cb.invokeVoid(distinctMB, region) + def distinctFromSorted(discardNext: (Code[Region], Code[_], Code[Boolean], Code[_], Code[Boolean]) => Code[Boolean]): Code[Unit] = { + val i = mb.newLocal[Int]() + val n = mb.newLocal[Int]() + + Code( + i := 0, + n := 0, + Code.whileLoop(i < array.size, + i += 1, + Code.whileLoop(i < array.size && discardNext(r.region, array(n), array.isMissing(n), array(i), array.isMissing(i)), + i += 1), + n += 1, + (i < array.size && i.cne(n)).mux( + Code( + array.setMissing(n, array.isMissing(i)), + array.isMissing(n).mux( + Code._empty, + array.update(n, array(i)))), + Code._empty)), + array.setSize(n)) } } diff --git a/hail/src/main/scala/is/hail/expr/ir/BinaryOp.scala b/hail/src/main/scala/is/hail/expr/ir/BinaryOp.scala index 6282610b13b..8e19bdc98e9 100644 --- a/hail/src/main/scala/is/hail/expr/ir/BinaryOp.scala +++ b/hail/src/main/scala/is/hail/expr/ir/BinaryOp.scala @@ -2,15 +2,13 @@ package is.hail.expr.ir import is.hail.asm4s._ import is.hail.types._ -import is.hail.types.physical.stypes.{SCode, SType} -import is.hail.types.physical.stypes.interfaces._ import is.hail.types.virtual._ import is.hail.utils._ object BinaryOp { private val returnType: ((BinaryOp, Type, Type)) => Option[Type] = lift { - case (FloatingPointDivide(), TInt32, TInt32) => TFloat64 - case (FloatingPointDivide(), TInt64, TInt64) => TFloat64 + case (FloatingPointDivide(), TInt32, TInt32) => TFloat32 + case (FloatingPointDivide(), TInt64, TInt64) => TFloat32 case (FloatingPointDivide(), TFloat32, TFloat32) => TFloat32 case (FloatingPointDivide(), TFloat64, TFloat64) => TFloat64 case (Add() | Subtract() | Multiply() | RoundToNegInfDivide() | BitAnd() | BitOr() | BitXOr(), TInt32, TInt32) => TInt32 @@ -34,15 +32,7 @@ object BinaryOp { private def incompatible[T](lt: Type, rt: Type, op: BinaryOp): T = throw new RuntimeException(s"Cannot apply $op to $lt and $rt") - def emit(cb: EmitCodeBuilder, op: BinaryOp, l: SCode, r: SCode): SCode = { - val lt = l.st.virtualType - val rt = r.st.virtualType - - val retCode = emit(op, lt, rt, SType.extractPrimCode(cb, l), SType.extractPrimCode(cb, r)) - primitive(getReturnType(op, lt, rt), retCode) - } - - private[this] def emit(op: BinaryOp, lt: Type, rt: Type, l: Code[_], r: Code[_]): Code[_] = + def emit(op: BinaryOp, lt: Type, rt: Type, l: Code[_], r: Code[_]): Code[_] = (lt, rt) match { case (TInt32, TInt32) => val ll = 
coerce[Int](l) @@ -51,7 +41,7 @@ object BinaryOp { case Add() => ll + rr case Subtract() => ll - rr case Multiply() => ll * rr - case FloatingPointDivide() => ll.toD / rr.toD + case FloatingPointDivide() => ll.toF / rr.toF case RoundToNegInfDivide() => Code.invokeStatic2[Math, Int, Int, Int]("floorDiv", ll, rr) case BitAnd() => ll & rr case BitOr() => ll | rr @@ -77,7 +67,7 @@ object BinaryOp { case Add() => ll + rr case Subtract() => ll - rr case Multiply() => ll * rr - case FloatingPointDivide() => ll.toD / rr.toD + case FloatingPointDivide() => ll.toF / rr.toF case RoundToNegInfDivide() => Code.invokeStatic2[Math, Long, Long, Long]("floorDiv", ll, rr) case BitAnd() => ll & rr case BitOr() => ll | rr diff --git a/hail/src/main/scala/is/hail/expr/ir/BinarySearch.scala b/hail/src/main/scala/is/hail/expr/ir/BinarySearch.scala index 34d30e98ad1..b7934e543d0 100644 --- a/hail/src/main/scala/is/hail/expr/ir/BinarySearch.scala +++ b/hail/src/main/scala/is/hail/expr/ir/BinarySearch.scala @@ -1,82 +1,91 @@ package is.hail.expr.ir +import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir.orderings.CodeOrdering import is.hail.types.physical._ import is.hail.types.physical.stypes._ -import is.hail.types.physical.stypes.interfaces.{SBaseStruct, SBaseStructCode, SContainer, SInterval, SIntervalCode} import is.hail.utils.FastIndexedSeq import scala.language.existentials -class BinarySearch[C](mb: EmitMethodBuilder[C], containerType: SContainer, eltType: EmitType, keyOnly: Boolean) { +class BinarySearch[C](mb: EmitMethodBuilder[C], typ: PContainer, eltType: PType, keyOnly: Boolean) { - val containerElementType: EmitType = containerType.elementEmitType + val elt: PType = typ.elementType + val ti: TypeInfo[_] = typeToTypeInfo(elt) val (compare: CodeOrdering.F[Int], equiv: CodeOrdering.F[Boolean], findElt: EmitMethodBuilder[C]) = if (keyOnly) { - val kt: EmitType = containerElementType.st match { - case s: SBaseStruct => - require(s.size == 2) - s.fieldEmitTypes(0) - case interval: SInterval => - interval.pointEmitType + val kt = elt match { + case t: PBaseStruct => + require(t.size == 2) + t.types(0) + case t: PCanonicalInterval => + t.pointType } - val findMB = mb.genEmitMethod("findElt", FastIndexedSeq[ParamType](containerType.paramType, eltType.paramType), typeInfo[Int]) + val findMB = mb.genEmitMethod("findElt", FastIndexedSeq[ParamType](typeInfo[Long], typeInfo[Boolean], typeToTypeInfo(kt)), typeInfo[Int]) val comp: CodeOrdering.F[Int] = { (cb: EmitCodeBuilder, ec1: EmitCode, _ec2: EmitCode) => val ec2 = EmitCode.fromI(cb.emb) { cb => val iec = _ec2.toI(cb) iec.flatMap(cb) { - case v2: SBaseStructCode => + case v2: PBaseStructCode => v2.memoize(cb, "bs_comp_v2").loadField(cb, 0) - case v2: SIntervalCode => + case v2: PIntervalCode => v2.memoize(cb, "bs_comp_v2").loadStart(cb) - } + }.map(cb)(_.asPCode) } - findMB.ecb.getOrderingFunction(eltType.st, kt.st, CodeOrdering.Compare())(cb, ec1, ec2) + findMB.ecb.getOrderingFunction(eltType.sType, kt.sType, CodeOrdering.Compare())(cb, ec1, ec2) } val ceq: CodeOrdering.F[Boolean] = { (cb: EmitCodeBuilder, ec1: EmitCode, _ec2: EmitCode) => val ec2 = EmitCode.fromI(cb.emb) { cb => val iec = _ec2.toI(cb) iec.flatMap(cb) { - case v2: SBaseStructCode => + case v2: PBaseStructCode => v2.memoize(cb, "bs_eq_v2").loadField(cb, 0) - case v2: SIntervalCode => + case v2: PIntervalCode => v2.memoize(cb, "bs_comp_v2").loadStart(cb) - } + }.map(cb)(_.asPCode) } - findMB.ecb.getOrderingFunction(eltType.st, kt.st, CodeOrdering.Equiv())(cb, ec1, ec2) + 
findMB.ecb.getOrderingFunction(eltType.sType, kt.sType, CodeOrdering.Equiv())(cb, ec1, ec2) } (comp, ceq, findMB) } else - (mb.ecb.getOrderingFunction(eltType.st, containerElementType.st, CodeOrdering.Compare()), - mb.ecb.getOrderingFunction(eltType.st, containerElementType.st, CodeOrdering.Equiv()), - mb.genEmitMethod("findElt", FastIndexedSeq[ParamType](containerType.paramType, eltType.paramType), typeInfo[Int])) + (mb.ecb.getOrderingFunction(eltType.sType, elt.sType, CodeOrdering.Compare()), + mb.ecb.getOrderingFunction(eltType.sType, elt.sType, CodeOrdering.Equiv()), + mb.genEmitMethod("findElt", FastIndexedSeq[ParamType](typeInfo[Long], typeInfo[Boolean], elt.ti), typeInfo[Int])) - // Returns smallest i, 0 <= i < n, for which a(i) >= key, or returns n if a(i) < key for all i - findElt.emitWithBuilder[Int] { cb => - val indexable = findElt.getSCodeParam(1).asIndexable.memoize(cb, "findElt_indexable") - - val elt = findElt.getEmitParam(2, null) // no streams - - val len = cb.newLocal[Int]("findelt_length", indexable.loadLength()) - val low = cb.newLocal("findelt_low", 0) - val high = cb.newLocal("findelt_high", len) + private[this] val array = findElt.getCodeParam[Long](1) + private[this] val m = findElt.getCodeParam[Boolean](2) + private[this] val e = findElt.getCodeParam(3)(eltType.ti) + private[this] val len = findElt.newLocal[Int]() + private[this] val i = findElt.newLocal[Int]() + private[this] val low = findElt.newLocal[Int]() + private[this] val high = findElt.newLocal[Int]() - cb.whileLoop(low < high, { - val i = cb.newLocal("findelt_i", (low + high) / 2) - cb.ifx(compare(cb, elt, EmitCode.fromI(findElt)(cb => indexable.loadElement(cb, i))) <= 0, - cb.assign(high, i), - cb.assign(low, i + 1) - ) - }) - low + def cmp(i: Code[Int]): Code[Int] = EmitCodeBuilder.scopedCode(findElt) { cb => + val ec1 = EmitCode(Code._empty, m, PCode(eltType, e)) + val ec2 = EmitCode.fromI(findElt) { cb => + PCode(typ, array).asIndexable.memoize(cb, "binsearch_cmp_i").loadElement(cb, i).map(cb)(_.asPCode) + } + compare(cb, ec1, ec2) } + // Returns smallest i, 0 <= i < n, for which a(i) >= key, or returns n if a(i) < key for all i + findElt.emit(Code( + len := typ.loadLength(array), + low := 0, + high := len, + Code.whileLoop(low < high, + i := (low + high) / 2, + (cmp(i) <= 0).mux( + high := i, + low := i + 1)), + low)) + // check missingness of v before calling - def getClosestIndex(cb: EmitCodeBuilder, array: SCode, v: EmitCode): Code[Int] = { - cb.invokeCode[Int](findElt, array, v) + def getClosestIndex(array: Code[Long], m: Code[Boolean], v: Code[_]): Code[Int] = { + findElt.invokeCode[Int](array, m, v) } } diff --git a/hail/src/main/scala/is/hail/expr/ir/BlockMatrixIR.scala b/hail/src/main/scala/is/hail/expr/ir/BlockMatrixIR.scala index 6afeafdc96b..2a0f3ab93af 100644 --- a/hail/src/main/scala/is/hail/expr/ir/BlockMatrixIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/BlockMatrixIR.scala @@ -243,6 +243,8 @@ class BlockMatrixLiteral(value: BlockMatrix) extends BlockMatrixIR { } case class BlockMatrixMap(child: BlockMatrixIR, eltName: String, f: IR, needsDense: Boolean) extends BlockMatrixIR { + assert(f.isInstanceOf[ApplyUnaryPrimOp] || f.isInstanceOf[Apply] || f.isInstanceOf[ApplyBinaryPrimOp]) + override lazy val typ: BlockMatrixType = child.typ assert(!needsDense || !typ.isSparse) @@ -266,7 +268,6 @@ case class BlockMatrixMap(child: BlockMatrixIR, eltName: String, f: IR, needsDen f(_, scalar) override protected[ir] def execute(ctx: ExecuteContext): BlockMatrix = { - 
assert(f.isInstanceOf[ApplyUnaryPrimOp] || f.isInstanceOf[Apply] || f.isInstanceOf[ApplyBinaryPrimOp]) val prev = child.execute(ctx) val functionArgs = f match { @@ -358,6 +359,7 @@ case object NeedsDense extends SparsityStrategy { } case class BlockMatrixMap2(left: BlockMatrixIR, right: BlockMatrixIR, leftName: String, rightName: String, f: IR, sparsityStrategy: SparsityStrategy) extends BlockMatrixIR { + assert(f.isInstanceOf[ApplyBinaryPrimOp] || f.isInstanceOf[Apply]) assert( left.typ.nRows == right.typ.nRows && left.typ.nCols == right.typ.nCols && @@ -380,8 +382,6 @@ case class BlockMatrixMap2(left: BlockMatrixIR, right: BlockMatrixIR, leftName: } override protected[ir] def execute(ctx: ExecuteContext): BlockMatrix = { - assert(f.isInstanceOf[ApplyBinaryPrimOp] || f.isInstanceOf[Apply]) - left match { case BlockMatrixBroadcast(vectorIR: BlockMatrixIR, IndexedSeq(x), _, _) => val vector = coerceToVector(ctx , vectorIR) @@ -421,11 +421,6 @@ case class BlockMatrixMap2(left: BlockMatrixIR, right: BlockMatrixIR, leftName: case ValueToBlockMatrix(child, _, _) => Interpret[Any](ctx, child) match { case vector: IndexedSeq[_] => vector.asInstanceOf[IndexedSeq[Double]].toArray - case vector: NDArray => { - val IndexedSeq(numRows, numCols) = vector.shape - assert(numRows == 1L || numCols == 1L) - vector.getRowMajorElements().asInstanceOf[IndexedSeq[Double]].toArray - } } case _ => ir.execute(ctx).toBreezeMatrix().data } diff --git a/hail/src/main/scala/is/hail/expr/ir/Casts.scala b/hail/src/main/scala/is/hail/expr/ir/Casts.scala index 6eb7bdcbe87..484e482e018 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Casts.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Casts.scala @@ -2,33 +2,32 @@ package is.hail.expr.ir import is.hail.asm4s._ import is.hail.types._ -import is.hail.types.physical.stypes.SCode -import is.hail.types.physical.stypes.interfaces._ import is.hail.types.virtual._ import scala.language.existentials object Casts { - private val casts: Map[(Type, Type), (EmitCodeBuilder, SCode) => SCode] = Map( - (TInt32, TInt32) -> ((cb: EmitCodeBuilder, x: SCode) => x), - (TInt32, TInt64) -> ((cb: EmitCodeBuilder, x: SCode) => primitive(x.asInt.intCode(cb).toL)), - (TInt32, TFloat32) -> ((cb: EmitCodeBuilder, x: SCode) => primitive(x.asInt.intCode(cb).toF)), - (TInt32, TFloat64) -> ((cb: EmitCodeBuilder, x: SCode) => primitive(x.asInt.intCode(cb).toD)), - (TInt64, TInt32) -> ((cb: EmitCodeBuilder, x: SCode) => primitive(x.asLong.longCode(cb).toI)), - (TInt64, TInt64) -> ((cb: EmitCodeBuilder, x: SCode) => x), - (TInt64, TFloat32) -> ((cb: EmitCodeBuilder, x: SCode) => primitive(x.asLong.longCode(cb).toF)), - (TInt64, TFloat64) -> ((cb: EmitCodeBuilder, x: SCode) => primitive(x.asLong.longCode(cb).toD)), - (TFloat32, TInt32) -> ((cb: EmitCodeBuilder, x: SCode) => primitive(x.asFloat.floatCode(cb).toI)), - (TFloat32, TInt64) -> ((cb: EmitCodeBuilder, x: SCode) => primitive(x.asFloat.floatCode(cb).toL)), - (TFloat32, TFloat32) -> ((cb: EmitCodeBuilder, x: SCode) => x), - (TFloat32, TFloat64) -> ((cb: EmitCodeBuilder, x: SCode) => primitive(x.asFloat.floatCode(cb).toD)), - (TFloat64, TInt32) -> ((cb: EmitCodeBuilder, x: SCode) => primitive(x.asDouble.doubleCode(cb).toI)), - (TFloat64, TInt64) -> ((cb: EmitCodeBuilder, x: SCode) => primitive(x.asDouble.doubleCode(cb).toL)), - (TFloat64, TFloat32) -> ((cb: EmitCodeBuilder, x: SCode) => primitive(x.asDouble.doubleCode(cb).toF)), - (TFloat64, TFloat64) -> ((cb: EmitCodeBuilder, x: SCode) => x)) + private val casts: Map[(Type, Type), (Code[T] => 
Code[_]) forSome {type T}] = Map( + (TInt32, TInt32) -> ((x: Code[Int]) => x), + (TInt32, TInt64) -> ((x: Code[Int]) => x.toL), + (TInt32, TFloat32) -> ((x: Code[Int]) => x.toF), + (TInt32, TFloat64) -> ((x: Code[Int]) => x.toD), + (TInt64, TInt32) -> ((x: Code[Long]) => x.toI), + (TInt64, TInt64) -> ((x: Code[Long]) => x), + (TInt64, TFloat32) -> ((x: Code[Long]) => x.toF), + (TInt64, TFloat64) -> ((x: Code[Long]) => x.toD), + (TFloat32, TInt32) -> ((x: Code[Float]) => x.toI), + (TFloat32, TInt64) -> ((x: Code[Float]) => x.toL), + (TFloat32, TFloat32) -> ((x: Code[Float]) => x), + (TFloat32, TFloat64) -> ((x: Code[Float]) => x.toD), + (TFloat64, TInt32) -> ((x: Code[Double]) => x.toI), + (TFloat64, TInt64) -> ((x: Code[Double]) => x.toL), + (TFloat64, TFloat32) -> ((x: Code[Double]) => x.toF), + (TFloat64, TFloat64) -> ((x: Code[Double]) => x), + (TInt32, TCall) -> ((x: Code[Int]) => x)) - def get(from: Type, to: Type): (EmitCodeBuilder, SCode) => SCode = - casts(from -> to) + def get(from: Type, to: Type): Code[_] => Code[_] = + casts(from -> to).asInstanceOf[Code[_] => Code[_]] def valid(from: Type, to: Type): Boolean = casts.contains(from -> to) diff --git a/hail/src/main/scala/is/hail/expr/ir/Children.scala b/hail/src/main/scala/is/hail/expr/ir/Children.scala index a88734dbeb8..3a677369ac6 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Children.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Children.scala @@ -186,7 +186,6 @@ object Children { none case Die(message, typ, errorId) => Array(message) - case Trap(child) => Array(child) case ApplyIR(_, _, args) => args.toFastIndexedSeq case Apply(_, _, args, _) => diff --git a/hail/src/main/scala/is/hail/expr/ir/Compile.scala b/hail/src/main/scala/is/hail/expr/ir/Compile.scala index 75c19409e32..e4760eda286 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Compile.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Compile.scala @@ -7,9 +7,8 @@ import is.hail.expr.ir.lowering.LoweringPipeline import is.hail.expr.ir.streams.{EmitStream, StreamArgType} import is.hail.io.fs.FS import is.hail.rvd.RVDContext -import is.hail.types.physical.stypes.{PTypeReferenceSingleCodeType, SingleCodeType, StreamSingleCodeType} import is.hail.types.physical.stypes.interfaces.SStream -import is.hail.types.physical.{PStream, PStruct, PType} +import is.hail.types.physical.{PStream, PStruct, PType, PTypeReferenceSingleCodeType, SingleCodeType, StreamSingleCodeType} import is.hail.types.virtual.Type import is.hail.utils._ @@ -49,6 +48,10 @@ object Compile { TypeCheck(ir, BindingEnv.empty) + val usesAndDefs = ComputeUsesAndDefs(ir, errorIfFreeVariables = false) + val requiredness = Requiredness.apply(ir, usesAndDefs, null, Env.empty) // Value IR inference doesn't need context + InferPType(ir, Env.empty, requiredness, usesAndDefs) + val returnParam = CodeParamType(SingleCodeType.typeInfoFromType(ir.typ)) val fb = EmitFunctionBuilder[F](ctx, "Compiled", @@ -72,8 +75,8 @@ object Compile { assert(fb.mb.parameterTypeInfo == expectedCodeParamTypes, s"expected $expectedCodeParamTypes, got ${ fb.mb.parameterTypeInfo }") assert(fb.mb.returnTypeInfo == expectedCodeReturnType, s"expected $expectedCodeReturnType, got ${ fb.mb.returnTypeInfo }") - val emitContext = EmitContext.analyze(ctx, ir) - val rt = Emit(emitContext, ir, fb, expectedCodeReturnType, params.length) + val emitContext = new EmitContext(ctx, requiredness) + val rt = Emit(emitContext, ir, fb, expectedCodeReturnType) val f = fb.resultWithIndex(print) codeCache += k -> CodeCacheValue(rt, f) @@ -111,6 +114,10 @@ 
object CompileWithAggregators { TypeCheck(ir, BindingEnv(Env.fromSeq[Type](params.map { case (name, t) => name -> t.virtualType }))) + val usesAndDefs = ComputeUsesAndDefs(ir, errorIfFreeVariables = false) + val requiredness = Requiredness.apply(ir, usesAndDefs, null, Env.empty) // Value IR inference doesn't need context + InferPType(ir, Env.empty, requiredness, usesAndDefs) + val fb = EmitFunctionBuilder[F](ctx, "CompiledWithAggs", CodeParamType(typeInfo[Region]) +: params.map { case (_, pt) => pt }, SingleCodeType.typeInfoFromType(ir.typ), Some("Emit.scala")) @@ -128,8 +135,8 @@ object CompileWithAggregators { } */ - val emitContext = EmitContext.analyze(ctx, ir) - val rt = Emit(emitContext, ir, fb, expectedCodeReturnType, params.length, Some(aggSigs)) + val emitContext = new EmitContext(ctx, requiredness) + val rt = Emit(emitContext, ir, fb, expectedCodeReturnType, Some(aggSigs)) val f = fb.resultWithIndex() codeCache += k -> CodeCacheValue(rt, f) @@ -199,26 +206,27 @@ object CompileIterator { val ir = LoweringPipeline.compileLowerer(true)(ctx, body).asInstanceOf[IR].noSharing TypeCheck(ir) - var elementAddress: Settable[Long] = null - var returnType: PType = null + val usesAndDefs = ComputeUsesAndDefs(ir, errorIfFreeVariables = false) + val requiredness = Requiredness.apply(ir, usesAndDefs, null, Env.empty) // Value IR inference doesn't need context + InferPType(ir, Env.empty, requiredness, usesAndDefs) - stepF.emitWithBuilder[Boolean] { cb => - val emitContext = EmitContext.analyze(ctx, ir) - val emitter = new Emit(emitContext, stepFECB) + val emitContext = new EmitContext(ctx, requiredness) + val emitter = new Emit(emitContext, stepFECB) - val env = EmitEnv(Env.empty, argTypeInfo.indices.filter(i => argTypeInfo(i).isInstanceOf[EmitParamType]).map(i => stepF.storeEmitParam(i + 1, cb))) - val optStream = EmitCode.fromI(stepF)(cb => EmitStream.produce(emitter, ir, cb, outerRegion, env, None)) - returnType = optStream.st.asInstanceOf[SStream].elementEmitType.canonicalPType.setRequired(true) - val returnPType = optStream.st.asInstanceOf[SStream].elementType.canonicalPType() + val returnType = ir.pType.asInstanceOf[PStream].elementType.asInstanceOf[PStruct].setRequired(true) - elementAddress = stepF.genFieldThisRef[Long]("elementAddr") + val optStream = EmitCode.fromI(stepF)(cb => EmitStream.produce(emitter, ir, cb, outerRegion, Env.empty, None)) + val returnPType = optStream.st.asInstanceOf[SStream].elementType.canonicalPType() - val didSetup = stepF.genFieldThisRef[Boolean]("didSetup") - stepF.cb.emitInit(didSetup := false) + val elementAddress = stepF.genFieldThisRef[Long]("elementAddr") - val eosField = stepF.genFieldThisRef[Boolean]("eos") + val didSetup = stepF.genFieldThisRef[Boolean]("didSetup") + stepF.cb.emitInit(didSetup := false) - val producer = optStream.pv.asStream.producer + val eosField = stepF.genFieldThisRef[Boolean]("eos") + + val producer = optStream.pv.asStream.producer + stepF.emitWithBuilder[Boolean] { cb => val ret = cb.newLocal[Boolean]("stepf_ret") val Lreturn = CodeLabel() @@ -226,7 +234,11 @@ object CompileIterator { cb.ifx(!didSetup, { optStream.toI(cb).get(cb) // handle missing, but bound stream producer above - cb.assign(producer.elementRegion, eltRegionField) + if (producer.requiresMemoryManagementPerElement) + cb.assign(producer.elementRegion, Region.stagedCreate(Region.REGULAR, outerRegion.getPool())) + else + cb.assign(producer.elementRegion, outerRegion) + producer.initialize(cb) cb.assign(didSetup, true) cb.assign(eosField, false) @@ -241,6 +253,8 @@ 
object CompileIterator { stepF.implementLabel(producer.LendOfStream) { cb => producer.close(cb) + if (producer.requiresMemoryManagementPerElement) + cb += producer.elementRegion.invalidate() cb.assign(eosField, true) cb.assign(ret, false) cb.goto(Lreturn) diff --git a/hail/src/main/scala/is/hail/expr/ir/CompileAndEvaluate.scala b/hail/src/main/scala/is/hail/expr/ir/CompileAndEvaluate.scala index 3c705b655bd..7c8b9ac7f20 100644 --- a/hail/src/main/scala/is/hail/expr/ir/CompileAndEvaluate.scala +++ b/hail/src/main/scala/is/hail/expr/ir/CompileAndEvaluate.scala @@ -3,8 +3,7 @@ package is.hail.expr.ir import is.hail.annotations.{Region, SafeRow} import is.hail.asm4s._ import is.hail.expr.ir.lowering.LoweringPipeline -import is.hail.types.physical.stypes.PTypeReferenceSingleCodeType -import is.hail.types.physical.{PBaseStruct, PTuple} +import is.hail.types.physical.{PBaseStruct, PTuple, PTypeReferenceSingleCodeType, SingleCodeType} import is.hail.types.virtual.TVoid import is.hail.utils.{FastIndexedSeq, FastSeq} diff --git a/hail/src/main/scala/is/hail/expr/ir/Copy.scala b/hail/src/main/scala/is/hail/expr/ir/Copy.scala index 75ce84ea45f..0264b37dbca 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Copy.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Copy.scala @@ -280,9 +280,6 @@ object Copy { case Die(_, typ, errorId) => assert(newChildren.length == 1) Die(newChildren(0).asInstanceOf[IR], typ, errorId) - case Trap(child) => - assert(newChildren.length == 1) - Trap(newChildren(0).asInstanceOf[IR]) case x@ApplyIR(fn, typeArgs, args) => val r = ApplyIR(fn, typeArgs, newChildren.map(_.asInstanceOf[IR])) r.conversion = x.conversion diff --git a/hail/src/main/scala/is/hail/expr/ir/Emit.scala b/hail/src/main/scala/is/hail/expr/ir/Emit.scala index 9b98956f382..57d49df0981 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Emit.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Emit.scala @@ -3,57 +3,82 @@ package is.hail.expr.ir import is.hail.annotations._ import is.hail.asm4s._ import is.hail.backend.BackendContext +import is.hail.expr.ir.Emit.E import is.hail.expr.ir.agg.{AggStateSig, ArrayAggStateSig, GroupedStateSig} -import is.hail.expr.ir.analyses.{ComputeMethodSplits, ControlFlowPreventsSplit, ParentPointers} +import is.hail.expr.ir.functions.StringFunctions import is.hail.expr.ir.lowering.TableStageDependency -import is.hail.expr.ir.ndarrays.EmitNDArray import is.hail.expr.ir.streams.{EmitStream, StreamProducer, StreamUtils} -import is.hail.io.{BufferSpec, InputBuffer, OutputBuffer, TypedCodecSpec} +import is.hail.io.{BufferSpec, InputBuffer, OutputBuffer} import is.hail.linalg.{BLAS, LAPACK, LinalgCodeUtils} import is.hail.services.shuffler._ +import is.hail.types.TypeWithRequiredness import is.hail.types.physical._ -import is.hail.types.physical.stypes._ -import is.hail.types.physical.stypes.concrete._ -import is.hail.types.physical.stypes.interfaces._ -import is.hail.types.physical.stypes.primitives._ +import is.hail.types.physical.stypes.concrete.{SBaseStructPointerCode, SCanonicalShufflePointer, SCanonicalShufflePointerCode, SCanonicalShufflePointerSettable} +import is.hail.types.physical.stypes.interfaces.{SBaseStructCode, SNDArray, SNDArrayCode, SStreamCode} +import is.hail.types.physical.stypes.primitives.{SFloat32, SFloat64, SInt32, SInt32Code, SInt64} +import is.hail.types.physical.stypes.{EmitType, SCode, SType} import is.hail.types.virtual._ -import is.hail.types.{TypeWithRequiredness, VirtualTypeWithReq} import is.hail.utils._ +import is.hail.utils.richUtils.RichCodeRegion import 
java.io._ import scala.collection.mutable import scala.language.{existentials, postfixOps} // class for holding all information computed ahead-of-time that we need in the emitter -object EmitContext { - def analyze(ctx: ExecuteContext, ir: IR): EmitContext = { - ctx.timer.time("EmitContext.analyze") { - val usesAndDefs = ComputeUsesAndDefs(ir, errorIfFreeVariables = false) - val requiredness = Requiredness.apply(ir, usesAndDefs, null, Env.empty) // Value IR inference doesn't need context - val inLoopCriticalPath = ControlFlowPreventsSplit(ir, ParentPointers(ir), usesAndDefs) - val methodSplits = ComputeMethodSplits(ir,inLoopCriticalPath) - new EmitContext(ctx, requiredness, usesAndDefs, methodSplits, inLoopCriticalPath, Memo.empty[Unit]) - } +class EmitContext(val executeContext: ExecuteContext, val req: RequirednessAnalysis) + + +object SetupBuilder { + def apply(mb: EmitMethodBuilder[_]): SetupBuilder = new SetupBuilder(mb, Code._empty) + + def apply(mb: EmitMethodBuilder[_], setup: Code[Unit]): SetupBuilder = new SetupBuilder(mb, setup) + + def map[T, U](mb: EmitMethodBuilder[_])(is: IndexedSeq[T])(f: (SetupBuilder, T) => U): (Code[Unit], IndexedSeq[U]) = { + val sb = SetupBuilder(mb) + val rs = sb.map(is)(f) + (sb.setup, rs) + } + + def map[T, U](mb: EmitMethodBuilder[_], setup: Code[Unit])(is: IndexedSeq[T])(f: (SetupBuilder, T) => U): (Code[Unit], IndexedSeq[U]) = { + val sb = SetupBuilder(mb, setup) + val rs = sb.map(is)(f) + (sb.setup, rs) } } -class EmitContext( - val executeContext: ExecuteContext, - val req: RequirednessAnalysis, - val usesAndDefs: UsesAndDefs, - val methodSplits: Memo[Unit], - val inLoopCriticalPath: Memo[Unit], - val tryingToSplit: Memo[Unit] -) +class SetupBuilder(mb: EmitMethodBuilder[_], var setup: Code[Unit]) { + def append(c: Code[Unit]): Unit = { + setup = Code(setup, c) + } + + def +=(c: Code[Unit]): Unit = append(c) + + def memoize[T](e: Code[T], name: String)(implicit tti: TypeInfo[T]): Value[T] = { + val l = mb.newLocal[T](name) + append(l := e) + l + } + + def memoizeField[T](e: Code[T], name: String)(implicit tti: TypeInfo[T]): Value[T] = { + val l = mb.genFieldThisRef[T](name) + append(l := e) + l + } -case class EmitEnv(bindings: Env[EmitValue], inputValues: IndexedSeq[Value[Region] => EmitValue]) { - def bind(name: String, v: EmitValue): EmitEnv = copy(bindings = bindings.bind(name, v)) + def map[T, U](is: IndexedSeq[T])(f: (SetupBuilder, T) => U): IndexedSeq[U] = is.map(f(this, _)) - def bind(newBindings: (String, EmitValue)*): EmitEnv = copy(bindings = bindings.bindIterable(newBindings)) + def result(): Code[Unit] = { + val r = setup + setup = null + r + } } object Emit { - def apply[C](ctx: EmitContext, ir: IR, fb: EmitFunctionBuilder[C], rti: TypeInfo[_], nParams: Int, aggs: Option[Array[AggStateSig]] = None): Option[SingleCodeType] = { + type E = Env[EmitValue] + + def apply[C](ctx: EmitContext, ir: IR, fb: EmitFunctionBuilder[C], rti: TypeInfo[_], aggs: Option[Array[AggStateSig]] = None): Option[SingleCodeType] = { TypeCheck(ir) val mb = fb.apply_method @@ -65,21 +90,19 @@ object Emit { val region = mb.getCodeParam[Region](1) val returnTypeOption: Option[SingleCodeType] = if (ir.typ == TVoid) { fb.apply_method.voidWithBuilder { cb => - val env = EmitEnv(Env.empty, (0 until nParams).map(i => mb.storeEmitParam(i + 2, cb))) // this, region, ... 
- emitter.emitVoid(cb, ir, region, env, container, None) + emitter.emitVoid(cb, ir, region, Env.empty, container, None) } None } else { var sct: SingleCodeType = null fb.emitWithBuilder { cb => - val env = EmitEnv(Env.empty, (0 until nParams).map(i => mb.storeEmitParam(i + 2, cb))) // this, region, ... - val sc = emitter.emitI(ir, cb, region, env, container, None).handle(cb, { + val pcode = emitter.emitI(ir, cb, region, Env.empty, container, None).handle(cb, { cb._throw[RuntimeException]( Code.newInstance[RuntimeException, String]("cannot return empty")) }) - val scp = SingleCodeSCode.fromSCode(cb, sc, region) + val scp = SingleCodePCode.fromPCode(cb, pcode, region) assert(scp.typ.ti == rti, s"type info mismatch: expect $rti, got ${ scp.typ.ti }") sct = scp.typ scp.code @@ -104,21 +127,21 @@ object AggContainer { aggState } - val cleanup = { cb: EmitCodeBuilder => + val cleanup = { cb: EmitCodeBuilder => aggState.store(cb) cb += region.load().invalidate() - cb.assign(region, Code._null[Region]) + cb.assign(region, Code._null) } (AggContainer(aggs, aggState, () => ()), (cb: EmitCodeBuilder) => cb += setup, cleanup) } def fromMethodBuilder[C](aggs: Array[AggStateSig], mb: EmitMethodBuilder[C], varPrefix: String): (AggContainer, EmitCodeBuilder => Unit, EmitCodeBuilder => Unit) = - fromVars(aggs, mb, mb.genFieldThisRef[Region](s"${ varPrefix }_top_region"), mb.genFieldThisRef[Long](s"${ varPrefix }_off")) + fromVars(aggs, mb, mb.genFieldThisRef[Region](s"${varPrefix}_top_region"), mb.genFieldThisRef[Long](s"${varPrefix}_off")) def fromBuilder[C](cb: EmitCodeBuilder, aggs: Array[AggStateSig], varPrefix: String): AggContainer = { - val off = cb.newField[Long](s"${ varPrefix }_off") - val region = cb.newField[Region](s"${ varPrefix }_top_region", Region.stagedCreate(Region.REGULAR, cb.emb.ecb.pool())) + val off = cb.newField[Long](s"${varPrefix}_off") + val region = cb.newField[Region](s"${varPrefix}_top_region", Region.stagedCreate(Region.REGULAR, cb.emb.ecb.pool())) val states = agg.StateTuple(aggs.map(a => agg.AggStateSig.getState(a, cb.emb.ecb))) val aggState = new agg.TupleAggregatorState(cb.emb.ecb, states, region, off) cb += region.load().setNumParents(aggs.length) @@ -128,7 +151,7 @@ object AggContainer { AggContainer(aggs, aggState, { () => aggState.store(cb) cb += region.load().invalidate() - cb.assign(region, Code._null[Region]) + cb.assign(region, Code._null) }) } } @@ -159,19 +182,16 @@ case class EmitRegion(mb: EmitMethodBuilder[_], region: Value[Region]) { } abstract class EmitValue { - def emitType: EmitType - - def st: SType = emitType.st + def pt: PType def load: EmitCode - def get(cb: EmitCodeBuilder): SCode + def get(cb: EmitCodeBuilder): PCode } class EmitUnrealizableValue(private val ec: EmitCode) extends EmitValue { - val emitType: EmitType = ec.emitType - - assert(st.isInstanceOf[SStream]) + val pt: PType = ec.pt + assert(!pt.isRealizable) private[this] var used: Boolean = false def load: EmitCode = { @@ -180,17 +200,17 @@ class EmitUnrealizableValue(private val ec: EmitCode) extends EmitValue { ec } - override def get(cb: EmitCodeBuilder): SCode = throw new UnsupportedOperationException(s"Can't make PValue for unrealizable type ${ ec.st }") + override def get(cb: EmitCodeBuilder): PCode = throw new UnsupportedOperationException(s"Can't make PValue for unrealizable type ${pt}") } /** - * Notes on IEmitCode; - * 1. It is the responsibility of the producers of IEmitCode to emit the relevant - * jumps for the Lmissing and Lpresent labels (cb.goto or similar) - * 2. 
It is the responsibility of consumers to define these labels and to - * prevent the SCode from being used on any code path taken as a result of - * jumping to Lmissing. - */ + * Notes on IEmitCode; + * 1. It is the responsibility of the producers of IEmitCode to emit the relevant + * jumps for the Lmissing and Lpresent labels (cb.goto or similar) + * 2. It is the responsibility of consumers to define these labels and to + * prevent the pcode from being used on any code path taken as a result of + * jumping to Lmissing. + */ object IEmitCode { def apply[A](cb: EmitCodeBuilder, m: Code[Boolean], value: => A): IEmitCodeGen[A] = { Code.constBoolValue(m) match { @@ -221,7 +241,7 @@ object IEmitCode { IEmitCodeGen(Lmissing, CodeLabel(), defaultValue, false) } - def multiMapEmitCodes(cb: EmitCodeBuilder, seq: IndexedSeq[EmitCode])(f: IndexedSeq[SCode] => SCode): IEmitCode = { + def multiMapEmitCodes(cb: EmitCodeBuilder, seq: IndexedSeq[EmitCode])(f: IndexedSeq[PCode] => PCode): IEmitCode = { val Lmissing = CodeLabel() val Lpresent = CodeLabel() @@ -241,7 +261,7 @@ object IEmitCode { } def multiFlatMap[A, B, C](seq: IndexedSeq[A], toIec: A => IEmitCodeGen[B], cb: EmitCodeBuilder) - (f: IndexedSeq[B] => IEmitCodeGen[C]): IEmitCodeGen[C] = { + (f: IndexedSeq[B] => IEmitCodeGen[C]): IEmitCodeGen[C] = { val Lmissing = CodeLabel() var required: Boolean = true @@ -266,31 +286,26 @@ object IEmitCode { } object IEmitCodeGen { - - implicit class IEmitCode(val iec: IEmitCodeGen[SCode]) extends AnyVal { - def pc: SCode = iec.value - - def st: SType = pc.st + implicit class IEmitCode(val iec: IEmitCodeGen[PCode]) extends AnyVal { + def pc: PCode = iec.value + def pt: PType = pc.pt def memoize(cb: EmitCodeBuilder, name: String): EmitValue = cb.memoize(iec, name) - - def memoizeField(cb: EmitCodeBuilder, name: String): EmitValue = - cb.memoizeField(iec, name) } - } -case class IEmitCodeGen[+A](Lmissing: CodeLabel, Lpresent: CodeLabel, value: A, required: Boolean) { +case class IEmitCodeGen[+A](Lmissing: CodeLabel, Lpresent: CodeLabel, value: A, val required: Boolean) { lazy val emitType: EmitType = { value match { - case pc: SCode => EmitType(pc.st, required) + case pc: PCode => EmitType(pc.st, required) case _ => throw new UnsupportedOperationException(s"emitType on $value") } } - def setOptional: IEmitCodeGen[A] = copy(required = false) + // This method is a very temporary patch until we can properly separate SCode and PCode + def typecast[T]: IEmitCodeGen[T] = IEmitCodeGen(Lmissing, Lpresent, value.asInstanceOf[T], required) def map[B](cb: EmitCodeBuilder)(f: (A) => B): IEmitCodeGen[B] = { val Lpresent2 = CodeLabel() @@ -323,10 +338,7 @@ case class IEmitCodeGen[+A](Lmissing: CodeLabel, Lpresent: CodeLabel, value: A, value } - def get(cb: EmitCodeBuilder, errorMsg: String = s"expected non-missing"): A = - handle(cb, cb._fatal(errorMsg)) - - def get(cb: EmitCodeBuilder, errorMsg: Code[String]): A = + def get(cb: EmitCodeBuilder, errorMsg: String = "expected non-missing"): A = handle(cb, cb._fatal(errorMsg)) def consume(cb: EmitCodeBuilder, ifMissing: => Unit, ifPresent: (A) => Unit): Unit = { @@ -339,17 +351,17 @@ case class IEmitCodeGen[+A](Lmissing: CodeLabel, Lpresent: CodeLabel, value: A, cb.define(Lafter) } - def consumeSCode(cb: EmitCodeBuilder, ifMissing: => SCode, ifPresent: (A) => SCode): SCode = { + def consumePCode(cb: EmitCodeBuilder, ifMissing: => PCode, ifPresent: (A) => PCode): PCode = { val Lafter = CodeLabel() cb.define(Lmissing) val missingValue = ifMissing - val st = missingValue.st - val 
ret = cb.emb.newPLocal(st) + val pt = missingValue.pt + val ret = cb.emb.newPLocal(pt) cb.assign(ret, missingValue) cb.goto(Lafter) cb.define(Lpresent) val presentValue = ifPresent(value) - assert(presentValue.st == st) + assert(presentValue.pt == pt) cb.assign(ret, presentValue) cb.define(Lafter) ret @@ -363,7 +375,7 @@ case class IEmitCodeGen[+A](Lmissing: CodeLabel, Lpresent: CodeLabel, value: A, } object EmitCode { - def apply(setup: Code[Unit], m: Code[Boolean], pv: SCode): EmitCode = { + def apply(setup: Code[Unit], m: Code[Boolean], pv: PCode): EmitCode = { Code.constBoolValue(m) match { case Some(false) => val Lpresent = CodeLabel() @@ -376,7 +388,7 @@ object EmitCode { } } - def unapply(ec: EmitCode): Option[(Code[Boolean], SCode)] = + def unapply(ec: EmitCode): Option[(Code[Boolean], PCode)] = Some((ec.m, ec.pv)) def apply(setup: Code[Unit], ec: EmitCode): EmitCode = { @@ -385,9 +397,9 @@ object EmitCode { new EmitCode(Lstart, ec.iec) } - def present(mb: EmitMethodBuilder[_], pc: SCode): EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, pc)) + def present(mb: EmitMethodBuilder[_], pc: PCode): EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, pc)) - def missing(mb: EmitMethodBuilder[_], pt: SType): EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.missing(cb, pt.defaultValue)) + def missing(mb: EmitMethodBuilder[_], pt: PType): EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.missing(cb, pt.defaultValue(mb))) def fromI(mb: EmitMethodBuilder[_])(f: (EmitCodeBuilder) => IEmitCode): EmitCode = { val cb = EmitCodeBuilder(mb) @@ -401,39 +413,35 @@ class EmitCode(private val start: CodeLabel, private val iec: IEmitCode) { def required: Boolean = iec.required - def setOptional: EmitCode = new EmitCode(start, iec.setOptional) - lazy val emitType: EmitType = iec.emitType - def emitParamType: SCodeEmitParamType = emitType.paramType + def emitParamType: PCodeEmitParamType = PCodeEmitParamType(st.pType) def st: SType = iec.value.st - def pv: SCode = iec.value + def pv: PCode = iec.value val m: Code[Boolean] = new CCode(start.L, iec.Lmissing.L, iec.Lpresent.L) + def pt: PType = pv.pt + + def v: Code[_] = pv.code + def toI(cb: EmitCodeBuilder): IEmitCode = { cb.goto(start) iec } - def castTo(mb: EmitMethodBuilder[_], region: Value[Region], destType: SType, deepCopy: Boolean = false): EmitCode = { + def castTo(mb: EmitMethodBuilder[_], region: Value[Region], destType: PType, deepCopy: Boolean = false): EmitCode = { EmitCode.fromI(mb)(cb => toI(cb).map(cb)(_.castTo(cb, region, destType))) } - def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = { - val ct = if (required) { - toI(cb).get(cb).makeCodeTuple(cb) - } else { - val es = cb.emb.newEmitLocal("ec_makecodetuple", emitType) - cb.assign(es, toI(cb)) - es.pv.makeCodeTuple(cb) :+ es.m - } - - assert(ct.zip(emitParamType.codeTupleTypes).forall { case (p, pt) => p.ti == pt.ti}, - s"ctt mismatch: $emitType\n param: ${ct.map(_.ti)}\n types: ${emitParamType.codeTupleTypes}") - ct + def codeTuple(): IndexedSeq[Code[_]] = { + val tc = pv.codeTuple() + if (pt.required) + tc + else + tc :+ m } def missingIf(mb: EmitMethodBuilder[_], cond: Code[Boolean]): EmitCode = @@ -449,63 +457,19 @@ class EmitCode(private val start: CodeLabel, private val iec: IEmitCode) { } def asVoid(): Code[Unit] = { - require(pv.st == SVoid) + require(pv.pt == PVoid) Code.toUnit(m) } } -object EmitSettable { - def present(vs: SSettable): EmitSettable = new EmitSettable(None, vs) -} - -class EmitSettable( - missing: Option[Settable[Boolean]], // required if None 
- vs: SSettable) extends EmitValue { - - lazy val required: Boolean = missing.isEmpty - - lazy val emitType: EmitType = EmitType(vs.st, required) - - def settableTuple(): IndexedSeq[Settable[_]] = { - missing match { - case Some(m) => vs.settableTuple() :+ m - case None => vs.settableTuple() - } - } - - def m: Code[Boolean] = missing.map(_.load()).getOrElse(const(false)) +abstract class EmitSettable extends EmitValue { + def store(cb: EmitCodeBuilder, ec: EmitCode): Unit - def load: EmitCode = { - val ec = EmitCode(Code._empty, - if (required) const(false) else missing.get.load(), - vs.get) - assert(ec.required == required) - ec - } - - def store(cb: EmitCodeBuilder, ec: EmitCode): Unit = { - store(cb, ec.toI(cb)) - } - - def store(cb: EmitCodeBuilder, iec: IEmitCode): Unit = - if (required) - cb.assign(vs, iec.get(cb, s"Required EmitSettable cannot be missing ${ st }")) - else - iec.consume(cb, { - cb.assign(missing.get, true) - }, { value => - cb.assign(missing.get, false) - cb.assign(vs, value) - }) + def store(cb: EmitCodeBuilder, iec: IEmitCode): Unit +} - override def get(cb: EmitCodeBuilder): SCode = { - if (required) { - vs - } else { - cb.ifx(missing.get, cb._fatal(s"Can't convert missing ${ st } to PValue")) - vs - } - } +abstract class PresentEmitSettable extends EmitValue { + def store(cb: EmitCodeBuilder, pc: PCode): Unit } class RichIndexedSeqEmitSettable(is: IndexedSeq[EmitSettable]) { @@ -513,9 +477,9 @@ class RichIndexedSeqEmitSettable(is: IndexedSeq[EmitSettable]) { } object LoopRef { - def apply(cb: EmitCodeBuilder, L: CodeLabel, args: IndexedSeq[(String, EmitType)], pool: Value[RegionPool], resultType: EmitType): LoopRef = { - val (loopArgs, tmpLoopArgs) = args.zipWithIndex.map { case ((name, et), i) => - (cb.emb.newEmitField(s"$name$i", et), cb.emb.newEmitField(s"tmp$name$i", et)) + def apply(cb: EmitCodeBuilder, L: CodeLabel, args: IndexedSeq[(String, PType)], pool: Value[RegionPool], resultType: EmitType): LoopRef = { + val (loopArgs, tmpLoopArgs) = args.zipWithIndex.map { case ((name, pt), i) => + (cb.emb.newEmitField(s"$name$i", pt, pt.required), cb.emb.newEmitField(s"tmp$name$i", pt, pt.required)) }.unzip val r1: Settable[Region] = cb.newLocal[Region]("loop_ref_r1") @@ -530,7 +494,7 @@ object LoopRef { class LoopRef( val L: CodeLabel, - val loopTypes: IndexedSeq[EmitType], + val loopTypes: IndexedSeq[PType], val loopArgs: IndexedSeq[EmitSettable], val tmpLoopArgs: IndexedSeq[EmitSettable], val r1: Settable[Region], @@ -545,68 +509,24 @@ abstract class EstimableEmitter[C] { class Emit[C]( val ctx: EmitContext, - val cb: EmitClassBuilder[C]) { - emitSelf => - - val methods: mutable.Map[(String, Seq[Type], Seq[SType], SType), EmitMethodBuilder[C]] = mutable.Map() - - def emitVoidInSeparateMethod(context: String, cb: EmitCodeBuilder, ir: IR, region: Value[Region], env: EmitEnv, container: Option[AggContainer], loopEnv: Option[Env[LoopRef]]): Unit = { - assert(!ctx.inLoopCriticalPath.contains(ir)) - val mb = cb.emb.genEmitMethod(context, FastIndexedSeq[ParamType](), UnitInfo) - val r = cb.newField[Region]("emitVoidSeparate_region", region) - mb.voidWithBuilder { cb => - ctx.tryingToSplit.bind(ir, ()) - emitVoid(cb, ir, r, env, container, loopEnv) - } - cb.invokeVoid(mb) - } - - def emitSplitMethod(context: String, cb: EmitCodeBuilder, ir: IR, region: Value[Region], env: EmitEnv, container: Option[AggContainer], loopEnv: Option[Env[LoopRef]]): (EmitSettable, EmitMethodBuilder[_]) = { - val mb = cb.emb.genEmitMethod(context, FastIndexedSeq[ParamType](), UnitInfo) - val r 
= cb.newField[Region]("emitInSeparate_region", region) - - var ev: EmitSettable = null - mb.voidWithBuilder { cb => - ctx.tryingToSplit.bind(ir, ()) - val result = emitI(ir, cb, r, env, container, loopEnv) - - ev = cb.emb.ecb.newEmitField(s"${context}_result", result.emitType) - cb.assign(ev, result) - } - (ev, mb) - } + val cb: EmitClassBuilder[C]) { emitSelf => - def emitInSeparateMethod(context: String, cb: EmitCodeBuilder, ir: IR, region: Value[Region], env: EmitEnv, container: Option[AggContainer], loopEnv: Option[Env[LoopRef]]): IEmitCode = { - if (ir.typ == TVoid) { - emitVoidInSeparateMethod(context, cb, ir, region, env, container, loopEnv) - return IEmitCode.present(cb, SVoidCode) - } + val methods: mutable.Map[(String, Seq[Type], Seq[PType], PType), EmitMethodBuilder[C]] = mutable.Map() - assert(!ctx.inLoopCriticalPath.contains(ir)) - val (ev, mb) = emitSplitMethod(context, cb, ir, region, env, container, loopEnv) - cb.invokeVoid(mb) - ev.toI(cb) - } - - private[ir] def emitVoid(cb: EmitCodeBuilder, ir: IR, region: Value[Region], env: EmitEnv, container: Option[AggContainer], loopEnv: Option[Env[LoopRef]]): Unit = { - if (ctx.methodSplits.contains(ir) && !ctx.tryingToSplit.contains(ir)) { - emitVoidInSeparateMethod(s"split_${ir.getClass.getSimpleName}", cb, ir, region, env, container, loopEnv) - return - } + private[ir] def emitVoid(cb: EmitCodeBuilder, ir: IR, region: Value[Region], env: E, container: Option[AggContainer], loopEnv: Option[Env[LoopRef]]): Unit = { val mb: EmitMethodBuilder[C] = cb.emb.asInstanceOf[EmitMethodBuilder[C]] - - def emit(ir: IR, mb: EmitMethodBuilder[C] = mb, region: Value[Region] = region, env: EmitEnv = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): EmitCode = + def emit(ir: IR, mb: EmitMethodBuilder[C] = mb, region: Value[Region] = region, env: E = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): EmitCode = this.emit(ir, mb, region, env, container, loopEnv) def emitStream(ir: IR, outerRegion: Value[Region], mb: EmitMethodBuilder[C] = mb): EmitCode = EmitCode.fromI(mb)(cb => EmitStream.produce(this, ir, cb, outerRegion, env, container)) - def emitVoid(ir: IR, cb: EmitCodeBuilder = cb, region: Value[Region] = region, env: EmitEnv = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): Unit = + def emitVoid(ir: IR, cb: EmitCodeBuilder = cb, region: Value[Region] = region, env: E = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): Unit = this.emitVoid(cb, ir, region, env, container, loopEnv) - def emitI(ir: IR, region: Value[Region] = region, env: EmitEnv = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): IEmitCode = + def emitI(ir: IR, region: Value[Region] = region, env: E = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): IEmitCode = this.emitI(ir, cb, region, env, container, loopEnv) (ir: @unchecked) match { @@ -628,6 +548,8 @@ class Emit[C]( } case StreamFor(a, valueName, body) => + val streamType = coerce[PStream](a.pType) + emitStream(a, region).toI(cb).consume(cb, {}, { case stream: SStreamCode => @@ -697,15 +619,15 @@ class Emit[C]( Array.range(start, start + ns).foreach(i => sc.newState(cb, i)) cb.assign(ib, spec.buildCodeInputBuffer( - Code.newInstance[ByteArrayInputStream, Array[Byte]]( - mb.getSerializedAgg(sIdx)))) + Code.newInstance[ByteArrayInputStream, Array[Byte]]( + 
mb.getSerializedAgg(sIdx)))) cb += mb.freeSerializedAgg(sIdx) (0 until ns).foreach { j => deserializers(j)(cb, ib) } - cb.assign(ib, Code._null[InputBuffer]) + cb.assign(ib, Code._null) case Die(m, typ, errorId) => val cm = emitI(m) @@ -719,12 +641,12 @@ class Emit[C]( val AggContainer(_, sc, _) = container.get val rvAgg = agg.Extract.getAgg(aggSig) val tempState = AggStateSig.getState(aggSig.state, mb.ecb) - val aggStateOffset = mb.genFieldThisRef[Long](s"combOpValue_${ i }_state"); + val aggStateOffset = mb.genFieldThisRef[Long](s"combOpValue_${i}_state"); val v = emitI(value) v.consume(cb, cb._fatal("cannot combOp a missing value"), - { case serializedValue: SBinaryCode => + { case serializedValue: PBinaryCode => cb.assign(aggStateOffset, region.allocate(tempState.storageType.alignment, tempState.storageType.byteSize)) tempState.createState(cb) tempState.newState(cb) @@ -740,7 +662,7 @@ class Emit[C]( val v = emitI(value) v.consume(cb, cb._fatal("cannot initialize aggs from a missing value"), - { case serializedValue: SBinaryCode => + { case serializedValue: PBinaryCode => sc.states(i).createState(cb) sc.newState(cb, i) sc.states(i).deserializeFromBytes(cb, serializedValue) @@ -749,113 +671,102 @@ class Emit[C]( } } - private[ir] def emitI(ir: IR, cb: EmitCodeBuilder, env: EmitEnv, container: Option[AggContainer]): IEmitCode = { + private[ir] def emitI(ir: IR, cb: EmitCodeBuilder, env: E, container: Option[AggContainer]): IEmitCode = { val region = cb.emb.getCodeParam[Region](1) emitI(ir, cb, region, env, container, None) } - private[ir] def emitI(ir: IR, cb: EmitCodeBuilder, region: Value[Region], env: EmitEnv, + private[ir] def emitI(ir: IR, cb: EmitCodeBuilder, region: Value[Region], env: E, container: Option[AggContainer], loopEnv: Option[Env[LoopRef]] ): IEmitCode = { - if (ctx.methodSplits.contains(ir) && !ctx.tryingToSplit.contains(ir)) { - return emitInSeparateMethod(s"split_${ir.getClass.getSimpleName}", cb, ir, region, env, container, loopEnv) - } - val mb: EmitMethodBuilder[C] = cb.emb.asInstanceOf[EmitMethodBuilder[C]] - def emitI(ir: IR, region: Value[Region] = region, env: EmitEnv = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): IEmitCode = + def emitI(ir: IR, region: Value[Region] = region, env: E = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): IEmitCode = this.emitI(ir, cb, region, env, container, loopEnv) - def emitInNewBuilder(cb: EmitCodeBuilder, ir: IR, region: Value[Region] = region, env: EmitEnv = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): IEmitCode = + def emitInNewBuilder(cb: EmitCodeBuilder, ir: IR, region: Value[Region] = region, env: E = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): IEmitCode = this.emitI(ir, cb, region, env, container, loopEnv) - def emitStream(ir: IR, cb: EmitCodeBuilder, outerRegion: Value[Region]): IEmitCode = + def emitInMethod(cb: EmitCodeBuilder, ir: IR): IEmitCode = + this.emitI(ir, cb, Env.empty, container) + + def emitStream(ir: IR, cb: EmitCodeBuilder, outerRegion: Value[Region] ): IEmitCode = EmitStream.produce(this, ir, cb, outerRegion, env, container) - def emitVoid(ir: IR, env: EmitEnv = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): Unit = + def emitVoid(ir: IR, env: E = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): Unit = this.emitVoid(cb, ir: IR, region, 
env, container, loopEnv) - def emitFallback(ir: IR, env: EmitEnv = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): IEmitCode = + def emitFallback(ir: IR, env: E = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): IEmitCode = this.emit(ir, mb, region, env, container, loopEnv, fallingBackFromEmitI = true).toI(cb) - def emitDeforestedNDArrayI(ir: IR): IEmitCode = EmitNDArray(this, ir, cb, region, env, container, loopEnv) + def emitDeforestedNDArrayI(ir: IR): IEmitCode = + deforestNDArrayI(ir, cb, region, env) def emitNDArrayColumnMajorStrides(ir: IR): IEmitCode = { - emitI(ir).map(cb) { case pNDCode: SNDArrayCode => + emitI(ir).map(cb){case pNDCode: PNDArrayCode => val pNDValue = pNDCode.memoize(cb, "ndarray_column_major_check") LinalgCodeUtils.checkColMajorAndCopyIfNeeded(pNDValue, cb, region) } } - // Returns an IEmitCode along with a Boolean that is true if the returned value is column major. If false it's row - // major instead. - def emitNDArrayStandardStriding(ir: IR): IEmitCodeGen[(SNDArrayValue, Value[Boolean])] = { - emitI(ir).map(cb) { case pNDCode: SNDArrayCode => - val pNDValue = pNDCode.memoize(cb, "ndarray_standard_striding_check") - LinalgCodeUtils.checkStandardStriding(pNDValue, cb, region) - } - } - - def typeWithReqx(node: IR): VirtualTypeWithReq = VirtualTypeWithReq(node.typ, ctx.req.lookup(node).asInstanceOf[TypeWithRequiredness]) - - def typeWithReq: VirtualTypeWithReq = typeWithReqx(ir) + val pt = ir.pType - if (ir.typ == TVoid) { + if (pt == PVoid) { emitVoid(ir) - return IEmitCode.present(cb, SVoidCode) + return IEmitCode(CodeLabel(), CodeLabel(), PCode._empty, required = true) } - def presentPC(pc: SCode): IEmitCode = IEmitCode.present(cb, pc) + def presentPC(pc: PCode): IEmitCode = IEmitCode.present(cb, pc) + + def presentC(c: Code[_]): IEmitCode = presentPC(PCode(pt, c)) val result: IEmitCode = (ir: @unchecked) match { case I32(x) => - presentPC(primitive(const(x))) + presentC(const(x)) case I64(x) => - presentPC(primitive(const(x))) + presentC(const(x)) case F32(x) => - presentPC(primitive(const(x))) + presentC(const(x)) case F64(x) => - presentPC(primitive(const(x))) + presentC(const(x)) case s@Str(x) => - presentPC(mb.addLiteral(x, typeWithReq)) + presentPC(mb.addLiteral(x, coerce[PString](s.pType))) case x@UUID4(_) => - val pt = PCanonicalString() - presentPC(pt.loadCheapSCode(cb, pt. + presentPC(PCode(x.pType, coerce[PString](x.pType). 
allocateAndStoreString(mb, region, Code.invokeScalaObject0[String]( Class.forName("is.hail.expr.ir.package$"), "uuid4")))) case x@Literal(t, v) => - presentPC(mb.addLiteral(v, typeWithReq)) + presentPC(mb.addLiteral(v, x.pType)) case x@EncodedLiteral(codec, value) => + assert(x.pType == codec.decodedPType()) presentPC(mb.addEncodedLiteral(x)) case True() => - presentPC(primitive(const(true))) + presentC(const(true)) case False() => - presentPC(primitive(const(false))) + presentC(const(false)) case Consume(value) => - emitI(value).map(cb) { pc => + emitI(value).map(cb){pc => cb.memoizeField(pc, "consumed_field") // Ignore pc, just return a 1 - primitive(const(1L)) + PCode(ir.pType, 1L) } case Cast(v, typ) => val iec = emitI(v) val cast = Casts.get(v.typ, typ) - iec.map(cb)(pc => cast(cb, pc)) + iec.map(cb)(pc => PCode(pt, cast(pc.code))) case CastRename(v, _typ) => emitI(v) - .map(cb)(pc => pc.st.castRename(_typ).fromCodes(pc.makeCodeTuple(cb))) + .map(cb)(pc => PCode(pt, pc.code)) case NA(typ) => - IEmitCode(cb, const(true), typeWithReq.canonicalEmitType.st.defaultValue) + IEmitCode(cb, const(true), pt.defaultValue(cb.emb)) case IsNA(v) => val m = emitI(v).consumeCode(cb, true, _ => false) - presentPC(primitive(m)) + presentC(m) case Coalesce(values) => + val coalescedValue = mb.newPLocal("coalesce_value", pt) val emittedValues = values.map(v => EmitCode.fromI(cb.emb)(cb => emitInNewBuilder(cb, v))) - val unifiedType = SType.chooseCompatibleType(typeWithReq, emittedValues.map(_.st): _*) - val coalescedValue = mb.newPLocal("coalesce_value", unifiedType) - val Ldefined = CodeLabel() val Lmissing = CodeLabel() @@ -863,7 +774,7 @@ class Emit[C]( value.toI(cb).consume(cb, {}, // fall through to next check { sc => - cb.assign(coalescedValue, sc.castTo(cb, region, unifiedType)) + cb.assign(coalescedValue, sc.castTo(cb, region, pt)) cb.goto(Ldefined) }) } @@ -877,27 +788,25 @@ class Emit[C]( emitI(cond).flatMap(cb) { condValue => - val codeCnsq = EmitCode.fromI(cb.emb)(cb => emitInNewBuilder(cb, cnsq)) val codeAltr = EmitCode.fromI(cb.emb)(cb => emitInNewBuilder(cb, altr)) - val outType = SType.chooseCompatibleType(typeWithReq, codeCnsq.st, codeAltr.st) val Lmissing = CodeLabel() val Ldefined = CodeLabel() - val out = mb.newPLocal(outType) + val out = mb.newPLocal(pt) cb.ifx(condValue.asBoolean.boolCode(cb), { codeCnsq.toI(cb).consume(cb, { cb.goto(Lmissing) - }, { sc => - cb.assign(out, sc.castTo(cb, region, outType)) + }, {sc => + cb.assign(out, sc.castTo(cb, region, pt)) }) }, { codeAltr.toI(cb).consume(cb, { cb.goto(Lmissing) - }, { sc => - cb.assign(out, sc.castTo(cb, region, outType)) + }, {sc => + cb.assign(out, sc.castTo(cb, region, pt)) }) }) cb.goto(Ldefined) @@ -906,73 +815,100 @@ class Emit[C]( } case x@MakeStruct(fields) => - presentPC(SStackStruct.constructFromArgs(cb, region, x.typ.asInstanceOf[TBaseStruct], - fields.map { case (_, x) => + val scode = x.pType.asInstanceOf[PCanonicalBaseStruct].constructFromFields(cb, + region, + fields.map { case (name, x) => EmitCode.fromI(cb.emb)(cb => emitInNewBuilder(cb, x)) - }: _* - )) + }.toFastIndexedSeq, + deepCopy = false) + presentPC(scode) case x@MakeTuple(fields) => - presentPC(SStackStruct.constructFromArgs(cb, region, x.typ.asInstanceOf[TBaseStruct], + val scode = x.pType.asInstanceOf[PCanonicalBaseStruct].constructFromFields(cb, + region, fields.map { case (_, x) => - EmitCode.fromI(cb.emb)(cb => emitInNewBuilder(cb, x)) - }: _* - )) + EmitCode.fromI(cb.emb)(emitInNewBuilder(_, x)) + }.toFastIndexedSeq, + deepCopy = false) + 
presentPC(scode) case x@SelectFields(oldStruct, fields) => emitI(oldStruct) - .map(cb) { case sc: SBaseStructCode => sc.subset(fields: _*) } + .map(cb) { case sc: SBaseStructCode => + val sv = sc.memoize(cb, "select_fields_scode") + x.pType.asInstanceOf[PCanonicalBaseStruct].constructFromFields(cb, + region, + fields.map { field => + EmitCode.fromI(cb.emb)(cb => sv.loadField(cb, field).typecast[PCode]) + }.toFastIndexedSeq, + deepCopy = false) + } - case x@InsertFields(old, fields, _) => + case x@InsertFields(old, fields, fieldOrder) => if (fields.isEmpty) emitI(old) else { - emitI(old).map(cb) { old => - old.asBaseStruct.insert(cb, region, x.typ, - fields.map { case (name, x) => (name, EmitCode.fromI(cb.emb)(cb => emitInNewBuilder(cb, x))) }: _*) + val codeOld = emitI(old) + val updateMap = Map(fields: _*) + + codeOld.map(cb) { oldPC => + val oldPV = oldPC.asBaseStruct.memoize(cb, "insert_fields_old") + + val itemsEC = x.pType.fields.map { f => + updateMap.get(f.name) match { + case Some(vir) => + EmitCode.fromI(mb)(emitInNewBuilder(_, vir)) + case None => + EmitCode.fromI(mb)(oldPV.loadField(_, f.name).typecast[PCode]) + } + } + + x.pType.asInstanceOf[PCanonicalBaseStruct] + .constructFromFields(cb, region, itemsEC, deepCopy = false) + .asPCode } } case ApplyBinaryPrimOp(op, l, r) => emitI(l).flatMap(cb) { pcL => - emitI(r).map(cb)(pcR => BinaryOp.emit(cb, op, pcL, pcR)) + emitI(r).map(cb)(pcR => PCode(pt, BinaryOp.emit(op, l.typ, r.typ, pcL.code, pcR.code))) } case ApplyUnaryPrimOp(op, x) => - emitI(x).map(cb)(pc => UnaryOp.emit(cb, op, pc)) + emitI(x).map(cb)(pc => PCode(pt, UnaryOp.emit(op, x.typ, pc.code))) case ApplyComparisonOp(op, l, r) => if (op.strict) { emitI(l).flatMap(cb) { l => emitI(r).map(cb) { r => val f = op.codeOrdering(cb.emb.ecb, l.st, r.st) - primitive(ir.typ, f(cb, EmitCode.present(cb.emb, l), EmitCode.present(cb.emb, r))) + PCode(pt, f(cb, EmitCode.present(cb.emb, l), EmitCode.present(cb.emb, r))) } } } else { val lc = emitI(l).memoize(cb, "l") val rc = emitI(r).memoize(cb, "r") val f = op.codeOrdering(cb.emb.ecb, lc.st, rc.st) - presentPC(primitive(ir.typ, f(cb, lc, rc))) + presentC(f(cb, lc, rc)) } case x@MakeArray(args, _) => + val pType = x.pType.asInstanceOf[PCanonicalArray] - val emittedArgs = args.map(a => EmitCode.fromI(mb)(cb => emitInNewBuilder(cb, a))) - val pType = typeWithReq.canonicalPType.asInstanceOf[PCanonicalArray] val (pushElement, finish) = pType.constructFromFunctions(cb, region, args.size, deepCopy = false) - for (arg <- emittedArgs) { - pushElement(cb, arg.toI(cb)) + for (arg <- args) { + val v = emitI(arg) + pushElement(cb, v) } presentPC(finish(cb)) case ArrayZeros(length) => emitI(length).map(cb) { case n: SInt32Code => - val outputPType = PCanonicalArray(PInt32Required) + val outputPType = coerce[PArray](ir.pType) val elementSize = outputPType.elementByteSize val numElements = cb.newLocal[Int]("n_elements", n.intCode(cb)) val arrayAddress = cb.newLocal[Long]("array_addr", outputPType.allocate(region, numElements)) cb += outputPType.stagedInitialize(arrayAddress, numElements) cb += Region.setMemory(outputPType.firstElementOffset(arrayAddress), numElements.toL * elementSize, 0.toByte) - outputPType.loadCheapSCode(cb, arrayAddress) + PCode(pt, arrayAddress) } case x@ArrayRef(a, i, s) => @@ -1016,175 +952,102 @@ class Emit[C]( val av = ac.asIndexable.memoize(cb, "aref_a") val iv = cb.newLocal("i", ic.asInt.intCode(cb)) boundsCheck(cb, iv, av.loadLength()) - av.loadElement(cb, iv) + av.loadElement(cb, iv).typecast[PCode] } } + case 
CastToArray(a) => + emitI(a).map(cb)(pc => pt.fromCodeTuple(pc.codeTuple())) + case ArrayLen(a) => emitI(a).map(cb) { (ac) => - primitive(ac.asIndexable.loadLength()) + PCode(pt, ac.asIndexable.loadLength()) } case GetField(o, name) => emitI(o).flatMap(cb) { oc => - oc.asBaseStruct.loadSingleField(cb, name) + val ov = oc.asBaseStruct.memoize(cb, "get_tup_elem_o") + ov.loadField(cb, name).typecast[PCode] } case GetTupleElement(o, i) => emitI(o).flatMap(cb) { oc => - oc.asBaseStruct.loadSingleField(cb, o.typ.asInstanceOf[TTuple].fieldIndex(i)) + val ov = oc.asBaseStruct.memoize(cb, "get_tup_elem_o") + ov.loadField(cb, oc.pt.asInstanceOf[PTuple].fieldIndex(i)).typecast[PCode] } case x@LowerBoundOnOrderedCollection(orderedCollection, elem, onKey) => emitI(orderedCollection).map(cb) { a => + val typ: PContainer = coerce[PIterable](a.pt).asPContainer val e = EmitCode.fromI(cb.emb)(cb => this.emitI(elem, cb, region, env, container, loopEnv)) - val bs = new BinarySearch[C](mb, a.st.asInstanceOf[SContainer], e.emitType, keyOnly = onKey) - primitive(bs.getClosestIndex(cb, a, e)) + val bs = new BinarySearch[C](mb, typ, e.pt, keyOnly = onKey) + val arr = SingleCodePCode.fromPCode(cb, a, region) + PCode(pt, bs.getClosestIndex(arr.code.asInstanceOf[Code[Long]], e.m, e.v)) } - case x@ArraySort(a, left, right, lessThan) => - emitStream(a, cb, region).map(cb) { case stream: SStreamCode => - val producer = stream.producer - - val sct = SingleCodeType.fromSType(producer.element.st) - - val vab = new StagedArrayBuilder(sct, producer.element.required, mb, 0) - StreamUtils.writeToArrayBuilder(cb, stream.producer, vab, region) - val sorter = new ArraySorter(EmitRegion(mb, region), vab) - sorter.sort(cb, region, makeDependentSortingFunction(cb, sct, lessThan, env, emitSelf, Array(left, right))) - sorter.toRegion(cb, x.typ) + case GroupByKey(collection) => + // sort collection by group + val collectionTyp = coerce[PStream](collection.pType) + val keyValTyp = coerce[PBaseStruct](collectionTyp.elementType) + val keyTyp = keyValTyp.types(0) + val valTyp = keyValTyp.types(1) + val dictTyp = coerce[PCanonicalDict](ir.pType) + val groupTyp = dictTyp.elementType + val arrayTyp = PCanonicalArray(groupTyp, required = true) + + val sortedElts = new StagedArrayBuilder(keyValTyp, mb, 16) + val sorter = new ArraySorter(EmitRegion(mb, region), sortedElts) + + val (k1, k2) = keyValTyp match { + case t: PStruct => GetField(In(0, PCodeEmitParamType(t)), "key") -> GetField(In(1, PCodeEmitParamType(t)), "key") + case t: PTuple => + assert(t.fields(0).index == 0) + GetTupleElement(In(0, PCodeEmitParamType(t)), 0) -> GetTupleElement(In(1, PCodeEmitParamType(t)), 0) } - case x@ToSet(a) => - emitStream(a, cb, region).map(cb) { case stream: SStreamCode => - val producer = stream.producer - - val sct = SingleCodeType.fromSType(producer.element.st) - - val vab = new StagedArrayBuilder(sct, producer.element.required, mb, 0) - StreamUtils.writeToArrayBuilder(cb, stream.producer, vab, region) - val sorter = new ArraySorter(EmitRegion(mb, region), vab) - - def lessThan(cb: EmitCodeBuilder, region: Value[Region], l: Code[_], r: Code[_]): Code[Boolean] = { - cb.emb.ecb.getOrdering(sct.loadedSType, sct.loadedSType) - .ltNonnull(cb, sct.loadToSCode(cb, region, l), sct.loadToSCode(cb, region, r)) - } - - sorter.sort(cb, region, lessThan) - - def skipNext(cb: EmitCodeBuilder, region: Value[Region], l: EmitCode, r: EmitCode): Code[Boolean] = { - cb.newLocal[Boolean]("asdb", cb.emb.ecb.getOrdering(l.st, r.st) - .equiv(cb, l, r, missingEqual = true)) - 
} - - sorter.distinctFromSorted(cb, region, skipNext) - sorter.toRegion(cb, x.typ) + val compare = ApplyComparisonOp(Compare(keyValTyp.types(0).virtualType), k1, k2) < 0 + InferPType(compare) + val leftRightComparatorNames = Array.empty[String] + val sortF = sortedElts.ti match { + case BooleanInfo => makeDependentSortingFunction[Boolean](region, keyValTyp, compare, env, leftRightComparatorNames) + case IntInfo => makeDependentSortingFunction[Int](region, keyValTyp, compare, env, leftRightComparatorNames) + case LongInfo => makeDependentSortingFunction[Long](region, keyValTyp, compare, env, leftRightComparatorNames) + case FloatInfo => makeDependentSortingFunction[Float](region, keyValTyp, compare, env, leftRightComparatorNames) + case DoubleInfo => makeDependentSortingFunction[Double](region, keyValTyp, compare, env, leftRightComparatorNames) } - case x@ToDict(a) => - emitStream(a, cb, region).map(cb) { case stream: SStreamCode => - val producer = stream.producer - - val sct = SingleCodeType.fromSType(producer.element.st) - - val vab = new StagedArrayBuilder(sct, producer.element.required, mb, 0) - StreamUtils.writeToArrayBuilder(cb, stream.producer, vab, region) - val sorter = new ArraySorter(EmitRegion(mb, region), vab) - - def lessThan(cb: EmitCodeBuilder, region: Value[Region], l: Code[_], r: Code[_]): Code[Boolean] = { - val lk = EmitCode.fromI(cb.emb)(cb => sct.loadToSCode(cb, region, l) - .asBaseStruct.memoize(cb, "lt_l") - .loadField(cb, 0)) - - val rk = EmitCode.fromI(cb.emb)(cb => sct.loadToSCode(cb, region, r) - .asBaseStruct.memoize(cb, "lt_r") - .loadField(cb, 0)) - - cb.emb.ecb.getOrdering(lk.st, rk.st) - .lt(cb, lk, rk, missingEqual = true) - } - - sorter.sort(cb, region, lessThan) - sorter.pruneMissing(cb) + val groupSizes = new StagedArrayBuilder(PInt32(), mb, 0) - def skipNext(cb: EmitCodeBuilder, region: Value[Region], l: EmitCode, r: EmitCode): Code[Boolean] = { - - val lk = EmitCode.fromI(cb.emb) { cb => - l.toI(cb).flatMap(cb) { x => - x.asBaseStruct.memoize(cb, "lt_l") - .loadField(cb, 0) - } - } - - val rk = EmitCode.fromI(cb.emb) { cb => - r.toI(cb).flatMap(cb) { x => - x.asBaseStruct.memoize(cb, "lt_r") - .loadField(cb, 0) - } - } - - cb.emb.ecb.getOrdering(lk.st, rk.st) - .equiv(cb, lk, rk, missingEqual = true) - } - - sorter.distinctFromSorted(cb, region, skipNext) - sorter.toRegion(cb, x.typ) + val (lastKey, currKey) = (keyValTyp.virtualType: @unchecked) match { + case ts: TStruct => + GetField(In(0, PCodeEmitParamType(keyValTyp)), ts.fieldNames(0)) -> GetField(In(1, PCodeEmitParamType(keyValTyp)), ts.fieldNames(0)) + case tt: TTuple => + GetTupleElement(In(0, PCodeEmitParamType(keyValTyp)), tt.fields(0).index) -> GetTupleElement(In(1, PCodeEmitParamType(keyValTyp)), tt.fields(0).index) + } + val compare2 = ApplyComparisonOp(EQWithNA(keyTyp.virtualType), lastKey, currKey) + InferPType(compare2) + val isSame = mb.genEmitMethod("isSame", + FastIndexedSeq(typeInfo[Region], PCodeEmitParamType(keyValTyp), PCodeEmitParamType(keyValTyp)), + BooleanInfo) + isSame.emitWithBuilder { cb => + emitInMethod(cb, compare2).consumeCode[Boolean](cb, true, _.asBoolean.boolCode(cb)) } - case GroupByKey(collection) => + val eltIdx = mb.newLocal[Int]("groupByKey_eltIdx") + val grpIdx = mb.newLocal[Int]("groupByKey_grpIdx") + val withinGrpIdx = mb.newLocal[Int]("groupByKey_withinGrpIdx") + val outerSize = mb.newLocal[Int]("groupByKey_outerSize") + val groupSize = mb.newLocal[Int]("groupByKey_groupSize") + emitStream(collection, cb, region).map(cb) { case stream: SStreamCode => - val 
sct = SingleCodeType.fromSType(stream.producer.element.st) - val sortedElts = new StagedArrayBuilder(sct, stream.producer.element.required, mb, 16) StreamUtils.writeToArrayBuilder(cb, stream.producer, sortedElts, region) - val sorter = new ArraySorter(EmitRegion(mb, region), sortedElts) - - def lt(cb: EmitCodeBuilder, region: Value[Region], l: Code[_], r: Code[_]): Code[Boolean] = { - val lk = EmitCode.fromI(cb.emb)(cb => sct.loadToSCode(cb, region, l) - .asBaseStruct.memoize(cb, "lt_l") - .loadField(cb, 0)) - val rk = EmitCode.fromI(cb.emb)(cb => sct.loadToSCode(cb, region, r) - .asBaseStruct.memoize(cb, "lt_r") - .loadField(cb, 0)) - cb.emb.ecb.getOrdering(lk.st, rk.st) - .lt(cb, lk, rk, missingEqual = true) - } - - sorter.sort(cb, region, lt) - sorter.pruneMissing(cb) - - val groupSizes = new StagedArrayBuilder(Int32SingleCodeType, true, mb, 0) - - val eltIdx = mb.newLocal[Int]("groupByKey_eltIdx") - val grpIdx = mb.newLocal[Int]("groupByKey_grpIdx") - val withinGrpIdx = mb.newLocal[Int]("groupByKey_withinGrpIdx") - val outerSize = mb.newLocal[Int]("groupByKey_outerSize") - val groupSize = mb.newLocal[Int]("groupByKey_groupSize") - - + cb += sorter.sort(sortF) + cb += sorter.pruneMissing cb += groupSizes.clear cb.assign(eltIdx, 0) cb.assign(groupSize, 0) - def sameKeyAtIndices(cb: EmitCodeBuilder, region: Value[Region], idx1: Code[Int], idx2: Code[Int]): Code[Boolean] = { - val lk = EmitCode.fromI(cb.emb) { cb => - sortedElts.loadFromIndex(cb, region, idx1).flatMap(cb) { x => - x.asBaseStruct.memoize(cb, "lt_l") - .loadField(cb, 0) - } - } - - val rk = EmitCode.fromI(cb.emb) { cb => - sortedElts.loadFromIndex(cb, region, idx2).flatMap(cb) { x => - x.asBaseStruct.memoize(cb, "lt_r") - .loadField(cb, 0) - } - } - - cb.emb.ecb.getOrdering(lk.st, rk.st) - .equiv(cb, lk, rk, missingEqual = true) - } - cb.whileLoop(eltIdx < sortedElts.size, { val bottomOfLoop = CodeLabel() val newGroup = CodeLabel() @@ -1193,7 +1056,7 @@ class Emit[C]( cb.ifx(eltIdx.ceq(sortedElts.size - 1), { cb.goto(newGroup) }, { - cb.ifx(sameKeyAtIndices(cb, region, eltIdx, eltIdx + 1), { + cb.ifx(cb.invokeCode[Boolean](isSame, region, sortedElts.applyEV(mb, eltIdx), sortedElts.applyEV(mb, eltIdx + 1)), { cb.goto(bottomOfLoop) }, { cb.goto(newGroup) @@ -1208,12 +1071,7 @@ class Emit[C]( }) cb.assign(outerSize, groupSizes.size) - val loadedElementType = sct.loadedSType.asInstanceOf[SBaseStruct] - val innerType = PCanonicalArray(loadedElementType.fieldEmitTypes(1).canonicalPType, true) - val kt = loadedElementType.fieldEmitTypes(0).canonicalPType - val groupType = PCanonicalStruct(true, ("key", kt), ("value", innerType)) - val dictType = PCanonicalDict(kt, innerType, false) - val (addGroup, finishOuter) = dictType.arrayRep.constructFromFunctions(cb, region, outerSize, deepCopy = false) + val (addGroup, finishOuter) = arrayTyp.constructFromFunctions(cb, region, outerSize, deepCopy = false) cb.assign(eltIdx, 0) cb.assign(grpIdx, 0) @@ -1221,25 +1079,25 @@ class Emit[C]( cb.whileLoop(grpIdx < outerSize, { cb.assign(groupSize, coerce[Int](groupSizes(grpIdx))) cb.assign(withinGrpIdx, 0) - val firstStruct = sortedElts.loadFromIndex(cb, region, eltIdx).get(cb).asBaseStruct.memoize(cb, "GroupByKey_firstStruct") - val key = EmitCode.fromI(mb) { cb => firstStruct.loadField(cb, 0) } + val firstStruct = sortedElts.applyEV(mb, eltIdx).get(cb).asBaseStruct.memoize(cb, "GroupByKey_firstStruct") + val key = EmitCode.fromI(mb) { cb => firstStruct.loadField(cb, 0).typecast[PCode] } val group = EmitCode.fromI(mb) { cb => - val (addElt, 
finishInner) = innerType + val (addElt, finishInner) = PCanonicalArray(valTyp, required = true) .constructFromFunctions(cb, region, groupSize, deepCopy = false) cb.whileLoop(withinGrpIdx < groupSize, { - val struct = sortedElts.loadFromIndex(cb, region, eltIdx).get(cb).asBaseStruct.memoize(cb, "GroupByKey_struct") - addElt(cb, struct.loadField(cb, 1)) + val struct = sortedElts.applyEV(mb, eltIdx).get(cb).asBaseStruct.memoize(cb, "GroupByKey_struct") + addElt(cb, struct.loadField(cb, 1).typecast[PCode]) cb.assign(eltIdx, eltIdx + 1) cb.assign(withinGrpIdx, withinGrpIdx + 1) }) IEmitCode.present(cb, finishInner(cb)) } - val elt = groupType.constructFromFields(cb, region, FastIndexedSeq(key, group), deepCopy = false) + val elt = groupTyp.constructFromFields(cb, region, FastIndexedSeq(key, group), deepCopy = false) addGroup(cb, IEmitCode.present(cb, elt)) cb.assign(grpIdx, grpIdx + 1) }) - dictType.construct(finishOuter(cb)) + dictTyp.construct(finishOuter(cb)) } case x@StreamLen(a) => @@ -1250,7 +1108,7 @@ class Emit[C]( producer.initialize(cb) val xLen = cb.newLocal[Int]("streamlen_x", compLen(cb)) producer.close(cb) - primitive(xLen) + PCode(x.pType, xLen) case None => val count = cb.newLocal[Int]("stream_length", 0) producer.memoryManagedConsume(region, cb) { cb => @@ -1261,28 +1119,25 @@ class Emit[C]( case SStreamCode(_, nested) => StreamProducer.defineUnusedLabels(nested, mb) case _ => } - primitive(count) + PCode(x.pType, count) } } case x@MakeNDArray(dataIR, shapeIR, rowMajorIR, errorId) => + val xP = coerce[PCanonicalNDArray](x.pType) + val shapePType = coerce[PTuple](shapeIR.pType) + val nDims = shapePType.size emitI(rowMajorIR).flatMap(cb) { isRowMajorCode => - emitI(shapeIR).flatMap(cb) { case shapeTupleCode: SBaseStructCode => - emitI(dataIR).map(cb) { case dataCode: SIndexableCode => - - val shapeSType = shapeTupleCode.st - val nDims = shapeSType.size - val xP = PCanonicalNDArray(dataCode.st.elementType.canonicalPType().setRequired(true), nDims) - + emitI(shapeIR).flatMap(cb) { case shapeTupleCode: PBaseStructCode => + emitI(dataIR).map(cb) { case dataCode: PIndexableCode => val shapeTupleValue = shapeTupleCode.memoize(cb, "make_ndarray_shape") val memoData = dataCode.memoize(cb, "make_nd_array_memoized_data") cb.ifx(memoData.hasMissingValues(cb), { cb._throw(Code.newInstance[HailException, String, Int]( - "Cannot construct an ndarray with missing values.", errorId - )) - }) + "Cannot construct an ndarray with missing values.", errorId + ))}) (0 until nDims).foreach { index => cb.ifx(shapeTupleValue.isFieldMissing(index), @@ -1292,8 +1147,7 @@ class Emit[C]( val stridesSettables = (0 until nDims).map(i => cb.newLocal[Long](s"make_ndarray_stride_$i")) val shapeValues = (0 until nDims).map { i => - val shape = SingleCodeSCode.fromSCode(cb, shapeTupleValue.loadField(cb, i).get(cb), region) - cb.newLocalAny[Long](s"make_ndarray_shape_${ i }", shape.code) + shapeTupleValue.loadField(cb, i).get(cb).memoize(cb, s"make_ndarray_shape_${i}").asPValue.value.asInstanceOf[Value[Long]] } cb.ifx(isRowMajorCode.asBoolean.boolCode(cb), { @@ -1309,22 +1163,22 @@ class Emit[C]( } }) - xP.constructByCopyingArray(shapeValues, stridesSettables, memoData.sc.asIndexable, cb, region) + xP.constructByCopyingArray(shapeValues, stridesSettables, memoData.pc.asIndexable, cb, region) } } } case NDArrayShape(ndIR) => - emitI(ndIR).map(cb) { case pc: SNDArrayCode => pc.shape(cb) } + emitI(ndIR).map(cb){ case pc: PNDArrayCode => pc.shape(cb).asPCode} case x@NDArrayReindex(child, indexMap) => val childEC = 
emitI(child) - childEC.map(cb) { case pndCode: SNDArrayPointerCode => - val childPType = pndCode.st.pType + val childPType = coerce[PCanonicalNDArray](child.pType) + childEC.map(cb){ case pndCode: PNDArrayCode => val pndVal = pndCode.memoize(cb, "ndarray_reindex_child") val childShape = pndVal.shapes(cb) val childStrides = pndVal.strides(cb) - val pndAddr = SingleCodeSCode.fromSCode(cb, pndVal, region) - val dataArray = childPType.dataType.loadCheapSCode(cb, childPType.dataPArrayPointer(pndAddr.code.asInstanceOf[Code[Long]])) + val pndAddr = SingleCodePCode.fromPCode(cb, pndVal, region) + val dataArray = childPType.dataType.loadCheapPCode(cb, childPType.dataPArrayPointer(pndAddr.code.asInstanceOf[Code[Long]])) val newShape = indexMap.map { childIndex => if (childIndex < childPType.nDims) childShape(childIndex) else const(1L) @@ -1333,8 +1187,7 @@ class Emit[C]( if (childIndex < childPType.nDims) childStrides(childIndex) else const(0L) } - val newPType = childPType.copy(nDims = indexMap.length) - newPType.constructByCopyingArray( + x.pType.constructByCopyingArray( newShape, newStrides, dataArray, @@ -1345,25 +1198,29 @@ class Emit[C]( case NDArrayRef(nd, idxs, errorId) => val ndt = emitI(nd) - ndt.flatMap(cb) { case ndCode: SNDArrayCode => + ndt.flatMap(cb) { case ndCode: PNDArrayCode => val indexEmitCodes = idxs.map(idx => EmitCode.fromI(cb.emb)(emitInNewBuilder(_, idx))) - IEmitCode.multiMapEmitCodes(cb, indexEmitCodes) { idxPCodes: IndexedSeq[SCode] => - val idxValues = idxPCodes.zipWithIndex.map { case (pc, idx) => - cb.newLocal(s"ref_idx_$idx", pc.asInt64.longCode(cb)) + IEmitCode.multiMapEmitCodes(cb, indexEmitCodes) { idxPCodes: IndexedSeq[PCode] => + val memoizedIndices = idxPCodes.zipWithIndex.map { case (pc, idx) => + pc.memoize(cb,s"ref_idx_$idx") } val ndValue = ndCode.memoize(cb, "reffed_ndarray") + val idxValues = memoizedIndices.map(_.value.asInstanceOf[Value[Long]]) cb.append(ndValue.assertInBounds(idxValues, cb, errorId)) - ndValue.loadElement(idxValues, cb) + ndValue.loadElement(idxValues, cb).asPCode } } case NDArrayMatMul(lChild, rChild) => - emitNDArrayStandardStriding(lChild).flatMap(cb) { case (leftPVal: SNDArrayValue, leftIsColumnMajor: Value[Boolean]) => - emitNDArrayStandardStriding(rChild).map(cb) { case (rightPVal: SNDArrayValue, rightIsColumnMajor: Value[Boolean]) => - val lPType = leftPVal.st.asInstanceOf[SNDArrayPointer].pType - val rPType = rightPVal.st.asInstanceOf[SNDArrayPointer].pType + emitNDArrayColumnMajorStrides(lChild).flatMap(cb) { case leftPCode: PNDArrayCode => + emitNDArrayColumnMajorStrides(rChild).map(cb) { case rightPCode: PNDArrayCode => + val lPType = leftPCode.pt + val rPType = rightPCode.pt + + val leftPVal = leftPCode.memoize(cb, "left_ndarray_matmul") + val rightPVal = rightPCode.memoize(cb, "right_ndarray_matmul") val lShape = leftPVal.shapes(cb) val rShape = rightPVal.shapes(cb) @@ -1373,11 +1230,11 @@ class Emit[C]( val leftBroadcastMask = if (lPType.nDims > 2) NDArrayEmitter.broadcastMask(lShape) else IndexedSeq[Value[Long]]() val rightBroadcastMask = if (rPType.nDims > 2) NDArrayEmitter.broadcastMask(rShape) else IndexedSeq[Value[Long]]() - val outputPType = PCanonicalNDArray(lPType.elementType, TNDArray.matMulNDims(lPType.nDims, rPType.nDims)) + val outputPType = PCanonicalNDArray(lPType.elementType, TNDArray.matMulNDims(lPType.nDims, rPType.nDims), pt.required) if ((lPType.elementType.isInstanceOf[PFloat64] || lPType.elementType.isInstanceOf[PFloat32]) && lPType.nDims == 2 && rPType.nDims == 2) { - val leftPValAddr = 
SingleCodeSCode.fromSCode(cb, leftPVal, region) - val rightPValAddr = SingleCodeSCode.fromSCode(cb, rightPVal, region) + val leftPValAddr = SingleCodePCode.fromPCode(cb, leftPVal, region) + val rightPValAddr = SingleCodePCode.fromPCode(cb, rightPVal, region) val leftDataAddress = lPType.dataFirstElementPointer(leftPValAddr.code.asInstanceOf[Code[Long]]) val rightDataAddress = rPType.dataFirstElementPointer(rightPValAddr.code.asInstanceOf[Code[Long]]) @@ -1385,13 +1242,10 @@ class Emit[C]( val N = rShape(rPType.nDims - 1) val K = lShape(lPType.nDims - 1) - val LDA = leftIsColumnMajor.mux(M, K) - val LDB = rightIsColumnMajor.mux(K, N) + val LDA = M + val LDB = K val LDC = M - val TRANSA: Code[String] = leftIsColumnMajor.mux("N", "T") - val TRANSB: Code[String] = rightIsColumnMajor.mux("N", "T") - val (answerFirstElementAddr, answerFinisher) = outputPType.constructDataFunction( IndexedSeq(M, N), outputPType.makeColumnMajorStrides(IndexedSeq(M, N), region, cb), @@ -1402,8 +1256,8 @@ class Emit[C]( cb.append(lPType.elementType match { case PFloat32(_) => Code.invokeScalaObject13[String, String, Int, Int, Int, Float, Long, Int, Long, Int, Float, Long, Int, Unit](BLAS.getClass, method = "sgemm", - TRANSA, - TRANSB, + "N", + "N", M.toI, N.toI, K.toI, @@ -1418,8 +1272,8 @@ class Emit[C]( ) case PFloat64(_) => Code.invokeScalaObject13[String, String, Int, Int, Int, Double, Long, Int, Long, Int, Double, Long, Int, Unit](BLAS.getClass, method = "dgemm", - TRANSA, - TRANSB, + "N", + "N", M.toI, N.toI, K.toI, @@ -1444,8 +1298,8 @@ class Emit[C]( val numericElementType = coerce[PNumeric](lPType.elementType) val eVti = typeToTypeInfo(numericElementType) - val emitter = new NDArrayEmitter(unifiedShape, leftPVal.st.elementType) { - override def outputElement(cb: EmitCodeBuilder, idxVars: IndexedSeq[Value[Long]]): SCode = { + val emitter = new NDArrayEmitter(unifiedShape) { + override def outputElement(cb: EmitCodeBuilder, idxVars: IndexedSeq[Value[Long]]): PCode = { val element = coerce[Any](cb.newField("matmul_element")(eVti)) val k = cb.newField[Long]("ndarray_matmul_k") @@ -1465,15 +1319,15 @@ class Emit[C]( val kLen = cb.newField[Long]("ndarray_matmul_kLen") - def multiply(l: SCode, r: SCode): Code[_] = { + def multiply(l: PCode, r: PCode): Code[_] = { (l.st, r.st) match { - case (SInt32, SInt32) => + case (_: SInt32, _: SInt32) => l.asInt.intCode(cb) * r.asInt.intCode(cb) - case (SInt64, SInt64) => + case (_: SInt64, _: SInt64) => l.asLong.longCode(cb) * r.asLong.longCode(cb) - case (SFloat32, SFloat32) => + case (_: SFloat32, _: SFloat32) => l.asFloat.floatCode(cb) * r.asFloat.floatCode(cb) - case (SFloat64, SFloat64) => + case (_: SFloat64, _: SFloat64) => l.asDouble.doubleCode(cb) * r.asDouble.doubleCode(cb) } } @@ -1483,10 +1337,10 @@ class Emit[C]( cb.forLoop(cb.assign(k, 0L), k < kLen, cb.assign(k, k + 1L), { val lElem = leftPVal.loadElement(lIndices, cb) val rElem = rightPVal.loadElement(rIndices, cb) - cb.assign(element, numericElementType.add(multiply(lElem, rElem), element)) + cb.assign(element, numericElementType.add(multiply(lElem.asPCode, rElem.asPCode), element)) }) - primitive(outputPType.elementType.virtualType, element) + PCode(outputPType.elementType, element) } } emitter.emit(cb, outputPType, region) @@ -1495,9 +1349,9 @@ class Emit[C]( } case NDArrayInv(nd) => // Based on https://github.com/numpy/numpy/blob/v1.19.0/numpy/linalg/linalg.py#L477-L547 - emitNDArrayColumnMajorStrides(nd).map(cb) { case pNDCode: SNDArrayCode => + emitNDArrayColumnMajorStrides(nd).map(cb) { case pNDCode: 
PNDArrayCode => val pndVal = pNDCode.memoize(cb, "ndarray_inverse_nd") - val ndPT = pndVal.st.asInstanceOf[SNDArrayPointer].pType + val ndPT = pndVal.pt.asInstanceOf[PCanonicalNDArray] val shapeArray = pndVal.shapes(cb) val stridesArray = ndPT.makeColumnMajorStrides(shapeArray, region, cb) @@ -1519,7 +1373,7 @@ class Emit[C]( val INFOdgetrf = mb.newLocal[Int]() val INFOdgetri = mb.newLocal[Int]() val INFOerror = (fun: String, info: LocalRef[Int]) => (info cne 0) - .orEmpty(Code._fatal[Unit](const(s"LAPACK error ${ fun }. Error code = ").concat(info.toS))) + .orEmpty(Code._fatal[Unit](const(s"LAPACK error ${fun}. Error code = ").concat(info.toS))) cb.append((N cne M).orEmpty(Code._fatal[Unit](const("Can only invert square matrix")))) @@ -1556,11 +1410,11 @@ class Emit[C]( finish(cb) } case x@NDArraySVD(nd, full_matrices, computeUV) => - emitNDArrayColumnMajorStrides(nd).flatMap(cb) { case ndPCode: SNDArrayCode => + emitNDArrayColumnMajorStrides(nd).flatMap(cb){ case ndPCode: PNDArrayCode => val ndPVal = ndPCode.memoize(cb, "nd_svd_value") val infoDGESDDResult = cb.newLocal[Int]("infoDGESDD") - val infoDGESDDErrorTest = (extraErrorMsg: String) => (infoDGESDDResult cne 0) + val infoDGESDDErrorTest = (extraErrorMsg: String) => (infoDGESDDResult cne 0) .orEmpty(Code._fatal[Unit](const(s"LAPACK error DGESDD. $extraErrorMsg Error code = ").concat(infoDGESDDResult.toS))) val LWORKAddress = mb.newLocal[Long]("svd_lwork_address") @@ -1577,12 +1431,10 @@ class Emit[C]( val A = cb.newLocal[Long]("dgesdd_A_address") val firstElementDataAddress = ndPVal.firstDataAddress(cb) - cb.assign(LWORKAddress, Code.invokeStatic1[Memory, Long, Long]("malloc", 8L)) - - val retPTypeUncast = NDArraySVD.pTypes(computeUV, false) + cb.assign(LWORKAddress, Code.invokeStatic1[Memory, Long, Long]("malloc", 8L)) val (jobz, sPType, uData, uFinisher, vtData, vtFinisher) = if (computeUV) { - val outputPType = retPTypeUncast.asInstanceOf[PTuple] + val outputPType = x.pType.asInstanceOf[PTuple] val uPType = outputPType.fields(0).typ.asInstanceOf[PCanonicalNDArray] val sPType = outputPType.fields(1).typ.asInstanceOf[PCanonicalNDArray] val vtPType = outputPType.fields(2).typ.asInstanceOf[PCanonicalNDArray] @@ -1595,13 +1447,8 @@ class Emit[C]( (if (full_matrices) "A" else "S", sPType, uData, uFinisher, vtData, vtFinisher) } else { - val outputPType = retPTypeUncast.asInstanceOf[PCanonicalNDArray] - - def noOp(cb: EmitCodeBuilder): SNDArrayCode = { - throw new IllegalStateException("Can't happen") - } - - ("N", outputPType.asInstanceOf[PCanonicalNDArray], const(0L), noOp(_), const(0L), noOp(_)) + def noOp(cb: EmitCodeBuilder): SNDArrayCode = { throw new IllegalStateException("Can't happen")} + ("N", x.pType.asInstanceOf[PCanonicalNDArray], const(0L), noOp(_), const(0L), noOp(_)) } val (sDataAddress, sFinisher) = sPType.constructDataFunction(IndexedSeq(K), sPType.makeColumnMajorStrides(IndexedSeq(K), region, cb), cb, region) @@ -1630,7 +1477,6 @@ class Emit[C]( cb.append(Region.copyFrom(firstElementDataAddress, A, (M * N) * 8L)) def LWORK = Region.loadDouble(LWORKAddress).toI - val WORK = cb.newLocal[Long]("dgesdd_work_address") cb.assign(WORK, Code.invokeStatic1[Memory, Long, Long]("malloc", LWORK.toL * 8L)) @@ -1664,8 +1510,8 @@ class Emit[C]( val u = uFinisher(cb) val vt = vtFinisher(cb) - val outputPType = NDArraySVD.pTypes(true, false).asInstanceOf[PCanonicalTuple] - outputPType.constructFromFields(cb, region, FastIndexedSeq(EmitCode.present(cb.emb, u), EmitCode.present(cb.emb, s), EmitCode.present(cb.emb, vt)), deepCopy = 
false) + val outputPType = x.pType.asInstanceOf[PCanonicalTuple] + outputPType.constructFromFields(cb, region, FastIndexedSeq(EmitCode.present(cb.emb, u.asPCode), EmitCode.present(cb.emb, s), EmitCode.present(cb.emb, vt.asPCode)), deepCopy = false) } else { s } @@ -1674,14 +1520,11 @@ class Emit[C]( } case x@NDArrayQR(nd, mode) => // See here to understand different modes: https://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.qr.html - emitNDArrayColumnMajorStrides(nd).map(cb) { case pndCode: SNDArrayCode => - - val resultPType = NDArrayQR.pType(mode, false) - + emitNDArrayColumnMajorStrides(nd).map(cb) { case pndCode: PNDArrayCode => val pndValue = pndCode.memoize(cb, "ndarray_qr_nd") // This does a lot of byte level copying currently, so only trust // the PCanonicalNDArray representation. - val pType = pndValue.st.asInstanceOf[SNDArrayPointer].pType + assert(pndValue.pt.isInstanceOf[PCanonicalNDArray]) val shapeArray = pndValue.shapes(cb) @@ -1698,7 +1541,7 @@ class Emit[C]( def LWORK = (Region.loadDouble(LWORKAddress).toI > 0).mux(Region.loadDouble(LWORKAddress).toI, 1) - val ndPT = pType + val ndPT = pndValue.pt.asInstanceOf[PCanonicalNDArray] val dataFirstElementAddress = pndValue.firstDataAddress(cb) val hPType = ndPT @@ -1749,8 +1592,10 @@ class Emit[C]( val h = hFinisher(cb) val hMemo = h.memoize(cb, "ndarray_qr_h_memo") - val result: SCode = if (mode == "raw") { - val resultType = resultPType.asInstanceOf[PCanonicalBaseStruct] + val result: PCode = if (mode == "raw") { + val resultType = x.pType.asInstanceOf[PCanonicalBaseStruct] + val rawPType = x.pType.asInstanceOf[PTuple] + assert(hPType equalModuloRequired rawPType.types(0).asInstanceOf[PCanonicalNDArray], s"hPType = ${hPType}, other = ${rawPType.types(0).asInstanceOf[PCanonicalNDArray]}") val tau = tauFinisher(cb) resultType.constructFromFields(cb, region, FastIndexedSeq( @@ -1760,11 +1605,11 @@ class Emit[C]( } else { val (rPType, rRows, rCols) = if (mode == "r") { - (resultPType.asInstanceOf[PCanonicalNDArray], K, N) + (x.pType.asInstanceOf[PCanonicalNDArray], K, N) } else if (mode == "complete") { - (resultPType.asInstanceOf[PTuple].types(1).asInstanceOf[PCanonicalNDArray], M, N) + (x.pType.asInstanceOf[PTuple].types(1).asInstanceOf[PCanonicalNDArray], M, N) } else if (mode == "reduced") { - (resultPType.asInstanceOf[PTuple].types(1).asInstanceOf[PCanonicalNDArray], K, N) + (x.pType.asInstanceOf[PTuple].types(1).asInstanceOf[PCanonicalNDArray], K, N) } else { throw new AssertionError(s"Unsupported QR mode $mode") } @@ -1776,23 +1621,15 @@ class Emit[C]( val (rDataAddress, rFinisher) = rPType.constructDataFunction(rShapeArray, rStridesArray, cb, region) // This block assumes that `rDataAddress` and `aAddressDGEQRF` point to column major arrays. - // TODO: Abstract this into ndarray ptype/SCode interface methods. + // TODO: Abstract this into ndarray ptype/pcode interface methods. 
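            // A plain-Scala sketch of the upper-triangle copy staged below, for readers of this
            // hunk. It uses the same column-major convention the comment above assumes (element
            // (row, col) lives at offset col * rows + row); the helper name and the simplification
            // that source and result share the same rows/cols are illustrative, not part of the patch.
            def upperTriangleColMajor(a: Array[Double], rows: Int, cols: Int): Array[Double] = {
              val out = new Array[Double](rows * cols)
              var col = 0
              while (col < cols) {
                var row = 0
                while (row < rows) {
                  // keep entries on or above the diagonal, zero the strictly-lower ones
                  out(col * rows + row) = if (col >= row) a(col * rows + row) else 0.0
                  row += 1
                }
                col += 1
              }
              out
            }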
val currRow = cb.newLocal[Long]("ndarray_qr_currRow") val currCol = cb.newLocal[Long]("ndarray_qr_currCol") val curWriteAddress = cb.newLocal[Long]("ndarray_qr_curr_write_addr", rDataAddress) // I think this just copies out the upper triangle into new ndarray in column major order - cb.forLoop({ - cb.assign(currCol, 0L) - }, currCol < rCols, { - cb.assign(currCol, currCol + 1L) - }, { - cb.forLoop({ - cb.assign(currRow, 0L) - }, currRow < rRows, { - cb.assign(currRow, currRow + 1L) - }, { + cb.forLoop({cb.assign(currCol, 0L)}, currCol < rCols, {cb.assign(currCol, currCol + 1L)}, { + cb.forLoop({cb.assign(currRow, 0L)}, currRow < rRows, {cb.assign(currRow, currRow + 1L)}, { cb.append(Region.storeDouble( curWriteAddress, (currCol >= currRow).mux( @@ -1810,7 +1647,7 @@ class Emit[C]( computeR } else { - val crPType = resultPType.asInstanceOf[PCanonicalTuple] + val crPType = x.pType.asInstanceOf[PCanonicalTuple] val qPType = crPType.types(0).asInstanceOf[PCanonicalNDArray] val qShapeArray = if (mode == "complete") Array(M, M) else Array(M, K) @@ -1877,8 +1714,8 @@ class Emit[C]( } result } - case x: NDArrayMap => emitDeforestedNDArrayI(x) - case x: NDArrayMap2 => emitDeforestedNDArrayI(x) + case x: NDArrayMap => emitDeforestedNDArrayI(x) + case x: NDArrayMap2 => emitDeforestedNDArrayI(x) case x: NDArrayReshape => emitDeforestedNDArrayI(x) case x: NDArrayConcat => emitDeforestedNDArrayI(x) case x: NDArraySlice => emitDeforestedNDArrayI(x) @@ -1898,7 +1735,7 @@ class Emit[C]( case x@ResultOp(start, sig) => val AggContainer(aggs, sc, _) = container.get - val pt = PCanonicalTuple(false, sig.map(_.pResultType): _*) + val pt = x.pType.asInstanceOf[PCanonicalTuple] val addr = cb.newLocal("resultop_tuple_addr", pt.allocate(region)) cb += pt.stagedInitialize(addr, setMissing = false) @@ -1911,17 +1748,17 @@ class Emit[C]( (cb: EmitCodeBuilder) => cb += pt.setFieldMissing(addr, j)) } - presentPC(pt.loadCheapSCode(cb, addr)) + presentPC(pt.loadCheapPCode(cb, addr)) case x@ApplySeeded(fn, args, seed, rt) => - val codeArgs = args.map(a => EmitCode.fromI(cb.emb)(emitInNewBuilder(_, a))) + val codeArgs = args.map(a => (a.pType, EmitCode.fromI(cb.emb)(emitInNewBuilder(_, a)))) val impl = x.implementation val unified = impl.unify(Array.empty[Type], args.map(_.typ), rt) assert(unified) - impl.applySeededI(seed, cb, region, impl.computeReturnEmitType(x.typ, codeArgs.map(_.emitType)).st, codeArgs: _*) + impl.applySeededI(seed, cb, region, pt, codeArgs: _*) case AggStateValue(i, _) => val AggContainer(_, sc, _) = container.get - presentPC(sc.states(i).serializeToRegion(cb, PCanonicalBinary(), region)) + presentC(sc.states(i).serializeToRegion(cb, coerce[PBinary](pt), region)) case ToArray(a) => EmitStream.produce(this, a, cb, region, env, container) @@ -1932,9 +1769,7 @@ class Emit[C]( .flatMap(cb) { case (stream: SStreamCode) => val producer = stream.producer - val stateEmitType = VirtualTypeWithReq(zero.typ, ctx.req.lookupState(x).head.asInstanceOf[TypeWithRequiredness]).canonicalEmitType - - val xAcc = mb.newEmitField(accumName, stateEmitType) + val xAcc = mb.newEmitField(accumName, x.accPType, x.accPType.required) // in future, will choose compatible type for zero/body with requiredness val xElt = mb.newEmitField(valueName, producer.element.emitType) var tmpRegion: Settable[Region] = null @@ -1946,11 +1781,11 @@ class Emit[C]( cb.assign(tmpRegion, Region.stagedCreate(Region.REGULAR, region.getPool())) cb.assign(xAcc, emitI(zero, tmpRegion) - .map(cb)(pc => pc.castTo(cb, tmpRegion, stateEmitType.st))) + 
.map(cb)(pc => pc.castTo(cb, tmpRegion, x.accPType))) } else { cb.assign(producer.elementRegion, region) cb.assign(xAcc, emitI(zero, producer.elementRegion) - .map(cb)(pc => pc.castTo(cb, producer.elementRegion, stateEmitType.st))) + .map(cb)(pc => pc.castTo(cb, producer.elementRegion, x.accPType))) } producer.unmanagedConsume(cb) { cb => @@ -1958,19 +1793,19 @@ class Emit[C]( if (producer.requiresMemoryManagementPerElement) { cb.assign(xAcc, emitI(body, producer.elementRegion, env.bind(accumName -> xAcc, valueName -> xElt)) - .map(cb)(pc => pc.castTo(cb, tmpRegion, stateEmitType.st, deepCopy = true))) + .map(cb)(pc => pc.castTo(cb, tmpRegion, x.accPType, deepCopy = true))) cb += producer.elementRegion.clearRegion() val swapRegion = cb.newLocal[Region]("streamfold_swap_region", producer.elementRegion) cb.assign(producer.elementRegion, tmpRegion.load()) cb.assign(tmpRegion, swapRegion.load()) } else { cb.assign(xAcc, emitI(body, producer.elementRegion, env.bind(accumName -> xAcc, valueName -> xElt)) - .map(cb)(pc => pc.castTo(cb, producer.elementRegion, stateEmitType.st, deepCopy = false))) + .map(cb)(pc => pc.castTo(cb, producer.elementRegion, x.accPType, deepCopy = false))) } } if (producer.requiresMemoryManagementPerElement) { - cb.assign(xAcc, xAcc.toI(cb).map(cb)(pc => pc.castTo(cb, region, pc.st, deepCopy = true))) + cb.assign(xAcc, xAcc.toI(cb).map(cb)(pc => pc.castTo(cb, region, pc.pt, deepCopy = true))) cb += producer.elementRegion.invalidate() cb += tmpRegion.invalidate() } @@ -1984,13 +1819,9 @@ class Emit[C]( var tmpRegion: Settable[Region] = null - val accTypes = ctx.req.lookupState(x).zip(acc.map(_._2.typ)) - .map { case (btwr, t) => VirtualTypeWithReq(t, btwr.asInstanceOf[TypeWithRequiredness]) - .canonicalEmitType - } - val xElt = mb.newEmitField(valueName, producer.element.emitType) val names = acc.map(_._1) + val accTypes = x.accPTypes.map(pt => EmitType(pt.sType, pt.required)) val accVars = (names, accTypes).zipped.map(mb.newEmitField) val resEnv = env.bind(names.zip(accVars): _*) @@ -2003,12 +1834,12 @@ class Emit[C]( cb.assign(tmpRegion, Region.stagedCreate(Region.REGULAR, region.getPool())) (accVars, acc).zipped.foreach { case (xAcc, (_, x)) => - cb.assign(xAcc, emitI(x, tmpRegion).map(cb)(_.castTo(cb, tmpRegion, xAcc.st))) + cb.assign(xAcc, emitI(x, tmpRegion).map(cb)(_.castTo(cb, tmpRegion, xAcc.pt))) } } else { cb.assign(producer.elementRegion, region) (accVars, acc).zipped.foreach { case (xAcc, (_, x)) => - cb.assign(xAcc, emitI(x, region).map(cb)(_.castTo(cb, region, xAcc.st))) + cb.assign(xAcc, emitI(x, region).map(cb)(_.castTo(cb, region, xAcc.pt))) } } @@ -2018,7 +1849,7 @@ class Emit[C]( (accVars, seq).zipped.foreach { (accVar, ir) => cb.assign(accVar, emitI(ir, producer.elementRegion, env = seqEnv) - .map(cb)(pc => pc.castTo(cb, tmpRegion, accVar.st, deepCopy = true))) + .map(cb)(pc => pc.castTo(cb, tmpRegion, accVar.pt, deepCopy = true))) } cb += producer.elementRegion.clearRegion() val swapRegion = cb.newLocal[Region]("streamfold2_swap_region", producer.elementRegion) @@ -2028,57 +1859,34 @@ class Emit[C]( (accVars, seq).zipped.foreach { (accVar, ir) => cb.assign(accVar, emitI(ir, producer.elementRegion, env = seqEnv) - .map(cb)(pc => pc.castTo(cb, producer.elementRegion, accVar.st, deepCopy = false))) + .map(cb)(pc => pc.castTo(cb, producer.elementRegion, accVar.pt, deepCopy = false))) } } } if (producer.requiresMemoryManagementPerElement) { accVars.foreach { xAcc => - cb.assign(xAcc, xAcc.toI(cb).map(cb)(pc => pc.castTo(cb, region, pc.st, deepCopy = 
true))) + cb.assign(xAcc, xAcc.toI(cb).map(cb)(pc => pc.castTo(cb, region, pc.pt, deepCopy = true))) } cb += producer.elementRegion.invalidate() cb += tmpRegion.invalidate() } emitI(res, env = resEnv) } - case t@Trap(child) => - val (ev, mb) = emitSplitMethod("trap", cb, child, region, env, container, loopEnv) - val maybeException = cb.newLocal[(String, java.lang.Integer)]("trap_msg", cb.emb.ecb.runMethodWithHailExceptionHandler(mb.mb.methodName)) - val sst = SStringPointer(PCanonicalString(false)) - - val tt = t.typ.asInstanceOf[TTuple] - val errTupleType = tt.types(0).asInstanceOf[TTuple] - val errTuple = SStackStruct(errTupleType, FastIndexedSeq(EmitType(sst, true), EmitType(SInt32, true))) - val tv = cb.emb.newEmitField("trap_errTuple", EmitType(errTuple, false)) - - val maybeMissingEV = cb.emb.newEmitField("trap_value", ev.emitType.copy(required = false)) - cb.ifx(maybeException.isNull, { - cb.assign(tv, EmitCode.missing(cb.emb, errTuple)) - cb.assign(maybeMissingEV, ev) - }, { - val str = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, sst.constructFromString(cb, region, maybeException.invoke[String]("_1")))) - val errorId = EmitCode.fromI(mb)(cb => - IEmitCode.present(cb, primitive(maybeException.invoke[java.lang.Integer]("_2").invoke[Int]("intValue")))) - cb.assign(tv, IEmitCode.present(cb, SStackStruct.constructFromArgs(cb, region, errTupleType, str, errorId))) - cb.assign(maybeMissingEV, EmitCode.missing(cb.emb, ev.st)) - }) - IEmitCode.present(cb, { - SStackStruct.constructFromArgs(cb, region, t.typ.asInstanceOf[TBaseStruct], tv, maybeMissingEV) - }) case Die(m, typ, errorId) => val cm = emitI(m) val msg = cb.newLocal[String]("die_msg") cm.consume(cb, cb.assign(msg, ""), - { sc => cb.assign(msg, sc.asString.loadString()) }) + { sc => cb.assign(msg, sc.asString.loadString())}) cb._throw[HailException](Code.newInstance[HailException, String, Int](msg, errorId)) - IEmitCode.present(cb, typeWithReq.canonicalEmitType.st.defaultValue) + val t = PType.canonical(typ, true).deepInnerRequired(true) + IEmitCode.present(cb, t.defaultValue(cb.emb)) case CastToArray(a) => - emitI(a).map(cb) { ind => ind.asIndexable.castToArray(cb) } + emitI(a).map(cb) { ind => ind.asIndexable.castToArray(cb) }.typecast[PCode] case x@ShuffleWith( keyFields, @@ -2091,15 +1899,15 @@ class Emit[C]( ) => val shuffleType = x.shuffleType - val shuffleST = SCanonicalShufflePointer(PCanonicalShuffle(shuffleType, false)) - val settable = mb.newPField(shuffleST).asInstanceOf[SCanonicalShufflePointerSettable] + val shuffleST = SCanonicalShufflePointer(PCanonicalShuffle(shuffleType, true)) + val settable = mb.newPField(shuffleST.pType).asInstanceOf[SCanonicalShufflePointerSettable] val shuffle = CompileTimeShuffleClient.create(cb, settable) shuffle.start(cb, region) - val shuffleEnv = env.bind(name -> EmitSettable.present(settable)) + val shuffleEnv = env.bind(name -> mb.newPresentEmitSettable(settable)) - val successfulShuffleIds: SValue = emitI(writerIR, env = shuffleEnv) + val successfulShuffleIds: PValue = emitI(writerIR, env = shuffleEnv) .get(cb, "shuffle ID must be non-missing") // just store it so the writer gets run .memoize(cb, "shuffleSuccessfulShuffleIds") @@ -2134,10 +1942,10 @@ class Emit[C]( shuffle.finishPut(cb) shuffle.close(cb) - val resPType = PCanonicalBinary() + val resPType = pt.asInstanceOf[PCanonicalBinary] // FIXME: server needs to send uuid for the successful partition - val boff = cb.memoize(resPType.loadCheapSCode(cb, resPType.allocate(region, 0)), "shuffleWriteBOff") - val baddr = 
SingleCodeSCode.fromSCode(cb, boff, region) + val boff = cb.memoize(resPType.loadCheapPCode(cb, resPType.allocate(region, 0)), "shuffleWriteBOff") + val baddr = SingleCodePCode.fromPCode(cb, boff, region) cb += resPType.storeLength(baddr.code.asInstanceOf[Code[Long]], 0) presentPC(boff) @@ -2149,7 +1957,7 @@ class Emit[C]( } case WriteValue(value, path, spec) => - emitI(path).flatMap(cb) { case p: SStringCode => + emitI(path).flatMap(cb) { case p: PStringCode => val pv = p.memoize(cb, "write_path") emitI(value).map(cb) { v => val ob = cb.newLocal[OutputBuffer]("write_ob") @@ -2163,13 +1971,7 @@ class Emit[C]( case x@TailLoop(name, args, body) => val loopStartLabel = CodeLabel() - - val accTypes = ctx.req.lookupState(x).zip(args.map(_._2.typ)) - .map { case (btwr, t) => VirtualTypeWithReq(t, btwr.asInstanceOf[TypeWithRequiredness]) - .canonicalEmitType - } - - val inits = args.zip(accTypes) + val inits = args.zip(x.accPTypes) val stagedPool = cb.newLocal[RegionPool]("tail_loop_pool_ref") cb.assign(stagedPool, region.getPool()) @@ -2183,14 +1985,14 @@ class Emit[C]( val newLoopEnv = loopEnv.getOrElse(Env.empty) // Emit into LoopRef's current region. (region 1) - loopRef.loopArgs.zip(inits).foreach { case (settable, ((_, x), et)) => - settable.store(cb, emitI(x, loopRef.r1).map(cb)(_.castTo(cb, loopRef.r1, et.st))) + loopRef.loopArgs.zip(inits).foreach { case (settable, ((_, x), pt)) => + settable.store(cb, emitI(x, loopRef.r1).map(cb)(_.castTo(cb, loopRef.r1, pt))) } cb.define(loopStartLabel) val result = emitI(body, env = argEnv, loopEnv = Some(newLoopEnv.bind(name, loopRef))).map(cb) { pc => - val answerInRightRegion = pc.copyToRegion(cb, region, pc.st) + val answerInRightRegion = pc.copyToRegion(cb, region) cb.append(loopRef.r1.clearRegion()) cb.append(loopRef.r2.clearRegion()) answerInRightRegion @@ -2202,8 +2004,8 @@ class Emit[C]( val loopRef = loopEnv.get.lookup(name) // Need to emit into region 2, clear region 1, then swap them. 
- (loopRef.tmpLoopArgs, loopRef.loopTypes, args).zipped.map { case (tmpLoopArg, et, arg) => - tmpLoopArg.store(cb, emitI(arg, loopEnv = None, region = loopRef.r2).map(cb)(_.castTo(cb, loopRef.r2, et.st))) + (loopRef.tmpLoopArgs, loopRef.loopTypes, args).zipped.map { case (tmpLoopArg, pt, arg) => + tmpLoopArg.store(cb, emitI(arg, loopEnv = None, region = loopRef.r2).map(cb)(_.castTo(cb, loopRef.r2, pt))) } cb.append(loopRef.r1.clearRegion()) @@ -2223,65 +2025,55 @@ class Emit[C]( cb.define(deadLabel) val rt = loopRef.resultType - IEmitCode(CodeLabel(), CodeLabel(), rt.st.defaultValue, rt.required) + IEmitCode(CodeLabel(), CodeLabel(), rt.st.pType.defaultValue(mb), rt.required) case x@CollectDistributedArray(contexts, globals, cname, gname, body, tsd) => + val ctxsType = coerce[PStream](contexts.pType) val parentCB = mb.ecb - emitStream(contexts, cb, region).map(cb) { case ctxStream: SStreamCode => - - def wrapInTuple(cb: EmitCodeBuilder, region: Value[Region], et: EmitCode): SBaseStructPointerCode = { - PCanonicalTuple(true, et.emitType.canonicalPType).constructFromFields(cb, region, FastIndexedSeq(et), deepCopy = false) - } - - val bufferSpec: BufferSpec = BufferSpec.defaultUncompressed - - val emitGlobals = EmitCode.fromI(mb)(cb => emitInNewBuilder(cb, globals)) - val ctxType = ctxStream.st.elementEmitType - val contextPTuple: PTuple = PCanonicalTuple(required = true, ctxType.canonicalPType) - val globalPTuple: PTuple = PCanonicalTuple(required = true, emitGlobals.emitType.canonicalPType) - val contextSpec: TypedCodecSpec = TypedCodecSpec(contextPTuple, bufferSpec) - val globalSpec: TypedCodecSpec = TypedCodecSpec(globalPTuple, bufferSpec) - - // emit body in new FB + val functionID: String = { val bodyFB = EmitFunctionBuilder[Region, Array[Byte], Array[Byte], Array[Byte]](ctx.executeContext, "collect_distributed_array") - var bodySpec: TypedCodecSpec = null + // FIXME this is terrible + val m = MakeTuple.ordered(FastSeq(body)) + val bodyReturnPType = PCanonicalTuple(true, body.pType) + m._pType = bodyReturnPType + bodyFB.emitWithBuilder { cb => - val region = bodyFB.getCodeParam[Region](1) - val ctxIB = cb.newLocal[InputBuffer]("cda_ctx_ib", contextSpec.buildCodeInputBuffer( + val ctxIB = cb.newLocal[InputBuffer]("cda_ctx_ib", x.contextSpec.buildCodeInputBuffer( Code.newInstance[ByteArrayInputStream, Array[Byte]](bodyFB.getCodeParam[Array[Byte]](2)))) - val gIB = cb.newLocal[InputBuffer]("cda_g_ib", globalSpec.buildCodeInputBuffer( + val gIB = cb.newLocal[InputBuffer]("cda_g_ib", x.globalSpec.buildCodeInputBuffer( Code.newInstance[ByteArrayInputStream, Array[Byte]](bodyFB.getCodeParam[Array[Byte]](3)))) - val decodedContext = contextSpec.encodedType.buildDecoder(contextSpec.encodedVirtualType, bodyFB.ecb) - .apply(cb, region, ctxIB) + val decodedContext = x.contextSpec.encodedType.buildDecoder(x.contextSpec.encodedVirtualType, bodyFB.ecb) + .apply(cb, bodyFB.getCodeParam[Region](1), ctxIB) .asBaseStruct .memoize(cb, "decoded_context_tuple") .loadField(cb, 0) - .memoizeField(cb, "decoded_context") + .typecast[PCode] + .memoize(cb, "decoded_context") - val decodedGlobal = globalSpec.encodedType.buildDecoder(globalSpec.encodedVirtualType, bodyFB.ecb) - .apply(cb, region, gIB) + val decodedGlobal = x.globalSpec.encodedType.buildDecoder(x.globalSpec.encodedVirtualType, bodyFB.ecb) + .apply(cb, bodyFB.getCodeParam[Region](1), gIB) .asBaseStruct .memoize(cb, "decoded_global_tuple") .loadField(cb, 0) - .memoizeField(cb, "decoded_global") + .typecast[PCode] + .memoize(cb, "decoded_global") - val 
env = EmitEnv(Env[EmitValue]( + val env = Env[EmitValue]( (cname, decodedContext), - (gname, decodedGlobal)), FastIndexedSeq()) + (gname, decodedGlobal)) - val bodyResult = wrapInTuple(cb, - region, - EmitCode.fromI(cb.emb)(cb => new Emit(ctx, bodyFB.ecb).emitI(body, cb, env, None))) - - bodySpec = TypedCodecSpec(bodyResult.st.canonicalPType().setRequired(true), bufferSpec) + val bodyResult = new Emit(ctx, bodyFB.ecb) + .emitI(m, cb, env, None) + .get(cb, "cda return cannot be missing!") + .memoize(cb, "cda_body_result") val bOS = cb.newLocal[ByteArrayOutputStream]("cda_baos", Code.newInstance[ByteArrayOutputStream]()) - val bOB = cb.newLocal[OutputBuffer]("cda_ob", bodySpec.buildCodeOutputBuffer(bOS)) - bodySpec.encodedType.buildEncoder(bodyResult.st, cb.emb.ecb) - .apply(cb, bodyResult, bOB) + val bOB = cb.newLocal[OutputBuffer]("cda_ob", x.bodySpec.buildCodeOutputBuffer(bOS)) + x.bodySpec.encodedType.buildEncoder(bodyResult.st, cb.emb.ecb) + .apply(cb, bodyResult, bOB) cb += bOB.invoke[Unit]("flush") cb += bOB.invoke[Unit]("close") bOS.invoke[Array[Byte]]("toByteArray") @@ -2289,55 +2081,59 @@ class Emit[C]( val fID = genUID() parentCB.addModule(fID, bodyFB.resultWithIndex()) - val functionID = fID - - val spark = parentCB.backend() - - val baos = mb.genFieldThisRef[ByteArrayOutputStream]() - val buf = mb.genFieldThisRef[OutputBuffer]() - val ctxab = mb.genFieldThisRef[ByteArrayArrayBuilder]() - val encRes = mb.genFieldThisRef[Array[Array[Byte]]]() - - - def addContexts(cb: EmitCodeBuilder, ctxStream: StreamProducer): Unit = { - ctxStream.memoryManagedConsume(region, cb, setup = { cb => - cb += ctxab.invoke[Int, Unit]("ensureCapacity", ctxStream.length.map(_.apply(cb)).getOrElse(16)) - }) { cb => - cb += baos.invoke[Unit]("reset") - val ctxTuple = wrapInTuple(cb, region, ctxStream.element) - .memoize(cb, "cda_add_contexts_addr") - contextSpec.encodedType.buildEncoder(ctxTuple.st, parentCB) - .apply(cb, ctxTuple, buf) - cb += buf.invoke[Unit]("flush") - cb += ctxab.invoke[Array[Byte], Unit]("add", baos.invoke[Array[Byte]]("toByteArray")) - } - } + fID + } + + val spark = parentCB.backend() + + val baos = mb.genFieldThisRef[ByteArrayOutputStream]() + val buf = mb.genFieldThisRef[OutputBuffer]() + val ctxab = mb.genFieldThisRef[ByteArrayArrayBuilder]() + val encRes = mb.genFieldThisRef[Array[Array[Byte]]]() - def addGlobals(cb: EmitCodeBuilder): Unit = { - val wrapped = wrapInTuple(cb, region, emitGlobals) - globalSpec.encodedType.buildEncoder(wrapped.st, parentCB) - .apply(cb, wrapped, buf) + def wrapInTuple(cb: EmitCodeBuilder, et: EmitCode): SBaseStructPointerCode = { + PCanonicalTuple(true, et.pt).constructFromFields(cb, region, FastIndexedSeq(et), deepCopy = false) + } + + def addContexts(cb: EmitCodeBuilder, ctxStream: StreamProducer): Unit = { + ctxStream.memoryManagedConsume(region, cb, setup = { cb => + cb += ctxab.invoke[Int, Unit]("ensureCapacity", ctxStream.length.map(_.apply(cb)).getOrElse(16)) + }) { cb => + cb += baos.invoke[Unit]("reset") + val ctxTuple = wrapInTuple(cb, ctxStream.element) + .memoize(cb, "cda_add_contexts_addr") + x.contextSpec.encodedType.buildEncoder(ctxTuple.st, parentCB) + .apply(cb, ctxTuple, buf) cb += buf.invoke[Unit]("flush") + cb += ctxab.invoke[Array[Byte], Unit]("add", baos.invoke[Array[Byte]]("toByteArray")) } + } - def decodeResult(cb: EmitCodeBuilder): SCode = { - val len = mb.newLocal[Int]("cda_result_length") - val ib = mb.newLocal[InputBuffer]("decode_ib") - - cb.assign(len, encRes.length()) - val pt = 
PCanonicalArray(bodySpec.encodedType.decodedSType(bodySpec.encodedVirtualType).asInstanceOf[SBaseStruct].fieldEmitTypes(0).canonicalPType) - pt.asInstanceOf[PCanonicalArray].constructFromElements(cb, region, len, deepCopy = false) { (cb, i) => - cb.assign(ib, bodySpec.buildCodeInputBuffer(Code.newInstance[ByteArrayInputStream, Array[Byte]](encRes(i)))) - val eltTupled = bodySpec.encodedType.buildDecoder(bodySpec.encodedVirtualType, parentCB) - .apply(cb, region, ib) - .asBaseStruct - .memoize(cb, "cda_eltTupled") - eltTupled.loadField(cb, 0) - } + def addGlobals(cb: EmitCodeBuilder): Unit = { + val g = wrapInTuple(cb, EmitCode.fromI(mb)(cb => emitInNewBuilder(cb, globals))).memoize(cb, "cda_g") + x.globalSpec.encodedType.buildEncoder(g.st, parentCB) + .apply(cb, g, buf) + cb += buf.invoke[Unit]("flush") + } + + def decodeResult(cb: EmitCodeBuilder): PCode = { + val len = mb.newLocal[Int]("cda_result_length") + val ib = mb.newLocal[InputBuffer]("decode_ib") + + cb.assign(len, encRes.length()) + x.pType.asInstanceOf[PCanonicalArray].constructFromElements(cb, region, len, deepCopy = false) { (cb, i) => + cb.assign(ib, x.bodySpec.buildCodeInputBuffer(Code.newInstance[ByteArrayInputStream, Array[Byte]](encRes(i)))) + val eltTupled = x.bodySpec.encodedType.buildDecoder(x.bodySpec.encodedVirtualType, parentCB) + .apply(cb, region, ib) + .asBaseStruct + .memoize(cb, "cda_eltTupled") + eltTupled.loadField(cb, 0) } + } + emitStream(contexts, cb, region).map(cb) { case ctxStream: SStreamCode => cb.assign(baos, Code.newInstance[ByteArrayOutputStream]()) - cb.assign(buf, contextSpec.buildCodeOutputBuffer(baos)) // TODO: take a closer look at whether we need two codec buffers? + cb.assign(buf, x.contextSpec.buildCodeOutputBuffer(baos)) // TODO: take a closer look at whether we need two codec buffers? cb.assign(ctxab, Code.newInstance[ByteArrayArrayBuilder, Int](16)) addContexts(cb, ctxStream.producer) cb += baos.invoke[Unit]("reset") @@ -2359,17 +2155,21 @@ class Emit[C]( ctx.req.lookupOpt(ir) match { case Some(r) => if (result.required != r.required) { - throw new RuntimeException(s"requiredness mismatch: EC=${ result.required } / Analysis=${ r.required }\n${ result.st }\n${ Pretty(ir) }") + throw new RuntimeException(s"requiredness mismatch: EC=${ result.required } / Analysis=${ r.required }\n${ result.pt }\n${ Pretty(ir) }") } case _ => // we dynamically generate some IRs in emission. Ignore these... } - if (result.st.virtualType != ir.typ) - throw new RuntimeException(s"type mismatch:\n EC=${ result.st.virtualType }\n IR=${ ir.typ }\n node: ${ Pretty(ir).take(50) }") - - result + if (result.pt != pt) { + if (!result.pt.equalModuloRequired(pt)) + throw new RuntimeException(s"ptype mismatch:\n emitted: ${ result.pt }\n inferred: ${ ir.pType }\n ir: $ir") + (result.pt.required, pt.required) match { + case (true, false) => result.map(cb)(pc => PCode(pc.pt.setRequired(pt.required), pc.code)) + case (false, true) => IEmitCode.present(cb, result.get(cb)) + } + } else result } /** @@ -2405,50 +2205,50 @@ class Emit[C]( * {@code tAggIn.elementType}. {@code tAggIn.symTab} is not used by Emit. 
* **/ - private[ir] def emit(ir: IR, mb: EmitMethodBuilder[C], env: EmitEnv, container: Option[AggContainer]): EmitCode = { + private[ir] def emit(ir: IR, mb: EmitMethodBuilder[C], env: E, container: Option[AggContainer]): EmitCode = { val region = mb.getCodeParam[Region](1) emit(ir, mb, region, env, container, None) } - private[ir] def emitWithRegion(ir: IR, mb: EmitMethodBuilder[C], region: Value[Region], env: EmitEnv, container: Option[AggContainer]): EmitCode = + private[ir] def emitWithRegion(ir: IR, mb: EmitMethodBuilder[C], region: Value[Region], env: E, container: Option[AggContainer]): EmitCode = emit(ir, mb, region, env, container, None) private def emit( ir: IR, mb: EmitMethodBuilder[C], region: Value[Region], - env: EmitEnv, + env: E, container: Option[AggContainer], loopEnv: Option[Env[LoopRef]], fallingBackFromEmitI: Boolean = false ): EmitCode = { - if (ctx.methodSplits.contains(ir) && !ctx.tryingToSplit.contains(ir)) { - return EmitCode.fromI(mb)(cb => emitInSeparateMethod(s"split_${ir.getClass.getSimpleName}", cb, ir, region, env, container, loopEnv)) - } - - - def emit(ir: IR, region: Value[Region] = region, env: EmitEnv = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): EmitCode = + def emit(ir: IR, region: Value[Region] = region, env: E = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): EmitCode = this.emit(ir, mb, region, env, container, loopEnv) - def emitI(ir: IR, cb: EmitCodeBuilder, env: EmitEnv = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): IEmitCode = + def emitInMethod(ir: IR, mb: EmitMethodBuilder[C]): EmitCode = + this.emit(ir, mb, Env.empty, container) + + def emitI(ir: IR, cb: EmitCodeBuilder, env: E = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): IEmitCode = this.emitI(ir, cb, region, env, container, loopEnv) - def emitVoid(ir: IR, env: EmitEnv = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): Code[Unit] = { + def emitVoid(ir: IR, env: E = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): Code[Unit] = { EmitCodeBuilder.scopedVoid(mb) { cb => this.emitVoid(cb, ir, region, env, container, loopEnv) } } - def emitStream(ir: IR, outerRegion: Value[Region]): EmitCode = + def emitStream(ir: IR, outerRegion: Value[Region] ): EmitCode = EmitCode.fromI(mb)(cb => EmitStream.produce(this, ir, cb, outerRegion, env, container)) + val pt = ir.pType + // ideally, emit would not be called with void values, but initOp args can be void // working towards removing this - if (ir.typ == TVoid) + if (pt == PVoid) return EmitCode.fromI(mb) { cb => this.emitVoid(cb, ir, region, env, container, loopEnv) - IEmitCode.present(cb, SCode._empty) + IEmitCode.present(cb, PCode._empty) } val result: EmitCode = (ir: @unchecked) match { @@ -2461,15 +2261,90 @@ class Emit[C]( } } - case Ref(name, t) => - val ev = env.bindings.lookup(name) - if (ev.st.virtualType != t) - throw new RuntimeException(s"emit value type did not match specified type:\n name: $name\n ev: ${ ev.st.virtualType }\n ir: ${ ir.typ }") + case Ref(name, _) => + val ev = env.lookup(name) + if (!ev.pt.equalModuloRequired(pt)) + throw new RuntimeException(s"PValue type did not match inferred ptype:\n name: $name\n pv: ${ ev.pt }\n ir: $pt") ev.load + case x@(_: ArraySort | _: ToSet | _: ToDict) => + val resultTypeAsIterable = coerce[PIterable](x.pType) + val eltType = 
x.children(0).asInstanceOf[IR].pType.asInstanceOf[PIterable].elementType + val eltVType = eltType.virtualType + + val vab = new StagedArrayBuilder(resultTypeAsIterable.elementType, mb, 0) + val sorter = new ArraySorter(EmitRegion(mb, region), vab) + + val (array, lessThan, distinct, leftRightComparatorNames: Array[String]) = (x: @unchecked) match { + case ArraySort(a, l, r, lessThan) => (a, lessThan, Code._empty, Array(l, r)) + case ToSet(a) => + val discardNext = mb.genEmitMethod("discardNext", + FastIndexedSeq[ParamType](typeInfo[Region], PCodeEmitParamType(eltType), PCodeEmitParamType(eltType)), + typeInfo[Boolean]) + val cmp2 = ApplyComparisonOp(EQWithNA(eltVType), In(0, PCodeEmitParamType(eltType)), In(1, PCodeEmitParamType(eltType))) + InferPType(cmp2) + val EmitCode(m, pv) = emitInMethod(cmp2, discardNext) + discardNext.emitWithBuilder { cb => + m || pv.asBoolean.boolCode(cb) + } + val lessThan = ApplyComparisonOp(Compare(eltVType), In(0, PCodeEmitParamType(eltType)), In(1, PCodeEmitParamType(eltType))) < 0 + InferPType(lessThan) + (a, lessThan, sorter.distinctFromSorted { (r, v1, m1, v2, m2) => + EmitCodeBuilder.scopedCode[Boolean](mb) { cb => + cb.invokeCode[Boolean](discardNext, r, + EmitCode(Code._empty, m1, PCode(eltType, v1)), + EmitCode(Code._empty, m2, PCode(eltType, v2))) + } + }, Array.empty[String]) + case ToDict(a) => + val (k0, k1, keyType) = eltType match { + case t: PStruct => (GetField(In(0, PCodeEmitParamType(eltType)), "key"), GetField(In(1, PCodeEmitParamType(eltType)), "key"), t.fieldType("key")) + case t: PTuple => (GetTupleElement(In(0, PCodeEmitParamType(eltType)), 0), GetTupleElement(In(1, PCodeEmitParamType(eltType)), 0), t.types(0)) + } + val discardNext = mb.genEmitMethod("discardNext", + FastIndexedSeq[ParamType](typeInfo[Region], PCodeEmitParamType(eltType), PCodeEmitParamType(eltType)), + typeInfo[Boolean]) + + val cmp2 = ApplyComparisonOp(EQWithNA(keyType.virtualType), k0, k1).deepCopy() + InferPType(cmp2) + val EmitCode(m, pv) = emitInMethod(cmp2, discardNext) + discardNext.emitWithBuilder { cb => + m || pv.asBoolean.boolCode(cb) + } + val lessThan = (ApplyComparisonOp(Compare(keyType.virtualType), k0, k1) < 0).deepCopy() + InferPType(lessThan) + (a, lessThan, Code(sorter.pruneMissing, sorter.distinctFromSorted { (r, v1, m1, v2, m2) => + EmitCodeBuilder.scopedCode[Boolean](mb) { cb => + cb.invokeCode[Boolean](discardNext, r, + EmitCode(Code._empty, m1, PCode(eltType, v1)), + EmitCode(Code._empty, m2, PCode(eltType, v2))) + } + }), Array.empty[String]) + } + + val sort = vab.ti match { + case BooleanInfo => sorter.sort(makeDependentSortingFunction[Boolean]( + region, eltType, lessThan, env, leftRightComparatorNames)) + case IntInfo => sorter.sort(makeDependentSortingFunction[Int](region, eltType, lessThan, env, leftRightComparatorNames)) + case LongInfo => sorter.sort(makeDependentSortingFunction[Long]( + region, eltType, lessThan, env, leftRightComparatorNames)) + case FloatInfo => sorter.sort(makeDependentSortingFunction[Float]( + region, eltType, lessThan, env, leftRightComparatorNames)) + case DoubleInfo => sorter.sort(makeDependentSortingFunction[Double]( + region, eltType, lessThan, env, leftRightComparatorNames)) + } + + val optStream = emitStream(array, region) + EmitCode.fromI(mb)(cb => optStream.toI(cb).map(cb) { case stream: SStreamCode => + StreamUtils.writeToArrayBuilder(cb, stream.producer, vab, region) + cb += sort + cb += distinct + sorter.toRegion(cb, x.pType) + }) + case In(i, expectedPType) => // this, Code[Region], ... 
- val ev = env.inputValues(i).apply(region) + val ev = mb.getEmitParam(2 + i, region) ev case ir@Apply(fn, typeArgs, args, rt) => @@ -2477,24 +2352,25 @@ class Emit[C]( val unified = impl.unify(typeArgs, args.map(_.typ), rt) assert(unified) - val emitArgs = args.map(a => EmitCode.fromI(mb)(emitI(a, _))).toFastIndexedSeq - - val argSTypes = emitArgs.map(_.st) - val retType = impl.computeStrictReturnEmitType(ir.typ, argSTypes) - val k = (fn, typeArgs, argSTypes, retType) + val argPTypes = args.map(_.pType) + val k = (fn, typeArgs, argPTypes, pt) val meth = methods.get(k) match { case Some(funcMB) => funcMB case None => - val funcMB = impl.getAsMethod(mb.ecb, retType, typeArgs, argSTypes: _*) + val funcMB = impl.getAsMethod(mb.ecb, pt, typeArgs, argPTypes: _*) methods.update(k, funcMB) funcMB } + val vars = args.map { a => coerce[Any](mb.newLocal()(typeToTypeInfo(a.pType))) } EmitCode.fromI(mb) { cb => val emitArgs = args.map(a => EmitCode.fromI(cb.emb)(emitI(a, _))).toFastIndexedSeq IEmitCode.multiMapEmitCodes(cb, emitArgs) { codeArgs => - cb.invokeSCode(meth, FastIndexedSeq[Param](CodeParam(region)) ++ codeArgs.map(pc => pc: Param): _*) + for ((l, i) <- vars.zip(codeArgs)) { + cb.assign(l, i.code) + } + PCode(pt, meth.invokeCode[Any](CodeParam(region) +: vars.map(_.get: Param): _*)) } } case x@ApplySpecial(_, typeArgs, args, rt) => @@ -2502,8 +2378,7 @@ class Emit[C]( val impl = x.implementation val unified = impl.unify(typeArgs, args.map(_.typ), rt) assert(unified) - val retType = impl.computeReturnEmitType(x.typ, codeArgs.map(_.emitType)) - impl.apply(EmitRegion(mb, region), retType.st, typeArgs, codeArgs: _*) + impl.apply(EmitRegion(mb, region), pt, typeArgs, codeArgs: _*) case x@WritePartition(stream, pctx, writer) => val ctxCode = emit(pctx) @@ -2526,44 +2401,463 @@ class Emit[C]( ctx.req.lookupOpt(ir) match { case Some(r) => if (result.required != r.required) { - throw new RuntimeException(s"requiredness mismatch: EC=${ result.required } / Analysis=${ r.required }\n${ result.emitType }\n${ Pretty(ir) }") + throw new RuntimeException(s"requiredness mismatch: EC=${ result.required } / Analysis=${ r.required }\n${ result.pt }\n${ Pretty(ir) }") } case _ => // we dynamically generate some IRs in emission. Ignore these... 
} - if (result.st.virtualType != ir.typ) - throw new RuntimeException(s"type mismatch: EC=${ result.st.virtualType } / IR=${ ir.typ }\n") - result + if (result.pt != pt) { + if (!result.pt.equalModuloRequired(pt)) + throw new RuntimeException(s"ptype mismatch:\n emitted: ${ result.pt }\n inferred: ${ ir.pType }\n ir: $ir") + (result.pt.required, pt.required) match { + case (true, false) => EmitCode.fromI(mb)(cb => result.toI(cb).map(cb)(pc => PCode(pc.pt.setRequired(pt.required), pc.code))) + case (false, true) => EmitCode.fromI(mb) { cb => IEmitCode.present(cb, result.toI(cb).get(cb)) } + } + } else result } - private def makeDependentSortingFunction( - cb: EmitCodeBuilder, - elemSCT: SingleCodeType, ir: IR, env: EmitEnv, emitter: Emit[_], leftRightComparatorNames: Array[String]): (EmitCodeBuilder, Value[Region], Code[_], Code[_]) => Code[Boolean] = { - val fb = cb.emb.ecb + private def capturedReferences(ir: IR): (IR, (Emit.E, DependentEmitFunctionBuilder[_]) => Emit.E) = { + var ids = Set[String]() + + VisitIR(ir) { + case Ref(id, _) => + ids += id + case _ => + } + + (ir, { (env: Emit.E, f: DependentEmitFunctionBuilder[_]) => + Env[EmitValue](ids.toFastSeq.flatMap { id => + env.lookupOption(id).map { e => + (id, f.newDepEmitField(e.load)) + } + }: _*) + }) + } - var newEnv = env - val sort = fb.genEmitMethod("dependent_sorting_func", - FastIndexedSeq(typeInfo[Region], CodeParamType(elemSCT.ti), CodeParamType(elemSCT.ti)), + private def makeDependentSortingFunction[T: TypeInfo]( + region: Code[Region], + elemPType: PType, ir: IR, env: Emit.E, leftRightComparatorNames: Array[String]): DependentEmitFunctionBuilder[AsmFunction2[T, T, Boolean]] = { + val (newIR, getEnv) = capturedReferences(ir) + val f = cb.genDependentFunction[T, T, Boolean](baseName = "sort_compare") + val fregion = f.newDepField[Region](region) + var newEnv = getEnv(env, f) + + val leftEC = EmitCode(Code._empty, false, PCode(elemPType, f.getCodeParam[T](1))) + val rightEC = EmitCode(Code._empty, false, PCode(elemPType, f.getCodeParam[T](2))) + val sort = f.genEmitMethod("sort", + FastIndexedSeq(typeInfo[Region], leftEC.emitParamType, rightEC.emitParamType), BooleanInfo) - sort.emitWithBuilder[Boolean] { cb => - val region = sort.getCodeParam[Region](1) - val leftEC = cb.memoize(EmitCode.present(sort, elemSCT.loadToSCode(cb, region, sort.getCodeParam(2)(elemSCT.ti))), "sort_leftEC") - val rightEC = cb.memoize(EmitCode.present(sort, elemSCT.loadToSCode(cb, region, sort.getCodeParam(3)(elemSCT.ti))), "sort_rightEC") + if (leftRightComparatorNames.nonEmpty) { + assert(leftRightComparatorNames.length == 2) + newEnv = newEnv.bindIterable( + IndexedSeq( + (leftRightComparatorNames(0), sort.getEmitParam(2, fregion)), + (leftRightComparatorNames(1), sort.getEmitParam(3, fregion)))) + } + + val EmitCode(m, v) = new Emit(ctx, f.ecb).emit(newIR, sort, newEnv, None) - if (leftRightComparatorNames.nonEmpty) { - assert(leftRightComparatorNames.length == 2) - newEnv = newEnv.bind( - (leftRightComparatorNames(0), leftEC), - (leftRightComparatorNames(1), rightEC)) - } + sort.emit(m.mux(Code._fatal[Boolean]("Result of sorting function cannot be missing."), v.code)) + f.apply_method.emitWithBuilder(cb => cb.invokeCode[Boolean](sort, fregion, leftEC, rightEC)) + f + } + + private def present(pv: PCode): EmitCode = EmitCode(Code._empty, false, pv) + + private def present(pt: PType, c: Code[_]): EmitCode = + EmitCode(Code._empty, false, PCode(pt, c)) + + def deforestNDArrayI(x0: IR, cb: EmitCodeBuilder, region: Value[Region], env: E): 
IEmitCode = { + + def emit(ir: IR, env: E = env): IEmitCode = + this.emitI(ir, cb, region, env, None, None) + + def dEmit(ir: IR, env: E = env): IEmitCode = emit(ir, env) + + def deforest(x: IR): IEmitCodeGen[NDArrayEmitter] = { + val xType = coerce[PNDArray](x.pType) + val outputNDims = xType.nDims + + x match { + case NDArrayMap(child, elemName, body) => + deforest(child).map(cb) { childEmitter => + val childP = child.pType.asInstanceOf[PNDArray] + val elemPType = childP.elementType + + new NDArrayEmitter(childEmitter.outputShape) { + override def outputElement(cb: EmitCodeBuilder, idxVars: IndexedSeq[Value[Long]]): PCode = { + val elemRef = cb.emb.newPresentEmitField("ndarray_map_element_name", elemPType) + + cb.assign(elemRef, childEmitter.outputElement(cb, idxVars)) + val bodyEnv = env.bind(elemName, elemRef) + val bodyI = dEmit(body, bodyEnv) + + bodyI.get(cb, "NDArray map body cannot be missing") + } + } + } + case NDArrayMap2(lChild, rChild, lName, rName, body) => + deforest(lChild).flatMap(cb) { leftChildEmitter => + deforest(rChild).map(cb) { rightChildEmitter => + val lP = coerce[PNDArray](lChild.pType) + val rP = coerce[PNDArray](rChild.pType) + + val leftShapeValues = leftChildEmitter.outputShape + val rightShapeValues = rightChildEmitter.outputShape + + val (newSetupShape, shapeArray) = NDArrayEmitter.unifyShapes2(cb.emb, leftShapeValues, rightShapeValues) + + cb.append(newSetupShape) + + new NDArrayEmitter(shapeArray) { + override def outputElement(cb: EmitCodeBuilder, idxVars: IndexedSeq[Value[Long]]): PCode = { + val lElemRef = cb.emb.newPresentEmitField(lName, lP.elementType) + val rElemRef = cb.emb.newPresentEmitField(rName, rP.elementType) + + val bodyEnv = env.bind(lName, lElemRef) + .bind(rName, rElemRef) + + val lIdxVars2 = NDArrayEmitter.zeroBroadcastedDims2(cb.emb, idxVars, nDims, leftShapeValues) + val rIdxVars2 = NDArrayEmitter.zeroBroadcastedDims2(cb.emb, idxVars, nDims, rightShapeValues) + + cb.assign(lElemRef, leftChildEmitter.outputElement(cb, lIdxVars2)) + cb.assign(rElemRef, rightChildEmitter.outputElement(cb, rIdxVars2)) + + dEmit(body, bodyEnv).get(cb, "NDArrayMap2 body cannot be missing") + } + } + } + } + case NDArrayReindex(child, indexExpr) => + deforest(child).map(cb) { childEmitter => + val childPType = child.pType.asInstanceOf[PNDArray] + + val shapeSeq = indexExpr.map { childIndex => + if (childIndex < childPType.nDims) + childEmitter.outputShape(childIndex) + else + const(1L) + } + + new NDArrayEmitter(shapeSeq) { + override def outputElement(cb: EmitCodeBuilder, idxVars: IndexedSeq[Value[Long]]): PCode = { + val concreteIdxsForChild = Array.tabulate(childEmitter.nDims) { childDim => + val parentDim = indexExpr.indexOf(childDim) + idxVars(parentDim) + } + childEmitter.outputElement(cb, concreteIdxsForChild) + } + } + } + case x@NDArrayReshape(childND, shape) => + deforest(childND).flatMap(cb) { childEmitter => + val outputNDims = x.pType.nDims + + val childShapeValues = childEmitter.outputShape + + val requestedShapeValues = Array.tabulate(x.pType.nDims)(i => cb.newLocal[Long](s"ndarray_reindex_request_shape_$i")).toIndexedSeq + + dEmit(shape, env).map(cb) { pc => + val tupleCode = pc.asBaseStruct + val tupleValue = tupleCode.memoize(cb, "ndarray_reshape_requested") + + val hasNegativeOne = cb.newLocal[Boolean]("ndarray_reshape_has_neg_one") + val runningProduct = cb.newLocal[Long]("ndarray_reshape_running_product") + val replacesNegativeOne = cb.newLocal[Long]("ndarray_reshape_replaces_neg_one") + val tempShapeElement = 
cb.newLocal[Long]("ndarray_reshape_temp_shape_element") + + cb.assign(hasNegativeOne, false) + cb.assign(runningProduct, 1L) + + (0 until outputNDims).foreach { i => + cb.assign(tempShapeElement, tupleValue.loadField(cb, i).get(cb, "Can't reshape if elements of reshape tuple are missing.").asLong.longCode(cb)) + cb.ifx(tempShapeElement < 0L, + { + cb.ifx(tempShapeElement ceq -1L, + { + cb.ifx(hasNegativeOne, { + cb._fatal("Can't infer shape, more than one -1") + }, { + cb.assign(hasNegativeOne, true) + }) + }, + { + cb._fatal("Can't reshape, new shape must contain only nonnegative numbers or -1") + } + ) + }, + { + cb.assign(runningProduct, runningProduct * tempShapeElement) + } + ) + } + + val numElements = cb.newLocal[Long]("ndarray_reshape_child_num_elements") + cb.assign(numElements, childND.pType.asInstanceOf[PNDArray].numElements(childShapeValues)) + + cb.ifx(hasNegativeOne.mux( + (runningProduct ceq 0L) || (numElements % runningProduct) > 0L, + numElements cne runningProduct + ), { + cb._fatal("Can't reshape since requested shape is incompatible with number of elements") + }) + cb.assign(replacesNegativeOne, (runningProduct ceq 0L).mux(0L, numElements / runningProduct)) + + (0 until outputNDims).foreach { i => + cb.assign(tempShapeElement, tupleValue.loadField(cb, i).get(cb, "Can't reshape if elements of reshape tuple are missing.").asLong.longCode(cb)) + cb.assign(requestedShapeValues(i), (tempShapeElement ceq -1L).mux(replacesNegativeOne, tempShapeElement)) + } + + new NDArrayEmitter(requestedShapeValues) { + override def outputElement(cb: EmitCodeBuilder, idxVars: IndexedSeq[Value[Long]]): PCode = { + val storeElementIndex = cb.newLocal[Long]("ndarray_reshape_index_store") + cb.assign(storeElementIndex, LinalgCodeUtils.linearizeIndicesRowMajor(idxVars, requestedShapeValues, cb.emb)) + + val (newIdxVarsSetup, newIdxVars) = LinalgCodeUtils.unlinearizeIndexRowMajor(storeElementIndex, childShapeValues, cb.emb) + cb.append(newIdxVarsSetup) + assert(newIdxVars.length == childEmitter.nDims) - val iec = emitter.emitI(ir, cb, newEnv, None) - iec.get(cb, "Result of sorting function cannot be missing").asBoolean.boolCode(cb) + childEmitter.outputElement(cb, newIdxVars) + } + } + } + } + case x@NDArrayFilter(child, filters) => + deforest(child).map(cb) { childEmitter => + + val filterWasMissing = (0 until filters.size).map(i => cb.newField[Boolean](s"ndarray_filter_${i}_was_missing")) + val filtPValues = new Array[PIndexableValue](filters.size) + val outputShape = childEmitter.outputShape.map(_ => cb.newField[Long]("ndarray_filter_output_shapes")) + + filters.zipWithIndex.foreach { case (filt, i) => + // Each filt is a sequence that may be missing with elements that may not be missing. 
+ emit(filt).consume(cb, + { + cb.assign(outputShape(i), childEmitter.outputShape(i)) + cb.assign(filterWasMissing(i), true) + }, + { + filtArrayPC => { + val filtArrayPValue = filtArrayPC.asIndexable.memoize(cb, s"ndarray_filt_array_${i}") + filtPValues(i) = filtArrayPValue + cb.assign(outputShape(i), filtArrayPValue.loadLength().toL) + cb.assign(filterWasMissing(i), false) + } + } + ) + } + + new NDArrayEmitter(outputShape) { + override def outputElement(cb: EmitCodeBuilder, idxVars: IndexedSeq[Value[Long]]): PCode = { + val newIdxVars: IndexedSeq[Settable[Long]] = Array.tabulate(x.pType.nDims) { _ => cb.newField[Long]("ndarray_filter_new_idx_val") } + newIdxVars.zipWithIndex.foreach { case (newIdxVar, i) => + cb.ifx(filterWasMissing(i), { + cb.assign(newIdxVar, idxVars(i)) + }, + { + cb.assign(newIdxVar, filtPValues(i).loadElement(cb, idxVars(i).toI).get(cb, s"NDArrayFilter: can't filter on missing index (axis=$i)").asLong.longCode(cb)) + }) + } + + childEmitter.outputElement(cb, newIdxVars) + } + } + } + case NDArraySlice(child, slicesIR) => + deforest(child).flatMap(cb) { childEmitter => + emit(slicesIR).flatMap(cb) { slicesPC => + val slicesValue = slicesPC.asBaseStruct.memoize(cb, "ndarray_slice_tuple_pv") + + val (indexingIndices, slicingIndices) = slicesValue.pt.types.zipWithIndex.partition { case (pFieldType, idx) => + pFieldType.isPrimitive + } match { + case (a, b) => (a.map(_._2), b.map(_._2)) + } + + IEmitCode.multiFlatMap[Int, SCode, NDArrayEmitter](indexingIndices, indexingIndex => slicesValue.loadField(cb, indexingIndex), cb) { indexingSCodes => + val indexingValues = indexingSCodes.map(sCode => sCode.memoize(cb, "ndarray_slice_indexer")) + val slicingValueTriples = new BoxedArrayBuilder[(Value[Long], Value[Long], Value[Long])]() + val outputShape = { + IEmitCode.multiFlatMap[Int, SCode, IndexedSeq[Value[Long]]](slicingIndices, + valueIdx => slicesValue.loadField(cb, valueIdx), cb) { sCodeSlices: IndexedSeq[SCode] => + IEmitCode.multiFlatMap(sCodeSlices, { sCodeSlice: SCode => + val sValueSlice = sCodeSlice.asBaseStruct.memoize(cb, "ndarray_slice_sCodeSlice") + // I know I have a tuple of three elements here, start, stop, step + + val newDimSizeI = sValueSlice.loadField(cb, 0).flatMap(cb) { startC => + sValueSlice.loadField(cb, 1).flatMap(cb) { stopC => + sValueSlice.loadField(cb, 2).map(cb) { stepC => + val start = cb.newLocal[Long]("ndarray_slice_start", startC.asLong.longCode(cb)) + val stop = cb.newLocal[Long]("ndarray_slice_stop", stopC.asLong.longCode(cb)) + val step = cb.newLocal[Long]("ndarray_slice_step", stepC.asLong.longCode(cb)) + + slicingValueTriples.push((start, stop, step)) + + val newDimSize = cb.newLocal[Long]("new_dim_size") + cb.ifx(step >= 0L && start <= stop, { + cb.assign(newDimSize, const(1L) + ((stop - start) - 1L) / step) + }, { + cb.ifx(step < 0L && start >= stop, { + cb.assign(newDimSize, (((stop - start) + 1L) / step) + 1L) + }, { + cb.assign(newDimSize, 0L) + }) + }) + + newDimSize + + } + } + } + newDimSizeI + }, cb)(x => IEmitCode(cb, false, x)) + } + } + + outputShape.map(cb) { outputShapeSeq => + new NDArrayEmitter(outputShapeSeq) { + override def outputElement(cb: EmitCodeBuilder, idxVars: IndexedSeq[Value[Long]]): PCode = { + // Iterate through the slices tuple given in. For each single integer, should just copy that integer into + // an indexed seq. For each range, should use start and step to modify. 
+ val oldIdxVarsIterator = idxVars.toIterator + val indexingIterator = indexingValues.toIterator + val slicingIterator = slicingValueTriples.result().toIterator + + val newIdxVars = slicesValue.pt.types.map { fieldType => fieldType match { + case indexer: PInt64 => { + indexingIterator.next().asPValue.value.asInstanceOf[Value[Long]] + } + case slicer: PBaseStruct => { + val (start, stop, step) = slicingIterator.next() + + cb.memoize(PCode.apply(PInt64Required, start + oldIdxVarsIterator.next() * step), "ndarray_slice_adjusted_lookup").value.asInstanceOf[Value[Long]] + } + }} + + childEmitter.outputElement(cb, newIdxVars) + } + } + } + } + } + } + case NDArrayConcat(nds, axis) => + emit(nds).flatMap(cb) { ndsPCode => + val ndsArrayPValue = ndsPCode.asIndexable.memoize(cb, "ndarray_concat_array_of_nds") + val arrLength = ndsArrayPValue.loadLength() + cb.ifx(arrLength ceq 0, { + cb._fatal("need at least one ndarray to concatenate") + }) + + val missing: Code[Boolean] = { + if (ndsArrayPValue.st.elementEmitType.required) + const(false) + else { + val missing = cb.newLocal[Boolean]("ndarray_concat_result_missing") + cb.assign(missing, false) + // Need to check if the any of the ndarrays are missing. + val missingCheckLoopIdx = cb.newLocal[Int]("ndarray_concat_missing_check_idx") + cb.forLoop(cb.assign(missingCheckLoopIdx, 0), missingCheckLoopIdx < arrLength, cb.assign(missingCheckLoopIdx, missingCheckLoopIdx + 1), + cb.assign(missing, missing | ndsArrayPValue.isElementMissing(missingCheckLoopIdx)) + ) + missing + } + } + + IEmitCode(cb, missing, { + val loopIdx = cb.newLocal[Int]("ndarray_concat_shape_check_idx") + val firstND = ndsArrayPValue.loadElement(cb, 0).map(cb) { sCode => sCode.asNDArray }.get(cb).memoize(cb, "ndarray_concat_input_0") + val newShape = (0 until outputNDims).map { dimIdx => + val localDim = cb.newLocal[Long](s"ndarray_concat_output_shape_element_${dimIdx}") + val ndShape = firstND.shapes(cb) + cb.assign(localDim, ndShape(dimIdx)) + cb.forLoop(cb.assign(loopIdx, 1), loopIdx < arrLength, cb.assign(loopIdx, loopIdx + 1), { + val shapeOfNDAtIdx = ndsArrayPValue.loadElement(cb, loopIdx).map(cb) { sCode => sCode.asNDArray }.get(cb).shape(cb).memoize(cb, "ndarray_concat_input_shape") + val dimLength = shapeOfNDAtIdx.loadField(cb, dimIdx).get(cb).toPCode(cb, region).memoize(cb, "dimLength").value.asInstanceOf[Value[Long]] + + if (dimIdx == axis) { + cb.assign(localDim, localDim + dimLength) + } + else { + cb.ifx(dimLength.cne(localDim), + cb._fatal(const(s"NDArrayConcat: mismatched dimensions of input NDArrays along axis ").concat(loopIdx.toS).concat(": expected ") + .concat(localDim.toS).concat(", got ") + .concat(dimLength.toS)) + ) + } + }) + localDim + } + + new NDArrayEmitter(newShape) { + override def outputElement(cb: EmitCodeBuilder, idxVars: IndexedSeq[Value[Long]]): PCode = { + val concatAxisIdx = cb.newLocal[Long]("ndarray_concat_axis_id") + val whichNDArrayToRead = cb.newLocal[Int]("ndarray_concat_outputElement_i") + + cb.assign(concatAxisIdx, idxVars(axis)) + cb.assign(whichNDArrayToRead, 0) + val condition = EmitCodeBuilder.scopedCode[Boolean](cb.emb) { cb => + (concatAxisIdx >= ndsArrayPValue.loadElement(cb, whichNDArrayToRead).get(cb).asNDArray.shape(cb).memoize(cb, "ndarray_concat_condition").loadField(cb, axis).get(cb).asLong.longCode(cb)) + } + cb.whileLoop(condition, { + cb.assign(concatAxisIdx, concatAxisIdx - ndsArrayPValue.loadElement(cb, whichNDArrayToRead).get(cb).asNDArray.shape(cb).memoize(cb, "ndarray_concat_output_subtract").loadField(cb, 
axis).get(cb).asLong.longCode(cb)) + cb.assign(whichNDArrayToRead, whichNDArrayToRead + 1) + }) + cb.ifx(whichNDArrayToRead >= arrLength, cb._fatal(const("NDArrayConcat: trying to access element greater than length of concatenation axis: ").concat(whichNDArrayToRead.toS).concat(" > ").concat((arrLength - 1).toS))) + val transformedIdxs = Array.tabulate(nDims) { idx => + if (idx == axis) concatAxisIdx else idxVars(idx) + }.toFastIndexedSeq + ndsArrayPValue.loadElement(cb, whichNDArrayToRead).get(cb).asNDArray.memoize(cb, "ndarray_to_load_element_from").loadElement(transformedIdxs, cb).toPCode(cb, region) + } + } + }) + } + case NDArrayAgg(child, axesToSumOut) => + deforest(child).map(cb) { childEmitter => + val childDims = child.typ.asInstanceOf[TNDArray].nDims + val axesToKeep = (0 until childDims).filter(axis => !axesToSumOut.contains(axis)) + val newOutputShape = axesToKeep.map(idx => childEmitter.outputShape(idx)) + val newOutputShapeComplement = axesToSumOut.map(idx => childEmitter.outputShape(idx)) + + new NDArrayEmitter(newOutputShape) { + override def outputElement(cb: EmitCodeBuilder, idxVars: IndexedSeq[Value[Long]]): PCode = { + val numericElementType = coerce[PNumeric](child.pType.asInstanceOf[PNDArray].elementType) + val runningSum = NumericPrimitives.newLocal(cb, "ndarray_agg_running_sum", numericElementType.virtualType) + cb.assign(runningSum, numericElementType.zero) + + SNDArray.forEachIndex(cb, newOutputShapeComplement, "NDArrayAgg_Sum_loop"){ case (cb, coordsBeingSummedOut) => + // Build the new list we need to pass down into child + val idxVarsIt = idxVars.toIterator + val summedOutIt = coordsBeingSummedOut.toIterator + val fullIndicesForChild = (0 until childDims).map(idx => + if (axesToSumOut.contains(idx)) summedOutIt.next() else idxVarsIt.next() + ) + cb.assign(runningSum, numericElementType.add(runningSum, childEmitter.outputElement(cb, fullIndicesForChild).code)) + } + + PCode.apply(numericElementType, runningSum) + } + } + } + case _ => + val ndI = emit(x) + ndI.map(cb){ ndPCode => + val ndPv = ndPCode.asNDArray.memoize(cb, "deforestNDArray_fall_through_ndarray") + val shape = ndPv.shapes(cb) + + new NDArrayEmitter(shape) { + override def outputElement(cb: EmitCodeBuilder, idxVars: IndexedSeq[Value[Long]]): PCode = { + ndPv.asInstanceOf[PNDArrayValue].loadElement(idxVars, cb).toPCode(cb, region) + } + } + } + } } - (cb: EmitCodeBuilder, region: Value[Region], l: Code[_], r: Code[_]) => cb.invokeCode[Boolean](sort, region, l, r) + + deforest(x0).map(cb)(emitter => emitter.emit(cb, coerce[PCanonicalNDArray](x0.pType), region)) } } @@ -2588,15 +2882,15 @@ object NDArrayEmitter { def zeroBroadcastedDims(indices: IndexedSeq[Code[Long]], broadcastMask: IndexedSeq[Code[Long]]): IndexedSeq[Value[Long]] = { indices.zip(broadcastMask).map { case (index, flag) => new Value[Long] { def get: Code[Long] = index * flag - } - } + }} } - def unifyShapes2(cb: EmitCodeBuilder, leftShape: IndexedSeq[Value[Long]], rightShape: IndexedSeq[Value[Long]]): IndexedSeq[Value[Long]] = { + def unifyShapes2(mb: EmitMethodBuilder[_], leftShape: IndexedSeq[Value[Long]], rightShape: IndexedSeq[Value[Long]]): (Code[Unit], IndexedSeq[Value[Long]]) = { + val sb = SetupBuilder(mb) + val shape = leftShape.zip(rightShape).zipWithIndex.map { case ((left, right), i) => val notSameAndNotBroadcastable = !((left ceq right) || (left ceq 1L) || (right ceq 1L)) - cb.newField[Long]( - s"unify_shapes2_shape$i", + sb.memoizeField( notSameAndNotBroadcastable.mux( Code._fatal[Long](rightShape.foldLeft[Code[String]]( 
leftShape.foldLeft[Code[String]]( @@ -2605,14 +2899,16 @@ object NDArrayEmitter { .concat("] vs [ ") )((accum, v) => accum.concat(v.toS).concat(" ")) .concat("]")), - (left > right).mux(left, right))) + (left > right).mux(left, right)), + s"unify_shapes2_shape$i") } - shape + (sb.result(), shape) } def matmulShape(cb: EmitCodeBuilder, leftShape: IndexedSeq[Value[Long]], rightShape: IndexedSeq[Value[Long]]): IndexedSeq[Value[Long]] = { val mb = cb.emb + val sb = SetupBuilder(mb) assert(leftShape.nonEmpty) assert(rightShape.nonEmpty) @@ -2620,6 +2916,7 @@ object NDArrayEmitter { var lK: Value[Long] = null var rK: Value[Long] = null var shape: IndexedSeq[Value[Long]] = null + var setup: Code[Unit] = Code._empty if (leftShape.length == 1) { lK = leftShape.head @@ -2637,9 +2934,10 @@ object NDArrayEmitter { shape = leftShape.slice(0, leftShape.length - 1) } else { rK = rightShape(rightShape.length - 2) - val unifiedShape = unifyShapes2(cb, + val (unifiedSetup, unifiedShape) = unifyShapes2(mb, leftShape.slice(0, leftShape.length - 2), rightShape.slice(0, rightShape.length - 2)) + setup = Code(setup, unifiedSetup) shape = unifiedShape :+ leftShape(leftShape.length - 2) :+ rightShape.last } } @@ -2648,23 +2946,25 @@ object NDArrayEmitter { val rightShapeString = const("(").concat(rightShape.map(_.toS).reduce((a, b) => a.concat(", ").concat(b))).concat(")") - cb.ifx(lK.cne(rK), { - cb._fatal("Matrix dimensions incompatible: ", - leftShapeString, - " can't be multiplied by matrix with dimensions ", - rightShapeString) - }) + setup = Code(setup, + (lK cne rK).orEmpty( + Code._fatal[Unit](const("Matrix dimensions incompatible: ") + .concat(leftShapeString) + .concat(" can't be multiplied by matrix with dimensions ") + .concat(rightShapeString)))) + cb.append(setup) shape } } -abstract class NDArrayEmitter(val outputShape: IndexedSeq[Value[Long]], val elementType: SType) { +abstract class NDArrayEmitter(val outputShape: IndexedSeq[Value[Long]]) +{ val nDims = outputShape.length - def outputElement(cb: EmitCodeBuilder, idxVars: IndexedSeq[Value[Long]]): SCode + def outputElement(cb: EmitCodeBuilder, idxVars: IndexedSeq[Value[Long]]): PCode - def emit(cb: EmitCodeBuilder, targetType: PCanonicalNDArray, region: Value[Region]): SCode = { + def emit(cb: EmitCodeBuilder, targetType: PCanonicalNDArray, region: Value[Region]): PCode = { val shapeArray = outputShape val idx = cb.newLocal[Int]("ndarrayemitter_emitloops_idx", 0) @@ -2675,11 +2975,9 @@ abstract class NDArrayEmitter(val outputShape: IndexedSeq[Value[Long]], val elem cb, region) - SNDArray.forEachIndexColMajor(cb, shapeArray, "ndarrayemitter_emitloops") { case (cb, idxVars) => - val element = IEmitCode.present(cb, outputElement(cb, idxVars)).consume(cb, { - cb._fatal("NDArray elements cannot be missing") - }, { elementPc => - targetType.elementType.storeAtAddress(cb, firstElementAddress + (idx.toL * targetType.elementType.byteSize), region, elementPc, true) + SNDArray.forEachIndex(cb, shapeArray, "ndarrayemitter_emitloops") { case (cb, idxVars) => + val element = IEmitCode.present(cb, outputElement(cb, idxVars)).consume(cb, {cb._fatal("NDArray elements cannot be missing")}, { elementPc => + targetType.elementType.storeAtAddress(cb, firstElementAddress + (idx.toL * targetType.elementType.byteSize), region, elementPc, true) }) cb.assign(idx, idx + 1) } diff --git a/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala b/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala index c008e2505cb..f85479051d1 100644 --- 
a/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala +++ b/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala @@ -2,21 +2,22 @@ package is.hail.expr.ir import is.hail.annotations.{Region, RegionPool, RegionValueBuilder} import is.hail.asm4s._ -import is.hail.backend.BackendUtils +import is.hail.backend.{BackendUtils, HailTaskContext} import is.hail.expr.ir.functions.IRRandomness import is.hail.expr.ir.orderings.CodeOrdering import is.hail.io.fs.FS import is.hail.io.{BufferSpec, InputBuffer, TypedCodecSpec} -import is.hail.types.VirtualTypeWithReq -import is.hail.types.physical.stypes._ -import is.hail.types.physical.{PCanonicalTuple, PType} +import is.hail.lir +import is.hail.types.physical.stypes.{EmitType, SType} +import is.hail.types.physical.stypes.interfaces.PVoidCode.pt +import is.hail.types.physical.{PCanonicalTuple, PCode, PSettable, PStream, PType, PValue, typeToTypeInfo} import is.hail.types.virtual.Type import is.hail.utils._ import is.hail.variant.ReferenceGenome import org.apache.spark.TaskContext import java.io._ -import java.lang.reflect.InvocationTargetException +import java.util.Base64 import scala.collection.mutable import scala.language.existentials @@ -83,19 +84,27 @@ trait WrappedEmitClassBuilder[C] extends WrappedEmitModuleBuilder { def getOrDefineLazyField[T: TypeInfo](setup: Code[T], id: Any): Value[T] = ecb.getOrDefineLazyField(setup, id) - def newPSettable(sb: SettableBuilder, pt: SType, name: String = null): SSettable = ecb.newPSettable(sb, pt, name) + def newPSettable(sb: SettableBuilder, pt: PType, name: String = null): PSettable = ecb.newPSettable(sb, pt, name) - def newPField(pt: SType): SSettable = ecb.newPField(pt) + def newPField(pt: PType): PSettable = ecb.newPField(pt) - def newPField(name: String, pt: SType): SSettable = ecb.newPField(name, pt) + def newPField(name: String, pt: PType): PSettable = ecb.newPField(name, pt) - def newEmitField(et: EmitType): EmitSettable = ecb.newEmitField(et.st, et.required) + def newEmitField(et: EmitType): EmitSettable = ecb.newEmitField(et.st.pType, et.required) - def newEmitField(pt: SType, required: Boolean): EmitSettable = ecb.newEmitField(pt, required) + def newEmitField(pt: PType, required: Boolean): EmitSettable = ecb.newEmitField(pt, required) - def newEmitField(name: String, et: EmitType): EmitSettable = ecb.newEmitField(name, et.st, et.required) + def newEmitField(name: String, et: EmitType): EmitSettable = ecb.newEmitField(name, et.st.pType, et.required) - def newEmitField(name: String, pt: SType, required: Boolean): EmitSettable = ecb.newEmitField(name, pt, required) + def newEmitField(name: String, pt: PType, required: Boolean): EmitSettable = ecb.newEmitField(name, pt, required) + + def newEmitSettable(pt: PType, ms: Settable[Boolean], vs: PSettable, required: Boolean): EmitSettable = ecb.newEmitSettable(pt, ms, vs, required) + + def newPresentEmitField(pt: PType): PresentEmitSettable = ecb.newPresentEmitField(pt) + + def newPresentEmitField(name: String, pt: PType): PresentEmitSettable = ecb.newPresentEmitField(name, pt) + + def newPresentEmitSettable(ps: PSettable): PresentEmitSettable = ecb.newPresentEmitSettable(ps) def fieldBuilder: SettableBuilder = cb.fieldBuilder @@ -118,7 +127,7 @@ trait WrappedEmitClassBuilder[C] extends WrappedEmitModuleBuilder { def partitionRegion: Settable[Region] = ecb.partitionRegion - def addLiteral(v: Any, t: VirtualTypeWithReq): SValue = ecb.addLiteral(v, t) + def addLiteral(v: Any, t: PType): PValue = ecb.addLiteral(v, t) def 
addEncodedLiteral(encodedLiteral: EncodedLiteral) = ecb.addEncodedLiteral(encodedLiteral) @@ -143,6 +152,11 @@ trait WrappedEmitClassBuilder[C] extends WrappedEmitModuleBuilder { def addAggStates(aggSigs: Array[agg.AggStateSig]): agg.TupleAggregatorState = ecb.addAggStates(aggSigs) + def genDependentFunction[F](baseName: String, + maybeGenericParameterTypeInfo: IndexedSeq[MaybeGenericTypeInfo[_]], + maybeGenericReturnTypeInfo: MaybeGenericTypeInfo[_])(implicit fti: TypeInfo[F]): DependentEmitFunctionBuilder[F] = + ecb.genDependentFunction(baseName, maybeGenericParameterTypeInfo, maybeGenericReturnTypeInfo) + def newRNG(seed: Long): Value[IRRandomness] = ecb.newRNG(seed) def resultWithIndex(print: Option[PrintWriter] = None): (FS, Int, Region) => C = ecb.resultWithIndex(print) @@ -174,6 +188,14 @@ trait WrappedEmitClassBuilder[C] extends WrappedEmitModuleBuilder { def create(path: Code[String]): Code[OutputStream] = getFS.invoke[String, OutputStream]("create", path) + + def genDependentFunction[A1: TypeInfo, A2: TypeInfo, R: TypeInfo]( + baseName: String = null + ): DependentEmitFunctionBuilder[AsmFunction2[A1, A2, R]] = + genDependentFunction[AsmFunction2[A1, A2, R]](baseName, Array(GenericTypeInfo[A1], GenericTypeInfo[A2]), GenericTypeInfo[R]) + + def genDependentFunction[A1: TypeInfo, A2: TypeInfo, A3: TypeInfo, R: TypeInfo]: DependentEmitFunctionBuilder[AsmFunction3[A1, A2, A3, R]] = + genDependentFunction[AsmFunction3[A1, A2, A3, R]](null, Array(GenericTypeInfo[A1], GenericTypeInfo[A2], GenericTypeInfo[A3]), GenericTypeInfo[R]) } class EmitClassBuilder[C]( @@ -203,19 +225,75 @@ class EmitClassBuilder[C]( // EmitClassBuilder methods - def newPSettable(sb: SettableBuilder, st: SType, name: String = null): SSettable = SSettable(sb, st, name) + def newPSettable(sb: SettableBuilder, pt: PType, name: String = null): PSettable = PSettable(sb, pt, name) + + def newPField(pt: PType): PSettable = newPSettable(fieldBuilder, pt) + + def newPField(name: String, pt: PType): PSettable = newPSettable(fieldBuilder, pt, name) + + def newEmitField(pt: PType, required: Boolean): EmitSettable = + newEmitSettable(pt, genFieldThisRef[Boolean](), newPField(pt), required) + + def newEmitField(name: String, emitType: EmitType): EmitSettable = newEmitField(name, emitType.st.pType, emitType.required) + + def newEmitField(name: String, pt: PType, required: Boolean): EmitSettable = + newEmitSettable(pt, genFieldThisRef[Boolean](name + "_missing"), newPField(name, pt), required) + + def newEmitSettable(_pt: PType, ms: Settable[Boolean], vs: PSettable, required: Boolean): EmitSettable = new EmitSettable { + if (!_pt.isRealizable) { + throw new UnsupportedOperationException(s"newEmitSettable can only be called on realizable PTypes. 
Called on ${_pt}") + } + + def pt: PType = _pt + + def load: EmitCode = { + val ec = EmitCode(Code._empty, + if (required) const(false) else ms.get, + vs.get) + assert(ec.required == required) + ec + } + + def store(cb: EmitCodeBuilder, ec: EmitCode): Unit = { + store(cb, ec.toI(cb)) + } + + def store(cb: EmitCodeBuilder, iec: IEmitCode): Unit = + if (required) + cb.assign(vs, iec.get(cb, s"Required EmitSettable cannot be missing ${ _pt }")) + else + iec.consume(cb, { + cb.assign(ms, true) + }, { value => + cb.assign(ms, false) + cb.assign(vs, value) + }) + + override def get(cb: EmitCodeBuilder): PCode = { + if (required) { + vs + } else { + cb.ifx(ms, cb._fatal(s"Can't convert missing ${_pt} to PValue")) + vs + } + } + } - def newPField(st: SType): SSettable = newPSettable(fieldBuilder, st) + def newPresentEmitField(pt: PType): PresentEmitSettable = + newPresentEmitSettable(newPField(pt)) - def newPField(name: String, st: SType): SSettable = newPSettable(fieldBuilder, st, name) + def newPresentEmitField(name: String, pt: PType): PresentEmitSettable = + newPresentEmitSettable(newPField(name, pt)) - def newEmitField(st: SType, required: Boolean): EmitSettable = - new EmitSettable(if (required) None else Some(genFieldThisRef[Boolean]("emitfield_missing")), newPField(st)) + def newPresentEmitSettable(ps: PSettable): PresentEmitSettable = new PresentEmitSettable { + def pt: PType = ps.pt - def newEmitField(name: String, emitType: EmitType): EmitSettable = newEmitField(name, emitType.st, emitType.required) + def load: EmitCode = EmitCode(Code._empty, const(false), ps.load()) - def newEmitField(name: String, st: SType, required: Boolean): EmitSettable = - new EmitSettable(if (required) None else Some(genFieldThisRef[Boolean](name + "_missing")), newPField(name, st)) + def store(cb: EmitCodeBuilder, pv: PCode): Unit = ps.store(cb, pv) + + override def get(cb: EmitCodeBuilder): PCode = ps + } private[this] val typMap: mutable.Map[Type, Value[_ <: Type]] = mutable.Map() @@ -235,28 +313,29 @@ class EmitClassBuilder[C]( rgExists.mux(Code._empty, addRG) } - private[this] val literalsMap: mutable.Map[(VirtualTypeWithReq, Any), SSettable] = - mutable.Map[(VirtualTypeWithReq, Any), SSettable]() - private[this] val encodedLiteralsMap: mutable.Map[EncodedLiteral, SSettable] = - mutable.Map[EncodedLiteral, SSettable]() + private[this] val literalsMap: mutable.Map[(PType, Any), PSettable] = + mutable.Map[(PType, Any), PSettable]() + private[this] val encodedLiteralsMap: mutable.Map[EncodedLiteral, PSettable] = + mutable.Map[EncodedLiteral, PSettable]() private[this] lazy val encLitField: Settable[Array[Byte]] = genFieldThisRef[Array[Byte]]("encodedLiterals") lazy val partitionRegion: Settable[Region] = genFieldThisRef[Region]("partitionRegion") private[this] lazy val poolField: Settable[RegionPool] = genFieldThisRef[RegionPool]() - def addLiteral(v: Any, t: VirtualTypeWithReq): SValue = { + def addLiteral(v: Any, t: PType): PValue = { assert(v != null) - - literalsMap.getOrElseUpdate(t -> v, SSettable(fieldBuilder, t.canonicalEmitType.st, "literal")) + assert(t.isCanonical) + literalsMap.getOrElseUpdate(t -> v, PSettable(fieldBuilder, t, "literal")) } - def addEncodedLiteral(encodedLiteral: EncodedLiteral): SValue = { - encodedLiteralsMap.getOrElseUpdate(encodedLiteral, SSettable(fieldBuilder, encodedLiteral.codec.encodedType.decodedSType(encodedLiteral.typ), "encodedLiteral")) + def addEncodedLiteral(encodedLiteral: EncodedLiteral): PValue = { + assert(encodedLiteral._pType.isCanonical) + 
encodedLiteralsMap.getOrElseUpdate(encodedLiteral, PSettable(fieldBuilder, encodedLiteral._pType, "encodedLiteral")) } private[this] def encodeLiterals(): Array[Array[Byte]] = { val literals = literalsMap.toArray - val litType = PCanonicalTuple(true, literals.map(_._1._1.canonicalPType.setRequired(true)): _*) + val litType = PCanonicalTuple(true, literals.map(_._1._1): _*) val spec = TypedCodecSpec(litType, BufferSpec.defaultUncompressed) cb.addInterface(typeInfo[FunctionWithLiterals].iname) @@ -278,7 +357,7 @@ class EmitClassBuilder[C]( lits.loadField(cb, i) .consume(cb, cb._fatal("expect non-missing literals!"), - { pc => f.store(cb, pc) }) + { pc => f.store(cb, pc.asPCode) }) } // Handle the pre-encoded literals, which only need to be decoded. preEncodedLiterals.zipWithIndex.foreach { case ((encLit, f), index) => @@ -299,7 +378,7 @@ class EmitClassBuilder[C]( val rvb = new RegionValueBuilder(region) rvb.start(litType) rvb.startTuple() - literals.foreach { case ((typ, a), _) => rvb.addAnnotation(typ.t, a) } + literals.foreach { case ((typ, a), _) => rvb.addAnnotation(typ.virtualType, a) } rvb.endTuple() enc.writeRegionValue(rvb.end()) } @@ -398,12 +477,6 @@ class EmitClassBuilder[C]( _aggSerialized.load().update(i, Code._null) } - def runMethodWithHailExceptionHandler(mname: String): Code[(String, java.lang.Integer)] = { - Code.invokeScalaObject2[AnyRef, String, (String, java.lang.Integer)](CodeExceptionHandler.getClass, - "handleUserException", - cb._this.get.asInstanceOf[Code[AnyRef]], mname) - } - def backend(): Code[BackendUtils] = { if (_backendField == null) { cb.addInterface(typeInfo[FunctionWithBackend].iname) @@ -509,49 +582,64 @@ class EmitClassBuilder[C]( ): CodeOrdering.F[op.ReturnType] = getOrderingFunction(t, t, sortOrder, op) - private def getCodeArgsInfo(argsInfo: IndexedSeq[ParamType], returnInfo: ParamType): (IndexedSeq[TypeInfo[_]], TypeInfo[_], AsmTuple[_]) = { + private def getCodeArgsInfo(argsInfo: IndexedSeq[ParamType], returnInfo: ParamType): (IndexedSeq[TypeInfo[_]], TypeInfo[_]) = { val codeArgsInfo = argsInfo.flatMap { case CodeParamType(ti) => FastIndexedSeq(ti) case t: EmitParamType => t.codeTupleTypes - case SCodeParamType(pt) => pt.codeTupleTypes() + case PCodeParamType(pt) => pt.codeTupleTypes() } - val (codeReturnInfo, asmTuple) = returnInfo match { - case CodeParamType(ti) => ti -> null - case SCodeParamType(pt) if pt.nCodes == 1 => pt.codeTupleTypes().head -> null - case SCodeParamType(pt) => - val asmTuple = modb.tupleClass(pt.codeTupleTypes()) - asmTuple.ti -> asmTuple + val codeReturnInfo = returnInfo match { + case CodeParamType(ti) => ti + case PCodeParamType(pt) => pt.ti case t: EmitParamType => val ts = t.codeTupleTypes if (ts.length == 1) - ts.head -> null + ts.head else { - val asmTuple = modb.tupleClass(ts) - asmTuple.ti -> asmTuple + throw new UnsupportedOperationException } } - (codeArgsInfo, codeReturnInfo, asmTuple) + (codeArgsInfo, codeReturnInfo) } def newEmitMethod(name: String, argsInfo: IndexedSeq[ParamType], returnInfo: ParamType): EmitMethodBuilder[C] = { - val (codeArgsInfo, codeReturnInfo, asmTuple) = getCodeArgsInfo(argsInfo, returnInfo) + val (codeArgsInfo, codeReturnInfo) = getCodeArgsInfo(argsInfo, returnInfo) - new EmitMethodBuilder[C](argsInfo, returnInfo, this, cb.newMethod(name, codeArgsInfo, codeReturnInfo), asmTuple) + new EmitMethodBuilder[C]( + argsInfo, returnInfo, + this, + cb.newMethod(name, codeArgsInfo, codeReturnInfo)) } def newEmitMethod(name: String, argsInfo: IndexedSeq[MaybeGenericTypeInfo[_]], returnInfo: 
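encodeLiterals above gathers every literal into a single required tuple, serializes it once with a codec spec, and the generated FunctionWithLiterals decodes that blob at construction time to populate its literal fields. A rough sketch of the same encode-once/decode-once round trip, using Java serialization as a stand-in codec and assuming the literal values are serializable:

    import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

    object LiteralEncodingSketch {
      // Encode all literal values as one blob, as encodeLiterals does with a
      // canonical tuple and a TypedCodecSpec (Java serialization is a stand-in).
      def encode(literals: IndexedSeq[Any]): Array[Byte] = {
        val baos = new ByteArrayOutputStream()
        val oos = new ObjectOutputStream(baos)
        oos.writeObject(literals.toArray)
        oos.close()
        baos.toByteArray
      }

      // Decode the blob once, e.g. when the generated function object is built,
      // and use the results to populate the literal fields.
      def decode(bytes: Array[Byte]): IndexedSeq[Any] = {
        val ois = new ObjectInputStream(new ByteArrayInputStream(bytes))
        try ois.readObject().asInstanceOf[Array[Any]].toIndexedSeq
        finally ois.close()
      }
    }
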
MaybeGenericTypeInfo[_]): EmitMethodBuilder[C] = { new EmitMethodBuilder[C]( argsInfo.map(ai => CodeParamType(ai.base)), CodeParamType(returnInfo.base), - this, cb.newMethod(name, argsInfo, returnInfo), asmTuple = null) + this, + cb.newMethod(name, argsInfo, returnInfo)) } def newStaticEmitMethod(name: String, argsInfo: IndexedSeq[ParamType], returnInfo: ParamType): EmitMethodBuilder[C] = { - val (codeArgsInfo, codeReturnInfo, asmTuple) = getCodeArgsInfo(argsInfo, returnInfo) + val (codeArgsInfo, codeReturnInfo) = getCodeArgsInfo(argsInfo, returnInfo) - new EmitMethodBuilder[C](argsInfo, returnInfo, this, - cb.newStaticMethod(name, codeArgsInfo, codeReturnInfo), - asmTuple) + new EmitMethodBuilder[C]( + argsInfo, returnInfo, + this, + cb.newStaticMethod(name, codeArgsInfo, codeReturnInfo)) + } + + def genDependentFunction[F](baseName: String, + maybeGenericParameterTypeInfo: IndexedSeq[MaybeGenericTypeInfo[_]], + maybeGenericReturnTypeInfo: MaybeGenericTypeInfo[_])(implicit fti: TypeInfo[F]): DependentEmitFunctionBuilder[F] = { + val depCB = emodb.genEmitClass[F](baseName) + val apply_method = depCB.cb.newMethod("apply", maybeGenericParameterTypeInfo, maybeGenericReturnTypeInfo) + val dep_apply_method = new DependentMethodBuilder(apply_method) + val emit_apply_method = new EmitMethodBuilder[F]( + maybeGenericParameterTypeInfo.map(pi => CodeParamType(pi.base)), + CodeParamType(maybeGenericReturnTypeInfo.base), + depCB, + apply_method) + new DependentEmitFunctionBuilder[F](this, dep_apply_method, emit_apply_method) } val rngs: BoxedArrayBuilder[(Settable[IRRandomness], Code[IRRandomness])] = new BoxedArrayBuilder() @@ -701,6 +789,14 @@ class EmitClassBuilder[C]( def getUnsafeWriter(path: Code[String]): Code[OutputStream] = getFS.invoke[String, OutputStream]("unsafeWriter", path) + + def genDependentFunction[A1: TypeInfo, A2: TypeInfo, R: TypeInfo]( + baseName: String = null + ): DependentEmitFunctionBuilder[AsmFunction2[A1, A2, R]] = + genDependentFunction[AsmFunction2[A1, A2, R]](baseName, Array(GenericTypeInfo[A1], GenericTypeInfo[A2]), GenericTypeInfo[R]) + + def genDependentFunction[A1: TypeInfo, A2: TypeInfo, A3: TypeInfo, R: TypeInfo]: DependentEmitFunctionBuilder[AsmFunction3[A1, A2, A3, R]] = + genDependentFunction[AsmFunction3[A1, A2, A3, R]](null, Array(GenericTypeInfo[A1], GenericTypeInfo[A2], GenericTypeInfo[A3]), GenericTypeInfo[R]) } object EmitFunctionBuilder { @@ -791,32 +887,11 @@ trait FunctionWithBackend { def setBackend(spark: BackendUtils): Unit } -object CodeExceptionHandler { - /** - * This method assumes that the method referred to by `methodName` - * is a 0-argument class method (only takes the class itself as an arg) - * which returns void. 
- */ - def handleUserException(obj: AnyRef, methodName: String): (String, java.lang.Integer) = { - try { - obj.getClass.getMethod(methodName).invoke(obj) - null - } catch { - case e: InvocationTargetException => - e.getTargetException match { - case ue: HailException => (ue.msg, ue.errorId) - case e => throw e - } - } - } -} - class EmitMethodBuilder[C]( val emitParamTypes: IndexedSeq[ParamType], val emitReturnType: ParamType, val ecb: EmitClassBuilder[C], - val mb: MethodBuilder[C], - private[ir] val asmTuple: AsmTuple[_] + val mb: MethodBuilder[C] ) extends WrappedEmitClassBuilder[C] { // wrapped MethodBuilder methods def newLocal[T: TypeInfo](name: String = null): LocalRef[T] = mb.newLocal[T](name) @@ -844,57 +919,18 @@ class EmitMethodBuilder[C]( } } - def getSCodeParam(emitIndex: Int): SCode = { + def getPCodeParam(emitIndex: Int): PCode = { assert(mb.isStatic || emitIndex != 0) val static = (!mb.isStatic).toInt - val _st = emitParamTypes(emitIndex - static).asInstanceOf[SCodeParamType].st - assert(_st.isRealizable) + val _pt = emitParamTypes(emitIndex - static).asInstanceOf[PCodeParamType].pt + assert(!_pt.isInstanceOf[PStream]) - val ts = _st.codeTupleTypes() + val ts = _pt.codeTupleTypes() val codeIndex = emitParamCodeIndex(emitIndex - static) - _st.fromCodes(ts.zipWithIndex.map { case (t, i) => + _pt.sType.fromCodes(ts.zipWithIndex.map { case (t, i) => mb.getArg(codeIndex + i)(t).load() - }) - } - - def storeEmitParam(emitIndex: Int, cb: EmitCodeBuilder): Value[Region] => EmitValue = { - assert(mb.isStatic || emitIndex != 0) - val static = (!mb.isStatic).toInt - val et = emitParamTypes(emitIndex - static) match { - case t: EmitParamType => t - case _ => throw new RuntimeException(s"isStatic=${ mb.isStatic }, emitIndex=$emitIndex, params=$emitParamTypes") - } - val codeIndex = emitParamCodeIndex(emitIndex - static) - - et match { - case SingleCodeEmitParamType(required, sct) => - val field = cb.newFieldAny(s"storeEmitParam_sct_$emitIndex", mb.getArg(codeIndex)(sct.ti).get)(sct.ti); - { region: Value[Region] => - val emitCode = EmitCode.fromI(this) { cb => - if (required) { - IEmitCode.present(cb, sct.loadToSCode(cb, region, field.load())) - } else { - IEmitCode(cb, mb.getArg[Boolean](codeIndex + 1).get, sct.loadToSCode(cb, null, field.load())) - } - } - - new EmitValue { - evSelf => - - override def emitType: EmitType = emitCode.emitType - - override def load: EmitCode = emitCode - - override def get(cb: EmitCodeBuilder): SCode = emitCode.toI(cb).get(cb) - } - } - - case SCodeEmitParamType(et) => - val fd = cb.memoizeField(getEmitParam(emitIndex, null), s"storeEmitParam_$emitIndex") - _ => fd - } - + }).asPCode } // needs region to support stream arguments @@ -912,47 +948,48 @@ class EmitMethodBuilder[C]( val emitCode = EmitCode.fromI(this) { cb => if (required) { - IEmitCode.present(cb, sct.loadToSCode(cb, r, mb.getArg(codeIndex)(sct.ti).get)) + IEmitCode.present(cb, sct.loadToPCode(cb, r, mb.getArg(codeIndex)(sct.ti).get)) } else { - IEmitCode(cb, mb.getArg[Boolean](codeIndex + 1).get, sct.loadToSCode(cb, null, mb.getArg(codeIndex)(sct.ti).get)) + IEmitCode(cb, mb.getArg[Boolean](codeIndex + 1).get, sct.loadToPCode(cb, null, mb.getArg(codeIndex)(sct.ti).get)) } } new EmitValue { evSelf => - - override def emitType: EmitType = emitCode.emitType + val pt: PType = emitCode.pt override def load: EmitCode = emitCode - override def get(cb: EmitCodeBuilder): SCode = emitCode.toI(cb).get(cb) + override def get(cb: EmitCodeBuilder): PCode = emitCode.toI(cb).get(cb) } - case 
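The deleted CodeExceptionHandler illustrates a reflection pattern worth keeping in mind: invoke a zero-argument method by name and unwrap InvocationTargetException so a user-level error can be reported as data rather than a crash. A self-contained version of that pattern, with a hypothetical UserException standing in for HailException:

    import java.lang.reflect.InvocationTargetException

    // Hypothetical stand-in for HailException, so the sketch is self-contained.
    final class UserException(val msg: String, val errorId: Int) extends RuntimeException(msg)

    object ExceptionHandlerSketch {
      // Invoke a no-argument method by name; return (msg, errorId) if it threw a
      // user-level exception, null if it completed normally, rethrow anything else.
      def handleUserException(obj: AnyRef, methodName: String): (String, java.lang.Integer) =
        try {
          obj.getClass.getMethod(methodName).invoke(obj)
          null
        } catch {
          case e: InvocationTargetException =>
            e.getTargetException match {
              case ue: UserException => (ue.msg, ue.errorId)
              case other => throw other
            }
        }
    }
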
SCodeEmitParamType(et) => - val ts = et.st.codeTupleTypes() + case PCodeEmitParamType(_pt) => + val ts = _pt.codeTupleTypes() new EmitValue { evSelf => - val emitType: EmitType = et + val pt: PType = _pt def load: EmitCode = { EmitCode(Code._empty, - if (et.required) + if (pt.required) const(false) else mb.getArg[Boolean](codeIndex + ts.length), - st.fromCodes(ts.zipWithIndex.map { case (t, i) => + pt.fromCodeTuple(ts.zipWithIndex.map { case (t, i) => mb.getArg(codeIndex + i)(t).get })) } - override def get(cb: EmitCodeBuilder): SCode = { - new SValue { - override def get: SCode = st.fromCodes(ts.zipWithIndex.map { case (t, i) => + override def get(cb: EmitCodeBuilder): PCode = { + new PValue { + override def pt: PType = evSelf.pt + + override def get: PCode = pt.fromCodeTuple(ts.zipWithIndex.map { case (t, i) => mb.getArg(codeIndex + i)(t).get }) - override def st: SType = evSelf.st + override def st: SType = evSelf.pt.sType } } } @@ -972,29 +1009,33 @@ class EmitMethodBuilder[C]( case EmitParam(ec) => fatal("EmitParam passed to invokeCode") }: _*) } - def newPLocal(st: SType): SSettable = newPSettable(localBuilder, st) + def newPLocal(pt: PType): PSettable = newPSettable(localBuilder, pt) + + def newPLocal(name: String, pt: PType): PSettable = newPSettable(localBuilder, pt, name) - def newPLocal(name: String, st: SType): SSettable = newPSettable(localBuilder, st, name) + def newEmitLocal(emitType: EmitType): EmitSettable = newEmitLocal(emitType.st.pType, emitType.required) + def newEmitLocal(pt: PType, required: Boolean): EmitSettable = + newEmitSettable(pt, if (required) null else newLocal[Boolean](), newPLocal(pt), required) - def newEmitLocal(emitType: EmitType): EmitSettable = newEmitLocal(emitType.st, emitType.required) - def newEmitLocal(st: SType, required: Boolean): EmitSettable = - new EmitSettable(if (required) None else Some(newLocal[Boolean]("anon_emitlocal_m")), newPLocal("anon_emitlocal_v", st)) + def newEmitLocal(name: String, emitType: EmitType): EmitSettable = newEmitLocal(name, emitType.st.pType, emitType.required) + def newEmitLocal(name: String, pt: PType, required: Boolean): EmitSettable = + newEmitSettable(pt, if (required) null else newLocal[Boolean](name + "_missing"), newPLocal(name, pt), required) - def newEmitLocal(name: String, emitType: EmitType): EmitSettable = newEmitLocal(name, emitType.st, emitType.required) - def newEmitLocal(name: String, st: SType, required: Boolean): EmitSettable = - new EmitSettable(if (required) None else Some(newLocal[Boolean](name + "_missing")), newPLocal(name, st)) + def newPresentEmitLocal(pt: PType): PresentEmitSettable = + newPresentEmitSettable(newPLocal(pt)) + + def newPresentEmitLocal(name: String, pt: PType): PresentEmitSettable = + newPresentEmitSettable(newPLocal(name, pt)) def emitWithBuilder[T](f: (EmitCodeBuilder) => Code[T]): Unit = emit(EmitCodeBuilder.scopedCode[T](this)(f)) def voidWithBuilder(f: (EmitCodeBuilder) => Unit): Unit = emit(EmitCodeBuilder.scopedVoid(this)(f)) - def emitSCode(f: (EmitCodeBuilder) => SCode): Unit = { + def emitPCode(f: (EmitCodeBuilder) => PCode): Unit = { + // FIXME: this should optionally construct a tuple to support multiple-code SCodes emit(EmitCodeBuilder.scopedCode(this) { cb => val res = f(cb) - if (res.st.nCodes == 1) - res.makeCodeTuple(cb).head - else - asmTuple.newTuple(res.makeCodeTuple(cb)) + res.code }) } @@ -1044,13 +1085,91 @@ trait WrappedEmitMethodBuilder[C] extends WrappedEmitClassBuilder[C] { def getEmitParam(emitIndex: Int, r: Value[Region]): EmitValue = 
emb.getEmitParam(emitIndex, r) - def newPLocal(st: SType): SSettable = emb.newPLocal(st) + def newPLocal(pt: PType): PSettable = emb.newPLocal(pt) + + def newPLocal(name: String, pt: PType): PSettable = emb.newPLocal(name, pt) - def newPLocal(name: String, st: SType): SSettable = emb.newPLocal(name, st) + def newEmitLocal(pt: PType, required: Boolean): EmitSettable = emb.newEmitLocal(pt, required) - def newEmitLocal(st: SType, required: Boolean): EmitSettable = emb.newEmitLocal(st, required) + def newEmitLocal(name: String, pt: PType, required: Boolean): EmitSettable = emb.newEmitLocal(name, pt, required) - def newEmitLocal(name: String, pt: SType, required: Boolean): EmitSettable = emb.newEmitLocal(name, pt, required) + def newPresentEmitLocal(pt: PType): PresentEmitSettable = emb.newPresentEmitLocal(pt) +} + +class DependentEmitFunctionBuilder[F]( + parentcb: EmitClassBuilder[_], + val dep_apply_method: DependentMethodBuilder[F], + val apply_method: EmitMethodBuilder[F] +) extends WrappedEmitMethodBuilder[F] { + def emb: EmitMethodBuilder[F] = apply_method + + // wrapped DependentMethodBuilder + def newDepField[T : TypeInfo](value: Code[T]): Value[T] = dep_apply_method.newDepField[T](value) + + def newDepFieldAny[T: TypeInfo](value: Code[_]): Value[T] = dep_apply_method.newDepFieldAny[T](value) + + def newInstance(mb: EmitMethodBuilder[_]): Code[F] = dep_apply_method.newInstance(mb.mb) + + private[this] val typMap: mutable.Map[Type, Value[Type]] = + mutable.Map[Type, Value[Type]]() + + private[this] val literalsMap: mutable.Map[(PType, Any), PValue] = + mutable.Map[(PType, Any), PValue]() + + override def getType(t: Type): Code[Type] = + typMap.getOrElseUpdate(t, { + val fromParent = parentcb.getType(t) + val field = newDepField[Type](fromParent) + field + }) + + override def addLiteral(v: Any, t: PType): PValue = { + assert(v != null) + literalsMap.getOrElseUpdate(t -> v, { + val fromParent = parentcb.addLiteral(v, t) + newDepPField(fromParent.get) + }) + } + + def newDepPField(pc: PCode): PValue = { + val ti = typeToTypeInfo(pc.pt) + val field = newPField(pc.pt) + dep_apply_method.setFields += { (obj: lir.ValueX) => + val code = pc.code + // XXX below assumes that the first settable is the 'base' of the PSettable + val baseField = field.settableTuple()(0).asInstanceOf[ThisFieldRef[_]] + code.end.append(lir.putField(className, baseField.name, ti, obj, code.v)) + // FIXME need to initialize other potential settables in the PSettable here + val newC = new VCode(code.start, code.end, null) + code.clear() + newC + } + field + } + + def newDepEmitField(ec: EmitCode): EmitValue = { + val _pt = ec.pt + val ti = typeToTypeInfo(_pt) + val m = genFieldThisRef[Boolean]() + val v = genFieldThisRef()(ti) + dep_apply_method.setFields += { (obj: lir.ValueX) => + ec.m.end.append(lir.putField(className, m.name, typeInfo[Boolean], obj, ec.m.v)) + ec.m.end.append(lir.putField(className, v.name, ti, obj, ec.v.v)) + val newC = new VCode(ec.m.start, ec.m.end, null) + ec.m.clear() + ec.v.clear() + newC + } + new EmitValue { + def pt: PType = _pt + + def get(cb: EmitCodeBuilder): PCode = load.toI(cb).get( + cb, + "Can't convert missing value to PValue.").memoize(cb, "newDepEmitField_memo") + + def load: EmitCode = EmitCode(Code._empty, m.load(), PCode(_pt, v.load())) + } + } } class EmitFunctionBuilder[F](val apply_method: EmitMethodBuilder[F]) extends WrappedEmitMethodBuilder[F] { diff --git a/hail/src/main/scala/is/hail/expr/ir/EmitCodeBuilder.scala b/hail/src/main/scala/is/hail/expr/ir/EmitCodeBuilder.scala 
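DependentEmitFunctionBuilder above captures values from the parent class builder (newDepField, newDepPField, newDepEmitField) into fields of a separately generated function class. A rough plain-Scala analogy of that capture-into-fields idea, with no bytecode generation; Fn2 and DependentFnBuilder are hypothetical:

    // Hypothetical two-argument function interface (shaped like AsmFunction2).
    trait Fn2[A1, A2, R] { def apply(a1: A1, a2: A2): R }

    // Values registered via newDepField are baked into every instance the
    // builder constructs, analogous to setFields on the generated class.
    final class DependentFnBuilder[A1, A2, R](body: (IndexedSeq[Any], A1, A2) => R) {
      private val captured = scala.collection.mutable.ArrayBuffer.empty[Any]

      def newDepField(value: Any): Int = { captured += value; captured.size - 1 }

      def newInstance(): Fn2[A1, A2, R] = {
        val fields = captured.toIndexedSeq // snapshot, like field initialization
        new Fn2[A1, A2, R] { def apply(a1: A1, a2: A2): R = body(fields, a1, a2) }
      }
    }

    object DependentFnDemo extends App {
      val b = new DependentFnBuilder[Int, Int, Int]((caps, x, y) => caps(0).asInstanceOf[Int] + x + y)
      b.newDepField(100)
      assert(b.newInstance()(1, 2) == 103)
    }
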
index 257f511216d..77055a3b868 100644 --- a/hail/src/main/scala/is/hail/expr/ir/EmitCodeBuilder.scala +++ b/hail/src/main/scala/is/hail/expr/ir/EmitCodeBuilder.scala @@ -5,8 +5,9 @@ import is.hail.asm4s.{coerce => _, _} import is.hail.expr.ir.functions.StringFunctions import is.hail.expr.ir.streams.StreamProducer import is.hail.lir -import is.hail.types.physical.stypes.{SCode, SSettable, SValue} +import is.hail.types.physical.stypes.SCode import is.hail.types.physical.stypes.interfaces.SStreamCode +import is.hail.types.physical.{PCode, PSettable, PType, PValue} import is.hail.utils._ object EmitCodeBuilder { @@ -54,8 +55,8 @@ class EmitCodeBuilder(val emb: EmitMethodBuilder[_], var code: Code[Unit]) exten tmp } - def assign(s: SSettable, v: SCode): Unit = { - assert(s.st == v.st, s"type mismatch!\n settable=${s.st}\n passed=${v.st}") + def assign(s: PSettable, v: PCode): Unit = { + assert(s.pt.equalModuloRequired(v.pt), s"type mismatch!\n settable=${s.pt}\n passed=${v.pt}") s.store(this, v) } @@ -71,37 +72,41 @@ class EmitCodeBuilder(val emb: EmitMethodBuilder[_], var code: Code[Unit]) exten (is, ix).zipped.foreach { case (s, c) => s.store(this, c) } } - def memoize(pc: SCode, name: String): SValue = pc.memoize(this, name) + def assign(s: PresentEmitSettable, v: PCode): Unit = { + s.store(this, v) + } + + def memoize(pc: PCode, name: String): PValue = pc.memoize(this, name) - def memoizeField(pc: SCode, name: String): SValue = { - val f = emb.newPField(name, pc.st) + def memoizeField(pc: PCode, name: String): PValue = { + val f = emb.newPField(name, pc.pt) assign(f, pc) f } def memoize(v: EmitCode, name: String): EmitValue = { - require(v.st.isRealizable) + require(v.pt.isRealizable) val l = emb.newEmitLocal(name, v.emitType) assign(l, v) l } def memoize(v: IEmitCode, name: String): EmitValue = { - require(v.st.isRealizable) + require(v.pt.isRealizable) val l = emb.newEmitLocal(name, v.emitType) assign(l, v) l } def memoizeField[T](ec: EmitCode, name: String): EmitValue = { - require(ec.st.isRealizable) + require(ec.pt.isRealizable) val l = emb.newEmitField(name, ec.emitType) l.store(this, ec) l } def withScopedMaybeStreamValue[T](ec: EmitCode, name: String)(f: EmitValue => T): T = { - if (ec.st.isRealizable) { + if (ec.pt.isRealizable) { f(memoizeField(ec, name)) } else { val ev = new EmitUnrealizableValue(ec) @@ -114,7 +119,7 @@ class EmitCodeBuilder(val emb: EmitMethodBuilder[_], var code: Code[Unit]) exten } def memoizeField(v: IEmitCode, name: String): EmitValue = { - require(v.st.isRealizable) + require(v.pt.isRealizable) val l = emb.newEmitField(name, v.emitType) assign(l, v) l @@ -134,29 +139,40 @@ class EmitCodeBuilder(val emb: EmitMethodBuilder[_], var code: Code[Unit]) exten if (c.ti != cpt.ti) throw new RuntimeException(s"invoke ${ callee.mb.methodName }: arg $i: type mismatch:" + s"\n got ${ c.ti }" + - s"\n expected ${ cpt.ti }" + - s"\n all param types: ${expectedArgs}-") + s"\n expected ${ cpt.ti }") FastIndexedSeq(c) - case (SCodeParam(pc), pcpt: SCodeParamType) => - if (pc.st != pcpt.st) + case (PCodeParam(pc), pcpt: PCodeParamType) => + if (pc.pt != pcpt.pt) throw new RuntimeException(s"invoke ${ callee.mb.methodName }: arg $i: type mismatch:" + - s"\n got ${ pc.st }" + - s"\n expected ${ pcpt.st }") - pc.makeCodeTuple(this) - case (EmitParam(ec), SCodeEmitParamType(et)) => - if (!ec.emitType.equalModuloRequired(et)) { + s"\n got ${ pc.pt }" + + s"\n expected ${ pcpt.pt }") + pc.codeTuple() + case (EmitParam(ec), PCodeEmitParamType(pt)) => + if 
(!ec.pt.equalModuloRequired(pt)) { throw new RuntimeException(s"invoke ${callee.mb.methodName}: arg $i: type mismatch:" + - s"\n got ${ec.st}" + - s"\n expected ${et.st}") + s"\n got ${ec.pt}" + + s"\n expected ${pt}") } - val castEc = (ec.required, et.required) match { - case (true, false) => ec.setOptional + val castEc = (ec.pt.required, pt.required) match { + case (true, false) => + EmitCode.fromI(emb)(cb => ec.toI(cb).map(cb)(pc => PCode(pc.pt.setRequired(pt.required), pc.code))) case (false, true) => - EmitCode.fromI(emb) { cb => IEmitCode.present(cb, ec.toI(cb).get(cb)) } + EmitCode.fromI(emb) { cb => + val presentPC = ec.toI(cb).get(cb) + IEmitCode.present(cb, PCode(presentPC.pt.setRequired(pt.required), presentPC.code)) + } case _ => ec } - castEc.makeCodeTuple(this) + + if (castEc.pt.required) { + append(Code.toUnit(castEc.m)) + castEc.codeTuple() + } else { + val ev = memoize(castEc, "cb_invoke_setup_params") + ev.codeTuple() + } + case (arg, expected) => throw new RuntimeException(s"invoke ${ callee.mb.methodName }: arg $i: type mismatch:" + s"\n got ${ arg }" + @@ -181,25 +197,25 @@ class EmitCodeBuilder(val emb: EmitMethodBuilder[_], var code: Code[Unit]) exten _invoke[T](callee, args: _*) } - def invokeSCode(callee: EmitMethodBuilder[_], args: Param*): SCode = { - val st = callee.emitReturnType.asInstanceOf[SCodeParamType].st - if (st.nCodes == 1) - st.fromCodes(FastIndexedSeq(_invoke(callee, args: _*))) - else { - val tup = newLocal("invokepcode_tuple", _invoke(callee, args: _*))(callee.asmTuple.ti) - st.fromCodes(callee.asmTuple.loadElementsAny(tup)) - } + // FIXME: this should be invokeSCode and should allocate/destructure a tuple when more than one code is present + def invokePCode(callee: EmitMethodBuilder[_], args: Param*): PCode = { + val pt = callee.emitReturnType.asInstanceOf[PCodeParamType].pt + PCode(pt, _invoke(callee, args: _*)) } // for debugging - def strValue(sc: SCode): Code[String] = { - StringFunctions.scodeToJavaValue(this, emb.partitionRegion, sc).invoke[String]("toString") + def printRegionValue(value: Code[_], typ: PType, region: Value[Region]): Unit = { + append(Code._println(StringFunctions.boxArg(EmitRegion(emb, region), typ)(value))) + } + + // for debugging + def strValue(t: PType, code: Code[_]): Code[String] = { + StringFunctions.boxArg(EmitRegion(emb, emb.partitionRegion), t)(code).invoke[String]("toString") } - def strValue(ec: EmitCode): Code[String] = { - val s = newLocal[String]("s") - ec.toI(this).consume(this, assign(s, "NA"), sc => assign(s, strValue(sc))) - s + def strValue(sc: SCode): Code[String] = { + val x = sc.asPCode + strValue(x.pt, x.code) } // for debugging diff --git a/hail/src/main/scala/is/hail/expr/ir/FoldConstants.scala b/hail/src/main/scala/is/hail/expr/ir/FoldConstants.scala index 9729ed35281..4895a200a40 100644 --- a/hail/src/main/scala/is/hail/expr/ir/FoldConstants.scala +++ b/hail/src/main/scala/is/hail/expr/ir/FoldConstants.scala @@ -33,7 +33,6 @@ object FoldConstants { _: NDArrayAgg | _: NDArrayWrite | _: NDArrayMatMul | - _: Trap | _: Die => None case ir: IR if ir.typ.isInstanceOf[TStream] => None case ir: IR if !IsConstant(ir) && diff --git a/hail/src/main/scala/is/hail/expr/ir/GenericTableValue.scala b/hail/src/main/scala/is/hail/expr/ir/GenericTableValue.scala index edd541cf14b..52caa14d528 100644 --- a/hail/src/main/scala/is/hail/expr/ir/GenericTableValue.scala +++ b/hail/src/main/scala/is/hail/expr/ir/GenericTableValue.scala @@ -16,7 +16,7 @@ import is.hail.utils._ import org.apache.spark.rdd.RDD import 
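The argument handling restored in _invoke above adjusts an EmitCode's requiredness to match the callee: a required value passed to an optional parameter keeps its value and simply materializes a constant missing flag, while an optional value passed to a required parameter is forced present (and fails loudly if missing). In plain Scala terms, with Option standing in for an EmitCode:

    // Option[A] stands in for a possibly-missing EmitCode; a sketch only.
    object RequirednessCastSketch {
      def castArg[A](arg: Option[A], argRequired: Boolean, calleeRequired: Boolean): Option[A] =
        (argRequired, calleeRequired) match {
          // required -> optional: the value stays present, the callee just sees a flag
          case (true, false) => arg
          // optional -> required: force the value to be present, or fail loudly
          case (false, true) =>
            Some(arg.getOrElse(throw new IllegalStateException("missing value for required parameter")))
          case _ => arg
        }
    }
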
org.apache.spark.sql.Row import org.apache.spark.{Partition, TaskContext} -import org.json4s.JsonAST.{JObject, JString} +import org.json4s.JsonAST.JObject import org.json4s.{Extraction, JValue} class PartitionIteratorLongReader( @@ -60,18 +60,17 @@ class PartitionIteratorLongReader( cb.goto(LproduceElementDone) } - override val element: EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, eltPType.loadCheapSCode(cb, rv))) + override val element: EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, eltPType.loadCheapPCode(cb, rv))) override def close(cb: EmitCodeBuilder): Unit = {} } - SStreamCode(SStream(producer.element.emitType), producer) + SStreamCode(SStream(producer.element.st, true), producer) } } def toJValue: JValue = { JObject( - "category" -> JString("PartitionIteratorLongReader"), "fullRowType" -> Extraction.decompose(fullRowType)(PartitionReader.formats), "contextType" -> Extraction.decompose(contextType)(PartitionReader.formats)) } diff --git a/hail/src/main/scala/is/hail/expr/ir/IR.scala b/hail/src/main/scala/is/hail/expr/ir/IR.scala index 96c2c31a09a..e5456c03d00 100644 --- a/hail/src/main/scala/is/hail/expr/ir/IR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/IR.scala @@ -11,7 +11,6 @@ import is.hail.io.{AbstractTypedCodecSpec, BufferSpec, TypedCodecSpec} import is.hail.rvd.RVDSpecMaker import is.hail.types.encoded._ import is.hail.types.physical._ -import is.hail.types.physical.stypes.{BooleanSingleCodeType, Float32SingleCodeType, Float64SingleCodeType, Int32SingleCodeType, Int64SingleCodeType, PTypeReferenceSingleCodeType, SType} import is.hail.types.virtual._ import is.hail.utils.{FastIndexedSeq, _} import org.json4s.{DefaultFormats, Extraction, Formats, JValue, ShortTypeHints} @@ -19,8 +18,15 @@ import org.json4s.{DefaultFormats, Extraction, Formats, JValue, ShortTypeHints} import scala.language.existentials sealed trait IR extends BaseIR { + protected[ir] var _pType: PType = null private var _typ: Type = null + def pType = { + assert(_pType != null) + + _pType + } + def typ: Type = { if (_typ == null) try { @@ -58,8 +64,9 @@ sealed trait IR extends BaseIR { def unwrap: IR = _unwrap(this) } -sealed trait TypedIR[T <: Type] extends IR { +sealed trait TypedIR[T <: Type, P <: PType] extends IR { override def typ: T = coerce[T](super.typ) + override def pType: P = coerce[P](super.pType) } object Literal { @@ -167,7 +174,7 @@ final case class Ref(name: String, var _typ: Type) extends BaseRef // Recur can't exist outside of loop // Loops can be nested, but we can't call outer loops in terms of inner loops so there can only be one loop "active" in a given context -final case class TailLoop(name: String, params: IndexedSeq[(String, IR)], body: IR) extends IR { +final case class TailLoop(name: String, params: IndexedSeq[(String, IR)], body: IR) extends IR with InferredState { lazy val paramIdx: Map[String, Int] = params.map(_._1).zipWithIndex.toMap } final case class Recur(name: String, args: IndexedSeq[IR], _typ: Type) extends BaseRef @@ -290,15 +297,20 @@ final case class StreamZip(as: IndexedSeq[IR], names: IndexedSeq[String], body: } final case class StreamMultiMerge(as: IndexedSeq[IR], key: IndexedSeq[String]) extends IR { override def typ: TStream = coerce[TStream](super.typ) + override def pType: PStream = coerce[PStream](super.pType) } - -/** - * The StreamZipJoin node assumes that input streams have distinct keys. If input streams - * do not have distinct keys, the key that is included in the result is undefined, but - * is likely the last. 
- */ final case class StreamZipJoin(as: IndexedSeq[IR], key: IndexedSeq[String], curKey: String, curVals: String, joinF: IR) extends IR { override def typ: TStream = coerce[TStream](super.typ) + override def pType: PStream = coerce[PStream](super.pType) + private var _curValsType: PCanonicalArray = null + def getOrComputeCurValsType(valsType: => PType): PCanonicalArray = { + if (_curValsType == null) _curValsType = valsType.asInstanceOf[PCanonicalArray] + _curValsType + } + def curValsType: PCanonicalArray = { + assert(_curValsType != null) + _curValsType + } } final case class StreamFilter(a: IR, name: String, cond: IR) extends IR { override def typ: TStream = coerce[TStream](super.typ) @@ -307,7 +319,11 @@ final case class StreamFlatMap(a: IR, name: String, body: IR) extends IR { override def typ: TStream = coerce[TStream](super.typ) } -final case class StreamFold(a: IR, zero: IR, accumName: String, valueName: String, body: IR) extends IR +trait InferredState extends IR { var accPTypes: Array[PType] = null } + +final case class StreamFold(a: IR, zero: IR, accumName: String, valueName: String, body: IR) extends IR with InferredState { + def accPType: PType = accPTypes.head +} object StreamFold2 { def apply(a: StreamFold): StreamFold2 = { @@ -315,12 +331,14 @@ object StreamFold2 { } } -final case class StreamFold2(a: IR, accum: IndexedSeq[(String, IR)], valueName: String, seq: IndexedSeq[IR], result: IR) extends IR { +final case class StreamFold2(a: IR, accum: IndexedSeq[(String, IR)], valueName: String, seq: IndexedSeq[IR], result: IR) extends IR with InferredState { assert(accum.length == seq.length) val nameIdx: Map[String, Int] = accum.map(_._1).zipWithIndex.toMap } -final case class StreamScan(a: IR, zero: IR, accumName: String, valueName: String, body: IR) extends IR +final case class StreamScan(a: IR, zero: IR, accumName: String, valueName: String, body: IR) extends IR with InferredState { + def accPType: PType = accPTypes.head +} final case class StreamFor(a: IR, valueName: String, body: IR) extends IR @@ -377,7 +395,7 @@ object StreamJoin { final case class StreamJoinRightDistinct(left: IR, right: IR, lKey: IndexedSeq[String], rKey: IndexedSeq[String], l: String, r: String, joinF: IR, joinType: String) extends IR -sealed trait NDArrayIR extends TypedIR[TNDArray] { +sealed trait NDArrayIR extends TypedIR[TNDArray, PNDArray] { def elementTyp: Type = typ.elementType } @@ -501,6 +519,8 @@ object InsertFields { final case class InsertFields(old: IR, fields: Seq[(String, IR)], fieldOrder: Option[IndexedSeq[String]]) extends IR { override def typ: TStruct = coerce[TStruct](super.typ) + + override def pType: PStruct = coerce[PStruct](super.pType) } object GetFieldByIdx { @@ -542,13 +562,6 @@ object Die { def apply(message: String, typ: Type, errorId: Int): Die = Die(Str(message), typ, errorId) } -/** - * the Trap node runs the `child` node with an exception handler. If the child - * throws a HailException (user exception), then we return the tuple ((msg, errorId), NA). - * If the child throws any other exception, we raise that exception. If the - * child does not throw, then we return the tuple (NA, child value). 
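getOrComputeCurValsType above is a compute-once cache: the first caller supplies the thunk, later callers see the cached result, and curValsType asserts the cache was populated. The same shape in miniature:

    // A compute-once cache, mirroring getOrComputeCurValsType / curValsType.
    final class ComputeOnce[A <: AnyRef] {
      private var cached: A = null.asInstanceOf[A]

      def getOrCompute(value: => A): A = {
        if (cached == null) cached = value
        cached
      }

      def get: A = { assert(cached != null); cached }
    }
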
- */ -final case class Trap(child: IR) extends IR final case class Die(message: IR, _typ: Type, errorId: Int) extends IR final case class ApplyIR(function: String, typeArgs: Seq[Type], args: Seq[IR]) extends IR { @@ -616,7 +629,25 @@ final case class BlockMatrixWrite(child: BlockMatrixIR, writer: BlockMatrixWrite final case class BlockMatrixMultiWrite(blockMatrices: IndexedSeq[BlockMatrixIR], writer: BlockMatrixMultiWriter) extends IR -final case class CollectDistributedArray(contexts: IR, globals: IR, cname: String, gname: String, body: IR, tsd: Option[TableStageDependency] = None) extends IR +final case class CollectDistributedArray(contexts: IR, globals: IR, cname: String, gname: String, body: IR, tsd: Option[TableStageDependency] = None) extends IR { + val bufferSpec: BufferSpec = BufferSpec.defaultUncompressed + + lazy val contextPTuple: PTuple = PCanonicalTuple(required = true, coerce[PStream](contexts.pType).elementType) + lazy val globalPTuple: PTuple = PCanonicalTuple(required = true, globals.pType) + lazy val bodyPTuple: PTuple = PCanonicalTuple(required = true, body.pType) + + lazy val contextSpec: TypedCodecSpec = TypedCodecSpec(contextPTuple, bufferSpec) + lazy val globalSpec: TypedCodecSpec = TypedCodecSpec(globalPTuple, bufferSpec) + lazy val bodySpec: TypedCodecSpec = TypedCodecSpec(bodyPTuple, bufferSpec) + + lazy val decodedContextPTuple: PTuple = contextSpec.encodedType.decodedPType(contextPTuple.virtualType).asInstanceOf[PTuple] + lazy val decodedGlobalPTuple: PTuple = globalSpec.encodedType.decodedPType(globalPTuple.virtualType).asInstanceOf[PTuple] + lazy val decodedBodyPTuple: PTuple = bodySpec.encodedType.decodedPType(bodyPTuple.virtualType).asInstanceOf[PTuple] + + def decodedContextPType: PType = decodedContextPTuple.types(0) + def decodedGlobalPType: PType = decodedGlobalPTuple.types(0) + def decodedBodyPType: PType = decodedBodyPTuple.types(0) +} object PartitionReader { implicit val formats: Formats = new DefaultFormats() { diff --git a/hail/src/main/scala/is/hail/expr/ir/InferPType.scala b/hail/src/main/scala/is/hail/expr/ir/InferPType.scala index 13d6210085c..9b8740d4a80 100644 --- a/hail/src/main/scala/is/hail/expr/ir/InferPType.scala +++ b/hail/src/main/scala/is/hail/expr/ir/InferPType.scala @@ -1,9 +1,22 @@ package is.hail.expr.ir -import is.hail.types.TypeWithRequiredness -import is.hail.types.physical._ +import is.hail.types.physical.{Int32SingleCodeType, _} +import is.hail.types.virtual._ +import is.hail.utils._ +import is.hail.HailContext +import is.hail.types.{RDict, RIterable, TypeWithRequiredness} object InferPType { + + def clearPTypes(x: BaseIR): Unit = { + x match { + case x: IR => + x._pType = null + case _ => + } + x.children.foreach(clearPTypes) + } + def getCompatiblePType(pTypes: Seq[PType]): PType = { val r = TypeWithRequiredness.apply(pTypes.head.virtualType) pTypes.foreach(r.fromPType) @@ -16,4 +29,323 @@ object InferPType { pTypes.head else result.canonicalPType(pTypes.head.virtualType) } + + def apply(ir: IR): Unit = apply(ir, Env.empty) + + private type AAB[T] = Array[BoxedArrayBuilder[RecursiveArrayBuilderElement[T]]] + + case class RecursiveArrayBuilderElement[T](value: T, nested: Option[AAB[T]]) + + def newBuilder[T](n: Int): AAB[T] = Array.fill(n)(new BoxedArrayBuilder[RecursiveArrayBuilderElement[T]]) + + def apply(ir: IR, env: Env[PType], requiredness: RequirednessAnalysis, usesAndDefs: UsesAndDefs): Unit = { + try { + requiredness.states.m.foreach { case (ir, types) => + ir.t match { + case x: StreamFold => x.accPTypes = 
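getCompatiblePType above merges several physical types by folding them into one requiredness summary and re-canonicalizing only when they disagree. A toy version of that merge over a hypothetical SimplePType carrying just a virtual-type name and a requiredness bit:

    // Toy physical type: a virtual-type name plus a requiredness bit.
    final case class SimplePType(virtualName: String, required: Boolean)

    object CompatibleTypeSketch {
      // All candidates are assumed to share one virtual type; the merged type is
      // required only if every candidate is, echoing the requiredness union
      // computed before canonicalization.
      def getCompatible(pts: Seq[SimplePType]): SimplePType = {
        require(pts.nonEmpty && pts.forall(_.virtualName == pts.head.virtualName))
        if (pts.forall(_ == pts.head)) pts.head
        else SimplePType(pts.head.virtualName, pts.forall(_.required))
      }
    }
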
types.map(r => r.canonicalPType(x.zero.typ)).toArray + case x: StreamScan => x.accPTypes = types.map(r => r.canonicalPType(x.zero.typ)).toArray + case x: StreamFold2 => + x.accPTypes = x.accum.zip(types).map { case ((_, arg), r) => r.canonicalPType(arg.typ) }.toArray + case x: TailLoop => + x.accPTypes = x.params.zip(types).map { case ((_, arg), r) => r.canonicalPType(arg.typ) }.toArray + } + } + _inferWithRequiredness(ir, env, requiredness, usesAndDefs) + } catch { + case e: Exception => + throw new RuntimeException(s"error while inferring IR:\n${Pretty(ir)}", e) + } + VisitIR(ir) { case (node: IR) => + if (node._pType == null) + throw new RuntimeException(s"ptype inference failure: node not inferred:\n${Pretty(node)}\n ** Full IR: **\n${Pretty(ir)}") + } + } + + def apply(ir: IR, env: Env[PType]): Unit = { + val usesAndDefs = ComputeUsesAndDefs(ir, errorIfFreeVariables = false) + val requiredness = Requiredness.apply(ir, usesAndDefs, null, env) // Value IR inference doesn't need context + apply(ir, env, requiredness, usesAndDefs) + } + + private def lookup(name: String, r: TypeWithRequiredness, defNode: IR): PType = defNode match { + case Let(`name`, value, _) => value.pType + case TailLoop(`name`, _, body) => r.canonicalPType(body.typ) + case x: TailLoop => x.accPTypes(x.paramIdx(name)) + case ArraySort(a, l, r, c) => coerce[PStream](a.pType).elementType + case StreamMap(a, `name`, _) => coerce[PStream](a.pType).elementType + case x@StreamZip(as, _, _, _) => + coerce[PStream](as(x.nameIdx(name)).pType).elementType.setRequired(r.required) + case StreamZipJoin(as, key, `name`, _, joinF) => + assert(r.required) + getCompatiblePType(as.map { a => + PCanonicalStruct(true, key.map { k => + k -> coerce[PStruct](coerce[PStream](a.pType).elementType).fieldType(k) + }: _*) + }, r).setRequired(true) + case x@StreamZipJoin(as, key, _, `name`, joinF) => + assert(r.required) + assert(!r.asInstanceOf[RIterable].elementType.required) + x.getOrComputeCurValsType { + PCanonicalArray( + getCompatiblePType( + as.map(a => coerce[PStruct](coerce[PStream](a.pType).elementType)), + r.asInstanceOf[RIterable].elementType).setRequired(false), + required = true) + } + case StreamFilter(a, `name`, _) => coerce[PStream](a.pType).elementType + case StreamFlatMap(a, `name`, _) => coerce[PStream](a.pType).elementType + case StreamFor(a, `name`, _) => coerce[PStream](a.pType).elementType + case StreamFold(a, _, _, `name`, _) => coerce[PStream](a.pType).elementType + case x: StreamFold => x.accPType + case StreamScan(a, _, _, `name`, _) => coerce[PStream](a.pType).elementType + case x: StreamScan => x.accPType + case StreamFold2(a, _, `name`, _, _) => coerce[PStream](a.pType).elementType + case x: StreamFold2 => x.accPTypes(x.nameIdx(name)) + case StreamJoinRightDistinct(left, _, _, _, `name`, _, _, joinType) => + coerce[PStream](left.pType).elementType.orMissing(joinType == "left") + case StreamJoinRightDistinct(_, right, _, _, _, `name`, _, _) => + coerce[PStream](right.pType).elementType.setRequired(false) + case RunAggScan(a, `name`, _, _, _, _) => coerce[PStream](a.pType).elementType + case NDArrayMap(nd, `name`, _) => coerce[PNDArray](nd.pType).elementType + case NDArrayMap2(left, _, `name`, _, _) => coerce[PNDArray](left.pType ).elementType + case NDArrayMap2(_, right, _, `name`, _) => coerce[PNDArray](right.pType).elementType + case x@CollectDistributedArray(_, _, `name`, _, _, _) => x.decodedContextPType + case x@CollectDistributedArray(_, _, _, `name`, _, _) => x.decodedGlobalPType + case x@ShuffleWith(_, _, _, 
_, `name`, _, _) => x.shufflePType + case _ => throw new RuntimeException(s"$name not found in definition \n${ Pretty(defNode) }") + } + + private def _inferWithRequiredness(node: IR, env: Env[PType], requiredness: RequirednessAnalysis, usesAndDefs: UsesAndDefs): Unit = { + if (node._pType != null) + throw new RuntimeException(node.toString) + node.children.foreach { + case x: IR => _inferWithRequiredness(x, env, requiredness, usesAndDefs) + case c => throw new RuntimeException(s"unsupported node:\n${Pretty(c)}") + } + node._pType = node match { + case x if x.typ == TVoid => PVoid + case _: I32 | _: I64 | _: F32 | _: F64 | _: Str | _: UUID4 | _: Literal | _: True | _: False + | _: Cast | _: NA | _: Die | _: IsNA | _: ArrayZeros | _: ArrayLen | _: StreamLen + | _: LowerBoundOnOrderedCollection | _: ApplyBinaryPrimOp + | _: ApplyUnaryPrimOp | _: ApplyComparisonOp | _: WriteValue | _: Consume + | _: NDArrayAgg | _: ShuffleWrite | _: AggStateValue | _: CombOpValue | _: InitFromSerializedValue => + requiredness(node).canonicalPType(node.typ) + case EncodedLiteral(codec, _) => + codec.decodedPType() + case CastRename(v, typ) => v.pType.deepRename(typ) + case x: BaseRef if usesAndDefs.free.contains(RefEquality(x)) => + env.lookup(x.name) + case x: BaseRef => + lookup(x.name, requiredness(node), usesAndDefs.defs.lookup(node).asInstanceOf[IR]) + case MakeNDArray(data, shape, rowMajor, _) => + val nElem = shape.pType.asInstanceOf[PTuple].size + PCanonicalNDArray(coerce[PArray](data.pType).elementType.setRequired(true), nElem, requiredness(node).required) + case StreamRange(start: IR, stop: IR, step: IR, requiresMemoryManagementPerElement) => + assert(start.pType isOfType stop.pType) + assert(start.pType isOfType step.pType) + PCanonicalStream(start.pType.setRequired(true), required = requiredness(node).required) + case Let(_, _, body) => body.pType + case TailLoop(_, _, body) => body.pType + case a: AbstractApplyNode[_] => a.implementation.returnPType(a.returnType, a.args.map(_.pType)) + case ArrayRef(a, i, s) => + assert(i.pType isOfType PInt32()) + coerce[PArray](a.pType).elementType.setRequired(requiredness(node).required) + case ArraySort(a, leftName, rightName, lessThan) => + assert(lessThan.pType.isOfType(PBoolean())) + PCanonicalArray(coerce[PIterable](a.pType).elementType, requiredness(node).required) + case ToSet(a) => + PCanonicalSet(coerce[PIterable](a.pType).elementType, requiredness(node).required) + case ToDict(a) => + val elt = coerce[PBaseStruct](coerce[PIterable](a.pType).elementType) + PCanonicalDict(elt.types(0), elt.types(1), requiredness(node).required) + case ToArray(a) => + val elt = coerce[PIterable](a.pType).elementType + PCanonicalArray(elt, requiredness(node).required) + case CastToArray(a) => + val elt = coerce[PIterable](a.pType).elementType + PCanonicalArray(elt, requiredness(node).required) + case ToStream(a, requiresMemoryManagementPerElement) => + val elt = coerce[PIterable](a.pType).elementType + PCanonicalStream(elt, required = requiredness(node).required) + case GroupByKey(collection) => + val r = coerce[RDict](requiredness(node)) + val elt = coerce[PBaseStruct](coerce[PStream](collection.pType).elementType) + PCanonicalDict(elt.types(0), PCanonicalArray(elt.types(1), r.valueType.required), r.required) + case StreamTake(a, len) => + a.pType.setRequired(requiredness(node).required) + case StreamDrop(a, len) => + a.pType.setRequired(requiredness(node).required) + case StreamGrouped(a, size) => + val r = coerce[RIterable](requiredness(node)) + assert(size.pType 
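The lookup helper above leans on Scala's backquoted-identifier patterns: a pattern like StreamMap(a, `name`, _) matches only when the binder equals the name being resolved, instead of introducing a fresh binding. A minimal illustration of that idiom with hypothetical node types:

    object BacktickMatchSketch {
      sealed trait Node
      final case class StreamMapLike(source: String, binder: String) extends Node
      final case class StreamFilterLike(source: String, binder: String) extends Node

      // Return the source whose binder introduced `name`, as InferPType.lookup
      // does when resolving a Ref against its defining node.
      def lookup(name: String, defNode: Node): String = defNode match {
        case StreamMapLike(src, `name`)    => src
        case StreamFilterLike(src, `name`) => src
        case other => throw new RuntimeException(s"$name not bound by $other")
      }
    }
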
isOfType PInt32()) + val innerPType = coerce[PStream](a.pType) + PCanonicalStream(innerPType.setRequired(r.elementType.required), r.required) + case StreamGroupByKey(a, key) => + val r = coerce[RIterable](requiredness(node)) + val innerPType = coerce[PStream](a.pType) + PCanonicalStream(innerPType.setRequired(r.elementType.required), r.required) + case StreamMap(a, name, body) => + PCanonicalStream(body.pType, requiredness(node).required) + case StreamZip(as, names, body, behavior) => + PCanonicalStream( + body.pType, + requiredness(node).required) + case StreamZipJoin(as, _, curKey, curVals, joinF) => + val r = requiredness(node).asInstanceOf[RIterable] + val rEltType = joinF.pType + PCanonicalStream( + rEltType, + r.required) + case StreamMultiMerge(as, _) => + val r = coerce[RIterable](requiredness(node)) + assert(r.elementType.required) + PCanonicalStream( + getCompatiblePType(as.map(_.pType.asInstanceOf[PStream].elementType), r.elementType), + r.required) + case StreamFilter(a, name, cond) => a.pType + case StreamFlatMap(a, name, body) => + val innerStreamType = coerce[PStream](body.pType) + PCanonicalStream( + innerStreamType.elementType, + requiredness(node).required) + case x: StreamFold => + x.accPType.setRequired(requiredness(node).required) + case x: StreamFold2 => + x.result.pType.setRequired(requiredness(node).required) + case x@StreamScan(a, _, _, _, body) => + val r = coerce[RIterable](requiredness(node)) + PCanonicalStream( + x.accPType.setRequired(r.elementType.required), + r.required) + case StreamJoinRightDistinct(left, right, _, _, _, _, join, _) => + PCanonicalStream( + join.pType, + requiredness(node).required) + case NDArrayShape(nd) => + val r = nd.pType.asInstanceOf[PCanonicalNDArray].shapeType + r.setRequired(requiredness(node).required) + case NDArrayReshape(nd, shape) => + val shapeT = shape.pType.asInstanceOf[PTuple] + PCanonicalNDArray(coerce[PNDArray](nd.pType).elementType, shapeT.size, + requiredness(node).required) + case NDArrayConcat(nds, _) => + val ndtyp = coerce[PNDArray](coerce[PArray](nds.pType).elementType) + ndtyp.setRequired(requiredness(node).required) + case NDArrayMap(nd, name, body) => + val ndPType = nd.pType.asInstanceOf[PNDArray] + PCanonicalNDArray(body.pType.setRequired(true), ndPType.nDims, requiredness(node).required) + case NDArrayMap2(l, r, lName, rName, body) => + val lPType = l.pType.asInstanceOf[PNDArray] + PCanonicalNDArray(body.pType.setRequired(true), lPType.nDims, requiredness(node).required) + case NDArrayReindex(nd, indexExpr) => + PCanonicalNDArray(coerce[PNDArray](nd.pType).elementType, indexExpr.length, requiredness(node).required) + case NDArrayRef(nd, idxs, _) => + coerce[PNDArray](nd.pType).elementType.setRequired(requiredness(node).required) + case NDArraySlice(nd, slices) => + val remainingDims = coerce[PTuple](slices.pType).types.filter(_.isInstanceOf[PTuple]) + PCanonicalNDArray(coerce[PNDArray](nd.pType).elementType, remainingDims.length, requiredness(node).required) + case NDArrayFilter(nd, filters) => coerce[PNDArray](nd.pType) + case NDArrayMatMul(l, r) => + val lTyp = coerce[PNDArray](l.pType) + val rTyp = coerce[PNDArray](r.pType) + PCanonicalNDArray(lTyp.elementType, TNDArray.matMulNDims(lTyp.nDims, rTyp.nDims), requiredness(node).required) + case NDArrayQR(child, mode) => NDArrayQR.pType(mode, child.pType.required) + case NDArraySVD(child, _, computeUV) => NDArraySVD.pTypes(computeUV, child.pType.required) + case NDArrayInv(_) => NDArrayInv.pType + case MakeStruct(fields) => + 
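Each case in the match above assigns a node's physical type only after its children were inferred, so the whole pass is a single bottom-up walk that also consults the node's requiredness. A miniature of that walk over a hypothetical three-node IR:

    // Hypothetical toy IR; infer assigns each node a type string bottom-up,
    // checking children first, as _inferWithRequiredness does for PTypes.
    sealed trait ToyIR { var inferred: String = null }
    final case class I32Lit(x: Int) extends ToyIR
    final case class BoolLit(b: Boolean) extends ToyIR
    final case class IfNode(cond: ToyIR, cnsq: ToyIR, altr: ToyIR) extends ToyIR

    object ToyInfer {
      def infer(node: ToyIR): String = {
        val t = node match {
          case _: I32Lit  => "int32"
          case _: BoolLit => "bool"
          case IfNode(c, l, r) =>
            assert(infer(c) == "bool")
            val lt = infer(l); val rt = infer(r)
            assert(lt == rt)
            lt
        }
        node.inferred = t
        t
      }
    }
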
PCanonicalStruct(requiredness(node).required, + fields.map { case (name, a) => (name, a.pType) }: _ *) + case SelectFields(old, fields) => + if(HailContext.getFlag("use_spicy_ptypes") != null) { + PSubsetStruct(coerce[PStruct](old.pType), fields:_*) + } else { + val tbs = coerce[PStruct](old.pType) + tbs.selectFields(fields.toFastIndexedSeq) + } + case InsertFields(old, fields, fieldOrder) => + val tbs = coerce[PStruct](old.pType) + val s = tbs.insertFields(fields.map(f => { (f._1, f._2.pType) })) + fieldOrder.map { fds => + assert(fds.length == s.size) + PCanonicalStruct(tbs.required, fds.map(f => f -> s.fieldType(f)): _*) + }.getOrElse(s) + case GetField(o, name) => + val t = coerce[PStruct](o.pType) + if (t.index(name).isEmpty) + throw new RuntimeException(s"$name not in $t") + t.field(name).typ.setRequired(requiredness(node).required) + case MakeTuple(values) => + PCanonicalTuple(values.map { case (idx, v) => + PTupleField(idx, v.pType) + }.toFastIndexedSeq, requiredness(node).required) + case MakeArray(irs, t) => + val r = coerce[RIterable](requiredness(node)) + if (irs.isEmpty) r.canonicalPType(t) else + PCanonicalArray(getCompatiblePType(irs.map(_.pType), r.elementType), r.required) + case GetTupleElement(o, idx) => + val t = coerce[PTuple](o.pType) + t.fields(t.fieldIndex(idx)).typ.setRequired(requiredness(node).required) + case If(cond, cnsq, altr) => + assert(cond.pType isOfType PBoolean()) + val r = requiredness(node) + getCompatiblePType(FastIndexedSeq(cnsq.pType, altr.pType), r).setRequired(r.required) + case Coalesce(values) => + val r = requiredness(node) + getCompatiblePType(values.map(_.pType), r).setRequired(r.required) + case In(_, ept) => ept match { + case SingleCodeEmitParamType(required, sct) => sct match { + case StreamSingleCodeType(sr, eltType) => PCanonicalStream(eltType, required) + case Int32SingleCodeType => PInt32(required) + case Int64SingleCodeType => PInt64(required) + case Float32SingleCodeType => PFloat32(required) + case Float64SingleCodeType => PFloat64(required) + case BooleanSingleCodeType => PBoolean(required) + case PTypeReferenceSingleCodeType(pt) => pt.setRequired(required) + } + case PCodeEmitParamType(pt) => pt + } + case x: CollectDistributedArray => + PCanonicalArray(x.decodedBodyPType, requiredness(node).required) + case ReadPartition(context, rowType, reader) => + val child = reader.rowPType(rowType) + PCanonicalStream(child, required = requiredness(node).required) + case WritePartition(value, writeCtx, writer) => + writer.returnPType(writeCtx.pType, coerce[PStream](value.pType)) + case ReadValue(path, spec, requestedType) => + spec.decodedPType(requestedType).setRequired(requiredness(node).required) + case MakeStream(irs, t, requiresMemoryManagementPerElement) => + val r = coerce[RIterable](requiredness(node)) + if (irs.isEmpty) r.canonicalPType(t) else + PCanonicalStream(getCompatiblePType(irs.map(_.pType), r.elementType), r.required) + case x@ResultOp(resultIdx, sigs) => + PCanonicalTuple(true, sigs.map(_.pResultType): _*) + case x@RunAgg(body, result, signature) => result.pType + case x@RunAggScan(array, name, init, seq, result, signature) => + PCanonicalStream( + result.pType, + array.pType.required) + case ShuffleWith(keyFields, rowType, rowEType, keyEType, name, writer, readers) => + val r = requiredness(node) + assert(r.required == readers.pType.required) + readers.pType + case ShuffleWrite(id, rows) => + val r = requiredness(node) + assert(r.required) + PCanonicalBinary(true) + case ShufflePartitionBounds(id, nPartitions) => + val 
r = requiredness(node) + assert(r.required) + PCanonicalStream( + coerce[TShuffle](id.typ).keyDecodedPType, + required = true) + case ShuffleRead(id, keyRange) => + val r = requiredness(node) + assert(r.required) + PCanonicalStream( + coerce[TShuffle](id.typ).rowDecodedPType, + required = true) + } + if (node.pType.virtualType != node.typ) + throw new RuntimeException(s"pType.virtualType: ${node.pType.virtualType}, vType = ${node.typ}\n ir=$node") + } } diff --git a/hail/src/main/scala/is/hail/expr/ir/InferType.scala b/hail/src/main/scala/is/hail/expr/ir/InferType.scala index 53ddc49ac04..254f8546852 100644 --- a/hail/src/main/scala/is/hail/expr/ir/InferType.scala +++ b/hail/src/main/scala/is/hail/expr/ir/InferType.scala @@ -49,7 +49,6 @@ object InferType { case _: DeserializeAggs => TVoid case _: Begin => TVoid case Die(_, t, _) => t - case Trap(child) => TTuple(TTuple(TString, TInt32), child.typ) case If(cond, cnsq, altr) => assert(cond.typ == TBoolean) assert(cnsq.typ == altr.typ) @@ -91,7 +90,7 @@ object InferType { case ToDict(a) => val elt = coerce[TBaseStruct](coerce[TStream](a.typ).elementType) TDict(elt.types(0), elt.types(1)) - case ta@ToArray(a) => + case ToArray(a) => val elt = coerce[TStream](a.typ).elementType TArray(elt) case CastToArray(a) => diff --git a/hail/src/main/scala/is/hail/expr/ir/Interpret.scala b/hail/src/main/scala/is/hail/expr/ir/Interpret.scala index 33e765ea877..f80dc15f952 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Interpret.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Interpret.scala @@ -3,18 +3,15 @@ package is.hail.expr.ir import is.hail.annotations._ import is.hail.asm4s._ import is.hail.expr.ir.lowering.LoweringPipeline -import is.hail.types.physical.{PTuple, PType, stypes} +import is.hail.types.physical.{PTuple, PType, PTypeReferenceSingleCodeType, SingleCodeType} import is.hail.types.virtual._ import is.hail.io.BufferSpec import is.hail.linalg.BlockMatrix import is.hail.rvd.RVDContext import is.hail.utils._ import is.hail.HailContext -import is.hail.types.physical.stypes.{PTypeReferenceSingleCodeType, SingleCodeType} import org.apache.spark.sql.Row -import scala.collection.mutable - object Interpret { type Agg = (IndexedSeq[Row], TStruct) @@ -133,7 +130,7 @@ object Interpret { case Add() => ll + rr case Subtract() => ll - rr case Multiply() => ll * rr - case FloatingPointDivide() => ll.toDouble / rr.toDouble + case FloatingPointDivide() => ll.toFloat / rr.toFloat case RoundToNegInfDivide() => java.lang.Math.floorDiv(ll, rr) case BitAnd() => ll & rr case BitOr() => ll | rr @@ -157,7 +154,7 @@ object Interpret { case Add() => ll + rr case Subtract() => ll - rr case Multiply() => ll * rr - case FloatingPointDivide() => ll.toDouble / rr.toDouble + case FloatingPointDivide() => ll.toFloat / rr.toFloat case RoundToNegInfDivide() => java.lang.Math.floorDiv(ll, rr) case BitAnd() => ll & rr case BitOr() => ll | rr @@ -517,7 +514,7 @@ object Interpret { for (i <- 0 until k) { advance(i) } - val builder = new mutable.ArrayBuffer[Any]() + val builder = new BoxedArrayBuilder[Any]() while (tournament(0) != k) { val i = tournament(0) val elt = Array.fill[Row](k)(null) @@ -532,7 +529,7 @@ object Interpret { } builder += interpret(joinF, env.bind(curKeyName -> curKey, curValsName -> elt.toFastIndexedSeq), args) } - builder.toFastIndexedSeq + builder.result().toFastIndexedSeq } case StreamFilter(a, name, cond) => val aValue = interpret(a, env, args) @@ -707,12 +704,6 @@ object Interpret { case Die(message, typ, errorId) => val message_ = 
interpret(message).asInstanceOf[String] fatal(if (message_ != null) message_ else "", errorId) - case Trap(child) => - try { - Row(null, interpret(child)) - } catch { - case e: HailException => Row(Row(e.msg, e.errorId), null) - } case ir@ApplyIR(function, _, functionArgs) => interpret(ir.explicitNode, env, args) case ApplySpecial("lor", _, Seq(left_, right_), _) => diff --git a/hail/src/main/scala/is/hail/expr/ir/MatrixIR.scala b/hail/src/main/scala/is/hail/expr/ir/MatrixIR.scala index 53f92888c70..7a9aa9bf159 100644 --- a/hail/src/main/scala/is/hail/expr/ir/MatrixIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/MatrixIR.scala @@ -20,7 +20,6 @@ import is.hail.variant._ import org.apache.spark.sql.Row import org.apache.spark.storage.StorageLevel import org.json4s._ -import org.json4s.jackson.JsonMethods object MatrixIR { def read(fs: FS, path: String, dropCols: Boolean = false, dropRows: Boolean = false, requestedType: Option[MatrixType] = None): MatrixIR = { @@ -106,19 +105,11 @@ trait MatrixReader { def lower(mr: MatrixRead): TableIR def toJValue: JValue - - def renderShort(): String - - def defaultRender(): String = { - StringEscapeUtils.escapeString(JsonMethods.compact(toJValue)) - } } abstract class MatrixHybridReader extends TableReader with MatrixReader { lazy val fullType: TableType = fullMatrixType.canonicalTableType - override def defaultRender(): String = super.defaultRender() - override def lower(mr: MatrixRead): TableIR = { var tr: TableIR = TableRead(mr.typ.canonicalTableType, mr.dropRows, this) if (mr.dropCols) { @@ -204,8 +195,6 @@ class MatrixNativeReader( ) extends MatrixReader { def pathsUsed: Seq[String] = FastSeq(params.path) - override def renderShort(): String = s"(MatrixNativeReader ${ params.path } ${ params.options.map(_.renderShort()).getOrElse("") })" - lazy val columnCount: Option[Int] = Some(spec.colsSpec .partitionCounts .sum @@ -312,8 +301,6 @@ class MatrixRangeReader( rowType = TStruct("row_idx" -> TInt32), entryType = TStruct.empty) - override def renderShort(): String = s"(MatrixRangeReader $params $nPartitionsAdj)" - val columnCount: Option[Int] = Some(params.nCols) lazy val partitionCounts: Option[IndexedSeq[Long]] = Some(partition(params.nRows, nPartitionsAdj).map(_.toLong)) diff --git a/hail/src/main/scala/is/hail/expr/ir/MatrixValue.scala b/hail/src/main/scala/is/hail/expr/ir/MatrixValue.scala index 490f9533b61..4628869d701 100644 --- a/hail/src/main/scala/is/hail/expr/ir/MatrixValue.scala +++ b/hail/src/main/scala/is/hail/expr/ir/MatrixValue.scala @@ -96,7 +96,7 @@ case class MatrixValue( FileFormat.version.rep, is.hail.HAIL_PRETTY_VERSION, "../references", - typ.colsTableType.copy(key = FastIndexedSeq[String]()), + typ.colsTableType, Map("globals" -> RVDComponentSpec("../globals/rows"), "rows" -> RVDComponentSpec("rows"), "partition_counts" -> PartitionCountsComponentSpec(partitionCounts))) diff --git a/hail/src/main/scala/is/hail/expr/ir/MatrixWriter.scala b/hail/src/main/scala/is/hail/expr/ir/MatrixWriter.scala index 5b38d8b0f1e..1f1a3232ba5 100644 --- a/hail/src/main/scala/is/hail/expr/ir/MatrixWriter.scala +++ b/hail/src/main/scala/is/hail/expr/ir/MatrixWriter.scala @@ -1,33 +1,29 @@ package is.hail.expr.ir +import java.io.OutputStream import is.hail.annotations.Region import is.hail.asm4s._ +import is.hail.expr.JSONAnnotationImpex import is.hail.expr.ir.functions.MatrixWriteBlockMatrix import is.hail.expr.ir.lowering.{LowererUnsupportedOperation, TableStage} import is.hail.expr.ir.streams.StreamProducer -import 
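The Trap interpretation removed above had a simple contract: evaluate the child under a handler and return ((msg, errorId), NA) if a user-level exception was thrown, or (NA, value) otherwise. A plain-Scala sketch of that semantics, using Either and a hypothetical UserError in place of HailException and Row:

    // Hypothetical user-level error, standing in for HailException.
    final case class UserError(msg: String, errorId: Int) extends RuntimeException(msg)

    object TrapSemanticsSketch {
      // Left((msg, errorId)) plays the role of Row(Row(msg, errorId), null);
      // Right(v) plays the role of Row(null, v). Non-user exceptions propagate.
      def trap[A](child: => A): Either[(String, Int), A] =
        try Right(child)
        catch { case e: UserError => Left((e.msg, e.errorId)) }
    }
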
is.hail.expr.{JSONAnnotationImpex, Nat} import is.hail.io._ import is.hail.io.fs.FS import is.hail.io.gen.{ExportBGEN, ExportGen} import is.hail.io.index.StagedIndexWriter import is.hail.io.plink.ExportPlink import is.hail.io.vcf.ExportVCF -import is.hail.linalg.BlockMatrix -import is.hail.rvd.{IndexSpec, RVDPartitioner, RVDSpecMaker} -import is.hail.types.encoded.{EBaseStruct, EBlockMatrixNDArray, EType} -import is.hail.types.physical.stypes.SCode +import is.hail.rvd.{RVDPartitioner, RVDSpecMaker} +import is.hail.types.encoded.{EBaseStruct, EType} import is.hail.types.physical.stypes.interfaces._ -import is.hail.types.physical.{PCanonicalBaseStruct, PCanonicalString, PCanonicalStruct, PInt64, PStream, PStruct, PType} +import is.hail.types.physical.{PBaseStructCode, PCanonicalBaseStruct, PCanonicalString, PCanonicalStruct, PCode, PIndexableValue, PInt64, PInt64Required, PStream, PStruct, PType} import is.hail.types.virtual._ -import is.hail.types._ +import is.hail.types.{MatrixType, RTable, TableType} import is.hail.utils._ import is.hail.utils.richUtils.ByteTrackingOutputStream -import org.apache.spark.sql.Row import org.json4s.jackson.JsonMethods import org.json4s.{DefaultFormats, Formats, ShortTypeHints} -import java.io.OutputStream - object MatrixWriter { implicit val formats: Formats = new DefaultFormats() { override val typeHints = ShortTypeHints( @@ -118,15 +114,12 @@ case class MatrixNativeWriter( Str(partFile(1, 0)), globalWriter) val globalTableWriter = TableSpecWriter(s"$path/globals", TableType(tm.globalType, FastIndexedSeq(), TStruct.empty), "rows", "globals", "../references", log = false) - val colTableWriter = TableSpecWriter(s"$path/cols", tm.colsTableType.copy(key = FastIndexedSeq[String]()), "rows", "../globals/rows", "../references", log = false) + val colTableWriter = TableSpecWriter(s"$path/cols", tm.colsTableType, "rows", "../globals/rows", "../references", log = false) val rowTableWriter = TableSpecWriter(s"$path/rows", tm.rowsTableType, "rows", "../globals/rows", "../references", log = false) val entriesTableWriter = TableSpecWriter(s"$path/entries", TableType(tm.entriesRVType, FastIndexedSeq(), tm.globalType), "rows", "../globals/rows", "../references", log = false) val matrixWriter = MatrixSpecWriter(path, tm, "rows/rows", "globals/rows", "cols/rows", "entries/rows", "references", log = true) - val rowsIndexSpec = IndexSpec.defaultAnnotation("../../index", coerce[PStruct](pKey)) - val entriesIndexSpec = IndexSpec.defaultAnnotation("../../index", coerce[PStruct](pKey), withOffsetField = true) - RelationalWriter.scoped(path, overwrite = overwrite, Some(t.typ))( RelationalWriter.scoped(s"$path/globals", overwrite = false, None)( RelationalWriter.scoped(s"$path/cols", overwrite = false, None)( @@ -145,8 +138,8 @@ case class MatrixNativeWriter( WriteMetadata(MakeArray(GetField(colInfo, "partitionCounts")), colTableWriter), bindIR(ToArray(mapIR(ToStream(partInfo)) { fc => GetField(fc, "filePath") })) { files => Begin(FastIndexedSeq( - WriteMetadata(files, RVDSpecWriter(s"$path/rows/rows", RVDSpecMaker(rowSpec, lowered.partitioner, rowsIndexSpec))), - WriteMetadata(files, RVDSpecWriter(s"$path/entries/rows", RVDSpecMaker(entrySpec, RVDPartitioner.unkeyed(lowered.numPartitions), entriesIndexSpec))))) + WriteMetadata(files, RVDSpecWriter(s"$path/rows/rows", RVDSpecMaker(rowSpec, lowered.partitioner))), + WriteMetadata(files, RVDSpecWriter(s"$path/entries/rows", RVDSpecMaker(entrySpec, RVDPartitioner.unkeyed(lowered.numPartitions)))))) }, 
bindIR(ToArray(mapIR(ToStream(partInfo)) { fc => GetField(fc, "partitionCounts") })) { counts => Begin(FastIndexedSeq( @@ -186,13 +179,12 @@ case class SplitPartitionNativeWriter( context: EmitCode, region: Value[Region]): IEmitCode = { val keyType = ifIndexed { index.get._2 } - val iAnnotationType = PCanonicalStruct(required = true, "entries_offset" -> PInt64()) + val iAnnotationType = PCanonicalStruct(required = true, "entries_offset" -> PInt64Required) val mb = cb.emb - val indexWriter = ifIndexed { StagedIndexWriter.withDefaults(keyType, mb.ecb, annotationType = iAnnotationType) } - context.toI(cb).map(cb) { ctxCode: SCode => + context.toI(cb).map(cb) { ctxCode: PCode => val result = mb.newLocal[Long]("write_result") val filename1 = mb.newLocal[String]("filename1") val os1 = mb.newLocal[ByteTrackingOutputStream]("write_os1") @@ -210,7 +202,7 @@ case class SplitPartitionNativeWriter( indexWriter.add(cb, { IEmitCode.present(cb, keyType.asInstanceOf[PCanonicalBaseStruct] .constructFromFields(cb, stream.elementRegion, - keyType.fields.map(f => EmitCode.fromI(cb.emb)(cb => row.loadField(cb, f.name))), + keyType.fields.map(f => EmitCode.fromI(cb.emb)(cb => row.loadField(cb, f.name).typecast[PCode])), deepCopy = false)) }, ob1.invoke[Long]("indexOffset"), { IEmitCode.present(cb, @@ -231,37 +223,39 @@ case class SplitPartitionNativeWriter( cb.assign(n, n + 1L) } - val pctx = ctxCode.memoize(cb, "context") - cb.assign(filename1, pctx.asString.loadString()) - if (hasIndex) { - val indexFile = cb.newLocal[String]("indexFile") - cb.assign(indexFile, const(index.get._1).concat(filename1).concat(".idx")) - indexWriter.init(cb, indexFile) - } - cb.assign(filename2, const(partPrefix2).concat(filename1)) - cb.assign(filename1, const(partPrefix1).concat(filename1)) - cb.assign(os1, Code.newInstance[ByteTrackingOutputStream, OutputStream](mb.create(filename1))) - cb.assign(os2, Code.newInstance[ByteTrackingOutputStream, OutputStream](mb.create(filename2))) - cb.assign(ob1, spec1.buildCodeOutputBuffer(Code.checkcast[OutputStream](os1))) - cb.assign(ob2, spec2.buildCodeOutputBuffer(Code.checkcast[OutputStream](os2))) - cb.assign(n, 0L) - - stream.memoryManagedConsume(region, cb) { cb => - writeFile(cb, stream.element) - } + PCode(pResultType, EmitCodeBuilder.scopedCode(mb) { cb: EmitCodeBuilder => + val pctx = ctxCode.memoize(cb, "context") + cb.assign(filename1, pctx.asString.loadString()) + if (hasIndex) { + val indexFile = cb.newLocal[String]("indexFile") + cb.assign(indexFile, const(index.get._1).concat(filename1)) + indexWriter.init(cb, indexFile) + } + cb.assign(filename2, const(partPrefix2).concat(filename1)) + cb.assign(filename1, const(partPrefix1).concat(filename1)) + cb.assign(os1, Code.newInstance[ByteTrackingOutputStream, OutputStream](mb.create(filename1))) + cb.assign(os2, Code.newInstance[ByteTrackingOutputStream, OutputStream](mb.create(filename2))) + cb.assign(ob1, spec1.buildCodeOutputBuffer(Code.checkcast[OutputStream](os1))) + cb.assign(ob2, spec2.buildCodeOutputBuffer(Code.checkcast[OutputStream](os2))) + cb.assign(n, 0L) + + stream.memoryManagedConsume(region, cb) { cb => + writeFile(cb, stream.element) + } - cb += ob1.writeByte(0.asInstanceOf[Byte]) - cb += ob2.writeByte(0.asInstanceOf[Byte]) - cb.assign(result, pResultType.allocate(region)) - if (hasIndex) - indexWriter.close(cb) - cb += ob1.flush() - cb += ob2.flush() - cb += os1.invoke[Unit]("close") - cb += os2.invoke[Unit]("close") - filenameType.storeAtAddress(cb, pResultType.fieldOffset(result, "filePath"), region, pctx, 
false) - cb += Region.storeLong(pResultType.fieldOffset(result, "partitionCounts"), n) - pResultType.loadCheapSCode(cb, result.get) + cb += ob1.writeByte(0.asInstanceOf[Byte]) + cb += ob2.writeByte(0.asInstanceOf[Byte]) + cb.assign(result, pResultType.allocate(region)) + if (hasIndex) + indexWriter.close(cb) + cb += ob1.flush() + cb += ob2.flush() + cb += os1.invoke[Unit]("close") + cb += os2.invoke[Unit]("close") + filenameType.storeAtAddress(cb, pResultType.fieldOffset(result, "filePath"), region, pctx, false) + cb += Region.storeLong(pResultType.fieldOffset(result, "partitionCounts"), n) + result.get + }) } } } @@ -297,16 +291,16 @@ case class MatrixSpecWriter(path: String, typ: MatrixType, rowRelPath: String, g cb: EmitCodeBuilder, region: Value[Region]): Unit = { cb += cb.emb.getFS.invoke[String, Unit]("mkDir", path) - val pc = writeAnnotations.get(cb, "write annotations can't be missing!").asBaseStruct + val pc = writeAnnotations.get(cb, "write annotations can't be missing!").asInstanceOf[PBaseStructCode] val partCounts = cb.newLocal[Array[Long]]("partCounts") val c = pc.memoize(cb, "matrixPartCounts") - val a = c.loadField(cb, "rows").get(cb).asIndexable.memoize(cb, "rowCounts") + val a = c.loadField(cb, "rows").get(cb).memoize(cb, "rowCounts").asInstanceOf[PIndexableValue] val n = cb.newLocal[Int]("n", a.loadLength()) val i = cb.newLocal[Int]("i", 0) cb.assign(partCounts, Code.newArray[Long](n)) cb.whileLoop(i < n, { - val count = a.loadElement(cb, i).get(cb, "part count can't be missing!") + val count = a.loadElement(cb, i).get(cb, "part count can't be missing!").asPCode cb += partCounts.update(i, count.asInt64.longCode(cb)) cb.assign(i, i + 1) }) @@ -352,119 +346,6 @@ case class MatrixBlockMatrixWriter( blockSize: Int ) extends MatrixWriter { def apply(ctx: ExecuteContext, mv: MatrixValue): Unit = MatrixWriteBlockMatrix(ctx, mv, entryField, path, overwrite, blockSize) - - override def lower(colsFieldName: String, entriesFieldName: String, colKey: IndexedSeq[String], - ctx: ExecuteContext, ts: TableStage, t: TableIR, r: RTable, relationalLetsAbove: Map[String, IR]): IR = { - - val tm = MatrixType.fromTableType(t.typ, colsFieldName, entriesFieldName, colKey) - val rm = r.asMatrixType(colsFieldName, entriesFieldName) - - val countColumnsIR = ArrayLen(GetField(ts.getGlobals(), colsFieldName)) - val numCols: Int = CompileAndEvaluate(ctx, countColumnsIR, true).asInstanceOf[Int] - val numBlockCols: Int = (numCols - 1) / blockSize + 1 - val lastBlockNumCols = numCols % blockSize - - val rowCountIR = ts.mapCollect(relationalLetsAbove)(paritionIR => StreamLen(paritionIR)) - val inputRowCountPerPartition: IndexedSeq[Int] = CompileAndEvaluate(ctx, rowCountIR).asInstanceOf[IndexedSeq[Int]] - val inputPartStartsPlusLast = inputRowCountPerPartition.scanLeft(0L)(_ + _) - val inputPartStarts = inputPartStartsPlusLast.dropRight(1) - val inputPartStops = inputPartStartsPlusLast.tail - - val numRows = inputPartStartsPlusLast.last - val numBlockRows: Int = (numRows.toInt - 1) / blockSize + 1 - - // Zip contexts with partition starts and ends - val zippedWithStarts = ts.mapContexts{oldContextsStream => zipIR(IndexedSeq(oldContextsStream, ToStream(Literal(TArray(TInt64), inputPartStarts)), ToStream(Literal(TArray(TInt64), inputPartStops))), ArrayZipBehavior.AssertSameLength){ case IndexedSeq(oldCtx, partStart, partStop) => - MakeStruct(Seq[(String, IR)]("mwOld" -> oldCtx, "mwStartIdx" -> Cast(partStart, TInt32), "mwStopIdx" -> Cast(partStop, TInt32))) - }}(newCtx => GetField(newCtx, "mwOld")) - - // 
Now label each row with its idx. - val perRowIdxId = genUID() - val partsZippedWithIdx = zippedWithStarts.mapPartitionWithContext { (part, ctx) => - zip2(part, rangeIR(GetField(ctx, "mwStartIdx"), GetField(ctx, "mwStopIdx")), ArrayZipBehavior.AssertSameLength) { (partRow, idx) => - insertIR(partRow, (perRowIdxId, idx)) - } - } - - // Two steps, make a partitioner that works currently based on row_idx splits, then resplit accordingly. - val inputRowIntervals = inputPartStarts.zip(inputPartStops).map{ case (intervalStart, intervalEnd) => - Interval(Row(intervalStart.toInt), Row(intervalEnd.toInt), true, false) - } - val rowIdxPartitioner = RVDPartitioner.generate(TStruct((perRowIdxId, TInt32)), inputRowIntervals) - - val keyedByRowIdx = partsZippedWithIdx.changePartitionerNoRepartition(rowIdxPartitioner) - - // Now create a partitioner that makes appropriately sized blocks - val desiredRowStarts = (0 until numBlockRows).map(_ * blockSize) - val desiredRowStops = desiredRowStarts.drop(1) :+ numRows.toInt - val desiredRowIntervals = desiredRowStarts.zip(desiredRowStops).map{ - case (intervalStart, intervalEnd) => Interval(Row(intervalStart), Row(intervalEnd), true, false) - } - - val blockSizeGroupsPartitioner = RVDPartitioner.generate(TStruct((perRowIdxId, TInt32)), desiredRowIntervals) - val rowsInBlockSizeGroups: TableStage = keyedByRowIdx.repartitionNoShuffle(blockSizeGroupsPartitioner) - - def createBlockMakingContexts(tablePartsStreamIR: IR): IR = { - flatten(zip2(tablePartsStreamIR, rangeIR(numBlockRows), ArrayZipBehavior.AssertSameLength) { case (tableSinglePartCtx, blockColIdx) => - mapIR(rangeIR(I32(numBlockCols))){ blockColIdx => - MakeStruct(Seq("oldTableCtx" -> tableSinglePartCtx, "blockStart" -> (blockColIdx * I32(blockSize)), - "blockSize" -> If(blockColIdx ceq I32(numBlockCols - 1), I32(lastBlockNumCols), I32(blockSize)), - "blockColIdx" -> blockColIdx, - "blockRowIdx" -> blockColIdx)) - } - }) - } - - val tableOfNDArrays = rowsInBlockSizeGroups.mapContexts(createBlockMakingContexts)(ir => GetField(ir, "oldTableCtx")).mapPartitionWithContext{ (partIr, ctxRef) => - bindIR(GetField(ctxRef, "blockStart")){ blockStartRef => - val numColsOfBlock = GetField(ctxRef, "blockSize") - val arrayOfSlicesAndIndices = ToArray(mapIR(partIr) { singleRow => - val mappedSlice = ToArray(mapIR(ToStream(sliceArrayIR(GetField(singleRow, entriesFieldName), blockStartRef, blockStartRef + numColsOfBlock)))(entriesStructRef => - GetField(entriesStructRef, entryField) - )) - MakeStruct(Seq( - perRowIdxId -> GetField(singleRow, perRowIdxId), - "rowOfData" -> mappedSlice - )) - }) - bindIR(arrayOfSlicesAndIndices){ arrayOfSlicesAndIndicesRef => - val idxOfResult = GetField(ArrayRef(arrayOfSlicesAndIndicesRef, I32(0)), perRowIdxId) - val ndarrayData = ToArray(flatMapIR(ToStream(arrayOfSlicesAndIndicesRef)){idxAndSlice => - ToStream(GetField(idxAndSlice, "rowOfData")) - }) - val numRowsOfBlock = ArrayLen(arrayOfSlicesAndIndicesRef) - val shape = maketuple(Cast(numRowsOfBlock, TInt64), Cast(numColsOfBlock, TInt64)) - val ndarray = MakeNDArray(ndarrayData, shape, True(), ErrorIDs.NO_ERROR) - MakeStream(Seq(MakeStruct(Seq( - perRowIdxId -> idxOfResult, - "blockRowIdx" -> GetField(ctxRef, "blockRowIdx"), - "blockColIdx" -> GetField(ctxRef, "blockColIdx"), - "ndBlock" -> ndarray))), - TStream(TStruct(perRowIdxId -> TInt32, "blockRowIdx" -> TInt32, "blockColIdx" -> TInt32, "ndBlock" -> ndarray.typ))) - } - } - } - - val elementType = tm.entryType.fieldType(entryField) - val etype = 
EBlockMatrixNDArray(EType.fromTypeAndAnalysis(elementType, rm.entryType.field(entryField)), encodeRowMajor = true, required = true) - val spec = TypedCodecSpec(etype, TNDArray(tm.entryType.fieldType(entryField), Nat(2)), BlockMatrix.bufferSpec) - - val pathsWithColMajorIndices = tableOfNDArrays.mapCollect(relationalLetsAbove) { partition => - ToArray(mapIR(partition) { singleNDArrayTuple => - bindIR(GetField(singleNDArrayTuple, "blockRowIdx") + (GetField(singleNDArrayTuple, "blockColIdx") * numBlockRows)) { colMajorIndex => - val blockPath = - Str(s"$path/parts/part-") + - invoke("str", TString, colMajorIndex) + Str("-") + UUID4() - maketuple(colMajorIndex, WriteValue(GetField(singleNDArrayTuple, "ndBlock"), blockPath, spec)) - } - }) - } - val flatPathsAndIndices = flatMapIR(ToStream(pathsWithColMajorIndices))(ToStream(_)) - val sortedColMajorPairs = sortIR(flatPathsAndIndices){case (l, r) => ApplyComparisonOp(LT(TInt32), GetTupleElement(l, 0), GetTupleElement(r, 0))} - val flatPaths = ToArray(mapIR(ToStream(sortedColMajorPairs))(GetTupleElement(_, 1))) - val bmt = BlockMatrixType(elementType, IndexedSeq(numRows, numCols), numRows==1, blockSize, BlockMatrixSparsity.dense) - RelationalWriter.scoped(path, overwrite, None)(WriteMetadata(flatPaths, BlockMatrixNativeMetadataWriter(path, false, bmt))) - } } object MatrixNativeMultiWriter { diff --git a/hail/src/main/scala/is/hail/expr/ir/NativeReaderOptions.scala b/hail/src/main/scala/is/hail/expr/ir/NativeReaderOptions.scala index 61d359a7b14..ea1e7451f97 100644 --- a/hail/src/main/scala/is/hail/expr/ir/NativeReaderOptions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/NativeReaderOptions.scala @@ -55,6 +55,4 @@ case class NativeReaderOptions( "intervalPointType" -> intervalPointType.parsableString(), "filterIntervals" -> filterIntervals) } - - def renderShort(): String = s"(IntervalRead: ${intervals.length} intervals, filter=${filterIntervals})" } diff --git a/hail/src/main/scala/is/hail/expr/ir/Param.scala b/hail/src/main/scala/is/hail/expr/ir/Param.scala index 0c7a266e386..f415646a66b 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Param.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Param.scala @@ -1,7 +1,7 @@ package is.hail.expr.ir import is.hail.asm4s.{BooleanInfo, Code, TypeInfo, classInfo} -import is.hail.types.physical.stypes.{EmitType, SCode, SType, SingleCodeType} +import is.hail.types.physical.{PCode, PType, SingleCodePCode, SingleCodeType} import is.hail.types.virtual.Type import is.hail.utils.FastIndexedSeq @@ -17,10 +17,10 @@ case class CodeParamType(ti: TypeInfo[_]) extends ParamType { override def toString: String = s"CodeParam($ti)" } -case class SCodeParamType(st: SType) extends ParamType { - def nCodes: Int = st.nCodes +case class PCodeParamType(pt: PType) extends ParamType { + def nCodes: Int = pt.nCodes - override def toString: String = s"SCodeParam($st, $nCodes)" + override def toString: String = s"PCodeParam($pt, $nCodes)" } trait EmitParamType extends ParamType { @@ -49,16 +49,18 @@ case class SingleCodeEmitParamType(required: Boolean, sct: SingleCodeType) exten override def toString: String = s"SingleCodeEmitParamType($required, $sct)" } -case class SCodeEmitParamType(et: EmitType) extends EmitParamType { - def required: Boolean = et.required +case class PCodeEmitParamType(pt: PType) extends EmitParamType { + def required: Boolean = pt.required - def virtualType: Type = et.st.virtualType + def virtualType: Type = pt.virtualType - def definedTupleTypes(): IndexedSeq[TypeInfo[_]] = et.st.codeTupleTypes() + def 
definedTupleTypes(): IndexedSeq[TypeInfo[_]] = pt.codeTupleTypes() + + override def toString: String = s"PTypeEmitParamType($pt, $nCodes)" } sealed trait Param case class CodeParam(c: Code[_]) extends Param case class EmitParam(ec: EmitCode) extends Param -case class SCodeParam(sc: SCode) extends Param +case class PCodeParam(pc: PCode) extends Param diff --git a/hail/src/main/scala/is/hail/expr/ir/Parser.scala b/hail/src/main/scala/is/hail/expr/ir/Parser.scala index 9f4d510402f..2e69b5787c0 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Parser.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Parser.scala @@ -19,7 +19,6 @@ import org.json4s.{Formats, JObject} import org.json4s.jackson.{JsonMethods, Serialization} import scala.collection.JavaConverters._ -import scala.collection.mutable import scala.reflect.ClassTag import scala.util.parsing.combinator.JavaTokenParsers import scala.util.parsing.input.Positional @@ -287,25 +286,25 @@ object IRParser { f: (TokenIterator) => T, sep: Token, end: Token)(implicit tct: ClassTag[T]): Array[T] = { - val xs = new mutable.ArrayBuffer[T]() + val xs = new BoxedArrayBuilder[T]() while (it.hasNext && it.head != end) { xs += f(it) if (it.head == sep) consumeToken(it) } - xs.toArray + xs.result() } def repUntil[T](it: TokenIterator, f: (TokenIterator) => StackFrame[T], end: Token)(implicit tct: ClassTag[T]): StackFrame[Array[T]] = { - val xs = new mutable.ArrayBuffer[T]() + val xs = new BoxedArrayBuilder[T]() var cont: T => StackFrame[Array[T]] = null def loop(): StackFrame[Array[T]] = { if (it.hasNext && it.head != end) { f(it).flatMap(cont) } else { - done(xs.toArray) + done(xs.result()) } } cont = { t => @@ -318,11 +317,11 @@ object IRParser { def repUntilNonStackSafe[T](it: TokenIterator, f: (TokenIterator) => T, end: Token)(implicit tct: ClassTag[T]): Array[T] = { - val xs = new mutable.ArrayBuffer[T]() + val xs = new BoxedArrayBuilder[T]() while (it.hasNext && it.head != end) { xs += f(it) } - xs.toArray + xs.result() } def base_seq_parser[T : ClassTag](f: TokenIterator => T)(it: TokenIterator): Array[T] = { @@ -1259,11 +1258,6 @@ object IRParser { ir_value_expr(env)(it).map { msg => Die(msg, typ, errorId) } - case "Trap" => - ir_value_expr(env)(it).map { child => - Trap(child) - } - case "ApplySeeded" => val function = identifier(it) val seed = int64_literal(it) diff --git a/hail/src/main/scala/is/hail/expr/ir/Pretty.scala b/hail/src/main/scala/is/hail/expr/ir/Pretty.scala index bb8a79050b2..3f787559efa 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Pretty.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Pretty.scala @@ -205,7 +205,7 @@ object Pretty { FastSeq(if (typ == reader.fullMatrixType) "None" else typ.parsableString(), prettyBooleanLiteral(dropCols), prettyBooleanLiteral(dropRows), - if (elideLiterals) reader.renderShort() else '"' + StringEscapeUtils.escapeString(JsonMethods.compact(reader.toJValue)) + '"') + '"' + StringEscapeUtils.escapeString(JsonMethods.compact(reader.toJValue)) + '"') case MatrixWrite(_, writer) => single('"' + StringEscapeUtils.escapeString(Serialization.write(writer)(MatrixWriter.formats)) + '"') case MatrixMultiWrite(_, writer) => @@ -256,7 +256,7 @@ object Pretty { case TableRead(typ, dropRows, tr) => FastSeq(if (typ == tr.fullType) "None" else typ.parsableString(), prettyBooleanLiteral(dropRows), - if (elideLiterals) tr.renderShort() else '"' + StringEscapeUtils.escapeString(JsonMethods.compact(tr.toJValue)) + '"') + '"' + StringEscapeUtils.escapeString(JsonMethods.compact(tr.toJValue)) + '"') case TableWrite(_, 
writer) => single('"' + StringEscapeUtils.escapeString(Serialization.write(writer)(TableWriter.formats)) + '"') case TableMultiWrite(_, writer) => @@ -293,8 +293,6 @@ object Pretty { single(prettyStringLiteral(Serialization.write(function)(RelationalFunctions.formats))) case MatrixToValueApply(_, function) => single(prettyStringLiteral(Serialization.write(function)(RelationalFunctions.formats))) - case BlockMatrixToValueApply(_, function) => - single(prettyStringLiteral(Serialization.write(function)(RelationalFunctions.formats))) case BlockMatrixToTableApply(_, _, function) => single(prettyStringLiteral(Serialization.write(function)(RelationalFunctions.formats))) case TableRename(_, rowMap, globalMap) => diff --git a/hail/src/main/scala/is/hail/expr/ir/PruneDeadFields.scala b/hail/src/main/scala/is/hail/expr/ir/PruneDeadFields.scala index 2b035a5aaa6..2c772477db6 100644 --- a/hail/src/main/scala/is/hail/expr/ir/PruneDeadFields.scala +++ b/hail/src/main/scala/is/hail/expr/ir/PruneDeadFields.scala @@ -1373,20 +1373,6 @@ object PruneDeadFields { ) memoizeMatrixIR(child, dep, memo) BindingEnv.empty - case TailLoop(name, params, body) => - val bodyEnv = memoizeValueIR(body, body.typ, memo) - val paramTypes = params.map{ case (paramName, paramIR) => - bodyEnv.eval.lookupOption(paramName) match { - case Some(ab) => unifySeq(paramIR.typ, ab.result()) - case None => minimal(paramIR.typ) - } - } - unifyEnvsSeq( - IndexedSeq(bodyEnv.deleteEval(params.map(_._1))) ++ - params.zip(paramTypes).map{ case ((paramName, paramIR), paramType) => - memoizeValueIR(paramIR, paramType, memo) - } - ) case CollectDistributedArray(contexts, globals, cname, gname, body, tsd) => val rArray = requestedType.asInstanceOf[TArray] val bodyEnv = memoizeValueIR(body, rArray.elementType, memo) diff --git a/hail/src/main/scala/is/hail/expr/ir/Requiredness.scala b/hail/src/main/scala/is/hail/expr/ir/Requiredness.scala index 40907aa4b2d..36aeb81be98 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Requiredness.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Requiredness.scala @@ -3,8 +3,7 @@ package is.hail.expr.ir import is.hail.expr.ir.functions.GetElement import is.hail.methods.ForceCountTable import is.hail.types._ -import is.hail.types.physical.stypes.{EmitType, PTypeReferenceSingleCodeType, StreamSingleCodeType} -import is.hail.types.physical.{PCanonicalStream, PStream, PType} +import is.hail.types.physical.{PCanonicalStream, PStream, PType, PTypeReferenceSingleCodeType, StreamSingleCodeType} import is.hail.types.virtual._ import is.hail.utils._ import org.apache.spark.sql.catalyst.expressions.GenericRow @@ -25,7 +24,6 @@ object Requiredness { case class RequirednessAnalysis(r: Memo[BaseTypeWithRequiredness], states: Memo[IndexedSeq[TypeWithRequiredness]]) { def lookup(node: BaseIR): BaseTypeWithRequiredness = r.lookup(node) - def lookupState(node: BaseIR): IndexedSeq[BaseTypeWithRequiredness] = states.lookup(node) def lookupOpt(node: BaseIR): Option[BaseTypeWithRequiredness] = r.get(node) def apply(node: IR): TypeWithRequiredness = coerce[TypeWithRequiredness](lookup(node)) def getState(node: IR): IndexedSeq[TypeWithRequiredness] = states(node) @@ -101,8 +99,7 @@ class Requiredness(val usesAndDefs: UsesAndDefs, ctx: ExecuteContext) { def addBindingRelations(node: BaseIR): Unit = { val refMap: Map[String, IndexedSeq[RefEquality[BaseRef]]] = usesAndDefs.uses(node).toFastIndexedSeq.groupBy(_.t.name) - def addElementBinding(name: String, d: IR, makeOptional: Boolean = false, makeRequired: Boolean = false): Unit = { - 
assert(!(makeOptional && makeRequired)) + def addElementBinding(name: String, d: IR, makeOptional: Boolean = false): Unit = { if (refMap.contains(name)) { val uses = refMap(name) val eltReq = coerce[RIterable](lookup(d)).elementType @@ -110,10 +107,6 @@ class Requiredness(val usesAndDefs: UsesAndDefs, ctx: ExecuteContext) { val optional = eltReq.copy(eltReq.children) optional.union(false) optional - } else if (makeRequired) { - val req = eltReq.copy(eltReq.children) - req.union(true) - req } else eltReq uses.foreach { u => defs.bind(u, Array(req)) } dependents.getOrElseUpdate(d, mutable.Set[RefEquality[BaseIR]]()) ++= uses @@ -176,8 +169,8 @@ class Requiredness(val usesAndDefs: UsesAndDefs, ctx: ExecuteContext) { case x@ApplyIR(_, _, args) => x.refIdx.foreach { case (n, i) => addBinding(n, args(i)) } case ArraySort(a, l, r, c) => - addElementBinding(l, a, makeRequired = true) - addElementBinding(r, a, makeRequired = true) + addElementBinding(l, a) + addElementBinding(r, a) case StreamMap(a, name, body) => addElementBinding(name, a) case x@StreamZip(as, names, body, behavior) => @@ -459,15 +452,6 @@ class Requiredness(val usesAndDefs: UsesAndDefs, ctx: ExecuteContext) { // always required case _: I32 | _: I64 | _: F32 | _: F64 | _: Str | True() | False() | _: IsNA | _: Die | _: UUID4 | _: Consume => case _: CombOpValue | _: AggStateValue => - case Trap(child) => - // error message field is missing if the child runs without error - requiredness.asInstanceOf[RTuple].field(0).union(false) - - val childField = requiredness.asInstanceOf[RTuple].field(1) - // trap can return optional if child throws exception - childField.union(false) - - childField.unionFrom(lookup(child)) case x if x.typ == TVoid => case ApplyComparisonOp(EQWithNA(_, _), _, _) | ApplyComparisonOp(NEQWithNA(_, _), _, _) | ApplyComparisonOp(Compare(_, _), _, _) => case ApplyComparisonOp(op, l, r) => @@ -674,11 +658,8 @@ class Requiredness(val usesAndDefs: UsesAndDefs, ctx: ExecuteContext) { requiredness.unionFrom(oldReq.field(idx)) case x: ApplyIR => requiredness.unionFrom(lookup(x.body)) case x: AbstractApplyNode[_] => //FIXME: round-tripping via PTypes. 
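For orientation, the requiredness analysis touched in this hunk tracks, per IR node, whether a value can be missing; unions can only weaken a slot from required to optional, which is what the makeOptional-style element bindings rely on. A toy, self-contained sketch of that lattice follows, with illustrative names rather than Hail's actual classes.

// Toy model of the requiredness lattice: a slot starts out required, and union(false)
// can only make it optional, never required again.
object RequirednessToy extends App {
  final class Req(private var r: Boolean = true) {
    def required: Boolean = r
    def union(other: Boolean): Unit = { r = r && other }
    def unionFrom(that: Req): Unit = union(that.required)
  }

  val streamElement = new Req()        // element requiredness of some stream
  val loopBinding = new Req()          // binding introduced for the lambda argument
  loopBinding.unionFrom(streamElement) // binding inherits the element's requiredness
  loopBinding.union(false)             // makeOptional: the binding may be missing
  println(s"element required: ${streamElement.required}, binding required: ${loopBinding.required}")
}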
- val argP = x.args.map { a => - val pt = lookup(a).canonicalPType(a.typ) - EmitType(pt.sType, pt.required) - } - requiredness.fromPType(x.implementation.computeReturnEmitType(x.returnType, argP).canonicalPType) + val argP = x.args.map(a => lookup(a).canonicalPType(a.typ)) + requiredness.fromPType(x.implementation.returnPType(x.returnType, argP)) case CollectDistributedArray(ctxs, globs, _, _, body, _) => requiredness.union(lookup(ctxs).required) coerce[RIterable](requiredness).elementType.unionFrom(lookup(body)) @@ -693,7 +674,7 @@ class Requiredness(val usesAndDefs: UsesAndDefs, ctx: ExecuteContext) { requiredness.union(lookup(path).required) requiredness.fromPType(spec.encodedType.decodedPType(rt)) case In(_, t) => t match { - case SCodeEmitParamType(et) => requiredness.fromPType(et.canonicalPType) + case PCodeEmitParamType(pt) => requiredness.fromPType(pt) case SingleCodeEmitParamType(required, StreamSingleCodeType(_, eltType)) => requiredness.fromPType(PCanonicalStream(eltType, required)) // fixme hacky case SingleCodeEmitParamType(required, PTypeReferenceSingleCodeType(pt)) => requiredness.fromPType(pt.setRequired(required)) case SingleCodeEmitParamType(required, _) => requiredness.union(required) @@ -728,9 +709,9 @@ class Requiredness(val usesAndDefs: UsesAndDefs, ctx: ExecuteContext) { requiredness.unionFrom(lookup(readers)) case ShuffleWrite(id, rows) => // required case ShufflePartitionBounds(id, nPartitions) => - coerce[RIterable](requiredness).elementType.fromPType(coerce[TShuffle](id.typ).keyDecodedPType.setRequired(true)) + coerce[RIterable](requiredness).elementType.fromPType(coerce[TShuffle](id.typ).keyDecodedPType) case ShuffleRead(id, keyRange) => - coerce[RIterable](requiredness).elementType.fromPType(coerce[TShuffle](id.typ).rowDecodedPType.setRequired(true)) + coerce[RIterable](requiredness).elementType.fromPType(coerce[TShuffle](id.typ).rowDecodedPType) } requiredness.probeChangedAndReset() } diff --git a/hail/src/main/scala/is/hail/expr/ir/Simplify.scala b/hail/src/main/scala/is/hail/expr/ir/Simplify.scala index 1557ab833fc..e9daf779107 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Simplify.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Simplify.scala @@ -143,7 +143,6 @@ object Simplify { case StreamZip(as, names, body, _) if as.length == 1 => StreamMap(as.head, names.head, body) case StreamMap(StreamZip(as, names, zipBody, b), name, mapBody) => StreamZip(as, names, Let(name, zipBody, mapBody), b) - case StreamMap(StreamFlatMap(child, flatMapName, flatMapBody), mapName, mapBody) => StreamFlatMap(child, flatMapName, StreamMap(flatMapBody, mapName, mapBody)) case x@StreamFlatMap(NA(_), _, _) => NA(x.typ) @@ -175,7 +174,6 @@ object Simplify { case Cast(Cast(x, _), t) if x.typ == t =>x case CastRename(x, t) if x.typ == t => x - case CastRename(CastRename(x, _), t) => CastRename(x, t) case ApplyBinaryPrimOp(Add(), I32(0), x) => x case ApplyBinaryPrimOp(Add(), x, I32(0)) => x @@ -567,11 +565,9 @@ object Simplify { canBeLifted(query) } => query - case BlockMatrixToValueApply(ValueToBlockMatrix(child, IndexedSeq(nrows, ncols), _), functions.GetElement(Seq(i, j))) => child.typ match { - case TArray(_) => ArrayRef(child, I32((i * ncols + j).toInt)) - case TNDArray(_, _) => NDArrayRef(child, IndexedSeq(i, j), ErrorIDs.NO_ERROR) - case TFloat64 => child - } + case BlockMatrixToValueApply(ValueToBlockMatrix(child, IndexedSeq(nrows, ncols), _), functions.GetElement(Seq(i, j))) => + if (child.typ.isInstanceOf[TArray]) ArrayRef(child, I32((i * ncols + j).toInt)) else child + 
case LiftMeOut(child) if IsConstant(child) => child } @@ -945,12 +941,12 @@ object Simplify { case BlockMatrixSlice(BlockMatrixMap2(l, r, ln, rn, f, sparsityStrategy), slices) => BlockMatrixMap2(BlockMatrixSlice(l, slices), BlockMatrixSlice(r, slices), ln, rn, f, sparsityStrategy) case BlockMatrixMap2(BlockMatrixBroadcast(scalarBM, IndexedSeq(), _, _), right, leftName, rightName, f, sparsityStrategy) => - val getElement = BlockMatrixToValueApply(scalarBM, functions.GetElement(IndexedSeq(0, 0))) + val getElement = BlockMatrixToValueApply(scalarBM, functions.GetElement(Seq(0, 0))) val needsDense = sparsityStrategy == NeedsDense || sparsityStrategy.exists(leftBlock = true, rightBlock = false) val maybeDense = if (needsDense) BlockMatrixDensify(right) else right BlockMatrixMap(maybeDense, rightName, Subst(f, BindingEnv.eval(leftName -> getElement)), needsDense) case BlockMatrixMap2(left, BlockMatrixBroadcast(scalarBM, IndexedSeq(), _, _), leftName, rightName, f, sparsityStrategy) => - val getElement = BlockMatrixToValueApply(scalarBM, functions.GetElement(IndexedSeq(0, 0))) + val getElement = BlockMatrixToValueApply(scalarBM, functions.GetElement(Seq(0, 0))) val needsDense = sparsityStrategy == NeedsDense || sparsityStrategy.exists(leftBlock = false, rightBlock = true) val maybeDense = if (needsDense) BlockMatrixDensify(left) else left BlockMatrixMap(maybeDense, leftName, Subst(f, BindingEnv.eval(rightName -> getElement)), needsDense) diff --git a/hail/src/main/scala/is/hail/expr/ir/SpecializedArrayBuilders.scala b/hail/src/main/scala/is/hail/expr/ir/SpecializedArrayBuilders.scala index f369f9d8d48..a5b20e009e3 100644 --- a/hail/src/main/scala/is/hail/expr/ir/SpecializedArrayBuilders.scala +++ b/hail/src/main/scala/is/hail/expr/ir/SpecializedArrayBuilders.scala @@ -1,17 +1,15 @@ package is.hail.expr.ir -import is.hail.annotations.Region import is.hail.asm4s._ -import is.hail.types.physical.stypes.SingleCodeType -import is.hail.types.physical.{PType, typeToTypeInfo} +import is.hail.types.physical.{PCode, PType, PValue, typeToTypeInfo} import is.hail.types.virtual.Type import is.hail.utils.BoxedArrayBuilder import scala.reflect.ClassTag -class StagedArrayBuilder(val elt: SingleCodeType, val eltRequired: Boolean, mb: EmitMethodBuilder[_], len: Code[Int]) { +class StagedArrayBuilder(val elt: PType, mb: EmitMethodBuilder[_], len: Code[Int]) { - val ti: TypeInfo[_] = elt.ti + val ti: TypeInfo[_] = typeToTypeInfo(elt) val ref: Value[Any] = coerce[Any](ti match { case BooleanInfo => mb.genLazyFieldThisRef[BooleanMissingArrayBuilder](Code.newInstance[BooleanMissingArrayBuilder, Int](len), "zab") @@ -46,6 +44,26 @@ class StagedArrayBuilder(val elt: SingleCodeType, val eltRequired: Boolean, mb: case DoubleInfo => coerce[DoubleMissingArrayBuilder](ref).invoke[Int, Double, Unit]("update", i, coerce[Double](x)) } + def sort(compare: Code[AsmFunction2[_, _, _]]): Code[Unit] = { + ti match { + case BooleanInfo => + type F = AsmFunction2[Boolean, Boolean, Boolean] + coerce[BooleanMissingArrayBuilder](ref).invoke[F, Unit]("sort", coerce[F](compare)) + case IntInfo => + type F = AsmFunction2[Int, Int, Boolean] + coerce[IntMissingArrayBuilder](ref).invoke[F, Unit]("sort", coerce[F](compare)) + case LongInfo => + type F = AsmFunction2[Long, Long, Boolean] + coerce[LongMissingArrayBuilder](ref).invoke[F, Unit]("sort", coerce[F](compare)) + case FloatInfo => + type F = AsmFunction2[Float, Float, Boolean] + coerce[FloatMissingArrayBuilder](ref).invoke[F, Unit]("sort", coerce[F](compare)) + case DoubleInfo => + type F 
= AsmFunction2[Double, Double, Boolean] + coerce[DoubleMissingArrayBuilder](ref).invoke[F, Unit]("sort", coerce[F](compare)) + } + } + def addMissing(): Code[Unit] = coerce[MissingArrayBuilder](ref).invoke[Unit]("addMissing") @@ -63,10 +81,19 @@ class StagedArrayBuilder(val elt: SingleCodeType, val eltRequired: Boolean, mb: def clear: Code[Unit] = coerce[MissingArrayBuilder](ref).invoke[Unit]("clear") - def loadFromIndex(cb: EmitCodeBuilder, r: Value[Region], i: Code[Int]): IEmitCode = { - val idx = cb.newLocal[Int]("loadFromIndex_idx", i) - IEmitCode(cb, isMissing(idx), elt.loadToSCode(cb, r, apply(idx))) - } + def applyEV(mb: EmitMethodBuilder[_], i: Code[Int]): EmitValue = + new EmitValue { + def pt: PType = elt + + def get(cb: EmitCodeBuilder): PCode = load.toI(cb).get( + cb, + s"Can't convert missing EmitValue of type ${pt} to PValue.") + + def load: EmitCode = { + val t = mb.newLocal[Int]("sab_applyEV_load_i") + EmitCode(t := i, isMissing(t), PCode(elt, apply(t))) + } + } } sealed abstract class MissingArrayBuilder(initialCapacity: Int) { @@ -466,22 +493,6 @@ final class LongArrayBuilder(initialCapacity: Int= 16) { } } - def +=(x: Long): Unit = add(x) - - def ++=(xs: Array[Long]) = { - val newLen = size + xs.length - ensureCapacity(newLen) - System.arraycopy(xs, 0, b, size_, xs.length) - size_ = newLen - } - - def ++=(xs: Array[Long], n: Int) = { - val newLen = size + n - ensureCapacity(newLen) - System.arraycopy(xs, 0, b, size_, n) - size_ = newLen - } - def add(x: Long): Unit = { ensureCapacity(size_ + 1) b(size_) = x @@ -540,27 +551,6 @@ final class IntArrayBuilder(initialCapacity: Int = 16) { } } - def setSizeUninitialized(n: Int) = { - ensureCapacity(n) - size_ = n - } - - def +=(x: Int): Unit = add(x) - - def ++=(xs: Array[Int]) = { - val newLen = size + xs.length - ensureCapacity(newLen) - System.arraycopy(xs, 0, b, size_, xs.length) - size_ = newLen - } - - def ++=(xs: Array[Int], n: Int) = { - val newLen = size + n - ensureCapacity(newLen) - System.arraycopy(xs, 0, b, size_, n) - size_ = newLen - } - def add(x: Int): Unit = { ensureCapacity(size_ + 1) b(size_) = x @@ -619,27 +609,6 @@ final class DoubleArrayBuilder(initialCapacity: Int = 16) { } } - def setSizeUninitialized(n: Int) = { - ensureCapacity(n) - size_ = n - } - - def +=(x: Double): Unit = add(x) - - def ++=(xs: Array[Double]) = { - val newLen = size + xs.length - ensureCapacity(newLen) - System.arraycopy(xs, 0, b, size_, xs.length) - size_ = newLen - } - - def ++=(xs: Array[Double], n: Int) = { - val newLen = size + n - ensureCapacity(newLen) - System.arraycopy(xs, 0, b, size_, n) - size_ = newLen - } - def add(x: Double): Unit = { ensureCapacity(size_ + 1) b(size_) = x @@ -672,140 +641,6 @@ final class DoubleArrayBuilder(initialCapacity: Int = 16) { } } -final class ByteArrayBuilder(initialCapacity: Int = 16) { - - var size_ : Int = 0 - var b: Array[Byte] = new Array[Byte](initialCapacity) - - def size: Int = size_ - - def setSize(n: Int) { - require(n >= 0 && n <= size) - size_ = n - } - - def apply(i: Int): Byte = { - require(i >= 0 && i < size) - b(i) - } - - def ensureCapacity(n: Int): Unit = { - if (b.length < n) { - val newCapacity = math.max(n, b.length * 2) - val newb = new Array[Byte](newCapacity) - Array.copy(b, 0, newb, 0, size_) - b = newb - } - } - - def +=(x: Byte) = add(x) - - def ++=(xs: Array[Byte]) = { - val newLen = size + xs.length - ensureCapacity(newLen) - System.arraycopy(xs, 0, b, size_, xs.length) - size_ = newLen - } - - def ++=(xs: Array[Byte], n: Int) = { - val newLen = size + n - 
ensureCapacity(newLen) - System.arraycopy(xs, 0, b, size_, n) - size_ = newLen - } - - def add(x: Byte): Unit = { - ensureCapacity(size_ + 1) - b(size_) = x - size_ += 1 - } - - def update(i: Int, x: Byte): Unit = { - require(i >= 0 && i < size) - b(i) = x - } - - def clear() { size_ = 0 } - - def result(): Array[Byte] = b.slice(0, size_) - - def clearAndResize(): Unit = { - size_ = 0 - if (b.length > initialCapacity) - b = new Array[Byte](initialCapacity) - } - def appendFrom(ab2: ByteArrayBuilder): Unit = { - ensureCapacity(size_ + ab2.size_) - System.arraycopy(ab2.b, 0, b, size_, ab2.size_) - size_ = size_ + ab2.size_ - } - - def pop(): Byte = { - size_ -= 1 - b(size) - } -} - -final class BooleanArrayBuilder(initialCapacity: Int = 16) { - - var size_ : Int = 0 - var b: Array[Boolean] = new Array[Boolean](initialCapacity) - - def size: Int = size_ - - def setSize(n: Int) { - require(n >= 0 && n <= size) - size_ = n - } - - def apply(i: Int): Boolean = { - require(i >= 0 && i < size) - b(i) - } - - def ensureCapacity(n: Int): Unit = { - if (b.length < n) { - val newCapacity = math.max(n, b.length * 2) - val newb = new Array[Boolean](newCapacity) - Array.copy(b, 0, newb, 0, size_) - b = newb - } - } - - def +=(x: Boolean) = add(x) - - def add(x: Boolean): Unit = { - ensureCapacity(size_ + 1) - b(size_) = x - size_ += 1 - } - - def update(i: Int, x: Boolean): Unit = { - require(i >= 0 && i < size) - b(i) = x - } - - def clear() { size_ = 0 } - - def result(): Array[Boolean] = b.slice(0, size_) - - def clearAndResize(): Unit = { - size_ = 0 - if (b.length > initialCapacity) - b = new Array[Boolean](initialCapacity) - } - def appendFrom(ab2: BooleanArrayBuilder): Unit = { - ensureCapacity(size_ + ab2.size_) - System.arraycopy(ab2.b, 0, b, size_, ab2.size_) - size_ = size_ + ab2.size_ - } - - def pop(): Boolean = { - size_ -= 1 - b(size) - } -} - final class AnyRefArrayBuilder[T <: AnyRef](initialCapacity: Int = 16)(implicit ct: ClassTag[T]) { var size_ : Int = 0 diff --git a/hail/src/main/scala/is/hail/expr/ir/TableIR.scala b/hail/src/main/scala/is/hail/expr/ir/TableIR.scala index 390464e1d0d..bb001efac33 100644 --- a/hail/src/main/scala/is/hail/expr/ir/TableIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/TableIR.scala @@ -16,16 +16,14 @@ import is.hail.linalg.{BlockMatrix, BlockMatrixMetadata, BlockMatrixReadRowBlock import is.hail.rvd._ import is.hail.sparkextras.ContextRDD import is.hail.types._ -import is.hail.types.physical.{stypes, _} -import is.hail.types.physical.stypes.{BooleanSingleCodeType, Int32SingleCodeType, PTypeReferenceSingleCodeType, StreamSingleCodeType} -import is.hail.types.physical.stypes.interfaces.{SBaseStructValue, SStream, SStreamCode} +import is.hail.types.physical._ +import is.hail.types.physical.stypes.interfaces.{SStream, SStreamCode} import is.hail.types.virtual._ import is.hail.utils._ import org.apache.spark.TaskContext import org.apache.spark.executor.InputMetrics import org.apache.spark.sql.Row import org.json4s.JsonAST.JString -import org.json4s.jackson.JsonMethods import org.json4s.{DefaultFormats, Extraction, Formats, JValue, ShortTypeHints} import java.io.{ByteArrayInputStream, DataInputStream, DataOutputStream, InputStream} @@ -418,12 +416,6 @@ abstract class TableReader { Extraction.decompose(this)(TableReader.formats) } - def renderShort(): String - - def defaultRender(): String = { - StringEscapeUtils.escapeString(JsonMethods.compact(toJValue)) - } - def lowerGlobals(ctx: ExecuteContext, requestedGlobalsType: TStruct): IR = throw new 
LowererUnsupportedOperation(s"${ getClass.getSimpleName }.lowerGlobals not implemented") @@ -491,6 +483,7 @@ case class PartitionRVDReader(rvd: RVD) extends PartitionReader { val iterator = mb.genFieldThisRef[Iterator[Long]]("rvdreader_iterator") val next = mb.genFieldThisRef[Long]("rvdreader_next") + val first = mb.genFieldThisRef[Boolean]("rvdreader_first") val region = mb.genFieldThisRef[Region]("rvdreader_region") val upcastF = mb.genFieldThisRef[AsmFunction2RegionLongLong]("rvdreader_upcast") @@ -511,12 +504,12 @@ case class PartitionRVDReader(rvd: RVD) extends PartitionReader { cb.assign(next, upcastF.invoke[Region, Long, Long]("apply", region, Code.longValue(iterator.invoke[java.lang.Long]("next")))) cb.goto(LproduceElementDone) } - override val element: EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, upcastPType.loadCheapSCode(cb, next))) + override val element: EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, upcastPType.loadCheapPCode(cb, next))) override def close(cb: EmitCodeBuilder): Unit = {} } - SStreamCode(producer) + SStreamCode(SStream(producer.element.st, true), producer) } } @@ -546,7 +539,7 @@ case class PartitionNativeReader(spec: AbstractTypedCodecSpec) extends AbstractN context.toI(cb).map(cb) { path => val pathString = path.asString.loadString() val xRowBuf = mb.genFieldThisRef[InputBuffer]("pnr_xrowbuf") - val next = mb.newPSettable(mb.fieldBuilder, spec.encodedType.decodedSType(requestedType), "pnr_next") + val next = mb.newPSettable(mb.fieldBuilder, spec.decodedPType(requestedType), "pnr_next") val region = mb.genFieldThisRef[Region]("pnr_region") val producer = new StreamProducer { @@ -567,7 +560,7 @@ case class PartitionNativeReader(spec: AbstractTypedCodecSpec) extends AbstractN override def close(cb: EmitCodeBuilder): Unit = cb += xRowBuf.close() } - SStreamCode(producer) + SStreamCode(SStream(producer.element.st, true), producer) } } @@ -642,13 +635,12 @@ case class PartitionNativeReaderIndexed(spec: AbstractTypedCodecSpec, indexSpec: .consumeCode[Interval](cb, Code._fatal[Interval](""), { pc => - val pcm = pc.memoize(cb, "pnri_interval") - val pt = pcm.st.canonicalPType() + val pcm = pc.memoize(cb, "pnri_interval").asPValue Code.invokeScalaObject2[PType, Long, Interval]( PartitionBoundOrdering.getClass, "regionValueToJavaObject", - mb.getPType(pt), - pt.store(cb, region, pcm, false)) + mb.getPType(pcm.pt), + coerce[Long](pcm.code)) } ), Code._null[InputMetrics] @@ -662,11 +654,11 @@ case class PartitionNativeReaderIndexed(spec: AbstractTypedCodecSpec, indexSpec: cb.goto(LproduceElementDone) } - override val element: EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, eltType.loadCheapSCode(cb, next))) + override val element: EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, eltType.loadCheapPCode(cb, next))) override def close(cb: EmitCodeBuilder): Unit = cb += it.invoke[Unit]("close") } - SStreamCode(producer) + SStreamCode(SStream(producer.element.st, true), producer) } } @@ -755,7 +747,7 @@ case class PartitionZippedNativeReader(specLeft: AbstractTypedCodecSpec, specRig context.toI(cb).map(cb) { ctxStruct => - def getIndexReader(cb: EmitCodeBuilder, ctxMemo: SBaseStructValue): Code[IndexReader] = { + def getIndexReader(cb: EmitCodeBuilder, ctxMemo: PBaseStructValue): Code[IndexReader] = { makeIndexCode match { case Some(makeIndex) => val indexPath = ctxMemo @@ -770,20 +762,19 @@ case class PartitionZippedNativeReader(specLeft: AbstractTypedCodecSpec, specRig } } - def getInterval(cb: EmitCodeBuilder, region: 
Value[Region], ctxMemo: SBaseStructValue): Code[Interval] = { + def getInterval(cb: EmitCodeBuilder, ctxMemo: PBaseStructValue): Code[Interval] = { makeIndexCode match { case Some(_) => ctxMemo.loadField(cb, "interval") .consumeCode[Interval](cb, Code._fatal[Interval](""), { pc => - val pcm = pc.memoize(cb, "pnri_interval") - val pt = pcm.st.canonicalPType() + val pcm = pc.memoize(cb, "pnri_interval").asPValue Code.invokeScalaObject2[PType, Long, Interval]( PartitionBoundOrdering.getClass, "regionValueToJavaObject", - mb.getPType(pt), - pt.store(cb, region, pcm, false)) + mb.getPType(pcm.pt), + coerce[Long](pcm.code)) } ) case None => Code._null[Interval] @@ -827,7 +818,7 @@ case class PartitionZippedNativeReader(specLeft: AbstractTypedCodecSpec, specRig getIndexReader(cb, ctxMemo), leftOffsetField.map[Code[String]](const(_)).getOrElse(Code._null[String]), rightOffsetField.map[Code[String]](const(_)).getOrElse(Code._null[String]), - getInterval(cb, region, ctxMemo), + getInterval(cb, ctxMemo), Code._null[InputMetrics] )) } @@ -839,11 +830,11 @@ case class PartitionZippedNativeReader(specLeft: AbstractTypedCodecSpec, specRig cb.assign(next, it.invoke[Long]("_next")) cb.goto(LproduceElementDone) } - override val element: EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, eltType.loadCheapSCode(cb, next))) + override val element: EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, eltType.loadCheapPCode(cb, next))) override def close(cb: EmitCodeBuilder): Unit = cb += it.invoke[Unit]("close") } - SStreamCode(producer) + SStreamCode(SStream(producer.element.st, true), producer) } } @@ -898,8 +889,6 @@ class TableNativeReader( decomposeWithName(params, "TableNativeReader") } - override def renderShort(): String = s"(TableNativeReader ${ params.path } ${ params.options.map(_.renderShort()).getOrElse("") })" - override def hashCode(): Int = params.hashCode() override def equals(that: Any): Boolean = that match { @@ -924,7 +913,7 @@ class TableNativeReader( else params.options.map(opts => new RVDPartitioner(specPart.kType, opts.intervals)) - spec.rowsSpec.readTableStage(ctx, spec.rowsComponent.absolutePath(params.path), requestedType, partitioner, filterIntervals).apply(globals) + spec.rowsSpec.readTableStage(ctx, spec.rowsComponent.absolutePath(params.path), requestedType.rowType, partitioner, filterIntervals).apply(globals) } } @@ -937,9 +926,7 @@ case class TableNativeZippedReader( ) extends TableReader { def pathsUsed: Seq[String] = FastSeq(pathLeft, pathRight) - override def renderShort(): String = s"(TableNativeZippedReader $pathLeft $pathRight ${ options.map(_.renderShort()).getOrElse("") })" - - private lazy val filterIntervals = options.exists(_.filterIntervals) + private lazy val filterIntervals = options.map(_.filterIntervals).getOrElse(false) private def intervals = options.map(_.intervals) @@ -1057,7 +1044,7 @@ case class TableNativeZippedReader( AbstractRVDSpec.readZippedLowered(ctx, specLeft.rowsSpec, specRight.rowsSpec, pathLeft + "/rows", pathRight + "/rows", - partitioner, filterIntervals, + partitioner, options.exists(_.filterIntervals), requestedType.rowType, reqLeft, reqRight, requestedType.key).apply(globals) } @@ -1123,8 +1110,6 @@ case class TableFromBlockMatrixNativeReader(params: TableFromBlockMatrixNativeRe override def toJValue: JValue = { decomposeWithName(params, "TableFromBlockMatrixNativeReader")(TableReader.formats) } - - def renderShort(): String = defaultRender() } object TableRead { @@ -1699,11 +1684,6 @@ case class TableIntervalJoin( } } -/** - * 
The TableMultiWayZipJoin node assumes that input tables have distinct keys. If inputs - * do not have distinct keys, the key that is included in the result is undefined, but - * is likely the last. - */ case class TableMultiWayZipJoin(children: IndexedSeq[TableIR], fieldName: String, globalName: String) extends TableIR { require(children.length > 0, "there must be at least one table as an argument") diff --git a/hail/src/main/scala/is/hail/expr/ir/TableWriter.scala b/hail/src/main/scala/is/hail/expr/ir/TableWriter.scala index 445e2200ae9..dbca1b280c4 100644 --- a/hail/src/main/scala/is/hail/expr/ir/TableWriter.scala +++ b/hail/src/main/scala/is/hail/expr/ir/TableWriter.scala @@ -11,9 +11,8 @@ import is.hail.io.index.StagedIndexWriter import is.hail.io.{AbstractTypedCodecSpec, BufferSpec, OutputBuffer, TypedCodecSpec} import is.hail.rvd.{AbstractRVDSpec, IndexSpec, RVDPartitioner, RVDSpecMaker} import is.hail.types.encoded.EType -import is.hail.types.physical.stypes.SCode -import is.hail.types.physical.stypes.interfaces.SVoidCode -import is.hail.types.physical.{PCanonicalBaseStruct, PCanonicalString, PCanonicalStruct, PInt64, PStream, PStruct, PType} +import is.hail.types.physical.{PCanonicalBaseStruct, PCanonicalString, PCanonicalStruct, PCode, PIndexableCode, PInt64, PStream, PStringCode, PStruct, PType} +import is.hail.types.physical.stypes.interfaces.PVoidCode import is.hail.types.virtual._ import is.hail.types.{RTable, TableType} import is.hail.utils._ @@ -175,7 +174,7 @@ case class PartitionNativeWriter(spec: AbstractTypedCodecSpec, partPrefix: Strin val keyType = ifIndexed { index.get._2 } val indexWriter = ifIndexed { StagedIndexWriter.withDefaults(keyType, mb.ecb) } - context.toI(cb).map(cb) { ctxCode: SCode => + context.toI(cb).map(cb) { ctxCode: PCode => val result = mb.newLocal[Long]("write_result") val filename = mb.newLocal[String]("filename") @@ -190,65 +189,66 @@ case class PartitionNativeWriter(spec: AbstractTypedCodecSpec, partPrefix: Strin indexWriter.add(cb, { IEmitCode.present(cb, keyType.asInstanceOf[PCanonicalBaseStruct] .constructFromFields(cb, stream.elementRegion, - keyType.fields.map(f => EmitCode.fromI(cb.emb)(cb => row.loadField(cb, f.name))), + keyType.fields.map(f => EmitCode.fromI(cb.emb)(cb => row.loadField(cb, f.name).typecast[PCode])), deepCopy = false)) }, ob.invoke[Long]("indexOffset"), - IEmitCode.present(cb, PCanonicalStruct().loadCheapSCode(cb, 0L))) + IEmitCode.present(cb, PCode(+PCanonicalStruct(), 0L))) } - cb += ob.writeByte(1.asInstanceOf[Byte]) + cb += ob.writeByte(1.asInstanceOf[Byte]) - spec.encodedType.buildEncoder(row.st, cb.emb.ecb) - .apply(cb, row, ob) + spec.encodedType.buildEncoder(row.st, cb.emb.ecb) + .apply(cb, row, ob) - cb.assign(n, n + 1L) + cb.assign(n, n + 1L) } - val pctx = ctxCode.memoize(cb, "context") - cb.assign(filename, pctx.asString.loadString()) - if (hasIndex) { - val indexFile = cb.newLocal[String]("indexFile") - cb.assign(indexFile, const(index.get._1).concat(filename).concat(".idx")) - indexWriter.init(cb, indexFile) - } - cb.assign(filename, const(partPrefix).concat(filename)) - cb.assign(os, Code.newInstance[ByteTrackingOutputStream, OutputStream](mb.create(filename))) - cb.assign(ob, spec.buildCodeOutputBuffer(Code.checkcast[OutputStream](os))) - cb.assign(n, 0L) - - stream.memoryManagedConsume(region, cb) { cb => - writeFile(cb, stream.element) - } - - cb += ob.writeByte(0.asInstanceOf[Byte]) - cb.assign(result, pResultType.allocate(region)) - if (hasIndex) - indexWriter.close(cb) - cb += ob.flush() - cb += 
os.invoke[Unit]("close") - filenameType.storeAtAddress(cb, pResultType.fieldOffset(result, "filePath"), region, pctx, false) - cb += Region.storeLong(pResultType.fieldOffset(result, "partitionCounts"), n) - pResultType.loadCheapSCode(cb, result.get) + PCode(pResultType, EmitCodeBuilder.scopedCode(mb) { cb: EmitCodeBuilder => + val pctx = ctxCode.memoize(cb, "context") + cb.assign(filename, pctx.asString.loadString()) + if (hasIndex) { + val indexFile = cb.newLocal[String]("indexFile") + cb.assign(indexFile, const(index.get._1).concat(filename).concat(".idx")) + indexWriter.init(cb, indexFile) + } + cb.assign(filename, const(partPrefix).concat(filename)) + cb.assign(os, Code.newInstance[ByteTrackingOutputStream, OutputStream](mb.create(filename))) + cb.assign(ob, spec.buildCodeOutputBuffer(Code.checkcast[OutputStream](os))) + cb.assign(n, 0L) + + stream.memoryManagedConsume(region, cb) { cb => + writeFile(cb, stream.element) + } + + cb += ob.writeByte(0.asInstanceOf[Byte]) + cb.assign(result, pResultType.allocate(region)) + if (hasIndex) + indexWriter.close(cb) + cb += ob.flush() + cb += os.invoke[Unit]("close") + filenameType.storeAtAddress(cb, pResultType.fieldOffset(result, "filePath"), region, pctx, false) + cb += Region.storeLong(pResultType.fieldOffset(result, "partitionCounts"), n) + result.get + }) } } } case class RVDSpecWriter(path: String, spec: RVDSpecMaker) extends MetadataWriter { def annotationType: Type = TArray(TString) - def writeMetadata( writeAnnotations: => IEmitCode, cb: EmitCodeBuilder, region: Value[Region]): Unit = { cb += cb.emb.getFS.invoke[String, Unit]("mkDir", path) - val pc = writeAnnotations.get(cb, "write annotations can't be missing!").asIndexable + val pc = writeAnnotations.get(cb, "write annotations can't be missing!").asInstanceOf[PIndexableCode] val a = pc.memoize(cb, "filePaths") val partFiles = cb.newLocal[Array[String]]("partFiles") val n = cb.newLocal[Int]("n", a.loadLength()) val i = cb.newLocal[Int]("i", 0) cb.assign(partFiles, Code.newArray[String](n)) cb.whileLoop(i < n, { - val s = a.loadElement(cb, i).get(cb, "file name can't be missing!").asString + val s = a.loadElement(cb, i).get(cb, "file name can't be missing!").asInstanceOf[PStringCode] cb += partFiles.update(i, s.loadString()) cb.assign(i, i + 1) }) @@ -286,7 +286,7 @@ case class TableSpecWriter(path: String, typ: TableType, rowRelPath: String, glo cb: EmitCodeBuilder, region: Value[Region]): Unit = { cb += cb.emb.getFS.invoke[String, Unit]("mkDir", path) - val pc = writeAnnotations.get(cb, "write annotations can't be missing!").asIndexable + val pc = writeAnnotations.get(cb, "write annotations can't be missing!").asInstanceOf[PIndexableCode] val partCounts = cb.newLocal[Array[Long]]("partCounts") val a = pc.memoize(cb, "writePartCounts") @@ -328,7 +328,7 @@ case class RelationalWriter(path: String, overwrite: Boolean, maybeRefs: Option[ } } - writeAnnotations.consume(cb, {}, { pc => assert(pc == SVoidCode) }) // PVoidCode.code is Code._empty + writeAnnotations.consume(cb, {}, { pc => assert(pc == PVoidCode) }) // PVoidCode.code is Code._empty cb += Code.invokeScalaObject2[FS, String, Unit](Class.forName("is.hail.utils.package$"), "writeNativeFileReadMe", cb.emb.getFS, path) cb += cb.emb.create(s"$path/_SUCCESS").invoke[Unit]("close") diff --git a/hail/src/main/scala/is/hail/expr/ir/TypeCheck.scala b/hail/src/main/scala/is/hail/expr/ir/TypeCheck.scala index 2d57dc4233e..5636772d362 100644 --- a/hail/src/main/scala/is/hail/expr/ir/TypeCheck.scala +++ 
b/hail/src/main/scala/is/hail/expr/ir/TypeCheck.scala @@ -404,7 +404,6 @@ object TypeCheck { } case Die(msg, typ, _) => assert(msg.typ == TString) - case Trap(child) => case x@ApplyIR(fn, typeArgs, args) => case x: AbstractApplyNode[_] => assert(x.implementation.unify(x.typeArgs, x.args.map(_.typ), x.returnType)) @@ -428,8 +427,6 @@ object TypeCheck { case BlockMatrixCollect(_) => case BlockMatrixWrite(_, _) => case BlockMatrixMultiWrite(_, _) => - case ValueToBlockMatrix(child, _, _) => - assert(child.typ.isInstanceOf[TArray] || child.typ.isInstanceOf[TNDArray] || child.typ == TFloat64) case CollectDistributedArray(ctxs, globals, cname, gname, body, _) => assert(ctxs.typ.isInstanceOf[TStream]) case x@ReadPartition(context, rowType, reader) => diff --git a/hail/src/main/scala/is/hail/expr/ir/PrimitiveTypeToIRIntermediateClassTag.scala b/hail/src/main/scala/is/hail/expr/ir/TypeToIRIntermediateClassTag.scala similarity index 73% rename from hail/src/main/scala/is/hail/expr/ir/PrimitiveTypeToIRIntermediateClassTag.scala rename to hail/src/main/scala/is/hail/expr/ir/TypeToIRIntermediateClassTag.scala index c72c79dd6ba..5dbadb80899 100644 --- a/hail/src/main/scala/is/hail/expr/ir/PrimitiveTypeToIRIntermediateClassTag.scala +++ b/hail/src/main/scala/is/hail/expr/ir/TypeToIRIntermediateClassTag.scala @@ -5,12 +5,14 @@ import is.hail.types.virtual._ import scala.reflect.{ClassTag, classTag} -object PrimitiveTypeToIRIntermediateClassTag { +object TypeToIRIntermediateClassTag { def apply(t: Type): ClassTag[_] = t.fundamentalType match { + case TVoid => classTag[Unit] case TBoolean => classTag[Boolean] case TInt32 => classTag[Int] case TInt64 => classTag[Long] case TFloat32 => classTag[Float] case TFloat64 => classTag[Double] + case _: TBaseStruct | _: TArray | TBinary => classTag[Long] } } diff --git a/hail/src/main/scala/is/hail/expr/ir/UnaryOp.scala b/hail/src/main/scala/is/hail/expr/ir/UnaryOp.scala index eae7967b3bf..48bdceb92fa 100644 --- a/hail/src/main/scala/is/hail/expr/ir/UnaryOp.scala +++ b/hail/src/main/scala/is/hail/expr/ir/UnaryOp.scala @@ -3,8 +3,6 @@ package is.hail.expr.ir import is.hail.asm4s._ import is.hail.expr._ import is.hail.types._ -import is.hail.types.physical.stypes.{SCode, SType} -import is.hail.types.physical.stypes.interfaces._ import is.hail.types.virtual._ import is.hail.utils._ @@ -25,12 +23,7 @@ object UnaryOp { private def incompatible[T](t: Type, op: UnaryOp): T = throw new RuntimeException(s"Cannot apply $op to values of type $t") - def emit(cb: EmitCodeBuilder, op: UnaryOp, x: SCode): SCode = { - - primitive(getReturnType(op, x.st.virtualType), emit(op, x.st.virtualType, SType.extractPrimCode(cb, x))) - } - - private def emit(op: UnaryOp, t: Type, x: Code[_]): Code[_] = t match { + def emit(op: UnaryOp, t: Type, x: Code[_]): Code[_] = t match { case TBoolean => val xx = coerce[Boolean](x) op match { diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/AggregatorState.scala b/hail/src/main/scala/is/hail/expr/ir/agg/AggregatorState.scala index 363164da111..b225d6f7a36 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/AggregatorState.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/AggregatorState.scala @@ -7,8 +7,7 @@ import is.hail.io.{BufferSpec, InputBuffer, OutputBuffer, TypedCodecSpec} import is.hail.types.VirtualTypeWithReq import is.hail.types.physical._ import is.hail.types.physical.stypes.SCode -import is.hail.types.physical.stypes.concrete.{SBaseStructPointer, SBaseStructPointerCode, SStackStruct} -import 
is.hail.types.physical.stypes.interfaces.SBinaryCode +import is.hail.types.physical.stypes.concrete.{SBaseStructPointer, SBaseStructPointerCode} import is.hail.utils._ trait AggregatorState { @@ -35,14 +34,14 @@ trait AggregatorState { def deserialize(codec: BufferSpec): (EmitCodeBuilder, Value[InputBuffer]) => Unit - def deserializeFromBytes(cb: EmitCodeBuilder, bytes: SBinaryCode): Unit = { + def deserializeFromBytes(cb: EmitCodeBuilder, bytes: PBinaryCode): Unit = { val lazyBuffer = kb.getOrDefineLazyField[MemoryBufferWrapper](Code.newInstance[MemoryBufferWrapper](), (this, "bufferWrapper")) cb += lazyBuffer.invoke[Array[Byte], Unit]("set", bytes.loadBytes()) val ib = cb.newLocal("aggstate_deser_from_bytes_ib", lazyBuffer.invoke[InputBuffer]("buffer")) deserialize(BufferSpec.defaultUncompressed)(cb, ib) } - def serializeToRegion(cb: EmitCodeBuilder, t: PBinary, r: Code[Region]): SCode = { + def serializeToRegion(cb: EmitCodeBuilder, t: PBinary, r: Code[Region]): Code[Long] = { val lazyBuffer = kb.getOrDefineLazyField[MemoryWriterWrapper](Code.newInstance[MemoryWriterWrapper](), (this, "writerWrapper")) val addr = kb.genFieldThisRef[Long]("addr") cb += lazyBuffer.invoke[Unit]("clear") @@ -52,7 +51,7 @@ trait AggregatorState { cb += t.storeLength(addr, lazyBuffer.invoke[Int]("length")) cb += lazyBuffer.invoke[Long, Unit]("copyToAddress", t.bytesAddress(addr)) - t.loadCheapSCode(cb, addr) + addr } } @@ -142,18 +141,18 @@ abstract class AbstractTypedRegionBackedAggState(val ptype: PType) extends Regio } def get(cb: EmitCodeBuilder): IEmitCode = { - IEmitCode(cb, storageType.isFieldMissing(off, 0), ptype.loadCheapSCode(cb, storageType.loadField(off, 0))) + IEmitCode(cb, storageType.isFieldMissing(off, 0), ptype.loadCheapPCode(cb, storageType.loadField(off, 0))) } def copyFrom(cb: EmitCodeBuilder, src: Code[Long]): Unit = { newState(cb, off) - storageType.storeAtAddress(cb, off, region, storageType.loadCheapSCode(cb, src), deepCopy = true) + storageType.storeAtAddress(cb, off, region, storageType.loadCheapPCode(cb, src), deepCopy = true) } def serialize(codec: BufferSpec): (EmitCodeBuilder, Value[OutputBuffer]) => Unit = { val codecSpec = TypedCodecSpec(storageType, codec) val enc = codecSpec.encodedType.buildEncoder(storageType.sType, kb) - (cb, ob: Value[OutputBuffer]) => enc(cb, storageType.loadCheapSCode(cb, off), ob) + (cb, ob: Value[OutputBuffer]) => enc(cb, storageType.loadCheapPCode(cb, off), ob) } def deserialize(codec: BufferSpec): (EmitCodeBuilder, Value[InputBuffer]) => Unit = { @@ -167,12 +166,12 @@ abstract class AbstractTypedRegionBackedAggState(val ptype: PType) extends Regio class PrimitiveRVAState(val vtypes: Array[VirtualTypeWithReq], val kb: EmitClassBuilder[_]) extends AggregatorState { private[this] val emitTypes = vtypes.map(_.canonicalEmitType) - assert(emitTypes.forall(_.st.isPrimitive)) + assert(emitTypes.forall(_.st.pType.isPrimitive)) val nFields: Int = emitTypes.length val fields: Array[EmitSettable] = Array.tabulate(nFields) { i => kb.newEmitField(s"primitiveRVA_${ i }_v", emitTypes(i)) } val storageType = PCanonicalTuple(true, emitTypes.map(_.canonicalPType): _*) - val sStorageType = storageType.sType + val sStorageType = SBaseStructPointer(storageType) def foreachField(f: (Int, EmitSettable) => Unit): Unit = { (0 until nFields).foreach { i => @@ -187,7 +186,7 @@ class PrimitiveRVAState(val vtypes: Array[VirtualTypeWithReq], val kb: EmitClass private[this] def loadVarsFromRegion(cb: EmitCodeBuilder, srcc: Code[Long]): Unit = { val pv = new 
SBaseStructPointerCode(sStorageType, srcc).memoize(cb, "prim_rvastate_load_vars") foreachField { (i, es) => - cb.assign(es, pv.loadField(cb, i)) + cb.assign(es, pv.loadField(cb, i).map(cb)(_.asPCode)) } } @@ -197,11 +196,7 @@ class PrimitiveRVAState(val vtypes: Array[VirtualTypeWithReq], val kb: EmitClass def store(cb: EmitCodeBuilder, regionStorer: (EmitCodeBuilder, Value[Region]) => Unit, destc: Code[Long]): Unit = { val dest = cb.newLocal("prim_rvastate_store_dest", destc) - storageType.storeAtAddress(cb, - dest, - null, - SStackStruct.constructFromArgs(cb, null, storageType.virtualType, fields.map(_.load): _*), - false) + storageType.storeAtAddressFromFields(cb, dest, null, fields.map(_.load), false) } def copyFrom(cb: EmitCodeBuilder, src: Code[Long]): Unit = loadVarsFromRegion(cb, src) @@ -209,15 +204,11 @@ class PrimitiveRVAState(val vtypes: Array[VirtualTypeWithReq], val kb: EmitClass def serialize(codec: BufferSpec): (EmitCodeBuilder, Value[OutputBuffer]) => Unit = { (cb, ob: Value[OutputBuffer]) => foreachField { case (_, es) => - if (es.emitType.required) { - ob.writePrimitive(cb, es.get(cb)) + if (es.pt.required) { + cb += ob.writePrimitive(es.pt)(es.v) } else { - es.toI(cb).consume(cb, - cb += ob.writeBoolean(true), - { sc => - cb += ob.writeBoolean(false) - ob.writePrimitive(cb, sc) - }) + cb += ob.writeBoolean(es.m) + cb.ifx(!es.m, cb += ob.writePrimitive(es.pt)(es.v)) } } } @@ -225,12 +216,12 @@ class PrimitiveRVAState(val vtypes: Array[VirtualTypeWithReq], val kb: EmitClass def deserialize(codec: BufferSpec): (EmitCodeBuilder, Value[InputBuffer]) => Unit = { (cb, ib: Value[InputBuffer]) => foreachField { case (_, es) => - if (es.emitType.required) { - cb.assign(es, EmitCode.present(cb.emb, ib.readPrimitive(es.st.virtualType))) + if (es.pt.required) { + cb.assign(es, EmitCode.present(cb.emb, PCode(es.pt, ib.readPrimitive(es.pt)))) } else { cb.ifx(ib.readBoolean(), - cb.assign(es, EmitCode.missing(cb.emb, es.st)), - cb.assign(es, EmitCode.present(cb.emb, ib.readPrimitive(es.st.virtualType)))) + cb.assign(es, EmitCode.missing(cb.emb, es.pt)), + cb.assign(es, EmitCode.present(cb.emb, PCode(es.pt, ib.readPrimitive(es.pt))))) } } } diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/AppendOnlyBTree.scala b/hail/src/main/scala/is/hail/expr/ir/agg/AppendOnlyBTree.scala index 6f682c3a043..0255286df36 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/AppendOnlyBTree.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/AppendOnlyBTree.scala @@ -97,7 +97,7 @@ class AppendOnlyBTree(kb: EmitClassBuilder[_], val key: BTreeKey, region: Value[ private def insert(cb: EmitCodeBuilder, nodec: Code[Long], insertIdxc: Code[Int], kc: EmitCode, childC: Code[Long]): Code[Long] = { val kt = key.compType.sType - val castKCode = EmitCode.fromI(cb.emb)(cb => kc.toI(cb).map(cb)(k => kt.coerceOrCopy(cb, region, k, false))) + val castKCode = EmitCode.fromI(cb.emb)(cb => kc.toI(cb).map(cb)(k => kt.coerceOrCopy(cb, region, k, false).asPCode)) val insertAt = kb.getOrGenEmitMethod("btree_insert", (this, "insert", kt), FastIndexedSeq[ParamType](typeInfo[Long], typeInfo[Int], castKCode.emitParamType, typeInfo[Long]), typeInfo[Long]) { insertAt => val node: Value[Long] = insertAt.getCodeParam[Long](1) diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/ApproxCDFAggregator.scala b/hail/src/main/scala/is/hail/expr/ir/agg/ApproxCDFAggregator.scala index d8300b75006..95b22966383 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/ApproxCDFAggregator.scala +++ 
b/hail/src/main/scala/is/hail/expr/ir/agg/ApproxCDFAggregator.scala @@ -43,7 +43,7 @@ class ApproxCDFState(val kb: EmitClassBuilder[_]) extends AggregatorState { } def result(cb: EmitCodeBuilder, region: Value[Region]): SBaseStructPointerCode = { - QuantilesAggregator.resultType.loadCheapSCode(cb, aggr.invoke[Region, Long]("rvResult", region)) + QuantilesAggregator.resultType.loadCheapPCode(cb, aggr.invoke[Region, Long]("rvResult", region)) } def newState(cb: EmitCodeBuilder, off: Code[Long]): Unit = cb += region.getNewRegion(regionSize) diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/ArrayElementLengthCheckAggregator.scala b/hail/src/main/scala/is/hail/expr/ir/agg/ArrayElementLengthCheckAggregator.scala index d63fb61b9ba..6913b9f9a10 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/ArrayElementLengthCheckAggregator.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/ArrayElementLengthCheckAggregator.scala @@ -257,7 +257,7 @@ class ArrayElementLengthCheckAggregator(nestedAggs: Array[StagedAggregator], kno cb.assign(i, i + 1) }) // don't need to deep copy because that's done in nested aggregators - pt.storeAtAddress(cb, addr, region, resultType.loadCheapSCode(cb, resultAddr), deepCopy = false) + pt.storeAtAddress(cb, addr, region, resultType.loadCheapPCode(cb, resultAddr), deepCopy = false) } ) diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/CallStatsAggregator.scala b/hail/src/main/scala/is/hail/expr/ir/agg/CallStatsAggregator.scala index 37987eec69d..fa2b3678127 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/CallStatsAggregator.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/CallStatsAggregator.scala @@ -6,7 +6,6 @@ import is.hail.expr.ir.{EmitClassBuilder, EmitCode, EmitCodeBuilder, IEmitCode} import is.hail.io.{BufferSpec, InputBuffer, OutputBuffer, TypedCodecSpec} import is.hail.types.physical._ import is.hail.types.virtual.{TCall, TInt32, Type} -import is.hail.types.physical.stypes.interfaces._ import is.hail.utils._ import scala.language.existentials @@ -77,7 +76,7 @@ class CallStatsState(val kb: EmitClassBuilder[_]) extends PointerBasedRVAState { def serialize(codec: BufferSpec): (EmitCodeBuilder, Value[OutputBuffer]) => Unit = { (cb, ob) => val codecSpec = TypedCodecSpec(CallStatsState.stateType, codec) codecSpec.encodedType.buildEncoder(CallStatsState.stateType.sType, kb) - .apply(cb, CallStatsState.stateType.loadCheapSCode(cb, off), ob) + .apply(cb, CallStatsState.stateType.loadCheapPCode(cb, off), ob) } def deserialize(codec: BufferSpec): (EmitCodeBuilder, Value[InputBuffer]) => Unit = { @@ -92,7 +91,7 @@ class CallStatsState(val kb: EmitClassBuilder[_]) extends PointerBasedRVAState { } def copyFromAddress(cb: EmitCodeBuilder, src: Code[Long]): Unit = { - cb.assign(off, CallStatsState.stateType.store(cb, region, CallStatsState.stateType.loadCheapSCode(cb, src), deepCopy = true)) + cb.assign(off, CallStatsState.stateType.store(cb, region, CallStatsState.stateType.loadCheapPCode(cb, src), deepCopy = true)) loadNAlleles(cb) } } @@ -140,7 +139,7 @@ class CallStatsAggregator extends StagedAggregator { call.toI(cb).consume(cb, { /* do nothing if missing */ - }, { case callc: SCallCode => + }, { case callc: PCallCode => val call = callc.memoize(cb, "callstats_seqop_callv") val hom = cb.newLocal[Boolean]("hom", true) val lastAllele = cb.newLocal[Int]("lastAllele", -1) @@ -189,7 +188,7 @@ class CallStatsAggregator extends StagedAggregator { val ac = acType.constructFromElements(cb, region, state.nAlleles, deepCopy = true) { (cb, i) => val acAtIndex = 
cb.newLocal[Int]("callstats_result_acAtIndex", state.alleleCountAtIndex(i, state.nAlleles)) cb.assign(alleleNumber, alleleNumber + acAtIndex) - IEmitCode.present(cb, primitive(acAtIndex)) + IEmitCode.present(cb, PCode(acType.elementType, acAtIndex)) } acType.storeAtAddress(cb, rt.fieldOffset(addr, "AC"), region, ac, deepCopy = false) @@ -200,20 +199,20 @@ class CallStatsAggregator extends StagedAggregator { val afType = resultType.fieldType("AF").asInstanceOf[PCanonicalArray] val af = afType.constructFromElements(cb, region, state.nAlleles, deepCopy = true) { (cb, i) => val acAtIndex = cb.newLocal[Int]("callstats_result_acAtIndex", state.alleleCountAtIndex(i, state.nAlleles)) - IEmitCode.present(cb, primitive(acAtIndex.toD / alleleNumber.toD)) + IEmitCode.present(cb, PCode(afType.elementType, acAtIndex.toD / alleleNumber.toD)) } afType.storeAtAddress(cb, rt.fieldOffset(addr, "AF"), region, af, deepCopy = false) }) val anType = resultType.fieldType("AN") - val an = primitive(alleleNumber) + val an = PCode(anType, alleleNumber) anType.storeAtAddress(cb, rt.fieldOffset(addr, "AN"), region, an, deepCopy = false) val homCountType = resultType.fieldType("homozygote_count").asInstanceOf[PCanonicalArray] val homCount = homCountType.constructFromElements(cb, region, state.nAlleles, deepCopy = true) { (cb, i) => val homCountAtIndex = cb.newLocal[Int]("callstats_result_homCountAtIndex", state.homCountAtIndex(i, state.nAlleles)) - IEmitCode.present(cb, primitive(homCountAtIndex)) + IEmitCode.present(cb, PCode(PInt32Required, homCountAtIndex)) } homCountType.storeAtAddress(cb, rt.fieldOffset(addr, "homozygote_count"), region, homCount, deepCopy = false) diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/CollectAsSetAggregator.scala b/hail/src/main/scala/is/hail/expr/ir/agg/CollectAsSetAggregator.scala index 7312df0c385..a3439e2f1d9 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/CollectAsSetAggregator.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/CollectAsSetAggregator.scala @@ -8,7 +8,6 @@ import is.hail.io._ import is.hail.types.VirtualTypeWithReq import is.hail.types.encoded.EType import is.hail.types.physical._ -import is.hail.types.physical.stypes.SCode import is.hail.types.virtual.Type import is.hail.utils._ @@ -18,8 +17,8 @@ class TypedKey(typ: PType, kb: EmitClassBuilder[_], region: Value[Region]) exten def isKeyMissing(src: Code[Long]): Code[Boolean] = storageType.isFieldMissing(src, 0) - def loadKey(cb: EmitCodeBuilder, src: Code[Long]): SCode = { - typ.loadCheapSCode(cb, storageType.loadField(src, 0)) + def loadKey(cb: EmitCodeBuilder, src: Code[Long]): PCode = { + typ.loadCheapPCode(cb, storageType.loadField(src, 0)) } def isEmpty(cb: EmitCodeBuilder, off: Code[Long]): Code[Boolean] = storageType.isFieldMissing(off, 1) @@ -46,7 +45,7 @@ class TypedKey(typ: PType, kb: EmitClassBuilder[_], region: Value[Region]) exten cb += Region.copyFrom(src, dest, storageType.byteSize) def deepCopy(cb: EmitCodeBuilder, er: EmitRegion, dest: Code[Long], src: Code[Long]): Unit = { - storageType.storeAtAddress(cb, dest, region, storageType.loadCheapSCode(cb, src), deepCopy = true) + storageType.storeAtAddress(cb, dest, region, storageType.loadCheapPCode(cb, src), deepCopy = true) } def compKeys(cb: EmitCodeBuilder, k1: EmitCode, k2: EmitCode): Code[Int] = { diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/CountAggregator.scala b/hail/src/main/scala/is/hail/expr/ir/agg/CountAggregator.scala index 89de267a373..db94bd44587 100644 --- 
a/hail/src/main/scala/is/hail/expr/ir/agg/CountAggregator.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/CountAggregator.scala @@ -4,7 +4,6 @@ import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir.{EmitCode, EmitCodeBuilder} import is.hail.types.physical._ -import is.hail.types.physical.stypes.interfaces.primitive import is.hail.types.virtual.Type object CountAggregator extends StagedAggregator { @@ -18,21 +17,21 @@ object CountAggregator extends StagedAggregator { assert(init.length == 0) assert(state.vtypes.head.r.required) val ev = state.fields(0) - cb.assign(ev, EmitCode.present(cb.emb, primitive(const(0L)))) + cb.assign(ev, EmitCode.present(cb.emb, PCode(resultType, 0L))) } protected def _seqOp(cb: EmitCodeBuilder, state: State, seq: Array[EmitCode]): Unit = { assert(seq.length == 0) assert(state.vtypes.head.r.required) val ev = state.fields(0) - cb.assign(ev, EmitCode.present(cb.emb, primitive(ev.pv.asInt64.longCode(cb) + 1L))) + cb.assign(ev, EmitCode.present(cb.emb, PCode(resultType, ev.pv.asInt64.longCode(cb) + 1L))) } protected def _combOp(cb: EmitCodeBuilder, state: State, other: State): Unit = { assert(state.vtypes.head.r.required) val v1 = state.fields(0) val v2 = other.fields(0) - cb.assign(v1, EmitCode.present(cb.emb, primitive(v1.pv.asInt64.longCode(cb) + v2.pv.asInt64.longCode(cb)))) + cb.assign(v1, EmitCode.present(cb.emb, PCode(resultType, v1.pv.asInt64.longCode(cb) + v2.pv.asInt64.longCode(cb)))) } protected def _storeResult(cb: EmitCodeBuilder, state: State, pt: PType, addr: Value[Long], region: Value[Region], ifMissing: EmitCodeBuilder => Unit): Unit = { diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/DensifyAggregator.scala b/hail/src/main/scala/is/hail/expr/ir/agg/DensifyAggregator.scala index f574a974f53..4f6d0f51c6c 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/DensifyAggregator.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/DensifyAggregator.scala @@ -57,7 +57,7 @@ class DensifyState(val arrayVType: VirtualTypeWithReq, val kb: EmitClassBuilder[ val codecSpec = TypedCodecSpec(arrayStorageType, codec) (cb: EmitCodeBuilder, ob: Value[OutputBuffer]) => { - val arrayCode = arrayStorageType.loadCheapSCode(cb, arrayAddr) + val arrayCode = arrayStorageType.loadCheapPCode(cb, arrayAddr) codecSpec.encodedType.buildEncoder(arrayCode.st, kb) .apply(cb, arrayCode, ob) cb += ob.writeInt(const(DensifyAggregator.END_SERIALIZATION)) @@ -88,7 +88,7 @@ class DensifyState(val arrayVType: VirtualTypeWithReq, val kb: EmitClassBuilder[ private def gc(cb: EmitCodeBuilder): Unit = { cb.ifx(region.totalManagedBytes() > maxRegionSize, { val newRegion = cb.newLocal[Region]("densify_gc", Region.stagedCreate(regionSize, kb.pool())) - cb.assign(arrayAddr, arrayStorageType.store(cb, newRegion, arrayStorageType.loadCheapSCode(cb, arrayAddr), deepCopy = true)) + cb.assign(arrayAddr, arrayStorageType.store(cb, newRegion, arrayStorageType.loadCheapPCode(cb, arrayAddr), deepCopy = true)) cb += region.invalidate() cb.assign(r, newRegion) @@ -113,7 +113,7 @@ class DensifyState(val arrayVType: VirtualTypeWithReq, val kb: EmitClassBuilder[ def combine(cb: EmitCodeBuilder, other: DensifyState): Unit = { assert(other.arrayStorageType == this.arrayStorageType) - val arr = arrayStorageType.loadCheapSCode(cb, other.arrayAddr).memoize(cb, "densify_comb_other") + val arr = arrayStorageType.loadCheapPCode(cb, other.arrayAddr).memoize(cb, "densify_comb_other") arr.asInstanceOf[SIndexableValue].forEachDefined(cb) { case (cb, idx, element) => cb += 
arrayStorageType.setElementPresent(arrayAddr, idx) eltType.storeAtAddress(cb, arrayStorageType.elementOffset(arrayAddr, length, idx), region, element, deepCopy = true) @@ -122,14 +122,14 @@ class DensifyState(val arrayVType: VirtualTypeWithReq, val kb: EmitClassBuilder[ } def result(cb: EmitCodeBuilder, region: Value[Region]): SIndexablePointerCode = { - arrayStorageType.loadCheapSCode(cb, arrayAddr) + arrayStorageType.loadCheapPCode(cb, arrayAddr) } def copyFrom(cb: EmitCodeBuilder, srcCode: Code[Long]): Unit = { cb.assign(arrayAddr, arrayStorageType.store(cb, region, - arrayStorageType.loadCheapSCode(cb, arrayStorageType.loadFromNested(srcCode)), + arrayStorageType.loadCheapPCode(cb, arrayStorageType.loadFromNested(srcCode)), deepCopy = true)) cb.assign(length, arrayStorageType.loadLength(arrayAddr)) } diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/DownsampleAggregator.scala b/hail/src/main/scala/is/hail/expr/ir/agg/DownsampleAggregator.scala index 368c097adfb..076da1e431c 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/DownsampleAggregator.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/DownsampleAggregator.scala @@ -3,12 +3,11 @@ package is.hail.expr.ir.agg import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir.orderings.CodeOrdering -import is.hail.expr.ir.{EmitClassBuilder, EmitCode, EmitCodeBuilder, EmitParamType, EmitRegion, IEmitCode, SCodeEmitParamType, ParamType} +import is.hail.expr.ir.{EmitClassBuilder, EmitCode, EmitCodeBuilder, EmitParamType, EmitRegion, IEmitCode, PCodeEmitParamType, ParamType} import is.hail.io.{BufferSpec, InputBuffer, OutputBuffer} import is.hail.types.VirtualTypeWithReq import is.hail.types.encoded.EType import is.hail.types.physical._ -import is.hail.types.physical.stypes.SingleCodeSCode import is.hail.types.physical.stypes.concrete.SIndexablePointerCode import is.hail.types.virtual._ import is.hail.utils._ @@ -23,7 +22,7 @@ class DownsampleBTreeKey(binType: PBaseStruct, pointType: PBaseStruct, kb: EmitC val compType: PType = binType private val kcomp = kb.getOrderingFunction(binType.sType, CodeOrdering.Compare()) - def isEmpty(cb: EmitCodeBuilder, off: Code[Long]): Code[Boolean] = PBooleanRequired.loadCheapSCode(cb, storageType.loadField(off, "empty")).boolCode(cb) + def isEmpty(cb: EmitCodeBuilder, off: Code[Long]): Code[Boolean] = PBooleanRequired.loadCheapPCode(cb, storageType.loadField(off, "empty")).boolCode(cb) def initializeEmpty(cb: EmitCodeBuilder, off: Code[Long]): Unit = cb += Region.storeBoolean(storageType.fieldOffset(off, "empty"), true) @@ -33,12 +32,12 @@ class DownsampleBTreeKey(binType: PBaseStruct, pointType: PBaseStruct, kb: EmitC val src = cb.newLocal[Long]("dsa_deep_copy_src", srcc) cb.ifx(Region.loadBoolean(storageType.loadField(src, "empty")), cb += Code._fatal[Unit]("key empty!")) - storageType.storeAtAddress(cb, dest, er.region, storageType.loadCheapSCode(cb, src), deepCopy = true) + storageType.storeAtAddress(cb, dest, er.region, storageType.loadCheapPCode(cb, src), deepCopy = true) } def compKeys(cb: EmitCodeBuilder, k1: EmitCode, k2: EmitCode): Code[Int] = kcomp(cb, k1, k2) - def loadCompKey(cb: EmitCodeBuilder, off: Value[Long]): EmitCode = EmitCode.present(cb.emb, binType.loadCheapSCode(cb, storageType.loadField(off, "bin"))) + def loadCompKey(cb: EmitCodeBuilder, off: Value[Long]): EmitCode = EmitCode.present(cb.emb, binType.loadCheapPCode(cb, storageType.loadField(off, "bin"))) } @@ -208,10 +207,10 @@ class DownsampleState(val kb: EmitClassBuilder[_], labelType: 
VirtualTypeWithReq tree.bulkStore(cb, ob) { (cb, ob, srcCode) => val src = cb.newLocal("downsample_state_ser_src", srcCode) cb += Region.loadBoolean(key.storageType.loadField(src, "empty")).orEmpty(Code._fatal[Unit]("bad")) - val binCode = binType.loadCheapSCode(cb, key.storageType.loadField(src, "bin")) + val binCode = binType.loadCheapPCode(cb, key.storageType.loadField(src, "bin")) binET.buildEncoder(binCode.st, kb).apply(cb, binCode, ob) - val pointCode = pointType.loadCheapSCode(cb, key.storageType.loadField(src, "point")) + val pointCode = pointType.loadCheapPCode(cb, key.storageType.loadField(src, "point")) pointET.buildEncoder(pointCode.st, kb).apply(cb, pointCode, ob) } cb += ob.writeInt(DownsampleState.serializationEndMarker) @@ -291,13 +290,13 @@ class DownsampleState(val kb: EmitClassBuilder[_], labelType: VirtualTypeWithReq cb += Region.storeInt(binType.fieldOffset(binStaging, "x"), binX) cb += Region.storeInt(binType.fieldOffset(binStaging, "y"), binY) cb.assign(insertOffset, - tree.getOrElseInitialize(cb, EmitCode.present(cb.emb, storageType.fieldType("binStaging").loadCheapSCode(cb, binStaging)))) + tree.getOrElseInitialize(cb, EmitCode.present(cb.emb, storageType.fieldType("binStaging").loadCheapPCode(cb, binStaging)))) cb.ifx(key.isEmpty(cb, insertOffset), { cb.assign(binOffset, key.storageType.loadField(insertOffset, "bin")) cb += Region.storeInt(binType.loadField(binOffset, "x"), binX) cb += Region.storeInt(binType.loadField(binOffset, "y"), binY) cb.assign(insertedPointOffset, key.storageType.loadField(insertOffset, "point")) - pointType.storeAtAddress(cb, insertedPointOffset, region, pointType.loadCheapSCode(cb, point), deepCopy = deepCopy) + pointType.storeAtAddress(cb, insertedPointOffset, region, pointType.loadCheapPCode(cb, point), deepCopy = deepCopy) cb += Region.storeBoolean(key.storageType.loadField(insertOffset, "empty"), false) cb.assign(treeSize, treeSize + 1) }) @@ -368,7 +367,7 @@ class DownsampleState(val kb: EmitClassBuilder[_], labelType: VirtualTypeWithReq val yc = point.loadField(cb, "y").get(cb).asFloat64.doubleCode(cb) val x = cb.newLocal[Double]("x", xc) val y = cb.newLocal[Double]("y", yc) - val pointc = SingleCodeSCode.fromSCode(cb, point, region).code.asInstanceOf[Code[Long]] + val pointc = SingleCodePCode.fromPCode(cb, point, region).code.asInstanceOf[Code[Long]] insertIntoTree(cb, xBinCoordinate(x), yBinCoordinate(y), pointc, deepCopy = true) }) cb.assign(i, i + 1) @@ -394,7 +393,7 @@ class DownsampleState(val kb: EmitClassBuilder[_], labelType: VirtualTypeWithReq cb.assign(bufferRight, max(bufferRight, x)) cb.assign(bufferBottom, min(bufferBottom, y)) cb.assign(bufferTop, max(bufferTop, y)) - buffer.append(cb, pointType.loadCheapSCode(cb, point)) + buffer.append(cb, pointType.loadCheapPCode(cb, point)) cb.ifx(buffer.size >= maxBufferSize, dumpBuffer(cb)) } } @@ -444,13 +443,13 @@ class DownsampleState(val kb: EmitClassBuilder[_], labelType: VirtualTypeWithReq def insert(cb: EmitCodeBuilder, x: EmitCode, y: EmitCode, l: EmitCode): Unit = { val name = "downsample_insert" - val mb = kb.getOrGenEmitMethod(name, (this, name), FastIndexedSeq[ParamType](x.st.paramType, y.st.paramType, l.emitParamType), UnitInfo) { mb => + val mb = kb.getOrGenEmitMethod(name, (this, name), FastIndexedSeq[ParamType](x.pv.st.pType.asParam, y.pv.st.pType.asParam, PCodeEmitParamType(l.pv.st.pType)), UnitInfo) { mb => val pointStaging = mb.newLocal[Long]("pointStaging") mb.voidWithBuilder { cb => - val x = mb.getSCodeParam(1) + val x = mb.getPCodeParam(1) .memoize(cb, 
"downsample_insert_x") - val y = mb.getSCodeParam(2) + val y = mb.getPCodeParam(2) .memoize(cb, "downsample_insert_y") val l = mb.getEmitParam(3, region) @@ -515,7 +514,7 @@ class DownsampleState(val kb: EmitClassBuilder[_], labelType: VirtualTypeWithReq mb.emitWithBuilder { cb => cb.assign(i, 0) cb.whileLoop(i < other.buffer.size, { - val point = SingleCodeSCode.fromSCode(cb, other.buffer.loadElement(cb, i).pv, region) + val point = SingleCodePCode.fromPCode(cb, other.buffer.loadElement(cb, i).pv, region) deepCopyAndInsertPoint(cb, point.code.asInstanceOf[Code[Long]]) cb.assign(i, i + 1) }) @@ -536,7 +535,7 @@ class DownsampleState(val kb: EmitClassBuilder[_], labelType: VirtualTypeWithReq val (pushElement, finish) = resType.constructFromFunctions(cb, region, treeSize, deepCopy = true) cb.ifx(treeSize > 0, { tree.foreach(cb) { (cb, tv) => - val pointCode = pointType.loadCheapSCode(cb, key.storageType.loadField(tv, "point")) + val pointCode = pointType.loadCheapPCode(cb, key.storageType.loadField(tv, "point")) pushElement(cb, IEmitCode.present(cb, pointCode)) } }) diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/GroupedAggregator.scala b/hail/src/main/scala/is/hail/expr/ir/agg/GroupedAggregator.scala index 3ecca5700cf..b9094408b04 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/GroupedAggregator.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/GroupedAggregator.scala @@ -8,7 +8,6 @@ import is.hail.io._ import is.hail.types.VirtualTypeWithReq import is.hail.types.encoded.EType import is.hail.types.physical._ -import is.hail.types.physical.stypes.SCode import is.hail.types.virtual.{TVoid, Type} import is.hail.utils._ @@ -21,7 +20,7 @@ class GroupedBTreeKey(kt: PType, kb: EmitClassBuilder[_], region: Value[Region], override def compWithKey(cb: EmitCodeBuilder, off: Code[Long], k: EmitCode): Code[Int] = { val mb = kb.getOrGenEmitMethod("compWithKey", - ("compWithKey_grouped_btree", kt, k.emitType), + ("compWithKey_grouped_btree", kt, k.pt), FastIndexedSeq[ParamType](typeInfo[Long], k.emitParamType), typeInfo[Int] ) { mb => @@ -42,8 +41,8 @@ class GroupedBTreeKey(kt: PType, kb: EmitClassBuilder[_], region: Value[Region], def isKeyMissing(off: Code[Long]): Code[Boolean] = storageType.isFieldMissing(off, 0) - def loadKey(cb: EmitCodeBuilder, off: Code[Long]): SCode = { - kt.loadCheapSCode(cb, storageType.loadField(off, 0)) + def loadKey(cb: EmitCodeBuilder, off: Code[Long]): PCode = { + kt.loadCheapPCode(cb, storageType.loadField(off, 0)) } def initValue(cb: EmitCodeBuilder, destc: Code[Long], k: EmitCode, rIdx: Code[Int]): Unit = { @@ -82,11 +81,11 @@ class GroupedBTreeKey(kt: PType, kb: EmitClassBuilder[_], region: Value[Region], cb += Region.storeInt(storageType.fieldOffset(off, 1), -1) def copy(cb: EmitCodeBuilder, src: Code[Long], dest: Code[Long]): Unit = - storageType.storeAtAddress(cb, dest, region, storageType.loadCheapSCode(cb, src), deepCopy = false) + storageType.storeAtAddress(cb, dest, region, storageType.loadCheapPCode(cb, src), deepCopy = false) def deepCopy(cb: EmitCodeBuilder, er: EmitRegion, dest: Code[Long], srcCode: Code[Long]): Unit = { val src = cb.newLocal("ga_deep_copy_src", srcCode) - storageType.storeAtAddress(cb, dest, region, storageType.loadCheapSCode(cb, src), deepCopy = true) + storageType.storeAtAddress(cb, dest, region, storageType.loadCheapPCode(cb, src), deepCopy = true) container.copyFrom(cb, containerOffset(src)) container.store(cb) } @@ -313,6 +312,6 @@ class GroupedAggregator(ktV: VirtualTypeWithReq, nestedAggs: Array[StagedAggrega } // don't 
need to deep copy because that's done in nested aggregators - pt.storeAtAddress(cb, addr, region, resultType.loadCheapSCode(cb, resultAddr), deepCopy = false) + pt.storeAtAddress(cb, addr, region, resultType.loadCheapPCode(cb, resultAddr), deepCopy = false) } } diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/ImputeTypeAggregator.scala b/hail/src/main/scala/is/hail/expr/ir/agg/ImputeTypeAggregator.scala index 734c7991f85..ac361466dab 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/ImputeTypeAggregator.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/ImputeTypeAggregator.scala @@ -4,7 +4,6 @@ import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir.{EmitClassBuilder, EmitCode, EmitCodeBuilder} import is.hail.types.physical._ -import is.hail.types.physical.stypes.interfaces._ import is.hail.types.virtual.{TInt32, TString, Type} import is.hail.types.{RPrimitive, VirtualTypeWithReq} import is.hail.utils._ @@ -81,7 +80,7 @@ class ImputeTypeState(kb: EmitClassBuilder[_]) extends PrimitiveRVAState(Array(V | (supportsI32.toI << 3) | (supportsI64.toI << 4) | (supportsF64.toI << 5)) - cb.assign(_repr, EmitCode.present(cb.emb, primitive(value))) + cb.assign(_repr, EmitCode.present(cb.emb, PCode(_repr.pt, value))) } def initialize(cb: EmitCodeBuilder): Unit = { @@ -91,8 +90,8 @@ class ImputeTypeState(kb: EmitClassBuilder[_]) extends PrimitiveRVAState(Array(V def seqOp(cb: EmitCodeBuilder, ec: EmitCode): Unit = { ec.toI(cb) .consume(cb, - cb.assign(_repr, EmitCode.present(cb.emb, primitive(repr & (~(1 << 1))))), - { case (pc: SStringCode) => + cb.assign(_repr, EmitCode.present(cb.emb, PCode(_repr.pt, repr & (~(1 << 1))))), + { case (pc: PStringCode) => val s = cb.newLocal[String]("impute_type_agg_seq_str") cb.assign(s, pc.loadString()) @@ -151,7 +150,7 @@ class ImputeTypeAggregator() extends StagedAggregator { Array(state.getAnyNonMissing, state.getAllDefined, state.getSupportsBool, state.getSupportsI32, state.getSupportsI64, state.getSupportsF64) .zipWithIndex.foreach { case (b, idx) => - rt.types(idx).storeAtAddress(cb, rt.fieldOffset(addr, idx), region, primitive(b), deepCopy = true) + rt.types(idx).storeAtAddress(cb, rt.fieldOffset(addr, idx), region, PCode(PBooleanRequired, b), deepCopy = true) } } } diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/LinearRegressionAggregator.scala b/hail/src/main/scala/is/hail/expr/ir/agg/LinearRegressionAggregator.scala index 0eaf4065640..37d3cda6bce 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/LinearRegressionAggregator.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/LinearRegressionAggregator.scala @@ -273,6 +273,6 @@ class LinearRegressionAggregator() extends StagedAggregator { stateType.loadField(state.off, 0), stateType.loadField(state.off, 1), Region.loadInt(stateType.loadField(state.off, 2)))) - pt.storeAtAddress(cb, addr, region, LinearRegressionAggregator.resultType.loadCheapSCode(cb, resAddr), deepCopy = false) + pt.storeAtAddress(cb, addr, region, LinearRegressionAggregator.resultType.loadCheapPCode(cb, resAddr), deepCopy = false) } } diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/MonoidAggregator.scala b/hail/src/main/scala/is/hail/expr/ir/agg/MonoidAggregator.scala index a1616d00883..c2a63f4211c 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/MonoidAggregator.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/MonoidAggregator.scala @@ -4,8 +4,7 @@ import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir.functions.UtilFunctions import is.hail.expr.ir.{coerce => _, _} 
-import is.hail.types.physical.stypes.interfaces._ -import is.hail.types.physical.{PType, typeToTypeInfo} +import is.hail.types.physical.{PCode, PType, typeToTypeInfo} import is.hail.types.virtual._ import scala.language.existentials @@ -32,12 +31,12 @@ class MonoidAggregator(monoid: StagedMonoidSpec) extends StagedAggregator { assert(init.length == 0) val stateRequired = state.vtypes.head.r.required val ev = state.fields(0) - if (!ev.required) { + if (!ev.pt.required) { assert(!stateRequired, s"monoid=$monoid, stateRequired=$stateRequired") - cb.assign(ev, EmitCode.missing(cb.emb, ev.st)) + cb.assign(ev, EmitCode.missing(cb.emb, ev.pt)) } else { assert(stateRequired, s"monoid=$monoid, stateRequired=$stateRequired") - cb.assign(ev, EmitCode.present(cb.emb, primitive(ev.st.virtualType, monoid.neutral.get))) + cb.assign(ev, EmitCode.present(cb.emb, PCode(ev.pt, monoid.neutral.get))) } } @@ -65,11 +64,10 @@ class MonoidAggregator(monoid: StagedMonoidSpec) extends StagedAggregator { ev1: EmitSettable, ev2: EmitValue ): Unit = { - val combined = primitive(monoid.typ, monoid(ev1.pv.asPrimitive.primitiveCode, ev2.pv.asPrimitive.primitiveCode)) cb.ifx(ev1.m, cb.ifx(!ev2.m, cb.assign(ev1, ev2)), cb.ifx(!ev2.m, - cb.assign(ev1, EmitCode.present(cb.emb, combined)))) + cb.assign(ev1, EmitCode.present(cb.emb, PCode(ev1.pt, monoid(ev1.v, ev2.v)))))) } } diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/NDArraySumAggregator.scala b/hail/src/main/scala/is/hail/expr/ir/agg/NDArraySumAggregator.scala index 00816d997f8..8d8d78d0b22 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/NDArraySumAggregator.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/NDArraySumAggregator.scala @@ -2,12 +2,11 @@ package is.hail.expr.ir.agg import is.hail.annotations.Region import is.hail.asm4s._ -import is.hail.expr.ir.{CodeParamType, EmitCode, EmitCodeBuilder, EmitParamType, SCodeEmitParamType} +import is.hail.expr.ir.{CodeParamType, EmitCode, EmitCodeBuilder, EmitParamType, PCodeEmitParamType} import is.hail.types.VirtualTypeWithReq import is.hail.types.physical.stypes.SCode -import is.hail.types.physical.stypes.concrete.SNDArrayPointerSettable -import is.hail.types.physical.stypes.interfaces.{SNDArray, SNDArrayCode, SNDArrayValue} -import is.hail.types.physical.{PCanonicalNDArray, PType} +import is.hail.types.physical.stypes.interfaces.SNDArray +import is.hail.types.physical.{PCanonicalNDArray, PNDArrayCode, PNDArrayValue, PType} import is.hail.types.virtual.Type import is.hail.utils._ @@ -34,13 +33,13 @@ class NDArraySumAggregator(ndVTyp: VirtualTypeWithReq) extends StagedAggregator override protected def _seqOp(cb: EmitCodeBuilder, state: State, seq: Array[EmitCode]): Unit = { val Array(nextNDCode) = seq - val seqOpMethod = cb.emb.genEmitMethod("ndarray_sum_aggregator_seq_op", FastIndexedSeq(nextNDCode.emitParamType), CodeParamType(UnitInfo)) + val seqOpMethod = cb.emb.genEmitMethod("ndarray_sum_aggregator_seq_op", FastIndexedSeq(PCodeEmitParamType(nextNDCode.pt)), CodeParamType(UnitInfo)) seqOpMethod.voidWithBuilder { cb => val nextNDInput = seqOpMethod.getEmitParam(1, null) // no streams here - nextNDInput.toI(cb).consume(cb, {}, { case nextNDArrayPCode: SNDArrayCode => + nextNDInput.toI(cb).consume(cb, {}, { case nextNDArrayPCode: PNDArrayCode => val nextNDPV = nextNDArrayPCode.memoize(cb, "ndarray_sum_seqop_next") - val statePV = state.storageType.loadCheapSCode(cb, state.off).asBaseStruct.memoize(cb, "ndarray_sum_seq_op_state") + val statePV = state.storageType.loadCheapPCode(cb, 
state.off).asBaseStruct.memoize(cb, "ndarray_sum_seq_op_state") statePV.loadField(cb, ndarrayFieldNumber).consume(cb, { cb += (state.region.getNewRegion(Region.TINY)) @@ -61,11 +60,11 @@ class NDArraySumAggregator(ndVTyp: VirtualTypeWithReq) extends StagedAggregator val combOpMethod = cb.emb.genEmitMethod[Unit]("ndarray_sum_aggregator_comb_op") combOpMethod.voidWithBuilder { cb => - val rightPV = other.storageType.loadCheapSCode(cb, other.off).asBaseStruct.memoize(cb, "ndarray_sum_comb_op_right") + val rightPV = other.storageType.loadCheapPCode(cb, other.off).asBaseStruct.memoize(cb, "ndarray_sum_comb_op_right") rightPV.loadField(cb, ndarrayFieldNumber).consume(cb, {}, { rightNDPC => val rightNdValue = rightNDPC.asNDArray.memoize(cb, "right_ndarray_sum_agg") - val leftPV = state.storageType.loadCheapSCode(cb, state.off).asBaseStruct.memoize(cb, "ndarray_sum_comb_op_left") + val leftPV = state.storageType.loadCheapPCode(cb, state.off).asBaseStruct.memoize(cb, "ndarray_sum_comb_op_left") leftPV.loadField(cb, ndarrayFieldNumber).consume(cb, { state.storeNonmissing(cb, rightNdValue) @@ -81,16 +80,15 @@ class NDArraySumAggregator(ndVTyp: VirtualTypeWithReq) extends StagedAggregator cb.invokeVoid(combOpMethod) } - private def addValues(cb: EmitCodeBuilder, region: Value[Region], leftNdValue: SNDArrayValue, rightNdValue: SNDArrayValue): Unit = { + private def addValues(cb: EmitCodeBuilder, region: Value[Region], leftNdValue: PNDArrayValue, rightNdValue: PNDArrayValue): Unit = { cb.ifx(!leftNdValue.sameShape(rightNdValue, cb), cb += Code._fatal[Unit]("Can't sum ndarrays of different shapes.")) - SNDArray.coiterate(cb, region, FastIndexedSeq((leftNdValue.get, "left"), (rightNdValue.get, "right")), { - case Seq(l, r) => - val newElement = SCode.add(cb, l, r, true) - cb.assign(l, newElement.copyToRegion(cb, region, leftNdValue.st.elementType)) - }) + SNDArray.forEachIndex(cb, leftNdValue.shapes(cb), "ndarray_sum_addvalues") { case (cb, indices) => + val newElement = SCode.add(cb, leftNdValue.loadElement(indices, cb), rightNdValue.loadElement(indices, cb), true) + ndTyp.setElement(cb, region, indices, leftNdValue.value.asInstanceOf[Value[Long]], newElement, deepCopy = true) + } } protected def _storeResult(cb: EmitCodeBuilder, state: State, pt: PType, addr: Value[Long], region: Value[Region], ifMissing: EmitCodeBuilder => Unit): Unit = { diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/StagedArrayBuilder.scala b/hail/src/main/scala/is/hail/expr/ir/agg/StagedArrayBuilder.scala index cae640bd80d..2dad65a2df5 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/StagedArrayBuilder.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/StagedArrayBuilder.scala @@ -37,11 +37,11 @@ class StagedArrayBuilder(eltType: PType, kb: EmitClassBuilder[_], region: Value[ cb.assign(tmpOff, src) cb.assign(size, Region.loadInt(currentSizeOffset(tmpOff))) cb.assign(capacity, Region.loadInt(capacityOffset(tmpOff))) - cb.assign(data, eltArray.store(cb, region, eltArray.loadCheapSCode(cb, Region.loadAddress(dataOffset(tmpOff))), deepCopy = true)) + cb.assign(data, eltArray.store(cb, region, eltArray.loadCheapPCode(cb, Region.loadAddress(dataOffset(tmpOff))), deepCopy = true)) } def reallocateData(cb: EmitCodeBuilder): Unit = { - cb.assign(data, eltArray.store(cb, region, eltArray.loadCheapSCode(cb, data), deepCopy = true)) + cb.assign(data, eltArray.store(cb, region, eltArray.loadCheapPCode(cb, data), deepCopy = true)) } def storeTo(cb: EmitCodeBuilder, dest: Code[Long]): Unit = { @@ -58,7 +58,7 @@ class 
StagedArrayBuilder(eltType: PType, kb: EmitClassBuilder[_], region: Value[ cb += ob.writeInt(size) cb += ob.writeInt(capacity) codecSpec.encodedType.buildEncoder(eltArray.sType, kb) - .apply(cb, eltArray.loadCheapSCode(cb, data), ob) + .apply(cb, eltArray.loadCheapPCode(cb, data), ob) cb += ob.writeInt(const(StagedArrayBuilder.END_SERIALIZATION)) } } @@ -112,7 +112,7 @@ class StagedArrayBuilder(eltType: PType, kb: EmitClassBuilder[_], region: Value[ def loadElement(cb: EmitCodeBuilder, idx: Value[Int]): EmitCode = { val m = eltArray.isElementMissing(data, idx) - EmitCode(Code._empty, m, eltType.loadCheapSCode(cb, eltArray.loadElement(data, capacity, idx))) + EmitCode(Code._empty, m, eltType.loadCheapPCode(cb, eltArray.loadElement(data, capacity, idx))) } private def resize(cb: EmitCodeBuilder): Unit = { diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/StagedBlockLinkedList.scala b/hail/src/main/scala/is/hail/expr/ir/agg/StagedBlockLinkedList.scala index bd75c85f1bc..53cdfd1fa71 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/StagedBlockLinkedList.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/StagedBlockLinkedList.scala @@ -6,7 +6,6 @@ import is.hail.expr.ir._ import is.hail.io.{InputBuffer, OutputBuffer} import is.hail.types.encoded._ import is.hail.types.physical._ -import is.hail.types.physical.stypes.SCode import is.hail.types.physical.stypes.concrete.{SIndexablePointerCode, SIndexablePointerSettable} import is.hail.utils._ @@ -142,7 +141,7 @@ class StagedBlockLinkedList(val elemType: PType, val kb: EmitClassBuilder[_]) { cb.whileLoop(i < count(n), { f(cb, EmitCode(Code._empty, bufferType.isElementMissing(buffer(n), i), - elemType.loadCheapSCode(cb, bufferType.loadElement(buffer(n), capacity(n), i)))) + elemType.loadCheapPCode(cb, bufferType.loadElement(buffer(n), capacity(n), i)))) cb.assign(i, i + 1) }) } @@ -209,7 +208,7 @@ class StagedBlockLinkedList(val elemType: PType, val kb: EmitClassBuilder[_]) { foreachNode(cb, n) { cb => cb += ob.writeBoolean(true) cb.assign(b, buffer(n)) - bufferEType.buildPrefixEncoder(cb, bufferType.loadCheapSCode(cb, b).memoize(cb, "sbll_serialize_v"), ob, count(n)) + bufferEType.buildPrefixEncoder(cb, bufferType.loadCheapPCode(cb, b).memoize(cb, "sbll_serialize_v"), ob, count(n)) } cb += ob.writeBoolean(false) } @@ -231,7 +230,7 @@ class StagedBlockLinkedList(val elemType: PType, val kb: EmitClassBuilder[_]) { cb.invokeVoid(desF, region, inputBuffer) } - private def appendShallow(cb: EmitCodeBuilder, r: Code[Region], aCode: SCode): Unit = { + private def appendShallow(cb: EmitCodeBuilder, r: Code[Region], aCode: PCode): Unit = { val buff = cb.memoize(aCode, "sbll_append_shallow_a").asInstanceOf[SIndexablePointerSettable] val newNode = cb.newLocal[Long]("sbll_append_shallow_newnode", nodeType.allocate(r)) cb += initNode(newNode, buf = buff.a, count = buff.length) diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/TakeByAggregator.scala b/hail/src/main/scala/is/hail/expr/ir/agg/TakeByAggregator.scala index 6ac443bf8fa..5635f88109f 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/TakeByAggregator.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/TakeByAggregator.scala @@ -7,9 +7,8 @@ import is.hail.expr.ir.{Ascending, EmitClassBuilder, EmitCode, EmitCodeBuilder, import is.hail.io.{BufferSpec, InputBuffer, OutputBuffer} import is.hail.types.VirtualTypeWithReq import is.hail.types.physical._ -import is.hail.types.physical.stypes.SCode import is.hail.types.physical.stypes.concrete.{SBaseStructPointerCode, SIndexablePointerCode} -import 
is.hail.types.physical.stypes.interfaces._ +import is.hail.types.physical.stypes.interfaces.SBaseStruct import is.hail.types.virtual.{TInt32, Type} import is.hail.utils._ @@ -57,7 +56,7 @@ class TakeByRVAS(val valueVType: VirtualTypeWithReq, val keyVType: VirtualTypeWi ord.compare(cb, k1, k2, true) } - private def compareIndexedKey(cb: EmitCodeBuilder, k1: SCode, k2: SCode): Code[Int] = { + private def compareIndexedKey(cb: EmitCodeBuilder, k1: PCode, k2: PCode): Code[Int] = { val ord = StructOrdering.make(k1.st.asInstanceOf[SBaseStruct], k2.st.asInstanceOf[SBaseStruct], cb.emb.ecb, Array(so, Ascending), true) ord.compareNonnull(cb, k1, k2) } @@ -199,7 +198,7 @@ class TakeByRVAS(val valueVType: VirtualTypeWithReq, val keyVType: VirtualTypeWi private def keyIsMissing(offset: Code[Long]): Code[Boolean] = indexedKeyType.isFieldMissing(offset, 0) - private def loadKeyValue(cb: EmitCodeBuilder, offset: Code[Long]): SCode = keyType.loadCheapSCode(cb, indexedKeyType.loadField(offset, 0)) + private def loadKeyValue(cb: EmitCodeBuilder, offset: Code[Long]): PCode = keyType.loadCheapPCode(cb, indexedKeyType.loadField(offset, 0)) private def loadKey(cb: EmitCodeBuilder, offset: Value[Long]): EmitCode = EmitCode(Code._empty, keyIsMissing(offset), loadKeyValue(cb, offset)) @@ -210,8 +209,8 @@ class TakeByRVAS(val valueVType: VirtualTypeWithReq, val keyVType: VirtualTypeWi val j = mb.getCodeParam[Long](2) mb.emitWithBuilder(cb => compareIndexedKey(cb, - indexedKeyType.loadCheapSCode(cb, eltTuple.fieldOffset(i, 0)), - indexedKeyType.loadCheapSCode(cb, eltTuple.fieldOffset(j, 0)))) + indexedKeyType.loadCheapPCode(cb, eltTuple.fieldOffset(i, 0)), + indexedKeyType.loadCheapPCode(cb, eltTuple.fieldOffset(j, 0)))) mb.invokeCode(_, _) } @@ -329,7 +328,7 @@ class TakeByRVAS(val valueVType: VirtualTypeWithReq, val keyVType: VirtualTypeWi indexedKeyType.storeAtAddress(cb, eltTuple.fieldOffset(staging, 0), region, - indexedKeyType.loadCheapSCode(cb, indexedKey), + indexedKeyType.loadCheapPCode(cb, indexedKey), deepCopy = false) value.toI(cb) .consume(cb, @@ -343,12 +342,12 @@ class TakeByRVAS(val valueVType: VirtualTypeWithReq, val keyVType: VirtualTypeWi } private def swapStaging(cb: EmitCodeBuilder): Unit = { - eltTuple.storeAtAddress(cb, ab.elementOffset(0), region, eltTuple.loadCheapSCode(cb, staging), true) + eltTuple.storeAtAddress(cb, ab.elementOffset(0), region, eltTuple.loadCheapPCode(cb, staging), true) rebalanceDown(cb, 0) } private def enqueueStaging(cb: EmitCodeBuilder): Unit = { - ab.append(cb, eltTuple.loadCheapSCode(cb, staging)) + ab.append(cb, eltTuple.loadCheapPCode(cb, staging)) rebalanceUp(cb, ab.size - 1) } @@ -383,8 +382,8 @@ class TakeByRVAS(val valueVType: VirtualTypeWithReq, val keyVType: VirtualTypeWi // for tests def seqOp(cb: EmitCodeBuilder, vm: Code[Boolean], v: Code[_], km: Code[Boolean], k: Code[_]): Unit = { - val vec = EmitCode(Code._empty, vm, if (valueType.isPrimitive) primitive(valueType.virtualType, v) else valueType.loadCheapSCode(cb, coerce[Long](v))) - val kec = EmitCode(Code._empty, km, if (keyType.isPrimitive) primitive(keyType.virtualType, k) else keyType.loadCheapSCode(cb, coerce[Long](k))) + val vec = EmitCode(Code._empty, vm, PCode(valueType, v)) + val kec = EmitCode(Code._empty, km, PCode(keyType, k)) seqOp(cb, vec, kec) } @@ -531,11 +530,11 @@ class TakeByRVAS(val valueVType: VirtualTypeWithReq, val keyVType: VirtualTypeWi val sortedIdx = cb.newLocal[Int]("tba_result_sortedidx", Region.loadInt(indexOffset(idx))) ab.loadElement(cb, sortedIdx).toI(cb) .flatMap(cb) { 
case pct: SBaseStructPointerCode => - pct.memoize(cb, "takeby_result_tuple").loadField(cb, 1) + pct.memoize(cb, "takeby_result_tuple").loadField(cb, 1).typecast[PCode] } }.a } - resultType.loadCheapSCode(cb, cb.invokeCode[Long](mb, _r)) + resultType.loadCheapPCode(cb, cb.invokeCode[Long](mb, _r)) } } diff --git a/hail/src/main/scala/is/hail/expr/ir/analyses/ComputeMethodSplits.scala b/hail/src/main/scala/is/hail/expr/ir/analyses/ComputeMethodSplits.scala deleted file mode 100644 index ce9f1eb6c15..00000000000 --- a/hail/src/main/scala/is/hail/expr/ir/analyses/ComputeMethodSplits.scala +++ /dev/null @@ -1,45 +0,0 @@ -package is.hail.expr.ir.analyses - -import is.hail.HailContext -import is.hail.expr.ir._ - -object ComputeMethodSplits { - def apply(ir: IR, controlFlowPreventsSplit: Memo[Unit]): Memo[Unit] = { - val m = Memo.empty[Unit] - - val splitThreshold = HailContext.getFlag("method_split_ir_limit").toInt - require(splitThreshold > 0, s"invalid method_split_ir_limit") - - def recurAndComputeSizeUnderneath(x: IR): Int = { - val sizeUnderneath = x.children.iterator.map { case child: IR => recurAndComputeSizeUnderneath(child) }.sum - - val shouldSplit = !controlFlowPreventsSplit.contains(x) && (x match { - case _: TailLoop => true - - // stream consumers - case _: ToArray => true - case _: ToSet => true - case _: ToDict => true - case _: StreamFold => true - case _: StreamFold2 => true - case _: StreamLen => true - case _: StreamFor => true - - case _ => sizeUnderneath > splitThreshold - }) - if (shouldSplit) { - m.bind(x, ()) - 0 // method call is small - } else { - sizeUnderneath + (x match { - case _: Ref => 0 - case _: In => 0 - case _ if IsConstant(x) => 0 - case _ => 1 - }) - } - } - recurAndComputeSizeUnderneath(ir) - m - } -} diff --git a/hail/src/main/scala/is/hail/expr/ir/analyses/ControlFlowPreventsSplit.scala b/hail/src/main/scala/is/hail/expr/ir/analyses/ControlFlowPreventsSplit.scala deleted file mode 100644 index afee062cd8e..00000000000 --- a/hail/src/main/scala/is/hail/expr/ir/analyses/ControlFlowPreventsSplit.scala +++ /dev/null @@ -1,33 +0,0 @@ -package is.hail.expr.ir.analyses - -import is.hail.expr.ir.{BaseIR, Memo, Recur, Ref, TailLoop, UsesAndDefs, VisitIR} -import is.hail.types.virtual.TStream - -object ControlFlowPreventsSplit { - - def apply(x: BaseIR, parentPointers: Memo[BaseIR], usesAndDefs: UsesAndDefs): Memo[Unit] = { - val m = Memo.empty[Unit] - VisitIR(x) { - case r@Recur(name, _, _) => - var parent: BaseIR = r - while (parent match { - case TailLoop(`name`, _, _) => false - case _ => true - }) { - if (!m.contains(parent)) - m.bind(parent, ()) - parent = parentPointers.lookup(parent) - } - case r@Ref(name, t) if t.isInstanceOf[TStream] => - val declaration = usesAndDefs.defs.lookup(r) - var parent: BaseIR = r - while (!(parent.eq(declaration))) { - if (!m.contains(parent)) - m.bind(parent, ()) - parent = parentPointers.lookup(parent) - } - case _ => - } - m - } -} diff --git a/hail/src/main/scala/is/hail/expr/ir/analyses/ParentPointers.scala b/hail/src/main/scala/is/hail/expr/ir/analyses/ParentPointers.scala deleted file mode 100644 index fb6f52b6552..00000000000 --- a/hail/src/main/scala/is/hail/expr/ir/analyses/ParentPointers.scala +++ /dev/null @@ -1,17 +0,0 @@ -package is.hail.expr.ir.analyses - -import is.hail.expr.ir.{BaseIR, Memo} - -object ParentPointers { - def apply(x: BaseIR): Memo[BaseIR] = { - val m = Memo.empty[BaseIR] - - def recur(ir: BaseIR, parent: BaseIR): Unit = { - m.bind(ir, parent) - ir.children.foreach(recur(_, ir)) - } - - recur(x, 
null) - m - } -} diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/ArrayFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/ArrayFunctions.scala index 92c77f32490..aaad04eb331 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/ArrayFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/ArrayFunctions.scala @@ -4,9 +4,7 @@ import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir._ import is.hail.types.coerce -import is.hail.types.physical.stypes.EmitType -import is.hail.types.physical.stypes.primitives.SFloat64 -import is.hail.types.physical.stypes.interfaces._ +import is.hail.types.physical.{PArray, PCode, PFloat64, PIndexableCode, PType} import is.hail.types.virtual._ import is.hail.utils._ @@ -307,10 +305,10 @@ object ArrayFunctions extends RegistryFunctions { } registerIEmitCode2("corr", TArray(TFloat64), TArray(TFloat64), TFloat64, { - (_: Type, _: EmitType, _: EmitType) => EmitType(SFloat64, false) + (_: Type, _: PType, _: PType) => PFloat64() }) { case (cb, r, rt, ec1, ec2) => - ec1.toI(cb).flatMap(cb) { case pc1: SIndexableCode => - ec2.toI(cb).flatMap(cb) { case pc2: SIndexableCode => + ec1.toI(cb).flatMap(cb) { case pc1: PIndexableCode => + ec2.toI(cb).flatMap(cb) { case pc2: PIndexableCode => val pv1 = pc1.memoize(cb, "corr_a1") val pv2 = pc2.memoize(cb, "corr_a2") val l1 = cb.newLocal("len1", pv1.loadLength()) @@ -348,7 +346,7 @@ object ArrayFunctions extends RegistryFunctions { MathFunctions.mathPackageClass, "sqrt", (n.toD * xSqSum - xSum * xSum) * (n.toD * ySqSum - ySum * ySum)) - primitive(res) + PCode(rt, res) }) } } diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/CallFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/CallFunctions.scala index d0a86ae95b3..db99a38d660 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/CallFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/CallFunctions.scala @@ -1,96 +1,44 @@ package is.hail.expr.ir.functions -import is.hail.asm4s.Code -import is.hail.types.physical.stypes._ -import is.hail.types.physical.stypes.concrete.{SCanonicalCall, SIndexablePointer} -import is.hail.types.physical.stypes.interfaces._ -import is.hail.types.physical.stypes.primitives.{SBoolean, SInt32} -import is.hail.types.physical.{PCanonicalArray, PInt32} +import is.hail.expr.ir.InferPType +import is.hail.types._ +import is.hail.types.physical.{PBoolean, PCanonicalArray, PCanonicalCall, PInt32, PType} import is.hail.types.virtual._ +import is.hail.utils.FastSeq import is.hail.variant._ -import scala.reflect.classTag - object CallFunctions extends RegistryFunctions { def registerAll() { - registerWrappedScalaFunction1("Call", TString, TCall, (rt: Type, st: SType) => SCanonicalCall)(Call.getClass, "parse") - - registerSCode1("callFromRepr", TInt32, TCall, (rt: Type, _: SType) => SCanonicalCall) { - case (er, cb, rt, repr) => SCanonicalCall.constructFromIntRepr(repr.asInt.intCode(cb)) - } + registerWrappedScalaFunction1("Call", TString, TCall, (rt: Type, st: PType) => PCanonicalCall(st.required))(Call.getClass, "parse") - registerSCode1("Call", TBoolean, TCall, (rt: Type, _: SType) => SCanonicalCall) { - case (er, cb, rt, phased) => - SCanonicalCall.constructFromIntRepr(Code.invokeScalaObject[Int]( - Call0.getClass, "apply", Array(classTag[Boolean].runtimeClass), Array(phased.asBoolean.boolCode(cb)))) - } + registerScalaFunction("Call", Array(TBoolean), TCall, (rt: Type, _: Seq[PType]) => PCanonicalCall())(Call0.getClass, "apply") - registerSCode2("Call", 
TInt32, TBoolean, TCall, (rt: Type, _: SType, _: SType) => SCanonicalCall) { - case (er, cb, rt, a1, phased) => - SCanonicalCall.constructFromIntRepr(Code.invokeScalaObject[Int]( - Call1.getClass, "apply", Array(classTag[Int].runtimeClass, classTag[Boolean].runtimeClass), Array(a1.asInt.intCode(cb), phased.asBoolean.boolCode(cb)))) - } + registerScalaFunction("Call", Array(TInt32, TBoolean), TCall, (rt: Type, _: Seq[PType]) => PCanonicalCall())(Call1.getClass, "apply") - registerSCode3("Call", TInt32, TInt32, TBoolean, TCall, (rt: Type, _: SType, _: SType, _: SType) => SCanonicalCall) { - case (er, cb, rt, a1, a2, phased) => - SCanonicalCall.constructFromIntRepr(Code.invokeScalaObject[Int]( - Call2.getClass, "apply", Array(classTag[Int].runtimeClass, classTag[Int].runtimeClass, classTag[Boolean].runtimeClass), Array(a1.asInt.intCode(cb), a2.asInt.intCode(cb), phased.asBoolean.boolCode(cb)))) - } - - registerSCode1("UnphasedDiploidGtIndexCall", TInt32, TCall, (rt: Type, _: SType) => SCanonicalCall) { - case (er, cb, rt, x) => - SCanonicalCall.constructFromIntRepr(Code.invokeScalaObject[Int]( - Call2.getClass, "fromUnphasedDiploidGtIndex", Array(classTag[Int].runtimeClass), Array(x.asInt.intCode(cb)))) - } + registerScalaFunction("Call", Array(TInt32, TInt32, TBoolean), TCall, (rt: Type, _: Seq[PType]) => PCanonicalCall())(Call2.getClass, "apply") + registerScalaFunction("UnphasedDiploidGtIndexCall", Array(TInt32), TCall, (rt: Type, _: Seq[PType]) => PCanonicalCall())(Call2.getClass, "fromUnphasedDiploidGtIndex") registerWrappedScalaFunction2("Call", TArray(TInt32), TBoolean, TCall, { - case (rt: Type, _: SType, _: SType) => SCanonicalCall + case(rt: Type, _: PType, _: PType) => PCanonicalCall() })(CallN.getClass, "apply") val qualities = Array("isPhased", "isHomRef", "isHet", "isHomVar", "isNonRef", "isHetNonRef", "isHetRef") - for (q <- qualities) { - registerSCode1(q, TCall, TBoolean, (rt: Type, _: SType) => SBoolean) { - case (er, cb, rt, call) => - primitive(Code.invokeScalaObject[Boolean]( - Call.getClass, q, Array(classTag[Int].runtimeClass), Array(call.asCall.loadCanonicalRepresentation(cb)))) - } - } - - registerSCode1("ploidy", TCall, TInt32, (rt: Type, _: SType) => SInt32) { - case (er, cb, rt, call) => - primitive(Code.invokeScalaObject[Int]( - Call.getClass, "ploidy", Array(classTag[Int].runtimeClass), Array(call.asCall.loadCanonicalRepresentation(cb)))) - } + for (q <- qualities) registerScalaFunction(q, Array(TCall), TBoolean, (rt: Type, _: Seq[PType]) => PBoolean())(Call.getClass, q) - registerSCode1("nNonRefAlleles", TCall, TInt32, (rt: Type, _: SType) => SInt32) { - case (er, cb, rt, call) => - primitive(Code.invokeScalaObject[Int]( - Call.getClass, "nNonRefAlleles", Array(classTag[Int].runtimeClass), Array(call.asCall.loadCanonicalRepresentation(cb)))) - } + registerScalaFunction("ploidy", Array(TCall), TInt32, (rt: Type, _: Seq[PType]) => PInt32())(Call.getClass, "ploidy") - registerSCode1("unphasedDiploidGtIndex", TCall, TInt32, (rt: Type, _: SType) => SInt32) { - case (er, cb, rt, call) => - primitive(Code.invokeScalaObject[Int]( - Call.getClass, "unphasedDiploidGtIndex", Array(classTag[Int].runtimeClass), Array(call.asCall.loadCanonicalRepresentation(cb)))) - } + registerScalaFunction("nNonRefAlleles", Array(TCall), TInt32, (rt: Type, _: Seq[PType]) => PInt32())(Call.getClass, "nNonRefAlleles") - registerSCode2("index", TCall, TInt32, TInt32, (rt: Type, _: SType, _: SType) => SInt32) { - case (er, cb, rt, call, idx) => - primitive(Code.invokeScalaObject[Int]( - 
Call.getClass, "alleleByIndex", Array(classTag[Int].runtimeClass, classTag[Int].runtimeClass), Array(call.asCall.loadCanonicalRepresentation(cb), idx.asInt.intCode(cb)))) - } + registerScalaFunction("unphasedDiploidGtIndex", Array(TCall), TInt32, (rt: Type, _: Seq[PType]) => PInt32())(Call.getClass, "unphasedDiploidGtIndex") + registerScalaFunction("index", Array(TCall, TInt32), TInt32, (rt: Type, _: Seq[PType]) => PInt32())(Call.getClass, "alleleByIndex") - registerSCode2("downcode", TCall, TInt32, TCall, (rt: Type, _: SType, _: SType) => SCanonicalCall) { - case (er, cb, rt, call, downcodedAllele) => - SCanonicalCall.constructFromIntRepr(Code.invokeScalaObject[Int]( - Call.getClass, "downcode", Array(classTag[Int].runtimeClass, classTag[Int].runtimeClass), Array(call.asCall.loadCanonicalRepresentation(cb), downcodedAllele.asInt.intCode(cb)))) - } + registerScalaFunction("downcode", Array(TCall, TInt32), TCall, (rt: Type, _: Seq[PType]) => PCanonicalCall())(Call.getClass, "downcode") registerWrappedScalaFunction2("oneHotAlleles", TCall, TInt32, TArray(TInt32), { - case (rt: Type, _: SType, _: SType) => SIndexablePointer(PCanonicalArray(PInt32(true))) + case(rt: Type, _: PType, _: PType) => PCanonicalArray(PInt32(true)) })(Call.getClass, "oneHotAlleles") } } diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/Functions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/Functions.scala index 60727916e77..e6d72ecc860 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/Functions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/Functions.scala @@ -8,8 +8,8 @@ import is.hail.utils._ import is.hail.asm4s.coerce import is.hail.experimental.ExperimentalFunctions import is.hail.types.physical._ -import is.hail.types.physical.stypes.{EmitType, SCode, SType} -import is.hail.types.physical.stypes.concrete.{SBaseStructPointer, SBaseStructPointerCode, SCanonicalCall, SCanonicalCallCode, SIndexablePointer, SStringPointer} +import is.hail.types.physical.stypes.SCode +import is.hail.types.physical.stypes.concrete.{SBaseStructPointer, SBaseStructPointerCode, SStringPointer} import is.hail.types.physical.stypes.interfaces._ import is.hail.types.physical.stypes.primitives._ import is.hail.types.virtual._ @@ -250,6 +250,22 @@ abstract class RegistryFunctions { def tnum(name: String): TVariable = tv(name, "numeric") + def wrapArg(r: EmitRegion, t: PType): Code[_] => Code[_] = t match { + case _: PBoolean => coerce[Boolean] + case _: PInt32 => coerce[Int] + case _: PInt64 => coerce[Long] + case _: PFloat32 => coerce[Float] + case _: PFloat64 => coerce[Double] + case _: PCall => coerce[Int] + case t: PString => c => t.loadString(coerce[Long](c)) + case t: PLocus => c => EmitCodeBuilder.scopedCode(r.mb)(cb => PCode(t, c).asLocus.getLocusObj(cb)) + case _ => c => + Code.invokeScalaObject3[PType, Region, Long, Any]( + UnsafeRow.getClass, "read", + r.mb.getPType(t), + r.region, coerce[Long](c)) + } + def boxedTypeInfo(t: Type): TypeInfo[_ >: Null] = t match { case TBoolean => classInfo[java.lang.Boolean] case TInt32 => classInfo[java.lang.Integer] @@ -264,12 +280,12 @@ abstract class RegistryFunctions { def scodeToJavaValue(cb: EmitCodeBuilder, r: Value[Region], sc: SCode): Code[AnyRef] = { sc.st match { - case SInt32 => Code.boxInt(sc.asInt32.intCode(cb)) - case SInt64 => Code.boxLong(sc.asInt64.longCode(cb)) - case SFloat32 => Code.boxFloat(sc.asFloat32.floatCode(cb)) - case SFloat64 => Code.boxDouble(sc.asFloat64.doubleCode(cb)) - case SBoolean => 
Code.boxBoolean(sc.asBoolean.boolCode(cb)) - case _: SCall => Code.boxInt(sc.asCall.loadCanonicalRepresentation(cb)) + case _: SInt32 => Code.boxInt(sc.asInt32.intCode(cb)) + case _: SInt64 => Code.boxLong(sc.asInt64.longCode(cb)) + case _: SFloat32 => Code.boxFloat(sc.asFloat32.floatCode(cb)) + case _: SFloat64 => Code.boxDouble(sc.asFloat64.doubleCode(cb)) + case _: SBoolean => Code.boxBoolean(sc.asBoolean.boolCode(cb)) + case _: SCall => Code.boxInt(coerce[Int](sc.asPCode.code)) case _: SString => sc.asString.loadString() case _: SLocus => sc.asLocus.getLocusObj(cb) case t => @@ -283,21 +299,34 @@ abstract class RegistryFunctions { } } - def unwrapReturn(cb: EmitCodeBuilder, r: Value[Region], st: SType, value: Code[_]): SCode = st.virtualType match { - case TBoolean => primitive(coerce[Boolean](value)) - case TInt32 => primitive(coerce[Int](value)) - case TInt64 => primitive(coerce[Long](value)) - case TFloat32 => primitive(coerce[Float](value)) - case TFloat64 => primitive(coerce[Double](value)) + def boxArg(r: EmitRegion, t: PType): Code[_] => Code[AnyRef] = t match { + case _: PBoolean => c => Code.boxBoolean(coerce[Boolean](c)) + case _: PInt32 => c => Code.boxInt(coerce[Int](c)) + case _: PInt64 => c => Code.boxLong(coerce[Long](c)) + case _: PFloat32 => c => Code.boxFloat(coerce[Float](c)) + case _: PFloat64 => c => Code.boxDouble(coerce[Double](c)) + case _: PCall => c => Code.boxInt(coerce[Int](c)) + case t: PString => c => t.loadString(coerce[Long](c)) + case t: PLocus => c => EmitCodeBuilder.scopedCode(r.mb)(cb => PCode(t, c).asLocus.getLocusObj(cb)) + case _ => c => + Code.invokeScalaObject3[PType, Region, Long, AnyRef]( + UnsafeRow.getClass, "readAnyRef", + r.mb.getPType(t), + r.region, coerce[Long](c)) + } + + def unwrapReturn(cb: EmitCodeBuilder, r: Value[Region], pt: PType, value: Code[_]): PCode = pt.virtualType match { + case TBoolean => PCode(pt, value) + case TInt32 => PCode(pt, value) + case TInt64 => PCode(pt, value) + case TFloat32 => PCode(pt, value) + case TFloat64 => PCode(pt, value) case TString => - val sst = st.asInstanceOf[SStringPointer] - sst.constructFromString(cb, r, coerce[String](value)) - case TCall => - assert(st == SCanonicalCall) - new SCanonicalCallCode(coerce[Int](value)) + val st = SStringPointer(pt.asInstanceOf[PCanonicalString]) + st.constructFromString(cb, r, coerce[String](value)) + case TCall => PCode(pt, value) case TArray(TInt32) => - val ast = st.asInstanceOf[SIndexablePointer] - val pca = ast.pType.asInstanceOf[PCanonicalArray] + val pca = pt.asInstanceOf[PCanonicalArray] val arr = cb.newLocal[IndexedSeq[Int]]("unrwrap_return_array_int32_arr", coerce[IndexedSeq[Int]](value)) val len = cb.newLocal[Int]("unwrap_return_array_int32_len", arr.invoke[Int]("length")) pca.constructFromElements(cb, r, len, deepCopy = false) { (cb, idx) => @@ -306,8 +335,7 @@ abstract class RegistryFunctions { IEmitCode(cb, elt.isNull, primitive(elt.invoke[Int]("intValue"))) } case TArray(TFloat64) => - val ast = st.asInstanceOf[SIndexablePointer] - val pca = ast.pType.asInstanceOf[PCanonicalArray] + val pca = pt.asInstanceOf[PCanonicalArray] val arr = cb.newLocal[IndexedSeq[Double]]("unrwrap_return_array_float64_arr", coerce[IndexedSeq[Double]](value)) val len = cb.newLocal[Int]("unwrap_return_array_float64_len", arr.invoke[Int]("length")) pca.constructFromElements(cb, r, len, deepCopy = false) { (cb, idx) => @@ -316,37 +344,38 @@ abstract class RegistryFunctions { IEmitCode(cb, elt.isNull, primitive(elt.invoke[Double]("doubleValue"))) } case TArray(TString) => 
- val ast = st.asInstanceOf[SIndexablePointer] - val pca = ast.pType.asInstanceOf[PCanonicalArray] + val pca = pt.asInstanceOf[PCanonicalArray] val arr = cb.newLocal[IndexedSeq[String]]("unrwrap_return_array_str_arr", coerce[IndexedSeq[String]](value)) val len = cb.newLocal[Int]("unwrap_return_array_str_len", arr.invoke[Int]("length")) pca.constructFromElements(cb, r, len, deepCopy = false) { (cb, idx) => - val st = SStringPointer(pca.elementType.setRequired(false).asInstanceOf[PCanonicalString]) + val st = SStringPointer(pca.elementType.asInstanceOf[PCanonicalString]) val elt = cb.newLocal[String]("unwrap_return_array_str_elt", Code.checkcast[String](arr.invoke[Int, java.lang.Object]("apply", idx))) IEmitCode(cb, elt.isNull, st.constructFromString(cb, r, elt)) } case t: TBaseStruct => - val sst = st.asInstanceOf[SBaseStructPointer] - val pt = sst.pType.asInstanceOf[PCanonicalBaseStruct] val addr = Code.invokeScalaObject3[Region, Row, PType, Long]( RegistryHelpers.getClass, "stupidUnwrapStruct", r.region, coerce[Row](value), cb.emb.ecb.getPType(pt)) - new SBaseStructPointerCode(SBaseStructPointer(pt.setRequired(false).asInstanceOf[PBaseStruct]), addr) + new SBaseStructPointerCode(SBaseStructPointer(pt.asInstanceOf[PBaseStruct]), addr) } - def registerSCode( + def registerPCode( name: String, valueParameterTypes: Array[Type], returnType: Type, - calculateReturnType: (Type, Seq[SType]) => SType, + calculateReturnPType: (Type, Seq[PType]) => PType, typeParameters: Array[Type] = Array.empty )( - impl: (EmitRegion, EmitCodeBuilder, Seq[Type], SType, Array[SCode]) => SCode + impl: (EmitRegion, EmitCodeBuilder, Seq[Type], PType, Array[PCode]) => PCode ) { IRFunctionRegistry.addJVMFunction( - new UnseededMissingnessObliviousJVMFunction(name, typeParameters, valueParameterTypes, returnType, calculateReturnType) { - override def apply(r: EmitRegion, cb: EmitCodeBuilder, returnSType: SType, typeParameters: Seq[Type], args: SCode*): SCode = - impl(r, cb, typeParameters, returnSType, args.toArray) + new UnseededMissingnessObliviousJVMFunction(name, typeParameters, valueParameterTypes, returnType, calculateReturnPType) { + override def apply(r: EmitRegion, cb: EmitCodeBuilder, returnPType: PType, typeParameters: Seq[Type], args: PCode*): PCode = + impl(r, cb, typeParameters, returnPType, args.toArray) + override def apply(r: EmitRegion, cb: EmitCodeBuilder, returnPType: PType, typeParameters: Seq[Type], args: (PType, Code[_])*): Code[_] = { + assert(unify(typeParameters, args.map(_._1.virtualType), returnPType.virtualType)) + apply(r, cb, returnPType, typeParameters, args.map { case (t, a) => PCode(t, a) }: _*).code + } }) } @@ -354,16 +383,16 @@ abstract class RegistryFunctions { name: String, valueParameterTypes: Array[Type], returnType: Type, - calculateReturnType: (Type, Seq[SType]) => SType, + calculateReturnPType: (Type, Seq[PType]) => PType, typeParameters: Array[Type] = Array.empty )( - impl: (EmitRegion, EmitCodeBuilder, SType, Array[Type], Array[SCode]) => Code[_] + impl: (EmitRegion, EmitCodeBuilder, PType, Array[Type], Array[(PType, Code[_])]) => Code[_] ) { IRFunctionRegistry.addJVMFunction( - new UnseededMissingnessObliviousJVMFunction(name, typeParameters, valueParameterTypes, returnType, calculateReturnType) { - override def apply(r: EmitRegion, cb: EmitCodeBuilder, returnSType: SType, typeParameters: Seq[Type], args: SCode*): SCode = { - assert(unify(typeParameters, args.map(_.st.virtualType), returnSType.virtualType)) - returnSType.fromCodes(FastIndexedSeq(impl(r, cb, returnSType, 
typeParameters.toArray, args.toArray))) + new UnseededMissingnessObliviousJVMFunction(name, typeParameters, valueParameterTypes, returnType, calculateReturnPType) { + override def apply(r: EmitRegion, cb: EmitCodeBuilder, returnPType: PType, typeParameters: Seq[Type], args: (PType, Code[_])*): Code[_] = { + assert(unify(typeParameters, args.map(_._1.virtualType), returnPType.virtualType)) + impl(r, cb, returnPType, typeParameters.toArray, args.toArray) } }) } @@ -372,15 +401,15 @@ abstract class RegistryFunctions { name: String, valueParameterTypes: Array[Type], returnType: Type, - calculateReturnType: (Type, Seq[EmitType]) => EmitType, + calculateReturnPType: (Type, Seq[PType]) => PType, typeParameters: Array[Type] = Array.empty )( - impl: (EmitRegion, SType, Array[EmitCode]) => EmitCode + impl: (EmitRegion,PType, Array[EmitCode]) => EmitCode ) { IRFunctionRegistry.addJVMFunction( - new UnseededMissingnessAwareJVMFunction(name, typeParameters, valueParameterTypes, returnType, calculateReturnType) { - override def apply(r: EmitRegion, rpt: SType, typeParameters: Seq[Type], args: EmitCode*): EmitCode = { - assert(unify(typeParameters, args.map(_.st.virtualType), rpt.virtualType)) + new UnseededMissingnessAwareJVMFunction(name, typeParameters, valueParameterTypes, returnType, calculateReturnPType) { + override def apply(r: EmitRegion, rpt: PType, typeParameters: Seq[Type], args: EmitCode*): EmitCode = { + assert(unify(typeParameters, args.map(_.pt.virtualType), rpt.virtualType)) impl(r, rpt, args.toArray) } }) @@ -390,22 +419,22 @@ abstract class RegistryFunctions { name: String, valueParameterTypes: Array[Type], returnType: Type, - calculateReturnType: (Type, Seq[EmitType]) => EmitType, + calculateReturnPType: (Type, Seq[PType]) => PType, typeParameters: Array[Type] = Array.empty )( - impl: (EmitCodeBuilder, Value[Region], SType, Array[EmitCode]) => IEmitCode + impl: (EmitCodeBuilder, Value[Region], PType, Array[EmitCode]) => IEmitCode ) { IRFunctionRegistry.addJVMFunction( - new UnseededMissingnessAwareJVMFunction(name, typeParameters, valueParameterTypes, returnType, calculateReturnType) { + new UnseededMissingnessAwareJVMFunction(name, typeParameters, valueParameterTypes, returnType, calculateReturnPType) { override def apply( cb: EmitCodeBuilder, r: Value[Region], - rpt: SType, + rpt: PType, typeParameters: Seq[Type], args: EmitCode* ): IEmitCode = impl(cb, r, rpt, args.toArray) - override def apply(r: EmitRegion, rpt: SType, typeParameters: Seq[Type], args: EmitCode*): EmitCode = { + override def apply(r: EmitRegion, rpt: PType, typeParameters: Seq[Type], args: EmitCode*): EmitCode = { EmitCode.fromI(r.mb) { cb => apply(cb, r.region, rpt, typeParameters, args: _*) } @@ -417,16 +446,14 @@ abstract class RegistryFunctions { name: String, valueParameterTypes: Array[Type], returnType: Type, - calculateReturnType: (Type, Seq[SType]) => SType + calculateReturnPType: (Type, Seq[PType]) => PType )( cls: Class[_], method: String ) { - registerSCode(name, valueParameterTypes, returnType, calculateReturnType) { case (r, cb, _, rt, args) => - val cts = valueParameterTypes.map(PrimitiveTypeToIRIntermediateClassTag(_).runtimeClass) - rt.fromCodes(FastIndexedSeq( - Code.invokeScalaObject(cls, method, cts, args.map { a => SType.extractPrimCode(cb, a) })(PrimitiveTypeToIRIntermediateClassTag(returnType)) - )) + registerCode(name, valueParameterTypes, returnType, calculateReturnPType) { case (r, cb, rt, _, args) => + val cts = valueParameterTypes.map(TypeToIRIntermediateClassTag(_).runtimeClass) + 
Code.invokeScalaObject(cls, method, cts, args.map(_._2))(TypeToIRIntermediateClassTag(returnType)) } } @@ -434,7 +461,7 @@ abstract class RegistryFunctions { name: String, valueParameterTypes: Array[Type], returnType: Type, - calculateReturnType: (Type, Seq[SType]) => SType + calculateReturnPType: (Type, Seq[PType]) => PType )( cls: Class[_], method: String @@ -446,128 +473,133 @@ abstract class RegistryFunctions { case TArray(TString) => classTag[IndexedSeq[String]] case TSet(TString) => classTag[Set[String]] case TDict(TString, TString) => classTag[Map[String, String]] - case TCall => classTag[Int] - case t => PrimitiveTypeToIRIntermediateClassTag(t) - } - - def wrap(cb: EmitCodeBuilder, r: Value[Region], code: SCode): Code[_] = code.st match { - case t if t.isPrimitive => SType.extractPrimCode(cb, code) - case call: SCall => code.asCall.loadCanonicalRepresentation(cb) - case _ => scodeToJavaValue(cb, r, code) + case t => TypeToIRIntermediateClassTag(t) } - registerSCode(name, valueParameterTypes, returnType, calculateReturnType) { case (r, cb, _, rt, args) => + registerCode(name, valueParameterTypes, returnType, calculateReturnPType) { case (r, cb, rt, _, args) => val cts = valueParameterTypes.map(ct(_).runtimeClass) - unwrapReturn(cb, r.region, rt, - Code.invokeScalaObject(cls, method, cts, args.map { a => wrap(cb, r.region, a) })(ct(returnType))) + val out = Code.invokeScalaObject(cls, method, cts, args.map { case (t, a) => wrapArg(r, t)(a) })(ct(returnType)) + unwrapReturn(cb, r.region, rt, out).code } } - def registerWrappedScalaFunction1(name: String, a1: Type, returnType: Type, pt: (Type, SType) => SType)(cls: Class[_], method: String): Unit = + def registerWrappedScalaFunction1(name: String, a1: Type, returnType: Type, pt: (Type, PType) => PType)(cls: Class[_], method: String): Unit = registerWrappedScalaFunction(name, Array(a1), returnType, unwrappedApply(pt))(cls, method) - def registerWrappedScalaFunction2(name: String, a1: Type, a2: Type, returnType: Type, pt: (Type, SType, SType) => SType)(cls: Class[_], method: String): Unit = + def registerWrappedScalaFunction2(name: String, a1: Type, a2: Type, returnType: Type, pt: (Type, PType, PType) => PType)(cls: Class[_], method: String): Unit = registerWrappedScalaFunction(name, Array(a1, a2), returnType, unwrappedApply(pt))(cls, method) def registerWrappedScalaFunction3(name: String, a1: Type, a2: Type, a3: Type, returnType: Type, - pt: (Type, SType, SType, SType) => SType)(cls: Class[_], method: String): Unit = + pt: (Type, PType, PType, PType) => PType)(cls: Class[_], method: String): Unit = registerWrappedScalaFunction(name, Array(a1, a2, a3), returnType, unwrappedApply(pt))(cls, method) - def registerJavaStaticFunction(name: String, valueParameterTypes: Array[Type], returnType: Type, pt: (Type, Seq[SType]) => SType)(cls: Class[_], method: String) { + def registerJavaStaticFunction(name: String, valueParameterTypes: Array[Type], returnType: Type, pt: (Type, Seq[PType]) => PType)(cls: Class[_], method: String) { registerCode(name, valueParameterTypes, returnType, pt) { case (r, cb, rt, _, args) => - val cts = valueParameterTypes.map(PrimitiveTypeToIRIntermediateClassTag(_).runtimeClass) - Code.invokeStatic(cls, method, cts, args.map(a => SType.extractPrimCode(cb, a)))(PrimitiveTypeToIRIntermediateClassTag(returnType)) + val cts = valueParameterTypes.map(TypeToIRIntermediateClassTag(_).runtimeClass) + Code.invokeStatic(cls, method, cts, args.map(_._2))(TypeToIRIntermediateClassTag(returnType)) } } def registerIR(name: String, 
valueParameterTypes: Array[Type], returnType: Type, inline: Boolean = false, typeParameters: Array[Type] = Array.empty)(f: (Seq[Type], Seq[IR]) => IR): Unit = IRFunctionRegistry.addIR(name, typeParameters, valueParameterTypes, returnType, inline, f) - def registerSCode1(name: String, mt1: Type, rt: Type, pt: (Type, SType) => SType)(impl: (EmitRegion, EmitCodeBuilder, SType, SCode) => SCode): Unit = - registerSCode(name, Array(mt1), rt, unwrappedApply(pt)) { + def registerPCode1(name: String, mt1: Type, rt: Type, pt: (Type, PType) => PType)(impl: (EmitRegion, EmitCodeBuilder, PType, PCode) => PCode): Unit = + registerPCode(name, Array(mt1), rt, unwrappedApply(pt)) { case (r, cb, _, rt, Array(a1)) => impl(r, cb, rt, a1) } - def registerSCode1t(name: String, typeParams: Array[Type], mt1: Type, rt: Type, pt: (Type, SType) => SType)(impl: (EmitRegion, EmitCodeBuilder, Seq[Type], SType, SCode) => SCode): Unit = - registerSCode(name, Array(mt1), rt, unwrappedApply(pt), typeParameters = typeParams) { - case (r, cb, typeParams, rt, Array(a1)) => impl(r, cb, typeParams, rt, a1) - } - - def registerSCode2(name: String, mt1: Type, mt2: Type, rt: Type, pt: (Type, SType, SType) => SType) - (impl: (EmitRegion, EmitCodeBuilder, SType, SCode, SCode) => SCode): Unit = - registerSCode(name, Array(mt1, mt2), rt, unwrappedApply(pt)) { + def registerPCode2(name: String, mt1: Type, mt2: Type, rt: Type, pt: (Type, PType, PType) => PType) + (impl: (EmitRegion, EmitCodeBuilder, PType, PCode, PCode) => PCode): Unit = + registerPCode(name, Array(mt1, mt2), rt, unwrappedApply(pt)) { case (r, cb, _, rt, Array(a1, a2)) => impl(r, cb, rt, a1, a2) } - def registerSCode2t(name: String, typeParams: Array[Type], mt1: Type, mt2: Type, rt: Type, pt: (Type, SType, SType) => SType) - (impl: (EmitRegion, EmitCodeBuilder, Seq[Type], SType, SCode, SCode) => SCode): Unit = - registerSCode(name, Array(mt1, mt2), rt, unwrappedApply(pt), typeParameters = typeParams) { - case (r, cb, typeParams, rt, Array(a1, a2)) => impl(r, cb, typeParams, rt, a1, a2) - } - - def registerSCode3(name: String, mt1: Type, mt2: Type, mt3: Type, rt: Type, pt: (Type, SType, SType, SType) => SType) - (impl: (EmitRegion, EmitCodeBuilder, SType, SCode, SCode, SCode) => SCode): Unit = - registerSCode(name, Array(mt1, mt2, mt3), rt, unwrappedApply(pt)) { + def registerPCode3(name: String, mt1: Type, mt2: Type, mt3: Type, rt: Type, pt: (Type, PType, PType, PType) => PType) + (impl: (EmitRegion, EmitCodeBuilder, PType, PCode, PCode, PCode) => PCode): Unit = + registerPCode(name, Array(mt1, mt2, mt3), rt, unwrappedApply(pt)) { case (r, cb, _, rt, Array(a1, a2, a3)) => impl(r, cb, rt, a1, a2, a3) } - def registerSCode4(name: String, mt1: Type, mt2: Type, mt3: Type, mt4: Type, rt: Type, pt: (Type, SType, SType, SType, SType) => SType) - (impl: (EmitRegion, EmitCodeBuilder, SType, SCode, SCode, SCode, SCode) => SCode): Unit = - registerSCode(name, Array(mt1, mt2, mt3, mt4), rt, unwrappedApply(pt)) { + def registerPCode4(name: String, mt1: Type, mt2: Type, mt3: Type, mt4: Type, rt: Type, pt: (Type, PType, PType, PType, PType) => PType) + (impl: (EmitRegion, EmitCodeBuilder, PType, PCode, PCode, PCode, PCode) => PCode): Unit = + registerPCode(name, Array(mt1, mt2, mt3, mt4), rt, unwrappedApply(pt)) { case (r, cb, _, rt, Array(a1, a2, a3, a4)) => impl(r, cb, rt, a1, a2, a3, a4) } - def registerSCode4t(name: String, typeParams: Array[Type], mt1: Type, mt2: Type, mt3: Type, mt4: Type, rt: Type, - pt: (Type, SType, SType, SType, SType) => SType) - (impl: (EmitRegion, 
EmitCodeBuilder, Seq[Type], SType, SCode, SCode, SCode, SCode) => SCode): Unit = - registerSCode(name, Array(mt1, mt2, mt3, mt4), rt, unwrappedApply(pt), typeParams) { + def registerPCode4t(name: String, typeParams: Array[Type], mt1: Type, mt2: Type, mt3: Type, mt4: Type, rt: Type, + pt: (Type, PType, PType, PType, PType) => PType) + (impl: (EmitRegion, EmitCodeBuilder, Seq[Type], PType, PCode, PCode, PCode, PCode) => PCode): Unit = + registerPCode(name, Array(mt1, mt2, mt3, mt4), rt, unwrappedApply(pt), typeParams) { case (r, cb, typeParams, rt, Array(a1, a2, a3, a4)) => impl(r, cb, typeParams, rt, a1, a2, a3, a4) } - def registerSCode5(name: String, mt1: Type, mt2: Type, mt3: Type, mt4: Type, mt5: Type, rt: Type, pt: (Type, SType, SType, SType, SType, SType) => SType) - (impl: (EmitRegion, EmitCodeBuilder, SType, SCode, SCode, SCode, SCode, SCode) => SCode): Unit = - registerSCode(name, Array(mt1, mt2, mt3, mt4, mt5), rt, unwrappedApply(pt)) { + def registerPCode5(name: String, mt1: Type, mt2: Type, mt3: Type, mt4: Type, mt5: Type, rt: Type, pt: (Type, PType, PType, PType, PType, PType) => PType) + (impl: (EmitRegion, EmitCodeBuilder, PType, PCode, PCode, PCode, PCode, PCode) => PCode): Unit = + registerPCode(name, Array(mt1, mt2, mt3, mt4, mt5), rt, unwrappedApply(pt)) { case (r, cb, _, rt, Array(a1, a2, a3, a4, a5)) => impl(r, cb, rt, a1, a2, a3, a4, a5) } - def registerCode1(name: String, mt1: Type, rt: Type, pt: (Type, SType) => SType)(impl: (EmitCodeBuilder, EmitRegion, SType, SCode) => Code[_]): Unit = + def registerCode1[A1](name: String, mt1: Type, rt: Type, pt: (Type, PType) => PType)(impl: (EmitRegion, PType, (PType, Code[A1])) => Code[_]): Unit = registerCode(name, Array(mt1), rt, unwrappedApply(pt)) { - case (r, cb, rt, _, Array(a1)) => impl(cb, r, rt, a1) + case (r, xb, rt, _, Array(a1: (PType, Code[A1]) @unchecked)) => impl(r, rt, a1) } - def registerCode2(name: String, mt1: Type, mt2: Type, rt: Type, pt: (Type, SType, SType) => SType) - (impl: (EmitCodeBuilder, EmitRegion, SType, SCode, SCode) => Code[_]): Unit = + def registerCode1t[A1](name: String, typeParam: Type, mt1: Type, rt: Type, pt: (Type, PType) => PType)(impl: (EmitRegion, PType, Type, (PType, Code[A1])) => Code[_]): Unit = + registerCode(name, Array(mt1), rt, unwrappedApply(pt), typeParameters = Array(typeParam)) { + case (r, cb, rt, Array(t), Array(a1: (PType, Code[A1]) @unchecked)) => impl(r, rt, t, a1) + } + + + def registerCode2[A1, A2](name: String, mt1: Type, mt2: Type, rt: Type, pt: (Type, PType, PType) => PType) + (impl: (EmitRegion, PType, (PType, Code[A1]), (PType, Code[A2])) => Code[_]): Unit = registerCode(name, Array(mt1, mt2), rt, unwrappedApply(pt)) { - case (r, cb, rt, _, Array(a1, a2)) => impl(cb, r, rt, a1, a2) + case (r, cb, rt, _, Array( + a1: (PType, Code[A1]) @unchecked, + a2: (PType, Code[A2]) @unchecked)) => impl(r, rt, a1, a2) + } + + def registerCode2t[A1, A2](name: String, typeParam1: Type, arg1: Type, arg2: Type, rt: Type, pt: (Type, PType, PType) => PType) + (impl: (EmitRegion, PType, Type, (PType, Code[A1]), (PType, Code[A2])) => Code[_]): Unit = + registerCode(name, Array(arg1, arg2), rt, unwrappedApply(pt), Array(typeParam1)) { + case (r, cb, rt, Array(t1), Array(a1: (PType, Code[A1]) @unchecked, a2: (PType, Code[A2]) @unchecked)) => impl(r, rt, t1, a1, a2) + } + + def registerCode3[A1, A2, A3](name: String, mt1: Type, mt2: Type, mt3: Type, rt: Type, pt: (Type, PType, PType, PType) => PType) + (impl: (EmitRegion, PType, (PType, Code[A1]), (PType, Code[A2]), (PType, Code[A3])) => 
Code[_]): Unit = + registerCode(name, Array(mt1, mt2, mt3), rt, unwrappedApply(pt)) { + case (r, cb, rt, _, Array( + a1: (PType, Code[A1]) @unchecked, + a2: (PType, Code[A2]) @unchecked, + a3: (PType, Code[A3]) @unchecked)) => impl(r, rt, a1, a2, a3) } - def registerIEmitCode1(name: String, mt1: Type, rt: Type, pt: (Type, EmitType) => EmitType) - (impl: (EmitCodeBuilder, Value[Region], SType, EmitCode) => IEmitCode): Unit = + def registerIEmitCode1(name: String, mt1: Type, rt: Type, pt: (Type, PType) => PType) + (impl: (EmitCodeBuilder, Value[Region], PType, EmitCode) => IEmitCode): Unit = registerIEmitCode(name, Array(mt1), rt, unwrappedApply(pt)) { case (cb, r, rt, Array(a1)) => impl(cb, r, rt, a1) } - def registerIEmitCode2(name: String, mt1: Type, mt2: Type, rt: Type, pt: (Type, EmitType, EmitType) => EmitType) - (impl: (EmitCodeBuilder, Value[Region], SType, EmitCode, EmitCode) => IEmitCode): Unit = + def registerIEmitCode2(name: String, mt1: Type, mt2: Type, rt: Type, pt: (Type, PType, PType) => PType) + (impl: (EmitCodeBuilder, Value[Region], PType, EmitCode, EmitCode) => IEmitCode): Unit = registerIEmitCode(name, Array(mt1, mt2), rt, unwrappedApply(pt)) { case (cb, r, rt, Array(a1, a2)) => impl(cb, r, rt, a1, a2) } - def registerIEmitCode4(name: String, mt1: Type, mt2: Type, mt3: Type, mt4: Type, rt: Type, pt: (Type, EmitType, EmitType, EmitType, EmitType) => EmitType) - (impl: (EmitCodeBuilder, Value[Region], SType, EmitCode, EmitCode, EmitCode, EmitCode) => IEmitCode): Unit = + def registerIEmitCode4(name: String, mt1: Type, mt2: Type, mt3: Type, mt4: Type, rt: Type, pt: (Type, PType, PType, PType, PType) => PType) + (impl: (EmitCodeBuilder, Value[Region], PType, EmitCode, EmitCode, EmitCode, EmitCode) => IEmitCode): Unit = registerIEmitCode(name, Array(mt1, mt2, mt3, mt4), rt, unwrappedApply(pt)) { case (cb, r, rt, Array(a1, a2, a3, a4)) => impl(cb, r, rt, a1, a2, a3, a4) } - def registerIEmitCode6(name: String, mt1: Type, mt2: Type, mt3: Type, mt4: Type, mt5: Type, mt6: Type, rt: Type, pt: (Type, EmitType, EmitType, EmitType, EmitType, EmitType, EmitType) => EmitType) - (impl: (EmitCodeBuilder, Value[Region], SType, EmitCode, EmitCode, EmitCode, EmitCode, EmitCode, EmitCode) => IEmitCode): Unit = + def registerIEmitCode6(name: String, mt1: Type, mt2: Type, mt3: Type, mt4: Type, mt5: Type, mt6: Type, rt: Type, pt: (Type, PType, PType, PType, PType, PType, PType) => PType) + (impl: (EmitCodeBuilder, Value[Region], PType, EmitCode, EmitCode, EmitCode, EmitCode, EmitCode, EmitCode) => IEmitCode): Unit = registerIEmitCode(name, Array(mt1, mt2, mt3, mt4, mt5, mt6), rt, unwrappedApply(pt)) { case (cb, r, rt, Array(a1, a2, a3, a4, a5, a6)) => impl(cb, r, rt, a1, a2, a3, a4, a5, a6) } - def registerEmitCode2(name: String, mt1: Type, mt2: Type, rt: Type, pt: (Type, EmitType, EmitType) => EmitType) - (impl: (EmitRegion, SType, EmitCode, EmitCode) => EmitCode): Unit = + def registerEmitCode2(name: String, mt1: Type, mt2: Type, rt: Type, pt: (Type, PType, PType) => PType) + (impl: (EmitRegion, PType, EmitCode, EmitCode) => EmitCode): Unit = registerEmitCode(name, Array(mt1, mt2), rt, unwrappedApply(pt)) { case (r, rt, Array(a1, a2)) => impl(r, rt, a1, a2) } def registerIR1(name: String, mt1: Type, returnType: Type, typeParameters: Array[Type] = Array.empty)(f: (Seq[Type], IR) => IR): Unit = @@ -586,22 +618,22 @@ abstract class RegistryFunctions { name: String, valueParameterTypes: Array[Type], returnType: Type, - computeReturnType: (Type, Seq[SType]) => SType + calculateReturnPType: (Type, 
Seq[PType]) => PType )( - impl: (EmitCodeBuilder, Value[Region], SType, Long, Array[SCode]) => SCode + impl: (EmitCodeBuilder, Value[Region], PType, Long, Array[SCode]) => SCode ) { IRFunctionRegistry.addJVMFunction( - new SeededMissingnessObliviousJVMFunction(name, valueParameterTypes, returnType, computeReturnType) { + new SeededMissingnessObliviousJVMFunction(name, valueParameterTypes, returnType, calculateReturnPType) { val isDeterministic: Boolean = false - def applySeeded(cb: EmitCodeBuilder, seed: Long, r: Value[Region], rpt: SType, args: SCode*): SCode = { + def applySeeded(cb: EmitCodeBuilder, seed: Long, r: Value[Region], rpt: PType, args: SCode*): SCode = { assert(unify(Array.empty[Type], args.map(_.st.virtualType), rpt.virtualType)) impl(cb, r, rpt, seed, args.toArray) } - def applySeededI(seed: Long, cb: EmitCodeBuilder, r: Value[Region], rpt: SType, args: EmitCode*): IEmitCode = { - IEmitCode.multiMapEmitCodes(cb, args.toFastIndexedSeq) { - argPCs => applySeeded(cb, seed, r, rpt, argPCs: _*) + def applySeededI(seed: Long, cb: EmitCodeBuilder, r: Value[Region], rpt: PType, args: (PType, EmitCode)*): IEmitCode = { + IEmitCode.multiMapEmitCodes(cb, args.map(_._2).toFastIndexedSeq) { + argPCs => applySeeded(cb, seed, r, rpt, argPCs: _*).asPCode } } @@ -609,23 +641,23 @@ abstract class RegistryFunctions { }) } - def registerSeeded0(name: String, returnType: Type, pt: SType)(impl: (EmitCodeBuilder, Value[Region], SType, Long) => SCode): Unit = - registerSeeded(name, Array[Type](), returnType, if (pt == null) null else (_: Type, _: Seq[SType]) => pt) { case (cb, r, rt, seed, _) => impl(cb, r, rt, seed) } + def registerSeeded0(name: String, returnType: Type, pt: PType)(impl: (EmitCodeBuilder, Value[Region], PType, Long) => SCode): Unit = + registerSeeded(name, Array[Type](), returnType, if (pt == null) null else (_: Type, _: Seq[PType]) => pt) { case (cb, r, rt, seed, _) => impl(cb, r, rt, seed) } - def registerSeeded1(name: String, arg1: Type, returnType: Type, pt: (Type, SType) => SType)(impl: (EmitCodeBuilder, Value[Region], SType, Long, SCode) => SCode): Unit = + def registerSeeded1(name: String, arg1: Type, returnType: Type, pt: (Type, PType) => PType)(impl: (EmitCodeBuilder, Value[Region], PType, Long, SCode) => SCode): Unit = registerSeeded(name, Array(arg1), returnType, unwrappedApply(pt)) { case (cb, r, rt, seed, Array(a1)) => impl(cb, r, rt, seed, a1) } - def registerSeeded2(name: String, arg1: Type, arg2: Type, returnType: Type, pt: (Type, SType, SType) => SType) - (impl: (EmitCodeBuilder, Value[Region], SType, Long, SCode, SCode) => SCode): Unit = + def registerSeeded2(name: String, arg1: Type, arg2: Type, returnType: Type, pt: (Type, PType, PType) => PType) + (impl: (EmitCodeBuilder, Value[Region], PType, Long, SCode, SCode) => SCode): Unit = registerSeeded(name, Array(arg1, arg2), returnType, unwrappedApply(pt)) { case (cb, r, rt, seed, Array(a1, a2)) => impl(cb, r, rt, seed, a1, a2) } - def registerSeeded4(name: String, arg1: Type, arg2: Type, arg3: Type, arg4: Type, returnType: Type, pt: (Type, SType, SType, SType, SType) => SType) - (impl: (EmitCodeBuilder, Value[Region], SType, Long, SCode, SCode, SCode, SCode) => SCode): Unit = + def registerSeeded4(name: String, arg1: Type, arg2: Type, arg3: Type, arg4: Type, returnType: Type, pt: (Type, PType, PType, PType, PType) => PType) + (impl: (EmitCodeBuilder, Value[Region], PType, Long, SCode, SCode, SCode, SCode) => SCode): Unit = registerSeeded(name, Array(arg1, arg2, arg3, arg4), returnType, unwrappedApply(pt)) { case 
(cb, r, rt, seed, Array(a1, a2, a3, a4)) => impl(cb, r, rt, seed, a1, a2, a3, a4) } @@ -640,9 +672,11 @@ sealed abstract class JVMFunction { def returnType: Type - def computeReturnEmitType(returnType: Type, valueParameterTypes: Seq[EmitType]): EmitType + def returnPType(returnType: Type, valueParameterTypes: Seq[PType]): PType - def apply(mb: EmitRegion, returnType: SType, typeParameters: Seq[Type], args: EmitCode*): EmitCode + def apply(mb: EmitRegion, returnType: PType, typeParameters: Seq[Type], args: EmitCode*): EmitCode + + def getAsMethod[C](cb: EmitClassBuilder[C], rpt: PType, typeParameters: Seq[Type], args: PType*): EmitMethodBuilder[C] = ??? override def toString: String = s"$name[${ typeParameters.mkString(", ") }](${ valueParameterTypes.mkString(", ") }): $returnType" @@ -657,11 +691,11 @@ sealed abstract class JVMFunction { } object MissingnessObliviousJVMFunction { - def returnSType(computeStrictReturnEmitType: (Type, Seq[SType]) => SType)(returnType: Type, valueParameterTypes: Seq[SType]): SType = { - if (computeStrictReturnEmitType == null) - SType.canonical(returnType) - else - computeStrictReturnEmitType(returnType, valueParameterTypes) + def returnPType(calculateReturnPType: (Type, Seq[PType]) => PType)(returnType: Type, valueParameterTypes: Seq[PType]): PType = { + val returnPType = + if (calculateReturnPType == null) PType.canonical(returnType) + else calculateReturnPType(returnType, valueParameterTypes) + returnPType.setRequired(valueParameterTypes.forall(_.required)) } } @@ -670,39 +704,42 @@ abstract class UnseededMissingnessObliviousJVMFunction ( override val typeParameters: Seq[Type], override val valueParameterTypes: Seq[Type], override val returnType: Type, - missingnessObliviousComputeReturnType: (Type, Seq[SType]) => SType + missingnessObliviousReturnPType: (Type, Seq[PType]) => PType ) extends JVMFunction { - override def computeReturnEmitType(returnType: Type, valueParameterTypes: Seq[EmitType]): EmitType = { - EmitType(computeStrictReturnEmitType(returnType, valueParameterTypes.map(_.st)), valueParameterTypes.forall(_.required)) - } - def computeStrictReturnEmitType(returnType: Type, valueParameterTypes: Seq[SType]): SType = - MissingnessObliviousJVMFunction.returnSType(missingnessObliviousComputeReturnType)(returnType, valueParameterTypes) + override def returnPType(returnType: Type, valueParameterTypes: Seq[PType]): PType = + MissingnessObliviousJVMFunction.returnPType(missingnessObliviousReturnPType)(returnType, valueParameterTypes) + + def apply(r: EmitRegion, cb: EmitCodeBuilder, returnPType: PType, typeParameters: Seq[Type], args: (PType, Code[_])*): Code[_] - def apply(r: EmitRegion, cb: EmitCodeBuilder, returnSType: SType, typeParameters: Seq[Type], args: SCode*): SCode + def apply(r: EmitRegion, cb: EmitCodeBuilder, returnPType: PType, typeParameters: Seq[Type], args: PCode*): PCode = + PCode(returnPType, apply(r, cb, returnPType, typeParameters, args.map(pc => pc.pt -> pc.code): _*)) - def apply(r: EmitRegion, returnType: SType, typeParameters: Seq[Type], args: EmitCode*): EmitCode = { + def apply(r: EmitRegion, returnPType: PType, typeParameters: Seq[Type], args: EmitCode*): EmitCode = { EmitCode.fromI(r.mb)(cb => IEmitCode.multiMapEmitCodes(cb, args.toFastIndexedSeq) { args => - apply(r, cb, returnType, typeParameters, args: _*) + apply(r, cb, returnPType, typeParameters, args: _*) }) } - def getAsMethod[C](cb: EmitClassBuilder[C], rpt: SType, typeParameters: Seq[Type], args: SType*): EmitMethodBuilder[C] = { + override def getAsMethod[C](cb: 
EmitClassBuilder[C], rpt: PType, typeParameters: Seq[Type], args: PType*): EmitMethodBuilder[C] = { val unified = unify(typeParameters, args.map(_.virtualType), rpt.virtualType) assert(unified) - val methodbuilder = cb.genEmitMethod(name, FastIndexedSeq[ParamType](typeInfo[Region]) ++ args.map(_.paramType), rpt.paramType) - methodbuilder.emitSCode(cb => apply(EmitRegion.default(methodbuilder), + val argTIs = args.toFastIndexedSeq.map(typeToTypeInfo) + val methodbuilder = cb.genEmitMethod(name, (typeInfo[Region] +: argTIs).map(ti => ti: CodeParamType), typeToTypeInfo(rpt)) + methodbuilder.emitWithBuilder(cb => apply(EmitRegion.default(methodbuilder), cb, rpt, typeParameters, - (0 until args.length).map(i => methodbuilder.getSCodeParam(i + 2)): _*)) + args.zip(argTIs.zipWithIndex.map { case (ti, i) => + methodbuilder.getCodeParam(i + 2)(ti).get + }): _*)) methodbuilder } } object MissingnessAwareJVMFunction { - def returnSType(calculateReturnType: (Type, Seq[EmitType]) => EmitType)(returnType: Type, valueParameterTypes: Seq[EmitType]): EmitType = - if (calculateReturnType == null) EmitType(SType.canonical(returnType), false) - else calculateReturnType(returnType, valueParameterTypes) + def returnPType(calculateReturnPType: (Type, Seq[PType]) => PType)(returnType: Type, valueParameterTypes: Seq[PType]): PType= + if (calculateReturnPType == null) PType.canonical(returnType) + else calculateReturnPType(returnType, valueParameterTypes) } abstract class UnseededMissingnessAwareJVMFunction ( @@ -710,14 +747,14 @@ abstract class UnseededMissingnessAwareJVMFunction ( override val typeParameters: Seq[Type], override val valueParameterTypes: Seq[Type], override val returnType: Type, - missingnessAwareComputeReturnSType: (Type, Seq[EmitType]) => EmitType + missingnessAwareReturnPType: (Type, Seq[PType]) => PType ) extends JVMFunction { - override def computeReturnEmitType(returnType: Type, valueParameterTypes: Seq[EmitType]): EmitType = - MissingnessAwareJVMFunction.returnSType(missingnessAwareComputeReturnSType)(returnType, valueParameterTypes) + override def returnPType(returnType: Type, valueParameterTypes: Seq[PType]): PType = + MissingnessAwareJVMFunction.returnPType(missingnessAwareReturnPType)(returnType, valueParameterTypes) def apply(cb: EmitCodeBuilder, r: Value[Region], - rpt: SType, + rpt: PType, typeParameters: Seq[Type], args: EmitCode* ): IEmitCode = { @@ -736,12 +773,12 @@ abstract class SeededJVMFunction ( def setSeed(s: Long): Unit = { seed = s } - def applySeededI(seed: Long, cb: EmitCodeBuilder, region: Value[Region], rpt: SType, args: EmitCode*): IEmitCode + def applySeededI(seed: Long, cb: EmitCodeBuilder, region: Value[Region], rpt: PType, args: (PType, EmitCode)*): IEmitCode - def apply(region: EmitRegion, rpt: SType, typeParameters: Seq[Type], args: EmitCode*): EmitCode = + def apply(region: EmitRegion, rpt: PType, typeParameters: Seq[Type], args: EmitCode*): EmitCode = fatal("seeded functions must go through IEmitCode path") - def apply(region: EmitRegion, rpt: SType, args: EmitCode*): EmitCode = + def apply(region: EmitRegion, rpt: PType, args: EmitCode*): EmitCode = fatal("seeded functions must go through IEmitCode path") def isStrict: Boolean = false @@ -751,22 +788,18 @@ abstract class SeededMissingnessObliviousJVMFunction ( override val name: String, override val valueParameterTypes: Seq[Type], override val returnType: Type, - missingnessObliviousreturnSType: (Type, Seq[SType]) => SType + missingnessObliviousReturnPType: (Type, Seq[PType]) => PType ) extends 
SeededJVMFunction(name, valueParameterTypes, returnType) { - override def computeReturnEmitType(returnType: Type, valueParameterTypes: Seq[EmitType]): EmitType = { - EmitType(computeStrictReturnEmitType(returnType, valueParameterTypes.map(_.st)), valueParameterTypes.forall(_.required)) - } - - def computeStrictReturnEmitType(returnType: Type, valueParameterTypes: Seq[SType]): SType = - MissingnessObliviousJVMFunction.returnSType(missingnessObliviousreturnSType)(returnType, valueParameterTypes) + override def returnPType(returnType: Type, valueParameterTypes: Seq[PType]): PType = + MissingnessObliviousJVMFunction.returnPType(missingnessObliviousReturnPType)(returnType, valueParameterTypes) } abstract class SeededMissingnessAwareJVMFunction ( override val name: String, override val valueParameterTypes: Seq[Type], override val returnType: Type, - missingnessAwarereturnSType: (Type, Seq[EmitType]) => EmitType + missingnessAwareReturnPType: (Type, Seq[PType]) => PType ) extends SeededJVMFunction(name, valueParameterTypes, returnType) { - override def computeReturnEmitType(returnType: Type, valueParameterTypes: Seq[EmitType]): EmitType = - MissingnessAwareJVMFunction.returnSType(missingnessAwarereturnSType)(returnType, valueParameterTypes) + override def returnPType(returnType: Type, valueParameterTypes: Seq[PType]): PType = + MissingnessAwareJVMFunction.returnPType(missingnessAwareReturnPType)(returnType, valueParameterTypes) } diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/GenotypeFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/GenotypeFunctions.scala index 8ffb6c9d933..0cc3f0110b7 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/GenotypeFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/GenotypeFunctions.scala @@ -4,16 +4,15 @@ import is.hail.annotations.Region import is.hail.asm4s.{coerce => _, _} import is.hail.types.{coerce => _, _} import is.hail.expr.ir._ -import is.hail.types.physical.stypes.{EmitType, SCode, SType} -import is.hail.types.physical.stypes.primitives.{SFloat64, SInt32} -import is.hail.types.physical.stypes.interfaces._ +import is.hail.types.physical.stypes.SCode +import is.hail.types.physical.{PArray, PCode, PFloat64, PIndexableCode, PInt32, PType} import is.hail.types.virtual.{TArray, TFloat64, TInt32, Type} object GenotypeFunctions extends RegistryFunctions { def registerAll() { - registerSCode1("gqFromPL", TArray(tv("N", "int32")), TInt32, (_: Type, _: SType) => SInt32) - { case (r, cb, rt, _pl: SIndexableCode) => + registerPCode1("gqFromPL", TArray(tv("N", "int32")), TInt32, (_: Type, _: PType) => PInt32()) + { case (r, cb, rt, _pl: PIndexableCode) => val code = EmitCodeBuilder.scopedCode(r.mb) { cb => val pl = _pl.memoize(cb, "plv") val m = cb.newLocal[Int]("m", 99) @@ -34,13 +33,13 @@ object GenotypeFunctions extends RegistryFunctions { }) m2 - m } - primitive(code) + PCode(rt, code) } registerIEmitCode1("dosage", TArray(tv("N", "float64")), TFloat64, - (_: Type, arrayType: EmitType) => EmitType(SFloat64, arrayType.required && arrayType.st.asInstanceOf[SContainer].elementEmitType.required) + (_: Type, arrayType: PType) => PFloat64(arrayType.required && arrayType.asInstanceOf[PArray].elementType.required) ) { case (cb, r, rt, gp) => - gp.toI(cb).flatMap(cb) { case (gpc: SIndexableCode) => + gp.toI(cb).flatMap(cb) { case (gpc: PIndexableCode) => val gpv = gpc.memoize(cb, "dosage_gp") cb.ifx(gpv.loadLength().cne(3), @@ -48,7 +47,7 @@ object GenotypeFunctions extends RegistryFunctions { gpv.loadElement(cb, 
1).flatMap(cb) { (_1: SCode) => gpv.loadElement(cb, 2).map(cb) { (_2: SCode) => - primitive(_1.asDouble.doubleCode(cb) + _2.asDouble.doubleCode(cb) * 2.0) + PCode(rt, _1.asDouble.doubleCode(cb) + _2.asDouble.doubleCode(cb) * 2.0) } } } diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/GetElement.scala b/hail/src/main/scala/is/hail/expr/ir/functions/GetElement.scala index 93017184fda..ec45cae7768 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/GetElement.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/GetElement.scala @@ -5,7 +5,7 @@ import is.hail.types.BlockMatrixType import is.hail.types.virtual.Type import is.hail.linalg.BlockMatrix -case class GetElement(index: IndexedSeq[Long]) extends BlockMatrixToValueFunction { +case class GetElement(index: Seq[Long]) extends BlockMatrixToValueFunction { assert(index.length == 2) override def typ(childType: BlockMatrixType): Type = childType.elementType diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/IntervalFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/IntervalFunctions.scala index d292b5ec2ff..a2ce9bf5d36 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/IntervalFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/IntervalFunctions.scala @@ -4,10 +4,6 @@ import is.hail.asm4s.{Code, _} import is.hail.expr.ir._ import is.hail.expr.ir.orderings.CodeOrdering import is.hail.types.physical._ -import is.hail.types.physical.stypes.{EmitType, SType} -import is.hail.types.physical.stypes.concrete.SIntervalPointer -import is.hail.types.physical.stypes.interfaces._ -import is.hail.types.physical.stypes.primitives.SBoolean import is.hail.types.virtual._ object IntervalFunctions extends RegistryFunctions { @@ -15,18 +11,18 @@ object IntervalFunctions extends RegistryFunctions { def registerAll(): Unit = { registerIEmitCode4("Interval", tv("T"), tv("T"), TBoolean, TBoolean, TInterval(tv("T")), - { case (_: Type, startpt, endpt, includesStartET, includesEndET) => - EmitType(PCanonicalInterval( - InferPType.getCompatiblePType(Seq(startpt.canonicalPType, endpt.canonicalPType)), - required = includesStartET.required && includesEndET.required - ).sType, includesStartET.required && includesEndET.required) + { case (_: Type, startpt, endpt, includesStartPT, includesEndPT) => + PCanonicalInterval( + InferPType.getCompatiblePType(Seq(startpt, endpt)), + required = includesStartPT.required && includesEndPT.required + ) }) { - case (cb, r, SIntervalPointer(pt: PCanonicalInterval), start, end, includesStart, includesEnd) => + case (cb, r, rt: PCanonicalInterval, start, end, includesStart, includesEnd) => includesStart.toI(cb).flatMap(cb) { includesStart => includesEnd.toI(cb).map(cb) { includesEnd => - pt.constructFromCodes(cb, r, + rt.constructFromCodes(cb, r, start, end, EmitCode.present(cb.emb, includesStart), @@ -36,83 +32,83 @@ object IntervalFunctions extends RegistryFunctions { } registerIEmitCode1("start", TInterval(tv("T")), tv("T"), - (_: Type, x: EmitType) => EmitType(x.st.asInstanceOf[SInterval].pointType, x.required && x.st.asInstanceOf[SInterval].pointEmitType.required)) { + (_: Type, x: PType) => x.asInstanceOf[PInterval].pointType.orMissing(x.required)) { case (cb, r, rt, interval) => - interval.toI(cb).flatMap(cb) { case pi: SIntervalCode => + interval.toI(cb).flatMap(cb) { case pi: PIntervalCode => val pv = pi.memoize(cb, "interval") - pv.loadStart(cb) + pv.loadStart(cb).typecast[PCode] } } registerIEmitCode1("end", TInterval(tv("T")), tv("T"), - (_: Type, x: EmitType) => 
EmitType(x.st.asInstanceOf[SInterval].pointType, x.required && x.st.asInstanceOf[SInterval].pointEmitType.required)) { + (_: Type, x: PType) => x.asInstanceOf[PInterval].pointType.orMissing(x.required)) { case (cb, r, rt, interval) => - interval.toI(cb).flatMap(cb) { case pi: SIntervalCode => + interval.toI(cb).flatMap(cb) { case pi: PIntervalCode => val pv = pi.memoize(cb, "interval") - pv.loadEnd(cb) + pv.loadEnd(cb).typecast[PCode] } } - registerSCode1("includesStart", TInterval(tv("T")), TBoolean, (_: Type, x: SType) => - SBoolean + registerPCode1("includesStart", TInterval(tv("T")), TBoolean, (_: Type, x: PType) => + PBoolean(x.required) ) { - case (r, cb, rt, interval: SIntervalCode) => primitive(interval.includesStart()) + case (r, cb, rt, interval: PIntervalCode) => PCode(rt, interval.includesStart()) } - registerSCode1("includesEnd", TInterval(tv("T")), TBoolean, (_: Type, x: SType) => - SBoolean + registerPCode1("includesEnd", TInterval(tv("T")), TBoolean, (_: Type, x: PType) => + PBoolean(x.required) ) { - case (r, cb, rt, interval: SIntervalCode) => primitive(interval.includesEnd()) + case (r, cb, rt, interval: PIntervalCode) => PCode(rt, interval.includesEnd()) } registerIEmitCode2("contains", TInterval(tv("T")), tv("T"), TBoolean, { - case(_: Type, intervalT: EmitType, _: EmitType) => EmitType(SBoolean, intervalT.required) + case(_: Type, intervalT: PInterval, _: PType) => PBoolean(intervalT.required) }) { case (cb, r, rt, int, point) => - int.toI(cb).map(cb) { case (intc: SIntervalCode) => - val interval: SIntervalValue = intc.memoize(cb, "interval") + int.toI(cb).map(cb) { case (intc: PIntervalCode) => + val interval: PIntervalValue = intc.memoize(cb, "interval") val pointv = cb.memoize(point.toI(cb), "point") val compare = cb.emb.ecb.getOrderingFunction(pointv.st, interval.st.pointType, CodeOrdering.Compare()) - val start = EmitCode.fromI(cb.emb)(cb => interval.loadStart(cb)) + val start = EmitCode.fromI(cb.emb)(cb => interval.loadStart(cb).typecast[PCode]) val cmp = cb.newLocal("cmp", compare(cb, pointv, start)) val contains = cb.newLocal[Boolean]("contains", false) cb.ifx(cmp > 0 || (cmp.ceq(0) && interval.includesStart()), { - val end = EmitCode.fromI(cb.emb)(cb => interval.loadEnd(cb)) + val end = EmitCode.fromI(cb.emb)(cb => interval.loadEnd(cb).typecast[PCode]) cb.assign(cmp, compare(cb, pointv, end)) cb.assign(contains, cmp < 0 || (cmp.ceq(0) && interval.includesEnd())) }) - primitive(contains) + PCode(rt, contains) } } - registerSCode1("isEmpty", TInterval(tv("T")), TBoolean, (_: Type, pt: SType) => SBoolean) { - case (r, cb, rt, interval: SIntervalCode) => + registerPCode1("isEmpty", TInterval(tv("T")), TBoolean, (_: Type, pt: PType) => PBoolean(pt.required)) { + case (r, cb, rt, interval: PIntervalCode) => val empty = EmitCodeBuilder.scopedCode(r.mb) { cb => val intv = interval.memoize(cb, "interval") intv.isEmpty(cb) } - primitive(empty) + PCode(rt, empty) } - registerSCode2("overlaps", TInterval(tv("T")), TInterval(tv("T")), TBoolean, (_: Type, i1t: SType, i2t: SType) => SBoolean) { - case (r, cb, rt, int1: SIntervalCode, int2: SIntervalCode) => + registerPCode2("overlaps", TInterval(tv("T")), TInterval(tv("T")), TBoolean, (_: Type, i1t: PType, i2t: PType) => PBoolean(i1t.required && i2t.required)) { + case (r, cb, rt, int1: PIntervalCode, int2: PIntervalCode) => val overlap = EmitCodeBuilder.scopedCode(r.mb) { cb => val interval1 = int1.memoize(cb, "interval1") val interval2 = int2.memoize(cb, "interval2") val compare = 
cb.emb.ecb.getOrderingFunction(int1.st.pointType, int2.st.pointType, CodeOrdering.Compare()) - def isAboveOnNonempty(cb: EmitCodeBuilder, lhs: SIntervalValue, rhs: SIntervalValue): Code[Boolean] = { - val start = EmitCode.fromI(cb.emb)(cb => lhs.loadStart(cb)) - val end = EmitCode.fromI(cb.emb)(cb => rhs.loadEnd(cb)) + def isAboveOnNonempty(cb: EmitCodeBuilder, lhs: PIntervalValue, rhs: PIntervalValue): Code[Boolean] = { + val start = EmitCode.fromI(cb.emb)(cb => lhs.loadStart(cb).typecast[PCode]) + val end = EmitCode.fromI(cb.emb)(cb => rhs.loadEnd(cb).typecast[PCode]) val cmp = cb.newLocal("cmp", compare(cb, start, end)) cmp > 0 || (cmp.ceq(0) && (!lhs.includesStart() || !rhs.includesEnd())) } - def isBelowOnNonempty(cb: EmitCodeBuilder, lhs: SIntervalValue, rhs: SIntervalValue): Code[Boolean] = { - val end = EmitCode.fromI(cb.emb)(cb => lhs.loadEnd(cb)) - val start = EmitCode.fromI(cb.emb)(cb => rhs.loadStart(cb)) + def isBelowOnNonempty(cb: EmitCodeBuilder, lhs: PIntervalValue, rhs: PIntervalValue): Code[Boolean] = { + val end = EmitCode.fromI(cb.emb)(cb => lhs.loadEnd(cb).typecast[PCode]) + val start = EmitCode.fromI(cb.emb)(cb => rhs.loadStart(cb).typecast[PCode]) val cmp = cb.newLocal("cmp", compare(cb, end, start)) cmp < 0 || (cmp.ceq(0) && (!lhs.includesEnd() || !rhs.includesStart())) } @@ -121,7 +117,7 @@ object IntervalFunctions extends RegistryFunctions { isBelowOnNonempty(cb, interval1, interval2) || isAboveOnNonempty(cb, interval1, interval2)) } - primitive(overlap) + PCode(rt, overlap) } registerIR2("sortedNonOverlappingIntervalsContain", diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/LocusFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/LocusFunctions.scala index 26be380affb..30dc4e4498d 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/LocusFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/LocusFunctions.scala @@ -5,10 +5,9 @@ import is.hail.asm4s import is.hail.asm4s._ import is.hail.expr.ir.{EmitMethodBuilder, _} import is.hail.types.physical._ -import is.hail.types.physical.stypes.{EmitType, SType} -import is.hail.types.physical.stypes.concrete.{SBaseStructPointer, SBaseStructPointerCode, SCanonicalLocusPointer, SCanonicalLocusPointerCode, SIntervalPointer, SIntervalPointerCode, SStringPointer} +import is.hail.types.physical.stypes.concrete.{SBaseStructPointerCode, SCanonicalLocusPointerCode, SIntervalPointerCode, SStringPointer} import is.hail.types.physical.stypes.interfaces._ -import is.hail.types.physical.stypes.primitives.{SFloat64Code, SInt32, SInt64} +import is.hail.types.physical.stypes.primitives.SFloat64Code import is.hail.types.virtual._ import is.hail.utils._ import is.hail.variant._ @@ -40,7 +39,7 @@ object LocusFunctions extends RegistryFunctions { val pAlleles = rt.types(1).asInstanceOf[PCanonicalArray] val all = cb.newLocal[IndexedSeq[String]]("locus_alleles_parsed_alleles", variant.invoke[IndexedSeq[String]]("_2")) val len = cb.newLocal[Int]("locus_alleles_n_alleles", all.invoke[Int]("length")) - val ps = pAlleles.elementType.setRequired(false).asInstanceOf[PCanonicalString] + val ps = pAlleles.elementType.asInstanceOf[PCanonicalString] val ss = SStringPointer(ps) val (push, finish) = pAlleles.constructFromFunctions(cb, r, len, deepCopy = false) val i = cb.newLocal[Int]("locus_alleles_i", 0) @@ -94,16 +93,16 @@ object LocusFunctions extends RegistryFunctions { def registerAll() { val locusClass = Locus.getClass - registerSCode1("contig", tlocus("T"), TString, - (_: Type, x: SType) => 
x.asInstanceOf[SLocus].contigType) { - case (r, cb, rt, locus: SLocusCode) => - locus.contig(cb) + registerPCode1("contig", tlocus("T"), TString, + (_: Type, x: PType) => x.asInstanceOf[PLocus].contigType) { + case (r, cb, rt, locus: PLocusCode) => + locus.contig(cb).asPCode } - registerSCode1("position", tlocus("T"), TInt32, (_: Type, x: SType) => SInt32) { + registerPCode1("position", tlocus("T"), TInt32, (_: Type, x: PType) => x.asInstanceOf[PLocus].positionType) { case (r, cb, rt, pc: SLocusCode) => val locus = pc.memoize(cb, "locus_position_locus") - primitive(locus.position(cb)) + PCode(rt, locus.position(cb)) } registerLocusCode("isAutosomalOrPseudoAutosomal") { locus => isAutosomal(locus) || ((inX(locus) || inY(locus)) && inPar(locus)) @@ -115,13 +114,13 @@ object LocusFunctions extends RegistryFunctions { registerLocusCode("inXNonPar") { locus => inX(locus) && !inPar(locus) } registerLocusCode("inYNonPar") { locus => inY(locus) && !inPar(locus) } - registerSCode2("min_rep", tlocus("T"), TArray(TString), TStruct("locus" -> tv("T"), "alleles" -> TArray(TString)), { - (returnType: Type, _: SType, _: SType) => { + registerPCode2("min_rep", tlocus("T"), TArray(TString), TStruct("locus" -> tv("T"), "alleles" -> TArray(TString)), { + (returnType: Type, _: PType, _: PType) => { val locusPT = PCanonicalLocus(returnType.asInstanceOf[TStruct].field("locus").typ.asInstanceOf[TLocus].rg, true) - PCanonicalStruct("locus" -> locusPT, "alleles" -> PCanonicalArray(PCanonicalString(true), true)).sType + PCanonicalStruct("locus" -> locusPT, "alleles" -> PCanonicalArray(PCanonicalString(true), true)) } }) { - case (r, cb, SBaseStructPointer(rt: PCanonicalStruct), locus: SLocusCode, alleles: SIndexableCode) => + case (r, cb, rt: PCanonicalStruct, locus: PLocusCode, alleles: PIndexableCode) => val variantTuple = Code.invokeScalaObject2[Locus, IndexedSeq[String], (Locus, IndexedSeq[String])]( VariantMethods.getClass, "minRep", locus.getLocusObj(cb), @@ -130,11 +129,11 @@ object LocusFunctions extends RegistryFunctions { emitVariant(cb, r.region, variantTuple, rt) } - registerSCode2("locus_windows_per_contig", TArray(TArray(TFloat64)), TFloat64, TTuple(TArray(TInt32), TArray(TInt32)), { - (_: Type, _: SType, _: SType) => - PCanonicalTuple(false, PCanonicalArray(PInt32(true), true), PCanonicalArray(PInt32(true), true)).sType + registerPCode2("locus_windows_per_contig", TArray(TArray(TFloat64)), TFloat64, TTuple(TArray(TInt32), TArray(TInt32)), { + (_: Type, _: PType, _: PType) => + PCanonicalTuple(false, PCanonicalArray(PInt32(true), true), PCanonicalArray(PInt32(true), true)) }) { - case (r: EmitRegion, cb: EmitCodeBuilder, SBaseStructPointer(rt: PCanonicalTuple), groupedCode: SIndexableCode, radiusCode: SFloat64Code) => + case (r: EmitRegion, cb: EmitCodeBuilder, rt: PCanonicalTuple, groupedCode: PIndexableCode, radiusCode: SFloat64Code) => val grouped = groupedCode.memoize(cb, "locuswindows_grouped") val radius = cb.newLocal("locuswindows_radius", radiusCode.doubleCode(cb)) @@ -201,7 +200,7 @@ object LocusFunctions extends RegistryFunctions { ) cb.define(Lbreak) - pushElement(cb, IEmitCode.present(cb, primitive(offset + idx))) + pushElement(cb, IEmitCode.present(cb, PCode(arrayType.elementType, offset + idx))) cb.assign(i, i + 1) }) @@ -230,10 +229,10 @@ object LocusFunctions extends RegistryFunctions { ), deepCopy = false) } - registerSCode1("Locus", TString, tlocus("T"), { - (returnType: Type, _: SType) => PCanonicalLocus(returnType.asInstanceOf[TLocus].rg).sType + registerPCode1("Locus", TString, 
tlocus("T"), { + (returnType: Type, _: PType) => PCanonicalLocus(returnType.asInstanceOf[TLocus].rg) }) { - case (r, cb, SCanonicalLocusPointer(rt: PCanonicalLocus), str: SStringCode) => + case (r, cb, rt: PCanonicalLocus, str: PStringCode) => val slocus = str.loadString() emitLocus(cb, r.region, @@ -241,23 +240,23 @@ object LocusFunctions extends RegistryFunctions { rt) } - registerSCode2("Locus", TString, TInt32, tlocus("T"), { - (returnType: Type, _: SType, _: SType) => PCanonicalLocus(returnType.asInstanceOf[TLocus].rg).sType + registerPCode2("Locus", TString, TInt32, tlocus("T"), { + (returnType: Type, _: PType, _: PType) => PCanonicalLocus(returnType.asInstanceOf[TLocus].rg) }) { - case (r, cb, SCanonicalLocusPointer(rt: PCanonicalLocus), contig, pos) => + case (r, cb, rt: PCanonicalLocus, contig, pos) => val contigMemo = contig.memoize(cb, "locus_contig") val posMemo = pos.memoize(cb, "locus_pos") cb += rgCode(r.mb, rt.rg).invoke[String, Int, Unit]("checkLocus", contigMemo.asString.loadString(), posMemo.asInt.intCode(cb)) rt.constructFromPositionAndString(cb, r.region, contigMemo.asString.loadString(), posMemo.asInt.intCode(cb)) } - registerSCode1("LocusAlleles", TString, tvariant("T"), { - (returnType: Type, _: SType) => { + registerPCode1("LocusAlleles", TString, tvariant("T"), { + (returnType: Type, _: PType) => { val lTyp = returnType.asInstanceOf[TStruct].field("locus").typ.asInstanceOf[TLocus] - PCanonicalStruct("locus" -> PCanonicalLocus(lTyp.rg, true), "alleles" -> PCanonicalArray(PCanonicalString(true), true)).sType + PCanonicalStruct("locus" -> PCanonicalLocus(lTyp.rg, true), "alleles" -> PCanonicalArray(PCanonicalString(true), true)) } }) { - case (r, cb, SBaseStructPointer(rt: PCanonicalStruct), variantStr) => + case (r, cb, rt: PCanonicalStruct, variantStr) => val svar = variantStr.asString.loadString() val plocus = rt.types(0).asInstanceOf[PCanonicalLocus] @@ -268,11 +267,11 @@ object LocusFunctions extends RegistryFunctions { } registerIEmitCode2("LocusInterval", TString, TBoolean, tinterval("T"), { - (returnType: Type, _: EmitType, _: EmitType) => { + (returnType: Type, _: PType, _: PType) => { val lPTyp = returnType.asInstanceOf[TInterval].pointType.asInstanceOf[TLocus] - EmitType(PCanonicalInterval(PCanonicalLocus(lPTyp.asInstanceOf[TLocus].rg)).sType, false) + PCanonicalInterval(PCanonicalLocus(lPTyp.asInstanceOf[TLocus].rg)) } - }) { case (cb: EmitCodeBuilder, r: Value[Region], SIntervalPointer(rt: PCanonicalInterval), locusStrEC: EmitCode, invalidMissingEC: EmitCode) => + }) { case (cb: EmitCodeBuilder, r: Value[Region], rt: PCanonicalInterval, locusStrEC: EmitCode, invalidMissingEC: EmitCode) => val plocus = rt.pointType.asInstanceOf[PLocus] @@ -299,13 +298,13 @@ object LocusFunctions extends RegistryFunctions { } registerIEmitCode6("LocusInterval", TString, TInt32, TInt32, TBoolean, TBoolean, TBoolean, tinterval("T"), { - (returnType: Type, _: EmitType, _: EmitType, _: EmitType, _: EmitType, _: EmitType, _: EmitType) => { + (returnType: Type, _: PType, _: PType, _: PType, _: PType, _: PType, _: PType) => { val lPTyp = returnType.asInstanceOf[TInterval].pointType.asInstanceOf[TLocus] - EmitType(PCanonicalInterval(PCanonicalLocus(lPTyp.rg)).sType, false) + PCanonicalInterval(PCanonicalLocus(lPTyp.rg)) } }) { case (cb: EmitCodeBuilder, r: Value[Region], - SIntervalPointer(rt: PCanonicalInterval), + rt: PCanonicalInterval, locusString: EmitCode, pos1: EmitCode, pos2: EmitCode, @@ -348,38 +347,38 @@ object LocusFunctions extends RegistryFunctions { } } - 
registerSCode1("globalPosToLocus", TInt64, tlocus("T"), { - (returnType: Type, _: SType) => - PCanonicalLocus(returnType.asInstanceOf[TLocus].rg).sType + registerPCode1("globalPosToLocus", TInt64, tlocus("T"), { + (returnType: Type, _: PType) => + PCanonicalLocus(returnType.asInstanceOf[TLocus].rg) }) { - case (r, cb, SCanonicalLocusPointer(rt: PCanonicalLocus), globalPos) => + case (r, cb, rt: PCanonicalLocus, globalPos) => val locus = cb.newLocal[Locus]("global_pos_locus", rgCode(r.mb, rt.rg).invoke[Long, Locus]("globalPosToLocus", globalPos.asLong.longCode(cb))) rt.constructFromPositionAndString(cb, r.region, locus.invoke[String]("contig"), locus.invoke[Int]("position")) } - registerSCode1("locusToGlobalPos", tlocus("T"), TInt64, (_: Type, _: SType) => SInt64) { - case (r, cb, rt, locus: SLocusCode) => + registerPCode1("locusToGlobalPos", tlocus("T"), TInt64, (_: Type, _: PType) => PInt64()) { + case (r, cb, rt, locus: PLocusCode) => val locusObject = locus.memoize(cb, "locus_to_global_pos") .getLocusObj(cb) - val globalPos = rgCode(r.mb, locus.st.rg).invoke[Locus, Long]("locusToGlobalPos", locusObject) - primitive(globalPos) + val globalPos = rgCode(r.mb, locus.pt.rg).invoke[Locus, Long]("locusToGlobalPos", locusObject) + PCode(rt, globalPos) } registerIEmitCode2("liftoverLocus", tlocus("T"), TFloat64, TStruct("result" -> tv("U", "locus"), "is_negative_strand" -> TBoolean), { - (returnType: Type, _: EmitType, _: EmitType) => { + (returnType: Type, _: PType, _: PType) => { val lTyp = returnType.asInstanceOf[TStruct].field("result").typ.asInstanceOf[TLocus] - EmitType(PCanonicalStruct("result" -> PCanonicalLocus(lTyp.rg, true), "is_negative_strand" -> PBoolean(true)).sType, false) + PCanonicalStruct("result" -> PCanonicalLocus(lTyp.rg, true), "is_negative_strand" -> PBoolean(true)) } }) { - case (cb, r, SBaseStructPointer(rt: PCanonicalStruct), loc, minMatch) => + case (cb, r, rt: PCanonicalStruct, loc, minMatch) => loc.toI(cb).flatMap(cb) { loc => minMatch.toI(cb).flatMap(cb) { minMatch => val Lmissing = CodeLabel() val Ldefined = CodeLabel() - val locT = loc.asLocus.st + val locT = loc.pt.asInstanceOf[PLocus] val srcRG = locT.rg val destRG = rt.types(0).asInstanceOf[PLocus].rg @@ -406,12 +405,12 @@ object LocusFunctions extends RegistryFunctions { } registerIEmitCode2("liftoverLocusInterval", tinterval("T"), TFloat64, TStruct("result" -> tinterval("U"), "is_negative_strand" -> TBoolean), { - (returnType: Type, _: EmitType, _: EmitType) => { + (returnType: Type, _: PType, _: PType) => { val lTyp = returnType.asInstanceOf[TStruct].field("result").typ.asInstanceOf[TInterval].pointType.asInstanceOf[TLocus] - EmitType(PCanonicalStruct("result" -> PCanonicalInterval(PCanonicalLocus(lTyp.rg, true), true), "is_negative_strand" -> PBoolean(true)).sType, false) + PCanonicalStruct("result" -> PCanonicalInterval(PCanonicalLocus(lTyp.rg, true), true), "is_negative_strand" -> PBoolean(true)) } }) { - case (cb, r, SBaseStructPointer(rt: PCanonicalStruct), interval, minMatch) => + case (cb, r, rt: PCanonicalStruct, interval, minMatch) => interval.toI(cb).flatMap(cb) { interval => minMatch.toI(cb).flatMap(cb) { minMatch => @@ -419,8 +418,8 @@ object LocusFunctions extends RegistryFunctions { val Ldefined = CodeLabel() - val iT = interval.st.asInstanceOf[SInterval] - val srcRG = iT.pointType.asInstanceOf[SLocus].rg + val iT = interval.pt.asInstanceOf[PInterval] + val srcRG = iT.pointType.asInstanceOf[PLocus].rg val destRG = rt.types(0).asInstanceOf[PInterval].pointType.asInstanceOf[PLocus].rg val er = 
EmitRegion(cb.emb, r) val intervalObj = Code.checkcast[Interval](scodeToJavaValue(cb, r, interval)) diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/MathFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/MathFunctions.scala index 745f995a58c..fddb45ac23c 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/MathFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/MathFunctions.scala @@ -3,8 +3,7 @@ package is.hail.expr.ir.functions import is.hail.asm4s.Code import is.hail.expr.ir._ import is.hail.stats._ -import is.hail.types.physical.stypes._ -import is.hail.types.physical.stypes.primitives._ +import is.hail.types.physical.stypes.primitives.{SFloat64Code, SInt32Code} import is.hail.types.physical.{PBoolean, PFloat32, PFloat64, PInt32, PInt64, PType} import is.hail.types.virtual._ import is.hail.utils._ @@ -101,76 +100,76 @@ object MathFunctions extends RegistryFunctions { registerIR1("toFloat32", tnum("T"), TFloat32)((_, x) => Cast(x, TFloat32)) registerIR1("toFloat64", tnum("T"), TFloat64)((_, x) => Cast(x, TFloat64)) - registerScalaFunction("abs", Array(TInt32), TInt32, null)(mathPackageClass, "abs") - registerScalaFunction("abs", Array(TInt64), TInt64, null)(mathPackageClass, "abs") - registerScalaFunction("abs", Array(TFloat32), TFloat32, null)(mathPackageClass, "abs") - registerScalaFunction("abs", Array(TFloat64), TFloat64, null)(mathPackageClass, "abs") + registerScalaFunction("abs", Array(TInt32), TInt32, (_: Type, _: Seq[PType]) => PInt32())(mathPackageClass, "abs") + registerScalaFunction("abs", Array(TInt64), TInt64, (_: Type, _: Seq[PType]) => PInt64())(mathPackageClass, "abs") + registerScalaFunction("abs", Array(TFloat32), TFloat32, (_: Type, _: Seq[PType]) => PFloat32())(mathPackageClass, "abs") + registerScalaFunction("abs", Array(TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(mathPackageClass, "abs") - registerScalaFunction("pow", Array(TInt32, TInt32), TFloat64, null)(thisClass, "pow") - registerScalaFunction("pow", Array(TInt64, TInt64), TFloat64, null)(thisClass, "pow") - registerScalaFunction("pow", Array(TFloat32, TFloat32), TFloat64, null)(thisClass, "pow") - registerScalaFunction("pow", Array(TFloat64, TFloat64), TFloat64, null)(thisClass, "pow") + registerScalaFunction("pow", Array(TInt32, TInt32), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(thisClass, "pow") + registerScalaFunction("pow", Array(TInt64, TInt64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(thisClass, "pow") + registerScalaFunction("pow", Array(TFloat32, TFloat32), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(thisClass, "pow") + registerScalaFunction("pow", Array(TFloat64, TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(thisClass, "pow") - registerScalaFunction("exp", Array(TFloat64), TFloat64, null)(mathPackageClass, "exp") - registerScalaFunction("log10", Array(TFloat64), TFloat64, null)(mathPackageClass, "log10") - registerScalaFunction("sqrt", Array(TFloat64), TFloat64, null)(mathPackageClass, "sqrt") - registerScalaFunction("log", Array(TFloat64), TFloat64, null)(mathPackageClass, "log") - registerScalaFunction("log", Array(TFloat64, TFloat64), TFloat64, null)(thisClass, "log") - registerScalaFunction("gamma", Array(TFloat64), TFloat64, null)(thisClass, "gamma") - registerScalaFunction("binomTest", Array(TInt32, TInt32, TFloat64, TInt32), TFloat64, null)(statsPackageClass, "binomTest") + registerScalaFunction("exp", Array(TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(mathPackageClass, "exp") + 
registerScalaFunction("log10", Array(TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(mathPackageClass, "log10") + registerScalaFunction("sqrt", Array(TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(mathPackageClass, "sqrt") + registerScalaFunction("log", Array(TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(mathPackageClass, "log") + registerScalaFunction("log", Array(TFloat64, TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(thisClass, "log") + registerScalaFunction("gamma", Array(TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(thisClass, "gamma") + registerScalaFunction("binomTest", Array(TInt32, TInt32, TFloat64, TInt32), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(statsPackageClass, "binomTest") - registerScalaFunction("dbeta", Array(TFloat64, TFloat64, TFloat64), TFloat64, null)(statsPackageClass, "dbeta") + registerScalaFunction("dbeta", Array(TFloat64, TFloat64, TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(statsPackageClass, "dbeta") - registerScalaFunction("pnorm", Array(TFloat64), TFloat64, null)(statsPackageClass, "pnorm") - registerScalaFunction("qnorm", Array(TFloat64), TFloat64, null)(statsPackageClass, "qnorm") + registerScalaFunction("pnorm", Array(TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(statsPackageClass, "pnorm") + registerScalaFunction("qnorm", Array(TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(statsPackageClass, "qnorm") - registerScalaFunction("pT", Array(TFloat64, TFloat64, TBoolean, TBoolean), TFloat64, null)(statsPackageClass, "pT") - registerScalaFunction("pF", Array(TFloat64, TFloat64, TFloat64, TBoolean, TBoolean), TFloat64, null)(statsPackageClass, "pF") + registerScalaFunction("pT", Array(TFloat64, TFloat64, TBoolean, TBoolean), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(statsPackageClass, "pT") + registerScalaFunction("pF", Array(TFloat64, TFloat64, TFloat64, TBoolean, TBoolean), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(statsPackageClass, "pF") - registerScalaFunction("dpois", Array(TFloat64, TFloat64), TFloat64, null)(statsPackageClass, "dpois") - registerScalaFunction("dpois", Array(TFloat64, TFloat64, TBoolean), TFloat64, null)(statsPackageClass, "dpois") + registerScalaFunction("dpois", Array(TFloat64, TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(statsPackageClass, "dpois") + registerScalaFunction("dpois", Array(TFloat64, TFloat64, TBoolean), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(statsPackageClass, "dpois") - registerScalaFunction("ppois", Array(TFloat64, TFloat64), TFloat64, null)(statsPackageClass, "ppois") - registerScalaFunction("ppois", Array(TFloat64, TFloat64, TBoolean, TBoolean), TFloat64, null)(statsPackageClass, "ppois") + registerScalaFunction("ppois", Array(TFloat64, TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(statsPackageClass, "ppois") + registerScalaFunction("ppois", Array(TFloat64, TFloat64, TBoolean, TBoolean), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(statsPackageClass, "ppois") - registerScalaFunction("qpois", Array(TFloat64, TFloat64), TInt32, null)(statsPackageClass, "qpois") - registerScalaFunction("qpois", Array(TFloat64, TFloat64, TBoolean, TBoolean), TInt32, null)(statsPackageClass, "qpois") + registerScalaFunction("qpois", Array(TFloat64, TFloat64), TInt32, (_: Type, _: Seq[PType]) => PInt32())(statsPackageClass, "qpois") + registerScalaFunction("qpois", Array(TFloat64, TFloat64, TBoolean, TBoolean), TInt32, (_: Type, _: Seq[PType]) => 
PInt32())(statsPackageClass, "qpois") - registerScalaFunction("pchisqtail", Array(TFloat64, TFloat64), TFloat64, null)(statsPackageClass, "chiSquaredTail") - registerScalaFunction("pnchisqtail", Array(TFloat64, TFloat64, TFloat64), TFloat64, null)(statsPackageClass, "nonCentralChiSquaredTail") - registerScalaFunction("qchisqtail", Array(TFloat64, TFloat64), TFloat64, null)(statsPackageClass, "inverseChiSquaredTail") + registerScalaFunction("pchisqtail", Array(TFloat64, TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(statsPackageClass, "chiSquaredTail") + registerScalaFunction("pnchisqtail", Array(TFloat64, TFloat64, TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(statsPackageClass, "nonCentralChiSquaredTail") + registerScalaFunction("qchisqtail", Array(TFloat64, TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(statsPackageClass, "inverseChiSquaredTail") - registerScalaFunction("floor", Array(TFloat32), TFloat32, null)(thisClass, "floor") - registerScalaFunction("floor", Array(TFloat64), TFloat64, null)(thisClass, "floor") + registerScalaFunction("floor", Array(TFloat32), TFloat32, (_: Type, _: Seq[PType]) => PFloat32())(thisClass, "floor") + registerScalaFunction("floor", Array(TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(thisClass, "floor") - registerScalaFunction("ceil", Array(TFloat32), TFloat32, null)(thisClass, "ceil") - registerScalaFunction("ceil", Array(TFloat64), TFloat64, null)(thisClass, "ceil") + registerScalaFunction("ceil", Array(TFloat32), TFloat32, (_: Type, _: Seq[PType]) => PFloat32())(thisClass, "ceil") + registerScalaFunction("ceil", Array(TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(thisClass, "ceil") - registerScalaFunction("mod", Array(TInt32, TInt32), TInt32, null)(thisClass, "mod") - registerScalaFunction("mod", Array(TInt64, TInt64), TInt64, null)(thisClass, "mod") - registerScalaFunction("mod", Array(TFloat32, TFloat32), TFloat32, null)(thisClass, "mod") - registerScalaFunction("mod", Array(TFloat64, TFloat64), TFloat64, null)(thisClass, "mod") + registerScalaFunction("mod", Array(TInt32, TInt32), TInt32, (_: Type, _: Seq[PType]) => PInt32())(thisClass, "mod") + registerScalaFunction("mod", Array(TInt64, TInt64), TInt64, (_: Type, _: Seq[PType]) => PInt64())(thisClass, "mod") + registerScalaFunction("mod", Array(TFloat32, TFloat32), TFloat32, (_: Type, _: Seq[PType]) => PFloat32())(thisClass, "mod") + registerScalaFunction("mod", Array(TFloat64, TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(thisClass, "mod") - registerJavaStaticFunction("isnan", Array(TFloat32), TBoolean, null)(jFloatClass, "isNaN") - registerJavaStaticFunction("isnan", Array(TFloat64), TBoolean, null)(jDoubleClass, "isNaN") + registerJavaStaticFunction("isnan", Array(TFloat32), TBoolean, (_: Type, _: Seq[PType]) => PBoolean())(jFloatClass, "isNaN") + registerJavaStaticFunction("isnan", Array(TFloat64), TBoolean, (_: Type, _: Seq[PType]) => PBoolean())(jDoubleClass, "isNaN") - registerJavaStaticFunction("is_finite", Array(TFloat32), TBoolean, null)(jFloatClass, "isFinite") - registerJavaStaticFunction("is_finite", Array(TFloat64), TBoolean, null)(jDoubleClass, "isFinite") + registerJavaStaticFunction("is_finite", Array(TFloat32), TBoolean, (_: Type, _: Seq[PType]) => PBoolean())(jFloatClass, "isFinite") + registerJavaStaticFunction("is_finite", Array(TFloat64), TBoolean, (_: Type, _: Seq[PType]) => PBoolean())(jDoubleClass, "isFinite") - registerJavaStaticFunction("is_infinite", Array(TFloat32), TBoolean, 
null)(jFloatClass, "isInfinite") - registerJavaStaticFunction("is_infinite", Array(TFloat64), TBoolean, null)(jDoubleClass, "isInfinite") + registerJavaStaticFunction("is_infinite", Array(TFloat32), TBoolean, (_: Type, _: Seq[PType]) => PBoolean())(jFloatClass, "isInfinite") + registerJavaStaticFunction("is_infinite", Array(TFloat64), TBoolean, (_: Type, _: Seq[PType]) => PBoolean())(jDoubleClass, "isInfinite") - registerJavaStaticFunction("sign", Array(TInt32), TInt32, null)(jIntegerClass, "signum") - registerScalaFunction("sign", Array(TInt64), TInt64, null)(mathPackageClass, "signum") - registerJavaStaticFunction("sign", Array(TFloat32), TFloat32, null)(jMathClass, "signum") - registerJavaStaticFunction("sign", Array(TFloat64), TFloat64, null)(jMathClass, "signum") + registerJavaStaticFunction("sign", Array(TInt32), TInt32, (_: Type, _: Seq[PType]) => PInt32())(jIntegerClass, "signum") + registerScalaFunction("sign", Array(TInt64), TInt64, (_: Type, _: Seq[PType]) => PInt64())(mathPackageClass, "signum") + registerJavaStaticFunction("sign", Array(TFloat32), TFloat32, (_: Type, _: Seq[PType]) => PFloat32())(jMathClass, "signum") + registerJavaStaticFunction("sign", Array(TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(jMathClass, "signum") - registerScalaFunction("approxEqual", Array(TFloat64, TFloat64, TFloat64, TBoolean, TBoolean), TBoolean, null)(thisClass, "approxEqual") + registerScalaFunction("approxEqual", Array(TFloat64, TFloat64, TFloat64, TBoolean, TBoolean), TBoolean, (_: Type, _: Seq[PType]) => PBoolean())(thisClass, "approxEqual") - registerWrappedScalaFunction1("entropy", TString, TFloat64, null)(thisClass, "irentropy") + registerWrappedScalaFunction1("entropy", TString, TFloat64, (_: Type, _: PType) => PFloat64())(thisClass, "irentropy") - registerSCode4("fisher_exact_test", TInt32, TInt32, TInt32, TInt32, fetStruct.virtualType, - (_, _, _, _, _) => fetStruct.sType + registerPCode4("fisher_exact_test", TInt32, TInt32, TInt32, TInt32, fetStruct.virtualType, + (_, _, _, _, _) => fetStruct ) { case (r, cb, rt, a: SInt32Code, b: SInt32Code, c: SInt32Code, d: SInt32Code) => val res = cb.newLocal[Array[Double]]("fisher_exact_test_res", Code.invokeScalaObject4[Int, Int, Int, Int, Array[Double]](statsPackageClass, "fisherExactTest", @@ -187,8 +186,8 @@ object MathFunctions extends RegistryFunctions { ), deepCopy = false) } - registerSCode4("chi_squared_test", TInt32, TInt32, TInt32, TInt32, chisqStruct.virtualType, - (_, _, _, _, _) => chisqStruct.sType + registerPCode4("chi_squared_test", TInt32, TInt32, TInt32, TInt32, chisqStruct.virtualType, + (_, _, _, _, _) => chisqStruct ) { case (r, cb, rt, a: SInt32Code, b: SInt32Code, c: SInt32Code, d: SInt32Code) => val res = cb.newLocal[Array[Double]]("chi_squared_test_res", Code.invokeScalaObject4[Int, Int, Int, Int, Array[Double]](statsPackageClass, "chiSquaredTest", @@ -203,8 +202,8 @@ object MathFunctions extends RegistryFunctions { ), deepCopy = false) } - registerSCode5("contingency_table_test", TInt32, TInt32, TInt32, TInt32, TInt32, chisqStruct.virtualType, - (_, _, _, _, _, _) => chisqStruct.sType + registerPCode5("contingency_table_test", TInt32, TInt32, TInt32, TInt32, TInt32, chisqStruct.virtualType, + (_, _, _, _, _, _) => chisqStruct ) { case (r, cb, rt, a: SInt32Code, b: SInt32Code, c: SInt32Code, d: SInt32Code, mcc: SInt32Code) => val res = cb.newLocal[Array[Double]]("contingency_table_test_res", Code.invokeScalaObject5[Int, Int, Int, Int, Int, Array[Double]](statsPackageClass, "contingencyTableTest", @@ 
-220,8 +219,8 @@ object MathFunctions extends RegistryFunctions { ), deepCopy = false) } - registerSCode3("hardy_weinberg_test", TInt32, TInt32, TInt32, hweStruct.virtualType, - (_, _, _, _) => hweStruct.sType + registerPCode3("hardy_weinberg_test", TInt32, TInt32, TInt32, hweStruct.virtualType, + (_, _, _, _) => hweStruct ) { case (r, cb, rt, nHomRef: SInt32Code, nHet: SInt32Code, nHomVar: SInt32Code) => val res = cb.newLocal[Array[Double]]("hardy_weinberg_test_res", Code.invokeScalaObject3[Int, Int, Int, Array[Double]](statsPackageClass, "hardyWeinbergTest", diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/NDArrayFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/NDArrayFunctions.scala index 9e37321e4a7..1ba22a949a3 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/NDArrayFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/NDArrayFunctions.scala @@ -2,15 +2,14 @@ package is.hail.expr.ir.functions import is.hail.annotations.{Memory, Region} import is.hail.asm4s.{Code, Value} -import is.hail.expr.ir._ import is.hail.expr.{Nat, NatVariable} +import is.hail.expr.ir._ import is.hail.linalg.{LAPACK, LinalgCodeUtils} import is.hail.types.coerce -import is.hail.types.physical.stypes.EmitType -import is.hail.types.physical.stypes.concrete.{SBaseStructPointer, SNDArrayPointer} -import is.hail.types.physical.stypes.interfaces._ -import is.hail.types.physical.{PBooleanRequired, PCanonicalNDArray, PCanonicalStruct, PFloat64Required, PType} +import is.hail.types.physical.PCanonicalNDArray +import is.hail.types.physical.stypes.concrete.SNDArrayPointerSettable import is.hail.types.virtual._ +import is.hail.utils._ object NDArrayFunctions extends RegistryFunctions { override def registerAll() { @@ -36,75 +35,58 @@ object NDArrayFunctions extends RegistryFunctions { } } - def linear_solve(a: SNDArrayCode, b: SNDArrayCode, outputPt: PType, cb: EmitCodeBuilder, region: Value[Region]): (SNDArrayCode, Value[Int]) = { - val aInput = a.asNDArray.memoize(cb, "A") - val bInput = b.asNDArray.memoize(cb, "B") - - val aColMajor = LinalgCodeUtils.checkColMajorAndCopyIfNeeded(aInput, cb, region) - val bColMajor = LinalgCodeUtils.checkColMajorAndCopyIfNeeded(bInput, cb, region) - - val IndexedSeq(n0, n1) = aColMajor.shapes(cb) + registerIEmitCode2("linear_solve", TNDArray(TFloat64, Nat(2)), TNDArray(TFloat64, Nat(2)), TNDArray(TFloat64, Nat(2)), { (t, p1, p2) => p2 }) { case (cb, region, pt, aec, bec) => + aec.toI(cb).flatMap(cb){ apc => + bec.toI(cb).map(cb){ bpc => + val aInput = apc.asNDArray.memoize(cb, "A") + val bInput = bpc.asNDArray.memoize(cb, "B") - cb.ifx(n0 cne n1, cb._fatal("hail.nd.solve: matrix a must be square.")) + val aColMajor = LinalgCodeUtils.checkColMajorAndCopyIfNeeded(aInput, cb, region) + val bColMajor = LinalgCodeUtils.checkColMajorAndCopyIfNeeded(bInput, cb, region) - val IndexedSeq(n, nrhs) = bColMajor.shapes(cb) + val IndexedSeq(n0, n1) = aColMajor.shapes(cb) - cb.ifx(n0 cne n, cb._fatal("hail.nd.solve: Solve dimensions incompatible")) + cb.ifx(n0 cne n1, cb._fatal("hail.nd.solve: matrix a must be square.")) - val infoDGESVResult = cb.newLocal[Int]("dgesv_result") - val ipiv = cb.newLocal[Long]("dgesv_ipiv") - cb.assign(ipiv, Code.invokeStatic1[Memory, Long, Long]("malloc", n * 4L)) + val IndexedSeq(n, nrhs) = bColMajor.shapes(cb) - val aCopy = cb.newLocal[Long]("dgesv_a_copy") + cb.ifx(n0 cne n, cb._fatal("hail.nd.solve: Solve dimensions incompatible")) - def aNumBytes = n * n * 8L + val infoDGESVResult = cb.newLocal[Int]("dgesv_result") + 
val ipiv = cb.newLocal[Long]("dgesv_ipiv") + cb.assign(ipiv, Code.invokeStatic1[Memory, Long, Long]("malloc", n * 4L)) - cb.assign(aCopy, Code.invokeStatic1[Memory, Long, Long]("malloc", aNumBytes)) - val aColMajorFirstElement = aColMajor.firstDataAddress(cb) + val aCopy = cb.newLocal[Long]("dgesv_a_copy") + def aNumBytes = n * n * 8L + cb.assign(aCopy, Code.invokeStatic1[Memory, Long, Long]("malloc", aNumBytes)) + val aColMajorFirstElement = aColMajor.firstDataAddress(cb) - cb.append(Region.copyFrom(aColMajorFirstElement, aCopy, aNumBytes)) + cb.append(Region.copyFrom(aColMajorFirstElement, aCopy, aNumBytes)) - val outputPType = coerce[PCanonicalNDArray](outputPt) - val outputShape = IndexedSeq(n, nrhs) - val (outputAddress, outputFinisher) = outputPType.constructDataFunction(outputShape, outputPType.makeColumnMajorStrides(outputShape, region, cb), cb, region) + val outputPType = coerce[PCanonicalNDArray](pt) + val outputShape = IndexedSeq(n, nrhs) + val (outputAddress, outputFinisher) = outputPType.constructDataFunction(outputShape, outputPType.makeColumnMajorStrides(outputShape, region, cb), cb, region) - cb.append(Region.copyFrom(bColMajor.firstDataAddress(cb), outputAddress, n * nrhs * 8L)) + cb.append(Region.copyFrom(bColMajor.firstDataAddress(cb), outputAddress, n * nrhs * 8L)) - cb.assign(infoDGESVResult, Code.invokeScalaObject7[Int, Int, Long, Int, Long, Long, Int, Int](LAPACK.getClass, "dgesv", - n.toI, - nrhs.toI, - aCopy, - n.toI, - ipiv, - outputAddress, - n.toI - )) + cb.assign(infoDGESVResult, Code.invokeScalaObject7[Int, Int, Long, Int, Long, Long, Int, Int](LAPACK.getClass, "dgesv", + n.toI, + nrhs.toI, + aCopy, + n.toI, + ipiv, + outputAddress, + n.toI + )) - cb.append(Code.invokeStatic1[Memory, Long, Unit]("free", ipiv.load())) - cb.append(Code.invokeStatic1[Memory, Long, Unit]("free", aCopy.load())) + cb.ifx(infoDGESVResult cne 0, cb._fatal(s"hl.nd.solve: Could not solve, matrix was singular. dgesv error code ", infoDGESVResult.toS)) - (outputFinisher(cb), infoDGESVResult) - } + cb.append(Code.invokeStatic1[Memory, Long, Unit]("free", ipiv.load())) + cb.append(Code.invokeStatic1[Memory, Long, Unit]("free", aCopy.load())) - registerIEmitCode2("linear_solve_no_crash", TNDArray(TFloat64, Nat(2)), TNDArray(TFloat64, Nat(2)), TStruct(("solution", TNDArray(TFloat64, Nat(2))), ("failed", TBoolean)), - { (t, p1, p2) => EmitType(PCanonicalStruct(false, ("solution", PCanonicalNDArray(PFloat64Required, 2, false)), ("failed", PBooleanRequired)).sType, false) }) { - case (cb, region, SBaseStructPointer(outputStructType: PCanonicalStruct), aec, bec) => - aec.toI(cb).flatMap(cb) { apc => - bec.toI(cb).map(cb) { bpc => - val outputNDArrayPType = outputStructType.fieldType("solution") - val (resNDPCode, info) = linear_solve(apc.asNDArray, bpc.asNDArray, outputNDArrayPType, cb, region) - val ndEmitCode = EmitCode(Code._empty, info cne 0, resNDPCode) - outputStructType.constructFromFields(cb, region, IndexedSeq[EmitCode](ndEmitCode, EmitCode(Code._empty, false, primitive(info cne 0))), false) - } + outputFinisher(cb) } - } - - registerSCode2("linear_solve", TNDArray(TFloat64, Nat(2)), TNDArray(TFloat64, Nat(2)), TNDArray(TFloat64, Nat(2)), - { (t, p1, p2) => PCanonicalNDArray(PFloat64Required, 2, true).sType }) { - case (er, cb, SNDArrayPointer(pt), apc, bpc) => - val (resPCode, info) = linear_solve(apc.asNDArray, bpc.asNDArray, pt, cb, er.region) - cb.ifx(info cne 0, cb._fatal(s"hl.nd.solve: Could not solve, matrix was singular. 
dgesv error code ", info.toS)) - resPCode + } } } } diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/RandomSeededFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/RandomSeededFunctions.scala index 20e5ea4ecaf..e995a4a7f21 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/RandomSeededFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/RandomSeededFunctions.scala @@ -2,11 +2,8 @@ package is.hail.expr.ir.functions import is.hail.asm4s._ import is.hail.expr.ir.IEmitCode -import is.hail.types.physical.stypes._ -import is.hail.types.physical.stypes.concrete.SIndexablePointer import is.hail.types.physical.stypes.interfaces._ -import is.hail.types.physical.stypes.primitives._ -import is.hail.types.physical.{PBoolean, PCanonicalArray, PFloat64, PInt32, PType} +import is.hail.types.physical.{PBoolean, PCanonicalArray, PCode, PFloat64, PInt32, PType} import is.hail.types.virtual._ import net.sourceforge.jdistlib.rng.MersenneTwister import net.sourceforge.jdistlib.{Beta, Gamma, Poisson} @@ -59,28 +56,28 @@ object RandomSeededFunctions extends RegistryFunctions { def registerAll() { registerSeeded2("rand_unif", TFloat64, TFloat64, TFloat64, { - case (_: Type, _: SType, _: SType) => SFloat64 + case (_: Type, _: PType, _: PType) => PFloat64() }) { case (cb, r, rt, seed, min, max) => - primitive(cb.emb.newRNG(seed).invoke[Double, Double, Double]("runif", min.asDouble.doubleCode(cb), max.asDouble.doubleCode(cb))) + PCode(rt, cb.emb.newRNG(seed).invoke[Double, Double, Double]("runif", min.asDouble.doubleCode(cb), max.asDouble.doubleCode(cb))) } registerSeeded2("rand_norm", TFloat64, TFloat64, TFloat64, { - case (_: Type, _: SType, _: SType) => SFloat64 + case (_: Type, _: PType, _: PType) => PFloat64() }) { case (cb, r, rt, seed, mean, sd) => - primitive(cb.emb.newRNG(seed).invoke[Double, Double, Double]("rnorm", mean.asDouble.doubleCode(cb), sd.asDouble.doubleCode(cb))) + PCode(rt, cb.emb.newRNG(seed).invoke[Double, Double, Double]("rnorm", mean.asDouble.doubleCode(cb), sd.asDouble.doubleCode(cb))) } - registerSeeded1("rand_bool", TFloat64, TBoolean, (_: Type, _: SType) => SBoolean) { case (cb, r, rt, seed, p) => - primitive(cb.emb.newRNG(seed).invoke[Double, Boolean]("rcoin", p.asDouble.doubleCode(cb))) + registerSeeded1("rand_bool", TFloat64, TBoolean, (_: Type, _: PType) => PBoolean()) { case (cb, r, rt, seed, p) => + PCode(rt, cb.emb.newRNG(seed).invoke[Double, Boolean]("rcoin", p.asDouble.doubleCode(cb))) } - registerSeeded1("rand_pois", TFloat64, TFloat64, (_: Type, _: SType) => SFloat64) { case (cb, r, rt, seed, lambda) => - primitive(cb.emb.newRNG(seed).invoke[Double, Double]("rpois", lambda.asDouble.doubleCode(cb))) + registerSeeded1("rand_pois", TFloat64, TFloat64, (_: Type, _: PType) => PFloat64()) { case (cb, r, rt, seed, lambda) => + PCode(rt, cb.emb.newRNG(seed).invoke[Double, Double]("rpois", lambda.asDouble.doubleCode(cb))) } registerSeeded2("rand_pois", TInt32, TFloat64, TArray(TFloat64), { - case (_: Type, _: SType, _: SType) => PCanonicalArray(PFloat64(true)).sType - }) { case (cb, r, SIndexablePointer(rt: PCanonicalArray), seed, n, lambdaCode) => + case (_: Type, _: PType, _: PType) => PCanonicalArray(PFloat64(true)) + }) { case (cb, r, rt: PCanonicalArray, seed, n, lambdaCode) => val len = cb.newLocal[Int]("rand_pos_len", n.asInt.intCode(cb)) val lambda = cb.newLocal[Double]("rand_pois_lambda", lambdaCode.asDouble.doubleCode(cb)) @@ -90,15 +87,15 @@ object RandomSeededFunctions extends RegistryFunctions { } registerSeeded2("rand_beta", 
TFloat64, TFloat64, TFloat64, { - case (_: Type, _: SType, _: SType) => SFloat64 + case (_: Type, _: PType, _: PType) => PFloat64() }) { case (cb, r, rt, seed, a, b) => - primitive( + PCode(rt, cb.emb.newRNG(seed).invoke[Double, Double, Double]("rbeta", a.asDouble.doubleCode(cb), b.asDouble.doubleCode(cb))) } registerSeeded4("rand_beta", TFloat64, TFloat64, TFloat64, TFloat64, TFloat64, { - case (_: Type, _: SType, _: SType, _: SType, _: SType) => SFloat64 + case (_: Type, _: PType, _: PType, _: PType, _: PType) => PFloat64() }) { case (cb, r, rt, seed, a, b, min, max) => val rng = cb.emb.newRNG(seed) @@ -110,18 +107,19 @@ object RandomSeededFunctions extends RegistryFunctions { cb.whileLoop(value < lmin || value > lmax, { cb.assign(value, rng.invoke[Double, Double, Double]("rbeta", la, lb)) }) - primitive(value) + PCode(rt, value) } registerSeeded2("rand_gamma", TFloat64, TFloat64, TFloat64, { - case (_: Type, _: SType, _: SType) => SFloat64 + case (_: Type, _: PType, _: PType) => PFloat64() }) { case (cb, r, rt, seed, a, scale) => - primitive( + PCode( + rt, cb.emb.newRNG(seed).invoke[Double, Double, Double]("rgamma", a.asDouble.doubleCode(cb), scale.asDouble.doubleCode(cb)) ) } - registerSeeded1("rand_cat", TArray(TFloat64), TInt32, (_: Type, _: SType) => SInt32) { case (cb, r, rt, seed, aCode) => + registerSeeded1("rand_cat", TArray(TFloat64), TInt32, (_: Type, _: PType) => PInt32()) { case (cb, r, rt, seed, aCode) => val weights = aCode.asIndexable.memoize(cb, "rand_cat_weights") val len = weights.loadLength() @@ -135,7 +133,7 @@ object RandomSeededFunctions extends RegistryFunctions { ) cb.assign(i, i + 1) }) - primitive(cb.emb.newRNG(seed).invoke[Array[Double], Int]("rcat", a)) + PCode(rt, cb.emb.newRNG(seed).invoke[Array[Double], Int]("rcat", a)) } } } diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/ReferenceGenomeFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/ReferenceGenomeFunctions.scala index d563e778803..64fe56424ae 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/ReferenceGenomeFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/ReferenceGenomeFunctions.scala @@ -3,10 +3,6 @@ package is.hail.expr.ir.functions import is.hail.asm4s import is.hail.asm4s._ import is.hail.expr.ir._ -import is.hail.types.physical.stypes.SType -import is.hail.types.physical.stypes.concrete.SStringPointer -import is.hail.types.physical.stypes.primitives.{SBoolean, SInt32} -import is.hail.types.physical.stypes.interfaces._ import is.hail.types.physical.{PBoolean, PCanonicalString, PInt32, PLocus, PString, PType} import is.hail.types.virtual._ import is.hail.variant.ReferenceGenome @@ -15,25 +11,25 @@ object ReferenceGenomeFunctions extends RegistryFunctions { def rgCode(mb: EmitMethodBuilder[_], rg: ReferenceGenome): Code[ReferenceGenome] = mb.getReferenceGenome(rg) def registerAll() { - registerSCode1t("isValidContig", Array(LocusFunctions.tlocus("R")), TString, TBoolean, (_: Type, _: SType) => SBoolean) { - case (r, cb, Seq(tlocus: TLocus), _, contig) => - val scontig = contig.asString.loadString() - primitive(rgCode(r.mb, tlocus.asInstanceOf[TLocus].rg).invoke[String, Boolean]("isValidContig", scontig)) + registerCode1t("isValidContig", LocusFunctions.tlocus("R"), TString, TBoolean, (_: Type, _: PType) => PBoolean()) { + case (r, rt, tlocus, (contigT, contig: Code[Long])) => + val scontig = asm4s.coerce[String](wrapArg(r, contigT)(contig)) + rgCode(r.mb, tlocus.asInstanceOf[TLocus].rg).invoke[String, Boolean]("isValidContig", scontig) } - 
registerSCode2t("isValidLocus", Array(LocusFunctions.tlocus("R")), TString, TInt32, TBoolean, (_: Type, _: SType, _: SType) => SBoolean) { - case (r, cb, Seq(tlocus: TLocus), _, contig, pos) => - val scontig = contig.asString.loadString() - primitive(rgCode(r.mb, tlocus.rg).invoke[String, Int, Boolean]("isValidLocus", scontig, pos.asInt.intCode(cb))) + registerCode2t("isValidLocus", LocusFunctions.tlocus("R"), TString, TInt32, TBoolean, (_: Type, _: PType, _: PType) => PBoolean()) { + case (r, rt, typeArg: TLocus, (contigT, contig: Code[Long]), (posT, pos: Code[Int])) => + val scontig = asm4s.coerce[String](wrapArg(r, contigT)(contig)) + rgCode(r.mb, typeArg.rg).invoke[String, Int, Boolean]("isValidLocus", scontig, pos) } - registerSCode4t("getReferenceSequenceFromValidLocus", + registerPCode4t("getReferenceSequenceFromValidLocus", Array(LocusFunctions.tlocus("R")), TString, TInt32, TInt32, TInt32, TString, - (_: Type, _: SType, _: SType, _: SType, _: SType) => SStringPointer(PCanonicalString())) { - case (r, cb, Seq(typeParam: TLocus), st, contig, pos, before, after) => + (_: Type, _: PType, _: PType, _: PType, _: PType) => PCanonicalString()) { + case (r, cb, Seq(typeParam: TLocus), rt: PString, contig, pos, before, after) => val scontig = contig.asString.loadString() - unwrapReturn(cb, r.region, st, + unwrapReturn(cb, r.region, rt, rgCode(cb.emb, typeParam.rg).invoke[String, Int, Int, Int, String]("getSequence", scontig, pos.asInt.intCode(cb), @@ -41,10 +37,10 @@ object ReferenceGenomeFunctions extends RegistryFunctions { after.asInt.intCode(cb))) } - registerSCode1t("contigLength", Array(LocusFunctions.tlocus("R")), TString, TInt32, (_: Type, _: SType) => SInt32) { - case (r, cb, Seq(tlocus: TLocus), _, contig) => - val scontig = contig.asString.loadString() - primitive(rgCode(r.mb, tlocus.rg).invoke[String, Int]("contigLength", scontig)) + registerCode1t("contigLength", LocusFunctions.tlocus("R"), TString, TInt32, (_: Type, _: PType) => PInt32()) { + case (r, rt, typeArg: TLocus, (contigT, contig: Code[Long])) => + val scontig = asm4s.coerce[String](wrapArg(r, contigT)(contig)) + rgCode(r.mb, typeArg.rg).invoke[String, Int]("contigLength", scontig) } registerIR("getReferenceSequence", Array(TString, TInt32, TInt32, TInt32), TString, typeParameters = Array(LocusFunctions.tlocus("R"))) { diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/StringFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/StringFunctions.scala index 1f2f018be60..e79b3e6752c 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/StringFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/StringFunctions.scala @@ -3,15 +3,13 @@ package is.hail.expr.ir.functions import java.time.temporal.ChronoField import java.time.{Instant, ZoneId} import java.util.Locale + import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.JSONAnnotationImpex import is.hail.expr.ir._ import is.hail.types.physical._ -import is.hail.types.physical.stypes._ -import is.hail.types.physical.stypes.concrete.{SIndexablePointer, SStringPointer} -import is.hail.types.physical.stypes.interfaces._ -import is.hail.types.physical.stypes.primitives.{SBoolean, SInt32, SInt64} +import is.hail.types.physical.stypes.concrete.SStringPointer import is.hail.types.virtual._ import is.hail.utils._ import org.apache.spark.sql.Row @@ -100,16 +98,17 @@ object StringFunctions extends RegistryFunctions { def registerAll(): Unit = { val thisClass = getClass - registerSCode1("length", TString, TInt32, (_: Type, _: 
SType) => SInt32) { case (r: EmitRegion, cb, _, s: SStringCode) => - primitive(s.loadString().invoke[Int]("length")) + registerPCode1("length", TString, TInt32, (_: Type, _: PType) => PInt32()) { case (r: EmitRegion, cb, rt, s: PStringCode) => + PCode(rt, s.loadString().invoke[Int]("length")) } - registerSCode3("substring", TString, TInt32, TInt32, TString, { - (_: Type, _: SType, _: SType, _: SType) => SStringPointer(PCanonicalString()) + registerPCode3("substring", TString, TInt32, TInt32, TString, { + (_: Type, _: PType, _: PType, _: PType) => PCanonicalString() }) { - case (r: EmitRegion, cb, st: SString, s, start, end) => + case (r: EmitRegion, cb, rt: PString, s, start, end) => val str = s.asString.loadString().invoke[Int, Int, String]("substring", start.asInt.intCode(cb), end.asInt.intCode(cb)) + val st = SStringPointer(rt) st.constructFromString(cb, r.region, str) } @@ -141,112 +140,116 @@ object StringFunctions extends RegistryFunctions { registerIR2("sliceRight", TString, TInt32, TString) { (_, s, start) => invoke("slice", TString, s, start, invoke("length", TInt32, s)) } registerIR2("sliceLeft", TString, TInt32, TString) { (_, s, end) => invoke("slice", TString, s, I32(0), end) } - registerSCode1("str", tv("T"), TString, (_: Type, _: SType) => SStringPointer(PCanonicalString())) { case (r, cb, st: SString, a) => + registerPCode1("str", tv("T"), TString, (_: Type, _: PType) => PCanonicalString()) { case (r, cb, rt: PString, a) => val annotation = scodeToJavaValue(cb, r.region, a) - val str = cb.emb.getType(a.st.virtualType).invoke[Any, String]("str", annotation) + val str = cb.emb.getType(a.pt.virtualType).invoke[Any, String]("str", annotation) + val st = SStringPointer(rt) st.constructFromString(cb, r.region, str) } registerIEmitCode1("showStr", tv("T"), TString, { - (_: Type, _: EmitType) => EmitType(SStringPointer(PCanonicalString()), true) - }) { case (cb, r, st: SString, a) => - val jObj = cb.newLocal("showstr_java_obj")(boxedTypeInfo(a.st.virtualType)) + (_: Type, _: PType) => PCanonicalString(true) + }) { case (cb, r, rt: PCanonicalString, a) => + val jObj = cb.newLocal("showstr_java_obj")(boxedTypeInfo(a.pt.virtualType)) a.toI(cb).consume(cb, - cb.assignAny(jObj, Code._null(boxedTypeInfo(a.st.virtualType))), + cb.assignAny(jObj, Code._null(boxedTypeInfo(a.pt.virtualType))), sc => cb.assignAny(jObj, scodeToJavaValue(cb, r, sc))) - val str = cb.emb.getType(a.st.virtualType).invoke[Any, String]("showStr", jObj) + val str = cb.emb.getType(a.pt.virtualType).invoke[Any, String]("showStr", jObj) + val st = SStringPointer(rt) IEmitCode.present(cb, st.constructFromString(cb, r, str)) } registerIEmitCode2("showStr", tv("T"), TInt32, TString, { - (_: Type, _: EmitType, truncType: EmitType) => EmitType(SStringPointer(PCanonicalString()), truncType.required) - }) { case (cb, r, st: SString, a, trunc) => - val jObj = cb.newLocal("showstr_java_obj")(boxedTypeInfo(a.st.virtualType)) + (_: Type, _: PType, truncType: PType) => PCanonicalString(truncType.required) + }) { case (cb, r, rt: PCanonicalString, a, trunc) => + val jObj = cb.newLocal("showstr_java_obj")(boxedTypeInfo(a.pt.virtualType)) trunc.toI(cb).map(cb) { trunc => a.toI(cb).consume(cb, - cb.assignAny(jObj, Code._null(boxedTypeInfo(a.st.virtualType))), + cb.assignAny(jObj, Code._null(boxedTypeInfo(a.pt.virtualType))), sc => cb.assignAny(jObj, scodeToJavaValue(cb, r, sc))) - val str = cb.emb.getType(a.st.virtualType).invoke[Any, Int, String]("showStr", jObj, trunc.asInt.intCode(cb)) + val str = 
cb.emb.getType(a.pt.virtualType).invoke[Any, Int, String]("showStr", jObj, trunc.asInt.intCode(cb)) + val st = SStringPointer(rt) + st.constructFromString(cb, r, str) } } - registerIEmitCode1("json", tv("T"), TString, (_: Type, _: EmitType) => EmitType(SStringPointer(PCanonicalString()), true)) { - case (cb, r, st: SString, a) => - val ti = boxedTypeInfo(a.st.virtualType) - val inputJavaValue = cb.newLocal("json_func_input_jv")(ti) - a.toI(cb).consume(cb, - cb.assignAny(inputJavaValue, Code._null(ti)), - { sc => - val jv = scodeToJavaValue(cb, r, sc) - cb.assignAny(inputJavaValue, jv) - }) - val json = cb.emb.getType(a.st.virtualType).invoke[Any, JValue]("toJSON", inputJavaValue) - val str = Code.invokeScalaObject1[JValue, String](JsonMethods.getClass, "compact", json) - IEmitCode.present(cb, st.constructFromString(cb, r, str)) + registerIEmitCode1("json", tv("T"), TString, (_: Type, _: PType) => PCanonicalString(true)) { case (cb, r, rt: PString, a) => + val ti = boxedTypeInfo(a.pt.sType.virtualType) + val inputJavaValue = cb.newLocal("json_func_input_jv")(ti) + a.toI(cb).consume(cb, + cb.assignAny(inputJavaValue, Code._null(ti)), + { sc => + val jv = scodeToJavaValue(cb, r, sc) + cb.assignAny(inputJavaValue, jv) + }) + val json = cb.emb.getType(a.pt.sType.virtualType).invoke[Any, JValue]("toJSON", inputJavaValue) + val str = Code.invokeScalaObject1[JValue, String](JsonMethods.getClass, "compact", json) + val st = SStringPointer(rt) + IEmitCode.present(cb, st.constructFromString(cb, r, str)) } - registerWrappedScalaFunction1("reverse", TString, TString, (_: Type, _: SType) => SStringPointer(PCanonicalString()))(thisClass, "reverse") - registerWrappedScalaFunction1("upper", TString, TString, (_: Type, _: SType) => SStringPointer(PCanonicalString()))(thisClass, "upper") - registerWrappedScalaFunction1("lower", TString, TString, (_: Type, _: SType) => SStringPointer(PCanonicalString()))(thisClass, "lower") - registerWrappedScalaFunction1("strip", TString, TString, (_: Type, _: SType) => SStringPointer(PCanonicalString()))(thisClass, "strip") + registerWrappedScalaFunction1("reverse", TString, TString, (_: Type, _: PType) => PCanonicalString())(thisClass, "reverse") + registerWrappedScalaFunction1("upper", TString, TString, (_: Type, _: PType) => PCanonicalString())(thisClass, "upper") + registerWrappedScalaFunction1("lower", TString, TString, (_: Type, _: PType) => PCanonicalString())(thisClass, "lower") + registerWrappedScalaFunction1("strip", TString, TString, (_: Type, _: PType) => PCanonicalString())(thisClass, "strip") registerWrappedScalaFunction2("contains", TString, TString, TBoolean, { - case (_: Type, _: SType, _: SType) => SBoolean + case (_: Type, _: PType, _: PType) => PBoolean() })(thisClass, "contains") registerWrappedScalaFunction2("translate", TString, TDict(TString, TString), TString, { - case (_: Type, _: SType, _: SType) => SStringPointer(PCanonicalString()) + case (_: Type, _: PType, _: PType) => PCanonicalString() })(thisClass, "translate") registerWrappedScalaFunction2("startswith", TString, TString, TBoolean, { - case (_: Type, _: SType, _: SType) => SBoolean + case (_: Type, _: PType, _: PType) => PBoolean() })(thisClass, "startswith") registerWrappedScalaFunction2("endswith", TString, TString, TBoolean, { - case (_: Type, _: SType, _: SType) => SBoolean + case (_: Type, _: PType, _: PType) => PBoolean() })(thisClass, "endswith") registerWrappedScalaFunction2("regexMatch", TString, TString, TBoolean, { - case (_: Type, _: SType, _: SType) => SBoolean + case (_: Type, 
_: PType, _: PType) => PBoolean() })(thisClass, "regexMatch") registerWrappedScalaFunction2("concat", TString, TString, TString, { - case (_: Type, _: SType, _: SType) => SStringPointer(PCanonicalString()) + case (_: Type, _: PType, _: PType) => PCanonicalString() })(thisClass, "concat") registerWrappedScalaFunction2("split", TString, TString, TArray(TString), { - case (_: Type, _: SType, _: SType) => - PCanonicalArray(PCanonicalString(true)).sType + case (_: Type, _: PType, _: PType) => + PCanonicalArray(PCanonicalString(true)) })(thisClass, "split") registerWrappedScalaFunction3("split", TString, TString, TInt32, TArray(TString), { - case (_: Type, _: SType, _: SType, _: SType) => - PCanonicalArray(PCanonicalString(true)).sType + case (_: Type, _: PType, _: PType, _: PType) => + PCanonicalArray(PCanonicalString(true)) })(thisClass, "splitLimited") registerWrappedScalaFunction3("replace", TString, TString, TString, TString, { - case (_: Type, _: SType, _: SType, _: SType) => SStringPointer(PCanonicalString()) + case (_: Type, _: PType, _: PType, _: PType) => PCanonicalString() })(thisClass, "replace") registerWrappedScalaFunction2("mkString", TSet(TString), TString, TString, { - case (_: Type, _: SType, _: SType) => SStringPointer(PCanonicalString()) + case (_: Type, _: PType, _: PType) => PCanonicalString() })(thisClass, "setMkString") registerWrappedScalaFunction2("mkString", TArray(TString), TString, TString, { - case (_: Type, _: SType, _: SType) => SStringPointer(PCanonicalString()) + case (_: Type, _: PType, _: PType) => PCanonicalString() })(thisClass, "arrayMkString") registerIEmitCode2("firstMatchIn", TString, TString, TArray(TString), { - case (_: Type, _: EmitType, _: EmitType) => EmitType(PCanonicalArray(PCanonicalString(true)).sType, false) - }) { case (cb: EmitCodeBuilder, region: Value[Region], SIndexablePointer(rt: PCanonicalArray), + case (_: Type, _: PType, _: PType) => PCanonicalArray(PCanonicalString(true)) + }) { case (cb: EmitCodeBuilder, region: Value[Region], rt: PCanonicalArray, s: EmitCode, r: EmitCode) => - s.toI(cb).flatMap(cb) { case sc: SStringCode => - r.toI(cb).flatMap(cb) { case rc: SStringCode => + s.toI(cb).flatMap(cb) { case sc: PStringCode => + r.toI(cb).flatMap(cb) { case rc: PStringCode => val out = cb.newLocal[IndexedSeq[String]]("out", Code.invokeScalaObject2[String, String, IndexedSeq[String]]( thisClass, "firstMatchIn", sc.loadString(), rc.loadString())) IEmitCode(cb, out.isNull, { val len = cb.newLocal[Int]("len", out.invoke[Int]("size")) - val eltType = rt.elementType.setRequired(false).asInstanceOf[PCanonicalString] + val eltType = rt.elementType.asInstanceOf[PCanonicalString] val sstring = SStringPointer(eltType) rt.constructFromElements(cb, region, len, deepCopy = false) { (cb, idx) => val elt = cb.newLocal[String]("first_match_elt", out.invoke[Int, String]("apply", idx)) @@ -258,11 +261,11 @@ object StringFunctions extends RegistryFunctions { } registerEmitCode2("hamming", TString, TString, TInt32, { - case (_: Type, _: EmitType, _: EmitType) => EmitType(SInt32, false) + case (_: Type, _: PType, _: PType) => PInt32() }) { case (r: EmitRegion, rt, e1: EmitCode, e2: EmitCode) => EmitCode.fromI(r.mb) { cb => - e1.toI(cb).flatMap(cb) { case (sc1: SStringCode) => - e2.toI(cb).flatMap(cb) { case (sc2: SStringCode) => + e1.toI(cb).flatMap(cb) { case (sc1: PStringCode) => + e2.toI(cb).flatMap(cb) { case (sc2: PStringCode) => val n = cb.newLocal("hamming_n", 0) val i = cb.newLocal("hamming_i", 0) @@ -277,24 +280,24 @@ object StringFunctions extends 
RegistryFunctions { cb.assign(n, n + 1)) cb.assign(i, i + 1) }) - primitive(n) + PCode(rt, n) }) } } } } - registerWrappedScalaFunction1("escapeString", TString, TString, (_: Type, _: SType) => SStringPointer(PCanonicalString()))(thisClass, "escapeString") + registerWrappedScalaFunction1("escapeString", TString, TString, (_: Type, _: PType) => PCanonicalString())(thisClass, "escapeString") registerWrappedScalaFunction3("strftime", TString, TInt64, TString, TString, { - case (_: Type, _: SType, _: SType, _: SType) => SStringPointer(PCanonicalString()) + case (_: Type, _: PType, _: PType, _: PType) => PCanonicalString() })(thisClass, "strftime") registerWrappedScalaFunction3("strptime", TString, TString, TString, TInt64, { - case (_: Type, _: SType, _: SType, _: SType) => SInt64 + case (_: Type, _: PType, _: PType, _: PType) => PInt64() })(thisClass, "strptime") - registerSCode("parse_json", Array(TString), TTuple(tv("T")), - (rType: Type, _: Seq[SType]) => SType.canonical(rType), typeParameters = Array(tv("T")) - ) { case (er, cb, _, resultType, Array(s: SStringCode)) => + registerPCode("parse_json", Array(TString), TTuple(tv("T")), + (rType: Type, _: Seq[PType]) => PType.canonical(rType, true), typeParameters = Array(tv("T")) + ) { case (er, cb, _, resultType, Array(s: PStringCode)) => val warnCtx = cb.emb.genFieldThisRef[mutable.HashSet[String]]("parse_json_context") cb.ifx(warnCtx.load().isNull, cb.assign(warnCtx, Code.newInstance[mutable.HashSet[String]]())) diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/UtilFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/UtilFunctions.scala index 7197afbe67c..1d558fcc68b 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/UtilFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/UtilFunctions.scala @@ -4,12 +4,11 @@ import is.hail.asm4s import is.hail.asm4s.{coerce => _, _} import is.hail.expr.ir._ import is.hail.types.physical._ -import is.hail.types.physical.stypes._ -import is.hail.types.physical.stypes.primitives._ import is.hail.types.physical.stypes.concrete.SStringPointer import is.hail.utils._ import is.hail.types.virtual._ import is.hail.types.physical.stypes.interfaces._ +import is.hail.types.physical.stypes.primitives.SPrimitive import org.apache.spark.sql.Row import scala.reflect.ClassTag @@ -22,91 +21,53 @@ object UtilFunctions extends RegistryFunctions { def parseInt64(s: String): Long = s.toLong - def parseSpecialNum32(s: String): Float = { - s.length match { - case 3 => - if (s.equalsCaseInsensitive("nan")) return Float.NaN - if (s.equalsCaseInsensitive("inf")) return Float.PositiveInfinity - case 4 => - if (s.equalsCaseInsensitive("+nan") || s.equalsCaseInsensitive("-nan")) return Float.NaN - if (s.equalsCaseInsensitive("+inf")) return Float.PositiveInfinity - if (s.equalsCaseInsensitive("-inf")) return Float.NegativeInfinity - case 8 => - if (s.equalsCaseInsensitive("infinity")) return Float.PositiveInfinity - case 9 => - if (s.equalsCaseInsensitive("+infinity")) return Float.PositiveInfinity - if (s.equalsCaseInsensitive("-infinity")) return Float.NegativeInfinity - case _ => - } - throw new NumberFormatException(s"cannot parse float32 from $s") + private val NAN = 1 + private val POS_INF = 2 + private val NEG_INF = 3 + + def parseSpecialNum(s: String): Int = s.length match { + case 3 if s equalsCI "nan" => NAN + case 4 if (s equalsCI "+nan") || (s equalsCI "-nan") => NAN + case 3 if s equalsCI "inf" => POS_INF + case 4 if s equalsCI "+inf" => POS_INF + case 4 if s equalsCI "-inf" => 
NEG_INF + case 8 if s equalsCI "infinity" => POS_INF + case 9 if s equalsCI "+infinity" => POS_INF + case 9 if s equalsCI "-infinity" => NEG_INF + case _ => 0 } - def parseSpecialNum64(s: String): Double = { - s.length match { - case 3 => - if (s.equalsCaseInsensitive("nan")) return Double.NaN - if (s.equalsCaseInsensitive("inf")) return Double.PositiveInfinity - case 4 => - if (s.equalsCaseInsensitive("+nan") || s.equalsCaseInsensitive("-nan")) return Double.NaN - if (s.equalsCaseInsensitive("+inf")) return Double.PositiveInfinity - if (s.equalsCaseInsensitive("-inf")) return Double.NegativeInfinity - case 8 => - if (s.equalsCaseInsensitive("infinity")) return Double.PositiveInfinity - case 9 => - if (s.equalsCaseInsensitive("+infinity")) return Double.PositiveInfinity - if (s.equalsCaseInsensitive("-infinity")) return Double.NegativeInfinity - case _ => - } - throw new NumberFormatException(s"cannot parse float64 from $s") + def parseFloat32(s: String): Float = parseSpecialNum(s) match { + case NAN => Float.NaN + case POS_INF => Float.PositiveInfinity + case NEG_INF => Float.NegativeInfinity + case _ => s.toFloat } - def parseFloat32(s: String): Float = { - try { - s.toFloat - } catch { - case _: NumberFormatException => - parseSpecialNum32(s) - } - } - - def parseFloat64(s: String): Double = { - try { - s.toDouble - } catch { - case _: NumberFormatException => - parseSpecialNum64(s) - } + def parseFloat64(s: String): Double = parseSpecialNum(s) match { + case NAN => Double.NaN + case POS_INF => Double.PositiveInfinity + case NEG_INF => Double.NegativeInfinity + case _ => s.toDouble } def isValidBoolean(s: String): Boolean = - (s.equalsCaseInsensitive("true") || s.equalsCaseInsensitive("false")) + (s equalsCI "true") || (s equalsCI "false") def isValidInt32(s: String): Boolean = - try { - s.toInt; true - } catch { - case _: NumberFormatException => false - } + try { s.toInt; true } catch { case _: NumberFormatException => false } def isValidInt64(s: String): Boolean = - try { - s.toLong; true - } catch { - case _: NumberFormatException => false - } + try { s.toLong; true } catch { case _: NumberFormatException => false } - def isValidFloat32(s: String): Boolean = try { - parseFloat32(s) - true - } catch { - case _: NumberFormatException => false + def isValidFloat32(s: String): Boolean = parseSpecialNum(s) match { + case 0 => try { s.toFloat; true } catch { case _: NumberFormatException => false } + case _ => true } - def isValidFloat64(s: String): Boolean = try { - parseFloat64(s) - true - } catch { - case _: NumberFormatException => false + def isValidFloat64(s: String): Boolean = parseSpecialNum(s) match { + case 0 => try { s.toDouble; true } catch { case _: NumberFormatException => false } + case _ => true } def min_ignore_missing(l: Int, lMissing: Boolean, r: Int, rMissing: Boolean): Int = @@ -167,46 +128,47 @@ object UtilFunctions extends RegistryFunctions { def registerAll() { val thisClass = getClass - registerSCode4("valuesSimilar", tv("T"), tv("U"), TFloat64, TBoolean, TBoolean, { - case (_: Type, _: SType, _: SType, _: SType, _: SType) => SBoolean + registerPCode4("valuesSimilar", tv("T"), tv("U"), TFloat64, TBoolean, TBoolean, { + case (_: Type, _: PType, _: PType, _: PType, _: PType) => PBoolean() }) { case (er, cb, rt, l, r, tol, abs) => - assert(l.st.virtualType == r.st.virtualType, s"\n lt=${ l.st.virtualType }\n rt=${ r.st.virtualType }") + assert(l.pt.virtualType == r.pt.virtualType, s"\n lt=${ l.pt.virtualType }\n rt=${ r.pt.virtualType }") val lb = 
scodeToJavaValue(cb, er.region, l) val rb = scodeToJavaValue(cb, er.region, r) primitive(er.mb.getType(l.st.virtualType).invoke[Any, Any, Double, Boolean, Boolean]("valuesSimilar", lb, rb, tol.asDouble.doubleCode(cb), abs.asBoolean.boolCode(cb))) } - registerCode1("triangle", TInt32, TInt32, (_: Type, _: SType) => SInt32) { case (cb, _, rt, nn) => - val n = cb.newLocal[Int]("triangle_n", nn.asInt.intCode(cb)) - (n * (n + 1)) / 2 + registerCode1[Int]("triangle", TInt32, TInt32, (_: Type, n: PType) => n) { case (_, rt, (nT, n: Code[Int])) => + Code.memoize(n, "triangle_n") { n => + (n * (n + 1)) / 2 + } } - registerSCode1("toInt32", TBoolean, TInt32, (_: Type, _: SType) => SInt32) { case (_, cb, _, x) => primitive(x.asBoolean.boolCode(cb).toI) } - registerSCode1("toInt64", TBoolean, TInt64, (_: Type, _: SType) => SInt64) { case (_, cb, _, x) => primitive(x.asBoolean.boolCode(cb).toI.toL) } - registerSCode1("toFloat32", TBoolean, TFloat32, (_: Type, _: SType) => SFloat32) { case (_, cb, _, x) => primitive(x.asBoolean.boolCode(cb).toI.toF) } - registerSCode1("toFloat64", TBoolean, TFloat64, (_: Type, _: SType) => SFloat64) { case (_, cb, _, x) => primitive(x.asBoolean.boolCode(cb).toI.toD) } - - for ((name, t, rpt, ct) <- Seq[(String, Type, SType, ClassTag[_])]( - ("Boolean", TBoolean, SBoolean, implicitly[ClassTag[Boolean]]), - ("Int32", TInt32, SInt32, implicitly[ClassTag[Int]]), - ("Int64", TInt64, SInt64, implicitly[ClassTag[Long]]), - ("Float64", TFloat64, SFloat64, implicitly[ClassTag[Double]]), - ("Float32", TFloat32, SFloat32, implicitly[ClassTag[Float]]) + registerCode1[Boolean]("toInt32", TBoolean, TInt32, (_: Type, _: PType) => PInt32()) { case (_, rt, (xT, x: Code[Boolean])) => x.toI } + registerCode1[Boolean]("toInt64", TBoolean, TInt64, (_: Type, _: PType) => PInt64()) { case (_, rt, (xT, x: Code[Boolean])) => x.toI.toL } + registerCode1[Boolean]("toFloat32", TBoolean, TFloat32, (_: Type, _: PType) => PFloat32()) { case (_, rt, (xT, x: Code[Boolean])) => x.toI.toF } + registerCode1[Boolean]("toFloat64", TBoolean, TFloat64, (_: Type, _: PType) => PFloat64()) { case (_, rt, (xT, x: Code[Boolean])) => x.toI.toD } + + for ((name, t, rpt, ct) <- Seq[(String, Type, PType, ClassTag[_])]( + ("Boolean", TBoolean, PBoolean(), implicitly[ClassTag[Boolean]]), + ("Int32", TInt32, PInt32(), implicitly[ClassTag[Int]]), + ("Int64", TInt64, PInt64(), implicitly[ClassTag[Long]]), + ("Float64", TFloat64, PFloat64(), implicitly[ClassTag[Double]]), + ("Float32", TFloat32, PFloat32(), implicitly[ClassTag[Float]]) )) { - val ctString: ClassTag[String] = implicitly[ClassTag[String]] - registerSCode1(s"to$name", TString, t, (_: Type, _: SType) => rpt) { - case (r, cb, rt, x: SStringCode) => + val ctString: ClassTag[String] = implicitly + registerPCode1(s"to$name", TString, t, (_: Type, _: PType) => rpt) { + case (r, cb, rt, x: PStringCode) => val s = x.loadString() - primitive(rt.virtualType, Code.invokeScalaObject1(thisClass, s"parse$name", s)(ctString, ct)) + PCode(rt, Code.invokeScalaObject1(thisClass, s"parse$name", s)(ctString, ct)) } - registerIEmitCode1(s"to${name}OrMissing", TString, t, (_: Type, xPT: EmitType) => EmitType(rpt, xPT.required)) { + registerIEmitCode1(s"to${name}OrMissing", TString, t, (_: Type, xPT: PType) => rpt.setRequired(xPT.required)) { case (cb, r, rt, x) => - x.toI(cb).flatMap(cb) { case (sc: SStringCode) => + x.toI(cb).flatMap(cb) { case (sc: PStringCode) => val sv = cb.newLocal[String]("s", sc.loadString()) IEmitCode(cb, !Code.invokeScalaObject1[String, Boolean](thisClass, 
s"isValid$name", sv), - primitive(rt.virtualType, Code.invokeScalaObject1(thisClass, s"parse$name", sv)(ctString, ct))) + PCode(rt, Code.invokeScalaObject1(thisClass, s"parse$name", sv)(ctString, ct))) } } } @@ -217,31 +179,31 @@ object UtilFunctions extends RegistryFunctions { } Array("min", "max").foreach { name => - registerCode2(name, TFloat32, TFloat32, TFloat32, (_: Type, _: SType, _: SType) => SFloat32) { - case (cb, r, rt, v1, v2) => - Code.invokeStatic2[Math, Float, Float, Float](name, v1.asFloat.floatCode(cb), v2.asFloat.floatCode(cb)) + registerCode2(name, TFloat32, TFloat32, TFloat32, (_: Type, _: PType, _: PType) => PFloat32()) { + case (r, rt, (t1, v1: Code[Float]), (t2, v2: Code[Float])) => + Code.invokeStatic2[Math, Float, Float, Float](name, v1, v2) } - registerCode2(name, TFloat64, TFloat64, TFloat64, (_: Type, _: SType, _: SType) => SFloat64) { - case (cb, r, rt, v1, v2) => - Code.invokeStatic2[Math, Double, Double, Double](name, v1.asDouble.doubleCode(cb), v2.asDouble.doubleCode(cb)) + registerCode2(name, TFloat64, TFloat64, TFloat64, (_: Type, _: PType, _: PType) => PFloat64()) { + case (r, rt, (t1, v1: Code[Double]), (t2, v2: Code[Double])) => + Code.invokeStatic2[Math, Double, Double, Double](name, v1, v2) } val ignoreMissingName = name + "_ignore_missing" val ignoreNanName = "nan" + name val ignoreBothName = ignoreNanName + "_ignore_missing" - registerCode2(ignoreNanName, TFloat32, TFloat32, TFloat32, (_: Type, _: SType, _: SType) => SFloat32) { - case (cb, r, rt, v1, v2) => - Code.invokeScalaObject2[Float, Float, Float](thisClass, ignoreNanName, v1.asFloat.floatCode(cb), v2.asFloat.floatCode(cb)) + registerCode2(ignoreNanName, TFloat32, TFloat32, TFloat32, (_: Type, _: PType, _: PType) => PFloat32()) { + case (r, rt, (t1, v1: Code[Float]), (t2, v2: Code[Float])) => + Code.invokeScalaObject2[Float, Float, Float](thisClass, ignoreNanName, v1, v2) } - registerCode2(ignoreNanName, TFloat64, TFloat64, TFloat64, (_: Type, _: SType, _: SType) => SFloat64) { - case (cb, r, rt, v1, v2) => - Code.invokeScalaObject2[Double, Double, Double](thisClass, ignoreNanName, v1.asDouble.doubleCode(cb), v2.asDouble.doubleCode(cb)) + registerCode2(ignoreNanName, TFloat64, TFloat64, TFloat64, (_: Type, _: PType, _: PType) => PFloat64()) { + case (r, rt, (t1, v1: Code[Double]), (t2, v2: Code[Double])) => + Code.invokeScalaObject2[Double, Double, Double](thisClass, ignoreNanName, v1, v2) } - def ignoreMissingTriplet[T](cb: EmitCodeBuilder, rt: SType, v1: EmitCode, v2: EmitCode, name: String, f: (Code[T], Code[T]) => Code[T])(implicit ct: ClassTag[T], ti: TypeInfo[T]): IEmitCode = { + def ignoreMissingTriplet[T](cb: EmitCodeBuilder, rt: PType, v1: EmitCode, v2: EmitCode, name: String, f: (Code[T], Code[T]) => Code[T])(implicit ct: ClassTag[T], ti: TypeInfo[T]): IEmitCode = { val value = cb.newLocal[T](s"ignore_missing_${ name }_value") val v1Value = v1.toI(cb).memoize(cb, "ignore_missing_v1") val v2Value = v2.toI(cb).memoize(cb, "ignore_missing_v2") @@ -265,43 +227,43 @@ object UtilFunctions extends RegistryFunctions { }) cb.goto(Ldefined) - IEmitCode(Lmissing, Ldefined, primitive(rt.virtualType, value.load()), v1.required || v2.required) + IEmitCode(Lmissing, Ldefined, PCode(rt, value.load()), v1.required || v2.required) } - registerIEmitCode2(ignoreMissingName, TInt32, TInt32, TInt32, (_: Type, t1: EmitType, t2: EmitType) => EmitType(SInt32, t1.required || t2.required)) { + registerIEmitCode2(ignoreMissingName, TInt32, TInt32, TInt32, (_: Type, t1: PType, t2: PType) => PInt32(t1.required 
|| t2.required)) { case (cb, r, rt, v1, v2) => ignoreMissingTriplet[Int](cb, rt, v1, v2, name, Code.invokeStatic2[Math, Int, Int, Int](name, _, _)) } - registerIEmitCode2(ignoreMissingName, TInt64, TInt64, TInt64, (_: Type, t1: EmitType, t2: EmitType) => EmitType(SInt64, t1.required || t2.required)) { + registerIEmitCode2(ignoreMissingName, TInt64, TInt64, TInt64, (_: Type, t1: PType, t2: PType) => PInt64(t1.required || t2.required)) { case (cb, r, rt, v1, v2) => ignoreMissingTriplet[Long](cb, rt, v1, v2, name, Code.invokeStatic2[Math, Long, Long, Long](name, _, _)) } - registerIEmitCode2(ignoreMissingName, TFloat32, TFloat32, TFloat32, (_: Type, t1: EmitType, t2: EmitType) => EmitType(SFloat32, t1.required || t2.required)) { + registerIEmitCode2(ignoreMissingName, TFloat32, TFloat32, TFloat32, (_: Type, t1: PType, t2: PType) => PFloat32(t1.required || t2.required)) { case (cb, r, rt, v1, v2) => ignoreMissingTriplet[Float](cb, rt, v1, v2, name, Code.invokeStatic2[Math, Float, Float, Float](name, _, _)) } - registerIEmitCode2(ignoreMissingName, TFloat64, TFloat64, TFloat64, (_: Type, t1: EmitType, t2: EmitType) => EmitType(SFloat64, t1.required || t2.required)) { + registerIEmitCode2(ignoreMissingName, TFloat64, TFloat64, TFloat64, (_: Type, t1: PType, t2: PType) => PFloat64(t1.required || t2.required)) { case (cb, r, rt, v1, v2) => ignoreMissingTriplet[Double](cb, rt, v1, v2, name, Code.invokeStatic2[Math, Double, Double, Double](name, _, _)) } - registerIEmitCode2(ignoreBothName, TFloat32, TFloat32, TFloat32, (_: Type, t1: EmitType, t2: EmitType) => EmitType(SFloat32, t1.required || t2.required)) { + registerIEmitCode2(ignoreBothName, TFloat32, TFloat32, TFloat32, (_: Type, t1: PType, t2: PType) => PFloat32(t1.required || t2.required)) { case (cb, r, rt, v1, v2) => ignoreMissingTriplet[Float](cb, rt, v1, v2, ignoreNanName, Code.invokeScalaObject2[Float, Float, Float](thisClass, ignoreNanName, _, _)) } - registerIEmitCode2(ignoreBothName, TFloat64, TFloat64, TFloat64, (_: Type, t1: EmitType, t2: EmitType) => EmitType(SFloat64, t1.required || t2.required)) { + registerIEmitCode2(ignoreBothName, TFloat64, TFloat64, TFloat64, (_: Type, t1: PType, t2: PType) => PFloat64(t1.required || t2.required)) { case (cb, r, rt, v1, v2) => ignoreMissingTriplet[Double](cb, rt, v1, v2, ignoreNanName, Code.invokeScalaObject2[Double, Double, Double](thisClass, ignoreNanName, _, _)) } } - registerSCode2("format", TString, tv("T", "tuple"), TString, (_: Type, _: SType, _: SType) => PCanonicalString().sType) { - case (r, cb, SStringPointer(rt: PCanonicalString), format, args) => + registerPCode2("format", TString, tv("T", "tuple"), TString, (_: Type, _: PType, _: PType) => PCanonicalString()) { + case (r, cb, rt: PCanonicalString, format, args) => val javaObjArgs = Code.checkcast[Row](scodeToJavaValue(cb, r.region, args)) val formatted = Code.invokeScalaObject2[String, Row, String](thisClass, "format", format.asString.loadString(), javaObjArgs) val st = SStringPointer(rt) st.constructFromString(cb, r.region, formatted) } - registerIEmitCode2("land", TBoolean, TBoolean, TBoolean, (_: Type, tl: EmitType, tr: EmitType) => EmitType(SBoolean, tl.required && tr.required)) { + registerIEmitCode2("land", TBoolean, TBoolean, TBoolean, (_: Type, tl: PType, tr: PType) => PBoolean(tl.required && tr.required)) { case (cb, _, rt, l, r) => // 00 ... 
00 rv rm lv lm @@ -329,10 +291,10 @@ object UtilFunctions extends RegistryFunctions { val Lpresent = CodeLabel() val Lmissing = CodeLabel() cb.ifx(((M >> w) & 1).cne(0), cb.goto(Lmissing), cb.goto(Lpresent)) - IEmitCode(Lmissing, Lpresent, primitive(w.ceq(10)), l.required && r.required) + IEmitCode(Lmissing, Lpresent, PCode(rt, w.ceq(10)), l.required && r.required) } - registerIEmitCode2("lor", TBoolean, TBoolean, TBoolean, (_: Type, tl: EmitType, tr: EmitType) => EmitType(SBoolean, tl.required && tr.required)) { + registerIEmitCode2("lor", TBoolean, TBoolean, TBoolean, (_: Type, tl: PType, tr: PType) => PBoolean(tl.required && tr.required)) { case (cb, _, rt, l, r) => // 00 ... 00 rv rm lv lm val w = cb.newLocal[Int]("lor_w") @@ -359,7 +321,7 @@ object UtilFunctions extends RegistryFunctions { val Lpresent = CodeLabel() val Lmissing = CodeLabel() cb.ifx(((M >> w) & 1).cne(0), cb.goto(Lmissing), cb.goto(Lpresent)) - IEmitCode(Lmissing, Lpresent, primitive(w.cne(0)), l.required && r.required) + IEmitCode(Lmissing, Lpresent, PCode(rt, w.cne(0)), l.required && r.required) } } } diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerBlockMatrixIR.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerBlockMatrixIR.scala index 97a911a4c18..8de84949ca7 100644 --- a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerBlockMatrixIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerBlockMatrixIR.scala @@ -3,7 +3,6 @@ package is.hail.expr.ir.lowering import is.hail.expr.Nat import is.hail.expr.ir._ import is.hail.expr.ir.functions.GetElement -import is.hail.rvd.RVDPartitioner import is.hail.types.{BlockMatrixSparsity, BlockMatrixType, TypeWithRequiredness} import is.hail.types.virtual._ import is.hail.utils._ @@ -89,14 +88,11 @@ abstract class BlockMatrixStage(val globalVals: Array[(String, IR)], val ctxType }, coerce[TArray](cda.typ)), 1) }, coerce[TArray](cda.typ)) } else { - ToArray(mapIR(rangeIR(I32(typ.nRowBlocks))){ rowIdxRef => - val blocksInOneRow = ToArray(mapIR(rangeIR(I32(typ.nColBlocks))) { colIdxRef => - ArrayRef(blockResults, rowIdxRef * typ.nColBlocks + colIdxRef) - }) - NDArrayConcat(blocksInOneRow, 1) - }) + val i = Ref(genUID(), TInt32) + val j = Ref(genUID(), TInt32) + val cols = ToArray(StreamMap(StreamRange(0, typ.nColBlocks, 1), j.name, ArrayRef(blockResults, i * typ.nColBlocks + j))) + ToArray(StreamMap(StreamRange(0, typ.nRowBlocks, 1), i.name, NDArrayConcat(cols, 1))) } - Let(blockResults.name, cda, NDArrayConcat(rows, 0)) } @@ -171,89 +167,25 @@ abstract class BlockMatrixStage(val globalVals: Array[(String, IR)], val ctxType object LowerBlockMatrixIR { def apply(node: IR, typesToLower: DArrayLowering.Type, ctx: ExecuteContext, r: RequirednessAnalysis, relationalLetsAbove: Map[String, IR]): IR = { - def lower(bmir: BlockMatrixIR) = LowerBlockMatrixIR.lower(bmir, typesToLower, ctx, r, relationalLetsAbove) - - node match { - case BlockMatrixCollect(child) => - lower(child).collectLocal(relationalLetsAbove, child.typ) - case BlockMatrixToValueApply(child, GetElement(IndexedSeq(i, j))) => - val rowBlock = child.typ.getBlockIdx(i) - val colBlock = child.typ.getBlockIdx(j) - - val iInBlock = i - rowBlock * child.typ.blockSize - val jInBlock = j - colBlock * child.typ.blockSize - - val lowered = lower(child) - - val elt = bindIR(lowered.blockContext(rowBlock -> colBlock)) { ctx => - NDArrayRef(lowered.blockBody(ctx), FastIndexedSeq(I64(iInBlock), I64(jInBlock)), -1) - } - - lowered.globalVals.foldRight[IR](elt) { case ((f, v), accum) => Let(f, v, accum) } - 
case BlockMatrixWrite(child, writer) => - writer.lower(ctx, lower(child), child, relationalLetsAbove, TypeWithRequiredness(child.typ.elementType)) //FIXME: BlockMatrixIR is currently ignored in Requiredness inference since all eltTypes are +TFloat64 - case BlockMatrixMultiWrite(blockMatrices, writer) => unimplemented(node) - case node if node.children.exists(_.isInstanceOf[BlockMatrixIR]) => - throw new LowererUnsupportedOperation(s"IR nodes with BlockMatrixIR children need explicit rules: \n${ Pretty(node) }") - - case node => - throw new LowererUnsupportedOperation(s"Value IRs with no BlockMatrixIR children must be lowered through LowerIR: \n${ Pretty(node) }") - } - } - - // This lowers a BlockMatrixIR to an unkeyed TableStage with rows of (blockRow, blockCol, block) - def lowerToTableStage( - bmir: BlockMatrixIR, typesToLower: DArrayLowering.Type, ctx: ExecuteContext, - r: RequirednessAnalysis, relationalLetsAbove: Map[String, IR] - ): TableStage = { - val bms = lower(bmir, typesToLower, ctx, r, relationalLetsAbove) - val typ = bmir.typ - val bmsWithCtx = bms.addContext(TTuple(TInt32, TInt32)){ case (i, j) => MakeTuple(Seq(0 -> i, 1 -> j))} - val blocksRowMajor = Array.range(0, typ.nRowBlocks).flatMap { i => - Array.tabulate(typ.nColBlocks)(j => i -> j).filter(typ.hasBlock) - } - val emptyGlobals = MakeStruct(Seq()) - val globalsId = genUID() - val letBindings = bmsWithCtx.globalVals :+ globalsId -> emptyGlobals - val contextsIR = MakeStream(blocksRowMajor.map{ case (i, j) => bmsWithCtx.blockContext((i, j)) }, TStream(bmsWithCtx.ctxType)) + def unimplemented[T](node: BaseIR): T = + throw new LowererUnsupportedOperation(s"unimplemented: \n${ Pretty(node) }") - val ctxRef = Ref(genUID(), bmsWithCtx.ctxType) - val body = bmsWithCtx.blockBody(ctxRef) - val bodyFreeVars = FreeVariables(body, supportsAgg = false, supportsScan = false) - val bcFields = bmsWithCtx.globalVals.filter { case (f, _) => bodyFreeVars.eval.lookupOption(f).isDefined } :+ globalsId -> Ref(globalsId, emptyGlobals.typ) + def lowerIR(node: IR): IR = LowerToCDA.lower(node, typesToLower, ctx, r, relationalLetsAbove: Map[String, IR]) - def tsPartitionFunction(ctxRef: Ref): IR = { - val s = MakeStruct(Seq("blockRow" -> GetTupleElement(GetField(ctxRef, "new"), 0), "blockCol" -> GetTupleElement(GetField(ctxRef, "new"), 1), "block" -> bmsWithCtx.blockBody(ctxRef))) - MakeStream(Seq( - s - ), TStream(s.typ)) + def lower(bmir: BlockMatrixIR): BlockMatrixStage = { + if (!DArrayLowering.lowerBM(typesToLower)) + throw new LowererUnsupportedOperation("found BlockMatrixIR in lowering; lowering only TableIRs.") + bmir.children.foreach { + case c: BlockMatrixIR if c.typ.blockSize != bmir.typ.blockSize => + throw new LowererUnsupportedOperation(s"Can't lower node with mismatched block sizes: ${ bmir.typ.blockSize } vs child ${ c.typ.blockSize }\n\n ${ Pretty(bmir) }") + case _ => + } + if (bmir.typ.nDefinedBlocks == 0) + BlockMatrixStage.empty(bmir.typ.elementType) + else lowerNonEmpty(bmir) } - val ts = TableStage(letBindings, bcFields, Ref(globalsId, emptyGlobals.typ), RVDPartitioner.unkeyed(blocksRowMajor.size), TableStageDependency.none, contextsIR, tsPartitionFunction) - ts - } - - private def unimplemented[T](node: BaseIR): T = - throw new LowererUnsupportedOperation(s"unimplemented: \n${ Pretty(node) }") - def lower(bmir: BlockMatrixIR, typesToLower: DArrayLowering.Type, ctx: ExecuteContext, r: RequirednessAnalysis, relationalLetsAbove: Map[String, IR]): BlockMatrixStage = { - if (!DArrayLowering.lowerBM(typesToLower)) - throw new 
LowererUnsupportedOperation("found BlockMatrixIR in lowering; lowering only TableIRs.") - bmir.children.foreach { - case c: BlockMatrixIR if c.typ.blockSize != bmir.typ.blockSize => - throw new LowererUnsupportedOperation(s"Can't lower node with mismatched block sizes: ${ bmir.typ.blockSize } vs child ${ c.typ.blockSize }\n\n ${ Pretty(bmir) }") - case _ => - } - if (bmir.typ.nDefinedBlocks == 0) - BlockMatrixStage.empty(bmir.typ.elementType) - else lowerNonEmpty(bmir, typesToLower, ctx, r, relationalLetsAbove) - } - - def lowerNonEmpty(bmir: BlockMatrixIR, typesToLower: DArrayLowering.Type, ctx: ExecuteContext, r: RequirednessAnalysis, relationalLetsAbove: Map[String, IR]): BlockMatrixStage = { - def lower(ir: BlockMatrixIR) = LowerBlockMatrixIR.lower(ir, typesToLower, ctx, r, relationalLetsAbove) - - def lowerIR(node: IR): IR = LowerToCDA.lower(node, typesToLower, ctx, r, relationalLetsAbove: Map[String, IR]) - - bmir match { + def lowerNonEmpty(bmir: BlockMatrixIR): BlockMatrixStage = bmir match { case BlockMatrixRead(reader) => reader.lower(ctx) case x@BlockMatrixRandom(seed, gaussian, shape, blockSize) => val generator = invokeSeeded(if (gaussian) "rand_norm" else "rand_unif", seed, TFloat64, F64(0.0), F64(1.0)) @@ -366,31 +298,20 @@ object LowerBlockMatrixIR { lower(child).condenseBlocks(child.typ, rowDependents, colDependents) .addContext(TTuple(TTuple(TInt64, TInt64, TInt64), TTuple(TInt64, TInt64, TInt64))) { idx => - val (i, j) = idx - - // Aligned with the edges of blocks in child BM. - val blockAlignedRowStartIdx = rowDependents(i).head.toLong * x.typ.blockSize - val blockAlignedColStartIdx = colDependents(j).head.toLong * x.typ.blockSize - val blockAlignedRowEndIdx = math.min(child.typ.nRows, (rowDependents(i).last + 1L) * x.typ.blockSize * rStep) - val blockAlignedColEndIdx = math.min(child.typ.nCols, (colDependents(j).last + 1L) * x.typ.blockSize * cStep) - - // condenseBlocks can give the same data to multiple partitions. Need to make sure we don't use data - // that's already included in an earlier block. 
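// Rough standalone sketch of this per-block slice arithmetic (plain Scala, hypothetical names;
// the surrounding code emits IR rather than computing values directly): for output block i along
// one dimension, the slice taken from the condensed child data starts where the earlier output
// blocks stopped consuming rows, expressed relative to the block-aligned left edge.
def withinBlockSlice(start: Long, end: Long, step: Long, blockSize: Long,
                     outBlockIdx: Long, firstDependentBlock: Long,
                     blockAlignedEnd: Long): (Long, Long, Long) = {
  val blockAlignedStart = firstDependentBlock * blockSize
  val startPlusSeen = start + outBlockIdx * blockSize * step
  val trueStart = startPlusSeen - blockAlignedStart
  val trueEnd = math.min(math.min(end, blockAlignedEnd) - blockAlignedStart,
                         trueStart + blockSize * step)
  (trueStart, trueEnd, step)
}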
- val rStartPlusSeenAlready = rStart + i * x.typ.blockSize * rStep - val cStartPlusSeenAlready = cStart + j * x.typ.blockSize * cStep + val i = idx._1 + val j = idx._2 + val rowStartIdx = rowDependents(i).head.toLong * x.typ.blockSize + val colStartIdx = colDependents(j).head.toLong * x.typ.blockSize - val rowTrueStart = rStartPlusSeenAlready - blockAlignedRowStartIdx - val rowTrueEnd = math.min(math.min(rEnd, blockAlignedRowEndIdx) - blockAlignedRowStartIdx, rowTrueStart + x.typ.blockSize * rStep) + val rowEndIdx = java.lang.Math.min(child.typ.nRows, (rowDependents(i).last + 1L) * x.typ.blockSize) + val colEndIdx = java.lang.Math.min(child.typ.nCols, (colDependents(i).last + 1L) * x.typ.blockSize) val rows = MakeTuple.ordered(FastSeq[IR]( - rowTrueStart, - rowTrueEnd, + if (rStart >= rowStartIdx) rStart - rowStartIdx else (rowStartIdx - rStart) % rStep, + java.lang.Math.min(rEnd, rowEndIdx) - rowStartIdx, rStep)) - - val colTrueStart = cStartPlusSeenAlready - blockAlignedColStartIdx - val colTrueEnd = math.min(java.lang.Math.min(cEnd, blockAlignedColEndIdx) - blockAlignedColStartIdx, colTrueStart + x.typ.blockSize * cStep) val cols = MakeTuple.ordered(FastSeq[IR]( - colTrueStart, - colTrueEnd, + if (cStart >= colStartIdx) cStart - colStartIdx else (colStartIdx - cStart) % cStep, + java.lang.Math.min(cEnd, colEndIdx) - colStartIdx, cStep)) MakeTuple.ordered(FastSeq(rows, cols)) }.mapBody { (ctx, body) => NDArraySlice(body, GetField(ctx, "new")) } @@ -457,5 +378,32 @@ object LowerBlockMatrixIR { } } } + + node match { + case BlockMatrixCollect(child) => + lower(child).collectLocal(relationalLetsAbove, child.typ) + case BlockMatrixToValueApply(child, GetElement(IndexedSeq(i, j))) => + val rowBlock = child.typ.getBlockIdx(i) + val colBlock = child.typ.getBlockIdx(j) + + val iInBlock = i - rowBlock * child.typ.blockSize + val jInBlock = j - colBlock * child.typ.blockSize + + val lowered = lower(child) + + val elt = bindIR(lowered.blockContext(rowBlock -> colBlock)) { ctx => + NDArrayRef(lowered.blockBody(ctx), FastIndexedSeq(I64(iInBlock), I64(jInBlock)), -1) + } + + lowered.globalVals.foldRight[IR](elt) { case ((f, v), accum) => Let(f, v, accum) } + case BlockMatrixWrite(child, writer) => + writer.lower(ctx, lower(child), child, relationalLetsAbove, TypeWithRequiredness(child.typ.elementType)) //FIXME: BlockMatrixIR is currently ignored in Requiredness inference since all eltTypes are +TFloat64 + case BlockMatrixMultiWrite(blockMatrices, writer) => unimplemented(node) + case node if node.children.exists(_.isInstanceOf[BlockMatrixIR]) => + throw new LowererUnsupportedOperation(s"IR nodes with BlockMatrixIR children need explicit rules: \n${ Pretty(node) }") + + case node => + throw new LowererUnsupportedOperation(s"Value IRs with no BlockMatrixIR children must be lowered through LowerIR: \n${ Pretty(node) }") + } } } diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerDistributedSort.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerDistributedSort.scala index 6c1eb8a41c7..7f64ef62a75 100644 --- a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerDistributedSort.scala +++ b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerDistributedSort.scala @@ -3,10 +3,9 @@ package is.hail.expr.ir.lowering import is.hail.annotations.{Annotation, ExtendedOrdering, Region, SafeRow, UnsafeRow} import is.hail.asm4s.{AsmFunction1RegionLong, LongInfo, classInfo} import is.hail.expr.ir._ -import is.hail.types.physical.{PArray, PStruct, PTuple} +import is.hail.types.physical.{PArray, PStruct, 
PTuple, PTypeReferenceSingleCodeType} import is.hail.types.virtual.{TStream, TStruct, Type} import is.hail.rvd.RVDPartitioner -import is.hail.types.physical.stypes.PTypeReferenceSingleCodeType import is.hail.utils._ import org.apache.spark.sql.Row diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala index f55b2d0c597..4691adef824 100644 --- a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala @@ -271,7 +271,7 @@ class TableStage( GetField(ctxRef, "partitionBound"), StreamFilter( StreamFlatMap( - ToStream(GetField(ctxRef, "oldContexts"), true), + ToStream(GetField(ctxRef, "oldContexts")), prevContextUIDPartition, body ), @@ -440,7 +440,7 @@ object LowerTableIR { RVDPartitioner.unkeyed(nPartitionsAdj), TableStageDependency.none, context, - ctxRef => ToStream(ctxRef, true)) + ctxRef => ToStream(ctxRef)) case TableRange(n, nPartitions) => val nPartitionsAdj = math.max(math.min(n, nPartitions), 1) @@ -465,7 +465,7 @@ object LowerTableIR { MakeStruct(FastIndexedSeq("start" -> start, "end" -> end)) }, TStream(contextType)), - (ctxRef: Ref) => mapIR(StreamRange(GetField(ctxRef, "start"), GetField(ctxRef, "end"), I32(1), true)) { i => + (ctxRef: Ref) => mapIR(rangeIR(GetField(ctxRef, "start"), GetField(ctxRef, "end"))) { i => MakeStruct(FastSeq("idx" -> i)) }) @@ -656,12 +656,6 @@ object LowerTableIR { case TableHead(child, targetNumRows) => val loweredChild = lower(child) - def streamLenOrMax(a: IR): IR = - if (targetNumRows <= Integer.MAX_VALUE) - StreamLen(StreamTake(a, targetNumRows.toInt)) - else - StreamLen(a) - def partitionSizeArray(childContexts: Ref): IR = { val partitionSizeArrayFunc = genUID() val howManyPartsToTry = Ref(genUID(), TInt32) @@ -670,7 +664,7 @@ object LowerTableIR { partitionSizeArrayFunc, FastIndexedSeq(howManyPartsToTry.name -> 4), bindIR(loweredChild.mapContexts(_ => StreamTake(ToStream(childContexts), howManyPartsToTry)){ ctx: IR => ctx } - .mapCollect(relationalLetsAbove)(streamLenOrMax)) { counts => + .mapCollect(relationalLetsAbove)(StreamLen)) { counts => If((Cast(streamSumIR(ToStream(counts)), TInt64) >= targetNumRows) || (ArrayLen(childContexts) <= ArrayLen(counts)), counts, Recur(partitionSizeArrayFunc, FastIndexedSeq(howManyPartsToTry * 4), TArray(TInt32))) @@ -1100,37 +1094,13 @@ object LowerTableIR { case TableLiteral(typ, rvd, enc, encodedGlobals) => RVDToTableStage(rvd, EncodedLiteral(enc, encodedGlobals)) - case bmtt@BlockMatrixToTable(bmir) => - val bmStage = LowerBlockMatrixIR.lower(bmir, typesToLower, ctx, r, relationalLetsAbove) - val ts = LowerBlockMatrixIR.lowerToTableStage(bmir, typesToLower, ctx, r, relationalLetsAbove) - // I now have an unkeyed table of (blockRow, blockCol, block). 
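// Plain-Scala sketch of the per-entry coordinate bookkeeping that follows (hypothetical names;
// the real version builds IR over NDArrays): a block stored at (blockRow, blockCol) contributes
// entries whose global (i, j) are offset by blockRow * blockSize and blockCol * blockSize.
def blockToEntries(blockRow: Int, blockCol: Int, blockSize: Int,
                   block: Array[Array[Double]]): Seq[(Long, Long, Double)] =
  for {
    r <- block.indices
    c <- block(r).indices
  } yield (blockRow.toLong * blockSize + r, blockCol.toLong * blockSize + c, block(r)(c))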
- val entriesUnkeyed = ts.mapPartitionWithContext { (partition, ctxRef) => - flatMapIR(partition)(singleRowRef => - bindIR(GetField(singleRowRef, "block")) { singleNDRef => - bindIR(NDArrayShape(singleNDRef)) { shapeTupleRef => - flatMapIR(rangeIR(Cast(GetTupleElement(shapeTupleRef, 0), TInt32))) { withinNDRowIdx => - mapIR(rangeIR(Cast(GetTupleElement(shapeTupleRef, 1), TInt32))) { withinNDColIdx => - val entry = NDArrayRef(singleNDRef, IndexedSeq(Cast(withinNDRowIdx, TInt64), Cast(withinNDColIdx, TInt64)), ErrorIDs.NO_ERROR) - val blockStartRow = GetField(singleRowRef, "blockRow") * bmir.typ.blockSize - val blockStartCol = GetField(singleRowRef, "blockCol") * bmir.typ.blockSize - makestruct("i" -> Cast(withinNDRowIdx + blockStartRow, TInt64), "j" -> Cast(withinNDColIdx + blockStartCol, TInt64), "entry" -> entry) - } - } - } - } - ) - } - - val rowR = r.lookup(bmtt).asInstanceOf[RTable].rowType - ctx.backend.lowerDistributedSort(ctx, entriesUnkeyed, IndexedSeq(SortField("i", Ascending), SortField("j", Ascending)), relationalLetsAbove, rowR) - case node => throw new LowererUnsupportedOperation(s"undefined: \n${ Pretty(node) }") } assert(tir.typ.globalType == lowered.globalType, s"\n ir global: ${tir.typ.globalType}\n lowered global: ${lowered.globalType}") assert(tir.typ.rowType == lowered.rowType, s"\n ir row: ${tir.typ.rowType}\n lowered row: ${lowered.rowType}") - assert(lowered.key startsWith tir.typ.keyType.fieldNames, s"\n ir key: ${tir.typ.keyType.fieldNames.toSeq}\n lowered key: ${lowered.key}") + assert(lowered.key startsWith tir.typ.keyType.fieldNames, s"\n ir key: ${tir.typ.keyType.fieldNames}\n lowered key: ${lowered.key}") lowered } diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerToCDA.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerToCDA.scala index 3f1d990a68b..b819fb5c116 100644 --- a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerToCDA.scala +++ b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerToCDA.scala @@ -3,8 +3,7 @@ package is.hail.expr.ir.lowering import is.hail.annotations.{Region, SafeRow, UnsafeRow} import is.hail.asm4s.{AsmFunction1RegionLong, AsmFunction1RegionUnit, LongInfo, UnitInfo, classInfo} import is.hail.expr.ir._ -import is.hail.types.physical.stypes.PTypeReferenceSingleCodeType -import is.hail.types.physical.{PTuple, PType} +import is.hail.types.physical.{PTuple, PType, PTypeReferenceSingleCodeType} import is.hail.types.virtual.Type import is.hail.utils.{FastIndexedSeq, FastSeq} import org.apache.spark.sql.Row diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/RVDToTableStage.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/RVDToTableStage.scala index a93bbecda9e..af1e87019bc 100644 --- a/hail/src/main/scala/is/hail/expr/ir/lowering/RVDToTableStage.scala +++ b/hail/src/main/scala/is/hail/expr/ir/lowering/RVDToTableStage.scala @@ -1,14 +1,14 @@ package is.hail.expr.ir.lowering import java.io.{ByteArrayInputStream, ByteArrayOutputStream} + import is.hail.annotations.{BroadcastRow, Region, RegionValue} import is.hail.asm4s._ import is.hail.expr.ir.{Compile, CompileIterator, ExecuteContext, GetField, IR, In, Let, MakeStruct, PartitionRVDReader, ReadPartition, StreamRange, ToArray, _} import is.hail.io.{BufferSpec, TypedCodecSpec} import is.hail.rvd.{RVD, RVDType} import is.hail.sparkextras.ContextRDD -import is.hail.types.physical.stypes.PTypeReferenceSingleCodeType -import is.hail.types.physical.{PArray, PStruct, stypes} +import is.hail.types.physical.{PArray, PStruct, PTypeReferenceSingleCodeType} import 
is.hail.utils.{FastIndexedSeq, FastSeq} object RVDToTableStage { diff --git a/hail/src/main/scala/is/hail/expr/ir/ndarrays/EmitNDArray.scala b/hail/src/main/scala/is/hail/expr/ir/ndarrays/EmitNDArray.scala deleted file mode 100644 index 07546639948..00000000000 --- a/hail/src/main/scala/is/hail/expr/ir/ndarrays/EmitNDArray.scala +++ /dev/null @@ -1,661 +0,0 @@ -package is.hail.expr.ir.ndarrays - -import is.hail.annotations.Region -import is.hail.expr.ir._ -import is.hail.types.physical.{PCanonicalArray, PCanonicalNDArray, PFloat32, PFloat32Required, PFloat64, PFloat64Required, PInt32, PInt32Required, PInt64, PInt64Required, PNumeric, PType} -import is.hail.types.physical.stypes.interfaces.{SNDArray, SNDArrayCode} -import is.hail.types.physical.stypes.{SCode, SType} -import is.hail.utils._ -import is.hail.asm4s._ -import is.hail.types.physical.stypes.interfaces._ -import is.hail.types.physical.stypes.primitives.{SFloat32, SFloat64, SInt32, SInt64} -import is.hail.types.virtual.{TFloat32, TFloat64, TInt32, TInt64, TNDArray} - -abstract class NDArrayProducer { - outer => - - def elementType: PType - val shape: IndexedSeq[Value[Long]] - def nDims = shape.size - - val initAll: EmitCodeBuilder => Unit - val initAxis: IndexedSeq[(EmitCodeBuilder) => Unit] - val stepAxis: IndexedSeq[(EmitCodeBuilder, Value[Long]) => Unit] - def loadElementAtCurrentAddr(cb: EmitCodeBuilder): SCode - - def copy( - aElementType: PType = elementType, - aShape: IndexedSeq[Value[Long]] = shape, - ainitAll: EmitCodeBuilder => Unit = initAll, - ainitAxis: IndexedSeq[(EmitCodeBuilder) => Unit] = initAxis, - astepAxis: IndexedSeq[(EmitCodeBuilder, Value[Long]) => Unit] = stepAxis - ): NDArrayProducer = { - new NDArrayProducer() { - override def elementType: PType = aElementType - - override val shape: IndexedSeq[Value[Long]] = aShape - override val initAll: EmitCodeBuilder => Unit = ainitAll - override val initAxis: IndexedSeq[EmitCodeBuilder => Unit] = ainitAxis - override val stepAxis: IndexedSeq[(EmitCodeBuilder, Value[Long]) => Unit] = astepAxis - - override def loadElementAtCurrentAddr(cb: EmitCodeBuilder): SCode = outer.loadElementAtCurrentAddr(cb) - } - } - - def toSCode(cb: EmitCodeBuilder, targetType: PCanonicalNDArray, region: Value[Region], rowMajor: Boolean = false): SNDArrayCode = { - val (firstElementAddress, finish) = targetType.constructDataFunction( - shape, - targetType.makeColumnMajorStrides(shape, region, cb), - cb, - region) - - val currentWriteAddr = cb.newLocal[Long]("ndarray_producer_to_scode_cur_write_addr") - cb.assign(currentWriteAddr, firstElementAddress) - - initAll(cb) - val idxGenerator = if (rowMajor) SNDArray.forEachIndexWithInitAndIncRowMajor _ else SNDArray.forEachIndexWithInitAndIncColMajor _ - idxGenerator(cb, shape, initAxis, stepAxis.map(stepper => (cb: EmitCodeBuilder) => stepper(cb, 1L)), "ndarray_producer_toSCode"){ (cb, indices) => - targetType.elementType.storeAtAddress(cb, currentWriteAddr, region, loadElementAtCurrentAddr(cb), true) - cb.assign(currentWriteAddr, currentWriteAddr + targetType.elementType.byteSize) - } - - finish(cb) - } -} - -object EmitNDArray { - - def apply( - emitter: Emit[_], - ndIR: IR, - cb: EmitCodeBuilder, - region: Value[Region], - env: EmitEnv, - container: Option[AggContainer], - loopEnv: Option[Env[LoopRef]] - ): IEmitCode = { - - def emitNDInSeparateMethod(context: String, cb: EmitCodeBuilder, ir: IR, region: Value[Region], env: EmitEnv, container: Option[AggContainer], loopEnv: Option[Env[LoopRef]]): IEmitCode = { - - 
assert(!emitter.ctx.inLoopCriticalPath.contains(ir)) - val mb = cb.emb.genEmitMethod(context, FastIndexedSeq[ParamType](), UnitInfo) - val r = cb.newField[Region]("emitInSeparate_region", region) - - var ev: EmitSettable = null - mb.voidWithBuilder { cb => - emitter.ctx.tryingToSplit.update(ir, ()) - val result: IEmitCode = deforest(ir, cb, r, env, container, loopEnv).map(cb)(ndap => ndap.toSCode(cb, PCanonicalNDArray(ndap.elementType.setRequired(true), ndap.nDims), r)) - - ev = cb.emb.ecb.newEmitField(s"${context}_result", result.emitType) - cb.assign(ev, result) - } - cb.invokeVoid(mb) - ev.toI(cb) - } - - def deforest(x: IR, cb: EmitCodeBuilder, region: Value[Region], env: EmitEnv, container: Option[AggContainer], loopEnv: Option[Env[LoopRef]]): IEmitCodeGen[NDArrayProducer] = { - def deforestRecur(x: IR, cb: EmitCodeBuilder = cb, region: Value[Region] = region, env: EmitEnv = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): IEmitCodeGen[NDArrayProducer] = { - - def emitI(ir: IR, cb: EmitCodeBuilder, region: Value[Region] = region, env: EmitEnv = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): IEmitCode = { - emitter.emitI(ir, cb, region, env, container, loopEnv) - } - - x match { - case NDArrayMap(child, elemName, body) => { - deforestRecur(child, cb).map(cb) { childProducer => - val elemRef = cb.emb.newEmitField("ndarray_map_element_name", childProducer.elementType.sType, required = true) - val bodyEnv = env.bind(elemName, elemRef) - val bodyEC = EmitCode.fromI(cb.emb)(cb => emitI(body, cb, env = bodyEnv)) - - new NDArrayProducer { - override def elementType: PType = bodyEC.st.canonicalPType() - - override val shape: IndexedSeq[Value[Long]] = childProducer.shape - override val initAll: EmitCodeBuilder => Unit = childProducer.initAll - override val initAxis: IndexedSeq[EmitCodeBuilder => Unit] = childProducer.initAxis - override val stepAxis: IndexedSeq[(EmitCodeBuilder, Value[Long]) => Unit] = childProducer.stepAxis - - override def loadElementAtCurrentAddr(cb: EmitCodeBuilder): SCode = { - cb.assign(elemRef, EmitCode.present(cb.emb, childProducer.loadElementAtCurrentAddr(cb))) - bodyEC.toI(cb).get(cb, "NDArray map body cannot be missing") - } - } - } - } - case NDArrayMap2(lChild, rChild, lName, rName, body) => { - deforestRecur(lChild, cb).flatMap(cb) { leftProducer => - deforestRecur(rChild, cb).map(cb) { rightProducer => - val leftShapeValues = leftProducer.shape - val rightShapeValues = rightProducer.shape - - val shapeArray = NDArrayEmitter.unifyShapes2(cb, leftShapeValues, rightShapeValues) - - val lElemRef = cb.emb.newEmitField(lName, leftProducer.elementType.sType, required = true) - val rElemRef = cb.emb.newEmitField(rName, rightProducer.elementType.sType, required = true) - val bodyEnv = env.bind(lName, lElemRef) - .bind(rName, rElemRef) - val bodyEC = EmitCode.fromI(cb.emb)(cb => emitI(body, cb, env = bodyEnv)) - - val leftBroadcasted = broadcast(cb, leftProducer, "left") - val rightBroadcasted = broadcast(cb, rightProducer, "right") - - new NDArrayProducer { - override def elementType: PType = bodyEC.st.canonicalPType() - - override val shape: IndexedSeq[Value[Long]] = shapeArray - override val initAll: EmitCodeBuilder => Unit = { - cb => { - leftBroadcasted.initAll(cb) - rightBroadcasted.initAll(cb) - } - } - override val initAxis: IndexedSeq[EmitCodeBuilder => Unit] = shape.indices.map { idx => { cb: EmitCodeBuilder => - leftBroadcasted.initAxis(idx)(cb) - 
rightBroadcasted.initAxis(idx)(cb) - } - } - override val stepAxis: IndexedSeq[(EmitCodeBuilder, Value[Long]) => Unit] = shape.indices.map { idx => { (cb: EmitCodeBuilder, axis: Value[Long]) => - leftBroadcasted.stepAxis(idx)(cb, axis) - rightBroadcasted.stepAxis(idx)(cb, axis) - } - } - - override def loadElementAtCurrentAddr(cb: EmitCodeBuilder): SCode = { - cb.assign(lElemRef, EmitCode.present(cb.emb, leftBroadcasted.loadElementAtCurrentAddr(cb))) - cb.assign(rElemRef, EmitCode.present(cb.emb, rightBroadcasted.loadElementAtCurrentAddr(cb))) - - bodyEC.toI(cb).get(cb, "NDArrayMap2 body cannot be missing") - } - } - } - } - } - case NDArrayReindex(child, indexExpr) => - deforestRecur(child, cb).map(cb) { childProducer => - - new NDArrayProducer { - override def elementType: PType = childProducer.elementType - - override val shape: IndexedSeq[Value[Long]] = indexExpr.map { childIndex => - if (childIndex < childProducer.nDims) - childProducer.shape(childIndex) - else - const(1L) - } - override val initAll: EmitCodeBuilder => Unit = childProducer.initAll - override val initAxis: IndexedSeq[EmitCodeBuilder => Unit] = { - indexExpr.map { childIndex => - (cb: EmitCodeBuilder) => - if (childIndex < childProducer.nDims) { - childProducer.initAxis(childIndex)(cb) - } - } - } - override val stepAxis: IndexedSeq[(EmitCodeBuilder, Value[Long]) => Unit] = { - indexExpr.map { childIndex => - (cb: EmitCodeBuilder, step: Value[Long]) => - if (childIndex < childProducer.nDims) { - childProducer.stepAxis(childIndex)(cb, step) - } - } - } - - override def loadElementAtCurrentAddr(cb: EmitCodeBuilder): SCode = childProducer.loadElementAtCurrentAddr(cb) - } - } - case x@NDArrayReshape(childND, shape) => - emitI(childND, cb).flatMap(cb) { case childND: SNDArrayCode => - // Plan: Run through the child row major, make an array. Then jump around it as needed. 
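// Sketch of the row-major plan above in plain Scala (hypothetical helper; strides here are in
// elements, whereas the emitted code works in bytes over a flat buffer): once the child is laid
// out row-major, a reshape only changes the strides used to read the same buffer back, and those
// follow directly from the new shape.
def rowMajorStrides(shape: IndexedSeq[Long]): IndexedSeq[Long] =
  shape.scanRight(1L)(_ * _).tail
// e.g. rowMajorStrides(IndexedSeq(2L, 3L, 4L)) == IndexedSeq(12L, 4L, 1L)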
- val childMemo = childND.memoize(cb, "ndarray_reshape_child") - - val childShapeValues = childMemo.shapes(cb) - val outputNDims = x.typ.nDims - - val requestedShapeValues = Array.tabulate(outputNDims)(i => cb.newLocal[Long](s"ndarray_reindex_request_shape_$i")).toIndexedSeq - - emitI(shape, cb, env = env).map(cb) { sc: SCode => - val tupleCode = sc.asBaseStruct - val tupleValue = tupleCode.memoize(cb, "ndarray_reshape_requested") - - val hasNegativeOne = cb.newLocal[Boolean]("ndarray_reshape_has_neg_one") - val runningProduct = cb.newLocal[Long]("ndarray_reshape_running_product") - val replacesNegativeOne = cb.newLocal[Long]("ndarray_reshape_replaces_neg_one") - val tempShapeElement = cb.newLocal[Long]("ndarray_reshape_temp_shape_element") - - cb.assign(hasNegativeOne, false) - cb.assign(runningProduct, 1L) - - (0 until outputNDims).foreach { i => - cb.assign(tempShapeElement, tupleValue.loadField(cb, i).get(cb, "Can't reshape if elements of reshape tuple are missing.").asLong.longCode(cb)) - cb.ifx(tempShapeElement < 0L, - { - cb.ifx(tempShapeElement ceq -1L, - { - cb.ifx(hasNegativeOne, { - cb._fatal("Can't infer shape, more than one -1") - }, { - cb.assign(hasNegativeOne, true) - }) - }, - { - cb._fatal("Can't reshape, new shape must contain only nonnegative numbers or -1") - } - ) - }, - { - cb.assign(runningProduct, runningProduct * tempShapeElement) - } - ) - } - - val numElements = cb.newLocal[Long]("ndarray_reshape_child_num_elements") - cb.assign(numElements, SNDArray.numElements(childShapeValues)) - - cb.ifx(hasNegativeOne.mux( - (runningProduct ceq 0L) || (numElements % runningProduct) > 0L, - numElements cne runningProduct - ), { - cb._fatal("Can't reshape since requested shape is incompatible with number of elements") - }) - cb.assign(replacesNegativeOne, (runningProduct ceq 0L).mux(0L, numElements / runningProduct)) - - (0 until outputNDims).foreach { i => - cb.assign(tempShapeElement, tupleValue.loadField(cb, i).get(cb, "Can't reshape if elements of reshape tuple are missing.").asLong.longCode(cb)) - cb.assign(requestedShapeValues(i), (tempShapeElement ceq -1L).mux(replacesNegativeOne, tempShapeElement)) - } - - val childPType = childND.st.canonicalPType().asInstanceOf[PCanonicalNDArray] - val rowMajor = fromSValue(childMemo, cb).toSCode(cb, childPType, region, true).memoize(cb, "ndarray_reshape_row_major_layout") - // The canonical row major thing is now in the order we want. We just need to read this with the row major striding that - // would be generated for something of the new shape. - val outputPType = PCanonicalNDArray(rowMajor.st.elementPType.setRequired(true), x.typ.nDims, true) // TODO Should it be required? - val rowMajorStriding = outputPType.makeRowMajorStrides(requestedShapeValues, region, cb) - fromShapeStridesFirstAddress(rowMajor.st.elementPType, requestedShapeValues, rowMajorStriding, rowMajor.firstDataAddress(cb), cb) - } - } - - case x@NDArrayConcat(nds, axis) => - emitI(nds, cb).flatMap(cb) { ndsPCode => - val ndsArraySValue = ndsPCode.asIndexable.memoize(cb, "ndarray_concat_array_of_nds") - val arrLength = ndsArraySValue.loadLength() - cb.ifx(arrLength ceq 0, { - cb._fatal("need at least one ndarray to concatenate") - }) - - val missing: Code[Boolean] = { - if (ndsArraySValue.st.elementEmitType.required) - const(false) - else { - val missing = cb.newLocal[Boolean]("ndarray_concat_result_missing") - cb.assign(missing, false) - // Need to check if the any of the ndarrays are missing. 
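// In plain Scala terms (sketch only, hypothetical name), the missing-check loop that follows is
// just a logical OR fold over per-input missingness: the concat result is missing iff any input is.
def anyMissing(inputIsMissing: IndexedSeq[Boolean]): Boolean =
  inputIsMissing.foldLeft(false)(_ || _)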
- val missingCheckLoopIdx = cb.newLocal[Int]("ndarray_concat_missing_check_idx") - cb.forLoop(cb.assign(missingCheckLoopIdx, 0), missingCheckLoopIdx < arrLength, cb.assign(missingCheckLoopIdx, missingCheckLoopIdx + 1), - cb.assign(missing, missing | ndsArraySValue.isElementMissing(missingCheckLoopIdx)) - ) - missing - } - } - - IEmitCode(cb, missing, { - val loopIdx = cb.newLocal[Int]("ndarray_concat_shape_check_idx") - val firstND = ndsArraySValue.loadElement(cb, 0).map(cb) { sCode => sCode.asNDArray }.get(cb).memoize(cb, "ndarray_concat_input_0") - - val stagedArrayOfSizesPType = PCanonicalArray(PInt64(), true) - val (pushElement, finish) = stagedArrayOfSizesPType.constructFromFunctions(cb, region, arrLength, false) - - val newShape = (0 until x.typ.nDims).map { dimIdx => - val localDim = cb.newLocal[Long](s"ndarray_concat_output_shape_element_${dimIdx}") - val ndShape = firstND.shapes(cb) - cb.assign(localDim, ndShape(dimIdx)) - if (dimIdx == axis) { - pushElement(cb, EmitCode(Code._empty, false, primitive(localDim)).toI(cb)) - } - - cb.forLoop(cb.assign(loopIdx, 1), loopIdx < arrLength, cb.assign(loopIdx, loopIdx + 1), { - val shapeOfNDAtIdx = ndsArraySValue.loadElement(cb, loopIdx).map(cb) { sCode => sCode.asNDArray }.get(cb).shape(cb).memoize(cb, "ndarray_concat_input_shape") - val dimLength = cb.newLocal[Long]("dimLength", shapeOfNDAtIdx.loadField(cb, dimIdx).get(cb).asInt64.longCode(cb)) - - if (dimIdx == axis) { - pushElement(cb, EmitCode(Code._empty, false, primitive(dimLength)).toI(cb)) - cb.assign(localDim, localDim + dimLength) - } - else { - cb.ifx(dimLength.cne(localDim), - cb._fatal(const(s"NDArrayConcat: mismatched dimensions of input NDArrays along axis ").concat(loopIdx.toS).concat(": expected ") - .concat(localDim.toS).concat(", got ") - .concat(dimLength.toS)) - ) - } - }) - localDim - } - - val stagedArrayOfSizes = finish(cb).memoize(cb, "ndarray_concat_staged_array_of_sizes") - - new NDArrayProducer { - override def elementType: PType = firstND.st.elementPType - - override val shape: IndexedSeq[Value[Long]] = newShape - - val idxVars = shape.indices.map(i => cb.newLocal[Long](s"ndarray_produceer_fall_through_idx_${i}")) - // Need to keep track of the current ndarray being read from. - val currentNDArrayIdx = cb.newLocal[Int]("ndarray_concat_current_active_ndarray_idx") - - override val initAll: EmitCodeBuilder => Unit = { cb => - idxVars.foreach(idxVar => cb.assign(idxVar, 0L)) - cb.assign(currentNDArrayIdx, 0) - } - override val initAxis: IndexedSeq[EmitCodeBuilder => Unit] = - shape.indices.map(i => (cb: EmitCodeBuilder) => { - cb.assign(idxVars(i), 0L) - if (i == axis) { - cb.assign(currentNDArrayIdx, 0) - } - }) - override val stepAxis: IndexedSeq[(EmitCodeBuilder, Value[Long]) => Unit] = { - // For all boring axes, just add to corresponding indexVar. For the single interesting axis, - // also consider updating the currently tracked ndarray. - shape.indices.map(idx => (cb: EmitCodeBuilder, step: Value[Long]) => { - // Start by updating the idxVar by the step - val curIdxVar = idxVars(idx) - cb.assign(curIdxVar, curIdxVar + step) - if (idx == axis) { - // If bigger than current ndarray, then we need to subtract out the size of this ndarray, increment to the next ndarray, and see if we are happy yet. 
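// Plain-Scala sketch of the skipping logic described above and staged just below (hypothetical
// names): given the per-input sizes along the concatenation axis, a freshly advanced index is
// normalised to (which input array we are now in, the index within that input).
def advanceAlongConcatAxis(sizes: IndexedSeq[Long], startInput: Int, idx: Long): (Int, Long) = {
  var input = startInput
  var within = idx
  while (input < sizes.length && within >= sizes(input)) {
    within -= sizes(input)
    input += 1
  }
  (input, within)
}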
- val shouldLoop = cb.newLocal[Boolean]("should_loop", curIdxVar >= stagedArrayOfSizes.loadElement(cb, currentNDArrayIdx).get(cb).asInt64.longCode(cb)) - cb.whileLoop(shouldLoop, - { - cb.assign(curIdxVar, curIdxVar - stagedArrayOfSizes.loadElement(cb, currentNDArrayIdx).get(cb).asInt64.longCode(cb)) - cb.assign(currentNDArrayIdx, currentNDArrayIdx + 1) - cb.ifx(currentNDArrayIdx < stagedArrayOfSizes.loadLength(), { - cb.assign(shouldLoop, curIdxVar >= stagedArrayOfSizes.loadElement(cb, currentNDArrayIdx).get(cb).asInt64.longCode(cb)) - }, { - cb.assign(shouldLoop, false) - }) - } - ) - } - }) - } - - override def loadElementAtCurrentAddr(cb: EmitCodeBuilder): SCode = { - val currentNDArray = ndsArraySValue.loadElement(cb, currentNDArrayIdx).get(cb).asNDArray.memoize(cb, "ndarray_concat_current_active_ndarray") - currentNDArray.loadElement(idxVars, cb) - } - } - }) - } - case NDArraySlice(child, slicesIR) => - deforestRecur(child, cb).flatMap(cb) { childProducer => - emitI(slicesIR, cb).flatMap(cb) { slicesPC => - val slicesValue = slicesPC.asBaseStruct.memoize(cb, "ndarray_slice_tuple_pv") - - val (indexingIndices, slicingIndices) = slicesValue.st.fieldTypes.zipWithIndex.partition { case (pFieldType, idx) => - pFieldType.isPrimitive - } match { - case (a, b) => (a.map(_._2), b.map(_._2)) - } - - IEmitCode.multiFlatMap[Int, SCode, NDArrayProducer](indexingIndices, indexingIndex => slicesValue.loadField(cb, indexingIndex), cb) { indexingSCodes => - val indexingValues = indexingSCodes.map(sCode => cb.newLocal("ndarray_slice_indexer", sCode.asInt64.longCode(cb))) - val slicingValueTriplesBuilder = new BoxedArrayBuilder[(Value[Long], Value[Long], Value[Long])]() - val outputShape = { - IEmitCode.multiFlatMap[Int, SCode, IndexedSeq[Value[Long]]](slicingIndices, - valueIdx => slicesValue.loadField(cb, valueIdx), cb) { sCodeSlices: IndexedSeq[SCode] => - IEmitCode.multiFlatMap(sCodeSlices, { sCodeSlice: SCode => - val sValueSlice = sCodeSlice.asBaseStruct.memoize(cb, "ndarray_slice_sCodeSlice") - // I know I have a tuple of three elements here, start, stop, step - - val newDimSizeI = sValueSlice.loadField(cb, 0).flatMap(cb) { startC => - sValueSlice.loadField(cb, 1).flatMap(cb) { stopC => - sValueSlice.loadField(cb, 2).map(cb) { stepC => - val start = cb.newLocal[Long]("ndarray_slice_start", startC.asLong.longCode(cb)) - val stop = cb.newLocal[Long]("ndarray_slice_stop", stopC.asLong.longCode(cb)) - val step = cb.newLocal[Long]("ndarray_slice_step", stepC.asLong.longCode(cb)) - - slicingValueTriplesBuilder.push((start, stop, step)) - - val newDimSize = cb.newLocal[Long]("new_dim_size") - cb.ifx(step >= 0L && start <= stop, { - cb.assign(newDimSize, const(1L) + ((stop - start) - 1L) / step) - }, { - cb.ifx(step < 0L && start >= stop, { - cb.assign(newDimSize, (((stop - start) + 1L) / step) + 1L) - }, { - cb.assign(newDimSize, 0L) - }) - }) - newDimSize - } - } - } - newDimSizeI - }, cb)(x => IEmitCode(cb, false, x)) - } - } - val slicingValueTriples = slicingValueTriplesBuilder.result() - - outputShape.map(cb) { outputShapeSeq => - new NDArrayProducer() { - override def elementType: PType = childProducer.elementType - - override val shape: IndexedSeq[Value[Long]] = outputShapeSeq - - override val initAll: EmitCodeBuilder => Unit = cb => { - childProducer.initAll(cb) - // Need to get the indexingIndices to the right starting points - indexingIndices.zipWithIndex.foreach { case (childIdx, ordinalIdx) => - childProducer.initAxis(childIdx) - childProducer.stepAxis(childIdx)(cb, 
indexingValues(ordinalIdx)) - } - } - - override val initAxis: IndexedSeq[EmitCodeBuilder => Unit] = shape.indices.map(idx => { (cb: EmitCodeBuilder) => - val whichSlicingAxis = slicingIndices(idx) - val slicingValue = slicingValueTriples(idx) - childProducer.initAxis(whichSlicingAxis)(cb) - childProducer.stepAxis(whichSlicingAxis)(cb, slicingValue._1) - }) - override val stepAxis: IndexedSeq[(EmitCodeBuilder, Value[Long]) => Unit] = shape.indices.map(idx => { (cb: EmitCodeBuilder, outerStep: Value[Long]) => - // SlicingIndices is a map from my coordinates to my child's coordinates. - val whichSlicingAxis = slicingIndices(idx) - val (start, stop, sliceStep) = slicingValueTriples(idx) - val innerStep = cb.newLocal[Long]("ndarray_producer_slice_child_step", sliceStep * outerStep) - childProducer.stepAxis(whichSlicingAxis)(cb, innerStep) - }) - - override def loadElementAtCurrentAddr(cb: EmitCodeBuilder): SCode = childProducer.loadElementAtCurrentAddr(cb) - } - } - } - } - } - case NDArrayFilter(child, filters) => - deforestRecur(child, cb).map(cb) { childProducer: NDArrayProducer => - - val filterWasMissing = (0 until filters.size).map(i => cb.newField[Boolean](s"ndarray_filter_${i}_was_missing")) - val filtPValues = new Array[SIndexableValue](filters.size) - val outputShape = childProducer.shape.indices.map(idx => cb.newField[Long](s"ndarray_filter_output_shapes_${idx}")) - - filters.zipWithIndex.foreach { case (filt, i) => - // Each filt is a sequence that may be missing with elements that may not be missing. - emitI(filt, cb).consume(cb, - { - cb.assign(outputShape(i), childProducer.shape(i)) - cb.assign(filterWasMissing(i), true) - }, - { - filtArrayPC => { - val filtArrayPValue = filtArrayPC.asIndexable.memoize(cb, s"ndarray_filt_array_${i}") - filtPValues(i) = filtArrayPValue - cb.assign(outputShape(i), filtArrayPValue.loadLength().toL) - cb.assign(filterWasMissing(i), false) - } - } - ) - } - - new NDArrayProducer { - override def elementType: PType = childProducer.elementType - - override val shape: IndexedSeq[Value[Long]] = outputShape - - // Plan: Keep track of current indices on each axis, use them to step through filtered - // dimensions accordingly. 
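// Sketch of that plan in plain Scala (hypothetical names): on a filtered axis, stepping the
// output index by `step` moves the child producer by the gap between the corresponding kept
// child indices, not by `step` itself.
def childStepOnFilteredAxis(keptIndices: IndexedSeq[Long], currentOutIdx: Int, step: Int): Long =
  keptIndices(currentOutIdx + step) - keptIndices(currentOutIdx)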
- val idxVars = shape.indices.map(idx => cb.newLocal[Long](s"ndarray_producer_filter_index_${idx}")) - - override val initAll: EmitCodeBuilder => Unit = cb => { - idxVars.foreach(idxVar => cb.assign(idxVar, 0L)) - childProducer.initAll(cb) - } - override val initAxis: IndexedSeq[EmitCodeBuilder => Unit] = shape.indices.map { idx => - (cb: EmitCodeBuilder) => { - cb.assign(idxVars(idx), 0L) - childProducer.initAxis(idx)(cb) - cb.ifx(filterWasMissing(idx), { - /* pass */ - }, { - val startPoint = cb.newLocal[Long]("ndarray_producer_filter_init_axis", filtPValues(idx).loadElement(cb, idxVars(idx).toI).get( - cb, s"NDArrayFilter: can't filter on missing index (axis=$idx)").asLong.longCode(cb)) - childProducer.stepAxis(idx)(cb, startPoint) - }) - } - } - override val stepAxis: IndexedSeq[(EmitCodeBuilder, Value[Long]) => Unit] = shape.indices.map { idx => - (cb: EmitCodeBuilder, step: Value[Long]) => { - cb.ifx(filterWasMissing(idx), { - childProducer.stepAxis(idx)(cb, step) - cb.assign(idxVars(idx), idxVars(idx) + step) - }, { - val currentPos = filtPValues(idx).loadElement(cb, idxVars(idx).toI).get(cb, s"NDArrayFilter: can't filter on missing index (axis=$idx)").asLong.longCode(cb) - cb.assign(idxVars(idx), idxVars(idx) + step) - val newPos = filtPValues(idx).loadElement(cb, idxVars(idx).toI).get(cb, s"NDArrayFilter: can't filter on missing index (axis=$idx)").asLong.longCode(cb) - val stepSize = cb.newLocal[Long]("ndarray_producer_filter_step_size", newPos - currentPos) - childProducer.stepAxis(idx)(cb, stepSize) - }) - } - } - - override def loadElementAtCurrentAddr(cb: EmitCodeBuilder): SCode = childProducer.loadElementAtCurrentAddr(cb) - } - } - case NDArrayAgg(child, axesToSumOut) => - deforestRecur(child, cb).map(cb) { childProducer: NDArrayProducer => - val childDims = child.typ.asInstanceOf[TNDArray].nDims - val axesToKeep = (0 until childDims).filter(axis => !axesToSumOut.contains(axis)) - val newOutputShape = axesToKeep.map(idx => childProducer.shape(idx)) - val newOutputShapeComplement = axesToSumOut.map(idx => childProducer.shape(idx)) - - val newElementType: PType = child.typ.asInstanceOf[TNDArray].elementType match { - case TInt32 => PInt32Required - case TInt64 => PInt64Required - case TFloat32 => PFloat32Required - case TFloat64 => PFloat64Required - } - new NDArrayProducer { - override def elementType: PType = newElementType - - override val shape: IndexedSeq[Value[Long]] = newOutputShape - - override val initAll: EmitCodeBuilder => Unit = childProducer.initAll - // Important part here is that NDArrayAgg has less axes then its child. We need to map - // between them. - override val initAxis: IndexedSeq[EmitCodeBuilder => Unit] = { - axesToKeep.map(idx => childProducer.initAxis(idx)) - } - - override val stepAxis: IndexedSeq[(EmitCodeBuilder, Value[Long]) => Unit] = { - axesToKeep.map(idx => childProducer.stepAxis(idx)) - } - - override def loadElementAtCurrentAddr(cb: EmitCodeBuilder): SCode = { - // Idea: For each axis that is being summed over, step through and keep a running sum. 
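// Rough plain-Scala equivalent of the running-sum idea (sketch with hypothetical names; the code
// below stages it with forEachIndexWithInitAndIncColMajor): for one output element, fold a sum
// over every index combination of the summed-out axes.
def sumOverAxes(loadElement: IndexedSeq[Long] => Double,
                summedOutShape: IndexedSeq[Long]): Double = {
  def allIndices(shape: IndexedSeq[Long]): Iterator[IndexedSeq[Long]] =
    shape.foldLeft(Iterator(IndexedSeq.empty[Long])) { (acc, dim) =>
      acc.flatMap(prefix => (0L until dim).iterator.map(prefix :+ _))
    }
  allIndices(summedOutShape).map(loadElement).sum
}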
- val numericElementType = elementType.asInstanceOf[PNumeric] - val runningSum = NumericPrimitives.newLocal(cb, "ndarray_agg_running_sum", numericElementType.virtualType) - cb.assign(runningSum, numericElementType.zero) - - val initsToSumOut = axesToSumOut.map(idx => childProducer.initAxis(idx)) - val stepsToSumOut = axesToSumOut.map(idx => (cb: EmitCodeBuilder) => childProducer.stepAxis(idx)(cb, 1L)) - - SNDArray.forEachIndexWithInitAndIncColMajor(cb, newOutputShapeComplement, initsToSumOut, stepsToSumOut, "ndarray_producer_ndarray_agg") { (cb, _) => - cb.assign(runningSum, numericElementType.add(runningSum, SType.extractPrimCode(cb, childProducer.loadElementAtCurrentAddr(cb)))) - } - primitive(numericElementType.virtualType, runningSum) - } - } - } - case _ => { - val ndI = emitI(x, cb) - ndI.map(cb) { ndPCode => - val ndPv = ndPCode.asNDArray.memoize(cb, "deforestNDArray_fall_through_ndarray") - fromSValue(ndPv, cb) - } - } - } - } - - deforestRecur(x) - } - - emitNDInSeparateMethod("foo", cb, ndIR, region, env, container, loopEnv) - } - - def fromSValue(ndSv: SNDArrayValue, cb: EmitCodeBuilder): NDArrayProducer = { - val ndSvShape = ndSv.shapes(cb) - val strides = ndSv.strides(cb) - - fromShapeStridesFirstAddress(ndSv.st.elementPType, ndSvShape, strides, ndSv.firstDataAddress(cb), cb) - } - - def fromShapeStridesFirstAddress(newElementType: PType, ndSvShape: IndexedSeq[Value[Long]], strides: IndexedSeq[Value[Long]], firstDataAddress: Value[Long], cb: EmitCodeBuilder): NDArrayProducer = { - val counters = ndSvShape.indices.map(i => cb.newLocal[Long](s"ndarray_producer_fall_through_idx_${i}")) - - assert(ndSvShape.size == strides.size, s"shape.size = ${ndSvShape.size} != strides.size = ${strides.size}") - - new NDArrayProducer { - override def elementType: PType = newElementType - override val shape: IndexedSeq[Value[Long]] = ndSvShape - - override val initAll: EmitCodeBuilder => Unit = cb => { - counters.foreach(ctr => cb.assign(ctr, 0L)) - } - override val initAxis: IndexedSeq[EmitCodeBuilder => Unit] = { - shape.indices.map(i => (cb: EmitCodeBuilder) => { - cb.assign(counters(i), 0L) - }) - } - override val stepAxis: IndexedSeq[(EmitCodeBuilder, Value[Long]) => Unit] = { - shape.indices.map{ i => - (cb: EmitCodeBuilder, step: Value[Long]) => { - cb.assign(counters(i), counters(i) + step * strides(i)) - } - } - } - - override def loadElementAtCurrentAddr(cb: EmitCodeBuilder): SCode = { - val offset = counters.foldLeft[Code[Long]](const(0L)){ (a, b) => a + b} - val loaded = elementType.loadCheapSCode(cb, firstDataAddress + offset) - val memoLoaded = loaded.memoize(cb, "temp_memo") - memoLoaded.get - } - } - } - - def createBroadcastMask(cb: EmitCodeBuilder, shape: IndexedSeq[Value[Long]]): IndexedSeq[Value[Long]] = { - val ffff = 0xFFFFFFFFFFFFFFFFL - shape.indices.map { idx => - cb.newLocal[Long](s"ndarray_producer_broadcast_mask_${idx}", (shape(idx) ceq 1L).mux(0L, ffff)) - } - } - - def broadcast(cb: EmitCodeBuilder, prod: NDArrayProducer,ctx: String): NDArrayProducer = { - val broadcastMask = createBroadcastMask(cb, prod.shape) - val newSteps = prod.stepAxis.indices.map { idx => - (cb: EmitCodeBuilder, step: Value[Long]) => { - val maskedStep = cb.newLocal[Long]("ndarray_producer_masked_step", step & broadcastMask(idx)) - prod.stepAxis(idx)(cb, maskedStep) - } - } - prod.copy(astepAxis = newSteps) - } -} diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/BinaryOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/BinaryOrdering.scala index 4ec321abdf1..85152faa92c 
100644 --- a/hail/src/main/scala/is/hail/expr/ir/orderings/BinaryOrdering.scala +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/BinaryOrdering.scala @@ -2,7 +2,7 @@ package is.hail.expr.ir.orderings import is.hail.asm4s._ import is.hail.expr.ir.{EmitClassBuilder, EmitCodeBuilder} -import is.hail.types.physical.stypes.SCode +import is.hail.types.physical.PCode import is.hail.types.physical.stypes.interfaces.{SBinary, SBinaryValue} object BinaryOrdering { @@ -13,7 +13,7 @@ object BinaryOrdering { val type1: SBinary = t1 val type2: SBinary = t2 - def _compareNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Int] = { + def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = { val xv: SBinaryValue = x.asBinary.memoize(cb, "xv") val yv: SBinaryValue = y.asBinary.memoize(cb, "yv") val xlen = cb.newLocal[Int]("xlen", xv.loadLength()) diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/CallOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/CallOrdering.scala index 8a13ba01f41..813b807059b 100644 --- a/hail/src/main/scala/is/hail/expr/ir/orderings/CallOrdering.scala +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/CallOrdering.scala @@ -2,7 +2,8 @@ package is.hail.expr.ir.orderings import is.hail.asm4s.Code import is.hail.expr.ir.{EmitClassBuilder, EmitCodeBuilder} -import is.hail.types.physical.stypes.{SCode, SType} +import is.hail.types.physical.PCode +import is.hail.types.physical.stypes.SType import is.hail.types.physical.stypes.interfaces.SCall object CallOrdering { @@ -14,7 +15,7 @@ object CallOrdering { override val type1: SType = t1 override val type2: SType = t2 - override def _compareNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Int] = { + override def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = { Code.invokeStatic2[java.lang.Integer, Int, Int, Int]("compare", x.asCall.loadCanonicalRepresentation(cb), y.asCall.loadCanonicalRepresentation(cb)) } diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/CodeOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/CodeOrdering.scala index e993c73fc3c..5de6939cdeb 100644 --- a/hail/src/main/scala/is/hail/expr/ir/orderings/CodeOrdering.scala +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/CodeOrdering.scala @@ -3,7 +3,7 @@ package is.hail.expr.ir.orderings import is.hail.asm4s._ import is.hail.expr.ir.{EmitClassBuilder, EmitCode, EmitCodeBuilder} import is.hail.types.physical._ -import is.hail.types.physical.stypes.{SCode, SType} +import is.hail.types.physical.stypes.SType import is.hail.types.physical.stypes.interfaces._ import is.hail.types.physical.stypes.primitives._ import is.hail.types.virtual._ @@ -51,11 +51,11 @@ object CodeOrdering { } t1.virtualType match { - case TInt32 => Int32Ordering.make(t1.asInstanceOf[SInt32.type], t2.asInstanceOf[SInt32.type], ecb) - case TInt64 => Int64Ordering.make(t1.asInstanceOf[SInt64.type], t2.asInstanceOf[SInt64.type], ecb) - case TFloat32 => Float32Ordering.make(t1.asInstanceOf[SFloat32.type], t2.asInstanceOf[SFloat32.type], ecb) - case TFloat64 => Float64Ordering.make(t1.asInstanceOf[SFloat64.type], t2.asInstanceOf[SFloat64.type], ecb) - case TBoolean => BooleanOrdering.make(t1.asInstanceOf[SBoolean.type], t2.asInstanceOf[SBoolean.type], ecb) + case TInt32 => Int32Ordering.make(t1.asInstanceOf[SInt32], t2.asInstanceOf[SInt32], ecb) + case TInt64 => Int64Ordering.make(t1.asInstanceOf[SInt64], t2.asInstanceOf[SInt64], ecb) + case TFloat32 => Float32Ordering.make(t1.asInstanceOf[SFloat32], 
t2.asInstanceOf[SFloat32], ecb) + case TFloat64 => Float64Ordering.make(t1.asInstanceOf[SFloat64], t2.asInstanceOf[SFloat64], ecb) + case TBoolean => BooleanOrdering.make(t1.asInstanceOf[SBoolean], t2.asInstanceOf[SBoolean], ecb) case TCall => CallOrdering.make(t1.asInstanceOf[SCall], t2.asInstanceOf[SCall], ecb) case TString => StringOrdering.make(t1.asInstanceOf[SString], t2.asInstanceOf[SString], ecb) case TBinary => BinaryOrdering.make(t1.asInstanceOf[SBinary], t2.asInstanceOf[SBinary], ecb) @@ -77,11 +77,11 @@ abstract class CodeOrdering { def reversed: Boolean = false - final def checkedSCode[T](cb: EmitCodeBuilder, arg1: SCode, arg2: SCode, context: String, - f: (EmitCodeBuilder, SCode, SCode) => Code[T])(implicit ti: TypeInfo[T]): Code[T] = { - if (arg1.st != type1) + final def checkedPCode[T](cb: EmitCodeBuilder, arg1: PCode, arg2: PCode, context: String, + f: (EmitCodeBuilder, PCode, PCode) => Code[T])(implicit ti: TypeInfo[T]): Code[T] = { + if (!arg1.st.equalsExceptTopLevelRequiredness(type1)) throw new RuntimeException(s"CodeOrdering: $context: type mismatch (left)\n generated: $type1\n argument: ${ arg1.st }") - if (arg2.st != type2) + if (!arg2.st.equalsExceptTopLevelRequiredness(type2)) throw new RuntimeException(s"CodeOrdering: $context: type mismatch (right)\n generated: $type2\n argument: ${ arg2.st }") val cacheKey = ("ordering", reversed, type1, type2, context) @@ -89,8 +89,8 @@ abstract class CodeOrdering { FastIndexedSeq(arg1.st.paramType, arg2.st.paramType), ti) { mb => mb.emitWithBuilder[T] { cb => - val arg1 = mb.getSCodeParam(1) - val arg2 = mb.getSCodeParam(2) + val arg1 = mb.getPCodeParam(1) + val arg2 = mb.getPCodeParam(2) f(cb, arg1, arg2) } } @@ -99,12 +99,12 @@ abstract class CodeOrdering { final def checkedEmitCode[T](cb: EmitCodeBuilder, arg1: EmitCode, arg2: EmitCode, missingEqual: Boolean, context: String, f: (EmitCodeBuilder, EmitCode, EmitCode, Boolean) => Code[T])(implicit ti: TypeInfo[T]): Code[T] = { - if (arg1.st != type1) + if (!arg1.st.equalsExceptTopLevelRequiredness(type1)) throw new RuntimeException(s"CodeOrdering: $context: type mismatch (left)\n generated: $type1\n argument: ${ arg1.st }") - if (arg2.st != type2) + if (!arg2.st.equalsExceptTopLevelRequiredness(type2)) throw new RuntimeException(s"CodeOrdering: $context: type mismatch (right)\n generated: $type2\n argument: ${ arg2.st }") - val cacheKey = ("ordering", reversed, arg1.emitType, arg2.emitType, context, missingEqual) + val cacheKey = ("ordering", reversed, type1, type2, context, missingEqual) val mb = cb.emb.ecb.getOrGenEmitMethod(s"ord_$context", cacheKey, FastIndexedSeq(arg1.emitParamType, arg2.emitParamType), ti) { mb => @@ -118,28 +118,28 @@ abstract class CodeOrdering { } - final def compareNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Int] = { - checkedSCode(cb, x, y, "compareNonnull", _compareNonnull) + final def compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = { + checkedPCode(cb, x, y, "compareNonnull", _compareNonnull) } - final def ltNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { - checkedSCode(cb, x, y, "ltNonnull", _ltNonnull) + final def ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + checkedPCode(cb, x, y, "ltNonnull", _ltNonnull) } - final def lteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { - checkedSCode(cb, x, y, "lteqNonnull", _lteqNonnull) + final def lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + checkedPCode(cb, x, y, "lteqNonnull", 
_lteqNonnull) } - final def gtNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { - checkedSCode(cb, x, y, "gtNonnull", _gtNonnull) + final def gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + checkedPCode(cb, x, y, "gtNonnull", _gtNonnull) } - final def gteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { - checkedSCode(cb, x, y, "gteqNonnull", _gteqNonnull) + final def gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + checkedPCode(cb, x, y, "gteqNonnull", _gteqNonnull) } - final def equivNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { - checkedSCode(cb, x, y, "equivNonnull", _equivNonnull) + final def equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + checkedPCode(cb, x, y, "equivNonnull", _equivNonnull) } final def lt(cb: EmitCodeBuilder, x: EmitCode, y: EmitCode, missingEqual: Boolean): Code[Boolean] = { @@ -166,17 +166,17 @@ abstract class CodeOrdering { checkedEmitCode(cb, x, y, missingEqual, "compare", _compare) } - def _compareNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Int] + def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] - def _ltNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] + def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] - def _lteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] + def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] - def _gtNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] + def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] - def _gteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] + def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] - def _equivNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] + def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] def _compare(cb: EmitCodeBuilder, x: EmitCode, y: EmitCode, missingEqual: Boolean = true): Code[Int] = { val xm = cb.newLocal("cord_compare_xm", x.m) @@ -269,28 +269,28 @@ abstract class CodeOrdering { override def reversed: Boolean = true - override def _compareNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Int] = outer._compareNonnull(cb, y, x) + override def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = outer._compareNonnull(cb, y, x) - override def _ltNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = outer._ltNonnull(cb, y, x) + override def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = outer._ltNonnull(cb, y, x) - override def _lteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = outer._lteqNonnull(cb, y, x) + override def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = outer._lteqNonnull(cb, y, x) - override def _gtNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = outer._gtNonnull(cb, y, x) + override def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = outer._gtNonnull(cb, y, x) - override def _gteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = outer._gteqNonnull(cb, y, x) + override def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = outer._gteqNonnull(cb, y, x) - override def _equivNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = outer._equivNonnull(cb, y, x) + override def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = 
outer._equivNonnull(cb, y, x) } } abstract class CodeOrderingCompareConsistentWithOthers extends CodeOrdering { - def _ltNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = compareNonnull(cb, x, y) < 0 + def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = compareNonnull(cb, x, y) < 0 - def _lteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = compareNonnull(cb, x, y) <= 0 + def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = compareNonnull(cb, x, y) <= 0 - def _gtNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = compareNonnull(cb, x, y) > 0 + def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = compareNonnull(cb, x, y) > 0 - def _gteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = compareNonnull(cb, x, y) >= 0 + def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = compareNonnull(cb, x, y) >= 0 - def _equivNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = compareNonnull(cb, x, y).ceq(0) + def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = compareNonnull(cb, x, y).ceq(0) } diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/IntervalOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/IntervalOrdering.scala index b8a8db77d74..06152f3280b 100644 --- a/hail/src/main/scala/is/hail/expr/ir/orderings/IntervalOrdering.scala +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/IntervalOrdering.scala @@ -2,8 +2,8 @@ package is.hail.expr.ir.orderings import is.hail.asm4s.{Code, CodeLabel} import is.hail.expr.ir.{EmitClassBuilder, EmitCode, EmitCodeBuilder} -import is.hail.types.physical.stypes.SCode -import is.hail.types.physical.stypes.interfaces.{SInterval, SIntervalCode, SIntervalValue} +import is.hail.types.physical.stypes.interfaces.SInterval +import is.hail.types.physical.{PCode, PIntervalCode, PIntervalValue} object IntervalOrdering { @@ -12,25 +12,25 @@ object IntervalOrdering { val type1: SInterval = t1 val type2: SInterval = t2 - private val setup: (EmitCodeBuilder, SCode, SCode) => (SIntervalValue, SIntervalValue) = { - case (cb, lhs: SIntervalCode, rhs: SIntervalCode) => + private val setup: (EmitCodeBuilder, PCode, PCode) => (PIntervalValue, PIntervalValue) = { + case (cb, lhs: PIntervalCode, rhs: PIntervalCode) => lhs.memoize(cb, "intervalord_lhs") -> rhs.memoize(cb, "intervalord_rhs") } - override def _compareNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Int] = { + override def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = { val pointCompare = ecb.getOrderingFunction(t1.pointType, t2.pointType, CodeOrdering.Compare()) val cmp = cb.newLocal[Int]("intervalord_cmp", 0) val (lhs, rhs) = setup(cb, x, y) - val lstart = EmitCode.fromI(cb.emb)(lhs.loadStart(_)) - val rstart = EmitCode.fromI(cb.emb)(rhs.loadStart(_)) + val lstart = EmitCode.fromI(cb.emb)(lhs.loadStart(_).typecast) + val rstart = EmitCode.fromI(cb.emb)(rhs.loadStart(_).typecast) cb.assign(cmp, pointCompare(cb, lstart, rstart)) cb.ifx(cmp.ceq(0), { cb.ifx(lhs.includesStart().cne(rhs.includesStart()), { cb.assign(cmp, lhs.includesStart().mux(-1, 1)) }, { - val lend = EmitCode.fromI(cb.emb)(lhs.loadEnd(_)) - val rend = EmitCode.fromI(cb.emb)(rhs.loadEnd(_)) + val lend = EmitCode.fromI(cb.emb)(lhs.loadEnd(_).typecast) + val rend = EmitCode.fromI(cb.emb)(rhs.loadEnd(_).typecast) cb.assign(cmp, pointCompare(cb, lend, rend)) cb.ifx(cmp.ceq(0), { cb.ifx(lhs.includesEnd().cne(rhs.includesEnd()), { @@ 
-43,7 +43,7 @@ object IntervalOrdering { cmp } - override def _equivNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { + override def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { val pointEq = ecb.getOrderingFunction(t1.pointType, t2.pointType, CodeOrdering.Equiv()) val Lout = CodeLabel() @@ -60,19 +60,19 @@ object IntervalOrdering { exitWith(false) }) - val lstart = EmitCode.fromI(cb.emb)(lhs.loadStart(_)) - val rstart = EmitCode.fromI(cb.emb)(rhs.loadStart(_)) + val lstart = EmitCode.fromI(cb.emb)(lhs.loadStart(_).typecast) + val rstart = EmitCode.fromI(cb.emb)(rhs.loadStart(_).typecast) cb.ifx(!pointEq(cb, lstart, rstart), exitWith(false)) - val lend = EmitCode.fromI(cb.emb)(lhs.loadEnd(_)) - val rend = EmitCode.fromI(cb.emb)(rhs.loadEnd(_)) + val lend = EmitCode.fromI(cb.emb)(lhs.loadEnd(_).typecast) + val rend = EmitCode.fromI(cb.emb)(rhs.loadEnd(_).typecast) cb.ifx(!pointEq(cb, lend, rend), exitWith(false)) cb.define(Lout) ret } - override def _ltNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { + override def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { val pointLt = ecb.getOrderingFunction(t1.pointType, t2.pointType, CodeOrdering.Lt()) val pointEq = ecb.getOrderingFunction(t1.pointType, t2.pointType, CodeOrdering.Equiv()) @@ -84,16 +84,16 @@ object IntervalOrdering { } val (lhs, rhs) = setup(cb, x, y) - val lstart = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadStart(_)), "linterval_start") - val rstart = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadStart(_)), "rinterval_start") + val lstart = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadStart(_).typecast), "linterval_start") + val rstart = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadStart(_).typecast), "rinterval_start") cb.ifx(pointLt(cb, lstart, rstart), exitWith(true)) cb.ifx(!pointEq(cb, lstart, rstart), exitWith(false)) cb.ifx(lhs.includesStart() && !rhs.includesStart(), exitWith(true)) cb.ifx(lhs.includesStart().cne(rhs.includesStart()), exitWith(false)) - val lend = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadEnd(_)), "linterval_end") - val rend = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadEnd(_)), "rinterval_end") + val lend = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadEnd(_).typecast), "linterval_end") + val rend = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadEnd(_).typecast), "rinterval_end") cb.ifx(pointLt(cb, lend, rend), exitWith(true)) cb.assign(ret, pointEq(cb, lend, rend) && !lhs.includesEnd() && rhs.includesEnd()) @@ -102,7 +102,7 @@ object IntervalOrdering { ret } - override def _lteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { + override def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { val pointLtEq = ecb.getOrderingFunction(t1.pointType, t2.pointType, CodeOrdering.Lteq()) val pointEq = ecb.getOrderingFunction(t1.pointType, t2.pointType, CodeOrdering.Equiv()) @@ -114,16 +114,16 @@ object IntervalOrdering { } val (lhs, rhs) = setup(cb, x, y) - val lstart = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadStart(_)), "linterval_start") - val rstart = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadStart(_)), "rinterval_start") + val lstart = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadStart(_).typecast), "linterval_start") + val rstart = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadStart(_).typecast), "rinterval_start") cb.ifx(!pointLtEq(cb, lstart, rstart), exitWith(false)) cb.ifx(!pointEq(cb, lstart, rstart), exitWith(true)) cb.ifx(lhs.includesStart() && !rhs.includesStart(), exitWith(true)) 
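The interval comparisons in these hunks are lexicographic on (start, includesStart, end, includesEnd): a closed start sorts before an open one, and an open end before a closed one. Below is a minimal, standalone Scala sketch of that rule on plain values; SimpleInterval and compareIntervals are hypothetical names used only for illustration, not Hail's staged CodeOrdering API.

// Simplified model of the ordering rule the hunks above implement.
final case class SimpleInterval[A](start: A, includesStart: Boolean, end: A, includesEnd: Boolean)

def compareIntervals[A](x: SimpleInterval[A], y: SimpleInterval[A])(implicit ord: Ordering[A]): Int = {
  val cStart = ord.compare(x.start, y.start)
  if (cStart != 0) cStart
  else if (x.includesStart != y.includesStart) {
    if (x.includesStart) -1 else 1          // closed start sorts before open start
  } else {
    val cEnd = ord.compare(x.end, y.end)
    if (cEnd != 0) cEnd
    else if (x.includesEnd != y.includesEnd) {
      if (x.includesEnd) 1 else -1          // open end sorts before closed end
    } else 0
  }
}

// e.g. compareIntervals(SimpleInterval(1, true, 5, false), SimpleInterval(1, false, 5, false)) < 0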
cb.ifx(lhs.includesStart().cne(rhs.includesStart()), exitWith(false)) - val lend = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadEnd(_)), "linterval_end") - val rend = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadEnd(_)), "rinterval_end") + val lend = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadEnd(_).typecast), "linterval_end") + val rend = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadEnd(_).typecast), "rinterval_end") cb.ifx(!pointLtEq(cb, lend, rend), exitWith(false)) cb.assign(ret, !pointEq(cb, lend, rend) || !lhs.includesEnd() || rhs.includesEnd()) @@ -131,7 +131,7 @@ object IntervalOrdering { ret } - override def _gtNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { + override def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { val pointGt = ecb.getOrderingFunction(t1.pointType, t2.pointType, CodeOrdering.Gt()) val pointEq = ecb.getOrderingFunction(t1.pointType, t2.pointType, CodeOrdering.Equiv()) @@ -143,16 +143,16 @@ object IntervalOrdering { } val (lhs, rhs) = setup(cb, x, y) - val lstart = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadStart(_)), "linterval_start") - val rstart = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadStart(_)), "rinterval_start") + val lstart = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadStart(_).typecast), "linterval_start") + val rstart = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadStart(_).typecast), "rinterval_start") cb.ifx(pointGt(cb, lstart, rstart), exitWith(true)) cb.ifx(!pointEq(cb, lstart, rstart), exitWith(false)) cb.ifx(!lhs.includesStart() && rhs.includesStart(), exitWith(true)) cb.ifx(lhs.includesStart().cne(rhs.includesStart()), exitWith(false)) - val lend = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadEnd(_)), "linterval_end") - val rend = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadEnd(_)), "rinterval_end") + val lend = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadEnd(_).typecast), "linterval_end") + val rend = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadEnd(_).typecast), "rinterval_end") cb.ifx(pointGt(cb, lend, rend), exitWith(true)) cb.assign(ret, pointEq(cb, lend, rend) && lhs.includesEnd() && !rhs.includesEnd()) @@ -161,7 +161,7 @@ object IntervalOrdering { ret } - override def _gteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { + override def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { val pointGtEq = ecb.getOrderingFunction(t1.pointType, t2.pointType, CodeOrdering.Gteq()) val pointEq = ecb.getOrderingFunction(t1.pointType, t2.pointType, CodeOrdering.Equiv()) @@ -173,16 +173,16 @@ object IntervalOrdering { } val (lhs, rhs) = setup(cb, x, y) - val lstart = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadStart(_)), "linterval_start") - val rstart = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadStart(_)), "rinterval_start") + val lstart = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadStart(_).typecast), "linterval_start") + val rstart = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadStart(_).typecast), "rinterval_start") cb.ifx(!pointGtEq(cb, lstart, rstart), exitWith(false)) cb.ifx(!pointEq(cb, lstart, rstart), exitWith(true)) cb.ifx(!lhs.includesStart() && rhs.includesStart(), exitWith(true)) cb.ifx(lhs.includesStart().cne(rhs.includesStart()), exitWith(false)) - val lend = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadEnd(_)), "linterval_end") - val rend = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadEnd(_)), "rinterval_end") + val lend = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadEnd(_).typecast), "linterval_end") + val rend = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadEnd(_).typecast), "rinterval_end") 
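The IterableOrdering hunks that follow apply the same idea element-wise: compare paired elements until one pair differs, then fall back to comparing lengths. A minimal, standalone sketch of that rule on plain Seq values (compareSeqs is a hypothetical helper for illustration, not Hail's staged API):

// First differing element pair decides; otherwise the shorter sequence sorts first.
def compareSeqs[A](x: Seq[A], y: Seq[A])(implicit ord: Ordering[A]): Int =
  x.iterator.zip(y.iterator)
    .map { case (l, r) => ord.compare(l, r) }
    .find(_ != 0)
    .getOrElse(Integer.compare(x.length, y.length))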
cb.ifx(!pointGtEq(cb, lend, rend), exitWith(false)) cb.assign(ret, !pointEq(cb, lend, rend) || lhs.includesEnd() || !rhs.includesEnd()) diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/IterableOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/IterableOrdering.scala index a746fd7bc5c..447d04d1bfe 100644 --- a/hail/src/main/scala/is/hail/expr/ir/orderings/IterableOrdering.scala +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/IterableOrdering.scala @@ -2,8 +2,8 @@ package is.hail.expr.ir.orderings import is.hail.asm4s._ import is.hail.expr.ir.{EmitClassBuilder, EmitCode, EmitCodeBuilder} -import is.hail.types.physical.stypes.interfaces.{SContainer, SIndexableValue} -import is.hail.types.physical.stypes.SCode +import is.hail.types.physical.stypes.interfaces.SContainer +import is.hail.types.physical.{PCode, PIndexableValue} object IterableOrdering { @@ -12,25 +12,25 @@ object IterableOrdering { val type1: SContainer = t1 val type2: SContainer = t2 - private[this] def setup(cb: EmitCodeBuilder, lhs: SCode, rhs: SCode): (SIndexableValue, SIndexableValue) = { + private[this] def setup(cb: EmitCodeBuilder, lhs: PCode, rhs: PCode): (PIndexableValue, PIndexableValue) = { val lhsv = lhs.asIndexable.memoize(cb, "container_ord_lhs") val rhsv = rhs.asIndexable.memoize(cb, "container_ord_rhs") lhsv -> rhsv } - private[this] def loop(cb: EmitCodeBuilder, lhs: SIndexableValue, rhs: SIndexableValue)( + private[this] def loop(cb: EmitCodeBuilder, lhs: PIndexableValue, rhs: PIndexableValue)( f: (EmitCode, EmitCode) => Unit ): Unit = { val i = cb.newLocal[Int]("i") val lim = cb.newLocal("lim", lhs.loadLength().min(rhs.loadLength())) cb.forLoop(cb.assign(i, 0), i < lim, cb.assign(i, i + 1), { - val left = EmitCode.fromI(cb.emb)(lhs.loadElement(_, i)) - val right = EmitCode.fromI(cb.emb)(rhs.loadElement(_, i)) + val left = EmitCode.fromI(cb.emb)(lhs.loadElement(_, i).typecast) + val right = EmitCode.fromI(cb.emb)(rhs.loadElement(_, i).typecast) f(left, right) }) } - override def _compareNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Int] = { + override def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = { val elemCmp = ecb.getOrderingFunction(t1.elementType, t2.elementType, CodeOrdering.Compare()) val Lout = CodeLabel() @@ -50,7 +50,7 @@ object IterableOrdering { cmp } - override def _ltNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { + override def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { val elemLt = ecb.getOrderingFunction(t1.elementType, t2.elementType, CodeOrdering.Lt()) val elemEq = ecb.getOrderingFunction(t1.elementType, t2.elementType, CodeOrdering.Equiv()) @@ -78,7 +78,7 @@ object IterableOrdering { ret } - override def _lteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { + override def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { val elemLtEq = ecb.getOrderingFunction(t1.elementType, t2.elementType, CodeOrdering.Lteq()) val elemEq = ecb.getOrderingFunction(t1.elementType, t2.elementType, CodeOrdering.Equiv()) @@ -106,7 +106,7 @@ object IterableOrdering { ret } - override def _gtNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { + override def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { val elemGt = ecb.getOrderingFunction(t1.elementType, t2.elementType, CodeOrdering.Gt()) val elemEq = ecb.getOrderingFunction(t1.elementType, t2.elementType, CodeOrdering.Equiv()) @@ -134,7 +134,7 @@ object IterableOrdering { 
ret } - override def _gteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { + override def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { val elemGtEq = ecb.getOrderingFunction(t1.elementType, t2.elementType, CodeOrdering.Gteq()) val elemEq = ecb.getOrderingFunction(t1.elementType, t2.elementType, CodeOrdering.Equiv()) @@ -162,7 +162,7 @@ object IterableOrdering { ret } - override def _equivNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { + override def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { val elemEq = ecb.getOrderingFunction(t1.elementType, t2.elementType, CodeOrdering.Equiv()) val ret = cb.newLocal[Boolean]("iterable_eq", true) val Lout = CodeLabel() diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/LocusOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/LocusOrdering.scala index b71c72e53b5..24d2574e203 100644 --- a/hail/src/main/scala/is/hail/expr/ir/orderings/LocusOrdering.scala +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/LocusOrdering.scala @@ -2,9 +2,9 @@ package is.hail.expr.ir.orderings import is.hail.asm4s.Code import is.hail.expr.ir.{EmitClassBuilder, EmitCodeBuilder, EmitMethodBuilder} -import is.hail.types.physical.stypes.SCode import is.hail.types.physical.stypes.concrete.SCanonicalLocusPointer -import is.hail.types.physical.stypes.interfaces.{SLocus, SLocusValue, SStringValue} +import is.hail.types.physical.stypes.interfaces.{SLocus, SStringValue} +import is.hail.types.physical.{PBinary, PCode, PLocusValue} object LocusOrdering { def make(t1: SLocus, t2: SLocus, ecb: EmitClassBuilder[_]): CodeOrdering = { @@ -17,10 +17,10 @@ object LocusOrdering { require(t1.rg == t2.rg) - def _compareNonnull(cb: EmitCodeBuilder, lhsc: SCode, rhsc: SCode): Code[Int] = { + def _compareNonnull(cb: EmitCodeBuilder, lhsc: PCode, rhsc: PCode): Code[Int] = { val codeRG = cb.emb.getReferenceGenome(t1.rg) - val lhs: SLocusValue = lhsc.asLocus.memoize(cb, "locus_cmp_lhs") - val rhs: SLocusValue = rhsc.asLocus.memoize(cb, "locus_cmp_rhs") + val lhs: PLocusValue = lhsc.asLocus.memoize(cb, "locus_cmp_lhs") + val rhs: PLocusValue = rhsc.asLocus.memoize(cb, "locus_cmp_rhs") val lhsContig = lhs.contig(cb).memoize(cb, "locus_cmp_lcontig").asInstanceOf[SStringValue] val rhsContig = rhs.contig(cb).memoize(cb, "locus_cmp_rcontig").asInstanceOf[SStringValue] @@ -31,8 +31,8 @@ object LocusOrdering { val ret = cb.newLocal[Int]("locus_cmp_ret", 0) cb.ifx(bincmp.compareNonnull(cb, - lhsContig.get.asBytes(), - rhsContig.get.asBytes()).ceq(0), { + lhsContig.get.asBytes().asPCode, + rhsContig.get.asBytes().asPCode).ceq(0), { cb.assign(ret, Code.invokeStatic2[java.lang.Integer, Int, Int, Int]( "compare", lhs.position(cb), rhs.position(cb))) }, { diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/PrimitiveOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/PrimitiveOrdering.scala index fa9056877b5..7d8c217e59e 100644 --- a/hail/src/main/scala/is/hail/expr/ir/orderings/PrimitiveOrdering.scala +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/PrimitiveOrdering.scala @@ -2,110 +2,110 @@ package is.hail.expr.ir.orderings import is.hail.asm4s.Code import is.hail.expr.ir.{EmitClassBuilder, EmitCodeBuilder} -import is.hail.types.physical.stypes.SCode +import is.hail.types.physical.PCode import is.hail.types.physical.stypes.primitives._ object Int32Ordering { - def make(t1: SInt32.type, t2: SInt32.type, ecb: EmitClassBuilder[_]): CodeOrdering = { + def make(t1: SInt32, t2: SInt32, 
ecb: EmitClassBuilder[_]): CodeOrdering = { new CodeOrdering { - val type1: SInt32.type = t1 - val type2: SInt32.type = t2 + val type1: SInt32 = t1 + val type2: SInt32 = t2 - def _compareNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Int] = + def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = Code.invokeStatic2[java.lang.Integer, Int, Int, Int]("compare", x.asInt.intCode(cb), y.asInt.intCode(cb)) - def _ltNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asInt.intCode(cb) < y.asInt.intCode(cb) + def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asInt.intCode(cb) < y.asInt.intCode(cb) - def _lteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asInt.intCode(cb) <= y.asInt.intCode(cb) + def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asInt.intCode(cb) <= y.asInt.intCode(cb) - def _gtNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asInt.intCode(cb) > y.asInt.intCode(cb) + def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asInt.intCode(cb) > y.asInt.intCode(cb) - def _gteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asInt.intCode(cb) >= y.asInt.intCode(cb) + def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asInt.intCode(cb) >= y.asInt.intCode(cb) - def _equivNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asInt.intCode(cb).ceq(y.asInt.intCode(cb)) + def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asInt.intCode(cb).ceq(y.asInt.intCode(cb)) } } } object Int64Ordering { - def make(t1: SInt64.type, t2: SInt64.type, ecb: EmitClassBuilder[_]): CodeOrdering = { + def make(t1: SInt64, t2: SInt64, ecb: EmitClassBuilder[_]): CodeOrdering = { new CodeOrdering { - val type1: SInt64.type = t1 - val type2: SInt64.type = t2 + val type1: SInt64 = t1 + val type2: SInt64 = t2 - def _compareNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Int] = + def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = Code.invokeStatic2[java.lang.Long, Long, Long, Int]("compare", x.asLong.longCode(cb), y.asLong.longCode(cb)) - def _ltNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asLong.longCode(cb) < y.asLong.longCode(cb) + def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asLong.longCode(cb) < y.asLong.longCode(cb) - def _lteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asLong.longCode(cb) <= y.asLong.longCode(cb) + def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asLong.longCode(cb) <= y.asLong.longCode(cb) - def _gtNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asLong.longCode(cb) > y.asLong.longCode(cb) + def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asLong.longCode(cb) > y.asLong.longCode(cb) - def _gteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asLong.longCode(cb) >= y.asLong.longCode(cb) + def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asLong.longCode(cb) >= y.asLong.longCode(cb) - def _equivNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asLong.longCode(cb).ceq(y.asLong.longCode(cb)) + def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asLong.longCode(cb).ceq(y.asLong.longCode(cb)) } } } object Float32Ordering { - def make(t1: SFloat32.type, t2: SFloat32.type, ecb: EmitClassBuilder[_]): 
CodeOrdering = { + def make(t1: SFloat32, t2: SFloat32, ecb: EmitClassBuilder[_]): CodeOrdering = { new CodeOrdering { - val type1: SFloat32.type = t1 - val type2: SFloat32.type = t2 + val type1: SFloat32 = t1 + val type2: SFloat32 = t2 - def _compareNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Int] = + def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = Code.invokeStatic2[java.lang.Float, Float, Float, Int]("compare", x.asFloat.floatCode(cb), y.asFloat.floatCode(cb)) - def _ltNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asFloat.floatCode(cb) < y.asFloat.floatCode(cb) + def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asFloat.floatCode(cb) < y.asFloat.floatCode(cb) - def _lteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asFloat.floatCode(cb) <= y.asFloat.floatCode(cb) + def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asFloat.floatCode(cb) <= y.asFloat.floatCode(cb) - def _gtNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asFloat.floatCode(cb) > y.asFloat.floatCode(cb) + def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asFloat.floatCode(cb) > y.asFloat.floatCode(cb) - def _gteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asFloat.floatCode(cb) >= y.asFloat.floatCode(cb) + def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asFloat.floatCode(cb) >= y.asFloat.floatCode(cb) - def _equivNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asFloat.floatCode(cb).ceq(y.asFloat.floatCode(cb)) + def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asFloat.floatCode(cb).ceq(y.asFloat.floatCode(cb)) } } } object Float64Ordering { - def make(t1: SFloat64.type, t2: SFloat64.type, ecb: EmitClassBuilder[_]): CodeOrdering = { + def make(t1: SFloat64, t2: SFloat64, ecb: EmitClassBuilder[_]): CodeOrdering = { new CodeOrdering { - val type1: SFloat64.type = t1 - val type2: SFloat64.type = t2 + val type1: SFloat64 = t1 + val type2: SFloat64 = t2 - def _compareNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Int] = + def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = Code.invokeStatic2[java.lang.Double, Double, Double, Int]("compare", x.asDouble.doubleCode(cb), y.asDouble.doubleCode(cb)) - def _ltNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asDouble.doubleCode(cb) < y.asDouble.doubleCode(cb) + def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asDouble.doubleCode(cb) < y.asDouble.doubleCode(cb) - def _lteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asDouble.doubleCode(cb) <= y.asDouble.doubleCode(cb) + def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asDouble.doubleCode(cb) <= y.asDouble.doubleCode(cb) - def _gtNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asDouble.doubleCode(cb) > y.asDouble.doubleCode(cb) + def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asDouble.doubleCode(cb) > y.asDouble.doubleCode(cb) - def _gteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asDouble.doubleCode(cb) >= y.asDouble.doubleCode(cb) + def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asDouble.doubleCode(cb) >= y.asDouble.doubleCode(cb) - def _equivNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = 
x.asDouble.doubleCode(cb).ceq(y.asDouble.doubleCode(cb)) + def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asDouble.doubleCode(cb).ceq(y.asDouble.doubleCode(cb)) } } } object BooleanOrdering { - def make(t1: SBoolean.type, t2: SBoolean.type, ecb: EmitClassBuilder[_]): CodeOrdering = { + def make(t1: SBoolean, t2: SBoolean, ecb: EmitClassBuilder[_]): CodeOrdering = { new CodeOrderingCompareConsistentWithOthers { - val type1: SBoolean.type = t1 - val type2: SBoolean.type = t2 + val type1: SBoolean = t1 + val type2: SBoolean = t2 - def _compareNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Int] = + def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = Code.invokeStatic2[java.lang.Boolean, Boolean, Boolean, Int]("compare", x.asBoolean.boolCode(cb), y.asBoolean.boolCode(cb)) } } diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/ShuffleOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/ShuffleOrdering.scala index fc5efa62ba7..cb44907593a 100644 --- a/hail/src/main/scala/is/hail/expr/ir/orderings/ShuffleOrdering.scala +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/ShuffleOrdering.scala @@ -2,7 +2,7 @@ package is.hail.expr.ir.orderings import is.hail.asm4s.Code import is.hail.expr.ir.{EmitClassBuilder, EmitCodeBuilder} -import is.hail.types.physical.stypes.SCode +import is.hail.types.physical.PCode import is.hail.types.physical.stypes.concrete.{SCanonicalShufflePointer, SCanonicalShufflePointerCode} import is.hail.types.physical.stypes.interfaces.SShuffle @@ -15,7 +15,7 @@ object ShuffleOrdering { val type1: SShuffle = t1 val type2: SShuffle = t2 - def _compareNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Int] = { + def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = { val bcode1 = x.asInstanceOf[SCanonicalShufflePointerCode].binaryRepr val bcode2 = y.asInstanceOf[SCanonicalShufflePointerCode].binaryRepr val ord = BinaryOrdering.make(bcode1.st, bcode2.st, ecb) diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/StringOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/StringOrdering.scala index cb40c07f106..26e7609c0af 100644 --- a/hail/src/main/scala/is/hail/expr/ir/orderings/StringOrdering.scala +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/StringOrdering.scala @@ -2,7 +2,7 @@ package is.hail.expr.ir.orderings import is.hail.asm4s.Code import is.hail.expr.ir.{EmitClassBuilder, EmitCodeBuilder} -import is.hail.types.physical.stypes.SCode +import is.hail.types.physical.PCode import is.hail.types.physical.stypes.concrete.{SStringPointer, SStringPointerCode} import is.hail.types.physical.stypes.interfaces.SString @@ -15,7 +15,7 @@ object StringOrdering { val type1: SString = t1 val type2: SString = t2 - def _compareNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Int] = { + def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = { val bcode1 = x.asInstanceOf[SStringPointerCode].binaryRepr val bcode2 = y.asInstanceOf[SStringPointerCode].binaryRepr val ord = BinaryOrdering.make(bcode1.st, bcode2.st, ecb) diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/StructOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/StructOrdering.scala index d81a5445a7c..1f81620e316 100644 --- a/hail/src/main/scala/is/hail/expr/ir/orderings/StructOrdering.scala +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/StructOrdering.scala @@ -2,8 +2,8 @@ package is.hail.expr.ir.orderings import is.hail.asm4s.{Code, CodeLabel} import 
is.hail.expr.ir.{Ascending, EmitClassBuilder, EmitCode, EmitCodeBuilder, SortOrder} -import is.hail.types.physical.stypes.SCode -import is.hail.types.physical.stypes.interfaces.{SBaseStruct, SBaseStructValue} +import is.hail.types.physical.stypes.interfaces.SBaseStruct +import is.hail.types.physical.{PBaseStructValue, PCode} object StructOrdering { def make( @@ -19,7 +19,7 @@ object StructOrdering { require(sortOrders == null || sortOrders.size == t1.size) - def setup(cb: EmitCodeBuilder, lhs: SCode, rhs: SCode): (SBaseStructValue, SBaseStructValue) = { + def setup(cb: EmitCodeBuilder, lhs: PCode, rhs: PCode): (PBaseStructValue, PBaseStructValue) = { lhs.asBaseStruct.memoize(cb, "structord_lhs") -> rhs.asBaseStruct.memoize(cb, "structord_rhs") } @@ -28,7 +28,7 @@ object StructOrdering { if (sortOrders == null) Ascending else sortOrders(i), op) - override def _compareNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Int] = { + override def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = { val (lhs, rhs) = setup(cb, x, y) val Lout = CodeLabel() val cmp = cb.newLocal("cmp", 0) @@ -36,8 +36,8 @@ object StructOrdering { var i = 0 while (i < t1.size) { val fldCmp = fieldOrdering(i, CodeOrdering.Compare(missingFieldsEqual)) - val l = EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i) } - val r = EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i) } + val l = EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i).typecast } + val r = EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i).typecast } cb.assign(cmp, fldCmp(cb, l, r)) cb.ifx(cmp.cne(0), cb.goto(Lout)) i += 1 @@ -47,7 +47,7 @@ object StructOrdering { cmp } - override def _ltNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { + override def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { val (lhs, rhs) = setup(cb, x, y) val Lout = CodeLabel() val lt = cb.newLocal("lt", true) @@ -58,8 +58,8 @@ object StructOrdering { val fldLt = fieldOrdering(i, CodeOrdering.Lt(missingFieldsEqual)) val fldEq = fieldOrdering(i, CodeOrdering.Equiv(missingFieldsEqual)) - val l = cb.memoize(EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i) }, s"struct_lt_lhs_fld$i") - val r = cb.memoize(EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i) }, s"struct_lt_rhs_fld$i") + val l = cb.memoize(EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i).typecast }, s"struct_lt_lhs_fld$i") + val r = cb.memoize(EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i).typecast }, s"struct_lt_rhs_fld$i") cb.assign(lt, fldLt(cb, l, r)) cb.assign(eq, !lt && fldEq(cb, l, r)) cb.ifx(!eq, cb.goto(Lout)) @@ -70,7 +70,7 @@ object StructOrdering { lt } - override def _lteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { + override def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { val (lhs, rhs) = setup(cb, x, y) val Lout = CodeLabel() val lteq = cb.newLocal("lteq", true) @@ -81,8 +81,8 @@ object StructOrdering { val fldLtEq = fieldOrdering(i, CodeOrdering.Lteq(missingFieldsEqual)) val fldEq = fieldOrdering(i, CodeOrdering.Equiv(missingFieldsEqual)) - val l = cb.memoize(EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i) }, s"struct_lteq_lhs_fld$i") - val r = cb.memoize(EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i) }, s"struct_lteq_rhs_fld$i") + val l = cb.memoize(EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i).typecast }, s"struct_lteq_lhs_fld$i") + val r = cb.memoize(EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i).typecast }, s"struct_lteq_rhs_fld$i") cb.assign(lteq, 
fldLtEq(cb, l, r)) cb.assign(eq, fldEq(cb, l, r)) cb.ifx(!eq, cb.goto(Lout)) @@ -93,7 +93,7 @@ object StructOrdering { lteq } - override def _gtNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { + override def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { val (lhs, rhs) = setup(cb, x, y) val Lout = CodeLabel() val gt = cb.newLocal("gt", false) @@ -104,8 +104,8 @@ object StructOrdering { val fldGt = fieldOrdering(i, CodeOrdering.Gt(missingFieldsEqual)) val fldEq = fieldOrdering(i, CodeOrdering.Equiv(missingFieldsEqual)) - val l = cb.memoize(EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i) }, s"struct_gt_lhs_fld$i") - val r = cb.memoize(EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i) }, s"struct_gt_rhs_fld$i") + val l = cb.memoize(EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i).typecast }, s"struct_gt_lhs_fld$i") + val r = cb.memoize(EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i).typecast }, s"struct_gt_rhs_fld$i") cb.assign(gt, fldGt(cb, l, r)) cb.assign(eq, !gt && fldEq(cb, l, r)) cb.ifx(!eq, cb.goto(Lout)) @@ -116,7 +116,7 @@ object StructOrdering { gt } - override def _gteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { + override def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { val (lhs, rhs) = setup(cb, x, y) val Lout = CodeLabel() val gteq = cb.newLocal("gteq", true) @@ -127,8 +127,8 @@ object StructOrdering { val fldGtEq = fieldOrdering(i, CodeOrdering.Gteq(missingFieldsEqual)) val fldEq = fieldOrdering(i, CodeOrdering.Equiv(missingFieldsEqual)) - val l = cb.memoize(EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i) }, s"struct_gteq_lhs_fld$i") - val r = cb.memoize(EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i) }, s"struct_gteq_rhs_fld$i") + val l = cb.memoize(EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i).typecast }, s"struct_gteq_lhs_fld$i") + val r = cb.memoize(EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i).typecast }, s"struct_gteq_rhs_fld$i") cb.assign(gteq, fldGtEq(cb, l, r)) cb.assign(eq, fldEq(cb, l, r)) cb.ifx(!eq, cb.goto(Lout)) @@ -139,7 +139,7 @@ object StructOrdering { gteq } - override def _equivNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { + override def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { val (lhs, rhs) = setup(cb, x, y) val Lout = CodeLabel() val eq = cb.newLocal("cmp", true) @@ -147,8 +147,8 @@ object StructOrdering { var i = 0 while (i < t1.size) { val fldEq = fieldOrdering(i, CodeOrdering.Equiv(missingFieldsEqual)) - val l = EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i) } - val r = EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i) } + val l = EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i).typecast } + val r = EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i).typecast } cb.assign(eq, fldEq(cb, l, r)) cb.ifx(!eq, cb.goto(Lout)) i += 1 diff --git a/hail/src/main/scala/is/hail/expr/ir/package.scala b/hail/src/main/scala/is/hail/expr/ir/package.scala index a75f0de877d..7c62e79ba28 100644 --- a/hail/src/main/scala/is/hail/expr/ir/package.scala +++ b/hail/src/main/scala/is/hail/expr/ir/package.scala @@ -4,7 +4,7 @@ import is.hail.asm4s import is.hail.asm4s._ import is.hail.expr.ir.functions.IRFunctionRegistry import is.hail.types.physical._ -import is.hail.types.physical.stypes.{SCode, SValue} +import is.hail.types.physical.stypes.SCode import is.hail.types.virtual._ import is.hail.types.{coerce => tycoerce, _} import is.hail.utils._ @@ -14,7 +14,9 @@ import 
scala.language.implicitConversions package object ir { type TokenIterator = BufferedIterator[Token] - type IEmitCode = IEmitCodeGen[SCode] + + type IEmitCode = IEmitCodeGen[PCode] + type IEmitSCode = IEmitCodeGen[SCode] var uidCounter: Long = 0 @@ -141,10 +143,6 @@ package object ir { ArraySort(stream, l.name, r.name, f(l, r)) } - def sliceArrayIR(arrayIR: IR, startIR: IR, stopIR: IR): IR = { - invoke("slice", arrayIR.typ, arrayIR, startIR, stopIR) - } - def joinIR(left: IR, right: IR, lkey: IndexedSeq[String], rkey: IndexedSeq[String], joinType: String)(f: (Ref, Ref) => IR): IR = { val lRef = Ref(genUID(), left.typ.asInstanceOf[TStream].elementType) val rRef = Ref(genUID(), right.typ.asInstanceOf[TStream].elementType) @@ -177,7 +175,6 @@ package object ir { } def makestruct(fields: (String, IR)*): MakeStruct = MakeStruct(fields) - def maketuple(fields: IR*): MakeTuple = MakeTuple(fields.zipWithIndex.map{ case (field, idx) => (idx, field)}) implicit def toRichIndexedSeqEmitSettable(s: IndexedSeq[EmitSettable]): RichIndexedSeqEmitSettable = new RichIndexedSeqEmitSettable(s) @@ -189,9 +186,9 @@ package object ir { implicit def valueToCodeParam(v: Value[_]): CodeParam = CodeParam(v) - implicit def sCodeToSCodeParam(sc: SCode): SCodeParam = SCodeParam(sc) + implicit def toPCodeParam(pc: PCode): PCodeParam = PCodeParam(pc) - implicit def sValueToSCodeParam(sv: SValue): SCodeParam = SCodeParam(sv) + implicit def pValueToPCodeParam(pv: PValue): PCodeParam = PCodeParam(pv) implicit def toEmitParam(ec: EmitCode): EmitParam = EmitParam(ec) diff --git a/hail/src/main/scala/is/hail/expr/ir/streams/EmitStream.scala b/hail/src/main/scala/is/hail/expr/ir/streams/EmitStream.scala index 6b525776bad..04a76870f30 100644 --- a/hail/src/main/scala/is/hail/expr/ir/streams/EmitStream.scala +++ b/hail/src/main/scala/is/hail/expr/ir/streams/EmitStream.scala @@ -5,12 +5,11 @@ import is.hail.asm4s._ import is.hail.expr.ir._ import is.hail.expr.ir.orderings.StructOrdering import is.hail.services.shuffler.CompileTimeShuffleClient -import is.hail.types.{TypeWithRequiredness, VirtualTypeWithReq} -import is.hail.types.physical.stypes.{EmitType, SType} +import is.hail.types.physical.stypes.EmitType import is.hail.types.physical.stypes.concrete.SCanonicalShufflePointerSettable import is.hail.types.physical.stypes.interfaces._ import is.hail.types.physical.stypes.primitives.{SInt32, SInt32Code} -import is.hail.types.physical.{PCanonicalArray, PCanonicalStream, PCanonicalStruct, PInterval, PStruct, PType} +import is.hail.types.physical.{PCanonicalStream, PCode, PInterval, PStruct, PType} import is.hail.types.virtual.{TInterval, TShuffle, TStream} import is.hail.utils._ @@ -143,33 +142,31 @@ object EmitStream { streamIR: IR, cb: EmitCodeBuilder, outerRegion: Value[Region], - env: EmitEnv, + env: Emit.E, container: Option[AggContainer] ): IEmitCode = { val mb = cb.emb - def emitVoid(ir: IR, cb: EmitCodeBuilder, region: Value[Region] = outerRegion, env: EmitEnv = env, container: Option[AggContainer] = container): Unit = + def emitVoid(ir: IR, cb: EmitCodeBuilder, region: Value[Region] = outerRegion, env: Emit.E = env, container: Option[AggContainer] = container): Unit = emitter.emitVoid(cb, ir, region, env, container, None) - def emit(ir: IR, cb: EmitCodeBuilder, region: Value[Region] = outerRegion, env: EmitEnv = env, container: Option[AggContainer] = container): IEmitCode = { + def emit(ir: IR, cb: EmitCodeBuilder, region: Value[Region] = outerRegion, env: Emit.E = env, container: Option[AggContainer] = container): IEmitCode 
= { ir.typ match { case _: TStream => produce(ir, cb, region, env, container) case _ => emitter.emitI(ir, cb, region, env, container, None) } } - def produce(streamIR: IR, cb: EmitCodeBuilder, region: Value[Region] = outerRegion, env: EmitEnv = env, container: Option[AggContainer] = container): IEmitCode = + def produce(streamIR: IR, cb: EmitCodeBuilder, region: Value[Region] = outerRegion, env: Emit.E = env, container: Option[AggContainer] = container): IEmitCode = EmitStream.produce(emitter, streamIR, cb, region, env, container) - def typeWithReqx(node: IR): VirtualTypeWithReq = VirtualTypeWithReq(node.typ, emitter.ctx.req.lookup(node).asInstanceOf[TypeWithRequiredness]) - def typeWithReq: VirtualTypeWithReq = typeWithReqx(streamIR) - streamIR match { - case x@NA(_typ) => - val st = typeWithReq.canonicalEmitType.st.asInstanceOf[SStream] + case NA(_typ) => + val eltType = streamIR.pType.asInstanceOf[PCanonicalStream].elementType + val st = SStream(eltType.sType, false) val region = mb.genFieldThisRef[Region]("na_region") val producer = new StreamProducer { override def initialize(cb: EmitCodeBuilder): Unit = {} @@ -180,15 +177,15 @@ object EmitStream { override val LproduceElement: CodeLabel = mb.defineAndImplementLabel { cb => cb.goto(LendOfStream) } - override val element: EmitCode = EmitCode.present(mb, st.elementType.defaultValue) + override val element: EmitCode = EmitCode.present(mb, eltType.defaultValue(mb)) override def close(cb: EmitCodeBuilder): Unit = {} } - IEmitCode.missing(cb, SStreamCode(producer)) + IEmitCode.missing(cb, SStreamCode(st, producer)) case Ref(name, _typ) => assert(_typ.isInstanceOf[TStream]) - env.bindings.lookup(name).toI(cb) + env.lookup(name).toI(cb) .map(cb) { case (stream: SStreamCode) => val childProducer = stream.producer val producer = new StreamProducer { @@ -219,7 +216,7 @@ object EmitStream { case In(n, _) => // this, Code[Region], ... - val param = env.inputValues(n).apply(outerRegion) + val param = mb.getEmitParam(2 + n, outerRegion) if (!param.st.isInstanceOf[SStream]) throw new RuntimeException(s"parameter ${ 2 + n } is not a stream! 
t=${ param.st } }, params=${ mb.emitParamTypes }") param.load.toI(cb) @@ -227,12 +224,13 @@ object EmitStream { case ToStream(a, _requiresMemoryManagementPerElement) => emit(a, cb).map(cb) { case ind: SIndexableCode => - val containerField = mb.newPField("tostream_arr", ind.st) + val containerField = mb.newPField("tostream_arr", ind.pt) val container = containerField.asInstanceOf[SIndexableValue] val idx = mb.genFieldThisRef[Int]("tostream_idx") val regionVar = mb.genFieldThisRef[Region]("tostream_region") SStreamCode( + SStream(ind.st.elementType, ind.pt.required), new StreamProducer { override def initialize(cb: EmitCodeBuilder): Unit = { cb.assign(containerField, ind) @@ -252,7 +250,7 @@ object EmitStream { } val element: EmitCode = EmitCode.fromI(mb) { cb => - container.loadElement(cb, idx) } + container.loadElement(cb, idx).typecast[PCode] } def close(cb: EmitCodeBuilder): Unit = {} }) @@ -263,16 +261,14 @@ object EmitStream { val region = mb.genFieldThisRef[Region]("makestream_region") val emittedArgs = args.map(a => EmitCode.fromI(mb)(cb => emit(a, cb, region))).toFastIndexedSeq - // FIXME use SType.chooseCompatibleType - val st = typeWithReq.canonicalEmitType.st.asInstanceOf[SStream] - val unifiedType = st.elementEmitType - val eltField = mb.newEmitField("makestream_elt", unifiedType) + val unifiedType = x.pType.asInstanceOf[PCanonicalStream].elementType.sType // FIXME + val eltField = mb.newEmitField("makestream_elt", EmitType(unifiedType, emittedArgs.forall(_.required))) val staticLen = args.size val current = mb.genFieldThisRef[Int]("makestream_current") IEmitCode.present(cb, SStreamCode( - st, + SStream(unifiedType, required = true), new StreamProducer { override def initialize(cb: EmitCodeBuilder): Unit = { cb.assign(current, 0) // switches on 1..N @@ -292,7 +288,7 @@ object EmitStream { }, emittedArgs.map { elem => EmitCodeBuilder.scopedVoid(mb) { cb => - cb.assign(eltField, elem.toI(cb).map(cb)(pc => pc.castTo(cb, region, unifiedType.st, false))) + cb.assign(eltField, elem.toI(cb).map(cb)(pc => pc.castTo(cb, region, unifiedType.pType, false))) cb.goto(LendOfSwitch) } }) @@ -321,10 +317,7 @@ object EmitStream { val leftProducer = leftEC.pv.asStream.producer val rightProducer = rightEC.pv.asStream.producer - val unifiedStreamSType = typeWithReq.canonicalEmitType.st.asInstanceOf[SStream] - val unifiedElementType = unifiedStreamSType.elementEmitType - - val xElt = mb.newEmitField(unifiedElementType) + val xElt = mb.newEmitField(x.pType.asInstanceOf[PCanonicalStream].elementType, leftEC.required && rightEC.required) // FIXME unify here val region = mb.genFieldThisRef[Region]("streamif_region") cb.ifx(xCond, @@ -357,11 +350,11 @@ object EmitStream { cb.ifx(xCond, cb.goto(leftProducer.LproduceElement), cb.goto(rightProducer.LproduceElement)) cb.define(leftProducer.LproduceElementDone) - cb.assign(xElt, leftProducer.element.toI(cb).map(cb)(_.castTo(cb, region, xElt.st))) + cb.assign(xElt, leftProducer.element.toI(cb).map(cb)(_.castTo(cb, region, xElt.pt))) cb.goto(LproduceElementDone) cb.define(rightProducer.LproduceElementDone) - cb.assign(xElt, rightProducer.element.toI(cb).map(cb)(_.castTo(cb, region, xElt.st))) + cb.assign(xElt, rightProducer.element.toI(cb).map(cb)(_.castTo(cb, region, xElt.pt))) cb.goto(LproduceElementDone) cb.define(leftProducer.LendOfStream) @@ -379,7 +372,7 @@ object EmitStream { } IEmitCode(Lmissing, Lpresent, - SStreamCode(producer), + SStreamCode(SStream(xElt.st, required = leftEC.pt.required && rightEC.pt.required), producer), leftEC.required && 
rightEC.required) } @@ -443,11 +436,14 @@ object EmitStream { cb.goto(LproduceElementDone) } - val element: EmitCode = EmitCode.present(mb, new SInt32Code(curr)) + val element: EmitCode = EmitCode.present(mb, new SInt32Code(true, curr)) def close(cb: EmitCodeBuilder): Unit = {} } - SStreamCode(producer) + SStreamCode( + SStream(SInt32(true), required = true), + producer + ) } } } @@ -513,7 +509,9 @@ object EmitStream { cb.goto(producer.LendOfStream) } - SStreamCode(producer) + SStreamCode( + childStream.st, + producer) } case StreamTake(a, num) => @@ -555,7 +553,7 @@ object EmitStream { } } - SStreamCode(producer) + SStreamCode(childStream.st, producer) } } @@ -602,7 +600,7 @@ object EmitStream { } } - SStreamCode(producer) + SStreamCode(childStream.st, producer) } } @@ -646,15 +644,17 @@ object EmitStream { cb.goto(producer.LendOfStream) } - SStreamCode(producer) + SStreamCode( + SStream(bodyResult.st, required = childStream.st.required), + producer + ) } case x@StreamScan(childIR, zeroIR, accName, eltName, bodyIR) => produce(childIR, cb).map(cb) { case (childStream: SStreamCode) => val childProducer = childStream.producer - val accEmitType = VirtualTypeWithReq(zeroIR.typ, emitter.ctx.req.lookupState(x).head.asInstanceOf[TypeWithRequiredness]).canonicalEmitType - + val accEmitType = EmitType(x.accPType.sType, x.accPType.required) val accValueAccRegion = mb.newEmitField(accEmitType) val accValueEltRegion = mb.newEmitField(accEmitType) @@ -686,7 +686,7 @@ object EmitStream { cb.ifx(first, { cb.assign(first, false) - cb.assign(accValueEltRegion, emit(zeroIR, cb, region = elementRegion).map(cb)(sc => sc.castTo(cb, elementRegion, accValueAccRegion.st))) + cb.assign(accValueEltRegion, emit(zeroIR, cb, region = elementRegion)) cb.goto(LcopyAndReturn) }) @@ -697,13 +697,13 @@ object EmitStream { if (requiresMemoryManagementPerElement) { // deep copy accumulator into element region, then clear accumulator region - cb.assign(accValueEltRegion, accValueAccRegion.toI(cb).map(cb)(_.castTo(cb, childProducer.elementRegion, accEmitType.st, deepCopy = true))) + cb.assign(accValueEltRegion, accValueAccRegion.toI(cb).map(cb)(_.castTo(cb, childProducer.elementRegion, x.accPType, deepCopy = true))) cb += accRegion.clearRegion() } val bodyCode = cb.withScopedMaybeStreamValue(childProducer.element, "scan_child_elt") { ev => emit(bodyIR, cb, env = env.bind((accName, accValueEltRegion), (eltName, ev)), region = childProducer.elementRegion) - .map(cb)(pc => pc.castTo(cb, childProducer.elementRegion, accEmitType.st, deepCopy = false)) + .map(cb)(pc => pc.castTo(cb, childProducer.elementRegion, x.accPType, deepCopy = false)) } cb.assign(accValueEltRegion, bodyCode) @@ -711,7 +711,7 @@ object EmitStream { cb.define(LcopyAndReturn) if (requiresMemoryManagementPerElement) { - cb.assign(accValueAccRegion, accValueEltRegion.toI(cb).map(cb)(pc => pc.castTo(cb, accRegion, accEmitType.st, deepCopy = true))) + cb.assign(accValueAccRegion, accValueEltRegion.toI(cb).map(cb)(pc => pc.castTo(cb, accRegion, x.accPType, deepCopy = true))) } cb.goto(LproduceElementDone) @@ -730,7 +730,7 @@ object EmitStream { cb.goto(producer.LendOfStream) } - SStreamCode(producer) + SStreamCode(SStream(accValueEltRegion.st, childStream.st.required), producer) } case RunAggScan(child, name, init, seqs, result, states) => @@ -776,7 +776,7 @@ object EmitStream { cb.goto(producer.LendOfStream) } - SStreamCode(producer) + SStreamCode(SStream(producer.element.st, childStream.st.required), producer) } case StreamFlatMap(a, name, body) => @@ -899,7 
+899,10 @@ object EmitStream { cb.goto(producer.LendOfStream) } - SStreamCode(producer) + SStreamCode( + SStream(innerProducer.element.st, required = outerStream.st.required), + producer + ) } case x@StreamJoinRightDistinct(leftIR, rightIR, lKey, rKey, leftName, rightName, joinIR, joinType) => @@ -918,8 +921,8 @@ object EmitStream { assert(lelt.emitType == lEltType) assert(relt.emitType == rEltType) - val lhs = EmitCode.fromI(mb)(cb => lelt.toI(cb).map(cb)(_.asBaseStruct.subset(lKey: _*))) - val rhs = EmitCode.fromI(mb)(cb => relt.toI(cb).map(cb)(_.asBaseStruct.subset(rKey: _*))) + val lhs = EmitCode.fromI(mb)(cb => lelt.toI(cb).map(cb)(_.asBaseStruct.subset(lKey: _*).asPCode)) + val rhs = EmitCode.fromI(mb)(cb => relt.toI(cb).map(cb)(_.asBaseStruct.subset(rKey: _*).asPCode)) StructOrdering.make(lhs.st.asInstanceOf[SBaseStruct], rhs.st.asInstanceOf[SBaseStruct], cb.emb.ecb, missingFieldsEqual = false) .compare(cb, lhs, rhs, missingEqual = false) @@ -984,7 +987,7 @@ object EmitStream { cb.ifx(c > 0, cb.goto(LpullRight)) cb.ifx(c < 0, { - cb.assign(rxOut, EmitCode.missing(mb, rxOut.st)) + cb.assign(rxOut, EmitCode.missing(mb, rxOut.pt)) }, { // c == 0 if (rightProducer.requiresMemoryManagementPerElement) { @@ -1007,7 +1010,7 @@ object EmitStream { // if right stream ends before left cb.define(rightProducer.LendOfStream) - cb.assign(rxOut, EmitCode.missing(mb, rxOut.st)) + cb.assign(rxOut, EmitCode.missing(mb, rxOut.pt)) cb.assign(rightEOS, true) if (leftProducer.requiresMemoryManagementPerElement) @@ -1029,7 +1032,7 @@ object EmitStream { } - SStreamCode(producer) + SStreamCode(SStream(producer.element.st, leftStream.st.required && rightStream.st.required), producer) case "outer" => @@ -1094,8 +1097,6 @@ object EmitStream { cb.goto(rightProducer.LproduceElement) cb.define(LpullLeft) - if (leftProducer.requiresMemoryManagementPerElement) - cb += leftProducer.elementRegion.clearRegion() cb.goto(leftProducer.LproduceElement) val Lcompare = CodeLabel() @@ -1109,6 +1110,7 @@ object EmitStream { cb.assign(lOutMissing, true) if (rightProducer.requiresMemoryManagementPerElement) { cb += elementRegion.trackAndIncrementReferenceCountOf(rightProducer.elementRegion) + cb += rightProducer.elementRegion.clearRegion() } cb.goto(Lpush) }, @@ -1123,6 +1125,7 @@ object EmitStream { cb.assign(rOutMissing, true) if (leftProducer.requiresMemoryManagementPerElement) { cb += elementRegion.trackAndIncrementReferenceCountOf(leftProducer.elementRegion) + cb += leftProducer.elementRegion.clearRegion() } cb.goto(Lpush) }, @@ -1130,9 +1133,11 @@ object EmitStream { // c == 0 if (leftProducer.requiresMemoryManagementPerElement) { cb += elementRegion.trackAndIncrementReferenceCountOf(leftProducer.elementRegion) + cb += leftProducer.elementRegion.clearRegion() } if (rightProducer.requiresMemoryManagementPerElement) { cb += elementRegion.trackAndIncrementReferenceCountOf(rightProducer.elementRegion) + cb += rightProducer.elementRegion.clearRegion() } cb.goto(Lpush) }) @@ -1141,11 +1146,11 @@ object EmitStream { mb.implementLabel(Lpush) { cb => cb.ifx(lOutMissing, - cb.assign(lxOut, EmitCode.missing(mb, lxOut.st)), + cb.assign(lxOut, EmitCode.missing(mb, lxOut.pt)), cb.assign(lxOut, lx) ) cb.ifx(rOutMissing, - cb.assign(rxOut, EmitCode.missing(mb, rxOut.st)), + cb.assign(rxOut, EmitCode.missing(mb, rxOut.pt)), cb.assign(rxOut, rx)) cb.goto(LproduceElementDone) } @@ -1163,6 +1168,7 @@ object EmitStream { { if (leftProducer.requiresMemoryManagementPerElement) { cb += 
elementRegion.trackAndIncrementReferenceCountOf(leftProducer.elementRegion) + cb += leftProducer.elementRegion.clearRegion() } cb.goto(Lpush) }, @@ -1189,6 +1195,7 @@ object EmitStream { { if (rightProducer.requiresMemoryManagementPerElement) { cb += elementRegion.trackAndIncrementReferenceCountOf(rightProducer.elementRegion) + cb += rightProducer.elementRegion.clearRegion() } cb.goto(Lpush) }, @@ -1226,7 +1233,7 @@ object EmitStream { } } - SStreamCode(producer) + SStreamCode(SStream(producer.element.st, leftStream.st.required && rightStream.st.required), producer) } } } @@ -1236,15 +1243,15 @@ object EmitStream { val childProducer = childStream.producer - val xCurElt = mb.newPField("st_grpby_curelt", childProducer.element.st) + val xCurElt = mb.newPField("st_grpby_curelt", childProducer.element.pt) val keyRegion = mb.genFieldThisRef[Region]("st_groupby_key_region") def subsetCode = xCurElt.asBaseStruct.subset(key: _*) - val curKey = mb.newPField("st_grpby_curkey", subsetCode.st) - - // This type shouldn't be a subset struct, since it is copied deeply. - // We don't want to deep copy the parent. - val lastKey = mb.newPField("st_grpby_lastkey", SType.canonical(subsetCode.st)) + val curKey = mb.newPField("st_grpby_curkey", subsetCode.st.pType) + // FIXME: PType.canonical is the wrong infrastructure here. This should be some + // notion of "cheap stype with a copy". We don't want to use a subset struct, + // since we don't want to deep copy the parent. + val lastKey = mb.newPField("st_grpby_lastkey", PType.canonical(subsetCode.st.pType)) val eos = mb.genFieldThisRef[Boolean]("st_grpby_eos") val nextGroupReady = mb.genFieldThisRef[Boolean]("streamgroupbykey_nextready") @@ -1260,7 +1267,7 @@ object EmitStream { val outerElementRegion = mb.genFieldThisRef[Region]("streamgroupbykey_outer_elt_region") def equiv(cb: EmitCodeBuilder, l: SBaseStructCode, r: SBaseStructCode): Code[Boolean] = - StructOrdering.make(l.st, r.st, cb.emb.ecb, missingFieldsEqual = false).equivNonnull(cb, l, r) + StructOrdering.make(l.st, r.st, cb.emb.ecb, missingFieldsEqual = false).equivNonnull(cb, l.asPCode, r.asPCode) val LchildProduceDoneInner = CodeLabel() val LchildProduceDoneOuter = CodeLabel() @@ -1291,7 +1298,7 @@ object EmitStream { if (requiresMemoryManagementPerElement) cb += keyRegion.clearRegion() - cb.assign(lastKey, subsetCode.castTo(cb, keyRegion, lastKey.st, deepCopy = true)) + cb.assign(lastKey, subsetCode.castTo(cb, keyRegion, lastKey.pt, deepCopy = true)) cb.assign(nextGroupReady, true) cb.assign(inOuter, true) cb.goto(LendOfStream) @@ -1367,14 +1374,14 @@ object EmitStream { if (requiresMemoryManagementPerElement) cb += keyRegion.clearRegion() - cb.assign(lastKey, subsetCode.castTo(cb, keyRegion, lastKey.st, deepCopy = true)) + cb.assign(lastKey, subsetCode.castTo(cb, keyRegion, lastKey.pt, deepCopy = true)) cb.define(LinnerStreamReady) cb.assign(nextGroupReady, false) cb.goto(LproduceElementDone) } - override val element: EmitCode = EmitCode.present(mb, SStreamCode(innerProducer)) + override val element: EmitCode = EmitCode.present(mb, SStreamCode(SStream(innerProducer.element.st, true), innerProducer)) override def close(cb: EmitCodeBuilder): Unit = { childProducer.close(cb) @@ -1399,7 +1406,7 @@ object EmitStream { cb.ifx(inOuter, cb.goto(LchildProduceDoneOuter), cb.goto(LchildProduceDoneInner)) } - SStreamCode(outerProducer) + SStreamCode(SStream(outerProducer.element.st, required = childStream.st.required), outerProducer) } case StreamGrouped(a, groupSize) => @@ -1457,7 +1464,7 @@ object 
EmitStream { override def close(cb: EmitCodeBuilder): Unit = {} } - val innerStreamCode = EmitCode.present(mb, SStreamCode(innerProducer)) + val innerStreamCode = EmitCode.present(mb, SStreamCode(SStream(innerProducer.element.st, true), innerProducer)) val outerProducer = new StreamProducer { override val length: Option[EmitCodeBuilder => Code[Int]] = @@ -1520,7 +1527,7 @@ object EmitStream { cb.ifx(inOuter, cb.goto(LchildProduceDoneOuter), cb.goto(LchildProduceDoneInner)) } - SStreamCode(outerProducer) + SStreamCode(SStream(outerProducer.element.st, required = childStream.st.required), outerProducer) } } @@ -1727,7 +1734,7 @@ object EmitStream { // this stream has ended before each other, so we set the eos flag and the element EmitSettable cb.assign(eosPerStream(i), true) - cb.assign(vars(i), EmitCode.missing(mb, vars(i).st)) + cb.assign(vars(i), EmitCode.missing(mb, vars(i).pt)) cb.goto(endProduce) @@ -1749,24 +1756,16 @@ object EmitStream { } - SStreamCode(producer) + SStreamCode(SStream(producer.element.st, childStreams.forall(_.pt.required)), producer) } case x@StreamZipJoin(as, key, keyRef, valsRef, joinIR) => IEmitCode.multiMapEmitCodes(cb, as.map(a => EmitCode.fromI(mb)(cb => emit(a, cb)))) { children => val producers = children.map(_.asStream.producer) - val eltType = VirtualTypeWithReq.union(as.map(a => typeWithReqx(a))).canonicalEmitType - .st - .asInstanceOf[SStream] - .elementType - .canonicalPType() - .setRequired(false) - .asInstanceOf[PCanonicalStruct] - - val keyType = eltType.selectFields(key) - - val curValsType = PCanonicalArray(eltType) + // FIXME: unify + val curValsType = x.curValsType + val eltType = curValsType.elementType.setRequired(true).asInstanceOf[PStruct] val _elementRegion = mb.genFieldThisRef[Region]("szj_region") val regionArray = mb.genFieldThisRef[Array[Region]]("szj_region_array") @@ -1814,10 +1813,11 @@ object EmitStream { val result = mb.genFieldThisRef[Array[Long]]("merge_result") val i = mb.genFieldThisRef[Int]("merge_i") - val curKey = mb.newPField("st_grpby_curkey", keyType.sType) + val keyType = eltType.selectFields(key) + val curKey = mb.newPField("st_grpby_curkey", keyType) - val xKey = mb.newEmitField("zipjoin_key", keyType.sType, required = true) - val xElts = mb.newEmitField("zipjoin_elts", curValsType.sType, required = true) + val xKey = mb.newPresentEmitField("zipjoin_key", keyType) + val xElts = mb.newPresentEmitField("zipjoin_elts", curValsType) val joinResult: EmitCode = EmitCode.fromI(mb) { cb => val newEnv = env.bind((keyRef -> xKey), (valsRef -> xElts)) @@ -1868,18 +1868,18 @@ object EmitStream { }) cb.define(Lpush) - cb.assign(xKey, EmitCode.present(cb.emb, curKey)) - cb.assign(xElts, EmitCode.present(cb.emb, curValsType.constructFromElements(cb, elementRegion, k, false) { (cb, i) => - IEmitCode(cb, result(i).ceq(0L), eltType.loadCheapSCode(cb, result(i))) - })) + cb.assign(xKey, curKey) + cb.assign(xElts, curValsType.constructFromElements(cb, elementRegion, k, false) { (cb, i) => + IEmitCode(cb, result(i).ceq(0L), eltType.loadCheapPCode(cb, result(i))) + }) cb.goto(LproduceElementDone) cb.define(LstartNewKey) cb.forLoop(cb.assign(i, 0), i < k, cb.assign(i, i + 1), { cb += (result(i) = 0L) }) - cb.assign(curKey, eltType.loadCheapSCode(cb, heads(winner)).subset(key: _*) - .castTo(cb, elementRegion, curKey.st, true)) + cb.assign(curKey, eltType.loadCheapPCode(cb, heads(winner)).subset(key: _*) + .castTo(cb, elementRegion, curKey.pt, true)) cb.goto(LaddToResult) cb.define(LaddToResult) @@ -1909,8 +1909,8 @@ object EmitStream { 
cb.ifx(winner.ceq(k), cb.goto(LchallengerWins)) - val left = eltType.loadCheapSCode(cb, heads(challenger)).subset(key: _*) - val right = eltType.loadCheapSCode(cb, heads(winner)).subset(key: _*) + val left = eltType.loadCheapPCode(cb, heads(challenger)).subset(key: _*) + val right = eltType.loadCheapPCode(cb, heads(winner)).subset(key: _*) val ord = StructOrdering.make(left.st, right.st, cb.emb.ecb, missingFieldsEqual = false) cb.ifx(ord.lteqNonnull(cb, left, right), cb.goto(LchallengerWins), @@ -1938,7 +1938,7 @@ object EmitStream { }) }, { cb.ifx(!winner.cne(k), cb.goto(Lpush)) - val left = eltType.loadCheapSCode(cb, heads(winner)).subset(key: _*) + val left = eltType.loadCheapPCode(cb, heads(winner)).subset(key: _*) val right = curKey val ord = StructOrdering.make(left.st, right.st.asInstanceOf[SBaseStruct], cb.emb.ecb, missingFieldsEqual = false) @@ -1985,19 +1985,14 @@ object EmitStream { } } - SStreamCode(producer) + SStreamCode(SStream(producer.element.st, children.forall(_.pt.required)), producer) } case x@StreamMultiMerge(as, key) => IEmitCode.multiMapEmitCodes(cb, as.map(a => EmitCode.fromI(mb)(cb => emit(a, cb)))) { children => val producers = children.map(_.asStream.producer) - val unifiedType = VirtualTypeWithReq.union(as.map(a => typeWithReqx(a))).canonicalEmitType - .st - .asInstanceOf[SStream] - .elementEmitType - .canonicalPType - .asInstanceOf[PCanonicalStruct] + val unifiedType = x.pType.elementType.asInstanceOf[PStruct] // FIXME unify val region = mb.genFieldThisRef[Region]("smm_region") val regionArray = mb.genFieldThisRef[Array[Region]]("smm_region_array") @@ -2058,8 +2053,8 @@ object EmitStream { * left when key fields are missing. */ def comp(cb: EmitCodeBuilder, li: Code[Int], lv: Code[Long], ri: Code[Int], rv: Code[Long]): Code[Boolean] = { - val l = unifiedType.loadCheapSCode(cb, lv).asBaseStruct.subset(key: _*).memoize(cb, "stream_merge_l") - val r = unifiedType.loadCheapSCode(cb, rv).asBaseStruct.subset(key: _*).memoize(cb, "stream_merge_r") + val l = unifiedType.loadCheapPCode(cb, lv).asBaseStruct.subset(key: _*).memoize(cb, "stream_merge_l") + val r = unifiedType.loadCheapPCode(cb, rv).asBaseStruct.subset(key: _*).memoize(cb, "stream_merge_r") val ord1 = StructOrdering.make(l.asBaseStruct.st, r.asBaseStruct.st, cb.emb.ecb, missingFieldsEqual = false) val ord2 = StructOrdering.make(r.asBaseStruct.st, l.asBaseStruct.st, cb.emb.ecb, missingFieldsEqual = false) val b = cb.newLocal[Boolean]("stream_merge_comp_result") @@ -2168,7 +2163,7 @@ object EmitStream { } } - override val element: EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, unifiedType.loadCheapSCode(cb, heads(winner)))) + override val element: EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, unifiedType.loadCheapPCode(cb, heads(winner)))) override def close(cb: EmitCodeBuilder): Unit = { producers.foreach { p => @@ -2180,7 +2175,7 @@ object EmitStream { cb.assign(heads, Code._null) } } - SStreamCode(producer) + SStreamCode(SStream(producer.element.st, children.forall(_.pt.required)), producer) } case ReadPartition(context, rowType, reader) => @@ -2190,13 +2185,15 @@ object EmitStream { case ShuffleRead(idIR, keyRangeIR) => val shuffleType = idIR.typ.asInstanceOf[TShuffle] val keyType = keyRangeIR.typ.asInstanceOf[TInterval].pointType + val keyPType = keyRangeIR.pType.asInstanceOf[PInterval].pointType assert(keyType == shuffleType.keyType) + assert(keyPType == shuffleType.keyDecodedPType) val region = mb.genFieldThisRef[Region]("shuffleread_region") val emitID = EmitCode.fromI(mb)(cb 
=> emit(idIR, cb)) - val shuffleField = cb.emb.newPField(emitID.st).asInstanceOf[SCanonicalShufflePointerSettable] + val shuffleField = cb.emb.newPField(emitID.pt).asInstanceOf[SCanonicalShufflePointerSettable] val shuffle = CompileTimeShuffleClient.create(cb, shuffleField) @@ -2223,7 +2220,7 @@ object EmitStream { cb.goto(LproduceElementDone) } - override val element: EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, shuffle.readValue(cb, region))) + override val element: EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, shuffle.readValue(cb, region).asPCode)) override def close(cb: EmitCodeBuilder): Unit = { shuffle.finishGet(cb) @@ -2231,13 +2228,13 @@ object EmitStream { } } - IEmitCode.present(cb, SStreamCode(producer)) + IEmitCode.present(cb, SStreamCode(SStream(producer.element.st, true), producer)) case ShufflePartitionBounds(idIR, nPartitionsIR) => val region = mb.genFieldThisRef[Region]("shuffle_partition_bounds_region") val emitID = EmitCode.fromI(mb)(cb => emit(idIR, cb)) - val shuffleField = cb.emb.newPField(emitID.st).asInstanceOf[SCanonicalShufflePointerSettable] + val shuffleField = cb.emb.newPField(emitID.pt).asInstanceOf[SCanonicalShufflePointerSettable] val shuffle = CompileTimeShuffleClient.create(cb, shuffleField) val currentAddr = mb.genFieldThisRef[Long]("shuffle_partition_bounds_addr") @@ -2259,14 +2256,14 @@ object EmitStream { cb.goto(LproduceElementDone) } override val element: EmitCode = EmitCode.fromI(mb)(cb => - IEmitCode.present(cb, shuffle.readPartitionBound(cb, elementRegion))) + IEmitCode.present(cb, shuffle.readPartitionBound(cb, elementRegion).asPCode)) override def close(cb: EmitCodeBuilder): Unit = { shuffle.partitionBoundsFinished(cb) shuffle.close(cb) } } - IEmitCode.present(cb, SStreamCode(producer)) + IEmitCode.present(cb, SStreamCode(SStream(producer.element.st, true), producer)) } } } diff --git a/hail/src/main/scala/is/hail/expr/ir/streams/StreamUtils.scala b/hail/src/main/scala/is/hail/expr/ir/streams/StreamUtils.scala index 86490a5bfae..784bcf1650e 100644 --- a/hail/src/main/scala/is/hail/expr/ir/streams/StreamUtils.scala +++ b/hail/src/main/scala/is/hail/expr/ir/streams/StreamUtils.scala @@ -3,9 +3,7 @@ package is.hail.expr.ir.streams import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir.{EmitCodeBuilder, IEmitCode, IR, NDArrayMap, NDArrayMap2, Ref, RunAggScan, StagedArrayBuilder, StreamFilter, StreamFlatMap, StreamFold, StreamFold2, StreamFor, StreamJoinRightDistinct, StreamMap, StreamScan, StreamZip, StreamZipJoin} -import is.hail.types.physical.stypes.interfaces.SIndexableCode -import is.hail.types.physical.PCanonicalArray -import is.hail.types.physical.stypes.SingleCodeType +import is.hail.types.physical.{PCanonicalArray, PCode, PIndexableCode, SingleCodePCode} trait StreamArgType { def apply(outerRegion: Region, eltRegion: Region): Iterator[java.lang.Long] @@ -17,25 +15,25 @@ object StreamUtils { cb: EmitCodeBuilder, stream: StreamProducer, destRegion: Value[Region] - ): SIndexableCode = { + ): PIndexableCode = { val mb = cb.emb val xLen = mb.newLocal[Int]("sta_len") - val aTyp = PCanonicalArray(stream.element.emitType.canonicalPType, true) + val aTyp = PCanonicalArray(stream.element.st.canonicalPType(), true) stream.length match { case None => - val vab = new StagedArrayBuilder(SingleCodeType.fromSType(stream.element.st), stream.element.required, mb, 0) + val vab = new StagedArrayBuilder(stream.element.st.canonicalPType(), mb, 0) writeToArrayBuilder(cb, stream, vab, destRegion) cb.assign(xLen, 
vab.size) aTyp.constructFromElements(cb, destRegion, xLen, deepCopy = false) { (cb, i) => - vab.loadFromIndex(cb, destRegion, i) + IEmitCode(cb, vab.isMissing(i), PCode(aTyp.elementType, vab(i))) } case Some(computeLen) => var pushElem: (EmitCodeBuilder, IEmitCode) => Unit = null - var finish: (EmitCodeBuilder) => SIndexableCode = null + var finish: (EmitCodeBuilder) => PIndexableCode = null stream.memoryManagedConsume(destRegion, cb, setup = { cb => cb.assign(xLen, computeLen(cb)) @@ -67,7 +65,7 @@ object StreamUtils { }) { cb => stream.element.toI(cb).consume(cb, cb += ab.addMissing(), - sc => cb += ab.add(ab.elt.coerceSCode(cb, sc, destRegion, deepCopy = stream.requiresMemoryManagementPerElement).code) + sc => cb += ab.add(SingleCodePCode.fromPCode(cb, sc, destRegion, deepCopy = stream.requiresMemoryManagementPerElement).code) ) } } diff --git a/hail/src/main/scala/is/hail/io/CodecSpec.scala b/hail/src/main/scala/is/hail/io/CodecSpec.scala index 73f538976a2..000b098fa1c 100644 --- a/hail/src/main/scala/is/hail/io/CodecSpec.scala +++ b/hail/src/main/scala/is/hail/io/CodecSpec.scala @@ -3,11 +3,12 @@ package is.hail.io import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream, OutputStream} import is.hail.annotations.{Region, RegionValue} -import is.hail.asm4s.Code -import is.hail.expr.ir.ExecuteContext +import is.hail.asm4s.{Code, TypeInfo, Value} +import is.hail.expr.ir.{EmitClassBuilder, EmitCodeBuilder, EmitFunctionBuilder, ExecuteContext} import is.hail.types.encoded.EType -import is.hail.types.physical.PType +import is.hail.types.physical.{PCode, PType, PValue, typeToTypeInfo} import is.hail.types.virtual.Type +import is.hail.rvd.RVDContext import is.hail.sparkextras.ContextRDD import is.hail.utils.using import org.apache.spark.rdd.RDD diff --git a/hail/src/main/scala/is/hail/io/TextMatrixReader.scala b/hail/src/main/scala/is/hail/io/TextMatrixReader.scala index 95b8c246cb8..8d7a861f49a 100644 --- a/hail/src/main/scala/is/hail/io/TextMatrixReader.scala +++ b/hail/src/main/scala/is/hail/io/TextMatrixReader.scala @@ -8,8 +8,7 @@ import is.hail.io.fs.FS import is.hail.rvd.RVDPartitioner import is.hail.types._ import is.hail.types.physical._ -import is.hail.types.physical.stypes.SCode -import is.hail.types.physical.stypes.concrete.{SIndexablePointerCode, SStackStruct, SStringPointer} +import is.hail.types.physical.stypes.concrete.{SIndexablePointerCode, SStringPointer} import is.hail.types.physical.stypes.interfaces._ import is.hail.types.virtual._ import is.hail.utils._ @@ -345,7 +344,7 @@ class TextMatrixReader( params.hasHeader) { (region: Region, context: Any) => - val Row(lc, partitionIdx: Int) = context + val (lc, partitionIdx: Int) = context compiledLineParser.apply(partitionIdx, region, linesBody(lc).filter { line => val l = line.toString @@ -362,8 +361,8 @@ class TextMatrixReader( val subset = tt.globalType.valueSubsetter(requestedGlobalsType) subset(globals).asInstanceOf[Row] }, - TTuple(lines.contextType, TInt32), - lines.contexts.zipWithIndex.map { case (x, i) => Row(x, i) }, + lines.contextType, + lines.contexts.zipWithIndex, bodyPType, body) @@ -381,8 +380,6 @@ class TextMatrixReader( decomposeWithName(params, "TextMatrixReader") } - override def renderShort(): String = defaultRender() - override def hashCode(): Int = params.hashCode() override def equals(that: Any): Boolean = that match { @@ -483,7 +480,7 @@ class CompiledLineParser( private[this] def parseOptionalValue( cb: EmitCodeBuilder, - parse: EmitCodeBuilder => SCode + parse: EmitCodeBuilder => 
PCode ): IEmitCode = { assert(missingValue.size > 0) val end = cb.newLocal[Int]("parse_optional_value_end", pos + missingValue.size) @@ -574,15 +571,15 @@ class CompiledLineParser( } private[this] def parseValueOfType(cb: EmitCodeBuilder, t: PType): IEmitCode = { - def parseDefinedValue(cb: EmitCodeBuilder): SCode = t match { + def parseDefinedValue(cb: EmitCodeBuilder): PCode = t match { case t: PInt32 => - primitive(cb.invokeCode[Int](parseIntMb, region)) + PCode(t, cb.invokeCode[Int](parseIntMb, region)) case t: PInt64 => - primitive(cb.invokeCode[Long](parseLongMb, region)) + PCode(t, cb.invokeCode[Long](parseLongMb, region)) case t: PFloat32 => - primitive(Code.invokeStatic1[java.lang.Float, String, Float]("parseFloat", cb.invokeCode(parseStringMb, region))) + PCode(t, Code.invokeStatic1[java.lang.Float, String, Float]("parseFloat", cb.invokeCode(parseStringMb, region))) case t: PFloat64 => - primitive(Code.invokeStatic1[java.lang.Double, String, Double]("parseDouble", cb.invokeCode(parseStringMb, region))) + PCode(t, Code.invokeStatic1[java.lang.Double, String, Double]("parseDouble", cb.invokeCode(parseStringMb, region))) case t: PString => val st = SStringPointer(t) st.constructFromString(cb, region, cb.invokeCode[String](parseStringMb, region)) @@ -641,14 +638,16 @@ class CompiledLineParser( private[this] def parseEntries(cb: EmitCodeBuilder, entriesType: PCanonicalArray): SIndexablePointerCode = { val entryType = entriesType.elementType.asInstanceOf[PCanonicalStruct] assert(entryType.fields.size == 1) - val (push, finish) = entriesType.constructFromFunctions(cb, region, nCols, false) + val (nextAddress, _, finish) = entriesType.constructFromNextAddress(cb, region, nCols) val i = cb.newLocal[Int]("i", 0) cb.whileLoop(i < nCols, { + val nextAddr = nextAddress(cb) + cb.ifx(pos >= line.length, parseError(cb, const("unexpected end of line while reading entry ").concat(i.toS))) val ec = EmitCode.fromI(cb.emb)(cb => parseValueOfType(cb, entryType.fields(0).typ)) - push(cb, IEmitCode.present(cb, SStackStruct.constructFromArgs(cb, region, entryType.virtualType, ec))) + entryType.storeAtAddressFromFields(cb, nextAddr, region, FastIndexedSeq(ec), deepCopy = false) cb.assign(pos, pos + 1) cb.assign(i, i + 1) }) diff --git a/hail/src/main/scala/is/hail/io/bgen/BgenRDDPartitions.scala b/hail/src/main/scala/is/hail/io/bgen/BgenRDDPartitions.scala index fb75cdc0960..df6a7657fb2 100644 --- a/hail/src/main/scala/is/hail/io/bgen/BgenRDDPartitions.scala +++ b/hail/src/main/scala/is/hail/io/bgen/BgenRDDPartitions.scala @@ -8,7 +8,7 @@ import is.hail.io.fs.FS import is.hail.io.index.IndexReaderBuilder import is.hail.io.{ByteArrayReader, HadoopFSDataBinaryReader} import is.hail.types._ -import is.hail.types.physical.stypes.concrete.{SCanonicalCallCode, SStackStruct, SStringPointer} +import is.hail.types.physical.stypes.concrete.{SCanonicalCallCode, SStringPointer} import is.hail.types.physical.stypes.interfaces._ import is.hail.types.physical.{PCanonicalArray, PCanonicalLocus, PCanonicalString, PCanonicalStruct, PStruct} import is.hail.types.virtual.{TInterval, Type} @@ -279,7 +279,7 @@ object CompileDecoder { t.constructFromPositionAndString(cb, region, contigRecoded, position) case t: PCanonicalStruct => val strT = t.field("contig").typ.asInstanceOf[PCanonicalString] - val contigPC = strT.sType.constructFromString(cb, region, contigRecoded) + val contigPC = SStringPointer(strT).constructFromString(cb, region, contigRecoded) t.constructFromFields(cb, region, FastIndexedSeq(EmitCode.present(cb.emb, 
contigPC), EmitCode.present(cb.emb, primitive(position))), deepCopy = false) @@ -310,9 +310,9 @@ object CompileDecoder { } if (settings.hasField("rsid")) - structFieldCodes += EmitCode.present(cb.emb, SStringPointer(PCanonicalString(false)).constructFromString(cb, region, rsid)) + structFieldCodes += EmitCode.present(cb.emb, SStringPointer(PCanonicalString(true)).constructFromString(cb, region, rsid)) if (settings.hasField("varid")) - structFieldCodes += EmitCode.present(cb.emb, SStringPointer(PCanonicalString(false)).constructFromString(cb, region, varid)) + structFieldCodes += EmitCode.present(cb.emb, SStringPointer(PCanonicalString(true)).constructFromString(cb, region, varid)) if (settings.hasField("offset")) structFieldCodes += EmitCode.present(cb.emb, primitive(offset)) if (settings.hasField("file_idx")) @@ -343,7 +343,7 @@ object CompileDecoder { val LnoOp = CodeLabel() cb.ifx(alreadyMemoized, cb.goto(LnoOp)) - val (push, finish) = memoTyp.constructFromFunctions(cb, partRegion, 1 << 16, false) + val (nextAddr, _, finish) = memoTyp.constructFromNextAddress(cb, partRegion, 1 << 16) val d0 = cb.newLocal[Int]("memoize_entries_d0", 0) cb.whileLoop(d0 < 256, { @@ -351,6 +351,8 @@ object CompileDecoder { cb.whileLoop(d1 < 256, { val d2 = cb.newLocal[Int]("memoize_entries_d2", const(255) - d0 - d1) + val structAddr = nextAddr(cb) + val entryFieldCodes = new BoxedArrayBuilder[EmitCode]() if (includeGT) @@ -386,7 +388,7 @@ object CompileDecoder { cb.goto(Lpresent) }))) - IEmitCode(Lmissing, Lpresent, new SCanonicalCallCode(value), false) + IEmitCode(Lmissing, Lpresent, new SCanonicalCallCode(false, value), false) } if (includeGP) @@ -410,8 +412,7 @@ object CompileDecoder { IEmitCode.present(cb, primitive((d1 + (d2 << 1)).toD / 255.0)) } - push(cb, IEmitCode.present(cb, - SStackStruct.constructFromArgs(cb, partRegion, entryType.virtualType, entryFieldCodes.result(): _*))) + entryType.storeAtAddressFromFields(cb, structAddr, partRegion, entryFieldCodes.result(), deepCopy = false) cb.assign(d1, d1 + 1) }) @@ -517,7 +518,7 @@ object CompileDecoder { val dataOffset = cb.newLocal[Int]("bgen_add_entries_offset", const(settings.nSamples + 10) + i * 2) val d0 = data(dataOffset) & 0xff val d1 = data(dataOffset + 1) & 0xff - val pc = entryType.loadCheapSCode(cb, memoTyp.loadElement(memoizedEntryData, settings.nSamples, (d0 << 8) | d1)) + val pc = entryType.loadCheapPCode(cb, memoTyp.loadElement(memoizedEntryData, settings.nSamples, (d0 << 8) | d1)) cb.goto(Lpresent) val iec = IEmitCode(Lmissing, Lpresent, pc, false) pushElement(cb, iec) diff --git a/hail/src/main/scala/is/hail/io/bgen/LoadBgen.scala b/hail/src/main/scala/is/hail/io/bgen/LoadBgen.scala index 2a7e21297bc..b1f11198c62 100644 --- a/hail/src/main/scala/is/hail/io/bgen/LoadBgen.scala +++ b/hail/src/main/scala/is/hail/io/bgen/LoadBgen.scala @@ -488,8 +488,6 @@ class MatrixBGENReader( override def toJValue: JValue = params.toJValue - def renderShort(): String = defaultRender() - override def hashCode(): Int = params.hashCode() override def equals(that: Any): Boolean = that match { diff --git a/hail/src/main/scala/is/hail/io/fs/HadoopFS.scala b/hail/src/main/scala/is/hail/io/fs/HadoopFS.scala index 8012c9dafc8..f3eec87f90f 100644 --- a/hail/src/main/scala/is/hail/io/fs/HadoopFS.scala +++ b/hail/src/main/scala/is/hail/io/fs/HadoopFS.scala @@ -175,18 +175,4 @@ class HadoopFS(val conf: SerializableHadoopConfiguration) extends FS { val pathFS = ppath.getFileSystem(conf.value) pathFS.deleteOnExit(ppath) } - - def supportsScheme(scheme: String): 
Boolean = { - if (scheme == "") { - true - } else { - try { - hadoop.fs.FileSystem.getFileSystemClass(scheme, conf.value) - true - } catch { - case e: hadoop.fs.UnsupportedFileSystemException => false - case e: Exception => throw e - } - } - } } diff --git a/hail/src/main/scala/is/hail/io/gen/ExportBGEN.scala b/hail/src/main/scala/is/hail/io/gen/ExportBGEN.scala index 1d55bad4429..b0394149407 100644 --- a/hail/src/main/scala/is/hail/io/gen/ExportBGEN.scala +++ b/hail/src/main/scala/is/hail/io/gen/ExportBGEN.scala @@ -2,7 +2,7 @@ package is.hail.io.gen import is.hail.HailContext import is.hail.annotations.{RegionValue, UnsafeRow} -import is.hail.expr.ir.{ByteArrayBuilder, ExecuteContext, MatrixValue} +import is.hail.expr.ir.{ExecuteContext, MatrixValue} import is.hail.types.physical.PStruct import is.hail.io.fs.FS import is.hail.utils.BoxedArrayBuilder @@ -17,19 +17,19 @@ object BgenWriter { val phased: Byte = 0 val totalProb: Int = 255 - def shortToBytesLE(bb: ByteArrayBuilder, i: Int) { + def shortToBytesLE(bb: BoxedArrayBuilder[Byte], i: Int) { bb += (i & 0xff).toByte bb += ((i >>> 8) & 0xff).toByte } - def intToBytesLE(bb: ByteArrayBuilder, i: Int) { + def intToBytesLE(bb: BoxedArrayBuilder[Byte], i: Int) { bb += (i & 0xff).toByte bb += ((i >>> 8) & 0xff).toByte bb += ((i >>> 16) & 0xff).toByte bb += ((i >>> 24) & 0xff).toByte } - def stringToBytesWithShortLength(bb: ByteArrayBuilder, s: String): Int = { + def stringToBytesWithShortLength(bb: BoxedArrayBuilder[Byte], s: String): Int = { val bytes = s.getBytes val l = bytes.length shortToBytesLE(bb, l) @@ -37,7 +37,7 @@ object BgenWriter { 2 + l } - def stringToBytesWithIntLength(bb: ByteArrayBuilder, s: String): Int = { + def stringToBytesWithIntLength(bb: BoxedArrayBuilder[Byte], s: String): Int = { val bytes = s.getBytes val l = bytes.length intToBytesLE(bb, l) @@ -45,7 +45,7 @@ object BgenWriter { 4 + l } - def updateIntToBytesLE(bb: ByteArrayBuilder, i: Int, pos: Int) { + def updateIntToBytesLE(bb: BoxedArrayBuilder[Byte], i: Int, pos: Int) { bb(pos) = (i & 0xff).toByte bb(pos + 1) = ((i >>> 8) & 0xff).toByte bb(pos + 2) = ((i >>> 16) & 0xff).toByte @@ -53,7 +53,7 @@ object BgenWriter { } def headerBlock(sampleIds: IndexedSeq[String], nVariants: Long): Array[Byte] = { - val bb = new ByteArrayBuilder() + val bb = new BoxedArrayBuilder[Byte] val nSamples = sampleIds.length assert(nVariants < (1L << 32)) @@ -93,8 +93,8 @@ object BgenWriter { class BgenPartitionWriter(rowPType: PStruct, nSamples: Int) { import BgenWriter._ - val bb: ByteArrayBuilder = new ByteArrayBuilder() - val uncompressedData: ByteArrayBuilder = new ByteArrayBuilder() + val bb: BoxedArrayBuilder[Byte] = new BoxedArrayBuilder[Byte] + val uncompressedData: BoxedArrayBuilder[Byte] = new BoxedArrayBuilder[Byte] val gs = new ArrayGenotypeView(rowPType) val v = new RegionValueVariant(rowPType) val va = new GenAnnotationView(rowPType) @@ -125,13 +125,13 @@ class BgenPartitionWriter(rowPType: PStruct, nSamples: Int) { i += 1 } - val gtDataBlockStart = bb.size + val gtDataBlockStart = bb.length intToBytesLE(bb, 0) // placeholder for length of compressed data intToBytesLE(bb, 0) // placeholder for length of uncompressed data val dropped = emitGPData(chr, pos, alleles) - val uncompressedLength = uncompressedData.size + val uncompressedLength = uncompressedData.length val compressedLength = compress(bb, uncompressedData.result()) updateIntToBytesLE(bb, compressedLength + 4, gtDataBlockStart) @@ -191,7 +191,7 @@ class BgenPartitionWriter(rowPType: PStruct, nSamples: Int) { } def 
roundWithConstantSum(input: Array[Double], fractional: Array[Double], index: Array[Int], - indexInverse: Array[Int], output: ByteArrayBuilder, expectedSize: Long) { + indexInverse: Array[Int], output: BoxedArrayBuilder[Byte], expectedSize: Long) { val n = input.length assert(fractional.length == n && index.length == n && indexInverse.length == n) @@ -245,7 +245,7 @@ class BgenPartitionWriter(rowPType: PStruct, nSamples: Int) { val indexInverse = new Array[Int](nGenotypes) val fractional = new Array[Double](nGenotypes) - val samplePloidyStart = uncompressedData.size + val samplePloidyStart = uncompressedData.length var i = 0 while (i < nSamples) { uncompressedData += 0x82.toByte // placeholder for sample ploidy - default is missing diff --git a/hail/src/main/scala/is/hail/io/gen/LoadGen.scala b/hail/src/main/scala/is/hail/io/gen/LoadGen.scala index 72fb99d65d3..bb71316ff02 100644 --- a/hail/src/main/scala/is/hail/io/gen/LoadGen.scala +++ b/hail/src/main/scala/is/hail/io/gen/LoadGen.scala @@ -21,8 +21,6 @@ import org.apache.spark.sql.Row import org.apache.spark.broadcast.Broadcast import org.json4s.{DefaultFormats, Extraction, Formats, JObject, JValue} -import scala.collection.mutable - case class GenResult(file: String, nSamples: Int, nVariants: Int, rdd: RDD[(Annotation, Iterable[Annotation])]) object LoadGen { @@ -83,7 +81,7 @@ object LoadGen { if (gp.length != (3 * nSamples)) fatal("Number of genotype probabilities does not match 3 * number of samples. If no chromosome column is included, use -c to input the chromosome.") - val gsb = new mutable.ArrayBuffer[Annotation]() + val gsb = new BoxedArrayBuilder[Annotation]() for (i <- gp.indices by 3) { val d0 = gp(i) @@ -104,7 +102,7 @@ object LoadGen { val annotations = Annotation(locus, alleles, rsid, varid) - Some(annotations -> gsb.result()) + Some(annotations -> gsb.result().toIterable) } } } @@ -267,8 +265,6 @@ class MatrixGENReader( decomposeWithName(params, "MatrixGENReader") } - def renderShort(): String = defaultRender() - override def hashCode(): Int = params.hashCode() override def equals(that: Any): Boolean = that match { diff --git a/hail/src/main/scala/is/hail/io/index/IndexWriter.scala b/hail/src/main/scala/is/hail/io/index/IndexWriter.scala index ee4e18503ef..018d8c9558e 100644 --- a/hail/src/main/scala/is/hail/io/index/IndexWriter.scala +++ b/hail/src/main/scala/is/hail/io/index/IndexWriter.scala @@ -1,17 +1,16 @@ package is.hail.io.index import java.io.OutputStream + import is.hail.annotations.{Annotation, Region, RegionPool, RegionValueBuilder} import is.hail.asm4s._ -import is.hail.expr.ir.{CodeParam, EmitClassBuilder, EmitCodeBuilder, EmitFunctionBuilder, EmitMethodBuilder, ExecuteContext, IEmitCode, IntArrayBuilder, LongArrayBuilder, ParamType} +import is.hail.expr.ir.{CodeParam, EmitClassBuilder, EmitCodeBuilder, EmitFunctionBuilder, EmitMethodBuilder, ExecuteContext, IEmitCode, ParamType} import is.hail.io._ import is.hail.io.fs.FS import is.hail.rvd.AbstractRVDSpec import is.hail.types -import is.hail.types.physical.stypes.SCode import is.hail.types.physical.stypes.concrete.{SBaseStructPointer, SBaseStructPointerSettable} -import is.hail.types.physical.stypes.interfaces.SBaseStructValue -import is.hail.types.physical.{PCanonicalArray, PCanonicalStruct, PType} +import is.hail.types.physical.{PBaseStructValue, PCanonicalArray, PCanonicalStruct, PCode, PType} import is.hail.types.virtual.Type import is.hail.utils._ import is.hail.utils.richUtils.ByteTrackingOutputStream @@ -103,7 +102,7 @@ class 
IndexWriterArrayBuilder(name: String, maxSize: Int, sb: SettableBuilder, r private val aoff = sb.newSettable[Long](s"${name}_aoff") private val len = sb.newSettable[Int](s"${name}_len") - val eltType: PCanonicalStruct = types.coerce[PCanonicalStruct](arrayType.elementType.setRequired((false))) + val eltType: PCanonicalStruct = types.coerce[PCanonicalStruct](arrayType.elementType) private val elt = new SBaseStructPointerSettable(SBaseStructPointer(eltType), sb.newSettable[Long](s"${name}_elt_off")) def length: Code[Int] = len @@ -116,13 +115,13 @@ class IndexWriterArrayBuilder(name: String, maxSize: Int, sb: SettableBuilder, r def create(cb: EmitCodeBuilder, dest: Code[Long]): Unit = { cb.assign(aoff, arrayType.allocate(region, maxSize)) cb += arrayType.stagedInitialize(aoff, maxSize) - arrayType.storeAtAddress(cb, dest, region, arrayType.loadCheapSCode(cb, aoff), deepCopy = false) + arrayType.storeAtAddress(cb, dest, region, arrayType.loadCheapPCode(cb, aoff), deepCopy = false) cb.assign(len, 0) } def storeLength(cb: EmitCodeBuilder): Unit = cb += arrayType.storeLength(aoff, length) - def setFieldValue(cb: EmitCodeBuilder, name: String, field: SCode): Unit = { + def setFieldValue(cb: EmitCodeBuilder, name: String, field: PCode): Unit = { cb += eltType.setFieldPresent(elt.a, name) eltType.fieldType(name).storeAtAddress(cb, eltType.fieldOffset(elt.a, name), region, field, deepCopy = true) } @@ -136,8 +135,10 @@ class IndexWriterArrayBuilder(name: String, maxSize: Int, sb: SettableBuilder, r loadChild(cb, len) cb.assign(len, len + 1) } - def loadChild(cb: EmitCodeBuilder, idx: Code[Int]): Unit = elt.store(cb, eltType.loadCheapSCode(cb, arrayType.loadElement(aoff, idx))) - def getLoadedChild: SBaseStructValue = elt + def loadChild(cb: EmitCodeBuilder, idx: Code[Int]): Unit = elt.store(cb, PCode(eltType, arrayType.elementOffset(aoff, idx))) + def getLoadedChild: PBaseStructValue = elt + + def getChild(idx: Value[Int]): PCode = PCode(eltType, arrayType.elementOffset(aoff, idx)) } class StagedIndexWriterUtils(ib: Settable[IndexWriterUtils]) { @@ -188,8 +189,8 @@ class IndexWriterUtils(path: String, fs: FS, meta: StagedIndexMetadata) { } val rBuilder = new BoxedArrayBuilder[Region]() - val aBuilder = new LongArrayBuilder() - val lBuilder = new IntArrayBuilder() + val aBuilder = new BoxedArrayBuilder[Long]() + val lBuilder = new BoxedArrayBuilder[Int]() def size: Int = rBuilder.size @@ -244,9 +245,9 @@ object StagedIndexWriter { .voidWithBuilder(cb => siw.init(cb, cb.emb.getCodeParam[String](1))) fb.emb.voidWithBuilder { cb => siw.add(cb, - IEmitCode(cb, false, keyType.loadCheapSCode(cb, fb.getCodeParam[Long](1))), + IEmitCode(cb, false, PCode(keyType, fb.getCodeParam[Long](1))), fb.getCodeParam[Long](2), - IEmitCode(cb, false, annotationType.loadCheapSCode(cb, fb.getCodeParam[Long](3)))) + IEmitCode(cb, false, PCode(annotationType, fb.getCodeParam[Long](3)))) } cb.newEmitMethod("close", FastIndexedSeq[ParamType](), typeInfo[Unit]) .voidWithBuilder(siw.close) diff --git a/hail/src/main/scala/is/hail/io/index/InternalNodeBuilder.scala b/hail/src/main/scala/is/hail/io/index/InternalNodeBuilder.scala index cbbe8ea338e..37efa8b3029 100644 --- a/hail/src/main/scala/is/hail/io/index/InternalNodeBuilder.scala +++ b/hail/src/main/scala/is/hail/io/index/InternalNodeBuilder.scala @@ -4,7 +4,7 @@ import is.hail.annotations.Region import is.hail.asm4s.{Code, SettableBuilder, Value} import is.hail.expr.ir.EmitCodeBuilder import is.hail.io.OutputBuffer -import is.hail.types.physical.stypes.interfaces._ +import 
is.hail.types import is.hail.types.encoded.EType import is.hail.types.physical._ import is.hail.types.physical.stypes.concrete.{SBaseStructPointer, SBaseStructPointerSettable, SIndexablePointerCode} @@ -62,7 +62,7 @@ class StagedInternalNodeBuilder(maxSize: Int, keyType: PType, annotationType: PT } def allocate(cb: EmitCodeBuilder): Unit = { - node.store(cb, pType.loadCheapSCode(cb, pType.allocate(region))) + node.store(cb, PCode(pType, pType.allocate(region))) ab.create(cb, pType.fieldOffset(node.a, "children")) } @@ -77,26 +77,28 @@ class StagedInternalNodeBuilder(maxSize: Int, keyType: PType, annotationType: PT enc(cb, node, ob) } - def nodeAddress: SBaseStructValue = node + def nodeAddress: PBaseStructValue = node - def add(cb: EmitCodeBuilder, indexFileOffset: Code[Long], firstIndex: Code[Long], firstChild: SBaseStructValue): Unit = { + def add(cb: EmitCodeBuilder, indexFileOffset: Code[Long], firstIndex: Code[Long], firstChild: PBaseStructValue): Unit = { + val childtyp = types.coerce[PBaseStruct](firstChild.pt) ab.addChild(cb) - ab.setFieldValue(cb, "index_file_offset", primitive(indexFileOffset)) - ab.setFieldValue(cb, "first_idx", primitive(firstIndex)) - ab.setField(cb, "first_key", firstChild.loadField(cb, "key")) - ab.setField(cb, "first_record_offset", firstChild.loadField(cb, "offset")) - ab.setField(cb, "first_annotation", firstChild.loadField(cb, "annotation")) + ab.setFieldValue(cb, "index_file_offset", PCode(PInt64(), indexFileOffset)) + ab.setFieldValue(cb, "first_idx", PCode(PInt64(), firstIndex)) + ab.setField(cb, "first_key", firstChild.loadField(cb, childtyp.fieldIdx("key")).typecast[PCode]) + ab.setField(cb, "first_record_offset", firstChild.loadField(cb, childtyp.fieldIdx("offset")).typecast[PCode]) + ab.setField(cb, "first_annotation", firstChild.loadField(cb, childtyp.fieldIdx("annotation")).typecast[PCode]) } - def add(cb: EmitCodeBuilder, indexFileOffset: Code[Long], firstChild: SBaseStructValue): Unit = { + def add(cb: EmitCodeBuilder, indexFileOffset: Code[Long], firstChild: PBaseStructValue): Unit = { + val childtyp = types.coerce[PBaseStruct](firstChild.pt) ab.addChild(cb) - ab.setFieldValue(cb, "index_file_offset", primitive(indexFileOffset)) - ab.setField(cb, "first_idx", firstChild.loadField(cb, "first_idx")) - ab.setField(cb, "first_key", firstChild.loadField(cb, "first_key")) - ab.setField(cb, "first_record_offset", firstChild.loadField(cb, "first_record_offset")) - ab.setField(cb, "first_annotation", firstChild.loadField(cb, "first_annotation")) + ab.setFieldValue(cb, "index_file_offset", PCode(PInt64(), indexFileOffset)) + ab.setField(cb, "first_idx", firstChild.loadField(cb, childtyp.fieldIdx("first_idx")).typecast[PCode]) + ab.setField(cb, "first_key", firstChild.loadField(cb, childtyp.fieldIdx("first_key")).typecast[PCode]) + ab.setField(cb, "first_record_offset", firstChild.loadField(cb, childtyp.fieldIdx("first_record_offset")).typecast[PCode]) + ab.setField(cb, "first_annotation", firstChild.loadField(cb, childtyp.fieldIdx("first_annotation")).typecast[PCode]) } def loadChild(cb: EmitCodeBuilder, idx: Code[Int]): Unit = ab.loadChild(cb, idx) - def getLoadedChild: SBaseStructValue = ab.getLoadedChild + def getLoadedChild: PBaseStructValue = ab.getLoadedChild } diff --git a/hail/src/main/scala/is/hail/io/index/LeafNodeBuilder.scala b/hail/src/main/scala/is/hail/io/index/LeafNodeBuilder.scala index a691952a963..7957bfd4223 100644 --- a/hail/src/main/scala/is/hail/io/index/LeafNodeBuilder.scala +++ 
b/hail/src/main/scala/is/hail/io/index/LeafNodeBuilder.scala @@ -6,9 +6,7 @@ import is.hail.expr.ir.{EmitCodeBuilder, IEmitCode} import is.hail.io.OutputBuffer import is.hail.types.encoded.EType import is.hail.types.physical._ -import is.hail.types.physical.stypes.SCode import is.hail.types.physical.stypes.concrete.{SBaseStructPointer, SBaseStructPointerSettable} -import is.hail.types.physical.stypes.interfaces.{SBaseStructValue, primitive} import is.hail.types.virtual.{TStruct, Type} import is.hail.utils._ @@ -49,15 +47,15 @@ class StagedLeafNodeBuilder(maxSize: Int, keyType: PType, annotationType: PType, def reset(cb: EmitCodeBuilder, firstIdx: Code[Long]): Unit = { cb += region.invoke[Unit]("clear") - node.store(cb, pType.loadCheapSCode(cb, pType.allocate(region))) - idxType.storePrimitiveAtAddress(cb, pType.fieldOffset(node.a, "first_idx"), primitive(firstIdx)) + node.store(cb, pType.loadCheapPCode(cb, pType.allocate(region))) + idxType.storePrimitiveAtAddress(cb, pType.fieldOffset(node.a, "first_idx"), PCode(idxType, firstIdx)) ab.create(cb, pType.fieldOffset(node.a, "keys")) } def create(cb: EmitCodeBuilder, firstIdx: Code[Long]): Unit = { cb.assign(region, Region.stagedCreate(Region.REGULAR, cb.emb.ecb.pool())) - node.store(cb, pType.loadCheapSCode(cb, pType.allocate(region))) - idxType.storePrimitiveAtAddress(cb, pType.fieldOffset(node.a, "first_idx"), primitive(firstIdx)) + node.store(cb, pType.loadCheapPCode(cb, pType.allocate(region))) + idxType.storePrimitiveAtAddress(cb, pType.fieldOffset(node.a, "first_idx"), PCode(idxType, firstIdx)) ab.create(cb, pType.fieldOffset(node.a, "keys")) } @@ -67,16 +65,16 @@ class StagedLeafNodeBuilder(maxSize: Int, keyType: PType, annotationType: PType, enc(cb, node, ob) } - def nodeAddress: SBaseStructValue = node + def nodeAddress: PBaseStructValue = node def add(cb: EmitCodeBuilder, key: => IEmitCode, offset: Code[Long], annotation: => IEmitCode): Unit = { ab.addChild(cb) ab.setField(cb, "key", key) - ab.setFieldValue(cb, "offset", primitive(offset)) + ab.setFieldValue(cb, "offset", PCode(PInt64(), offset)) ab.setField(cb, "annotation", annotation) } def loadChild(cb: EmitCodeBuilder, idx: Code[Int]): Unit = ab.loadChild(cb, idx) - def getLoadedChild: SBaseStructValue = ab.getLoadedChild - def firstIdx(cb: EmitCodeBuilder): SCode = idxType.loadCheapSCode(cb, pType.fieldOffset(node.a, "first_idx")) + def getLoadedChild: PBaseStructValue = ab.getLoadedChild + def firstIdx(cb: EmitCodeBuilder): PCode = idxType.loadCheapPCode(cb, pType.fieldOffset(node.a, "first_idx")) } \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/io/plink/LoadPlink.scala b/hail/src/main/scala/is/hail/io/plink/LoadPlink.scala index 64fcc76cc91..18cd44b02de 100644 --- a/hail/src/main/scala/is/hail/io/plink/LoadPlink.scala +++ b/hail/src/main/scala/is/hail/io/plink/LoadPlink.scala @@ -217,7 +217,7 @@ object MatrixPLINKReader { val partSize = partition(nVariants, nPartitions) val partScan = partSize.scanLeft(0)(_ + _) - val cb = new BoxedArrayBuilder[Row]() + val cb = new BoxedArrayBuilder[Any]() val ib = new BoxedArrayBuilder[Interval]() var p = 0 @@ -247,7 +247,7 @@ object MatrixPLINKReader { } assert(prevEnd == nVariants) - val contexts = cb.result().map(r => r: Any) + val contexts = cb.result() val partitioner = new RVDPartitioner(locusAllelesType, ib.result(), 0) @@ -474,8 +474,6 @@ class MatrixPLINKReader( decomposeWithName(params, "MatrixPLINKReader") } - def renderShort(): String = defaultRender() - override def hashCode(): Int = params.hashCode() 
override def equals(that: Any): Boolean = that match { diff --git a/hail/src/main/scala/is/hail/io/tabix/TabixReader.scala b/hail/src/main/scala/is/hail/io/tabix/TabixReader.scala index 791fad40960..3a185c73214 100644 --- a/hail/src/main/scala/is/hail/io/tabix/TabixReader.scala +++ b/hail/src/main/scala/is/hail/io/tabix/TabixReader.scala @@ -1,13 +1,13 @@ package is.hail.io.tabix import java.io.InputStream + import htsjdk.samtools.util.FileExtensions import htsjdk.tribble.util.ParsingUtils import is.hail.io.compress.BGzipLineReader import is.hail.io.fs.FS import is.hail.utils._ import is.hail.backend.BroadcastValue -import is.hail.expr.ir.IntArrayBuilder import scala.collection.mutable import scala.language.implicitConversions @@ -256,7 +256,7 @@ class TabixReader(val filePath: String, fs: FS, idxFilePath: Option[String] = No new Array[Int](0) else { var end = _end - val bins = new IntArrayBuilder(MaxBin) + val bins = new BoxedArrayBuilder[Int](MaxBin) if (end >= (1 << 29)) { end = 1 << 29 } diff --git a/hail/src/main/scala/is/hail/io/vcf/LoadVCF.scala b/hail/src/main/scala/is/hail/io/vcf/LoadVCF.scala index cf1df02705a..aaa6115fe98 100644 --- a/hail/src/main/scala/is/hail/io/vcf/LoadVCF.scala +++ b/hail/src/main/scala/is/hail/io/vcf/LoadVCF.scala @@ -7,7 +7,7 @@ import is.hail.backend.BroadcastValue import is.hail.backend.spark.SparkBackend import is.hail.expr.JSONAnnotationImpex import is.hail.expr.ir.lowering.TableStage -import is.hail.expr.ir.{ExecuteContext, GenericLine, GenericLines, GenericTableValue, IR, IRParser, Literal, LowerMatrixIR, LoweredTableReader, MatrixHybridReader, MatrixIR, MatrixLiteral, PruneDeadFields, TableRead, TableValue} +import is.hail.expr.ir.{ExecuteContext, GenericLine, GenericLines, GenericTableValue, IRParser, LowerMatrixIR, LoweredTableReader, MatrixHybridReader, MatrixIR, MatrixLiteral, PruneDeadFields, TableRead, TableValue} import is.hail.types._ import is.hail.types.physical.{PBoolean, PCall, PCanonicalArray, PCanonicalCall, PCanonicalLocus, PCanonicalSet, PCanonicalString, PCanonicalStruct, PField, PFloat64, PInt32, PStruct, PType} import is.hail.types.virtual._ @@ -1208,11 +1208,7 @@ object LoadVCF { (line.getCount == 1 || (isFlag && line.getCount == 0))) ((id, baseType), (id, attrs), isFlag) - else if (isFlag) { - warn(s"invalid VCF header: at INFO field '$id' of type 'Flag', expected 'Number=0', got 'Number=${headerNumberToString(line)}''" + - s"\n Interpreting as 'Number=0' regardless.") - ((id, baseType), (id, attrs), isFlag) - } else if (baseType.isInstanceOf[PCall]) + else if (baseType.isInstanceOf[PCall]) fatal("fields in 'call_fields' must have 'Number' equal to 1.") else ((id, PCanonicalArray(baseType.setRequired(arrayElementsRequired))), (id, attrs), isFlag) @@ -1781,13 +1777,6 @@ class MatrixVCFReader( body) } - override def lowerGlobals(ctx: ExecuteContext, requestedGlobalsType: TStruct): IR = { - val globals = Row(sampleIDs.map(Row(_)).toFastIndexedSeq) - Literal.coerce(requestedGlobalsType, - fullType.globalType.valueSubsetter(requestedGlobalsType) - .apply(globals)) - } - override def lower(ctx: ExecuteContext, requestedType: TableType): TableStage = executeGeneric(ctx).toTableStage(ctx, requestedType) @@ -1799,8 +1788,6 @@ class MatrixVCFReader( decomposeWithName(params, "MatrixVCFReader") } - def renderShort(): String = defaultRender() - override def hashCode(): Int = params.hashCode() override def equals(that: Any): Boolean = that match { diff --git a/hail/src/main/scala/is/hail/linalg/BlockMatrix.scala 
b/hail/src/main/scala/is/hail/linalg/BlockMatrix.scala index ba0439a47b2..a81d5a6e84a 100644 --- a/hail/src/main/scala/is/hail/linalg/BlockMatrix.scala +++ b/hail/src/main/scala/is/hail/linalg/BlockMatrix.scala @@ -2,6 +2,7 @@ package is.hail.linalg import java.io._ import java.nio._ + import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, sum => breezeSum, _} import breeze.numerics.{abs => breezeAbs, log => breezeLog, pow => breezePow, sqrt => breezeSqrt} import breeze.stats.distributions.{RandBasis, ThreadLocalRandomGenerator} @@ -11,7 +12,7 @@ import is.hail.backend.{BroadcastValue, HailTaskContext} import is.hail.backend.spark.{SparkBackend, SparkTaskContext} import is.hail.utils._ import is.hail.expr.Parser -import is.hail.expr.ir.{CompileAndEvaluate, ExecuteContext, IR, IntArrayBuilder, TableValue} +import is.hail.expr.ir.{CompileAndEvaluate, ExecuteContext, IR, TableValue} import is.hail.types._ import is.hail.types.physical.{PArray, PCanonicalArray, PCanonicalStruct, PFloat64, PFloat64Optional, PFloat64Required, PInt64, PInt64Optional, PInt64Required, PStruct} import is.hail.types.virtual._ @@ -1367,8 +1368,8 @@ object BlockMatrixFilterRDD { val blockSize = gp.blockSize val ab = new BoxedArrayBuilder[(Int, Array[Int], Array[Int])]() - val startIndices = new IntArrayBuilder() - val endIndices = new IntArrayBuilder() + val startIndices = new BoxedArrayBuilder[Int]() + val endIndices = new BoxedArrayBuilder[Int]() keep .grouped(blockSize) diff --git a/hail/src/main/scala/is/hail/linalg/LinalgCodeUtils.scala b/hail/src/main/scala/is/hail/linalg/LinalgCodeUtils.scala index dff7c72ba31..e56d3533400 100644 --- a/hail/src/main/scala/is/hail/linalg/LinalgCodeUtils.scala +++ b/hail/src/main/scala/is/hail/linalg/LinalgCodeUtils.scala @@ -3,67 +3,52 @@ package is.hail.linalg import is.hail.annotations.Region import is.hail.asm4s.{Code, _} import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, IEmitCode} -import is.hail.types.physical.stypes.concrete.{SNDArrayPointer, SNDArrayPointerSettable} -import is.hail.types.physical.stypes.interfaces.{SNDArray, SNDArrayCode, SNDArrayValue} -import is.hail.utils.FastIndexedSeq +import is.hail.types.physical.stypes.concrete.SNDArrayPointerSettable +import is.hail.types.physical.stypes.interfaces.SNDArray +import is.hail.types.physical.{PCanonicalNDArray, PNDArrayCode, PNDArrayValue} object LinalgCodeUtils { - def checkColumnMajor(pndv: SNDArrayValue, cb: EmitCodeBuilder): Value[Boolean] = { + def checkColumnMajor(pndv: PNDArrayValue, cb: EmitCodeBuilder): Value[Boolean] = { val answer = cb.newField[Boolean]("checkColumnMajorResult") val shapes = pndv.shapes(cb) val strides = pndv.strides(cb) val runningProduct = cb.newLocal[Long]("check_column_major_running_product") - val pt = pndv.st.asInstanceOf[SNDArrayPointer].pType - val elementType = pt.elementType - val nDims = pndv.st.nDims + val elementType = pndv.pt.elementType + val nDims = pndv.pt.nDims cb.assign(answer, true) - cb.assign(runningProduct, elementType.byteSize) - (0 until nDims).foreach{ index => - cb.assign(answer, answer & (strides(index) ceq runningProduct)) - cb.assign(runningProduct, runningProduct * (shapes(index) > 0L).mux(shapes(index), 1L)) - } - answer - } - - def checkRowMajor(pndv: SNDArrayValue, cb: EmitCodeBuilder): Value[Boolean] = { - val answer = cb.newField[Boolean]("checkColumnMajorResult") - val shapes = pndv.shapes(cb) - val strides = pndv.strides(cb) - val runningProduct = cb.newLocal[Long]("check_column_major_running_product") - - val pt = 
pndv.st.asInstanceOf[SNDArrayPointer].pType - val elementType = pt.elementType - val nDims = pt.nDims - - cb.assign(answer, true) - cb.assign(runningProduct, elementType.byteSize) - ((nDims - 1) to 0 by -1).foreach { index => - cb.assign(answer, answer & (strides(index) ceq runningProduct)) - cb.assign(runningProduct, runningProduct * (shapes(index) > 0L).mux(shapes(index), 1L)) - } + cb.append(Code( + runningProduct := elementType.byteSize, + Code.foreach(0 until nDims){ index => + Code( + answer := answer & (strides(index) ceq runningProduct), + runningProduct := runningProduct * (shapes(index) > 0L).mux(shapes(index), 1L) + ) + } + )) answer } - def createColumnMajorCode(pndv: SNDArrayValue, cb: EmitCodeBuilder, region: Value[Region]): SNDArrayCode = { + def createColumnMajorCode(pndv: PNDArrayValue, cb: EmitCodeBuilder, region: Value[Region]): PNDArrayCode = { val shape = pndv.shapes(cb) - val pt = pndv.st.asInstanceOf[SNDArrayPointer].pType + val pt = pndv.pt.asInstanceOf[PCanonicalNDArray] val strides = pt.makeColumnMajorStrides(shape, region, cb) - val (dataFirstElementAddress, dataFinisher) = pt.constructDataFunction(shape, strides, cb, region) - // construct an SNDArrayCode with undefined contents - val result = dataFinisher(cb).memoize(cb, "col_major_result") + val (dataFirstElementAddress, dataFinisher) = pndv.pt.constructDataFunction(shape, strides, cb, region) + + val curAddr = cb.newLocal[Long]("create_column_major_cur_addr", dataFirstElementAddress) - SNDArray.coiterate(cb, region, FastIndexedSeq((result.get, "result"), (pndv.get, "pndv")), { - case Seq(l, r) => cb.assign(l, r) - }) - result.get + SNDArray.forEachIndex(cb, shape, "nda_create_column_major") { case (cb, idxVars) => + pt.elementType.storeAtAddress(cb, curAddr, region, pndv.loadElement(idxVars, cb), true) + cb.assign(curAddr, curAddr + pt.elementType.byteSize) + } + dataFinisher(cb) } - def checkColMajorAndCopyIfNeeded(aInput: SNDArrayValue, cb: EmitCodeBuilder, region: Value[Region]): SNDArrayValue = { + def checkColMajorAndCopyIfNeeded(aInput: PNDArrayValue, cb: EmitCodeBuilder, region: Value[Region]): PNDArrayValue = { val aIsColumnMajor = LinalgCodeUtils.checkColumnMajor(aInput, cb) - val aColMajor = cb.emb.newPField("ndarray_output_column_major", aInput.st).asInstanceOf[SNDArrayPointerSettable] + val aColMajor = cb.emb.newPField("ndarray_output_column_major", aInput.pt).asInstanceOf[SNDArrayPointerSettable] cb.ifx(aIsColumnMajor, {cb.assign(aColMajor, aInput)}, { cb.assign(aColMajor, LinalgCodeUtils.createColumnMajorCode(aInput, cb, region)) @@ -71,19 +56,6 @@ object LinalgCodeUtils { aColMajor } - def checkStandardStriding(aInput: SNDArrayValue, cb: EmitCodeBuilder, region: Value[Region]): (SNDArrayValue, Value[Boolean]) = { - val aIsColumnMajor = LinalgCodeUtils.checkColumnMajor(aInput, cb) - val a = cb.emb.newPField("ndarray_output_standardized", aInput.st).asInstanceOf[SNDArrayPointerSettable] - cb.ifx(aIsColumnMajor, {cb.assign(a, aInput)}, { - val isRowMajor = LinalgCodeUtils.checkRowMajor(aInput, cb) - cb.ifx(isRowMajor, {cb.assign(a, aInput)}, { - cb.assign(a, LinalgCodeUtils.createColumnMajorCode(aInput, cb, region)) - }) - }) - - (a, aIsColumnMajor) - } - def linearizeIndicesRowMajor(indices: IndexedSeq[Code[Long]], shapeArray: IndexedSeq[Value[Long]], mb: EmitMethodBuilder[_]): Code[Long] = { val index = mb.genFieldThisRef[Long]() val elementsInProcessedDimensions = mb.genFieldThisRef[Long]() diff --git a/hail/src/main/scala/is/hail/lir/PST.scala b/hail/src/main/scala/is/hail/lir/PST.scala index 
90e7fa17825..5affedfbede 100644 --- a/hail/src/main/scala/is/hail/lir/PST.scala +++ b/hail/src/main/scala/is/hail/lir/PST.scala @@ -1,6 +1,5 @@ package is.hail.lir -import is.hail.expr.ir.{BooleanArrayBuilder, IntArrayBuilder} import is.hail.utils.BoxedArrayBuilder import scala.collection.mutable @@ -257,16 +256,16 @@ class PSTBuilder( private val regions: mutable.ArrayBuffer[PSTRegion] = mutable.ArrayBuffer[PSTRegion]() // regions with no parents - private val frontier = new IntArrayBuilder() + private val frontier = new BoxedArrayBuilder[Int]() private def addRegion(start: Int, end: Int): Int = { - var firstc = frontier.size + var firstc = frontier.length while ((firstc - 1) >= 0 && regions(frontier(firstc - 1)).start >= start) firstc -= 1 assert(firstc == 0 || regions(frontier(firstc - 1)).end <= start) val ri = regions.length - val n = frontier.size - firstc + val n = frontier.length - firstc val children = new Array[Int](n) var i = 0 while (i < n) { @@ -276,8 +275,8 @@ class PSTBuilder( children(i) = c i += 1 } - frontier.setSizeUninitialized(frontier.size - n) - if (frontier.size > 0 && regions(frontier(frontier.size - 1)).end == start) + frontier.setSizeUninitialized(frontier.length - n) + if (frontier.nonEmpty && regions(frontier.last).end == start) splitBlock.set(start) frontier += ri regions += new PSTRegion(start, end, children) @@ -285,7 +284,7 @@ class PSTBuilder( } private def addRoot(): Int = { - if (frontier.size == 1 && + if (frontier.length == 1 && regions(frontier(0)).start == 0 && regions(frontier(0)).end == nBlocks - 1) { frontier(0) @@ -293,7 +292,7 @@ class PSTBuilder( val c = regions.length val ri = regions.length - val n = frontier.size + val n = frontier.length val children = new Array[Int](n) var i = 0 while (i < n) { @@ -313,7 +312,7 @@ class PSTBuilder( // find regions in [start, end] // no edges from [0, start) target (start, end] private def findRegions(start: Int, end: Int): Unit = { - var regionStarts = new IntArrayBuilder() + var regionStarts = new BoxedArrayBuilder[Int]() regionStarts += start // find subregions of [start, end] @@ -344,7 +343,7 @@ class PSTBuilder( } } - f(regionStarts.size - 1) + f(regionStarts.length - 1) regionStarts += newStart } @@ -383,7 +382,7 @@ class PSTBuilder( val root = addRoot() val newBlocksB = new BoxedArrayBuilder[Block]() - val newSplitBlock = new BooleanArrayBuilder() + val newSplitBlock = new BoxedArrayBuilder[Boolean]() // split blocks, compute new blocks // in linearization order @@ -443,7 +442,7 @@ class PSTBuilder( child = regions(children(c)) } - val newChildren = new IntArrayBuilder() + val newChildren = new BoxedArrayBuilder[Int]() var j = r.start var jincluded = false @@ -492,7 +491,7 @@ class PSTBuilder( // but are not contained in region i def findLoopRegions(i: Int): Array[Int] = { val r = newRegions(i) - val backEdgeSourcesB = new IntArrayBuilder() + val backEdgeSourcesB = new BoxedArrayBuilder[Int]() if (r.children.nonEmpty) { var c = 0 while (c < r.children.length) { diff --git a/hail/src/main/scala/is/hail/lir/X.scala b/hail/src/main/scala/is/hail/lir/X.scala index c4fa0e716a5..aff8be6c7ee 100644 --- a/hail/src/main/scala/is/hail/lir/X.scala +++ b/hail/src/main/scala/is/hail/lir/X.scala @@ -831,7 +831,6 @@ class InsnX(val op: Int, _ti: TypeInfo[_], var lineNumber: Int = 0) extends Valu case L2I => IntInfo case F2I => IntInfo case D2I => IntInfo - case IALOAD => IntInfo // Long case LNEG => LongInfo case LADD => LongInfo @@ -848,7 +847,6 @@ class InsnX(val op: Int, _ti: TypeInfo[_], var lineNumber: Int = 
0) extends Valu case I2L => LongInfo case F2L => LongInfo case D2L => LongInfo - case LALOAD => LongInfo // Float case FNEG => FloatInfo case FADD => FloatInfo @@ -859,8 +857,6 @@ class InsnX(val op: Int, _ti: TypeInfo[_], var lineNumber: Int = 0) extends Valu case I2F => FloatInfo case L2F => FloatInfo case D2F => FloatInfo - case FALOAD => FloatInfo - // Double case DNEG => DoubleInfo case DADD => DoubleInfo @@ -871,10 +867,8 @@ class InsnX(val op: Int, _ti: TypeInfo[_], var lineNumber: Int = 0) extends Valu case I2D => DoubleInfo case L2D => DoubleInfo case F2D => DoubleInfo - case DALOAD => DoubleInfo // Boolean case I2B => BooleanInfo - case BALOAD => BooleanInfo } } } diff --git a/hail/src/main/scala/is/hail/lir/package.scala b/hail/src/main/scala/is/hail/lir/package.scala index 65ccf0fd9b2..4ed17cf071d 100644 --- a/hail/src/main/scala/is/hail/lir/package.scala +++ b/hail/src/main/scala/is/hail/lir/package.scala @@ -9,11 +9,9 @@ package object lir { def genName(tag: String, baseName: String): String = synchronized { counter += 1 - if (baseName != null) { - if (baseName.contains(".")) - throw new RuntimeException(s"genName has invalid character(s): $baseName") + if (baseName != null) s"__$tag$counter$baseName" - } else + else s"__$tag${ counter }null" } diff --git a/hail/src/main/scala/is/hail/methods/LinearRegression.scala b/hail/src/main/scala/is/hail/methods/LinearRegression.scala index 59c1be9d103..139b48584f6 100644 --- a/hail/src/main/scala/is/hail/methods/LinearRegression.scala +++ b/hail/src/main/scala/is/hail/methods/LinearRegression.scala @@ -5,7 +5,7 @@ import breeze.numerics.sqrt import is.hail.HailContext import is.hail.annotations._ import is.hail.expr.ir.functions.MatrixToTableFunction -import is.hail.expr.ir.{ExecuteContext, IntArrayBuilder, MatrixValue, TableValue} +import is.hail.expr.ir.{ExecuteContext, MatrixValue, TableValue} import is.hail.types._ import is.hail.types.physical.PStruct import is.hail.types.virtual.{TArray, TFloat64, TInt32, TStruct} @@ -86,7 +86,7 @@ case class LinearRegressionRowsSingle( val producerCtx = consumerCtx.freshContext val rvb = new RegionValueBuilder() - val missingCompleteCols = new IntArrayBuilder() + val missingCompleteCols = new BoxedArrayBuilder[Int] val data = new Array[Double](n * rowBlockSize) val blockWRVs = new Array[WritableRegionValue](rowBlockSize) @@ -244,7 +244,7 @@ case class LinearRegressionRowsChained( val rvb = new RegionValueBuilder() val inputData = bc.value - val builder = new IntArrayBuilder() + val builder = new BoxedArrayBuilder[Int] val data = inputData.map(cri => new Array[Double](cri.n * rowBlockSize)) val blockWRVs = new Array[WritableRegionValue](rowBlockSize) diff --git a/hail/src/main/scala/is/hail/methods/LogisticRegression.scala b/hail/src/main/scala/is/hail/methods/LogisticRegression.scala index 73a353c42e7..08e0df305ae 100644 --- a/hail/src/main/scala/is/hail/methods/LogisticRegression.scala +++ b/hail/src/main/scala/is/hail/methods/LogisticRegression.scala @@ -4,7 +4,7 @@ import breeze.linalg._ import is.hail.HailContext import is.hail.annotations._ import is.hail.expr.ir.functions.MatrixToTableFunction -import is.hail.expr.ir.{ExecuteContext, IntArrayBuilder, MatrixValue, TableValue} +import is.hail.expr.ir.{ExecuteContext, MatrixValue, TableValue} import is.hail.types.virtual.{TArray, TFloat64, TStruct} import is.hail.types.{MatrixType, TableType} import is.hail.rvd.RVDType @@ -94,7 +94,7 @@ case class LogisticRegression( val newRVD = mv.rvd.mapPartitions(newRVDType) { (ctx, it) => val rvb = 
ctx.rvb - val missingCompleteCols = new IntArrayBuilder() + val missingCompleteCols = new BoxedArrayBuilder[Int]() val _nullFits = nullFitBc.value val _yVecs = yVecsBc.value val X = XBc.value.copy diff --git a/hail/src/main/scala/is/hail/methods/PoissonRegression.scala b/hail/src/main/scala/is/hail/methods/PoissonRegression.scala index a48b013cf26..b07f1e1ae27 100644 --- a/hail/src/main/scala/is/hail/methods/PoissonRegression.scala +++ b/hail/src/main/scala/is/hail/methods/PoissonRegression.scala @@ -4,7 +4,7 @@ import breeze.linalg._ import is.hail.HailContext import is.hail.annotations._ import is.hail.expr.ir.functions.MatrixToTableFunction -import is.hail.expr.ir.{ExecuteContext, IntArrayBuilder, MatrixValue, TableValue} +import is.hail.expr.ir.{ExecuteContext, MatrixValue, TableValue} import is.hail.types.virtual.{TFloat64, TStruct} import is.hail.types.{MatrixType, TableType} import is.hail.rvd.RVDType @@ -81,7 +81,7 @@ case class PoissonRegression( val newRVD = mv.rvd.mapPartitions(newRVDType) { (ctx, it) => val rvb = ctx.rvb - val missingCompleteCols = new IntArrayBuilder() + val missingCompleteCols = new BoxedArrayBuilder[Int]() val X = XBc.value.copy it.map { ptr => diff --git a/hail/src/main/scala/is/hail/methods/Skat.scala b/hail/src/main/scala/is/hail/methods/Skat.scala index 04982f50d55..1b4bba56a5f 100644 --- a/hail/src/main/scala/is/hail/methods/Skat.scala +++ b/hail/src/main/scala/is/hail/methods/Skat.scala @@ -11,7 +11,7 @@ import org.apache.spark.sql.Row import com.sun.jna.Native import com.sun.jna.ptr.IntByReference import is.hail.HailContext -import is.hail.expr.ir.{ExecuteContext, IntArrayBuilder, MatrixValue, TableValue} +import is.hail.expr.ir.{ExecuteContext, MatrixValue, TableValue} import is.hail.expr.ir.functions.MatrixToTableFunction import is.hail.types.virtual.{TFloat64, TInt32, TStruct, Type} import is.hail.rvd.RVDType @@ -356,7 +356,7 @@ case class Skat( val key = Annotation.copy(keyType.virtualType, UnsafeRow.read(keyType, ctx.r, fullRowType.loadField(ptr, keyIndex))) val data = new Array[Double](n) - RegressionUtils.setMeanImputedDoubles(data, 0, completeColIdxBc.value, new IntArrayBuilder(), + RegressionUtils.setMeanImputedDoubles(data, 0, completeColIdxBc.value, new BoxedArrayBuilder[Int](), ptr, fullRowType, entryArrayType, entryType, entryArrayIdx, fieldIdx) Some(key -> (BDV(data) -> weight)) } else None diff --git a/hail/src/main/scala/is/hail/rvd/AbstractRVDSpec.scala b/hail/src/main/scala/is/hail/rvd/AbstractRVDSpec.scala index ce9e47fed83..834e833c687 100644 --- a/hail/src/main/scala/is/hail/rvd/AbstractRVDSpec.scala +++ b/hail/src/main/scala/is/hail/rvd/AbstractRVDSpec.scala @@ -8,7 +8,6 @@ import is.hail.expr.ir.{ExecuteContext, IR, PartitionZippedNativeReader} import is.hail.io._ import is.hail.io.fs.FS import is.hail.io.index.{InternalNodeBuilder, LeafNodeBuilder} -import is.hail.types.TableType import is.hail.types.encoded.ETypeSerializer import is.hail.types.physical.{PCanonicalStruct, PInt64Optional, PStruct, PType, PTypeSerializer} import is.hail.types.virtual.{TStructSerializer, _} @@ -165,7 +164,7 @@ object AbstractRVDSpec { contexts, body) extendedNewPartitioner match { - case Some(np) if !filterIntervals => + case Some(np) if filterIntervals => ts.repartitionNoShuffle(np) case _ => ts } @@ -261,28 +260,24 @@ abstract class AbstractRVDSpec { def readTableStage( ctx: ExecuteContext, path: String, - requestedType: TableType, + requestedType: TStruct, newPartitioner: Option[RVDPartitioner] = None, filterIntervals: Boolean = false ): IR => 
TableStage = newPartitioner match { case Some(_) => fatal("attempted to read unindexed data as indexed") case None => - if (!partitioner.kType.fieldNames.startsWith(requestedType.key)) - fatal(s"cannot generate whole-stage code for legacy table: " + - s"table key = [${ requestedType.key.mkString(", ") }], " + - s"key on disk: [${ partitioner.kType.fieldNames.mkString(", ") }]") val rSpec = typedCodecSpec val ctxType = TStruct("path" -> TString) val contexts = ir.ToStream(ir.Literal(TArray(ctxType), absolutePartPaths(path).map(x => Row(x)).toFastIndexedSeq)) - val body = (ctx: IR) => ir.ReadPartition(ir.GetField(ctx, "path"), requestedType.rowType, ir.PartitionNativeReader(rSpec)) + val body = (ctx: IR) => ir.ReadPartition(ir.GetField(ctx, "path"), requestedType, ir.PartitionNativeReader(rSpec)) (globals: IR) => TableStage( globals, - partitioner.coarsen(partitioner.kType.fieldNames.takeWhile(requestedType.rowType.hasField).length), + partitioner, TableStageDependency.none, contexts, body) @@ -492,7 +487,7 @@ case class IndexedRVDSpec2(_key: IndexedSeq[String], override def readTableStage( ctx: ExecuteContext, path: String, - requestedType: TableType, + requestedType: TStruct, newPartitioner: Option[RVDPartitioner] = None, filterIntervals: Boolean = false ): IR => TableStage = newPartitioner match { @@ -524,7 +519,7 @@ case class IndexedRVDSpec2(_key: IndexedSeq[String], val contexts = ir.ToStream(ir.Literal(TArray(reader.contextType), contextsValues)) - val body = (ctx: IR) => ir.ReadPartition(ctx, requestedType.rowType, reader) + val body = (ctx: IR) => ir.ReadPartition(ctx, requestedType, reader) { (globals: IR) => val ts = TableStage( diff --git a/hail/src/main/scala/is/hail/services/batch_client/BatchClient.scala b/hail/src/main/scala/is/hail/services/batch_client/BatchClient.scala index c6009f3e520..4c31782e2a7 100644 --- a/hail/src/main/scala/is/hail/services/batch_client/BatchClient.scala +++ b/hail/src/main/scala/is/hail/services/batch_client/BatchClient.scala @@ -1,8 +1,7 @@ package is.hail.services.batch_client -import is.hail.expr.ir.ByteArrayBuilder - import java.nio.charset.StandardCharsets + import is.hail.utils._ import is.hail.services._ import is.hail.services.{DeployConfig, Tokens} @@ -98,7 +97,7 @@ class BatchClient( bunchb.clear() size = 0 - val b = new ByteArrayBuilder() + val b = new BoxedArrayBuilder[Byte]() i = 0 // reuse while (i < bunches.length) { diff --git a/hail/src/main/scala/is/hail/services/package.scala b/hail/src/main/scala/is/hail/services/package.scala index ed994d2d17d..7af90b676a0 100644 --- a/hail/src/main/scala/is/hail/services/package.scala +++ b/hail/src/main/scala/is/hail/services/package.scala @@ -42,9 +42,7 @@ package object services { true case e: SocketException => e.getMessage != null && ( - e.getMessage.contains("Connection reset") || - e.getMessage.contains("Broken pipe") || - e.getMessage.contains("Connection refused")) + e.getMessage.contains("Connection reset") || e.getMessage.contains("Broken pipe")) case e: EOFException => e.getMessage != null && ( e.getMessage.contains("SSL peer shut down incorrectly")) diff --git a/hail/src/main/scala/is/hail/services/shuffler/package.scala b/hail/src/main/scala/is/hail/services/shuffler/package.scala index 5c2f22ac3fa..781cecc9635 100644 --- a/hail/src/main/scala/is/hail/services/shuffler/package.scala +++ b/hail/src/main/scala/is/hail/services/shuffler/package.scala @@ -4,15 +4,14 @@ import java.io._ import java.net.Socket import java.security.KeyStore import java.util.Base64 + import 
is.hail.annotations._ import is.hail.asm4s._ -import is.hail.expr.ir.LongArrayBuilder import is.hail.types.physical._ import is.hail.io._ import is.hail.utils._ import org.apache.log4j.Logger - -import javax.net.ssl._ +import javax.net.ssl._; import scala.language.implicitConversions package object shuffler { @@ -39,7 +38,7 @@ package object shuffler { decoder: Decoder, sizeHint: Int = BoxedArrayBuilder.defaultInitialCapacity ): Array[Long] = { - val ab = new LongArrayBuilder(sizeHint) + val ab = new BoxedArrayBuilder[Long](sizeHint) var hasNext = decoder.readByte() while (hasNext == 1) { diff --git a/hail/src/main/scala/is/hail/stats/RegressionUtils.scala b/hail/src/main/scala/is/hail/stats/RegressionUtils.scala index 2282cf94d01..755c0d2b15c 100644 --- a/hail/src/main/scala/is/hail/stats/RegressionUtils.scala +++ b/hail/src/main/scala/is/hail/stats/RegressionUtils.scala @@ -2,7 +2,7 @@ package is.hail.stats import breeze.linalg._ import is.hail.annotations.{Region, RegionValue} -import is.hail.expr.ir.{IntArrayBuilder, MatrixValue} +import is.hail.expr.ir.MatrixValue import is.hail.types.physical.{PArray, PStruct} import is.hail.types.virtual.TFloat64 import is.hail.utils._ @@ -12,7 +12,7 @@ object RegressionUtils { def setMeanImputedDoubles(data: Array[Double], offset: Int, completeColIdx: Array[Int], - missingCompleteCols: IntArrayBuilder, + missingCompleteCols: BoxedArrayBuilder[Int], rv: Long, rvRowType: PStruct, entryArrayType: PArray, diff --git a/hail/src/main/scala/is/hail/types/TypeWithRequiredness.scala b/hail/src/main/scala/is/hail/types/TypeWithRequiredness.scala index 0be6b195d60..6d1b8532663 100644 --- a/hail/src/main/scala/is/hail/types/TypeWithRequiredness.scala +++ b/hail/src/main/scala/is/hail/types/TypeWithRequiredness.scala @@ -151,15 +151,6 @@ object VirtualTypeWithReq { assert(!twr.required) VirtualTypeWithReq(t, twr) } - - def union(vs: Seq[VirtualTypeWithReq]): VirtualTypeWithReq = { - val t = vs.head.t - assert(vs.tail.forall(_.t == t)) - - val tr = TypeWithRequiredness(t) - tr.unionFrom(vs.map(_.r)) - VirtualTypeWithReq(t, tr) - } } case class VirtualTypeWithReq(t: Type, r: TypeWithRequiredness) { diff --git a/hail/src/main/scala/is/hail/types/encoded/EArray.scala b/hail/src/main/scala/is/hail/types/encoded/EArray.scala index 632ceae660f..a8adb15ffb1 100644 --- a/hail/src/main/scala/is/hail/types/encoded/EArray.scala +++ b/hail/src/main/scala/is/hail/types/encoded/EArray.scala @@ -7,7 +7,7 @@ import is.hail.types.BaseType import is.hail.types.physical._ import is.hail.types.virtual._ import is.hail.io.{InputBuffer, OutputBuffer} -import is.hail.types.physical.stypes.{SCode, SType, SValue} +import is.hail.types.physical.stypes.SType import is.hail.types.physical.stypes.concrete.{SIndexablePointer, SIndexablePointerCode, SIndexablePointerSettable} import is.hail.types.physical.stypes.interfaces.SIndexableValue import is.hail.utils._ @@ -17,12 +17,12 @@ final case class EArray(val elementType: EType, override val required: Boolean = val elementPType = elementType.decodedPType(requestedType.asInstanceOf[TContainer].elementType) requestedType match { case _: TSet => - SIndexablePointer(PCanonicalSet(elementPType, false)) + SIndexablePointer(PCanonicalSet(elementPType, required)) case _: TArray => - SIndexablePointer(PCanonicalArray(elementPType, false)) + SIndexablePointer(PCanonicalArray(elementPType, required)) case _: TDict => val et = elementPType.asInstanceOf[PStruct] - SIndexablePointer(PCanonicalDict(et.fieldType("key"), et.fieldType("value"), false)) + 
SIndexablePointer(PCanonicalDict(et.fieldType("key"), et.fieldType("value"), required)) } } @@ -81,12 +81,12 @@ final case class EArray(val elementType: EType, override val required: Boolean = }) } - override def _buildEncoder(cb: EmitCodeBuilder, v: SValue, out: Value[OutputBuffer]): Unit = { + override def _buildEncoder(cb: EmitCodeBuilder, v: PValue, out: Value[OutputBuffer]): Unit = { val ind = v.asInstanceOf[SIndexableValue] buildPrefixEncoder(cb, ind, out, ind.loadLength()) } - override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): SCode = { + override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): PCode = { val st = decodedSType(t).asInstanceOf[SIndexablePointer] val arrayType: PCanonicalArray = st.pType match { diff --git a/hail/src/main/scala/is/hail/types/encoded/EBaseStruct.scala b/hail/src/main/scala/is/hail/types/encoded/EBaseStruct.scala index 96c9a802491..b9fb3c378a3 100644 --- a/hail/src/main/scala/is/hail/types/encoded/EBaseStruct.scala +++ b/hail/src/main/scala/is/hail/types/encoded/EBaseStruct.scala @@ -6,7 +6,7 @@ import is.hail.expr.ir.EmitCodeBuilder import is.hail.io.{InputBuffer, OutputBuffer} import is.hail.types.BaseStruct import is.hail.types.physical._ -import is.hail.types.physical.stypes.{SCode, SType, SValue} +import is.hail.types.physical.stypes.SType import is.hail.types.physical.stypes.concrete._ import is.hail.types.physical.stypes.interfaces.SBaseStructValue import is.hail.types.virtual._ @@ -53,24 +53,24 @@ final case class EBaseStruct(fields: IndexedSeq[EField], override val required: case t: TInterval => val structPType = decodedPType(t.structRepresentation).asInstanceOf[PStruct] val pointType = structPType.field("start").typ - SIntervalPointer(PCanonicalInterval(pointType, false)) + SIntervalPointer(PCanonicalInterval(pointType, required)) case t: TLocus => - SCanonicalLocusPointer(PCanonicalLocus(t.rg, false)) + SCanonicalLocusPointer(PCanonicalLocus(t.rg, required)) case t: TStruct => val pFields = t.fields.map { case Field(name, typ, idx) => val pt = fieldType(name).decodedPType(typ) PField(name, pt, idx) } - SBaseStructPointer(PCanonicalStruct(pFields, false)) + SBaseStructPointer(PCanonicalStruct(pFields, required)) case t: TTuple => val pFields = t.fields.map { case Field(name, typ, idx) => val pt = fieldType(name).decodedPType(typ) PTupleField(t._types(idx).index, pt) } - SBaseStructPointer(PCanonicalTuple(pFields, false)) + SBaseStructPointer(PCanonicalTuple(pFields, required)) } - override def _buildEncoder(cb: EmitCodeBuilder, v: SValue, out: Value[OutputBuffer]): Unit = { + override def _buildEncoder(cb: EmitCodeBuilder, v: PValue, out: Value[OutputBuffer]): Unit = { val structValue = v.st match { case SIntervalPointer(t: PCanonicalInterval) => new SBaseStructPointerSettable( SBaseStructPointer(t.representation), @@ -79,18 +79,20 @@ final case class EBaseStruct(fields: IndexedSeq[EField], override val required: new SBaseStructPointerSettable( SBaseStructPointer(t.representation), v.asInstanceOf[SCanonicalLocusPointerSettable].a) - case _ => v.asInstanceOf[SBaseStructValue] + case SBaseStructPointer(t) => v.asInstanceOf[SBaseStructValue] } + val ft = structValue.pt + // write missing bytes structValue.st match { case SBaseStructPointer(st) if st.size == size && st.fieldRequired.sameElements(fields.map(_.typ.required)) => - val missingBytes = UnsafeUtils.packBitsToBytes(st.nMissing) + val missingBytes = UnsafeUtils.packBitsToBytes(ft.nMissing) 
val addr = structValue.asInstanceOf[SBaseStructPointerSettable].a if (nMissingBytes > 1) cb += out.writeBytes(addr, missingBytes - 1) if (nMissingBytes > 0) - cb += out.writeByte((Region.loadByte(addr + (missingBytes.toLong - 1)).toI & const(EType.lowBitMask(st.nMissing & 0x7))).toB) + cb += out.writeByte((Region.loadByte(addr + (missingBytes.toLong - 1)).toI & const(EType.lowBitMask(ft.nMissing & 0x7))).toB) case _ => var j = 0 @@ -121,18 +123,19 @@ final case class EBaseStruct(fields: IndexedSeq[EField], override val required: if (ef.typ.required) cb._fatal(s"required field ${ ef.name } saw missing value in encode") }, - { pc => + { _pc => + val pc = _pc.asPCode ef.typ.buildEncoder(pc.st, cb.emb.ecb) .apply(cb, pc, out) }) } } - override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): SCode = { + override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): PCode = { val pt = decodedPType(t) val addr = cb.newLocal[Long]("base_struct_dec_addr", region.allocate(pt.alignment, pt.byteSize)) _buildInplaceDecoder(cb, pt, region, addr, in) - pt.loadCheapSCode(cb, addr) + pt.loadCheapPCode(cb, addr) } override def _buildInplaceDecoder(cb: EmitCodeBuilder, pt: PType, region: Value[Region], addr: Value[Long], in: Value[InputBuffer]): Unit = { diff --git a/hail/src/main/scala/is/hail/types/encoded/EBinary.scala b/hail/src/main/scala/is/hail/types/encoded/EBinary.scala index 9185f7dcf12..d53b0bd0092 100644 --- a/hail/src/main/scala/is/hail/types/encoded/EBinary.scala +++ b/hail/src/main/scala/is/hail/types/encoded/EBinary.scala @@ -7,7 +7,7 @@ import is.hail.types.BaseType import is.hail.types.physical._ import is.hail.types.virtual._ import is.hail.io.{InputBuffer, OutputBuffer} -import is.hail.types.physical.stypes.{SCode, SType, SValue} +import is.hail.types.physical.stypes.SType import is.hail.types.physical.stypes.concrete.{SBinaryPointer, SBinaryPointerCode, SBinaryPointerSettable, SStringPointer, SStringPointerCode, SStringPointerSettable} import is.hail.types.physical.stypes.interfaces.SBinaryValue import is.hail.utils._ @@ -17,7 +17,7 @@ case object EBinaryRequired extends EBinary(true) class EBinary(override val required: Boolean) extends EType { - override def _buildEncoder(cb: EmitCodeBuilder, v: SValue, out: Value[OutputBuffer]): Unit = { + override def _buildEncoder(cb: EmitCodeBuilder, v: PValue, out: Value[OutputBuffer]): Unit = { val bin = v.st match { case SBinaryPointer(t) => v.asInstanceOf[SBinaryValue] case SStringPointer(t) => new SBinaryPointerSettable(SBinaryPointer(t.binaryRepresentation), v.asInstanceOf[SStringPointerSettable].a) @@ -28,7 +28,7 @@ class EBinary(override val required: Boolean) extends EType { cb += out.writeBytes(bin.bytesAddress(), len) } - override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): SCode = { + override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): PCode = { val t1 = decodedSType(t) val pt = t1 match { case SStringPointer(t) => t.binaryRepresentation @@ -51,8 +51,8 @@ class EBinary(override val required: Boolean) extends EType { } def _decodedSType(requestedType: Type): SType = requestedType match { - case TBinary => SBinaryPointer(PCanonicalBinary(false)) - case TString => SStringPointer(PCanonicalString(false)) + case TBinary => SBinaryPointer(PCanonicalBinary(required)) + case TString => SStringPointer(PCanonicalString(required)) } def _asIdent = "binary" 
diff --git a/hail/src/main/scala/is/hail/types/encoded/EBlockMatrixNDArray.scala b/hail/src/main/scala/is/hail/types/encoded/EBlockMatrixNDArray.scala index 9f01bb08443..e6c232973fb 100644 --- a/hail/src/main/scala/is/hail/types/encoded/EBlockMatrixNDArray.scala +++ b/hail/src/main/scala/is/hail/types/encoded/EBlockMatrixNDArray.scala @@ -5,9 +5,8 @@ import is.hail.asm4s._ import is.hail.expr.ir.EmitCodeBuilder import is.hail.io.{InputBuffer, OutputBuffer} import is.hail.types.physical._ -import is.hail.types.physical.stypes.{SCode, SType, SValue} +import is.hail.types.physical.stypes.SType import is.hail.types.physical.stypes.concrete.SNDArrayPointer -import is.hail.types.physical.stypes.interfaces.SNDArrayValue import is.hail.types.virtual._ import is.hail.utils._ @@ -18,11 +17,11 @@ final case class EBlockMatrixNDArray(elementType: EType, encodeRowMajor: Boolean def _decodedSType(requestedType: Type): SType = { val elementPType = elementType.decodedPType(requestedType.asInstanceOf[TNDArray].elementType) - SNDArrayPointer(PCanonicalNDArray(elementPType, 2, false)) + SNDArrayPointer(PCanonicalNDArray(elementPType, 2, required)) } - override def _buildEncoder(cb: EmitCodeBuilder, v: SValue, out: Value[OutputBuffer]): Unit = { - val ndarray = v.asInstanceOf[SNDArrayValue] + override def _buildEncoder(cb: EmitCodeBuilder, v: PValue, out: Value[OutputBuffer]): Unit = { + val ndarray = v.asInstanceOf[PNDArrayValue] val shapes = ndarray.shapes(cb) val r = cb.newLocal[Long]("r", shapes(0)) val c = cb.newLocal[Long]("c", shapes(1)) @@ -36,19 +35,19 @@ final case class EBlockMatrixNDArray(elementType: EType, encodeRowMajor: Boolean if (encodeRowMajor) { cb.forLoop(cb.assign(i, 0L), i < r, cb.assign(i, i + 1L), { cb.forLoop(cb.assign(j, 0L), j < c, cb.assign(j, j + 1L), { - writeElemF(cb, ndarray.loadElement(FastIndexedSeq(i, j), cb), out) + writeElemF(cb, ndarray.loadElement(FastIndexedSeq(i, j), cb).asPCode, out) }) }) } else { cb.forLoop(cb.assign(j, 0L), j < c, cb.assign(j, j + 1L), { cb.forLoop(cb.assign(i, 0L), i < r, cb.assign(i, i + 1L), { - writeElemF(cb, ndarray.loadElement(FastIndexedSeq(i, j), cb), out) + writeElemF(cb, ndarray.loadElement(FastIndexedSeq(i, j), cb).asPCode, out) }) }) } } - override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): SCode = { + override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): PCode = { val st = decodedSType(t).asInstanceOf[SNDArrayPointer] val pt = st.pType val readElemF = elementType.buildInplaceDecoder(pt.elementType, cb.emb.ecb) diff --git a/hail/src/main/scala/is/hail/types/encoded/EBoolean.scala b/hail/src/main/scala/is/hail/types/encoded/EBoolean.scala index f4d47e7245d..7b1f8aa4178 100644 --- a/hail/src/main/scala/is/hail/types/encoded/EBoolean.scala +++ b/hail/src/main/scala/is/hail/types/encoded/EBoolean.scala @@ -5,7 +5,7 @@ import is.hail.asm4s._ import is.hail.expr.ir.EmitCodeBuilder import is.hail.io.{InputBuffer, OutputBuffer} import is.hail.types.physical._ -import is.hail.types.physical.stypes.{SCode, SType, SValue} +import is.hail.types.physical.stypes.SType import is.hail.types.physical.stypes.primitives.{SBoolean, SBooleanCode} import is.hail.types.virtual._ import is.hail.utils._ @@ -15,17 +15,17 @@ case object EBooleanOptional extends EBoolean(false) case object EBooleanRequired extends EBoolean(true) class EBoolean(override val required: Boolean) extends EType { - override def _buildEncoder(cb: EmitCodeBuilder, v: SValue, out: 
Value[OutputBuffer]): Unit = { + override def _buildEncoder(cb: EmitCodeBuilder, v: PValue, out: Value[OutputBuffer]): Unit = { cb += out.writeBoolean(v.asBoolean.boolCode(cb)) } - override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): SCode = { - new SBooleanCode(in.readBoolean()) + override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): PCode = { + new SBooleanCode(required, in.readBoolean()) } def _buildSkip(cb: EmitCodeBuilder, r: Value[Region], in: Value[InputBuffer]): Unit = cb += in.skipBoolean() - def _decodedSType(requestedType: Type): SType = SBoolean + def _decodedSType(requestedType: Type): SType = SBoolean(required) def _asIdent = "bool" diff --git a/hail/src/main/scala/is/hail/types/encoded/EFloat32.scala b/hail/src/main/scala/is/hail/types/encoded/EFloat32.scala index f28dd7b48d4..2f3ab5a9b14 100644 --- a/hail/src/main/scala/is/hail/types/encoded/EFloat32.scala +++ b/hail/src/main/scala/is/hail/types/encoded/EFloat32.scala @@ -5,7 +5,7 @@ import is.hail.asm4s._ import is.hail.expr.ir.EmitCodeBuilder import is.hail.io.{InputBuffer, OutputBuffer} import is.hail.types.physical._ -import is.hail.types.physical.stypes.{SCode, SType, SValue} +import is.hail.types.physical.stypes.SType import is.hail.types.physical.stypes.primitives.{SFloat32, SFloat32Code} import is.hail.types.virtual._ import is.hail.utils._ @@ -15,17 +15,17 @@ case object EFloat32Optional extends EFloat32(false) case object EFloat32Required extends EFloat32(true) class EFloat32(override val required: Boolean) extends EType { - override def _buildEncoder(cb: EmitCodeBuilder, v: SValue, out: Value[OutputBuffer]): Unit = { + override def _buildEncoder(cb: EmitCodeBuilder, v: PValue, out: Value[OutputBuffer]): Unit = { cb += out.writeFloat(v.asFloat.floatCode(cb)) } - override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): SCode = { - new SFloat32Code(in.readFloat()) + override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): PCode = { + new SFloat32Code(required, in.readFloat()) } def _buildSkip(cb: EmitCodeBuilder, r: Value[Region], in: Value[InputBuffer]): Unit = cb += in.skipFloat() - def _decodedSType(requestedType: Type): SType = SFloat32 + def _decodedSType(requestedType: Type): SType = SFloat32(required) def _asIdent = "float32" diff --git a/hail/src/main/scala/is/hail/types/encoded/EFloat64.scala b/hail/src/main/scala/is/hail/types/encoded/EFloat64.scala index cc478a63caf..408c4ca6f4f 100644 --- a/hail/src/main/scala/is/hail/types/encoded/EFloat64.scala +++ b/hail/src/main/scala/is/hail/types/encoded/EFloat64.scala @@ -5,7 +5,7 @@ import is.hail.asm4s._ import is.hail.expr.ir.EmitCodeBuilder import is.hail.io.{InputBuffer, OutputBuffer} import is.hail.types.physical._ -import is.hail.types.physical.stypes.{SCode, SType, SValue} +import is.hail.types.physical.stypes.SType import is.hail.types.physical.stypes.primitives.{SFloat64, SFloat64Code} import is.hail.types.virtual._ import is.hail.utils._ @@ -15,17 +15,17 @@ case object EFloat64Optional extends EFloat64(false) case object EFloat64Required extends EFloat64(true) class EFloat64(override val required: Boolean) extends EType { - override def _buildEncoder(cb: EmitCodeBuilder, v: SValue, out: Value[OutputBuffer]): Unit = { + override def _buildEncoder(cb: EmitCodeBuilder, v: PValue, out: Value[OutputBuffer]): Unit = { cb += 
out.writeDouble(v.asDouble.doubleCode(cb)) } - override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): SCode = { - new SFloat64Code(in.readDouble()) + override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): PCode = { + new SFloat64Code(required, in.readDouble()) } def _buildSkip(cb: EmitCodeBuilder, r: Value[Region], in: Value[InputBuffer]): Unit = cb += in.skipDouble() - def _decodedSType(requestedType: Type): SType = SFloat64 + def _decodedSType(requestedType: Type): SType = SFloat64(required) def _asIdent = "float64" diff --git a/hail/src/main/scala/is/hail/types/encoded/EInt32.scala b/hail/src/main/scala/is/hail/types/encoded/EInt32.scala index 1acac463b3b..a19b7270752 100644 --- a/hail/src/main/scala/is/hail/types/encoded/EInt32.scala +++ b/hail/src/main/scala/is/hail/types/encoded/EInt32.scala @@ -5,7 +5,7 @@ import is.hail.asm4s._ import is.hail.expr.ir.EmitCodeBuilder import is.hail.io.{InputBuffer, OutputBuffer} import is.hail.types.physical._ -import is.hail.types.physical.stypes.{SCode, SType, SValue} +import is.hail.types.physical.stypes.SType import is.hail.types.physical.stypes.concrete.{SCanonicalCall, SCanonicalCallCode} import is.hail.types.physical.stypes.interfaces.SCallValue import is.hail.types.physical.stypes.primitives.{SInt32, SInt32Code} @@ -17,19 +17,19 @@ case object EInt32Optional extends EInt32(false) case object EInt32Required extends EInt32(true) class EInt32(override val required: Boolean) extends EType { - override def _buildEncoder(cb: EmitCodeBuilder, v: SValue, out: Value[OutputBuffer]): Unit = { + override def _buildEncoder(cb: EmitCodeBuilder, v: PValue, out: Value[OutputBuffer]): Unit = { val x = v.st match { - case SCanonicalCall => v.asInstanceOf[SCallValue].canonicalCall(cb) - case SInt32 => v.asInt32.intCode(cb) + case t: SCanonicalCall => v.asInstanceOf[SCallValue].canonicalCall(cb) + case t: SInt32 => v.asInt32.intCode(cb) } cb += out.writeInt(x) } - override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): SCode = { + override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): PCode = { val x = in.readInt() t match { - case TCall => new SCanonicalCallCode(x) - case TInt32 => new SInt32Code(x) + case TCall => new SCanonicalCallCode(required, x) + case TInt32 => new SInt32Code(required, x) } } @@ -44,8 +44,8 @@ class EInt32(override val required: Boolean) extends EType { def _decodedSType(requestedType: Type): SType = requestedType match { - case TCall => SCanonicalCall - case _ => SInt32 + case TCall => SCanonicalCall(required) + case _ => SInt32(required) } def _asIdent = "int32" diff --git a/hail/src/main/scala/is/hail/types/encoded/EInt64.scala b/hail/src/main/scala/is/hail/types/encoded/EInt64.scala index 90931039e05..3017ad2e403 100644 --- a/hail/src/main/scala/is/hail/types/encoded/EInt64.scala +++ b/hail/src/main/scala/is/hail/types/encoded/EInt64.scala @@ -5,7 +5,7 @@ import is.hail.asm4s._ import is.hail.expr.ir.EmitCodeBuilder import is.hail.io.{InputBuffer, OutputBuffer} import is.hail.types.physical._ -import is.hail.types.physical.stypes.{SCode, SType, SValue} +import is.hail.types.physical.stypes.SType import is.hail.types.physical.stypes.primitives.{SInt64, SInt64Code} import is.hail.types.virtual._ import is.hail.utils._ @@ -15,17 +15,17 @@ case object EInt64Optional extends EInt64(false) case object EInt64Required extends EInt64(true) class 
EInt64(override val required: Boolean) extends EType { - override def _buildEncoder(cb: EmitCodeBuilder, v: SValue, out: Value[OutputBuffer]): Unit = { + override def _buildEncoder(cb: EmitCodeBuilder, v: PValue, out: Value[OutputBuffer]): Unit = { cb += out.writeLong(v.asLong.longCode(cb)) } - override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): SCode = { - new SInt64Code(in.readLong()) + override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): PCode = { + new SInt64Code(required, in.readLong()) } def _buildSkip(cb: EmitCodeBuilder, r: Value[Region], in: Value[InputBuffer]): Unit = cb += in.skipLong() - def _decodedSType(requestedType: Type): SType = SInt64 + def _decodedSType(requestedType: Type): SType = SInt64(required) def _asIdent = "int64" diff --git a/hail/src/main/scala/is/hail/types/encoded/ENDArrayColumnMajor.scala b/hail/src/main/scala/is/hail/types/encoded/ENDArrayColumnMajor.scala index 7d8a9171855..d54c54340b7 100644 --- a/hail/src/main/scala/is/hail/types/encoded/ENDArrayColumnMajor.scala +++ b/hail/src/main/scala/is/hail/types/encoded/ENDArrayColumnMajor.scala @@ -4,29 +4,29 @@ import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir.EmitCodeBuilder import is.hail.io.{InputBuffer, OutputBuffer} -import is.hail.types.physical.stypes.{SCode, SType, SValue} +import is.hail.types.physical.stypes.SType import is.hail.types.physical.stypes.concrete.SNDArrayPointer import is.hail.types.physical.stypes.interfaces.{SNDArray, SNDArrayValue} -import is.hail.types.physical.PCanonicalNDArray +import is.hail.types.physical.{PCanonicalNDArray, PCode, PValue} import is.hail.types.virtual.{TNDArray, Type} import is.hail.utils._ case class ENDArrayColumnMajor(elementType: EType, nDims: Int, required: Boolean = false) extends EContainer { - override def _buildEncoder(cb: EmitCodeBuilder, v: SValue, out: Value[OutputBuffer]): Unit = { + override def _buildEncoder(cb: EmitCodeBuilder, v: PValue, out: Value[OutputBuffer]): Unit = { val ndarray = v.asInstanceOf[SNDArrayValue] val shapes = ndarray.shapes(cb) shapes.foreach(s => cb += out.writeLong(s)) - SNDArray.coiterate(cb, null, FastIndexedSeq((ndarray.get, "A")), { - case Seq(elt) => - elementType.buildEncoder(elt.st, cb.emb.ecb) - .apply(cb, elt, out) - }) + SNDArray.forEachIndex(cb, shapes, "ndarray_encoder") { case (cb, idxVars) => + val elt = ndarray.loadElement(idxVars, cb) + elementType.buildEncoder(elt.st, cb.emb.ecb) + .apply(cb, elt, out) + } } - override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): SCode = { + override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): PCode = { val st = decodedSType(t).asInstanceOf[SNDArrayPointer] val pnd = st.pType val readElemF = elementType.buildInplaceDecoder(pnd.elementType, cb.emb.ecb) @@ -63,7 +63,7 @@ case class ENDArrayColumnMajor(elementType: EType, nDims: Int, required: Boolean def _decodedSType(requestedType: Type): SType = { val requestedTNDArray = requestedType.asInstanceOf[TNDArray] val elementPType = elementType.decodedPType(requestedTNDArray.elementType) - SNDArrayPointer(PCanonicalNDArray(elementPType, requestedTNDArray.nDims, false)) + SNDArrayPointer(PCanonicalNDArray(elementPType, requestedTNDArray.nDims, required)) } override def setRequired(required: Boolean): EType = ENDArrayColumnMajor(elementType, nDims, required) diff --git 
a/hail/src/main/scala/is/hail/types/encoded/EShuffle.scala b/hail/src/main/scala/is/hail/types/encoded/EShuffle.scala index 608ff09e283..93f97c26b3f 100644 --- a/hail/src/main/scala/is/hail/types/encoded/EShuffle.scala +++ b/hail/src/main/scala/is/hail/types/encoded/EShuffle.scala @@ -5,7 +5,7 @@ import is.hail.asm4s._ import is.hail.expr.ir.EmitCodeBuilder import is.hail.io.{InputBuffer, OutputBuffer} import is.hail.types.physical._ -import is.hail.types.physical.stypes.{SCode, SType, SValue} +import is.hail.types.physical.stypes.SType import is.hail.types.physical.stypes.concrete.{SCanonicalShufflePointer, SCanonicalShufflePointerCode, SCanonicalShufflePointerSettable} import is.hail.types.virtual._ import is.hail.utils._ @@ -15,7 +15,7 @@ case object EShuffleOptional extends EShuffle(false) case object EShuffleRequired extends EShuffle(true) class EShuffle(override val required: Boolean) extends EType { - def _buildEncoder(cb: EmitCodeBuilder, pv: SValue, out: Value[OutputBuffer]): Unit = { + def _buildEncoder(cb: EmitCodeBuilder, pv: PValue, out: Value[OutputBuffer]): Unit = { pv.st match { case SCanonicalShufflePointer(t) => val v = pv.asInstanceOf[SCanonicalShufflePointerSettable] @@ -25,14 +25,14 @@ class EShuffle(override val required: Boolean) extends EType { } } - override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): SCode = { + override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): PCode = { val shuffleType = decodedPType(t).asInstanceOf[PCanonicalShuffle] val bT = shuffleType.representation val len = cb.newLocal[Int]("len", in.readInt()) val barray = cb.newLocal[Long]("barray", bT.allocate(region, len)) cb += bT.storeLength(barray, len) cb += in.readBytes(region, bT.bytesAddress(barray), len) - new SCanonicalShufflePointerCode(SCanonicalShufflePointer(shuffleType), bT.loadCheapSCode(cb, barray)) + new SCanonicalShufflePointerCode(SCanonicalShufflePointer(shuffleType), bT.loadCheapPCode(cb, barray)) } def _buildSkip(cb: EmitCodeBuilder, r: Value[Region], in: Value[InputBuffer]): Unit = { diff --git a/hail/src/main/scala/is/hail/types/encoded/EType.scala b/hail/src/main/scala/is/hail/types/encoded/EType.scala index bb4c029e34d..59c69ae1733 100644 --- a/hail/src/main/scala/is/hail/types/encoded/EType.scala +++ b/hail/src/main/scala/is/hail/types/encoded/EType.scala @@ -1,6 +1,7 @@ package is.hail.types.encoded import java.util import java.util.Map.Entry + import is.hail.HailContext import is.hail.annotations.Region import is.hail.asm4s.{coerce => _, _} @@ -8,7 +9,7 @@ import is.hail.expr.ir.{EmitClassBuilder, EmitCodeBuilder, EmitFunctionBuilder, import is.hail.io._ import is.hail.types._ import is.hail.types.physical._ -import is.hail.types.physical.stypes.{SCode, SType, SValue} +import is.hail.types.physical.stypes.{SCode, SType} import is.hail.types.virtual._ import is.hail.utils._ import org.json4s.CustomSerializer @@ -24,7 +25,7 @@ class ETypeSerializer extends CustomSerializer[EType](format => ( { abstract class EType extends BaseType with Serializable with Requiredness { type StagedEncoder = (EmitCodeBuilder, SCode, Code[OutputBuffer]) => Unit - type StagedDecoder = (EmitCodeBuilder, Code[Region], Code[InputBuffer]) => SCode + type StagedDecoder = (EmitCodeBuilder, Code[Region], Code[InputBuffer]) => PCode type StagedInplaceDecoder = (EmitCodeBuilder, Code[Region], Code[Long], Code[InputBuffer]) => Unit final def buildEncoder(ctx: ExecuteContext, t: PType): (OutputBuffer) => 
Encoder = { @@ -44,7 +45,7 @@ abstract class EType extends BaseType with Serializable with Requiredness { final def buildEncoder(st: SType, kb: EmitClassBuilder[_]): StagedEncoder = { val mb = buildEncoderMethod(st, kb); - { (cb: EmitCodeBuilder, sc: SCode, ob: Code[OutputBuffer]) => cb.invokeVoid(mb, sc, ob) } + { (cb: EmitCodeBuilder, sc: SCode, ob: Code[OutputBuffer]) => cb.invokeVoid(mb, sc.asPCode, ob) } } final def buildEncoderMethod(st: SType, kb: EmitClassBuilder[_]): EmitMethodBuilder[_] = { @@ -54,7 +55,7 @@ abstract class EType extends BaseType with Serializable with Requiredness { UnitInfo) { mb => mb.voidWithBuilder { cb => - val arg = mb.getSCodeParam(1) + val arg = mb.getPCodeParam(1) .memoize(cb, "encoder_method_arg") val out = mb.getCodeParam[OutputBuffer](2) _buildEncoder(cb, arg, out) @@ -65,7 +66,7 @@ abstract class EType extends BaseType with Serializable with Requiredness { final def buildDecoder(t: Type, kb: EmitClassBuilder[_]): StagedDecoder = { val mb = buildDecoderMethod(t: Type, kb); { (cb: EmitCodeBuilder, r: Code[Region], ib: Code[InputBuffer]) => - cb.invokeSCode(mb, r, ib) + cb.invokePCode(mb, r, ib) } } @@ -76,7 +77,7 @@ abstract class EType extends BaseType with Serializable with Requiredness { FastIndexedSeq[ParamType](typeInfo[Region], classInfo[InputBuffer]), st.paramType) { mb => - mb.emitSCode { cb => + mb.emitPCode { cb => val region: Value[Region] = mb.getCodeParam[Region](1) val in: Value[InputBuffer] = mb.getCodeParam[InputBuffer](2) val sc = _buildDecoder(cb, t, region, in) @@ -122,9 +123,9 @@ abstract class EType extends BaseType with Serializable with Requiredness { }).invokeCode(_, _) } - def _buildEncoder(cb: EmitCodeBuilder, v: SValue, out: Value[OutputBuffer]): Unit + def _buildEncoder(cb: EmitCodeBuilder, v: PValue, out: Value[OutputBuffer]): Unit - def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): SCode + def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): PCode def _buildInplaceDecoder( cb: EmitCodeBuilder, @@ -161,7 +162,7 @@ abstract class EType extends BaseType with Serializable with Requiredness { } final def decodedPType(requestedType: Type): PType = { - decodedSType(requestedType).canonicalPType().setRequired(required) + decodedSType(requestedType).canonicalPType() } def _decodedSType(requestedType: Type): SType @@ -204,7 +205,7 @@ object EType { mb.voidWithBuilder { cb => val addr: Code[Long] = mb.getCodeParam[Long](1) val out: Code[OutputBuffer] = mb.getCodeParam[OutputBuffer](2) - val pc = pt.loadCheapSCode(cb, addr) + val pc = pt.loadCheapPCode(cb, addr) val f = et.buildEncoder(pc.st, mb.ecb) f(cb, pc, out) } diff --git a/hail/src/main/scala/is/hail/types/physical/PArrayBackedContainer.scala b/hail/src/main/scala/is/hail/types/physical/PArrayBackedContainer.scala index def4336dbbe..9ae5a095a24 100644 --- a/hail/src/main/scala/is/hail/types/physical/PArrayBackedContainer.scala +++ b/hail/src/main/scala/is/hail/types/physical/PArrayBackedContainer.scala @@ -142,9 +142,9 @@ trait PArrayBackedContainer extends PContainer { def unstagedStoreAtAddress(addr: Long, region: Region, srcPType: PType, srcAddress: Long, deepCopy: Boolean): Unit = arrayRep.unstagedStoreAtAddress(addr, region, srcPType.asInstanceOf[PArrayBackedContainer].arrayRep, srcAddress, deepCopy) - def sType: SIndexablePointer = SIndexablePointer(setRequired(false).asInstanceOf[PArrayBackedContainer]) + def sType: SContainer = SIndexablePointer(this) - def loadCheapSCode(cb: 
EmitCodeBuilder, addr: Code[Long]): SCode = new SIndexablePointerCode(sType, addr) + def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PCode = new SIndexablePointerCode(SIndexablePointer(this), addr) def store(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): Code[Long] = arrayRep.store(cb, region, value.asIndexable.castToArray(cb), deepCopy) diff --git a/hail/src/main/scala/is/hail/types/physical/PBaseStruct.scala b/hail/src/main/scala/is/hail/types/physical/PBaseStruct.scala index 574627c643c..6f3a6a36daa 100644 --- a/hail/src/main/scala/is/hail/types/physical/PBaseStruct.scala +++ b/hail/src/main/scala/is/hail/types/physical/PBaseStruct.scala @@ -178,4 +178,18 @@ abstract class PBaseStruct extends PType { } else Gen.uniformSequence(types.map(t => t.genValue)).map(a => Annotation(a: _*)) } -} \ No newline at end of file +} + +abstract class PBaseStructValue extends PValue with SBaseStructValue { + def pt: PBaseStruct +} + +abstract class PBaseStructCode extends PCode with SBaseStructCode { + def pt: PBaseStruct + + def memoize(cb: EmitCodeBuilder, name: String): PBaseStructValue + + def memoizeField(cb: EmitCodeBuilder, name: String): PBaseStructValue +} + +trait PStructSettable extends PBaseStructValue with PSettable \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/types/physical/PBinary.scala b/hail/src/main/scala/is/hail/types/physical/PBinary.scala index ff9f5325b5a..9b0c679b41e 100644 --- a/hail/src/main/scala/is/hail/types/physical/PBinary.scala +++ b/hail/src/main/scala/is/hail/types/physical/PBinary.scala @@ -71,3 +71,23 @@ abstract class PBinary extends PType { def store(addr: Code[Long], bytes: Code[Array[Byte]]): Code[Unit] } + +abstract class PBinaryValue extends PValue with SBinaryValue { + def loadLength(): Code[Int] + + def loadBytes(): Code[Array[Byte]] + + def loadByte(i: Code[Int]): Code[Byte] +} + +abstract class PBinaryCode extends PCode with SBinaryCode { + def pt: PBinary + + def loadLength(): Code[Int] + + def loadBytes(): Code[Array[Byte]] + + def memoize(cb: EmitCodeBuilder, name: String): PBinaryValue + + def memoizeField(cb: EmitCodeBuilder, name: String): PBinaryValue +} diff --git a/hail/src/main/scala/is/hail/types/physical/PBoolean.scala b/hail/src/main/scala/is/hail/types/physical/PBoolean.scala index 98fd06a064b..48fcfb68f16 100644 --- a/hail/src/main/scala/is/hail/types/physical/PBoolean.scala +++ b/hail/src/main/scala/is/hail/types/physical/PBoolean.scala @@ -27,13 +27,13 @@ class PBoolean(override val required: Boolean) extends PType with PPrimitive { override def byteSize: Long = 1 - def sType: SBoolean.type = SBoolean + def sType: SBoolean = SBoolean(required) def storePrimitiveAtAddress(cb: EmitCodeBuilder, addr: Code[Long], value: SCode): Unit = { cb += Region.storeBoolean(addr, value.asBoolean.boolCode(cb)) } - override def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SBooleanCode = new SBooleanCode(Region.loadBoolean(addr)) + override def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): SBooleanCode = new SBooleanCode(required, Region.loadBoolean(addr)) override def unstagedStoreJavaObjectAtAddress(addr: Long, annotation: Annotation, region: Region): Unit = { Region.storeByte(addr, annotation.asInstanceOf[Boolean].toByte) diff --git a/hail/src/main/scala/is/hail/types/physical/PCall.scala b/hail/src/main/scala/is/hail/types/physical/PCall.scala index 93f6cdc5662..324b74714b2 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCall.scala +++ 
b/hail/src/main/scala/is/hail/types/physical/PCall.scala @@ -1,7 +1,30 @@ package is.hail.types.physical +import is.hail.asm4s._ import is.hail.types.virtual.TCall +import is.hail.expr.ir.EmitCodeBuilder +import is.hail.types.physical.stypes.interfaces.{SCallCode, SCallValue} abstract class PCall extends PType { lazy val virtualType: TCall.type = TCall -} \ No newline at end of file +} + +abstract class PCallValue extends PValue with SCallValue { + def ploidy(): Code[Int] + + def isPhased(): Code[Boolean] + + def forEachAllele(cb: EmitCodeBuilder)(alleleCode: Value[Int] => Unit): Unit +} + +abstract class PCallCode extends PCode with SCallCode { + def pt: PCall + + def ploidy(): Code[Int] + + def isPhased(): Code[Boolean] + + def memoize(cb: EmitCodeBuilder, name: String): PCallValue + + def memoizeField(cb: EmitCodeBuilder, name: String): PCallValue +} diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalArray.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalArray.scala index 86a4f357894..49d70d5bb97 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalArray.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalArray.scala @@ -2,7 +2,7 @@ package is.hail.types.physical import is.hail.annotations.{Region, _} import is.hail.asm4s.{Code, _} -import is.hail.expr.ir.{EmitCode, EmitCodeBuilder, EmitMethodBuilder, IEmitCode} +import is.hail.expr.ir.{EmitCode, EmitCodeBuilder, EmitMethodBuilder, IEmitCode, IEmitSCode} import is.hail.types.physical.stypes.SCode import is.hail.types.physical.stypes.concrete.{SIndexablePointer, SIndexablePointerCode, SIndexablePointerSettable} import is.hail.types.physical.stypes.interfaces.{SContainer, SIndexableValue} @@ -323,7 +323,7 @@ final case class PCanonicalArray(elementType: PType, required: Boolean = false) cb.ifx(isElementDefined(dstAddress, currentIdx), { cb.assign(currentElementAddress, elementOffset(dstAddress, len, currentIdx)) - this.elementType.storeAtAddress(cb, currentElementAddress, region, this.elementType.loadCheapSCode(cb, this.elementType.loadFromNested(currentElementAddress)), true) + this.elementType.storeAtAddress(cb, currentElementAddress, region, this.elementType.loadCheapPCode(cb, this.elementType.loadFromNested(currentElementAddress)), true) })) } @@ -377,9 +377,9 @@ final case class PCanonicalArray(elementType: PType, required: Boolean = false) } } - def sType: SIndexablePointer = SIndexablePointer(setRequired(false).asInstanceOf[PCanonicalArray]) + def sType: SContainer = SIndexablePointer(this) - def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SIndexablePointerCode = new SIndexablePointerCode(sType, addr) + def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): SIndexablePointerCode = new SIndexablePointerCode(SIndexablePointer(this), addr) def storeContentsAtAddress(cb: EmitCodeBuilder, addr: Value[Long], region: Value[Region], indexable: SIndexableValue, deepCopy: Boolean): Unit = { val length = indexable.loadLength() @@ -387,7 +387,7 @@ final case class PCanonicalArray(elementType: PType, required: Boolean = false) case SIndexablePointer(PCanonicalArray(otherElementType, _)) if otherElementType == elementType => cb += Region.copyFrom(indexable.asInstanceOf[SIndexablePointerSettable].a, addr, contentsByteSize(length)) deepPointerCopy(cb, region, addr, length) - case SIndexablePointer(otherType@PCanonicalArray(otherElementType, _)) if otherElementType.equalModuloRequired(elementType) => + case SIndexablePointer(PCanonicalArray(otherElementType, _)) if 
otherElementType.equalModuloRequired(elementType) => // other is optional, constructing required if (elementType.required) { cb.ifx(indexable.hasMissingValues(cb), @@ -395,6 +395,7 @@ final case class PCanonicalArray(elementType: PType, required: Boolean = false) } cb += stagedInitialize(addr, indexable.loadLength(), setMissing = false) + val otherType = indexable.st.pType.asInstanceOf[PCanonicalArray] cb += Region.copyFrom(otherType.firstElementOffset(indexable.asInstanceOf[SIndexablePointerSettable].a), this.firstElementOffset(addr), length.toL * otherType.elementByteSize) if (deepCopy) deepPointerCopy(cb, region, addr, length) @@ -447,7 +448,7 @@ final case class PCanonicalArray(elementType: PType, required: Boolean = false) PCanonicalArray(this.elementType.deepRename(t.elementType), this.required) def constructFromElements(cb: EmitCodeBuilder, region: Value[Region], length: Value[Int], deepCopy: Boolean) - (f: (EmitCodeBuilder, Value[Int]) => IEmitCode): SIndexablePointerCode = { + (f: (EmitCodeBuilder, Value[Int]) => IEmitSCode): SIndexablePointerCode = { val addr = cb.newLocal[Long]("pcarray_construct1_addr", allocate(region, length)) cb += stagedInitialize(addr, length, setMissing = false) @@ -464,7 +465,38 @@ final case class PCanonicalArray(elementType: PType, required: Boolean = false) cb.assign(i, i + 1) }) - new SIndexablePointerCode(sType, addr) + new SIndexablePointerCode(SIndexablePointer(this), addr) + } + + // unsafe StagedArrayBuilder-like interface that gives caller control over adding elements and finishing + // this won't need to exist when we have SStackStruct + def constructFromNextAddress(cb: EmitCodeBuilder, region: Value[Region], length: Value[Int]): + ((EmitCodeBuilder => Value[Long], (EmitCodeBuilder => Unit), (EmitCodeBuilder => SIndexablePointerCode))) = { + + val addr = cb.newLocal[Long]("pcarray_construct2_addr", allocate(region, length)) + cb += stagedInitialize(addr, length, setMissing = false) + val currentIndex = cb.newLocal[Int]("pcarray_construct2_i", -1) + + val currentElementAddress = cb.newLocal[Long]("pcarray_construct2_firstelementaddr", firstElementOffset(addr, length) - elementByteSize) + + def nextAddr(cb: EmitCodeBuilder): Value[Long] = { + cb.assign(currentIndex, currentIndex + 1) + cb.assign(currentElementAddress, currentElementAddress + elementByteSize) + currentElementAddress + } + + def setMissing(cb: EmitCodeBuilder): Unit = { + cb.assign(currentIndex, currentIndex + 1) + cb.assign(currentElementAddress, currentElementAddress + elementByteSize) + cb += this.setElementMissing(addr, currentIndex) + } + + def finish(cb: EmitCodeBuilder): SIndexablePointerCode = { + cb.ifx((currentIndex + 1).cne(length), cb._fatal("PCanonicalArray.constructFromNextAddress nextAddress was called the wrong number of times: len=", + length.toS, ", calls=", (currentIndex + 1).toS)) + new SIndexablePointerCode(SIndexablePointer(this), addr) + } + (nextAddr, setMissing, finish) } // unsafe StagedArrayBuilder-like interface that gives caller control over pushing elements and finishing @@ -488,7 +520,7 @@ final case class PCanonicalArray(elementType: PType, required: Boolean = false) val finish: EmitCodeBuilder => SIndexablePointerCode = { (cb: EmitCodeBuilder) => cb.ifx(currentElementIndex.cne(length), cb._fatal("PCanonicalArray.constructFromFunctions push was called the wrong number of times: len=", length.toS, ", calls=", currentElementIndex.toS)) - new SIndexablePointerCode(sType, addr) + new SIndexablePointerCode(SIndexablePointer(this), addr) } (push, finish) 
} diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalBaseStruct.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalBaseStruct.scala index 42ef32c8793..c7d994b3069 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalBaseStruct.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalBaseStruct.scala @@ -105,7 +105,7 @@ abstract class PCanonicalBaseStruct(val types: Array[PType]) extends PBaseStruct cb.ifx(isFieldDefined(dstAddr, f.index), { val fieldAddr = cb.newLocal[Long]("pcbs_dpcopy_field", fieldOffset(dstAddr, f.index)) - dstFieldType.storeAtAddress(cb, fieldAddr, region, dstFieldType.loadCheapSCode(cb, dstFieldType.loadFromNested(fieldAddr)), deepCopy = true) + dstFieldType.storeAtAddress(cb, fieldAddr, region, dstFieldType.loadCheapPCode(cb, dstFieldType.loadFromNested(fieldAddr)), deepCopy = true) }) } } @@ -154,9 +154,9 @@ abstract class PCanonicalBaseStruct(val types: Array[PType]) extends PBaseStruct } } - def sType: SBaseStructPointer = SBaseStructPointer(setRequired(false).asInstanceOf[PCanonicalBaseStruct]) + def sType: SBaseStruct = SBaseStructPointer(this) - def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SBaseStructPointerCode = new SBaseStructPointerCode(sType, addr) + def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): SBaseStructPointerCode = new SBaseStructPointerCode(SBaseStructPointer(this), addr) def store(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): Code[Long] = { value.st match { @@ -196,6 +196,21 @@ abstract class PCanonicalBaseStruct(val types: Array[PType]) extends PBaseStruct } } + // FIXME: this doesn't need to exist when we have StackStruct! + def storeAtAddressFromFields(cb: EmitCodeBuilder, addr: Value[Long], region: Value[Region], emitFields: IndexedSeq[EmitCode], deepCopy: Boolean): Unit = { + require(emitFields.length == size) + cb += stagedInitialize(addr, setMissing = false) + emitFields.zipWithIndex.foreach { case (ev, i) => + ev.toI(cb) + .consume(cb, + cb += setFieldMissing(addr, i), + { sc => + types(i).storeAtAddress(cb, fieldOffset(addr, i), region, sc, deepCopy = deepCopy) + } + ) + } + } + def constructFromFields(cb: EmitCodeBuilder, region: Value[Region], emitFields: IndexedSeq[EmitCode], deepCopy: Boolean): SBaseStructPointerCode = { require(emitFields.length == size) val addr = cb.newLocal[Long]("pcbs_construct_fields", allocate(region)) @@ -210,7 +225,7 @@ abstract class PCanonicalBaseStruct(val types: Array[PType]) extends PBaseStruct ) } - new SBaseStructPointerCode(sType, addr) + new SBaseStructPointerCode(SBaseStructPointer(this), addr) } override def unstagedStoreJavaObject(annotation: Annotation, region: Region): Long = { diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalBinary.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalBinary.scala index 52f669a8a74..5628efe70ef 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalBinary.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalBinary.scala @@ -129,9 +129,9 @@ class PCanonicalBinary(val required: Boolean) extends PBinary { } } - def sType: SBinaryPointer = SBinaryPointer(setRequired(false).asInstanceOf[PCanonicalBinary]) + def sType: SBinary = SBinaryPointer(this) - def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SBinaryPointerCode = new SBinaryPointerCode(sType, addr) + def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): SBinaryPointerCode = new SBinaryPointerCode(SBinaryPointer(this), addr) def store(cb: 
EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): Code[Long] = { value.st match { diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalCall.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalCall.scala index c124eed52a5..264bdafb4ef 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalCall.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalCall.scala @@ -38,13 +38,13 @@ final case class PCanonicalCall(required: Boolean = false) extends PCall { } } - def sType: SCall = SCanonicalCall + def sType: SCall = SCanonicalCall(required) - def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SCode = new SCanonicalCallCode(Region.loadInt(addr)) + def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PCode = new SCanonicalCallCode(required, Region.loadInt(addr)) def store(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): Code[Long] = { value.st match { - case SCanonicalCall => + case SCanonicalCall(r) => val newAddr = cb.newLocal[Long]("pcanonicalcall_store_addr", region.allocate(representation.alignment, representation.byteSize)) storeAtAddress(cb, newAddr, region, value, deepCopy) newAddr diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalDict.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalDict.scala index 3c2efd28c8e..0b8b50e189d 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalDict.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalDict.scala @@ -3,19 +3,8 @@ package is.hail.types.physical import is.hail.annotations.{Annotation, Region} import is.hail.types.virtual.{TDict, Type} import is.hail.types.physical.stypes.concrete.{SIndexablePointer, SIndexablePointerCode} -import is.hail.types.physical.stypes.interfaces.{SBaseStruct, SIndexableCode} import org.apache.spark.sql.Row -object PCanonicalDict { - def coerceArrayCode(contents: SIndexableCode): SIndexableCode = { - contents.st match { - case SIndexablePointer(PCanonicalArray(ps: PBaseStruct, r)) => - PCanonicalDict(ps.types(0), ps.types(1), r) - .construct(contents) - } - } -} - final case class PCanonicalDict(keyType: PType, valueType: PType, required: Boolean = false) extends PDict with PArrayBackedContainer { val elementType = PCanonicalStruct(required = true, "key" -> keyType, "value" -> valueType) @@ -50,12 +39,8 @@ final case class PCanonicalDict(keyType: PType, valueType: PType, required: Bool this.arrayRep.unstagedStoreJavaObject(sortedArray, region) } - def construct(contents: SIndexableCode): SIndexableCode = { - contents.st match { - case SIndexablePointer(PCanonicalArray(pbs: PBaseStruct, _)) - if pbs.types.size == 2 && pbs.types(0) == keyType && pbs.types(1) == valueType => - case t => throw new RuntimeException(s"PCDict.construct: contents=${t}, arrayrep=${arrayRep}") - } + def construct(contents: PIndexableCode): PIndexableCode = { + assert(contents.pt.equalModuloRequired(arrayRep), s"\n contents: ${ contents.pt }\n arrayrep: ${ arrayRep }") new SIndexablePointerCode(SIndexablePointer(this), contents.asInstanceOf[SIndexablePointerCode].a) } } diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalInterval.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalInterval.scala index 488decf4e50..a6db53a7f49 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalInterval.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalInterval.scala @@ -68,21 +68,21 @@ final case class PCanonicalInterval(pointType: PType, override val 
required: Boo def containsPointers: Boolean = representation.containsPointers - def sType: SIntervalPointer = SIntervalPointer(setRequired(false).asInstanceOf[PCanonicalInterval]) + def sType: SIntervalPointer = SIntervalPointer(this) - def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SCode = new SIntervalPointerCode(sType, addr) + def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PCode = new SIntervalPointerCode(SIntervalPointer(this), addr) def store(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): Code[Long] = { value.st match { case SIntervalPointer(t: PCanonicalInterval) => - representation.store(cb, region, t.representation.loadCheapSCode(cb, value.asInstanceOf[SIntervalPointerCode].a), deepCopy) + representation.store(cb, region, t.representation.loadCheapPCode(cb, value.asInstanceOf[SIntervalPointerCode].a), deepCopy) } } def storeAtAddress(cb: EmitCodeBuilder, addr: Code[Long], region: Value[Region], value: SCode, deepCopy: Boolean): Unit = { value.st match { case SIntervalPointer(t: PCanonicalInterval) => - representation.storeAtAddress(cb, addr, region, t.representation.loadCheapSCode(cb, value.asInstanceOf[SIntervalPointerCode].a), deepCopy) + representation.storeAtAddress(cb, addr, region, t.representation.loadCheapPCode(cb, value.asInstanceOf[SIntervalPointerCode].a), deepCopy) } } def unstagedStoreAtAddress(addr: Long, region: Region, srcPType: PType, srcAddress: Long, deepCopy: Boolean): Unit = { @@ -121,6 +121,6 @@ final case class PCanonicalInterval(pointType: PType, override val required: Boo def constructFromCodes(cb: EmitCodeBuilder, region: Value[Region], start: EmitCode, end: EmitCode, includesStart: EmitCode, includesEnd: EmitCode): SIntervalPointerCode = { val sc = representation.constructFromFields(cb, region, FastIndexedSeq(start, end, includesStart, includesEnd), deepCopy = false) - new SIntervalPointerCode(sType, sc.a) + new SIntervalPointerCode(SIntervalPointer(this), sc.a) } } diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalLocus.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalLocus.scala index e1c1997f345..3104abb1559 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalLocus.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalLocus.scala @@ -91,21 +91,21 @@ final case class PCanonicalLocus(rgBc: BroadcastRG, required: Boolean = false) e } } - def sType: SCanonicalLocusPointer = SCanonicalLocusPointer(setRequired(false).asInstanceOf[PCanonicalLocus]) + def sType: SCanonicalLocusPointer = SCanonicalLocusPointer(this) - def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SCode = new SCanonicalLocusPointerCode(sType, addr) + def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PCode = new SCanonicalLocusPointerCode(sType, addr) def store(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): Code[Long] = { value.st match { case SCanonicalLocusPointer(pt) => - representation.store(cb, region, pt.representation.loadCheapSCode(cb, value.asInstanceOf[SCanonicalLocusPointerCode].a), deepCopy) + representation.store(cb, region, pt.representation.loadCheapPCode(cb, value.asInstanceOf[SCanonicalLocusPointerCode].a), deepCopy) } } def storeAtAddress(cb: EmitCodeBuilder, addr: Code[Long], region: Value[Region], value: SCode, deepCopy: Boolean): Unit = { value.st match { case SCanonicalLocusPointer(pt) => - representation.storeAtAddress(cb, addr, region, pt.representation.loadCheapSCode(cb, 
value.asInstanceOf[SCanonicalLocusPointerCode].a), deepCopy) + representation.storeAtAddress(cb, addr, region, pt.representation.loadCheapPCode(cb, value.asInstanceOf[SCanonicalLocusPointerCode].a), deepCopy) } } @@ -131,8 +131,8 @@ final case class PCanonicalLocus(rgBc: BroadcastRG, required: Boolean = false) e def constructFromPositionAndString(cb: EmitCodeBuilder, r: Value[Region], contig: Code[String], pos: Code[Int]): SCanonicalLocusPointerCode = { val contigType = representation.fieldType("contig").asInstanceOf[PCanonicalString] - val contigCode = contigType.sType.constructFromString(cb, r, contig) + val contigCode = SStringPointer(contigType).constructFromString(cb, r, contig) val repr = representation.constructFromFields(cb, r, FastIndexedSeq(EmitCode.present(cb.emb, contigCode), EmitCode.present(cb.emb, primitive(pos))), deepCopy = false) - new SCanonicalLocusPointerCode(SCanonicalLocusPointer(setRequired(false).asInstanceOf[PCanonicalLocus]), repr.a) + new SCanonicalLocusPointerCode(SCanonicalLocusPointer(this), repr.a) } } diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalNDArray.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalNDArray.scala index bd5d58a3b68..ac8bc9f167c 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalNDArray.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalNDArray.scala @@ -2,17 +2,16 @@ package is.hail.types.physical import is.hail.annotations.{Annotation, NDArray, Region, UnsafeOrdering} import is.hail.asm4s.{Code, _} -import is.hail.expr.ir.{CodeParam, CodeParamType, EmitCode, EmitCodeBuilder, SCodeParam, Param, ParamType} +import is.hail.expr.ir.{CodeParam, CodeParamType, EmitCode, EmitCodeBuilder, PCodeParam, Param, ParamType} import is.hail.types.physical.stypes.SCode import is.hail.types.physical.stypes.interfaces._ import is.hail.types.virtual.{TNDArray, Type} -import is.hail.types.physical.stypes.concrete.{SNDArrayPointer, SNDArrayPointerCode, SStackStruct} +import is.hail.types.physical.stypes.concrete.{SNDArrayPointer, SNDArrayPointerCode} import org.apache.spark.sql.Row import is.hail.utils._ final case class PCanonicalNDArray(elementType: PType, nDims: Int, required: Boolean = false) extends PNDArray { assert(elementType.required, "elementType must be required") - assert(!elementType.containsPointers, "ndarrays do not currently support elements which contain arrays, ndarrays, or strings") def _asIdent: String = s"ndarray_of_${elementType.asIdent}" @@ -39,8 +38,8 @@ final case class PCanonicalNDArray(elementType: PType, nDims: Int, required: Boo def loadShapes(cb: EmitCodeBuilder, addr: Value[Long], settables: IndexedSeq[Settable[Long]]): Unit = { - assert(settables.length == nDims, s"got ${ settables.length } settables, expect ${ nDims } dims") - val shapeTuple = shapeType.loadCheapSCode(cb, representation.loadField(addr, "shape")) + assert(settables.length == nDims) + val shapeTuple = shapeType.loadCheapPCode(cb, representation.loadField(addr, "shape")) .memoize(cb, "pcndarray_shapetuple") (0 until nDims).foreach { dimIdx => cb.assign(settables(dimIdx), shapeTuple.loadField(cb, dimIdx).get(cb).asLong.longCode(cb)) @@ -49,7 +48,7 @@ final case class PCanonicalNDArray(elementType: PType, nDims: Int, required: Boo def loadStrides(cb: EmitCodeBuilder, addr: Value[Long], settables: IndexedSeq[Settable[Long]]): Unit = { assert(settables.length == nDims) - val strideTuple = strideType.loadCheapSCode(cb, representation.loadField(addr, "strides")) + val strideTuple = 
strideType.loadCheapPCode(cb, representation.loadField(addr, "strides")) .memoize(cb, "pcndarray_stridetuple") (0 until nDims).foreach { dimIdx => cb.assign(settables(dimIdx), strideTuple.loadField(cb, dimIdx).get(cb).asLong.longCode(cb)) @@ -124,7 +123,7 @@ final case class PCanonicalNDArray(elementType: PType, nDims: Int, required: Boo } private def getElementAddress(cb: EmitCodeBuilder, indices: IndexedSeq[Value[Long]], nd: Value[Long]): Value[Long] = { - val ndarrayValue = loadCheapSCode(cb, nd).asNDArray.memoize(cb, "getElementAddressNDValue") + val ndarrayValue = PCode(this, nd).asNDArray.memoize(cb, "getElementAddressNDValue") val stridesTuple = ndarrayValue.strides(cb) val dataStore = cb.newLocal[Long]("nd_get_element_address_data_store", @@ -152,7 +151,7 @@ final case class PCanonicalNDArray(elementType: PType, nDims: Int, required: Boo def loadElement(cb: EmitCodeBuilder, indices: IndexedSeq[Value[Long]], ndAddress: Value[Long]): SCode = { val off = getElementAddress(cb, indices, ndAddress) - elementType.loadCheapSCode(cb, elementType.loadFromNested(off)) + elementType.loadCheapPCode(cb, elementType.loadFromNested(off)) } def loadElementFromDataAndStrides(cb: EmitCodeBuilder, indices: IndexedSeq[Value[Long]], ndDataAddress: Value[Long], strides: IndexedSeq[Value[Long]]): Code[Long] = { @@ -180,30 +179,28 @@ final case class PCanonicalNDArray(elementType: PType, nDims: Int, required: Boo dataCode: SIndexableCode, cb: EmitCodeBuilder, region: Value[Region] - ): SNDArrayCode = { - assert(shape.length == nDims, s"nDims = ${ nDims }, nShapeElts=${ shape.length }") - assert(strides.length == nDims, s"nDims = ${ nDims }, nShapeElts=${ strides.length }") + ): PNDArrayCode = { val cacheKey = ("constructByCopyingArray", this, dataCode.st) val mb = cb.emb.ecb.getOrGenEmitMethod("pcndarray_construct_by_copying_array", cacheKey, FastIndexedSeq[ParamType](classInfo[Region], dataCode.st.paramType) ++ (0 until 2 * nDims).map(_ => CodeParamType(LongInfo)), sType.paramType) { mb => - mb.emitSCode { cb => + mb.emitPCode { cb => val region = mb.getCodeParam[Region](1) - val dataValue = mb.getSCodeParam(2).asIndexable.memoize(cb, "pcndarray_construct_by_copying_array_datavalue") + val dataValue = mb.getPCodeParam(2).asIndexable.memoize(cb, "pcndarray_construct_by_copying_array_datavalue") val shape = (0 until nDims).map(i => mb.getCodeParam[Long](3 + i)) val strides = (0 until nDims).map(i => mb.getCodeParam[Long](3 + nDims + i)) val ndAddr = cb.newLocal[Long]("ndarray_construct_addr") cb.assign(ndAddr, this.allocate(shape, region)) - shapeType.storeAtAddress(cb, cb.newLocal[Long]("construct_shape", this.representation.fieldOffset(ndAddr, "shape")), + shapeType.storeAtAddressFromFields(cb, cb.newLocal[Long]("construct_shape", this.representation.fieldOffset(ndAddr, "shape")), region, - SStackStruct.constructFromArgs(cb, region, shapeType.virtualType, shape.map(s => EmitCode.present(cb.emb, primitive(s))): _*), + shape.map(s => EmitCode.present(cb.emb, primitive(s))), false) - strideType.storeAtAddress(cb, cb.newLocal[Long]("construct_strides", this.representation.fieldOffset(ndAddr, "strides")), + strideType.storeAtAddressFromFields(cb, cb.newLocal[Long]("construct_strides", this.representation.fieldOffset(ndAddr, "strides")), region, - SStackStruct.constructFromArgs(cb, region, strideType.virtualType, strides.map(s => EmitCode.present(cb.emb, primitive(s))): _*), + strides.map(s => EmitCode.present(cb.emb, primitive(s))), false) val newDataPointer = cb.newLocal("ndarray_construct_new_data_pointer", 
ndAddr + this.representation.byteSize) @@ -211,11 +208,11 @@ final case class PCanonicalNDArray(elementType: PType, nDims: Int, required: Boo cb.append(Region.storeLong(this.representation.fieldOffset(ndAddr, "data"), newDataPointer)) dataType.storeContentsAtAddress(cb, newDataPointer, region, dataValue, true) - new SNDArrayPointerCode(sType, ndAddr) + new SNDArrayPointerCode(SNDArrayPointer(this), ndAddr) } } - cb.invokeSCode(mb, FastIndexedSeq[Param](region, SCodeParam(dataCode)) ++ (shape.map(CodeParam(_)) ++ strides.map(CodeParam(_))): _*) + cb.invokePCode(mb, FastIndexedSeq[Param](region, PCodeParam(dataCode.asPCode)) ++ (shape.map(CodeParam(_)) ++ strides.map(CodeParam(_))): _*) .asNDArray } @@ -228,13 +225,13 @@ final case class PCanonicalNDArray(elementType: PType, nDims: Int, required: Boo val ndAddr = cb.newLocal[Long]("ndarray_construct_addr") cb.assign(ndAddr, this.allocate(shape, region)) - shapeType.storeAtAddress(cb, cb.newLocal[Long]("construct_shape", this.representation.fieldOffset(ndAddr, "shape")), + shapeType.storeAtAddressFromFields(cb, cb.newLocal[Long]("construct_shape", this.representation.fieldOffset(ndAddr, "shape")), region, - SStackStruct.constructFromArgs(cb, region, shapeType.virtualType, shape.map(s => EmitCode.present(cb.emb, primitive(s))): _*), + shape.map(s => EmitCode.present(cb.emb, primitive(s))), false) - strideType.storeAtAddress(cb, cb.newLocal[Long]("construct_strides", this.representation.fieldOffset(ndAddr, "strides")), + strideType.storeAtAddressFromFields(cb, cb.newLocal[Long]("construct_strides", this.representation.fieldOffset(ndAddr, "strides")), region, - SStackStruct.constructFromArgs(cb, region, strideType.virtualType, strides.map(s => EmitCode.present(cb.emb, primitive(s))): _*), + strides.map(s => EmitCode.present(cb.emb, primitive(s))), false) val newDataPointer = cb.newLocal("ndarray_construct_new_data_pointer", ndAddr + this.representation.byteSize) @@ -244,7 +241,7 @@ final case class PCanonicalNDArray(elementType: PType, nDims: Int, required: Boo cb.append(dataType.stagedInitialize(newDataPointer, this.numElements(shape).toI)) - (newFirstElementDataPointer, (cb: EmitCodeBuilder) => new SNDArrayPointerCode(sType, ndAddr)) + (newFirstElementDataPointer, (cb: EmitCodeBuilder) => new SNDArrayPointerCode(SNDArrayPointer(this), ndAddr)) } def unstagedConstructDataFunction( @@ -347,9 +344,9 @@ final case class PCanonicalNDArray(elementType: PType, nDims: Int, required: Boo Region.storeAddress(addr, copyFromAddress(region, srcND, srcAddress, deepCopy)) } - def sType: SNDArrayPointer = SNDArrayPointer(setRequired(false).asInstanceOf[PCanonicalNDArray]) + def sType: SNDArrayPointer = SNDArrayPointer(this) - def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SCode = new SNDArrayPointerCode(sType, addr) + def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PCode = new SNDArrayPointerCode(sType, addr) def store(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): Code[Long] = { value.st match { @@ -364,14 +361,15 @@ final case class PCanonicalNDArray(elementType: PType, nDims: Int, required: Boo val shape = oldND.shapes(cb) val newStrides = makeColumnMajorStrides(shape, region, cb) val (targetDataFirstElementAddr, finish) = this.constructDataFunction(shape, newStrides, cb, region) - val result = finish(cb) - SNDArray.coiterate(cb, region, FastIndexedSeq((result, "result"), (oldND.get, "oldND")), { - case Seq(dest, elt) => - cb.assign(dest, elt) - }, deepCopy = true) + val currentOffset = 
cb.newLocal[Long]("pcanonical_ndarray_store_offset", targetDataFirstElementAddr) + SNDArray.forEachIndex(cb, shape, "PCanonicalNDArray_store") { (cb, currentIndices) => + val oldElement = oldND.loadElement(currentIndices, cb) + elementType.storeAtAddress(cb, currentOffset, region, oldElement, true) + cb.assign(currentOffset, currentOffset + elementType.byteSize) + } - result.a + finish(cb).a } } diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalSet.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalSet.scala index 49db88abc0b..900a12d040e 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalSet.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalSet.scala @@ -2,19 +2,9 @@ package is.hail.types.physical import is.hail.annotations.{Annotation, Region} import is.hail.types.physical.stypes.concrete.{SIndexablePointer, SIndexablePointerCode} -import is.hail.types.physical.stypes.interfaces.SIndexableCode import is.hail.types.virtual.{TSet, Type} import is.hail.utils._ -object PCanonicalSet { - def coerceArrayCode(contents: SIndexableCode): SIndexableCode = { - contents.st match { - case SIndexablePointer(PCanonicalArray(elt, r)) => - PCanonicalSet(elt, r).construct(contents) - } - } -} - final case class PCanonicalSet(elementType: PType, required: Boolean = false) extends PSet with PArrayBackedContainer { val arrayRep = PCanonicalArray(elementType, required) @@ -40,8 +30,7 @@ final case class PCanonicalSet(elementType: PType, required: Boolean = false) e arrayRep.unstagedStoreJavaObject(s, region) } - def construct(_contents: SIndexableCode): SIndexableCode = { - val contents = _contents.asInstanceOf[SIndexablePointerCode] + def construct(contents: PIndexableCode): PIndexableCode = { assert(contents.pt.equalModuloRequired(arrayRep), s"\n contents: ${ contents.pt }\n arrayrep: ${ arrayRep }") new SIndexablePointerCode(SIndexablePointer(this), contents.asInstanceOf[SIndexablePointerCode].a) } diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalShuffle.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalShuffle.scala index 3f02d7e41f6..b9b19fe1c2b 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalShuffle.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalShuffle.scala @@ -44,11 +44,11 @@ final case class PCanonicalShuffle( override def unstagedStoreJavaObject(annotation: Annotation, region: Region): Long = this.representation.unstagedStoreJavaObject(annotation, region) - def loadBinary(cb: EmitCodeBuilder, addr: Code[Long]): SBinaryPointerCode = representation.loadCheapSCode(cb, addr).asInstanceOf[SBinaryPointerCode] + def loadBinary(cb: EmitCodeBuilder, addr: Code[Long]): SBinaryPointerCode = representation.loadCheapPCode(cb, addr).asInstanceOf[SBinaryPointerCode] - def sType: SCanonicalShufflePointer = SCanonicalShufflePointer(setRequired(false).asInstanceOf[PCanonicalShuffle]) + def sType: SCanonicalShufflePointer = SCanonicalShufflePointer(this) - def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SCode = new SCanonicalShufflePointerCode(sType, representation.loadCheapSCode(cb, addr)) + def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PCode = new SCanonicalShufflePointerCode(sType, representation.loadCheapPCode(cb, addr)) def store(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): Code[Long] = { value.st match { diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalStream.scala 
b/hail/src/main/scala/is/hail/types/physical/PCanonicalStream.scala index 3625cda0f28..219d02b6da8 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalStream.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalStream.scala @@ -3,7 +3,7 @@ package is.hail.types.physical import is.hail.annotations.UnsafeOrdering import is.hail.asm4s.Code import is.hail.expr.ir.EmitMethodBuilder -import is.hail.types.physical.stypes.{EmitType, interfaces} +import is.hail.types.physical.stypes.interfaces import is.hail.types.physical.stypes.interfaces.{SStream, SStreamCode} import is.hail.types.virtual.{TStream, Type} @@ -16,6 +16,8 @@ final case class PCanonicalStream(elementType: PType, required: Boolean = false) sb.append("]") } + override def defaultValue(mb: EmitMethodBuilder[_]): SStreamCode = throw new UnsupportedOperationException + override def deepRename(t: Type) = deepRenameStream(t.asInstanceOf[TStream]) private def deepRenameStream(t: TStream): PStream = @@ -23,7 +25,7 @@ final case class PCanonicalStream(elementType: PType, required: Boolean = false) def setRequired(required: Boolean): PCanonicalStream = if (required == this.required) this else this.copy(required = required) - override def sType: SStream = interfaces.SStream(EmitType(elementType.sType, elementType.required)) + override def sType: SStream = interfaces.SStream(elementType.sType, required) def loadFromNested(addr: Code[Long]): Code[Long] = throw new NotImplementedError() diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalString.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalString.scala index 30dee6b5807..e61fc4bf980 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalString.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalString.scala @@ -58,9 +58,9 @@ class PCanonicalString(val required: Boolean) extends PString { def setRequired(required: Boolean) = if (required == this.required) this else PCanonicalString(required) - def sType: SStringPointer = SStringPointer(setRequired(false).asInstanceOf[PCanonicalString]) + def sType: SStringPointer = SStringPointer(this) - def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SCode = new SStringPointerCode(sType, addr) + def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PCode = new SStringPointerCode(SStringPointer(this), addr) def store(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): Code[Long] = { value.st match { diff --git a/hail/src/main/scala/is/hail/types/physical/PCode.scala b/hail/src/main/scala/is/hail/types/physical/PCode.scala new file mode 100644 index 00000000000..bb122c89f41 --- /dev/null +++ b/hail/src/main/scala/is/hail/types/physical/PCode.scala @@ -0,0 +1,318 @@ +package is.hail.types.physical + +import is.hail.annotations.Region +import is.hail.asm4s._ +import is.hail.expr.ir._ +import is.hail.expr.ir.streams.{StreamArgType, StreamProducer} +import is.hail.types.physical.stypes._ +import is.hail.types.physical.stypes.concrete._ +import is.hail.types.physical.stypes.interfaces.{PVoidCode, SStream, SStreamCode} +import is.hail.types.physical.stypes.primitives._ +import is.hail.types.virtual._ +import is.hail.utils._ + +trait PValue extends SValue { pValueSelf => + def pt: PType + + def get: PCode + + def value: Value[_] = { + new Value[Any] { + override def get: Code[Any] = pValueSelf.get.code + } + } +} + +trait PSettable extends PValue with SSettable { + + def store(cb: EmitCodeBuilder, v: SCode): Unit = store(cb, v.asInstanceOf[PCode]) + + 
def store(cb: EmitCodeBuilder, v: PCode): Unit + + def settableTuple(): IndexedSeq[Settable[_]] + + override def load(): PCode = get +} + +object SingleCodeType { + def typeInfoFromType(t: Type): TypeInfo[_] = t match { + case TInt32 => IntInfo + case TInt64 => LongInfo + case TFloat32 => FloatInfo + case TFloat64 => DoubleInfo + case TBoolean => BooleanInfo + case TVoid => UnitInfo + case _ => LongInfo // all others passed as ptype references + } + + def fromSType(t: SType): SingleCodeType = t.virtualType match { + case TInt32 => Int32SingleCodeType + case TInt64 => Int64SingleCodeType + case TFloat32 => Float32SingleCodeType + case TFloat64 => Float64SingleCodeType + case TBoolean => BooleanSingleCodeType + case _ => PTypeReferenceSingleCodeType(t.canonicalPType()) + + } +} + +sealed trait SingleCodeType { + def ti: TypeInfo[_] + + def loadToPCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): PCode + + def virtualType: Type + + def coercePCode(cb: EmitCodeBuilder, pc: PCode, region: Value[Region], deepCopy: Boolean): SingleCodePCode +} + +case object Int32SingleCodeType extends SingleCodeType { + def ti: TypeInfo[_] = IntInfo + + def loadToPCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): PCode = new SInt32Code(true, coerce[Int](c)) + + def virtualType: Type = TInt32 + + def coercePCode(cb: EmitCodeBuilder, pc: PCode, region: Value[Region], deepCopy: Boolean): SingleCodePCode = SingleCodePCode(this, pc.asInt.intCode(cb)) +} + +case object Int64SingleCodeType extends SingleCodeType { + def ti: TypeInfo[_] = LongInfo + + def loadToPCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): PCode = new SInt64Code(true, coerce[Long](c)) + + def virtualType: Type = TInt64 + + def coercePCode(cb: EmitCodeBuilder, pc: PCode, region: Value[Region], deepCopy: Boolean): SingleCodePCode = SingleCodePCode(this, pc.asLong.longCode(cb)) +} + +case object Float32SingleCodeType extends SingleCodeType { + def ti: TypeInfo[_] = FloatInfo + + def loadToPCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): PCode = new SFloat32Code(true, coerce[Float](c)) + + def virtualType: Type = TFloat32 + + def coercePCode(cb: EmitCodeBuilder, pc: PCode, region: Value[Region], deepCopy: Boolean): SingleCodePCode = SingleCodePCode(this, pc.asFloat.floatCode(cb)) +} + +case object Float64SingleCodeType extends SingleCodeType { + def ti: TypeInfo[_] = DoubleInfo + + def loadToPCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): PCode = new SFloat64Code(true, coerce[Double](c)) + + def virtualType: Type = TFloat64 + + def coercePCode(cb: EmitCodeBuilder, pc: PCode, region: Value[Region], deepCopy: Boolean): SingleCodePCode = SingleCodePCode(this, pc.asDouble.doubleCode(cb)) +} + +case object BooleanSingleCodeType extends SingleCodeType { + def ti: TypeInfo[_] = BooleanInfo + + def loadToPCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): PCode = new SBooleanCode(true, coerce[Boolean](c)) + + def virtualType: Type = TBoolean + + def coercePCode(cb: EmitCodeBuilder, pc: PCode, region: Value[Region], deepCopy: Boolean): SingleCodePCode = SingleCodePCode(this, pc.asBoolean.boolCode(cb)) +} + +case class StreamSingleCodeType(requiresMemoryManagementPerElement: Boolean, eltType: PType) extends SingleCodeType { self => + + def virtualType: Type = TStream(eltType.virtualType) + + def ti: TypeInfo[_] = classInfo[StreamArgType] + + def loadToPCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): PCode = { + val mb = cb.emb + val xIter = mb.genFieldThisRef[Iterator[java.lang.Long]]("streamInIterator") + + 
// this, Region, ... + val mkIter = coerce[StreamArgType](c) + val eltRegion = mb.genFieldThisRef[Region]("stream_input_element_region") + val rvAddr = mb.genFieldThisRef[Long]("stream_input_addr") + + val producer = new StreamProducer { + override val length: Option[EmitCodeBuilder => Code[Int]] = None + + override def initialize(cb: EmitCodeBuilder): Unit = { + cb.assign(xIter, mkIter.invoke[Region, Region, Iterator[java.lang.Long]]("apply", r, eltRegion)) + } + + override val elementRegion: Settable[Region] = eltRegion + override val requiresMemoryManagementPerElement: Boolean = self.requiresMemoryManagementPerElement + override val LproduceElement: CodeLabel = mb.defineAndImplementLabel { cb => + val hasNext = cb.newLocal[Boolean]("stream_in_hasnext", xIter.load().hasNext) + cb.ifx(!hasNext, cb.goto(LendOfStream)) + cb.assign(rvAddr, xIter.load().next().invoke[Long]("longValue")) + cb.goto(LproduceElementDone) + } + + override val element: EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, eltType.loadCheapPCode(cb, rvAddr))) + + override def close(cb: EmitCodeBuilder): Unit = {} + } + SStreamCode(SStream(eltType.sType, true), producer) + } + + def coercePCode(cb: EmitCodeBuilder, pc: PCode, region: Value[Region], deepCopy: Boolean): SingleCodePCode = throw new UnsupportedOperationException +} + +case class PTypeReferenceSingleCodeType(pt: PType) extends SingleCodeType { + def ti: TypeInfo[_] = LongInfo + + def loadToPCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): PCode = pt.loadCheapPCode(cb, coerce[Long](c)) + + def virtualType: Type = pt.virtualType + + def coercePCode(cb: EmitCodeBuilder, pc: PCode, region: Value[Region], deepCopy: Boolean): SingleCodePCode = { + SingleCodePCode(this, pt.store(cb, region, pc, deepCopy = deepCopy)) + } +} + +object SingleCodePCode { + def fromPCode(cb: EmitCodeBuilder, pc: PCode, region: Value[Region], deepCopy: Boolean = false): SingleCodePCode = { + SingleCodeType.fromSType(pc.st).coercePCode(cb, pc, region, deepCopy) + } +} + +case class SingleCodePCode(typ: SingleCodeType, code: Code[_]) + +abstract class PCode extends SCode { self => + + def st: SType + + def pt: PType + + def code: Code[_] + + def codeTuple(): IndexedSeq[Code[_]] + + def typeInfo: TypeInfo[_] = typeToTypeInfo(pt) + + override def asBoolean: SBooleanCode = asInstanceOf[SBooleanCode] + override def asInt: SInt32Code = asInstanceOf[SInt32Code] + override def asInt32: SInt32Code = asInstanceOf[SInt32Code] + override def asLong: SInt64Code = asInstanceOf[SInt64Code] + override def asInt64: SInt64Code = asInstanceOf[SInt64Code] + override def asFloat: SFloat32Code = asInstanceOf[SFloat32Code] + override def asFloat32: SFloat32Code = asInstanceOf[SFloat32Code] + override def asFloat64: SFloat64Code = asInstanceOf[SFloat64Code] + override def asDouble: SFloat64Code = asInstanceOf[SFloat64Code] + override def asBinary: PBinaryCode = asInstanceOf[PBinaryCode] + override def asIndexable: PIndexableCode = asInstanceOf[PIndexableCode] + override def asBaseStruct: PBaseStructCode = asInstanceOf[PBaseStructCode] + override def asString: PStringCode = asInstanceOf[PStringCode] + override def asInterval: PIntervalCode = asInstanceOf[PIntervalCode] + override def asNDArray: PNDArrayCode = asInstanceOf[PNDArrayCode] + override def asLocus: PLocusCode = asInstanceOf[PLocusCode] + + override def asCall: PCallCode = asInstanceOf[PCallCode] + + override def castTo(cb: EmitCodeBuilder, region: Value[Region], destType: PType): PCode = + castTo(cb, region, destType, false) + + override 
def castTo(cb: EmitCodeBuilder, region: Value[Region], destType: PType, deepCopy: Boolean): PCode = { + super.castTo(cb, region, destType, deepCopy).asPCode + } + + override def copyToRegion(cb: EmitCodeBuilder, region: Value[Region]): PCode = copyToRegion(cb, region, pt) + + override def copyToRegion(cb: EmitCodeBuilder, region: Value[Region], destType: PType): PCode = + super.copyToRegion(cb, region, destType).asPCode + + def memoize(cb: EmitCodeBuilder, name: String): PValue + + def memoizeField(cb: EmitCodeBuilder, name: String): PValue + + final def toPCode(cb: EmitCodeBuilder, region: Value[Region]): PCode = this +} + +object PCode { + def apply(pt: PType, code: Code[_]): PCode = pt match { + case pt: PCanonicalArray => + new SIndexablePointerCode(SIndexablePointer(pt), coerce[Long](code)) + case pt: PCanonicalSet => + new SIndexablePointerCode(SIndexablePointer(pt), coerce[Long](code)) + case pt: PCanonicalDict => + new SIndexablePointerCode(SIndexablePointer(pt), coerce[Long](code)) + case pt: PSubsetStruct => + val ss = pt.sType + new SSubsetStructCode(ss, PCode(ss.pType.ps, code).asBaseStruct) + case pt: PCanonicalBaseStruct => + new SBaseStructPointerCode(SBaseStructPointer(pt), coerce[Long](code)) + case pt: PCanonicalBinary => + new SBinaryPointerCode(SBinaryPointer(pt), coerce[Long](code)) + case pt: PCanonicalShuffle => + new SCanonicalShufflePointerCode(SCanonicalShufflePointer(pt), + new SBinaryPointerCode(SBinaryPointer(pt.representation), coerce[Long](code))) + case pt: PCanonicalString => + new SStringPointerCode(SStringPointer(pt), coerce[Long](code)) + case pt: PCanonicalInterval => + new SIntervalPointerCode(SIntervalPointer(pt), coerce[Long](code)) + case pt: PCanonicalLocus => + new SCanonicalLocusPointerCode(SCanonicalLocusPointer(pt), coerce[Long](code)) + case pt: PCanonicalCall => + new SCanonicalCallCode(pt.required, coerce[Int](code)) + case pt: PCanonicalNDArray => + new SNDArrayPointerCode(SNDArrayPointer(pt), coerce[Long](code)) + case pt: PCanonicalStream => + throw new UnsupportedOperationException(s"Can't PCode.apply unrealizable PType: $pt") + case PVoid => + throw new UnsupportedOperationException(s"Can't PCode.apply unrealizable PType: $pt") + case PBoolean(r) => + new SBooleanCode(r, coerce[Boolean](code)) + case PInt32(r) => + new SInt32Code(r, coerce[Int](code)) + case PInt64(r) => + new SInt64Code(r, coerce[Long](code)) + case PFloat32(r) => + new SFloat32Code(r, coerce[Float](code)) + case PFloat64(r) => + new SFloat64Code(r, coerce[Double](code)) + } + + def _empty: PCode = PVoidCode +} + +object PSettable { + def apply(sb: SettableBuilder, _pt: PType, name: String): PSettable = _pt match { + case pt: PCanonicalArray => + SIndexablePointerSettable(sb, SIndexablePointer(pt), name) + case pt: PCanonicalSet => + SIndexablePointerSettable(sb, SIndexablePointer(pt), name) + case pt: PCanonicalDict => + SIndexablePointerSettable(sb, SIndexablePointer(pt), name) + case pt: PSubsetStruct => + new SSubsetStructSettable(pt.sType, PSettable(sb, pt.ps, name).asInstanceOf[PStructSettable]) + case pt: PCanonicalBaseStruct => + SBaseStructPointerSettable(sb, SBaseStructPointer(pt), name) + case pt: PCanonicalBinary => + SBinaryPointerSettable(sb, SBinaryPointer(pt), name) + case pt: PCanonicalString => + SStringPointerSettable(sb, SStringPointer(pt), name) + case pt: PCanonicalInterval => + SIntervalPointerSettable(sb, SIntervalPointer(pt), name) + case pt: PCanonicalLocus => + SCanonicalLocusPointerSettable(sb, SCanonicalLocusPointer(pt), name) + case pt: 
PCanonicalCall => + SCanonicalCallSettable(sb, name, pt.required) + case pt: PCanonicalNDArray => + SNDArrayPointerSettable(sb, SNDArrayPointer(pt), name) + case pt: PCanonicalShuffle => + SCanonicalShufflePointerSettable(sb, SCanonicalShufflePointer(pt), name) + case pt: PCanonicalStream => + throw new UnsupportedOperationException(s"Can't PCode.apply unrealizable PType: $pt") + case PVoid => + throw new UnsupportedOperationException(s"Can't PCode.apply unrealizable PType: PVoid") + case PBoolean(r) => + SBooleanSettable(sb, name, r) + case PInt32(r) => + SInt32Settable(sb, name, r) + case PInt64(r) => + SInt64Settable(sb, name, r) + case PFloat32(r) => + SFloat32Settable(sb, name, r) + case PFloat64(r) => + SFloat64Settable(sb, name, r) + } +} diff --git a/hail/src/main/scala/is/hail/types/physical/PContainer.scala b/hail/src/main/scala/is/hail/types/physical/PContainer.scala index 668acd9dddd..2f842a3c1bd 100644 --- a/hail/src/main/scala/is/hail/types/physical/PContainer.scala +++ b/hail/src/main/scala/is/hail/types/physical/PContainer.scala @@ -87,4 +87,14 @@ abstract class PContainer extends PIterable { def nextElementAddress(currentOffset: Long): Long def nextElementAddress(currentOffset: Code[Long]): Code[Long] -} \ No newline at end of file +} + +abstract class PIndexableValue extends PValue with SIndexableValue + +abstract class PIndexableCode extends PCode with SIndexableCode { + def pt: PContainer + + def memoize(cb: EmitCodeBuilder, name: String): PIndexableValue + + def memoizeField(cb: EmitCodeBuilder, name: String): PIndexableValue +} diff --git a/hail/src/main/scala/is/hail/types/physical/PDict.scala b/hail/src/main/scala/is/hail/types/physical/PDict.scala index 508a909039c..8ec8b0688b2 100644 --- a/hail/src/main/scala/is/hail/types/physical/PDict.scala +++ b/hail/src/main/scala/is/hail/types/physical/PDict.scala @@ -2,7 +2,6 @@ package is.hail.types.physical import is.hail.annotations._ import is.hail.check.Gen -import is.hail.types.physical.stypes.interfaces.SContainer import is.hail.types.virtual.TDict abstract class PDict extends PContainer { @@ -11,8 +10,6 @@ abstract class PDict extends PContainer { val keyType: PType val valueType: PType - def sType: SContainer - def elementType: PStruct override def genNonmissingValue: Gen[Annotation] = diff --git a/hail/src/main/scala/is/hail/types/physical/PFloat32.scala b/hail/src/main/scala/is/hail/types/physical/PFloat32.scala index 3544e9bbd68..d6119e2c335 100644 --- a/hail/src/main/scala/is/hail/types/physical/PFloat32.scala +++ b/hail/src/main/scala/is/hail/types/physical/PFloat32.scala @@ -38,12 +38,12 @@ class PFloat32(override val required: Boolean) extends PNumeric with PPrimitive coerce[PFloat32](coerce[Float](a) * coerce[Float](b)) } - override def sType: SType = SFloat32 + override def sType: SType = SFloat32(required) def storePrimitiveAtAddress(cb: EmitCodeBuilder, addr: Code[Long], value: SCode): Unit = cb.append(Region.storeFloat(addr, value.asFloat.floatCode(cb))) - override def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SCode = new SFloat32Code(Region.loadFloat(addr)) + override def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PCode = new SFloat32Code(required, Region.loadFloat(addr)) override def unstagedStoreJavaObjectAtAddress(addr: Long, annotation: Annotation, region: Region): Unit = { Region.storeFloat(addr, annotation.asInstanceOf[Float]) diff --git a/hail/src/main/scala/is/hail/types/physical/PFloat64.scala b/hail/src/main/scala/is/hail/types/physical/PFloat64.scala index 
b61b314fa54..1136bc03e13 100644 --- a/hail/src/main/scala/is/hail/types/physical/PFloat64.scala +++ b/hail/src/main/scala/is/hail/types/physical/PFloat64.scala @@ -39,12 +39,12 @@ class PFloat64(override val required: Boolean) extends PNumeric with PPrimitive coerce[PFloat64](coerce[Double](a) * coerce[Double](b)) } - override def sType: SType = SFloat64 + override def sType: SType = SFloat64(required) def storePrimitiveAtAddress(cb: EmitCodeBuilder, addr: Code[Long], value: SCode): Unit = cb.append(Region.storeDouble(addr, value.asDouble.doubleCode(cb))) - override def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SCode = new SFloat64Code(Region.loadDouble(addr)) + override def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PCode = new SFloat64Code(required, Region.loadDouble(addr)) override def unstagedStoreJavaObjectAtAddress(addr: Long, annotation: Annotation, region: Region): Unit = { Region.storeDouble(addr, annotation.asInstanceOf[Double]) diff --git a/hail/src/main/scala/is/hail/types/physical/PInt32.scala b/hail/src/main/scala/is/hail/types/physical/PInt32.scala index d5f8154d319..9485774e7af 100644 --- a/hail/src/main/scala/is/hail/types/physical/PInt32.scala +++ b/hail/src/main/scala/is/hail/types/physical/PInt32.scala @@ -35,18 +35,16 @@ class PInt32(override val required: Boolean) extends PNumeric with PPrimitive { coerce[PInt32](coerce[Int](a) * coerce[Int](b)) } - override def sType: SType = SInt32 + override def sType: SType = SInt32(required) def storePrimitiveAtAddress(cb: EmitCodeBuilder, addr: Code[Long], value: SCode): Unit = cb.append(Region.storeInt(addr, value.asInt.intCode(cb))) - override def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SCode = new SInt32Code(Region.loadInt(addr)) + override def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PCode = new SInt32Code(required, Region.loadInt(addr)) override def unstagedStoreJavaObjectAtAddress(addr: Long, annotation: Annotation, region: Region): Unit = { Region.storeInt(addr, annotation.asInstanceOf[Int]) } - - def unstagedLoadFromAddress(addr: Long): Int = Region.loadInt(addr) } object PInt32 { diff --git a/hail/src/main/scala/is/hail/types/physical/PInt64.scala b/hail/src/main/scala/is/hail/types/physical/PInt64.scala index 6b2bfee159c..0c474e4ad03 100644 --- a/hail/src/main/scala/is/hail/types/physical/PInt64.scala +++ b/hail/src/main/scala/is/hail/types/physical/PInt64.scala @@ -36,12 +36,12 @@ class PInt64(override val required: Boolean) extends PNumeric with PPrimitive { coerce[PInt64](coerce[Long](a) * coerce[Long](b)) } - override def sType: SType = SInt64 + override def sType: SType = SInt64(required) def storePrimitiveAtAddress(cb: EmitCodeBuilder, addr: Code[Long], value: SCode): Unit = cb.append(Region.storeLong(addr, value.asLong.longCode(cb))) - override def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SCode = new SInt64Code(Region.loadLong(addr)) + override def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PCode = new SInt64Code(required, Region.loadLong(addr)) override def unstagedStoreJavaObjectAtAddress(addr: Long, annotation: Annotation, region: Region): Unit = { Region.storeLong(addr, annotation.asInstanceOf[Long]) diff --git a/hail/src/main/scala/is/hail/types/physical/PInterval.scala b/hail/src/main/scala/is/hail/types/physical/PInterval.scala index de2c5be720d..9d159988e46 100644 --- a/hail/src/main/scala/is/hail/types/physical/PInterval.scala +++ b/hail/src/main/scala/is/hail/types/physical/PInterval.scala @@ -99,4 +99,16 @@ abstract class 
PInterval extends PType { def includesEnd(off: Code[Long]): Code[Boolean] override def genNonmissingValue: Gen[Annotation] = Interval.gen(pointType.virtualType.ordering, pointType.genValue) -} \ No newline at end of file +} + +abstract class PIntervalValue extends PValue with SIntervalValue { + def pt: PInterval +} + +abstract class PIntervalCode extends PCode with SIntervalCode { + def pt: PInterval + + def memoize(cb: EmitCodeBuilder, name: String): PIntervalValue + + def memoizeField(cb: EmitCodeBuilder, name: String): PIntervalValue +} diff --git a/hail/src/main/scala/is/hail/types/physical/PLocus.scala b/hail/src/main/scala/is/hail/types/physical/PLocus.scala index 87e511ce2c7..6f752afbf62 100644 --- a/hail/src/main/scala/is/hail/types/physical/PLocus.scala +++ b/hail/src/main/scala/is/hail/types/physical/PLocus.scala @@ -25,4 +25,14 @@ abstract class PLocus extends PType { def positionType: PInt32 def unstagedStoreLocus(addr: Long, contig: String, position: Int, region: Region): Unit -} \ No newline at end of file +} + +abstract class PLocusValue extends PValue with SLocusValue + +abstract class PLocusCode extends PCode with SLocusCode { + def pt: PLocus + + def memoize(cb: EmitCodeBuilder, name: String): PLocusValue + + def memoizeField(cb: EmitCodeBuilder, name: String): PLocusValue +} diff --git a/hail/src/main/scala/is/hail/types/physical/PNDArray.scala b/hail/src/main/scala/is/hail/types/physical/PNDArray.scala index 691844ac1cf..1300f25fbdc 100644 --- a/hail/src/main/scala/is/hail/types/physical/PNDArray.scala +++ b/hail/src/main/scala/is/hail/types/physical/PNDArray.scala @@ -57,7 +57,7 @@ abstract class PNDArray extends PType { data: SIndexableCode, cb: EmitCodeBuilder, region: Value[Region] - ): SNDArrayCode + ): PNDArrayCode def constructDataFunction( shape: IndexedSeq[Value[Long]], @@ -65,4 +65,14 @@ abstract class PNDArray extends PType { cb: EmitCodeBuilder, region: Value[Region] ): (Value[Long], EmitCodeBuilder => SNDArrayPointerCode) -} \ No newline at end of file +} + +abstract class PNDArrayValue extends PValue with SNDArrayValue { + def pt: PNDArray +} + +abstract class PNDArrayCode extends PCode with SNDArrayCode { + def pt: PNDArray + + def memoize(cb: EmitCodeBuilder, name: String): PNDArrayValue +} diff --git a/hail/src/main/scala/is/hail/types/physical/PPrimitive.scala b/hail/src/main/scala/is/hail/types/physical/PPrimitive.scala index 6c242f2e396..82238b65e86 100644 --- a/hail/src/main/scala/is/hail/types/physical/PPrimitive.scala +++ b/hail/src/main/scala/is/hail/types/physical/PPrimitive.scala @@ -9,7 +9,7 @@ import is.hail.utils._ trait PPrimitive extends PType { def byteSize: Long - def _construct(mb: EmitMethodBuilder[_], region: Value[Region], pc: SCode): SCode = pc + def _construct(mb: EmitMethodBuilder[_], region: Value[Region], pc: PCode): PCode = pc override def containsPointers: Boolean = false diff --git a/hail/src/main/scala/is/hail/types/physical/PShuffle.scala b/hail/src/main/scala/is/hail/types/physical/PShuffle.scala index 58d9f725bfa..7916e00629c 100644 --- a/hail/src/main/scala/is/hail/types/physical/PShuffle.scala +++ b/hail/src/main/scala/is/hail/types/physical/PShuffle.scala @@ -26,4 +26,18 @@ abstract class PShuffle extends PType { def allocate(region: Region, length: Int): Long def allocate(region: Code[Region], length: Code[Int]): Code[Long] -} \ No newline at end of file +} + +abstract class PShuffleValue extends PValue with SShuffleValue { + def loadLength(): Code[Int] + + def loadBytes(): Code[Array[Byte]] +} + +abstract class 
PShuffleCode extends PCode with SShuffleCode { + def pt: PShuffle + + def memoize(cb: EmitCodeBuilder, name: String): PShuffleValue + + def memoizeField(cb: EmitCodeBuilder, name: String): PShuffleValue +} diff --git a/hail/src/main/scala/is/hail/types/physical/PStream.scala b/hail/src/main/scala/is/hail/types/physical/PStream.scala index fed4bd2bfb6..6a4fb44d05a 100644 --- a/hail/src/main/scala/is/hail/types/physical/PStream.scala +++ b/hail/src/main/scala/is/hail/types/physical/PStream.scala @@ -6,4 +6,8 @@ abstract class PStream extends PIterable with PUnrealizable { lazy val virtualType: TStream = TStream(elementType.virtualType) def _asIdent = s"stream_of_${elementType.asIdent}" -} \ No newline at end of file +} + +abstract class PStreamCode extends PCode with PUnrealizableCode { + def pt: PStream +} diff --git a/hail/src/main/scala/is/hail/types/physical/PString.scala b/hail/src/main/scala/is/hail/types/physical/PString.scala index 84340c93204..0f5316226ee 100644 --- a/hail/src/main/scala/is/hail/types/physical/PString.scala +++ b/hail/src/main/scala/is/hail/types/physical/PString.scala @@ -25,4 +25,16 @@ abstract class PString extends PType { def allocateAndStoreString(region: Region, str: String): Long def allocateAndStoreString(mb: EmitMethodBuilder[_], region: Value[Region], str: Code[String]): Code[Long] -} \ No newline at end of file +} + +abstract class PStringCode extends PCode with SStringCode { + def pt: PString + + def asBytes(): PBinaryCode +} + +abstract class PStringValue extends PValue with SStringValue { + def pt: PString + + def get: PStringCode +} diff --git a/hail/src/main/scala/is/hail/types/physical/PStruct.scala b/hail/src/main/scala/is/hail/types/physical/PStruct.scala index 0cbf5328cff..77457713b35 100644 --- a/hail/src/main/scala/is/hail/types/physical/PStruct.scala +++ b/hail/src/main/scala/is/hail/types/physical/PStruct.scala @@ -54,5 +54,5 @@ trait PStruct extends PBaseStruct { def insertFields(fieldsToInsert: TraversableOnce[(String, PType)]): PStruct - def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SBaseStructCode + def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PBaseStructCode } diff --git a/hail/src/main/scala/is/hail/types/physical/PSubsetStruct.scala b/hail/src/main/scala/is/hail/types/physical/PSubsetStruct.scala index 4530e20475f..80531fe8ee1 100644 --- a/hail/src/main/scala/is/hail/types/physical/PSubsetStruct.scala +++ b/hail/src/main/scala/is/hail/types/physical/PSubsetStruct.scala @@ -4,7 +4,7 @@ import is.hail.annotations.{Annotation, Region, UnsafeUtils} import is.hail.asm4s.{Code, Settable, SettableBuilder, Value, coerce, const} import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, IEmitCode} import is.hail.types.BaseStruct -import is.hail.types.physical.stypes.interfaces.{SBaseStruct, SBaseStructCode} +import is.hail.types.physical.stypes.interfaces.SBaseStruct import is.hail.types.physical.stypes.{SCode, SType} import is.hail.types.physical.stypes.concrete.SSubsetStruct import is.hail.types.virtual.TStruct @@ -123,7 +123,7 @@ final case class PSubsetStruct(ps: PStruct, _fieldNames: IndexedSeq[String]) ext throw new UnsupportedOperationException } - def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SBaseStructCode = throw new UnsupportedOperationException + def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PBaseStructCode = throw new UnsupportedOperationException def unstagedStoreAtAddress(addr: Long, region: Region, srcPType: PType, srcAddress: Long, deepCopy: Boolean): Unit = { throw new 
UnsupportedOperationException diff --git a/hail/src/main/scala/is/hail/types/physical/PType.scala b/hail/src/main/scala/is/hail/types/physical/PType.scala index 0e264225a9d..eadf7eb63bf 100644 --- a/hail/src/main/scala/is/hail/types/physical/PType.scala +++ b/hail/src/main/scala/is/hail/types/physical/PType.scala @@ -162,9 +162,9 @@ object PType { } def literalPType(t: Type, a: Annotation): PType = { - val rb = new BooleanArrayBuilder() - val crib = new IntArrayBuilder() - val cib = new IntArrayBuilder() + val rb = new BoxedArrayBuilder[Boolean]() + val crib = new BoxedArrayBuilder[Int]() + val cib = new BoxedArrayBuilder[Int]() def indexTypes(t: Type): Unit = { val ci = crib.size @@ -194,7 +194,6 @@ object PType { crib.setSizeUninitialized(ci + n) cib.setSizeUninitialized(ci + n) - cib.setSize(ci + n) var j = 0 while (j < n) { @@ -403,6 +402,23 @@ abstract class PType extends Serializable with Requiredness { } } + def deepInnerRequired(required: Boolean): PType = + this match { + case t: PArray => PCanonicalArray(t.elementType.deepInnerRequired(true), required) + case t: PSet => PCanonicalSet(t.elementType.deepInnerRequired(true), required) + case t: PDict => PCanonicalDict(t.keyType.deepInnerRequired(true), t.valueType.deepInnerRequired(true), required) + case t: PStruct => + PCanonicalStruct(t.fields.map(f => PField(f.name, f.typ.deepInnerRequired(true), f.index)), required) + case t: PCanonicalTuple => + PCanonicalTuple(t._types.map { f => f.copy(typ = f.typ.deepInnerRequired(true)) }, required) + case t: PInterval => + PCanonicalInterval(t.pointType.deepInnerRequired(true), required) + case t: PStream => + PCanonicalStream(t.elementType.deepInnerRequired(true), required = required) + case t => + t.setRequired(required) + } + protected[physical] def _copyFromAddress(region: Region, srcPType: PType, srcAddress: Long, deepCopy: Boolean): Long def copyFromAddress(region: Region, srcPType: PType, srcAddress: Long, deepCopy: Boolean): Long = { @@ -416,8 +432,8 @@ abstract class PType extends Serializable with Requiredness { _copyFromAddress(region, srcPType, srcAddress, deepCopy) } - // return a SCode that can cheaply operate on the region representation. Generally a pointer type, but not necessarily (e.g. primitives). - def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SCode + // return a PCode that can cheaply operate on the region representation. Generally a pointer type, but not necessarily (e.g. primitives). + def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PCode // stores a stack value as a region value of this type def store(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): Code[Long] @@ -429,6 +445,21 @@ abstract class PType extends Serializable with Requiredness { def deepRename(t: Type): PType = this + def defaultValue(mb: EmitMethodBuilder[_]): PCode = PCode(this, is.hail.types.physical.defaultValue(this)) + + def ti: TypeInfo[_] = typeToTypeInfo(this) + + def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(ti) + + def asParam: PCodeParamType = PCodeParamType(this) + + def nCodes: Int = 1 + + def fromCodeTuple(ct: IndexedSeq[Code[_]]): PCode = { + assert(ct.length == 1) + PCode(this, ct(0)) + } + // called to load a region value's start address from a nested representation. // Usually a no-op, but may need to dereference a pointer. 
def loadFromNested(addr: Code[Long]): Code[Long] diff --git a/hail/src/main/scala/is/hail/types/physical/PUnrealizable.scala b/hail/src/main/scala/is/hail/types/physical/PUnrealizable.scala index 188375fd268..4e6a0de557c 100644 --- a/hail/src/main/scala/is/hail/types/physical/PUnrealizable.scala +++ b/hail/src/main/scala/is/hail/types/physical/PUnrealizable.scala @@ -32,7 +32,7 @@ trait PUnrealizable extends PType { override def unstagedStoreJavaObjectAtAddress(addr: Long, annotation: Annotation, region: Region): Unit = unsupported - override def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SCode = unsupported + override def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PCode = unsupported override def store(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): Code[Long] = unsupported @@ -42,3 +42,16 @@ trait PUnrealizable extends PType { throw new UnsupportedOperationException("containsPointers not supported on PUnrealizable") } } + +trait PUnrealizableCode extends PCode { + private def unsupported: Nothing = + throw new UnsupportedOperationException(s"$pt is not realizable") + + def code: Code[_] = unsupported + + def codeTuple(): IndexedSeq[Code[_]] = unsupported + + override def typeInfo: TypeInfo[_] = unsupported + + def memoizeField(cb: EmitCodeBuilder, name: String): PValue = unsupported +} diff --git a/hail/src/main/scala/is/hail/types/physical/package.scala b/hail/src/main/scala/is/hail/types/physical/package.scala index 96c554f3398..15d23d1834c 100644 --- a/hail/src/main/scala/is/hail/types/physical/package.scala +++ b/hail/src/main/scala/is/hail/types/physical/package.scala @@ -2,12 +2,11 @@ package is.hail.types import is.hail.asm4s._ import is.hail.expr.ir.streams.StreamArgType -import is.hail.types.physical.stypes.{SCode, SValue} import scala.language.implicitConversions package object physical { - implicit def sValueToSCode(sv: SValue): SCode = sv.get + implicit def pvalueToPCode(pv: PValue): PCode = pv.get def typeToTypeInfo(t: PType): TypeInfo[_] = t match { case _: PInt32 => typeInfo[Int] diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/SCode.scala b/hail/src/main/scala/is/hail/types/physical/stypes/SCode.scala index cc585bb2288..a0dc45aa3b9 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/SCode.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/SCode.scala @@ -5,54 +5,51 @@ import is.hail.asm4s._ import is.hail.expr.ir.EmitCodeBuilder import is.hail.types.physical.stypes.interfaces._ import is.hail.types.physical.stypes.primitives._ +import is.hail.types.physical.{PCode, PIntervalCode, PNDArrayCode, PShuffleCode, PType, PValue} object SCode { def add(cb: EmitCodeBuilder, left: SCode, right: SCode, required: Boolean): SCode = { (left.st, right.st) match { - case (SInt32, SInt32) => new SInt32Code(left.asInt.intCode(cb) + right.asInt.intCode(cb)) - case (SFloat32, SFloat32) => new SFloat32Code(left.asFloat.floatCode(cb) + right.asFloat.floatCode(cb)) - case (SInt64, SInt64) => new SInt64Code(left.asLong.longCode(cb) + right.asLong.longCode(cb)) - case (SFloat64, SFloat64) => new SFloat64Code(left.asDouble.doubleCode(cb) + right.asDouble.doubleCode(cb)) + case (_: SInt32, _: SInt32) => new SInt32Code(required, left.asInt.intCode(cb) + right.asInt.intCode(cb)) + case (_: SFloat32, _: SFloat32) => new SFloat32Code(required, left.asFloat.floatCode(cb) + right.asFloat.floatCode(cb)) + case (_: SInt64, _: SInt64) => new SInt64Code(required, left.asLong.longCode(cb) + right.asLong.longCode(cb)) + 
case (_: SFloat64, _: SFloat64) => new SFloat64Code(required, left.asDouble.doubleCode(cb) + right.asDouble.doubleCode(cb)) } } def multiply(cb: EmitCodeBuilder, left: SCode, right: SCode, required: Boolean): SCode = { (left.st, right.st) match { - case (SInt32, SInt32) => new SInt32Code(left.asInt.intCode(cb) * right.asInt.intCode(cb)) - case (SFloat32, SFloat32) => new SFloat32Code(left.asFloat.floatCode(cb) * right.asFloat.floatCode(cb)) - case (SInt64, SInt64) => new SInt64Code(left.asLong.longCode(cb) * right.asLong.longCode(cb)) - case (SFloat64, SFloat64) => new SFloat64Code(left.asDouble.doubleCode(cb) * right.asDouble.doubleCode(cb)) + case (_: SInt32, _: SInt32) => new SInt32Code(required, left.asInt.intCode(cb) * right.asInt.intCode(cb)) + case (_: SFloat32, _: SFloat32) => new SFloat32Code(required, left.asFloat.floatCode(cb) * right.asFloat.floatCode(cb)) + case (_: SInt64, _: SInt64) => new SInt64Code(required, left.asLong.longCode(cb) * right.asLong.longCode(cb)) + case (_: SFloat64, _: SFloat64) => new SFloat64Code(required, left.asDouble.doubleCode(cb) * right.asDouble.doubleCode(cb)) } } def subtract(cb: EmitCodeBuilder, left: SCode, right: SCode, required: Boolean): SCode = { (left.st, right.st) match { - case (SInt32, SInt32) => new SInt32Code(left.asInt.intCode(cb) - right.asInt.intCode(cb)) - case (SFloat32, SFloat32) => new SFloat32Code(left.asFloat.floatCode(cb) - right.asFloat.floatCode(cb)) - case (SInt64, SInt64) => new SInt64Code(left.asLong.longCode(cb) - right.asLong.longCode(cb)) - case (SFloat64, SFloat64) => new SFloat64Code(left.asDouble.doubleCode(cb) - right.asDouble.doubleCode(cb)) + case (_: SInt32, _: SInt32) => new SInt32Code(required, left.asInt.intCode(cb) - right.asInt.intCode(cb)) + case (_: SFloat32, _: SFloat32) => new SFloat32Code(required, left.asFloat.floatCode(cb) - right.asFloat.floatCode(cb)) + case (_: SInt64, _: SInt64) => new SInt64Code(required, left.asLong.longCode(cb) - right.asLong.longCode(cb)) + case (_: SFloat64, _: SFloat64) => new SFloat64Code(required, left.asDouble.doubleCode(cb) - right.asDouble.doubleCode(cb)) } } def divide(cb: EmitCodeBuilder, left: SCode, right: SCode, required: Boolean): SCode = { (left.st, right.st) match { - case (SInt32, SInt32) => new SInt32Code(left.asInt.intCode(cb) / right.asInt.intCode(cb)) - case (SFloat32, SFloat32) => new SFloat32Code(left.asFloat.floatCode(cb) / right.asFloat.floatCode(cb)) - case (SInt64, SInt64) => new SInt64Code(left.asLong.longCode(cb) / right.asLong.longCode(cb)) - case (SFloat64, SFloat64) => new SFloat64Code(left.asDouble.doubleCode(cb) / right.asDouble.doubleCode(cb)) + case (_: SInt32, _: SInt32) => new SInt32Code(required, left.asInt.intCode(cb) / right.asInt.intCode(cb)) + case (_: SFloat32, _: SFloat32) => new SFloat32Code(required, left.asFloat.floatCode(cb) / right.asFloat.floatCode(cb)) + case (_: SInt64, _: SInt64) => new SInt64Code(required, left.asLong.longCode(cb) / right.asLong.longCode(cb)) + case (_: SFloat64, _: SFloat64) => new SFloat64Code(required, left.asDouble.doubleCode(cb) / right.asDouble.doubleCode(cb)) } } - - def _empty: SCode = SVoidCode } abstract class SCode { def st: SType - // requires a code builder because forming a code tuple may require appending - // straight-line code, e.g. 
if a SCode contains nested EmitCodes - def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] + def codeTuple(): IndexedSeq[Code[_]] def asBoolean: SBooleanCode = asInstanceOf[SBooleanCode] @@ -82,9 +79,9 @@ abstract class SCode { def asString: SStringCode = asInstanceOf[SStringCode] - def asInterval: SIntervalCode = asInstanceOf[SIntervalCode] + def asInterval: PIntervalCode = asInstanceOf[PIntervalCode] - def asNDArray: SNDArrayCode = asInstanceOf[SNDArrayCode] + def asNDArray: PNDArrayCode = asInstanceOf[PNDArrayCode] def asLocus: SLocusCode = asInstanceOf[SLocusCode] @@ -92,26 +89,38 @@ abstract class SCode { def asStream: SStreamCode = asInstanceOf[SStreamCode] - def asShuffle: SShuffleCode = asInstanceOf[SShuffleCode] + def asShuffle: PShuffleCode = asInstanceOf[PShuffleCode] - def castTo(cb: EmitCodeBuilder, region: Value[Region], destType: SType): SCode = + def castTo(cb: EmitCodeBuilder, region: Value[Region], destType: PType): SCode = castTo(cb, region, destType, false) - def castTo(cb: EmitCodeBuilder, region: Value[Region], destType: SType, deepCopy: Boolean): SCode = { - destType.coerceOrCopy(cb, region, this, deepCopy) + + def castTo(cb: EmitCodeBuilder, region: Value[Region], destType: PType, deepCopy: Boolean): SCode = { + destType.sType.coerceOrCopy(cb, region, this, deepCopy) } - def copyToRegion(cb: EmitCodeBuilder, region: Value[Region], destType: SType): SCode = - destType.coerceOrCopy(cb, region, this, deepCopy = true) + + def copyToRegion(cb: EmitCodeBuilder, region: Value[Region]): SCode = + copyToRegion(cb, region, st.pType) + + def copyToRegion(cb: EmitCodeBuilder, region: Value[Region], destType: PType): SCode = + destType.sType.coerceOrCopy(cb, region, this, deepCopy = true) def memoize(cb: EmitCodeBuilder, name: String): SValue def memoizeField(cb: EmitCodeBuilder, name: String): SValue + + def toPCode(cb: EmitCodeBuilder, region: Value[Region]): PCode + + // This method is a very temporary patch. Clients should use `toPCode`. 
+ def asPCode: PCode = asInstanceOf[PCode] } trait SValue { def st: SType def get: SCode + + def asPValue: PValue = asInstanceOf[PValue] } @@ -122,22 +131,3 @@ trait SSettable extends SValue { def load(): SCode = get } - -object SSettable { - def apply(sb: SettableBuilder, st: SType, name: String): SSettable = { - st.fromSettables(st.settableTupleTypes().zipWithIndex.map { case (ti, i) => - sb.newSettable(s"${ name }_${ st.getClass.getSimpleName }_$i")(ti) - }) - } -} - -trait SUnrealizableCode extends SCode { - private def unsupported: Nothing = - throw new UnsupportedOperationException(s"$this is not realizable") - - def code: Code[_] = unsupported - - def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = unsupported - - def memoizeField(cb: EmitCodeBuilder, name: String): SValue = unsupported -} \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/SType.scala b/hail/src/main/scala/is/hail/types/physical/stypes/SType.scala index 0c0491d7878..08be3b658d9 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/SType.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/SType.scala @@ -1,50 +1,30 @@ package is.hail.types.physical.stypes import is.hail.annotations.Region -import is.hail.asm4s._ -import is.hail.expr.ir.{EmitCode, EmitCodeBuilder, EmitSettable, SCodeEmitParamType, SCodeParamType} -import is.hail.types.VirtualTypeWithReq -import is.hail.types.physical.PType -import is.hail.types.physical.stypes.interfaces.SStream -import is.hail.types.physical.stypes.primitives._ -import is.hail.types.virtual._ +import is.hail.asm4s.{Code, Settable, TypeInfo, Value} +import is.hail.expr.ir.orderings.CodeOrdering +import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, EmitParamType, InferPType, PCodeEmitParamType, PCodeParamType, SortOrder} +import is.hail.types.TypeWithRequiredness +import is.hail.types.physical.{PCode, PType} +import is.hail.types.virtual.Type object SType { - def chooseCompatibleType(req: VirtualTypeWithReq, stypes: SType*): SType = { - if (stypes.toSet.size == 1) - stypes.head - else - req.canonicalEmitType.st + def chooseCompatibleType(req: TypeWithRequiredness, stypes: SType*): SType = { + InferPType.getCompatiblePType(stypes.map(_.pType), req).sType } - - def canonical(virt: Type): SType = { - PType.canonical(virt).sType - } - - def extractPrimCode(cb: EmitCodeBuilder, x: SCode): Code[_] = x.st.virtualType match { - case TInt32 => x.asInt.intCode(cb) - case TInt64 => x.asLong.longCode(cb) - case TFloat32 => x.asFloat.floatCode(cb) - case TFloat64 => x.asDouble.doubleCode(cb) - case TBoolean => x.asBoolean.boolCode(cb) - } - - def canonical(st: SType): SType = st.canonicalPType().sType } trait SType { - def virtualType: Type + def virtualType: Type = pType.virtualType - def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode - - def codeTupleTypes(): IndexedSeq[TypeInfo[_]] + def pType: PType - def settableTupleTypes(): IndexedSeq[TypeInfo[_]] = codeTupleTypes() + def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode - lazy val nCodes: Int = codeTupleTypes().length + def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode - lazy val nSettables: Int = settableTupleTypes().length + def codeTupleTypes(): IndexedSeq[TypeInfo[_]] def fromSettables(settables: IndexedSeq[Settable[_]]): SSettable @@ -52,65 +32,15 @@ trait SType { def canonicalPType(): PType - def paramType: SCodeParamType 
= SCodeParamType(this) - - def asIdent: String = canonicalPType().asIdent - - def defaultValue: SCode = { - fromCodes(codeTupleTypes().map(ti => ti.uninitializedValue)) - } - - def isPrimitive: Boolean = this match { - case SInt32 | SInt64 | SFloat32 | SFloat64 | SBoolean => true - case _ => false - } + def paramType: PCodeParamType = PCodeParamType(pType) - def isRealizable: Boolean = !this.isInstanceOf[SStream] + def asIdent: String = pType.asIdent - def castRename(t: Type): SType + def equalsExceptTopLevelRequiredness(that: SType): Boolean = pType.equalModuloRequired(that.pType) } case class EmitType(st: SType, required: Boolean) { def virtualType: Type = st.virtualType - - def paramType: SCodeEmitParamType = SCodeEmitParamType(this) - + def paramType: EmitParamType = PCodeEmitParamType(st.pType.setRequired(required)) def canonicalPType: PType = st.canonicalPType().setRequired(required) - - def equalModuloRequired(that: EmitType): Boolean = st == that.st - - lazy val codeTupleTypes: IndexedSeq[TypeInfo[_]] = { - val tc = st.codeTupleTypes() - if (required) - tc - else - tc :+ BooleanInfo - } - - lazy val settableTupleTypes: IndexedSeq[TypeInfo[_]] = { - val tc = st.settableTupleTypes() - if (required) - tc - else - tc :+ BooleanInfo - } - - def fromCodes(codes: IndexedSeq[Code[_]]): EmitCode = { - val scode = st.fromCodes(codes.take(st.nCodes)) - val m: Code[Boolean] = if (required) const(false) else coerce[Boolean](codes.last) - val ec = EmitCode(Code._empty, m, scode) - if (ec.required && !this.required) - ec.setOptional - else - ec - } - - def fromSettables(settables: IndexedSeq[Settable[_]]): EmitSettable = new EmitSettable( - if (required) None else Some(coerce[Boolean](settables.last)), - st.fromSettables(settables.take(st.nSettables)) - ) - - def nCodes: Int = codeTupleTypes.length - - def nSettables: Int = settableTupleTypes.length } \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/SingleCodeSCode.scala b/hail/src/main/scala/is/hail/types/physical/stypes/SingleCodeSCode.scala deleted file mode 100644 index c85184ea79e..00000000000 --- a/hail/src/main/scala/is/hail/types/physical/stypes/SingleCodeSCode.scala +++ /dev/null @@ -1,173 +0,0 @@ -package is.hail.types.physical.stypes - -import is.hail.annotations.Region -import is.hail.asm4s._ -import is.hail.expr.ir._ -import is.hail.expr.ir.streams.{StreamArgType, StreamProducer} -import is.hail.types.physical.PType -import is.hail.types.physical.stypes.interfaces.{SStream, SStreamCode} -import is.hail.types.physical.stypes.primitives._ -import is.hail.types.virtual._ -import is.hail.utils._ - -object SingleCodeType { - def typeInfoFromType(t: Type): TypeInfo[_] = t match { - case TInt32 => IntInfo - case TInt64 => LongInfo - case TFloat32 => FloatInfo - case TFloat64 => DoubleInfo - case TBoolean => BooleanInfo - case TVoid => UnitInfo - case _ => LongInfo // all others passed as ptype references - } - - def fromSType(t: SType): SingleCodeType = t.virtualType match { - case TInt32 => Int32SingleCodeType - case TInt64 => Int64SingleCodeType - case TFloat32 => Float32SingleCodeType - case TFloat64 => Float64SingleCodeType - case TBoolean => BooleanSingleCodeType - case _ => PTypeReferenceSingleCodeType(t.canonicalPType().setRequired(true)) - - } -} - -sealed trait SingleCodeType { - def ti: TypeInfo[_] - - def loadToSCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): SCode - - def virtualType: Type - - def coerceSCode(cb: EmitCodeBuilder, pc: SCode, region: Value[Region], deepCopy: 
Boolean): SingleCodeSCode - - def loadedSType: SType -} - -case object Int32SingleCodeType extends SingleCodeType { - def ti: TypeInfo[_] = IntInfo - - override def loadedSType: SType = SInt32 - - def loadToSCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): SCode = new SInt32Code(coerce[Int](c)) - - def virtualType: Type = TInt32 - - def coerceSCode(cb: EmitCodeBuilder, pc: SCode, region: Value[Region], deepCopy: Boolean): SingleCodeSCode = SingleCodeSCode(this, pc.asInt.intCode(cb)) -} - -case object Int64SingleCodeType extends SingleCodeType { - def ti: TypeInfo[_] = LongInfo - - override def loadedSType: SType = SInt64 - - def loadToSCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): SCode = new SInt64Code(coerce[Long](c)) - - def virtualType: Type = TInt64 - - def coerceSCode(cb: EmitCodeBuilder, pc: SCode, region: Value[Region], deepCopy: Boolean): SingleCodeSCode = SingleCodeSCode(this, pc.asLong.longCode(cb)) -} - -case object Float32SingleCodeType extends SingleCodeType { - def ti: TypeInfo[_] = FloatInfo - - override def loadedSType: SType = SFloat32 - - def loadToSCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): SCode = new SFloat32Code(coerce[Float](c)) - - def virtualType: Type = TFloat32 - - def coerceSCode(cb: EmitCodeBuilder, pc: SCode, region: Value[Region], deepCopy: Boolean): SingleCodeSCode = SingleCodeSCode(this, pc.asFloat.floatCode(cb)) -} - -case object Float64SingleCodeType extends SingleCodeType { - def ti: TypeInfo[_] = DoubleInfo - - override def loadedSType: SType = SFloat64 - - def loadToSCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): SCode = new SFloat64Code(coerce[Double](c)) - - def virtualType: Type = TFloat64 - - def coerceSCode(cb: EmitCodeBuilder, pc: SCode, region: Value[Region], deepCopy: Boolean): SingleCodeSCode = SingleCodeSCode(this, pc.asDouble.doubleCode(cb)) -} - -case object BooleanSingleCodeType extends SingleCodeType { - def ti: TypeInfo[_] = BooleanInfo - - override def loadedSType: SType = SBoolean - - def loadToSCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): SCode = new SBooleanCode(coerce[Boolean](c)) - - def virtualType: Type = TBoolean - - def coerceSCode(cb: EmitCodeBuilder, pc: SCode, region: Value[Region], deepCopy: Boolean): SingleCodeSCode = SingleCodeSCode(this, pc.asBoolean.boolCode(cb)) -} - -case class StreamSingleCodeType(requiresMemoryManagementPerElement: Boolean, eltType: PType) extends SingleCodeType { - self => - - override def loadedSType: SType = SStream(EmitType(eltType.sType, true)) - - def virtualType: Type = TStream(eltType.virtualType) - - def ti: TypeInfo[_] = classInfo[StreamArgType] - - def loadToSCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): SCode = { - val mb = cb.emb - val xIter = mb.genFieldThisRef[Iterator[java.lang.Long]]("streamInIterator") - - // this, Region, ... 
- val mkIter = coerce[StreamArgType](c) - val eltRegion = mb.genFieldThisRef[Region]("stream_input_element_region") - val rvAddr = mb.genFieldThisRef[Long]("stream_input_addr") - - val producer = new StreamProducer { - override val length: Option[EmitCodeBuilder => Code[Int]] = None - - override def initialize(cb: EmitCodeBuilder): Unit = { - cb.assign(xIter, mkIter.invoke[Region, Region, Iterator[java.lang.Long]]("apply", r, eltRegion)) - } - - override val elementRegion: Settable[Region] = eltRegion - override val requiresMemoryManagementPerElement: Boolean = self.requiresMemoryManagementPerElement - override val LproduceElement: CodeLabel = mb.defineAndImplementLabel { cb => - // NB: locals should not be used in this implementation. The way In() nodes are - // stored in fields at the beginning of code generation leads to the method builder - // here being different from the method the stream will eventually be consumed in - cb.ifx(!xIter.load().hasNext, cb.goto(LendOfStream)) - cb.assign(rvAddr, xIter.load().next().invoke[Long]("longValue")) - cb.goto(LproduceElementDone) - } - - override val element: EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, eltType.loadCheapSCode(cb, rvAddr))) - - override def close(cb: EmitCodeBuilder): Unit = {} - } - SStreamCode(SStream(EmitType(eltType.sType, true)), producer) - } - - def coerceSCode(cb: EmitCodeBuilder, pc: SCode, region: Value[Region], deepCopy: Boolean): SingleCodeSCode = throw new UnsupportedOperationException -} - -case class PTypeReferenceSingleCodeType(pt: PType) extends SingleCodeType { - def ti: TypeInfo[_] = LongInfo - - override def loadedSType: SType = pt.sType - - def loadToSCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): SCode = pt.loadCheapSCode(cb, coerce[Long](c)) - - def virtualType: Type = pt.virtualType - - def coerceSCode(cb: EmitCodeBuilder, pc: SCode, region: Value[Region], deepCopy: Boolean): SingleCodeSCode = { - SingleCodeSCode(this, pt.store(cb, region, pc, deepCopy = deepCopy)) - } -} - -object SingleCodeSCode { - def fromSCode(cb: EmitCodeBuilder, pc: SCode, region: Value[Region], deepCopy: Boolean = false): SingleCodeSCode = { - SingleCodeType.fromSType(pc.st).coerceSCode(cb, pc, region, deepCopy) - } -} - -case class SingleCodeSCode(typ: SingleCodeType, code: Code[_]) \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBaseStructPointer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBaseStructPointer.scala index 5da7ec32fd1..f849530b5ef 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBaseStructPointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBaseStructPointer.scala @@ -4,29 +4,28 @@ import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, IEmitCode, SortOrder} -import is.hail.types.physical.stypes.interfaces.{SBaseStruct, SBaseStructCode, SBaseStructValue, SStructSettable} -import is.hail.types.physical.stypes.{EmitType, SCode, SSettable, SType} -import is.hail.types.physical.{PBaseStruct, PType} -import is.hail.types.virtual.{TBaseStruct, Type} +import is.hail.types.physical.stypes.interfaces.{SBaseStruct, SStructSettable} +import is.hail.types.physical.stypes.{SCode, SSettable, SType} +import is.hail.types.physical.{PBaseStruct, PBaseStructCode, PBaseStructValue, PCode, PStructSettable, PType} import is.hail.utils.FastIndexedSeq case class 
SBaseStructPointer(pType: PBaseStruct) extends SBaseStruct {
-  require(!pType.required)

   def size: Int = pType.size

-  lazy val virtualType: TBaseStruct = pType.virtualType.asInstanceOf[TBaseStruct]
-
-  override def castRename(t: Type): SType = SBaseStructPointer(pType.deepRename(t).asInstanceOf[PBaseStruct])
-
-  override def fieldIdx(fieldName: String): Int = pType.fieldIdx(fieldName)
-
   def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = {
     new SBaseStructPointerCode(this, pType.store(cb, region, value, deepCopy))
   }

   def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(LongInfo)

+  def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = {
+    if (pt == this.pType)
+      new SBaseStructPointerCode(this, addr)
+    else
+      coerceOrCopy(cb, region, pt.loadCheapPCode(cb, addr), deepCopy = false)
+  }
+
   def fromSettables(settables: IndexedSeq[Settable[_]]): SBaseStructPointerSettable = {
     val IndexedSeq(a: Settable[Long@unchecked]) = settables
     assert(a.ti == LongInfo)
@@ -41,8 +40,7 @@ case class SBaseStructPointer(pType: PBaseStruct) extends SBaseStruct {

   def canonicalPType(): PType = pType

-  override val fieldTypes: IndexedSeq[SType] = pType.types.map(_.sType)
-  override val fieldEmitTypes: IndexedSeq[EmitType] = pType.types.map(t => EmitType(t.sType, t.required))
+  override val fieldTypes: Array[SType] = pType.types.map(_.sType)
 }

@@ -55,20 +53,20 @@ object SBaseStructPointerSettable {
 class SBaseStructPointerSettable(
   val st: SBaseStructPointer,
   val a: Settable[Long]
-) extends SStructSettable {
+) extends PStructSettable {
   val pt: PBaseStruct = st.pType

-  def get: SBaseStructCode = new SBaseStructPointerCode(st, a)
+  def get: PBaseStructCode = new SBaseStructPointerCode(st, a)

   def settableTuple(): IndexedSeq[Settable[_]] = FastIndexedSeq(a)

   def loadField(cb: EmitCodeBuilder, fieldIdx: Int): IEmitCode = {
     IEmitCode(cb,
       pt.isFieldMissing(a, fieldIdx),
-      pt.fields(fieldIdx).typ.loadCheapSCode(cb, pt.loadField(a, fieldIdx)))
+      pt.fields(fieldIdx).typ.loadCheapPCode(cb, pt.loadField(a, fieldIdx)))
   }

-  def store(cb: EmitCodeBuilder, pv: SCode): Unit = {
+  def store(cb: EmitCodeBuilder, pv: PCode): Unit = {
     cb.assign(a, pv.asInstanceOf[SBaseStructPointerCode].a)
   }

@@ -77,20 +75,20 @@ class SBaseStructPointerSettable(
   }
 }

-class SBaseStructPointerCode(val st: SBaseStructPointer, val a: Code[Long]) extends SBaseStructCode {
+class SBaseStructPointerCode(val st: SBaseStructPointer, val a: Code[Long]) extends PBaseStructCode {
   val pt: PBaseStruct = st.pType

   def code: Code[_] = a

-  def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = FastIndexedSeq(a)
+  def codeTuple(): IndexedSeq[Code[_]] = FastIndexedSeq(a)

-  def memoize(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): SBaseStructValue = {
+  def memoize(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): PBaseStructValue = {
     val s = SBaseStructPointerSettable(sb, st, name)
     cb.assign(s, this)
     s
   }

-  def memoize(cb: EmitCodeBuilder, name: String): SBaseStructValue = memoize(cb, name, cb.localBuilder)
+  def memoize(cb: EmitCodeBuilder, name: String): PBaseStructValue = memoize(cb, name, cb.localBuilder)

-  def memoizeField(cb: EmitCodeBuilder, name: String): SBaseStructValue = memoize(cb, name, cb.fieldBuilder)
+  def memoizeField(cb: EmitCodeBuilder, name: String): PBaseStructValue = memoize(cb, name, cb.fieldBuilder)
 }
diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBinaryPointer.scala
b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBinaryPointer.scala index 18fea2daba7..7a6505d7de5 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBinaryPointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBinaryPointer.scala @@ -4,17 +4,13 @@ import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} -import is.hail.types.physical.stypes.interfaces.{SBinary, SBinaryCode, SBinaryValue} -import is.hail.types.physical.stypes.{SCode, SSettable, SType} -import is.hail.types.physical.{PBinary, PType} -import is.hail.types.virtual.Type +import is.hail.types.physical.stypes.interfaces.SBinary +import is.hail.types.physical.stypes.{SCode, SType} +import is.hail.types.physical.{PBinary, PBinaryCode, PBinaryValue, PCode, PSettable, PType} import is.hail.utils._ case class SBinaryPointer(pType: PBinary) extends SBinary { - require(!pType.required) - - lazy val virtualType: Type = pType.virtualType def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { new SBinaryPointerCode(this, pType.store(cb, region, value, deepCopy)) } @@ -25,7 +21,7 @@ case class SBinaryPointer(pType: PBinary) extends SBinary { if (pt == this.pType) new SBinaryPointerCode(this, addr) else - coerceOrCopy(cb, region, pt.loadCheapSCode(cb, addr), deepCopy = false) + coerceOrCopy(cb, region, pt.loadCheapPCode(cb, addr), deepCopy = false) } def fromSettables(settables: IndexedSeq[Settable[_]]): SBinaryPointerSettable = { @@ -41,8 +37,6 @@ case class SBinaryPointer(pType: PBinary) extends SBinary { } def canonicalPType(): PType = pType - - override def castRename(t: Type): SType = this } object SBinaryPointerSettable { @@ -50,8 +44,8 @@ object SBinaryPointerSettable { new SBinaryPointerSettable(st, sb.newSettable[Long](name)) } -class SBinaryPointerSettable(val st: SBinaryPointer, val a: Settable[Long]) extends SBinaryValue with SSettable { - private val pt: PBinary = st.pType +class SBinaryPointerSettable(val st: SBinaryPointer, val a: Settable[Long]) extends PBinaryValue with PSettable { + val pt: PBinary = st.pType override def bytesAddress(): Code[Long] = st.pType.bytesAddress(a) @@ -65,15 +59,15 @@ class SBinaryPointerSettable(val st: SBinaryPointer, val a: Settable[Long]) exte def loadByte(i: Code[Int]): Code[Byte] = Region.loadByte(pt.bytesAddress(a) + i.toL) - def store(cb: EmitCodeBuilder, pc: SCode): Unit = cb.assign(a, pc.asInstanceOf[SBinaryPointerCode].a) + def store(cb: EmitCodeBuilder, pc: PCode): Unit = cb.assign(a, pc.asInstanceOf[SBinaryPointerCode].a) } -class SBinaryPointerCode(val st: SBinaryPointer, val a: Code[Long]) extends SBinaryCode { - private val pt: PBinary = st.pType +class SBinaryPointerCode(val st: SBinaryPointer, val a: Code[Long]) extends PBinaryCode { + val pt: PBinary = st.pType def code: Code[_] = a - def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = FastIndexedSeq(a) + def codeTuple(): IndexedSeq[Code[_]] = FastIndexedSeq(a) def loadLength(): Code[Int] = pt.loadLength(a) diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalCall.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalCall.scala index 3f0391ea887..7dd297e08b9 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalCall.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalCall.scala @@ -4,64 
+4,67 @@ import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} -import is.hail.types.physical.stypes.interfaces.{SCall, SCallCode, SCallValue} -import is.hail.types.physical.stypes.{SCode, SSettable, SType} -import is.hail.types.physical.{PCall, PCanonicalCall, PType} -import is.hail.types.virtual.{TCall, Type} +import is.hail.types.physical.stypes.interfaces.SCall +import is.hail.types.physical.stypes.{SCode, SType} +import is.hail.types.physical.{PCall, PCallCode, PCallValue, PCanonicalCall, PCode, PSettable, PType} import is.hail.utils._ import is.hail.variant.Genotype -case object SCanonicalCall extends SCall { +case class SCanonicalCall(required: Boolean) extends SCall { + override def pType: PCall = PCanonicalCall(required) + def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { value.st match { - case SCanonicalCall => value + case SCanonicalCall(_) => + new SCanonicalCallCode(required, value.asInstanceOf[SCanonicalCallCode].call) } } - lazy val virtualType: Type = TCall - - override def castRename(t: Type): SType = this - def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(IntInfo) + def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = { + pt match { + case PCanonicalCall(_) => + new SCanonicalCallCode(required, Region.loadInt(addr)) + } + } + def fromSettables(settables: IndexedSeq[Settable[_]]): SCanonicalCallSettable = { val IndexedSeq(call: Settable[Int@unchecked]) = settables assert(call.ti == IntInfo) - new SCanonicalCallSettable(call) + new SCanonicalCallSettable(required, call) } def fromCodes(codes: IndexedSeq[Code[_]]): SCanonicalCallCode = { val IndexedSeq(call: Code[Int@unchecked]) = codes assert(call.ti == IntInfo) - new SCanonicalCallCode(call) + new SCanonicalCallCode(required, call) } - def canonicalPType(): PType = PCanonicalCall(false) - - def constructFromIntRepr(c: Code[Int]): SCanonicalCallCode = new SCanonicalCallCode(c) + def canonicalPType(): PType = pType } object SCanonicalCallSettable { - def apply(sb: SettableBuilder, name: String): SCanonicalCallSettable = - new SCanonicalCallSettable(sb.newSettable[Int](s"${ name }_call")) + def apply(sb: SettableBuilder, name: String, required: Boolean): SCanonicalCallSettable = + new SCanonicalCallSettable(required, sb.newSettable[Int](s"${ name }_call")) } -class SCanonicalCallSettable(val call: Settable[Int]) extends SCallValue with SSettable { +class SCanonicalCallSettable(required: Boolean, val call: Settable[Int]) extends PCallValue with PSettable { - val pt: PCall = PCanonicalCall(false) + val pt: PCall = PCanonicalCall(required) override def canonicalCall(cb: EmitCodeBuilder): Code[Int] = call - override def store(cb: EmitCodeBuilder, v: SCode): Unit = cb.assign(call, v.asInstanceOf[SCanonicalCallCode].call) + override def store(cb: EmitCodeBuilder, v: PCode): Unit = cb.assign(call, v.asInstanceOf[SCanonicalCallCode].call) - val st: SCanonicalCall.type = SCanonicalCall + val st: SCanonicalCall = SCanonicalCall(required) - def get: SCallCode = new SCanonicalCallCode(call) + def get: PCallCode = new SCanonicalCallCode(required, call) def settableTuple(): IndexedSeq[Settable[_]] = FastIndexedSeq(call) - def store(pc: SCode): Code[Unit] = call.store(pc.asInstanceOf[SCanonicalCallCode].call) + def store(pc: PCode): Code[Unit] = call.store(pc.asInstanceOf[SCanonicalCallCode].call) def ploidy(): 
Code[Int] = get.ploidy() @@ -93,29 +96,29 @@ class SCanonicalCallSettable(val call: Settable[Int]) extends SCallValue with SS } } -class SCanonicalCallCode(val call: Code[Int]) extends SCallCode { +class SCanonicalCallCode(required: Boolean, val call: Code[Int]) extends PCallCode { - val pt: PCall = PCanonicalCall(false) + val pt: PCall = PCanonicalCall(required) - val st: SCanonicalCall.type = SCanonicalCall + val st: SCanonicalCall = SCanonicalCall(required) def code: Code[_] = call - def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = FastIndexedSeq(call) + def codeTuple(): IndexedSeq[Code[_]] = FastIndexedSeq(call) def ploidy(): Code[Int] = (call >>> 1) & 0x3 def isPhased(): Code[Boolean] = (call & 0x1).ceq(1) - def memoize(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): SCallValue = { - val s = SCanonicalCallSettable(sb, name) + def memoize(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): PCallValue = { + val s = SCanonicalCallSettable(sb, name, required) cb.assign(s, this) s } - def memoize(cb: EmitCodeBuilder, name: String): SCallValue = memoize(cb, name, cb.localBuilder) + def memoize(cb: EmitCodeBuilder, name: String): PCallValue = memoize(cb, name, cb.localBuilder) - def memoizeField(cb: EmitCodeBuilder, name: String): SCallValue = memoize(cb, name, cb.fieldBuilder) + def memoizeField(cb: EmitCodeBuilder, name: String): PCallValue = memoize(cb, name, cb.fieldBuilder) def store(mb: EmitMethodBuilder[_], r: Value[Region], dst: Code[Long]): Code[Unit] = Region.storeInt(dst, call) diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalLocusPointer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalLocusPointer.scala index 2064cbab6a9..ff601cb89c1 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalLocusPointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalLocusPointer.scala @@ -4,23 +4,14 @@ import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} -import is.hail.types.physical.stypes.interfaces.{SLocus, SLocusCode, SLocusValue, SString, SStringCode} -import is.hail.types.physical.stypes.{SCode, SSettable, SType} -import is.hail.types.physical.{PCanonicalLocus, PType} -import is.hail.types.virtual.Type +import is.hail.types.physical.stypes.interfaces.SLocus +import is.hail.types.physical.stypes.{SCode, SType} +import is.hail.types.physical.{PCanonicalLocus, PCode, PLocusCode, PLocusValue, PSettable, PStringCode, PType} import is.hail.utils.FastIndexedSeq import is.hail.variant.{Locus, ReferenceGenome} case class SCanonicalLocusPointer(pType: PCanonicalLocus) extends SLocus { - require(!pType.required) - - override def contigType: SString = pType.contigType.sType - - lazy val virtualType: Type = pType.virtualType - - override def castRename(t: Type): SType = this - override def rg: ReferenceGenome = pType.rg def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { @@ -29,7 +20,12 @@ case class SCanonicalLocusPointer(pType: PCanonicalLocus) extends SLocus { def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(LongInfo) - override def settableTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(LongInfo, LongInfo, IntInfo) + def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = { + pt match { + case PCanonicalLocus(_, _) => + new 
SCanonicalLocusPointerCode(this, addr) + } + } def fromSettables(settables: IndexedSeq[Settable[_]]): SCanonicalLocusPointerSettable = { val IndexedSeq(a: Settable[Long@unchecked], contig: Settable[Long@unchecked], position: Settable[Int@unchecked]) = settables @@ -63,34 +59,34 @@ class SCanonicalLocusPointerSettable( val a: Settable[Long], _contig: Settable[Long], val _position: Settable[Int] -) extends SLocusValue with SSettable { +) extends PLocusValue with PSettable { val pt: PCanonicalLocus = st.pType def get = new SCanonicalLocusPointerCode(st, a) def settableTuple(): IndexedSeq[Settable[_]] = FastIndexedSeq(a, _contig, _position) - def store(cb: EmitCodeBuilder, pc: SCode): Unit = { + def store(cb: EmitCodeBuilder, pc: PCode): Unit = { cb.assign(a, pc.asInstanceOf[SCanonicalLocusPointerCode].a) cb.assign(_contig, pt.contigAddr(a)) cb.assign(_position, pt.position(a)) } - def contig(cb: EmitCodeBuilder): SStringCode = { - pt.contigType.loadCheapSCode(cb, _contig).asString + def contig(cb: EmitCodeBuilder): PStringCode = { + pt.contigType.loadCheapPCode(cb, _contig).asString } def position(cb: EmitCodeBuilder): Code[Int] = _position } -class SCanonicalLocusPointerCode(val st: SCanonicalLocusPointer, val a: Code[Long]) extends SLocusCode { +class SCanonicalLocusPointerCode(val st: SCanonicalLocusPointer, val a: Code[Long]) extends PLocusCode { val pt: PCanonicalLocus = st.pType def code: Code[_] = a - def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = FastIndexedSeq(a) + def codeTuple(): IndexedSeq[Code[_]] = FastIndexedSeq(a) - def contig(cb: EmitCodeBuilder): SStringCode = pt.contigType.loadCheapSCode(cb, pt.contigAddr(a)).asString + def contig(cb: EmitCodeBuilder): PStringCode = pt.contigType.loadCheapPCode(cb, pt.contigAddr(a)).asString def position(cb: EmitCodeBuilder): Code[Int] = pt.position(a) diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalShufflePointer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalShufflePointer.scala index 60952a94d96..d44bc4294f2 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalShufflePointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalShufflePointer.scala @@ -5,27 +5,29 @@ import is.hail.asm4s.{Code, IntInfo, LongInfo, Settable, SettableBuilder, TypeIn import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} import is.hail.services.shuffler.Wire -import is.hail.types.physical.stypes.interfaces.{SShuffle, SShuffleCode, SShuffleValue} -import is.hail.types.physical.stypes.{SCode, SSettable, SType} -import is.hail.types.physical.{PCanonicalShuffle, PShuffle, PType} -import is.hail.types.virtual.Type +import is.hail.types.physical.stypes.interfaces.SShuffle +import is.hail.types.physical.stypes.{SCode, SType} +import is.hail.types.physical.{PCanonicalShuffle, PCode, PSettable, PShuffle, PShuffleCode, PShuffleValue, PType} import is.hail.utils.FastIndexedSeq case class SCanonicalShufflePointer(pType: PCanonicalShuffle) extends SShuffle { - require(!pType.required) - - lazy val virtualType: Type = pType.virtualType - - override def castRename(t: Type): SType = this lazy val binarySType = SBinaryPointer(pType.representation) def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { - new SCanonicalShufflePointerCode(this, pType.representation.loadCheapSCode(cb, pType.store(cb, region, value, deepCopy))) + new 
SCanonicalShufflePointerCode(this, pType.representation.loadCheapPCode(cb, pType.store(cb, region, value, deepCopy))) } def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(LongInfo) + def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = { + pt match { + case t: PCanonicalShuffle => + assert(t.equalModuloRequired(this.pType)) + new SCanonicalShufflePointerCode(this, t.loadBinary(cb, addr)) + } + } + def fromSettables(settables: IndexedSeq[Settable[_]]): SCanonicalShufflePointerSettable = { new SCanonicalShufflePointerSettable(this, binarySType.fromSettables(settables)) } @@ -46,14 +48,14 @@ object SCanonicalShufflePointerSettable { "PCanonicalShuffleSettableOff", pt.representation.allocate(region, Wire.ID_SIZE)) cb.append(pt.representation.store(off, bytes)) - pt.loadCheapSCode(cb, off).memoize(cb, "scanonicalshuffle_fromarraybytes").asInstanceOf[SCanonicalShufflePointerSettable] + SCanonicalShufflePointer(pt).loadFrom(cb, region, pt, off).memoize(cb, "scanonicalshuffle_fromarraybytes").asInstanceOf[SCanonicalShufflePointerSettable] } } -class SCanonicalShufflePointerSettable(val st: SCanonicalShufflePointer, val shuffle: SBinaryPointerSettable) extends SShuffleValue with SSettable { +class SCanonicalShufflePointerSettable(val st: SCanonicalShufflePointer, val shuffle: SBinaryPointerSettable) extends PShuffleValue with PSettable { val pt: PCanonicalShuffle = st.pType - def get: SShuffleCode = new SCanonicalShufflePointerCode(st, shuffle.get) + def get: PShuffleCode = new SCanonicalShufflePointerCode(st, shuffle.get) def settableTuple(): IndexedSeq[Settable[_]] = shuffle.settableTuple() @@ -61,21 +63,21 @@ class SCanonicalShufflePointerSettable(val st: SCanonicalShufflePointer, val shu def loadBytes(): Code[Array[Byte]] = shuffle.loadBytes() - def store(cb: EmitCodeBuilder, pc: SCode): Unit = shuffle.store(cb, pc.asInstanceOf[SCanonicalShufflePointerCode].shuffle) + def store(cb: EmitCodeBuilder, pc: PCode): Unit = shuffle.store(cb, pc.asInstanceOf[SCanonicalShufflePointerCode].shuffle) def storeFromBytes(cb: EmitCodeBuilder, region: Value[Region], bytes: Value[Array[Byte]]): Unit = { val addr = cb.newLocal[Long]("bytesAddr", st.pType.representation.allocate(region, bytes.length())) cb += st.pType.representation.store(addr, bytes) - shuffle.store(cb, st.pType.representation.loadCheapSCode(cb, addr)) + shuffle.store(cb, st.pType.representation.loadCheapPCode(cb, addr)) } } -class SCanonicalShufflePointerCode(val st: SCanonicalShufflePointer, val shuffle: SBinaryPointerCode) extends SShuffleCode { +class SCanonicalShufflePointerCode(val st: SCanonicalShufflePointer, val shuffle: SBinaryPointerCode) extends PShuffleCode { val pt: PShuffle = st.pType def code: Code[_] = shuffle.code - def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = shuffle.makeCodeTuple(cb) + def codeTuple(): IndexedSeq[Code[_]] = shuffle.codeTuple() def memoize(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): SCanonicalShufflePointerSettable = { val s = SCanonicalShufflePointerSettable(sb, st, name) diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIndexablePointer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIndexablePointer.scala index be362393fac..42187e4bc18 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIndexablePointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIndexablePointer.scala @@ -3,20 +3,13 @@ package is.hail.types.physical.stypes.concrete 
import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir.{EmitCodeBuilder, IEmitCode} -import is.hail.types.physical.stypes.interfaces.{SContainer, SIndexableCode, SIndexableValue} -import is.hail.types.physical.stypes.{EmitType, SCode, SSettable, SType} -import is.hail.types.physical.{PArray, PCanonicalArray, PCanonicalDict, PCanonicalSet, PContainer, PType} -import is.hail.types.virtual.Type +import is.hail.types.physical.stypes.interfaces.SContainer +import is.hail.types.physical.stypes.{EmitType, SCode, SType} +import is.hail.types.physical.{PArray, PCanonicalArray, PCanonicalDict, PCanonicalSet, PCode, PContainer, PIndexableCode, PIndexableValue, PSettable, PType} import is.hail.utils.FastIndexedSeq case class SIndexablePointer(pType: PContainer) extends SContainer { - require(!pType.required) - - lazy val virtualType: Type = pType.virtualType - - override def castRename(t: Type): SType = SIndexablePointer(pType.deepRename(t).asInstanceOf[PContainer]) - override def elementType: SType = pType.elementType.sType def elementEmitType: EmitType = EmitType(elementType, pType.elementType.required) @@ -27,7 +20,12 @@ case class SIndexablePointer(pType: PContainer) extends SContainer { def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(LongInfo) - override def settableTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(LongInfo, IntInfo, LongInfo) + def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = { + if (pt == this.pType) + new SIndexablePointerCode(this, addr) + else + coerceOrCopy(cb, region, pt.loadCheapPCode(cb, addr), deepCopy = false) + } def fromSettables(settables: IndexedSeq[Settable[_]]): SIndexablePointerSettable = { val IndexedSeq(a: Settable[Long@unchecked], length: Settable[Int@unchecked], elementsAddress: Settable[Long@unchecked]) = settables @@ -47,26 +45,26 @@ case class SIndexablePointer(pType: PContainer) extends SContainer { } -class SIndexablePointerCode(val st: SIndexablePointer, val a: Code[Long]) extends SIndexableCode { +class SIndexablePointerCode(val st: SIndexablePointer, val a: Code[Long]) extends PIndexableCode { val pt: PContainer = st.pType def code: Code[_] = a - def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = FastIndexedSeq(a) + def codeTuple(): IndexedSeq[Code[_]] = FastIndexedSeq(a) def loadLength(): Code[Int] = pt.loadLength(a) - def memoize(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): SIndexableValue = { + def memoize(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): PIndexableValue = { val s = SIndexablePointerSettable(sb, st, name) cb.assign(s, this) s } - def memoize(cb: EmitCodeBuilder, name: String): SIndexableValue = memoize(cb, name, cb.localBuilder) + def memoize(cb: EmitCodeBuilder, name: String): PIndexableValue = memoize(cb, name, cb.localBuilder) - def memoizeField(cb: EmitCodeBuilder, name: String): SIndexableValue = memoize(cb, name, cb.fieldBuilder) + def memoizeField(cb: EmitCodeBuilder, name: String): PIndexableValue = memoize(cb, name, cb.fieldBuilder) - def castToArray(cb: EmitCodeBuilder): SIndexableCode = { + def castToArray(cb: EmitCodeBuilder): PIndexableCode = { pt match { case t: PArray => this case t: PCanonicalDict => new SIndexablePointerCode(SIndexablePointer(t.arrayRep), a) @@ -89,10 +87,10 @@ class SIndexablePointerSettable( val a: Settable[Long], val length: Settable[Int], val elementsAddress: Settable[Long] -) extends SIndexableValue with SSettable { +) extends PIndexableValue with PSettable { val pt: 
PContainer = st.pType - def get: SIndexableCode = new SIndexablePointerCode(st, a) + def get: PIndexableCode = new SIndexablePointerCode(st, a) def settableTuple(): IndexedSeq[Settable[_]] = FastIndexedSeq(a, length, elementsAddress) @@ -102,12 +100,12 @@ class SIndexablePointerSettable( val iv = cb.newLocal("pcindval_i", i) IEmitCode(cb, isElementMissing(iv), - pt.elementType.loadCheapSCode(cb, pt.loadElement(a, length, iv))) // FIXME loadElement should take elementsAddress + pt.elementType.loadCheapPCode(cb, pt.loadElement(a, length, iv))) // FIXME loadElement should take elementsAddress } def isElementMissing(i: Code[Int]): Code[Boolean] = pt.isElementMissing(a, i) - def store(cb: EmitCodeBuilder, pc: SCode): Unit = { + def store(cb: EmitCodeBuilder, pc: PCode): Unit = { cb.assign(a, pc.asInstanceOf[SIndexablePointerCode].a) cb.assign(length, pt.loadLength(a)) cb.assign(elementsAddress, pt.firstElementOffset(a, length)) @@ -125,7 +123,7 @@ class SIndexablePointerSettable( cb.ifx(isElementMissing(idx), {}, // do nothing, { - val elt = et.loadCheapSCode(cb, et.loadFromNested(elementPtr)) + val elt = et.loadCheapPCode(cb, et.loadFromNested(elementPtr)) f(cb, idx, elt) }) cb.assign(idx, idx + 1) diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SInsertFieldsStruct.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SInsertFieldsStruct.scala deleted file mode 100644 index bcfd8ee050d..00000000000 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SInsertFieldsStruct.scala +++ /dev/null @@ -1,164 +0,0 @@ -package is.hail.types.physical.stypes.concrete - -import is.hail.annotations.Region -import is.hail.asm4s.{Code, Settable, TypeInfo, Value} -import is.hail.expr.ir.{EmitCode, EmitCodeBuilder, EmitSettable, IEmitCode} -import is.hail.types.physical.stypes.interfaces.{SBaseStruct, SBaseStructCode, SStructSettable} -import is.hail.types.physical.stypes.{EmitType, SCode, SType} -import is.hail.types.physical.{PCanonicalStruct, PType} -import is.hail.types.virtual.{TStruct, Type} -import is.hail.utils.BoxedArrayBuilder - -case class SInsertFieldsStruct(virtualType: TStruct, parent: SBaseStruct, insertedFields: IndexedSeq[(String, EmitType)]) extends SBaseStruct { - override def size: Int = virtualType.size - - // Maps index in result struct to index in insertedFields. - // Indices that refer to parent fields are not present. 
- lazy val insertedFieldIndices: Map[Int, Int] = insertedFields.zipWithIndex - .map { case ((name, _), idx) => virtualType.fieldIdx(name) -> idx } - .toMap - - def getFieldIndexInNewOrParent(idx: Int): Either[Int, Int] = { - insertedFieldIndices.get(idx) match { - case Some(idx) => Right(idx) - case None => Left(parent.fieldIdx(virtualType.fieldNames(idx))) - } - } - - val fieldEmitTypes: IndexedSeq[EmitType] = virtualType.fieldNames.zipWithIndex.map { case (f, idx) => - insertedFieldIndices.get(idx) match { - case Some(idx) => insertedFields(idx)._2 - case None => parent.fieldEmitTypes(parent.fieldIdx(f)) - } - } - - private lazy val insertedFieldCodeStarts = insertedFields.map(_._2.nCodes).scanLeft(0)(_ + _).init - private lazy val insertedFieldSettableStarts = insertedFields.map(_._2.nSettables).scanLeft(0)(_ + _).init - - override lazy val fieldTypes: IndexedSeq[SType] = fieldEmitTypes.map(_.st) - - override def fieldIdx(fieldName: String): Int = virtualType.fieldIdx(fieldName) - - override def canonicalPType(): PType = PCanonicalStruct(false, virtualType.fieldNames.zip(fieldEmitTypes).map { case (f, et) => (f, et.canonicalPType) }: _*) - - lazy val codeTupleTypes: IndexedSeq[TypeInfo[_]] = parent.codeTupleTypes ++ insertedFields.flatMap(_._2.codeTupleTypes) - - override lazy val settableTupleTypes: IndexedSeq[TypeInfo[_]] = parent.settableTupleTypes() ++ insertedFields.flatMap(_._2.settableTupleTypes) - - override def fromCodes(codes: IndexedSeq[Code[_]]): SInsertFieldsStructCode = { - assert(codes.map(_.ti) == codeTupleTypes) - new SInsertFieldsStructCode(this, parent.fromCodes(codes.take(parent.nCodes)).asInstanceOf[SBaseStructCode], insertedFields.indices.map { i => - val et = insertedFields(i)._2 - val start = insertedFieldCodeStarts(i) + parent.nCodes - et.fromCodes(codes.slice(start, start + et.nCodes)) - }) - } - - override def fromSettables(settables: IndexedSeq[Settable[_]]): SInsertFieldsStructSettable = { - assert(settables.map(_.ti) == settableTupleTypes) - new SInsertFieldsStructSettable(this, parent.fromSettables(settables.take(parent.nSettables)).asInstanceOf[SStructSettable], insertedFields.indices.map { i => - val et = insertedFields(i)._2 - val start = insertedFieldSettableStarts(i) + parent.nSettables - et.fromSettables(settables.slice(start, start + et.nSettables)) - }) - } - - override def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { - value match { - case ss: SInsertFieldsStructCode if ss.st == this => value - case _ => throw new RuntimeException(s"copy insertfields struct") - } - } - - override def castRename(t: Type): SType = { - val ts = t.asInstanceOf[TStruct] - - val parentType = parent.virtualType.asInstanceOf[TStruct] - - val renamedInsertedFields = Array.fill[(String, EmitType)](insertedFields.size)(null) - val parentPassThroughFieldBuilder = new BoxedArrayBuilder[(String, (String, Type))]() - - (0 until ts.size).foreach { i => - val newField = ts.fields(i) - val newName = newField.name - val oldName = virtualType.fieldNames(i) - insertedFieldIndices.get(i) match { - case Some(idx) => - val et = insertedFields(idx)._2 - renamedInsertedFields(idx) = ((newName, et.copy(st = et.st.castRename(newField.typ)))) - case None => parentPassThroughFieldBuilder += ((oldName, (newName, newField.typ))) - } - } - - val parentPassThroughMap = parentPassThroughFieldBuilder.result().toMap - val parentCastType = TStruct(parentType.fieldNames.map(f => parentPassThroughMap.getOrElse(f, (f, parentType.fieldType(f)))): _*) 
- val renamedParentType = parent.castRename(parentCastType) - SInsertFieldsStruct(ts, - renamedParentType.asInstanceOf[SBaseStruct], - renamedInsertedFields - ) - } -} - -class SInsertFieldsStructSettable(val st: SInsertFieldsStruct, parent: SStructSettable, newFields: IndexedSeq[EmitSettable]) extends SStructSettable { - def get: SInsertFieldsStructCode = new SInsertFieldsStructCode(st, parent.load().asBaseStruct, newFields.map(_.load)) - - def settableTuple(): IndexedSeq[Settable[_]] = parent.settableTuple() ++ newFields.flatMap(_.settableTuple()) - - def loadField(cb: EmitCodeBuilder, fieldIdx: Int): IEmitCode = { - st.getFieldIndexInNewOrParent(fieldIdx) match { - case Left(parentIdx) => parent.loadField(cb, parentIdx) - case Right(newFieldsIdx) => newFields(newFieldsIdx).toI(cb) - } - } - - def isFieldMissing(fieldIdx: Int): Code[Boolean] = - st.getFieldIndexInNewOrParent(fieldIdx) match { - case Left(parentIdx) => parent.isFieldMissing(parentIdx) - case Right(newFieldsIdx) => newFields(newFieldsIdx).m - } - - def store(cb: EmitCodeBuilder, pv: SCode): Unit = { - val sifc = pv.asInstanceOf[SInsertFieldsStructCode] - parent.store(cb, sifc.parent) - newFields.zip(sifc.newFields).foreach { case (settable, code) => cb.assign(settable, code) } - } -} - -class SInsertFieldsStructCode(val st: SInsertFieldsStruct, val parent: SBaseStructCode, val newFields: IndexedSeq[EmitCode]) extends SBaseStructCode { - override def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = parent.makeCodeTuple(cb) ++ newFields.flatMap(_.makeCodeTuple(cb)) - - override def memoize(cb: EmitCodeBuilder, name: String): SInsertFieldsStructSettable = { - new SInsertFieldsStructSettable(st, parent.memoize(cb, name + "_parent").asInstanceOf[SStructSettable], newFields.indices.map { i => - val code = newFields(i) - val es = cb.emb.newEmitLocal(s"${ name }_nf_$i", code.emitType) - es.store(cb, code) - es - }) - } - - override def memoizeField(cb: EmitCodeBuilder, name: String): SInsertFieldsStructSettable = { - new SInsertFieldsStructSettable(st, parent.memoizeField(cb, name + "_parent").asInstanceOf[SStructSettable], newFields.indices.map { i => - val code = newFields(i) - val es = cb.emb.newEmitField(s"${ name }_nf_$i", code.emitType) - es.store(cb, code) - es - }) - } - - override def _insert(newType: TStruct, fields: (String, EmitCode)*): SBaseStructCode = { - val newFieldSet = fields.map(_._1).toSet - val filteredNewFields = st.insertedFields.map(_._1) - .zipWithIndex - .filter { case (name, idx) => !newFieldSet.contains(name) } - .map { case (name, idx) => (name, newFields(idx)) } - parent._insert(newType, filteredNewFields ++ fields: _*) - } - - override def loadSingleField(cb: EmitCodeBuilder, fieldIdx: Int): IEmitCode = { - st.getFieldIndexInNewOrParent(fieldIdx) match { - case Left(parentIdx) => parent.loadSingleField(cb, parentIdx) - case Right(newIdx) => newFields(newIdx).toI(cb) - } - } -} \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIntervalPointer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIntervalPointer.scala index 395c6dbb011..d890a9473b1 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIntervalPointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIntervalPointer.scala @@ -4,27 +4,27 @@ import is.hail.annotations.Region import is.hail.asm4s.{BooleanInfo, Code, IntInfo, LongInfo, Settable, SettableBuilder, TypeInfo, Value} import is.hail.expr.ir.orderings.CodeOrdering 
import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, IEmitCode, SortOrder} -import is.hail.types.physical.stypes.interfaces.{SInterval, SIntervalCode, SIntervalValue} -import is.hail.types.physical.stypes.{EmitType, SCode, SSettable, SType} -import is.hail.types.physical.{PCanonicalInterval, PInterval, PType} -import is.hail.types.virtual.Type +import is.hail.types.physical.stypes.interfaces.SInterval +import is.hail.types.physical.stypes.{SCode, SType} +import is.hail.types.physical.{PCanonicalInterval, PCode, PInterval, PIntervalCode, PIntervalValue, PSettable, PType} import is.hail.utils.FastIndexedSeq case class SIntervalPointer(pType: PInterval) extends SInterval { - require(!pType.required) - def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { new SIntervalPointerCode(this, pType.store(cb, region, value, deepCopy)) } - override def castRename(t: Type): SType = SIntervalPointer(pType.deepRename(t).asInstanceOf[PInterval]) - - lazy val virtualType: Type = pType.virtualType + def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(LongInfo, IntInfo, IntInfo) - def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(LongInfo) - - override def settableTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(LongInfo, BooleanInfo, BooleanInfo) + def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = { + pt match { + case t: PCanonicalInterval if t.equalModuloRequired(this.pType) => + new SIntervalPointerCode(this, addr) + case _ => + new SIntervalPointerCode(this, pType.store(cb, region, pt.loadCheapPCode(cb, addr), false)) + } + } def fromSettables(settables: IndexedSeq[Settable[_]]): SIntervalPointerSettable = { val IndexedSeq(a: Settable[Long@unchecked], includesStart: Settable[Boolean@unchecked], includesEnd: Settable[Boolean@unchecked]) = settables @@ -41,7 +41,6 @@ case class SIntervalPointer(pType: PInterval) extends SInterval { } override def pointType: SType = pType.pointType.sType - override def pointEmitType: EmitType = EmitType(pType.pointType.sType, pType.pointType.required) def canonicalPType(): PType = pType } @@ -61,8 +60,8 @@ class SIntervalPointerSettable( val a: Settable[Long], val includesStart: Settable[Boolean], val includesEnd: Settable[Boolean] -) extends SIntervalValue with SSettable { - def get: SIntervalCode = new SIntervalPointerCode(st, a) +) extends PIntervalValue with PSettable { + def get: PIntervalCode = new SIntervalPointerCode(st, a) val pt: PInterval = st.pType @@ -71,23 +70,24 @@ class SIntervalPointerSettable( def loadStart(cb: EmitCodeBuilder): IEmitCode = IEmitCode(cb, !(pt.startDefined(a)), - pt.pointType.loadCheapSCode(cb, pt.loadStart(a))) + pt.pointType.loadCheapPCode(cb, pt.loadStart(a))) def startDefined(cb: EmitCodeBuilder): Code[Boolean] = pt.startDefined(a) def loadEnd(cb: EmitCodeBuilder): IEmitCode = IEmitCode(cb, !(pt.endDefined(a)), - pt.pointType.loadCheapSCode(cb, pt.loadEnd(a))) + pt.pointType.loadCheapPCode(cb, pt.loadEnd(a))) def endDefined(cb: EmitCodeBuilder): Code[Boolean] = pt.endDefined(a) - def store(cb: EmitCodeBuilder, pc: SCode): Unit = { + def store(cb: EmitCodeBuilder, pc: PCode): Unit = { cb.assign(a, pc.asInstanceOf[SIntervalPointerCode].a) cb.assign(includesStart, pt.includesStart(a.load())) cb.assign(includesEnd, pt.includesEnd(a.load())) } + // FIXME orderings should take emitcodes/iemitcodes def isEmpty(cb: EmitCodeBuilder): Code[Boolean] = { val gt = cb.emb.ecb.getOrderingFunction(st.pointType, 
CodeOrdering.Gt()) val gteq = cb.emb.ecb.getOrderingFunction(st.pointType, CodeOrdering.Gteq()) @@ -104,24 +104,24 @@ class SIntervalPointerSettable( } -class SIntervalPointerCode(val st: SIntervalPointer, val a: Code[Long]) extends SIntervalCode { - val pt = st.pType +class SIntervalPointerCode(val st: SIntervalPointer, val a: Code[Long]) extends PIntervalCode { + override def pt: PInterval = st.pType def code: Code[_] = a - def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = FastIndexedSeq(a) + def codeTuple(): IndexedSeq[Code[_]] = FastIndexedSeq(a) def includesStart(): Code[Boolean] = pt.includesStart(a) def includesEnd(): Code[Boolean] = pt.includesEnd(a) - def memoize(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): SIntervalValue = { + def memoize(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): PIntervalValue = { val s = SIntervalPointerSettable(sb, st, name) cb.assign(s, this) s } - def memoize(cb: EmitCodeBuilder, name: String): SIntervalValue = memoize(cb, name, cb.localBuilder) + def memoize(cb: EmitCodeBuilder, name: String): PIntervalValue = memoize(cb, name, cb.localBuilder) - def memoizeField(cb: EmitCodeBuilder, name: String): SIntervalValue = memoize(cb, name, cb.fieldBuilder) + def memoizeField(cb: EmitCodeBuilder, name: String): PIntervalValue = memoize(cb, name, cb.fieldBuilder) } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SNDArrayPointer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SNDArrayPointer.scala index 4c0b4171fb1..82b1d8a150d 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SNDArrayPointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SNDArrayPointer.scala @@ -4,32 +4,28 @@ import is.hail.annotations.Region import is.hail.asm4s.{Code, IntInfo, LongInfo, Settable, SettableBuilder, TypeInfo, Value, const} import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} -import is.hail.types.physical.stypes.interfaces.{SBaseStructCode, SNDArray, SNDArrayCode, SNDArrayValue} -import is.hail.types.physical.stypes.{SCode, SSettable, SType, SValue} -import is.hail.types.physical.{PCanonicalNDArray, PType} -import is.hail.types.virtual.Type +import is.hail.types.physical.stypes.interfaces.{SNDArray, SNDArrayValue} +import is.hail.types.physical.stypes.{SCode, SType} +import is.hail.types.physical.{PBaseStructCode, PCanonicalNDArray, PCode, PNDArray, PNDArrayCode, PNDArrayValue, PSettable, PType, PValue} import is.hail.utils.FastIndexedSeq case class SNDArrayPointer(pType: PCanonicalNDArray) extends SNDArray { - require(!pType.required) - def nDims: Int = pType.nDims override def elementType: SType = pType.elementType.sType - override def elementPType: PType = pType.elementType - - lazy val virtualType: Type = pType.virtualType - - override def castRename(t: Type): SType = SNDArrayPointer(pType.deepRename(t)) - def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { new SNDArrayPointerCode(this, pType.store(cb, region, value, deepCopy)) } def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(LongInfo) - override def settableTupleTypes(): IndexedSeq[TypeInfo[_]] = Array.fill(2 + nDims * 2)(LongInfo) + def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = { + if (pt == this.pType) + new SNDArrayPointerCode(this, addr) + else + coerceOrCopy(cb, region, pt.loadCheapPCode(cb, addr), deepCopy = false) + 
} def fromSettables(settables: IndexedSeq[Settable[_]]): SNDArrayPointerSettable = { val a = settables(0).asInstanceOf[Settable[Long@unchecked]] @@ -65,24 +61,24 @@ class SNDArrayPointerSettable( val shape: IndexedSeq[Settable[Long]], val strides: IndexedSeq[Settable[Long]], val dataFirstElement: Settable[Long] - ) extends SNDArrayValue with SSettable { + ) extends PNDArrayValue with PSettable { val pt: PCanonicalNDArray = st.pType - def loadElement(indices: IndexedSeq[Value[Long]], cb: EmitCodeBuilder): SCode = { + def loadElement(indices: IndexedSeq[Value[Long]], cb: EmitCodeBuilder): PCode = { assert(indices.size == pt.nDims) - pt.elementType.loadCheapSCode(cb, pt.loadElementFromDataAndStrides(cb, indices, dataFirstElement, strides)) + pt.elementType.loadCheapPCode(cb, pt.loadElementFromDataAndStrides(cb, indices, dataFirstElement, strides)) } def settableTuple(): IndexedSeq[Settable[_]] = FastIndexedSeq(a) ++ shape ++ strides ++ FastIndexedSeq(dataFirstElement) - def store(cb: EmitCodeBuilder, v: SCode): Unit = { + def store(cb: EmitCodeBuilder, v: PCode): Unit = { cb.assign(a, v.asInstanceOf[SNDArrayPointerCode].a) pt.loadShapes(cb, a, shape) pt.loadStrides(cb, a, strides) cb.assign(dataFirstElement, pt.dataFirstElementPointer(a)) } - override def get: SNDArrayPointerCode = new SNDArrayPointerCode(st, a) + override def get: PCode = new SNDArrayPointerCode(st, a) override def outOfBounds(indices: IndexedSeq[Value[Long]], cb: EmitCodeBuilder): Code[Boolean] = { val shape = this.shapes(cb) @@ -121,20 +117,22 @@ class SNDArrayPointerSettable( def firstDataAddress(cb: EmitCodeBuilder): Value[Long] = dataFirstElement } -class SNDArrayPointerCode(val st: SNDArrayPointer, val a: Code[Long]) extends SNDArrayCode { +class SNDArrayPointerCode(val st: SNDArrayPointer, val a: Code[Long]) extends PNDArrayCode { val pt: PCanonicalNDArray = st.pType - override def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = FastIndexedSeq(a) + override def code: Code[_] = a + + override def codeTuple(): IndexedSeq[Code[_]] = FastIndexedSeq(a) - def memoize(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): SNDArrayValue = { + def memoize(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): PNDArrayValue = { val s = SNDArrayPointerSettable(sb, st, name) cb.assign(s, this) s } - override def memoize(cb: EmitCodeBuilder, name: String): SNDArrayValue = memoize(cb, name, cb.localBuilder) + override def memoize(cb: EmitCodeBuilder, name: String): PNDArrayValue = memoize(cb, name, cb.localBuilder) - override def memoizeField(cb: EmitCodeBuilder, name: String): SValue = memoize(cb, name, cb.fieldBuilder) + override def memoizeField(cb: EmitCodeBuilder, name: String): PValue = memoize(cb, name, cb.fieldBuilder) - override def shape(cb: EmitCodeBuilder): SBaseStructCode = pt.shapeType.loadCheapSCode(cb, pt.representation.loadField(a, "shape")) + override def shape(cb: EmitCodeBuilder): PBaseStructCode = pt.shapeType.loadCheapPCode(cb, pt.representation.loadField(a, "shape")) } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SStackStruct.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SStackStruct.scala deleted file mode 100644 index d233b204fb4..00000000000 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SStackStruct.scala +++ /dev/null @@ -1,161 +0,0 @@ -package is.hail.types.physical.stypes.concrete - -import is.hail.annotations.Region -import is.hail.asm4s.{Code, Settable, TypeInfo, Value} -import is.hail.expr.ir.{EmitCode, EmitCodeBuilder, 
EmitSettable, IEmitCode} -import is.hail.types.physical.stypes.interfaces.{SBaseStruct, SBaseStructCode, SStructSettable} -import is.hail.types.physical.stypes.{EmitType, SCode, SType} -import is.hail.types.physical.{PCanonicalBaseStruct, PCanonicalStruct, PCanonicalTuple, PTupleField, PType} -import is.hail.types.virtual.{TBaseStruct, TStruct, TTuple, Type} - -object SStackStruct { - val MAX_FIELDS_FOR_CONSTRUCT: Int = 64 - - def constructFromArgs(cb: EmitCodeBuilder, region: Value[Region], t: TBaseStruct, args: EmitCode*): SBaseStructCode = { - val as = args.toArray - assert(t.size == args.size) - if (as.length > MAX_FIELDS_FOR_CONSTRUCT) { - val structType: PCanonicalBaseStruct = t match { - case ts: TStruct => - PCanonicalStruct(false, ts.fieldNames.zip(as.map(_.emitType)).map { case (f, et) => (f, et.canonicalPType) }: _*) - case tt: TTuple => - PCanonicalTuple(tt._types.zip(as.map(_.emitType)).map { case (tf, et) => PTupleField(tf.index, et.canonicalPType) }, false) - } - structType.constructFromFields(cb, region, as, false) - } else { - val st = SStackStruct(t, as.map(_.emitType)) - new SStackStructCode(st, as) - } - } -} - -case class SStackStruct(virtualType: TBaseStruct, fieldEmitTypes: IndexedSeq[EmitType]) extends SBaseStruct { - override def size: Int = virtualType.size - - private lazy val codeStarts = fieldEmitTypes.map(_.nCodes).scanLeft(0)(_ + _).init - private lazy val settableStarts = fieldEmitTypes.map(_.nSettables).scanLeft(0)(_ + _).init - - override lazy val fieldTypes: IndexedSeq[SType] = fieldEmitTypes.map(_.st) - - def fieldIdx(fieldName: String): Int = virtualType.asInstanceOf[TStruct].fieldIdx(fieldName) - - override def canonicalPType(): PType = virtualType match { - case ts: TStruct => - PCanonicalStruct(false, ts.fieldNames.zip(fieldEmitTypes).map { case (f, et) => (f, et.canonicalPType) }: _*) - case tt: TTuple => - PCanonicalTuple(tt._types.zip(fieldEmitTypes).map { case (tf, et) => PTupleField(tf.index, et.canonicalPType) }, false) - - } - - lazy val codeTupleTypes: IndexedSeq[TypeInfo[_]] = fieldEmitTypes.flatMap(_.codeTupleTypes) - - override lazy val settableTupleTypes: IndexedSeq[TypeInfo[_]] = fieldEmitTypes.flatMap(_.settableTupleTypes) - - override def fromCodes(codes: IndexedSeq[Code[_]]): SBaseStructCode = { - new SStackStructCode(this, fieldEmitTypes.indices.map { i => - val et = fieldEmitTypes(i) - val start = codeStarts(i) - et.fromCodes(codes.slice(start, start + et.nCodes)) - }) - } - - override def fromSettables(settables: IndexedSeq[Settable[_]]): SStackStructSettable = { - assert(settables.length == fieldEmitTypes.map(_.nSettables).sum, s"mismatch: ${ settables.length } settables, expect ${ fieldEmitTypes.map(_.nSettables).sum }\n ${ settables.map(_.ti).mkString(",") }\n ${ fieldEmitTypes.map(_.settableTupleTypes).mkString(" | ") }") - new SStackStructSettable(this, fieldEmitTypes.indices.map { i => - val et = fieldEmitTypes(i) - val start = settableStarts(i) - et.fromSettables(settables.slice(start, start + et.nSettables)) - }) - } - - override def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { - value match { - case ss: SStackStructCode => - if (ss.st == this && !deepCopy) - ss - else - new SStackStructCode(this, fieldEmitTypes.zip(ss.codes).map { case (newType, ec) => - EmitCode.fromI(cb.emb) { cb => - val iec = ec.toI(cb).map(cb) { field => newType.st.coerceOrCopy(cb, region, field, deepCopy) } - (newType.required, iec.required) match { - case (true, false) => IEmitCode.present(cb, 
iec.get(cb)) - case (false, true) => iec.setOptional - case _ => iec - } - } - }) - case _ => - val sv = value.asBaseStruct.memoize(cb, "stackstruct_coerce_value") - new SStackStructCode(this, Array.tabulate[EmitCode](size) { i => - EmitCode.fromI(cb.emb) { cb => - val newType = fieldEmitTypes(i) - val iec = sv.loadField(cb, i).map(cb) { field => newType.st.coerceOrCopy(cb, region, field, deepCopy) } - (newType.required, iec.required) match { - case (true, false) => IEmitCode.present(cb, iec.get(cb)) - case (false, true) => iec.setOptional - case _ => iec - } - } - }) - } - } - - override def castRename(t: Type): SType = { - val ts = t.asInstanceOf[TBaseStruct] - SStackStruct( - ts, - ts.types.zip(fieldEmitTypes).map { case (v, e) => e.copy(st = e.st.castRename(v)) } - ) - } -} - -class SStackStructSettable(val st: SStackStruct, settables: IndexedSeq[EmitSettable]) extends SStructSettable { - def get: SStackStructCode = new SStackStructCode(st, settables.map(_.load)) - - def settableTuple(): IndexedSeq[Settable[_]] = settables.flatMap(_.settableTuple()) - - def loadField(cb: EmitCodeBuilder, fieldIdx: Int): IEmitCode = { - settables(fieldIdx).toI(cb) - } - - def isFieldMissing(fieldIdx: Int): Code[Boolean] = - settables(fieldIdx).m - - def store(cb: EmitCodeBuilder, pv: SCode): Unit = { - val ssc = pv.asInstanceOf[SStackStructCode] - settables.zip(ssc.codes).foreach { case (s, c) => s.store(cb, c) } - } -} - -class SStackStructCode(val st: SStackStruct, val codes: IndexedSeq[EmitCode]) extends SBaseStructCode { - override def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = codes.flatMap(_.makeCodeTuple(cb)) - - override def memoize(cb: EmitCodeBuilder, name: String): SStackStructSettable = { - new SStackStructSettable(st, codes.indices.map { i => - val code = codes(i) - val es = cb.emb.newEmitLocal(s"${ name }_$i", code.emitType) - es.store(cb, code) - es - }) - } - - override def memoizeField(cb: EmitCodeBuilder, name: String): SStackStructSettable = { - new SStackStructSettable(st, codes.indices.map { i => - val code = codes(i) - val es = cb.emb.newEmitField(s"${ name }_$i", code.emitType) - es.store(cb, code) - es - }) - } - - override def loadSingleField(cb: EmitCodeBuilder, fieldIdx: Int): IEmitCode = { - codes(fieldIdx).toI(cb) - } - - override def subset(fieldNames: String*): SStackStructCode = { - val newToOld = fieldNames.map(st.fieldIdx).toArray - val oldVType = st.virtualType.asInstanceOf[TStruct] - val newVirtualType = TStruct(newToOld.map(i => (oldVType.fieldNames(i), oldVType.types(i))): _*) - new SStackStructCode(SStackStruct(newVirtualType, newToOld.map(st.fieldEmitTypes)), newToOld.map(codes)) - } -} \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SStringPointer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SStringPointer.scala index 30612a0ea43..2098960f60f 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SStringPointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SStringPointer.scala @@ -4,26 +4,26 @@ import is.hail.annotations.Region import is.hail.asm4s.{Code, LongInfo, Settable, SettableBuilder, TypeInfo, Value} import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} -import is.hail.types.physical.stypes.interfaces.{SBinaryCode, SString, SStringCode, SStringValue} -import is.hail.types.physical.stypes.{SCode, SSettable, SType, SValue} -import 
is.hail.types.physical.{PCanonicalString, PString, PType} -import is.hail.types.virtual.Type +import is.hail.types.physical.stypes.interfaces.{SString, SStringCode} +import is.hail.types.physical.stypes.{SCode, SType} +import is.hail.types.physical.{PBinaryCode, PCanonicalString, PCode, PSettable, PString, PStringCode, PStringValue, PType, PValue} import is.hail.utils.FastIndexedSeq case class SStringPointer(pType: PString) extends SString { - require(!pType.required) - - lazy val virtualType: Type = pType.virtualType - - override def castRename(t: Type): SType = this - def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { new SStringPointerCode(this, pType.store(cb, region, value, deepCopy)) } def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(LongInfo) + def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = { + pt match { + case _: PCanonicalString => + new SStringPointerCode(this, addr) + } + } + def fromSettables(settables: IndexedSeq[Settable[_]]): SStringPointerSettable = { val IndexedSeq(a: Settable[Long@unchecked]) = settables assert(a.ti == LongInfo) @@ -44,28 +44,28 @@ case class SStringPointer(pType: PString) extends SString { } -class SStringPointerCode(val st: SStringPointer, val a: Code[Long]) extends SStringCode { - val pt: PString = st.pType +class SStringPointerCode(val st: SStringPointer, val a: Code[Long]) extends PStringCode { + override def pt: PString = st.pType def code: Code[_] = a - def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = FastIndexedSeq(a) + def codeTuple(): IndexedSeq[Code[_]] = FastIndexedSeq(a) def loadLength(): Code[Int] = pt.loadLength(a) def loadString(): Code[String] = pt.loadString(a) - def asBytes(): SBinaryCode = new SBinaryPointerCode(SBinaryPointer(pt.binaryRepresentation), a) + def asBytes(): PBinaryCode = new SBinaryPointerCode(SBinaryPointer(pt.binaryRepresentation), a) - private[this] def memoizeWithBuilder(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): SValue = { + private[this] def memoizeWithBuilder(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): PValue = { val s = new SStringPointerSettable(st, sb.newSettable[Long]("sstringpointer_memoize")) s.store(cb, this) s } - def memoize(cb: EmitCodeBuilder, name: String): SValue = memoizeWithBuilder(cb, name, cb.localBuilder) + def memoize(cb: EmitCodeBuilder, name: String): PValue = memoizeWithBuilder(cb, name, cb.localBuilder) - def memoizeField(cb: EmitCodeBuilder, name: String): SValue = memoizeWithBuilder(cb, name, cb.fieldBuilder) + def memoizeField(cb: EmitCodeBuilder, name: String): PValue = memoizeWithBuilder(cb, name, cb.fieldBuilder) def binaryRepr: SBinaryPointerCode = new SBinaryPointerCode(SBinaryPointer(st.pType.binaryRepresentation), a) } @@ -77,14 +77,14 @@ object SStringPointerSettable { } } -class SStringPointerSettable(val st: SStringPointer, val a: Settable[Long]) extends SStringValue with SSettable { +class SStringPointerSettable(val st: SStringPointer, val a: Settable[Long]) extends PStringValue with PSettable { val pt: PString = st.pType def settableTuple(): IndexedSeq[Settable[_]] = FastIndexedSeq(a) def get: SStringPointerCode = new SStringPointerCode(st, a.load()) - def store(cb: EmitCodeBuilder, v: SCode): Unit = { + def store(cb: EmitCodeBuilder, v: PCode): Unit = { cb.assign(a, v.asInstanceOf[SStringPointerCode].a) } } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SSubsetStruct.scala 
b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SSubsetStruct.scala index 877e03f1503..aae7fcb11ae 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SSubsetStruct.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SSubsetStruct.scala @@ -3,45 +3,24 @@ package is.hail.types.physical.stypes.concrete import is.hail.annotations.Region import is.hail.asm4s.{Code, LongInfo, Settable, TypeInfo, Value} import is.hail.expr.ir.orderings.CodeOrdering -import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, IEmitCode, SortOrder} -import is.hail.types.physical.stypes.{EmitType, SCode, SType} -import is.hail.types.physical.stypes.interfaces.{SBaseStruct, SBaseStructCode, SBaseStructValue, SStructSettable} -import is.hail.types.physical.{PCanonicalStruct, PType} -import is.hail.types.virtual.{TStruct, Type} +import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, IEmitCode, IEmitSCode, SortOrder} +import is.hail.types.physical.stypes.{SCode, SType} +import is.hail.types.physical.stypes.interfaces.{SBaseStruct, SStructSettable} +import is.hail.types.physical.{PBaseStruct, PBaseStructCode, PBaseStructValue, PCode, PStruct, PStructSettable, PSubsetStruct, PType} +import is.hail.types.virtual.TStruct case class SSubsetStruct(parent: SBaseStruct, fieldNames: IndexedSeq[String]) extends SBaseStruct { val size: Int = fieldNames.size - val _fieldIdx: Map[String, Int] = fieldNames.zipWithIndex.toMap - val newToOldFieldMapping: Map[Int, Int] = _fieldIdx - .map { case (f, i) => (i, parent.virtualType.asInstanceOf[TStruct].fieldIdx(f)) } + val fieldIdx: Map[String, Int] = fieldNames.zipWithIndex.toMap + val newToOldFieldMapping: Map[Int, Int] = fieldIdx + .map { case (f, i) => (i, parent.pType.virtualType.asInstanceOf[TStruct].fieldIdx(f)) } - val fieldTypes: IndexedSeq[SType] = Array.tabulate(size)(i => parent.fieldTypes(newToOldFieldMapping(i))) - val fieldEmitTypes: IndexedSeq[EmitType] = Array.tabulate(size)(i => parent.fieldEmitTypes(newToOldFieldMapping(i))) + val fieldTypes: Array[SType] = Array.tabulate(size)(i => parent.fieldTypes(newToOldFieldMapping(i))) - lazy val virtualType: TStruct = { - val vparent = parent.virtualType.asInstanceOf[TStruct] - TStruct(fieldNames.map(f => (f, vparent.field(f).typ)): _*) - } - - override def fieldIdx(fieldName: String): Int = _fieldIdx(fieldName) - - override def castRename(t: Type): SType = { - val renamedVType = t.asInstanceOf[TStruct] - val newNames = renamedVType.fieldNames - val subsetPrevVirtualType = virtualType - val vparent = parent.virtualType.asInstanceOf[TStruct] - val newParent = TStruct(vparent.fieldNames.map(f => subsetPrevVirtualType.fieldIdx.get(f) match { - case Some(idxInSelectedFields) => - val renamed = renamedVType.fields(idxInSelectedFields) - (renamed.name, renamed.typ) - case None => (f, vparent.fieldType(f)) - }): _*) - val newType = SSubsetStruct(parent.castRename(newParent).asInstanceOf[SBaseStruct], newNames) - assert(newType.virtualType == t) - newType - } + val pType: PSubsetStruct = PSubsetStruct(parent.pType.asInstanceOf[PStruct], fieldNames.toArray + ) def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { if (deepCopy) @@ -54,46 +33,52 @@ case class SSubsetStruct(parent: SBaseStruct, fieldNames: IndexedSeq[String]) ex def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = parent.codeTupleTypes() - override def settableTupleTypes(): IndexedSeq[TypeInfo[_]] = parent.settableTupleTypes() + def loadFrom(cb: EmitCodeBuilder, region: 
Value[Region], pt: PType, addr: Code[Long]): SCode = { + throw new UnsupportedOperationException + } def fromSettables(settables: IndexedSeq[Settable[_]]): SSubsetStructSettable = { - new SSubsetStructSettable(this, parent.fromSettables(settables).asInstanceOf[SStructSettable]) + new SSubsetStructSettable(this, parent.fromSettables(settables).asInstanceOf[PStructSettable]) } def fromCodes(codes: IndexedSeq[Code[_]]): SSubsetStructCode = { - new SSubsetStructCode(this, parent.fromCodes(codes)) + new SSubsetStructCode(this, parent.fromCodes(codes).asInstanceOf[PBaseStructCode]) } - def canonicalPType(): PType = { - PCanonicalStruct(fieldNames.zipWithIndex.map { case (f, i) => - (f, parent.fieldEmitTypes(newToOldFieldMapping(i)).canonicalPType) - }: _*) - } + def canonicalPType(): PType = pType } -class SSubsetStructSettable(val st: SSubsetStruct, prev: SStructSettable) extends SStructSettable { +// FIXME: prev should be SStructSettable, not PStructSettable +class SSubsetStructSettable(val st: SSubsetStruct, prev: PStructSettable) extends PStructSettable { + def pt: PBaseStruct = st.pType.asInstanceOf[PBaseStruct] + def get: SSubsetStructCode = new SSubsetStructCode(st, prev.load().asBaseStruct) def settableTuple(): IndexedSeq[Settable[_]] = prev.settableTuple() - def loadField(cb: EmitCodeBuilder, fieldIdx: Int): IEmitCode = { + def loadField(cb: EmitCodeBuilder, fieldIdx: Int): IEmitSCode = { prev.loadField(cb, st.newToOldFieldMapping(fieldIdx)) } def isFieldMissing(fieldIdx: Int): Code[Boolean] = prev.isFieldMissing(st.newToOldFieldMapping(fieldIdx)) - def store(cb: EmitCodeBuilder, pv: SCode): Unit = prev.store(cb, pv.asInstanceOf[SSubsetStructCode].prev) + def store(cb: EmitCodeBuilder, pv: PCode): Unit = prev.store(cb, pv.asInstanceOf[SSubsetStructCode].prev) } -class SSubsetStructCode(val st: SSubsetStruct, val prev: SBaseStructCode) extends SBaseStructCode { - def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = prev.makeCodeTuple(cb) +class SSubsetStructCode(val st: SSubsetStruct, val prev: PBaseStructCode) extends PBaseStructCode { + + val pt: PBaseStruct = st.pType + + def code: Code[_] = prev.code + + def codeTuple(): IndexedSeq[Code[_]] = prev.codeTuple() - def memoize(cb: EmitCodeBuilder, name: String): SBaseStructValue = { - new SSubsetStructSettable(st, prev.memoize(cb, name).asInstanceOf[SStructSettable]) + def memoize(cb: EmitCodeBuilder, name: String): PBaseStructValue = { + new SSubsetStructSettable(st, prev.memoize(cb, name).asInstanceOf[PStructSettable]) } - def memoizeField(cb: EmitCodeBuilder, name: String): SBaseStructValue = { - new SSubsetStructSettable(st, prev.memoizeField(cb, name).asInstanceOf[SStructSettable]) + def memoizeField(cb: EmitCodeBuilder, name: String): PBaseStructValue = { + new SSubsetStructSettable(st, prev.memoizeField(cb, name).asInstanceOf[PStructSettable]) } } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SBaseStruct.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SBaseStruct.scala index fa65a126e8d..227c256b63a 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SBaseStruct.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SBaseStruct.scala @@ -1,79 +1,42 @@ package is.hail.types.physical.stypes.interfaces -import is.hail.annotations.Region -import is.hail.asm4s.{Code, Value} -import is.hail.expr.ir.{EmitCode, EmitCodeBuilder, IEmitCode} -import is.hail.types.physical.{PCanonicalBaseStruct, PCanonicalStruct} -import 
is.hail.types.physical.stypes._ -import is.hail.types.physical.stypes.concrete.{SInsertFieldsStruct, SInsertFieldsStructCode, SSubsetStruct, SSubsetStructCode} -import is.hail.types.virtual.{TBaseStruct, TStruct} -import is.hail.utils._ +import is.hail.asm4s.Code +import is.hail.expr.ir.{EmitCodeBuilder, IEmitSCode} +import is.hail.types.physical.PBaseStruct +import is.hail.types.physical.stypes.concrete.{SSubsetStruct, SSubsetStructCode} +import is.hail.types.physical.stypes.{SCode, SSettable, SType, SValue} trait SBaseStruct extends SType { - def virtualType: TBaseStruct - override def fromCodes(codes: IndexedSeq[Code[_]]): SBaseStructCode def size: Int - val fieldTypes: IndexedSeq[SType] - val fieldEmitTypes: IndexedSeq[EmitType] - - def fieldIdx(fieldName: String): Int + val fieldTypes: Array[SType] } trait SStructSettable extends SBaseStructValue with SSettable trait SBaseStructValue extends SValue { - def st: SBaseStruct + def pt: PBaseStruct def isFieldMissing(fieldIdx: Int): Code[Boolean] - def isFieldMissing(fieldName: String): Code[Boolean] = isFieldMissing(st.fieldIdx(fieldName)) + def isFieldMissing(fieldName: String): Code[Boolean] = isFieldMissing(pt.fieldIdx(fieldName)) - def loadField(cb: EmitCodeBuilder, fieldIdx: Int): IEmitCode + def loadField(cb: EmitCodeBuilder, fieldIdx: Int): IEmitSCode - def loadField(cb: EmitCodeBuilder, fieldName: String): IEmitCode = loadField(cb, st.fieldIdx(fieldName)) + def loadField(cb: EmitCodeBuilder, fieldName: String): IEmitSCode = loadField(cb, pt.fieldIdx(fieldName)) } -trait SBaseStructCode extends SCode { - self => +trait SBaseStructCode extends SCode { self => def st: SBaseStruct def memoize(cb: EmitCodeBuilder, name: String): SBaseStructValue def memoizeField(cb: EmitCodeBuilder, name: String): SBaseStructValue - final def loadSingleField(cb: EmitCodeBuilder, fieldName: String): IEmitCode = loadSingleField(cb, st.fieldIdx(fieldName)) - - def loadSingleField(cb: EmitCodeBuilder, fieldIdx: Int): IEmitCode = { - memoize(cb, "structcode_loadsinglefield") - .loadField(cb, fieldIdx) - } - - def subset(fieldNames: String*): SBaseStructCode = { + def subset(fieldNames: String*): SSubsetStructCode = { val st = SSubsetStruct(self.st, fieldNames.toIndexedSeq) - new SSubsetStructCode(st, self) - } - - protected[stypes] def _insert(newType: TStruct, fields: (String, EmitCode)*): SBaseStructCode = { - new SInsertFieldsStructCode( - SInsertFieldsStruct(newType, st, fields.map { case (name, ec) => (name, ec.emitType) }.toFastIndexedSeq), - this, - fields.map(_._2).toFastIndexedSeq - ) - } - - final def insert(cb: EmitCodeBuilder, region: Value[Region], newType: TStruct, fields: (String, EmitCode)*): SBaseStructCode = { - if (newType.size < 64 || fields.length < 16) - return _insert(newType, fields: _*) - - val newFieldMap = fields.toMap - val oldPV = memoize(cb, "insert_fields_old") - val allFields = newType.fieldNames.map { f => - (f, newFieldMap.getOrElse(f, EmitCode.fromI(cb.emb)(cb => oldPV.loadField(cb, f)))) } - - val pcs = PCanonicalStruct(allFields.map { case (f, ec) => (f, ec.emitType.canonicalPType) }: _*) - pcs.constructFromFields(cb, region, allFields.map(_._2), false) + new SSubsetStructCode(st, self.asPCode.asBaseStruct) // FIXME, should be sufficient to just use self here } } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SBinary.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SBinary.scala index 5534c179df0..6bfcd4a95e3 100644 --- 
a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SBinary.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SBinary.scala @@ -2,6 +2,7 @@ package is.hail.types.physical.stypes.interfaces import is.hail.asm4s.Code import is.hail.expr.ir.EmitCodeBuilder +import is.hail.types.physical.PValue import is.hail.types.physical.stypes.{SCode, SType, SValue} trait SBinary extends SType diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SCall.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SCall.scala index 9a0f4fd7617..b9da86c29ba 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SCall.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SCall.scala @@ -3,6 +3,7 @@ package is.hail.types.physical.stypes.interfaces import is.hail.asm4s.{Code, Value} import is.hail.expr.ir.EmitCodeBuilder import is.hail.types.physical.stypes.{SCode, SType, SValue} +import is.hail.types.physical.{PCode, PValue} trait SCall extends SType diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SContainer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SContainer.scala index 83f9318f79a..a5f0382b040 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SContainer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SContainer.scala @@ -1,7 +1,7 @@ package is.hail.types.physical.stypes.interfaces import is.hail.asm4s._ -import is.hail.expr.ir.{EmitCodeBuilder, IEmitCode} +import is.hail.expr.ir.{EmitCodeBuilder, IEmitSCode} import is.hail.types.physical.stypes.{EmitType, SCode, SType, SValue} trait SContainer extends SType { @@ -18,7 +18,7 @@ trait SIndexableValue extends SValue { def isElementDefined(i: Code[Int]): Code[Boolean] = !isElementMissing(i) - def loadElement(cb: EmitCodeBuilder, i: Code[Int]): IEmitCode + def loadElement(cb: EmitCodeBuilder, i: Code[Int]): IEmitSCode def hasMissingValues(cb: EmitCodeBuilder): Code[Boolean] diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SInterval.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SInterval.scala index cb7f533e318..6d9b2224109 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SInterval.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SInterval.scala @@ -1,13 +1,12 @@ package is.hail.types.physical.stypes.interfaces import is.hail.asm4s.{Code, Value} -import is.hail.expr.ir.{EmitCodeBuilder, IEmitCode} +import is.hail.expr.ir.{EmitCodeBuilder, IEmitSCode} import is.hail.types.physical.PInterval -import is.hail.types.physical.stypes.{EmitType, SCode, SType, SValue} +import is.hail.types.physical.stypes.{SCode, SType, SValue} trait SInterval extends SType { def pointType: SType - def pointEmitType: EmitType } trait SIntervalValue extends SValue { @@ -17,11 +16,11 @@ trait SIntervalValue extends SValue { def includesEnd(): Value[Boolean] - def loadStart(cb: EmitCodeBuilder): IEmitCode + def loadStart(cb: EmitCodeBuilder): IEmitSCode def startDefined(cb: EmitCodeBuilder): Code[Boolean] - def loadEnd(cb: EmitCodeBuilder): IEmitCode + def loadEnd(cb: EmitCodeBuilder): IEmitSCode def endDefined(cb: EmitCodeBuilder): Code[Boolean] diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SLocus.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SLocus.scala index 629799a6a98..a58a934c132 100644 --- 
a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SLocus.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SLocus.scala @@ -7,7 +7,6 @@ import is.hail.variant.{Locus, ReferenceGenome} trait SLocus extends SType { def rg: ReferenceGenome - def contigType: SString } trait SLocusValue extends SValue { @@ -20,8 +19,6 @@ trait SLocusValue extends SValue { } trait SLocusCode extends SCode { - def st: SLocus - def contig(cb: EmitCodeBuilder): SStringCode def position(cb: EmitCodeBuilder): Code[Int] diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SNDArray.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SNDArray.scala index a47530a0758..8085436dfe0 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SNDArray.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SNDArray.scala @@ -1,125 +1,13 @@ package is.hail.types.physical.stypes.interfaces -import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir.EmitCodeBuilder -import is.hail.types.physical.{PNDArray, PType} -import is.hail.types.physical.stypes.{SCode, SSettable, SType, SValue} -import is.hail.utils.{FastIndexedSeq, toRichIterable} +import is.hail.types.physical.stypes.{SCode, SType, SValue} object SNDArray { - def numElements(shape: IndexedSeq[Value[Long]]): Code[Long] = { - shape.foldLeft(1L: Code[Long])(_ * _) - } - // Column major order - def forEachIndexColMajor(cb: EmitCodeBuilder, shape: IndexedSeq[Value[Long]], context: String) - (f: (EmitCodeBuilder, IndexedSeq[Value[Long]]) => Unit): Unit = { - forEachIndexWithInitAndIncColMajor(cb, shape, shape.map(_ => (cb: EmitCodeBuilder) => ()), shape.map(_ => (cb: EmitCodeBuilder) => ()), context)(f) - } - - def coiterate(cb: EmitCodeBuilder, region: Value[Region], arrays: IndexedSeq[(SNDArrayCode, String)], body: IndexedSeq[SSettable] => Unit): Unit = - coiterate(cb, region, arrays, body, deepCopy=false) - - def coiterate(cb: EmitCodeBuilder, region: Value[Region], arrays: IndexedSeq[(SNDArrayCode, String)], body: IndexedSeq[SSettable] => Unit, deepCopy: Boolean): Unit = { - if (arrays.isEmpty) return - val indexVars = Array.tabulate(arrays(0)._1.st.nDims)(i => s"i$i").toFastIndexedSeq - val indices = Array.range(0, arrays(0)._1.st.nDims).toFastIndexedSeq - coiterate(cb, region, indexVars, arrays.map { case (array, name) => (array, indices, name) }, body, deepCopy) - } - - def coiterate(cb: EmitCodeBuilder, region: Value[Region], indexVars: IndexedSeq[String], arrays: IndexedSeq[(SNDArrayCode, IndexedSeq[Int], String)], body: IndexedSeq[SSettable] => Unit): Unit = - coiterate(cb, region, indexVars, arrays, body, deepCopy=false) - - // Note: to iterate through an array in column major order, make sure the indices are in ascending order. E.g. - // coiterate(cb, region, IndexedSeq("i", "j"), IndexedSeq((A, IndexedSeq(0, 1), "A"), (B, IndexedSeq(0, 1), "B")), { - // case Seq(a, b) => cb.assign(a, SCode.add(cb, a, b)) - // }) - // computes A += B. 
- def coiterate(cb: EmitCodeBuilder, region: Value[Region], indexVars: IndexedSeq[String], arrays: IndexedSeq[(SNDArrayCode, IndexedSeq[Int], String)], body: IndexedSeq[SSettable] => Unit, deepCopy: Boolean): Unit = { - - val indexSizes = new Array[Settable[Int]](indexVars.length) - val indexCoords = Array.tabulate(indexVars.length) { i => cb.newLocal[Int](indexVars(i)) } - - case class ArrayInfo( - array: SNDArrayValue, - strides: IndexedSeq[Value[Long]], - pos: IndexedSeq[Settable[Long]], - elt: SSettable, - indexToDim: Map[Int, Int], - name: String) - - val info = arrays.map { case (_array, indices, name) => - for (idx <- indices) assert(idx < indexVars.length && idx >= 0) - // FIXME: relax this assumption to handle transposing, non-column major - for (i <- 0 until indices.length - 1) assert(indices(i) < indices(i+1)) - assert(indices.length == _array.st.nDims) - - val array = _array.memoize(cb, s"${name}_copy") - val shape = array.shapes(cb) - for (i <- indices.indices) { - val idx = indices(i) - if (indexSizes(idx) == null) { - indexSizes(idx) = cb.newLocal[Int](s"${indexVars(idx)}_max") - cb.assign(indexSizes(idx), shape(i).toI) - } else { - cb.ifx(indexSizes(idx).cne(shape(i).toI), s"${indexVars(idx)} indexes incompatible dimensions") - } - } - val strides = array.strides(cb) - val pos = Array.tabulate(array.st.nDims + 1) { i => cb.newLocal[Long](s"$name$i") } - val elt = new SSettable { - def st: SType = array.st.elementType - val pt: PType = array.st.pType.elementType - - // FIXME: need to use `pos` of smallest index var - def get: SCode = pt.loadCheapSCode(cb, pt.loadFromNested(pos(0))) - def store(cb: EmitCodeBuilder, v: SCode): Unit = pt.storeAtAddress(cb, pos(0), region, v, deepCopy) - def settableTuple(): IndexedSeq[Settable[_]] = FastIndexedSeq(pos.last) - } - val indexToDim = indices.zipWithIndex.toMap - ArrayInfo(array, strides, pos, elt, indexToDim, name) - } - - def recurLoopBuilder(idx: Int): Unit = { - if (idx < 0) { - body(info.map(_.elt)) - } else { - val coord = indexCoords(idx) - def init(): Unit = { - cb.assign(coord, 0) - for (n <- arrays.indices) { - if (info(n).indexToDim.contains(idx)) { - val i = info(n).indexToDim(idx) - // FIXME: assumes array's indices in ascending order - cb.assign(info(n).pos(i), info(n).pos(i+1)) - } - } - } - def increment(): Unit = { - cb.assign(coord, coord + 1) - for (n <- arrays.indices) { - if (info(n).indexToDim.contains(idx)) { - val i = info(n).indexToDim(idx) - cb.assign(info(n).pos(i), info(n).pos(i) + info(n).strides(i)) - } - } - } - - cb.forLoop(init(), coord < indexSizes(idx), increment(), recurLoopBuilder(idx - 1)) - } - } - - for (n <- arrays.indices) { - cb.assign(info(n).pos(info(n).array.st.nDims), info(n).array.firstDataAddress(cb)) - } - recurLoopBuilder(indexVars.length - 1) - } - - // Column major order - def forEachIndexWithInitAndIncColMajor(cb: EmitCodeBuilder, shape: IndexedSeq[Value[Long]], inits: IndexedSeq[EmitCodeBuilder => Unit], - incrementers: IndexedSeq[EmitCodeBuilder => Unit], context: String) - (f: (EmitCodeBuilder, IndexedSeq[Value[Long]]) => Unit): Unit = { + def forEachIndex(cb: EmitCodeBuilder, shape: IndexedSeq[Value[Long]], context: String) + (f: (EmitCodeBuilder, IndexedSeq[Value[Long]]) => Unit): Unit = { val indices = Array.tabulate(shape.length) { dimIdx => cb.newLocal[Long](s"${ context }_foreach_dim_$dimIdx", 0L) } @@ -133,10 +21,8 @@ object SNDArray { recurLoopBuilder(dimIdx + 1, () => { cb.forLoop({ - inits(dimIdx)(cb) cb.assign(dimVar, 0L) }, dimVar < shape(dimIdx), { - 
incrementers(dimIdx)(cb) cb.assign(dimVar, dimVar + 1L) }, innerLambda() @@ -151,47 +37,6 @@ object SNDArray { recurLoopBuilder(0, body) } - // Row major order - def forEachIndexRowMajor(cb: EmitCodeBuilder, shape: IndexedSeq[Value[Long]], context: String) - (f: (EmitCodeBuilder, IndexedSeq[Value[Long]]) => Unit): Unit = { - forEachIndexWithInitAndIncRowMajor(cb, shape, shape.map(_ => (cb: EmitCodeBuilder) => ()), shape.map(_ => (cb: EmitCodeBuilder) => ()), context)(f) - } - - // Row major order - def forEachIndexWithInitAndIncRowMajor(cb: EmitCodeBuilder, shape: IndexedSeq[Value[Long]], inits: IndexedSeq[EmitCodeBuilder => Unit], - incrementers: IndexedSeq[EmitCodeBuilder => Unit], context: String) - (f: (EmitCodeBuilder, IndexedSeq[Value[Long]]) => Unit): Unit = { - - val indices = Array.tabulate(shape.length) { dimIdx => cb.newLocal[Long](s"${ context }_foreach_dim_$dimIdx", 0L) } - - def recurLoopBuilder(dimIdx: Int, innerLambda: () => Unit): Unit = { - if (dimIdx == -1) { - innerLambda() - } - else { - val dimVar = indices(dimIdx) - - recurLoopBuilder(dimIdx - 1, - () => { - cb.forLoop({ - inits(dimIdx)(cb) - cb.assign(dimVar, 0L) - }, dimVar < shape(dimIdx), { - incrementers(dimIdx)(cb) - cb.assign(dimVar, dimVar + 1L) - }, - innerLambda() - ) - } - ) - } - } - - val body = () => f(cb, indices) - - recurLoopBuilder(shape.length - 1, body) - } - // Column major order def unstagedForEachIndex(shape: IndexedSeq[Long]) (f: IndexedSeq[Long] => Unit): Unit = { @@ -223,19 +68,14 @@ object SNDArray { trait SNDArray extends SType { - def pType: PNDArray - def nDims: Int def elementType: SType - def elementPType: PType } trait SNDArrayValue extends SValue { def st: SNDArray - override def get: SNDArrayCode - def loadElement(indices: IndexedSeq[Value[Long]], cb: EmitCodeBuilder): SCode def shapes(cb: EmitCodeBuilder): IndexedSeq[Value[Long]] diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SStream.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SStream.scala index 027e000c5a5..ac7a3947f94 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SStream.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SStream.scala @@ -4,12 +4,11 @@ import is.hail.annotations.Region import is.hail.asm4s.{Code, Settable, TypeInfo, Value} import is.hail.expr.ir.EmitCodeBuilder import is.hail.expr.ir.streams.StreamProducer -import is.hail.types.physical.stypes.{EmitType, SCode, SSettable, SType, SUnrealizableCode, SValue} -import is.hail.types.physical.{PCanonicalStream, PStream, PType} -import is.hail.types.virtual.{TStream, Type} +import is.hail.types.physical.stypes.{SCode, SSettable, SType} +import is.hail.types.physical.{PCanonicalStream, PCode, PStream, PStreamCode, PType, PValue} -case class SStream(elementEmitType: EmitType) extends SType { - def elementType: SType = elementEmitType.st +case class SStream(elementType: SType, required: Boolean) extends SType { + def pType: PStream = PCanonicalStream(elementType.pType, required) def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { if (deepCopy) throw new UnsupportedOperationException @@ -20,30 +19,27 @@ case class SStream(elementEmitType: EmitType) extends SType { def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = throw new UnsupportedOperationException + def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = throw new UnsupportedOperationException + def fromCodes(codes: 
IndexedSeq[Code[_]]): SCode = throw new UnsupportedOperationException def fromSettables(settables: IndexedSeq[Settable[_]]): SSettable = throw new UnsupportedOperationException - def canonicalPType(): PType = PCanonicalStream(elementEmitType.canonicalPType) - - override def virtualType: Type = TStream(elementType.virtualType) - - override def castRename(t: Type): SType = ??? + def canonicalPType(): PType = pType } -object SStreamCode{ - def apply(producer: StreamProducer): SStreamCode = SStreamCode(SStream(producer.element.emitType), producer) -} - -final case class SStreamCode(st: SStream, producer: StreamProducer) extends SCode with SUnrealizableCode { +final case class SStreamCode(st: SStream, producer: StreamProducer) extends PStreamCode { self => - def memoize(cb: EmitCodeBuilder, name: String): SValue = new SValue { + override def pt: PStream = st.pType + + def memoize(cb: EmitCodeBuilder, name: String): PValue = new PValue { + def pt: PStream = PCanonicalStream(st.pType) override def st: SType = self.st var used: Boolean = false - def get: SCode = { + def get: PCode = { assert(!used) used = true self diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SVoid.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SVoid.scala index c7e91e77ee8..5cb7b78308d 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SVoid.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SVoid.scala @@ -4,38 +4,41 @@ import is.hail.annotations.Region import is.hail.asm4s.{Code, Settable, TypeInfo, UnitInfo, Value} import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} -import is.hail.types.physical.stypes.{SCode, SSettable, SType, SUnrealizableCode, SValue} -import is.hail.types.physical.{PType, PVoid} -import is.hail.types.virtual.{TVoid, Type} +import is.hail.types.physical.stypes.{SCode, SSettable, SType} +import is.hail.types.physical.{PCode, PType, PUnrealizableCode, PValue, PVoid} case object SVoid extends SType { - override def virtualType: Type = TVoid - - override def castRename(t: Type): SType = this + def pType: PType = PVoid def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = value def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = IndexedSeq() + def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = throw new UnsupportedOperationException + def fromCodes(codes: IndexedSeq[Code[_]]): SCode = throw new UnsupportedOperationException def fromSettables(settables: IndexedSeq[Settable[_]]): SSettable = throw new UnsupportedOperationException - def canonicalPType(): PType = throw new UnsupportedOperationException + def canonicalPType(): PType = pType } -case object SVoidCode extends SCode with SUnrealizableCode { +case object PVoidCode extends PCode with PUnrealizableCode { self => + override def pt: PType = PVoid + override def st: SType = SVoid + override def typeInfo: TypeInfo[_] = UnitInfo + override def code: Code[_] = Code._empty - def memoize(cb: EmitCodeBuilder, name: String): SValue = new SValue { + def memoize(cb: EmitCodeBuilder, name: String): PValue = new PValue { val pt: PType = PVoid val st: SType = SVoid - def get: SCode = self + def get: PCode = self } } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/package.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/package.scala index 4ab4a359248..ece62fdeb37 100644 --- 
a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/package.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/package.scala @@ -1,22 +1,13 @@ package is.hail.types.physical.stypes -import is.hail.asm4s._ -import is.hail.types.physical.stypes.primitives._ -import is.hail.types.virtual._ +import is.hail.asm4s.Code +import is.hail.types.physical.stypes.primitives.{SBooleanCode, SFloat32Code, SFloat64Code, SInt32Code, SInt64Code} package object interfaces { - def primitive(x: Code[Long]): SInt64Code = new SInt64Code(x) - def primitive(x: Code[Int]): SInt32Code = new SInt32Code(x) - def primitive(x: Code[Double]): SFloat64Code = new SFloat64Code(x) - def primitive(x: Code[Float]): SFloat32Code = new SFloat32Code(x) - def primitive(x: Code[Boolean]): SBooleanCode = new SBooleanCode(x) - - def primitive(t: Type, x: Code[_]): SCode = t match { - case TInt32 => primitive(coerce[Int](x)) - case TInt64 => primitive(coerce[Long](x)) - case TFloat32 => primitive(coerce[Float](x)) - case TFloat64 => primitive(coerce[Double](x)) - case TBoolean => primitive(coerce[Boolean](x)) - } + def primitive(x: Code[Long]): SInt64Code = new SInt64Code(true, x) + def primitive(x: Code[Int]): SInt32Code = new SInt32Code(true, x) + def primitive(x: Code[Double]): SFloat64Code = new SFloat64Code(true, x) + def primitive(x: Code[Float]): SFloat32Code = new SFloat32Code(true, x) + def primitive(x: Code[Boolean]): SBooleanCode = new SBooleanCode(true, x) } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SBoolean.scala b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SBoolean.scala index 2607f9f318e..6a1105387f0 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SBoolean.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SBoolean.scala @@ -2,53 +2,60 @@ package is.hail.types.physical.stypes.primitives import is.hail.annotations.Region import is.hail.asm4s.{BooleanInfo, Code, Settable, SettableBuilder, TypeInfo, Value} +import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} -import is.hail.types.physical.stypes.{SCode, SSettable, SType, SValue} -import is.hail.types.physical.{PBoolean, PType} -import is.hail.types.virtual.{TBoolean, Type} +import is.hail.types.physical.stypes.{SCode, SType} +import is.hail.types.physical.{PBoolean, PCode, PSettable, PType, PValue} import is.hail.utils.FastIndexedSeq -case object SBoolean extends SPrimitive { +case class SBoolean(required: Boolean) extends SPrimitive { def ti: TypeInfo[_] = BooleanInfo - lazy val virtualType: Type = TBoolean - - override def castRename(t: Type): SType = this + override def pType: PBoolean = PBoolean(required) def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { value.st match { - case SBoolean => - value + case SBoolean(_) => + value.asInstanceOf[SBooleanCode] } } def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(BooleanInfo) + def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = { + pt match { + case PBoolean(_) => + new SBooleanCode(required: Boolean, Region.loadBoolean(addr)) + } + } + def fromSettables(settables: IndexedSeq[Settable[_]]): SBooleanSettable = { val IndexedSeq(x: Settable[Boolean@unchecked]) = settables assert(x.ti == BooleanInfo) - new SBooleanSettable( x) + new SBooleanSettable(required, x) } def fromCodes(codes: IndexedSeq[Code[_]]): SBooleanCode = { val 
IndexedSeq(x: Code[Boolean@unchecked]) = codes assert(x.ti == BooleanInfo) - new SBooleanCode(x) + new SBooleanCode(required, x) } - def canonicalPType(): PType = PBoolean() + def canonicalPType(): PType = pType } -class SBooleanCode(val code: Code[Boolean]) extends SCode with SPrimitiveCode { +class SBooleanCode(required: Boolean, val code: Code[Boolean]) extends PCode with SPrimitiveCode { override def _primitiveCode: Code[_] = code - def st: SBoolean.type = SBoolean + val pt: PBoolean = PBoolean(required) + + def st: SBoolean = SBoolean(required) - def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = FastIndexedSeq(code) + def codeTuple(): IndexedSeq[Code[_]] = FastIndexedSeq(code) private[this] def memoizeWithBuilder(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): SBooleanSettable = { - val s = new SBooleanSettable(sb.newSettable[Boolean]("sboolean_memoize")) + val s = new SBooleanSettable(required, sb.newSettable[Boolean]("sboolean_memoize")) s.store(cb, this) s } @@ -61,21 +68,21 @@ class SBooleanCode(val code: Code[Boolean]) extends SCode with SPrimitiveCode { } object SBooleanSettable { - def apply(sb: SettableBuilder, name: String): SBooleanSettable = { - new SBooleanSettable( sb.newSettable[Boolean](name)) + def apply(sb: SettableBuilder, name: String, required: Boolean): SBooleanSettable = { + new SBooleanSettable(required, sb.newSettable[Boolean](name)) } } -class SBooleanSettable(x: Settable[Boolean]) extends SValue with SSettable { - val pt: PBoolean = PBoolean() +class SBooleanSettable(required: Boolean, x: Settable[Boolean]) extends PValue with PSettable { + val pt: PBoolean = PBoolean(required) - def st: SBoolean.type = SBoolean + def st: SBoolean = SBoolean(required) - def store(cb: EmitCodeBuilder, v: SCode): Unit = cb.assign(x, v.asBoolean.boolCode(cb)) + def store(cb: EmitCodeBuilder, v: PCode): Unit = cb.assign(x, v.asBoolean.boolCode(cb)) def settableTuple(): IndexedSeq[Settable[_]] = FastIndexedSeq(x) - def get: SCode = new SBooleanCode(x) + def get: PCode = new SBooleanCode(required, x) def boolCode(cb: EmitCodeBuilder): Code[Boolean] = x } \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat32.scala b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat32.scala index 43f5a2db83e..2e788af86c2 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat32.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat32.scala @@ -4,84 +4,92 @@ import is.hail.annotations.Region import is.hail.asm4s.{Code, FloatInfo, Settable, SettableBuilder, TypeInfo, Value} import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} -import is.hail.types.physical.stypes.{SCode, SSettable, SType, SValue} -import is.hail.types.physical.{PFloat32, PType} -import is.hail.types.virtual.{TFloat32, Type} +import is.hail.types.physical.stypes.{SCode, SType} +import is.hail.types.physical.{PCode, PFloat32, PSettable, PType, PValue} import is.hail.utils.FastIndexedSeq -case object SFloat32 extends SPrimitive { +case class SFloat32(required: Boolean) extends SPrimitive { def ti: TypeInfo[_] = FloatInfo - lazy val virtualType: Type = TFloat32 - - override def castRename(t: Type): SType = this + override def pType: PFloat32 = PFloat32(required) def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { value.st match { - case SFloat32 => value + case SFloat32(r) => + if 
(r == required) + value + else + new SFloat32Code(required, value.asInstanceOf[SFloat32Code].code) } } def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(FloatInfo) + def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = { + pt match { + case _: PFloat32 => + new SFloat32Code(required, Region.loadFloat(addr)) + } + } + def fromSettables(settables: IndexedSeq[Settable[_]]): SFloat32Settable = { val IndexedSeq(x: Settable[Float@unchecked]) = settables assert(x.ti == FloatInfo) - new SFloat32Settable(x) + new SFloat32Settable(required, x) } def fromCodes(codes: IndexedSeq[Code[_]]): SFloat32Code = { val IndexedSeq(x: Code[Float@unchecked]) = codes assert(x.ti == FloatInfo) - new SFloat32Code(x) + new SFloat32Code(required, x) } - def canonicalPType(): PType = PFloat32() + def canonicalPType(): PType = pType } -trait SFloat32Value extends SValue { +trait PFloat32Value extends PValue { def floatCode(cb: EmitCodeBuilder): Code[Float] } -class SFloat32Code(val code: Code[Float]) extends SCode with SPrimitiveCode { +class SFloat32Code(required: Boolean, val code: Code[Float]) extends PCode with SPrimitiveCode { override def _primitiveCode: Code[_] = code - val pt: PFloat32 = PFloat32(false) + val pt: PFloat32 = PFloat32(required) - def st: SFloat32.type = SFloat32 + def st: SFloat32 = SFloat32(required) - def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = FastIndexedSeq(code) + def codeTuple(): IndexedSeq[Code[_]] = FastIndexedSeq(code) - private[this] def memoizeWithBuilder(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): SFloat32Value = { - val s = new SFloat32Settable(sb.newSettable[Float]("sint64_memoize")) + private[this] def memoizeWithBuilder(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): PFloat32Value = { + val s = new SFloat32Settable(required, sb.newSettable[Float]("sint64_memoize")) s.store(cb, this) s } - def memoize(cb: EmitCodeBuilder, name: String): SFloat32Value = memoizeWithBuilder(cb, name, cb.localBuilder) + def memoize(cb: EmitCodeBuilder, name: String): PFloat32Value = memoizeWithBuilder(cb, name, cb.localBuilder) - def memoizeField(cb: EmitCodeBuilder, name: String): SFloat32Value = memoizeWithBuilder(cb, name, cb.fieldBuilder) + def memoizeField(cb: EmitCodeBuilder, name: String): PFloat32Value = memoizeWithBuilder(cb, name, cb.fieldBuilder) def floatCode(cb: EmitCodeBuilder): Code[Float] = code } object SFloat32Settable { - def apply(sb: SettableBuilder, name: String): SFloat32Settable = { - new SFloat32Settable(sb.newSettable[Float](name)) + def apply(sb: SettableBuilder, name: String, required: Boolean): SFloat32Settable = { + new SFloat32Settable(required, sb.newSettable[Float](name)) } } -class SFloat32Settable(x: Settable[Float]) extends SFloat32Value with SSettable { - val pt: PFloat32 = PFloat32() +class SFloat32Settable(required: Boolean, x: Settable[Float]) extends PFloat32Value with PSettable { + val pt: PFloat32 = PFloat32(required) - def st: SFloat32.type = SFloat32 + def st: SFloat32 = SFloat32(required) - def store(cb: EmitCodeBuilder, v: SCode): Unit = cb.assign(x, v.asFloat.floatCode(cb)) + def store(cb: EmitCodeBuilder, v: PCode): Unit = cb.assign(x, v.asFloat.floatCode(cb)) def settableTuple(): IndexedSeq[Settable[_]] = FastIndexedSeq(x) - def get: SCode = new SFloat32Code(x) + def get: PCode = new SFloat32Code(required, x) def floatCode(cb: EmitCodeBuilder): Code[Float] = x } \ No newline at end of file diff --git 
a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat64.scala b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat64.scala index 753ac6e9e21..d72346bc173 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat64.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat64.scala @@ -4,21 +4,22 @@ import is.hail.annotations.Region import is.hail.asm4s.{Code, DoubleInfo, Settable, SettableBuilder, TypeInfo, Value} import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} -import is.hail.types.physical.stypes.{SCode, SSettable, SType, SValue} -import is.hail.types.physical.{PFloat64, PType} -import is.hail.types.virtual.{TFloat64, Type} +import is.hail.types.physical.stypes.{SCode, SType} +import is.hail.types.physical.{PCode, PFloat64, PSettable, PType, PValue} import is.hail.utils.FastIndexedSeq -case object SFloat64 extends SPrimitive { +case class SFloat64(required: Boolean) extends SPrimitive { def ti: TypeInfo[_] = DoubleInfo - lazy val virtualType: Type = TFloat64 - - override def castRename(t: Type): SType = this + override def pType: PFloat64 = PFloat64(required) def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { value.st match { - case SFloat64 => value + case SFloat64(r) => + if (r == required) + value + else + new SFloat64Code(required, value.asInstanceOf[SFloat64Code].code) } } @@ -27,70 +28,72 @@ case object SFloat64 extends SPrimitive { def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = { pt match { case _: PFloat64 => - new SFloat64Code(Region.loadDouble(addr)) + new SFloat64Code(required, Region.loadDouble(addr)) } } def fromSettables(settables: IndexedSeq[Settable[_]]): SFloat64Settable = { val IndexedSeq(x: Settable[Double@unchecked]) = settables assert(x.ti == DoubleInfo) - new SFloat64Settable(x) + new SFloat64Settable(required, x) } def fromCodes(codes: IndexedSeq[Code[_]]): SFloat64Code = { val IndexedSeq(x: Code[Double@unchecked]) = codes assert(x.ti == DoubleInfo) - new SFloat64Code(x) + new SFloat64Code(required, x) } - def canonicalPType(): PType = PFloat64() + def canonicalPType(): PType = pType } -trait SFloat64Value extends SValue { +trait PFloat64Value extends PValue { def doubleCode(cb: EmitCodeBuilder): Code[Double] } object SFloat64Code { - def apply(code: Code[Double]): SFloat64Code = new SFloat64Code(code) + def apply(code: Code[Double], required: Boolean = true): SFloat64Code = new SFloat64Code(required, code) } -class SFloat64Code(val code: Code[Double]) extends SCode with SPrimitiveCode { +class SFloat64Code(required: Boolean, val code: Code[Double]) extends PCode with SPrimitiveCode { override def _primitiveCode: Code[_] = code - def st: SFloat64.type = SFloat64 + val pt: PFloat64 = PFloat64(required) + + def st: SFloat64 = SFloat64(required) - def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = FastIndexedSeq(code) + def codeTuple(): IndexedSeq[Code[_]] = FastIndexedSeq(code) - private[this] def memoizeWithBuilder(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): SFloat64Value = { - val s = new SFloat64Settable(sb.newSettable[Double]("sint64_memoize")) + private[this] def memoizeWithBuilder(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): PFloat64Value = { + val s = new SFloat64Settable(required, sb.newSettable[Double]("sint64_memoize")) s.store(cb, this) s } - def memoize(cb: EmitCodeBuilder, 
name: String): SFloat64Value = memoizeWithBuilder(cb, name, cb.localBuilder) + def memoize(cb: EmitCodeBuilder, name: String): PFloat64Value = memoizeWithBuilder(cb, name, cb.localBuilder) - def memoizeField(cb: EmitCodeBuilder, name: String): SFloat64Value = memoizeWithBuilder(cb, name, cb.fieldBuilder) + def memoizeField(cb: EmitCodeBuilder, name: String): PFloat64Value = memoizeWithBuilder(cb, name, cb.fieldBuilder) def doubleCode(cb: EmitCodeBuilder): Code[Double] = code } object SFloat64Settable { - def apply(sb: SettableBuilder, name: String): SFloat64Settable = { - new SFloat64Settable(sb.newSettable[Double](name)) + def apply(sb: SettableBuilder, name: String, required: Boolean): SFloat64Settable = { + new SFloat64Settable(required, sb.newSettable[Double](name)) } } -class SFloat64Settable(x: Settable[Double]) extends SFloat64Value with SSettable { - val pt: PFloat64 = PFloat64(false) +class SFloat64Settable(required: Boolean, x: Settable[Double]) extends PFloat64Value with PSettable { + val pt: PFloat64 = PFloat64(required) - def st: SFloat64.type = SFloat64 + def st: SFloat64 = SFloat64(required) - def store(cb: EmitCodeBuilder, v: SCode): Unit = cb.assign(x, v.asDouble.doubleCode(cb)) + def store(cb: EmitCodeBuilder, v: PCode): Unit = cb.assign(x, v.asDouble.doubleCode(cb)) def settableTuple(): IndexedSeq[Settable[_]] = FastIndexedSeq(x) - def get: SCode = new SFloat64Code(x) + def get: PCode = new SFloat64Code(required, x) def doubleCode(cb: EmitCodeBuilder): Code[Double] = x } \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt32.scala b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt32.scala index fd9f0cfd6d9..9350bd95524 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt32.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt32.scala @@ -4,81 +4,91 @@ import is.hail.annotations.Region import is.hail.asm4s.{Code, IntInfo, Settable, SettableBuilder, TypeInfo, Value} import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} -import is.hail.types.physical.stypes.{SCode, SSettable, SType, SValue} -import is.hail.types.physical.{PInt32, PType} -import is.hail.types.virtual.{TInt32, Type} +import is.hail.types.physical.stypes.{SCode, SType} +import is.hail.types.physical.{PCode, PInt32, PSettable, PType, PValue} import is.hail.utils.FastIndexedSeq -case object SInt32 extends SPrimitive { +case class SInt32(required: Boolean) extends SPrimitive { def ti: TypeInfo[_] = IntInfo - lazy val virtualType: Type = TInt32 - - override def castRename(t: Type): SType = this + override def pType: PInt32 = PInt32(required) def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { value.st match { - case SInt32 => value + case SInt32(r) => + if (r == required) + value + else + new SInt32Code(required, value.asInstanceOf[SInt32Code].code) } } def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(IntInfo) + def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = { + pt match { + case _: PInt32 => + new SInt32Code(required, Region.loadInt(addr)) + } + } + def fromSettables(settables: IndexedSeq[Settable[_]]): SInt32Settable = { val IndexedSeq(x: Settable[Int@unchecked]) = settables assert(x.ti == IntInfo) - new SInt32Settable(x) + new SInt32Settable(required, x) } def fromCodes(codes: IndexedSeq[Code[_]]): SInt32Code = { val 
IndexedSeq(x: Code[Int@unchecked]) = codes assert(x.ti == IntInfo) - new SInt32Code(x) + new SInt32Code(required, x) } - def canonicalPType(): PType = PInt32() + def canonicalPType(): PType = pType } -trait SInt32Value extends SValue { +trait PInt32Value extends PValue { def intCode(cb: EmitCodeBuilder): Code[Int] } -class SInt32Code(val code: Code[Int]) extends SCode with SPrimitiveCode { +class SInt32Code(required: Boolean, val code: Code[Int]) extends PCode with SPrimitiveCode { override def _primitiveCode: Code[_] = code - def st: SInt32.type = SInt32 + val pt: PInt32 = PInt32(required) + + def st: SInt32 = SInt32(required) - def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = FastIndexedSeq(code) + def codeTuple(): IndexedSeq[Code[_]] = FastIndexedSeq(code) - private[this] def memoizeWithBuilder(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): SInt32Value = { - val s = new SInt32Settable(sb.newSettable[Int]("sInt32_memoize")) + private[this] def memoizeWithBuilder(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): PInt32Value = { + val s = new SInt32Settable(required, sb.newSettable[Int]("sInt32_memoize")) s.store(cb, this) s } - def memoize(cb: EmitCodeBuilder, name: String): SInt32Value = memoizeWithBuilder(cb, name, cb.localBuilder) + def memoize(cb: EmitCodeBuilder, name: String): PInt32Value = memoizeWithBuilder(cb, name, cb.localBuilder) - def memoizeField(cb: EmitCodeBuilder, name: String): SInt32Value = memoizeWithBuilder(cb, name, cb.fieldBuilder) + def memoizeField(cb: EmitCodeBuilder, name: String): PInt32Value = memoizeWithBuilder(cb, name, cb.fieldBuilder) def intCode(cb: EmitCodeBuilder): Code[Int] = code } object SInt32Settable { - def apply(sb: SettableBuilder, name: String): SInt32Settable = { - new SInt32Settable(sb.newSettable[Int](name)) + def apply(sb: SettableBuilder, name: String, required: Boolean): SInt32Settable = { + new SInt32Settable(required, sb.newSettable[Int](name)) } } -class SInt32Settable(x: Settable[Int]) extends SInt32Value with SSettable { - val pt: PInt32 = PInt32(false) +class SInt32Settable(required: Boolean, x: Settable[Int]) extends PInt32Value with PSettable { + val pt: PInt32 = PInt32(required) - def st: SInt32.type = SInt32 + def st: SInt32 = SInt32(required) - def store(cb: EmitCodeBuilder, v: SCode): Unit = cb.assign(x, v.asInt.intCode(cb)) + def store(cb: EmitCodeBuilder, v: PCode): Unit = cb.assign(x, v.asInt.intCode(cb)) def settableTuple(): IndexedSeq[Settable[_]] = FastIndexedSeq(x) - def get: SCode = new SInt32Code(x) + def get: PCode = new SInt32Code(required, x) def intCode(cb: EmitCodeBuilder): Code[Int] = x } \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt64.scala b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt64.scala index 2a2c0ba9672..7ebc3e5c3bf 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt64.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt64.scala @@ -3,81 +3,92 @@ package is.hail.types.physical.stypes.primitives import is.hail.annotations.Region import is.hail.asm4s.{Code, LongInfo, Settable, SettableBuilder, TypeInfo, Value} import is.hail.expr.ir.EmitCodeBuilder -import is.hail.types.physical.stypes.{SCode, SSettable, SType, SValue} -import is.hail.types.physical.{PInt64, PType} -import is.hail.types.virtual.{TInt64, Type} +import is.hail.types.physical.stypes.SCode +import is.hail.types.physical.{PCode, PInt64, PSettable, PType, PValue} import 
is.hail.utils.FastIndexedSeq -case object SInt64 extends SPrimitive { +case class SInt64(required: Boolean) extends SPrimitive { def ti: TypeInfo[_] = LongInfo - lazy val virtualType: Type = TInt64 - - override def castRename(t: Type): SType = this + override def pType: PInt64 = PInt64(required) def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { value.st match { - case SInt64 => value + case SInt64(r) => + if (r == required) + value + else + new SInt64Code(required, value.asInstanceOf[SInt64Code].code) } } def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(LongInfo) + def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = { + pt match { + case _: PInt64 => + new SInt64Code(required, Region.loadLong(addr)) + } + } + def fromSettables(settables: IndexedSeq[Settable[_]]): SInt64Settable = { val IndexedSeq(x: Settable[Long@unchecked]) = settables assert(x.ti == LongInfo) - new SInt64Settable(x) + new SInt64Settable(required, x) } def fromCodes(codes: IndexedSeq[Code[_]]): SInt64Code = { val IndexedSeq(x: Code[Long@unchecked]) = codes assert(x.ti == LongInfo) - new SInt64Code(x) + new SInt64Code(required, x) } - def canonicalPType(): PType = PInt64() + def canonicalPType(): PType = pType } -trait SInt64Value extends SValue { +trait PInt64Value extends PValue { def longCode(cb: EmitCodeBuilder): Code[Long] + } -class SInt64Code(val code: Code[Long]) extends SCode with SPrimitiveCode { +class SInt64Code(required: Boolean, val code: Code[Long]) extends PCode with SPrimitiveCode { override def _primitiveCode: Code[_] = code - def st: SInt64.type = SInt64 + val pt: PInt64 = PInt64(required) + + def st: SInt64 = SInt64(required) - def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = FastIndexedSeq(code) + def codeTuple(): IndexedSeq[Code[_]] = FastIndexedSeq(code) - private[this] def memoizeWithBuilder(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): SInt64Value = { - val s = new SInt64Settable(sb.newSettable[Long]("sint64_memoize")) + private[this] def memoizeWithBuilder(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): PInt64Value = { + val s = new SInt64Settable(required, sb.newSettable[Long]("sint64_memoize")) s.store(cb, this) s } - def memoize(cb: EmitCodeBuilder, name: String): SInt64Value = memoizeWithBuilder(cb, name, cb.localBuilder) + def memoize(cb: EmitCodeBuilder, name: String): PInt64Value = memoizeWithBuilder(cb, name, cb.localBuilder) - def memoizeField(cb: EmitCodeBuilder, name: String): SInt64Value = memoizeWithBuilder(cb, name, cb.fieldBuilder) + def memoizeField(cb: EmitCodeBuilder, name: String): PInt64Value = memoizeWithBuilder(cb, name, cb.fieldBuilder) def longCode(cb: EmitCodeBuilder): Code[Long] = code } object SInt64Settable { - def apply(sb: SettableBuilder, name: String): SInt64Settable = { - new SInt64Settable(sb.newSettable[Long](name)) + def apply(sb: SettableBuilder, name: String, required: Boolean): SInt64Settable = { + new SInt64Settable(required, sb.newSettable[Long](name)) } } -class SInt64Settable(x: Settable[Long]) extends SInt64Value with SSettable { - val pt: PInt64 = PInt64(false) +class SInt64Settable(required: Boolean, x: Settable[Long]) extends PInt64Value with PSettable { + val pt: PInt64 = PInt64(required) - def st: SInt64.type = SInt64 + def st: SInt64 = SInt64(required) - def store(cb: EmitCodeBuilder, v: SCode): Unit = cb.assign(x, v.asLong.longCode(cb)) + def store(cb: EmitCodeBuilder, v: PCode): Unit = cb.assign(x, 
v.asLong.longCode(cb)) def settableTuple(): IndexedSeq[Settable[_]] = FastIndexedSeq(x) - def get: SCode = new SInt64Code(x) + def get: PCode = new SInt64Code(required, x) def longCode(cb: EmitCodeBuilder): Code[Long] = x } \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/types/virtual/TStruct.scala b/hail/src/main/scala/is/hail/types/virtual/TStruct.scala index d068c0fbe57..f96fdb7a9a4 100644 --- a/hail/src/main/scala/is/hail/types/virtual/TStruct.scala +++ b/hail/src/main/scala/is/hail/types/virtual/TStruct.scala @@ -1,7 +1,7 @@ package is.hail.types.virtual import is.hail.annotations.{Annotation, AnnotationPathException, _} -import is.hail.expr.ir.{Env, IRParser, IntArrayBuilder} +import is.hail.expr.ir.{Env, IRParser} import is.hail.types.physical.{PField, PStruct} import is.hail.utils._ import org.apache.spark.sql.Row @@ -185,7 +185,7 @@ final case class TStruct(fields: IndexedSeq[Field]) extends TBaseStruct { def annotate(other: TStruct): (TStruct, Merger) = { val newFieldsBuilder = new BoxedArrayBuilder[(String, Type)]() - val fieldIdxBuilder = new IntArrayBuilder() + val fieldIdxBuilder = new BoxedArrayBuilder[Int]() // In fieldIdxBuilder, positive integers are field indices from the left. // Negative integers are the complement of field indices from the right. diff --git a/hail/src/main/scala/is/hail/utils/ArrayStack.scala b/hail/src/main/scala/is/hail/utils/ArrayStack.scala index 6c388cae087..4e197ae8823 100644 --- a/hail/src/main/scala/is/hail/utils/ArrayStack.scala +++ b/hail/src/main/scala/is/hail/utils/ArrayStack.scala @@ -2,7 +2,7 @@ package is.hail.utils import scala.reflect.ClassTag -final class ObjectArrayStack[T <: AnyRef](hintSize: Int = 16)(implicit tct: ClassTag[T]) { +final class ObjectArrayStack[T](hintSize: Int = 16)(implicit tct: ClassTag[T]) { private var a = new Array[T](hintSize) private[this] var size_ = 0 diff --git a/hail/src/main/scala/is/hail/utils/BoxedArrayBuilder.scala b/hail/src/main/scala/is/hail/utils/BoxedArrayBuilder.scala index 8fe07fab815..7472f83479e 100644 --- a/hail/src/main/scala/is/hail/utils/BoxedArrayBuilder.scala +++ b/hail/src/main/scala/is/hail/utils/BoxedArrayBuilder.scala @@ -6,7 +6,7 @@ object BoxedArrayBuilder { final val defaultInitialCapacity: Int = 16 } -final class BoxedArrayBuilder[T <: AnyRef](initialCapacity: Int)(implicit tct: ClassTag[T]) extends Serializable { +final class BoxedArrayBuilder[T](initialCapacity: Int)(implicit tct: ClassTag[T]) extends Serializable { private[utils] var b: Array[T] = new Array[T](initialCapacity) private[utils] var size_ : Int = 0 diff --git a/hail/src/main/scala/is/hail/utils/Graph.scala b/hail/src/main/scala/is/hail/utils/Graph.scala index b420705a8af..7e82d704011 100644 --- a/hail/src/main/scala/is/hail/utils/Graph.scala +++ b/hail/src/main/scala/is/hail/utils/Graph.scala @@ -2,9 +2,8 @@ package is.hail.utils import is.hail.annotations.{Region, RegionValueBuilder} import is.hail.asm4s._ -import is.hail.types.physical.{PCanonicalTuple, PTuple, PType, stypes} +import is.hail.types.physical.{PCanonicalTuple, PTuple, PType, PTypeReferenceSingleCodeType} import is.hail.expr.ir.{Compile, ExecuteContext, IR, IRParser, IRParserEnvironment, Interpret, Literal, MakeTuple, SingleCodeEmitParamType} -import is.hail.types.physical.stypes.PTypeReferenceSingleCodeType import is.hail.types.virtual._ import org.apache.spark.sql.Row diff --git a/hail/src/main/scala/is/hail/utils/HailIterator.scala b/hail/src/main/scala/is/hail/utils/HailIterator.scala index afdd0d9393e..a13ad5b72e1 100644 
--- a/hail/src/main/scala/is/hail/utils/HailIterator.scala +++ b/hail/src/main/scala/is/hail/utils/HailIterator.scala @@ -1,6 +1,5 @@ package is.hail.utils -import scala.collection.mutable import scala.reflect.ClassTag abstract class HailIterator[@specialized T] { @@ -9,10 +8,10 @@ abstract class HailIterator[@specialized T] { def hasNext: Boolean def toArray(implicit tct: ClassTag[T]): Array[T] = { - val b = new mutable.ArrayBuffer[T]() + val b = new BoxedArrayBuilder[T]() while (hasNext) b += next() - b.toArray + b.result() } def countNonNegative()(implicit ev: Numeric[T]): Int = { diff --git a/hail/src/main/scala/is/hail/utils/MissingAnnotationArrayBuilder.scala b/hail/src/main/scala/is/hail/utils/MissingAnnotationArrayBuilder.scala new file mode 100644 index 00000000000..b5f1eb6bd82 --- /dev/null +++ b/hail/src/main/scala/is/hail/utils/MissingAnnotationArrayBuilder.scala @@ -0,0 +1,68 @@ +package is.hail.utils + +import is.hail.annotations.{Annotation, RegionValueBuilder} +import is.hail.types.virtual.Type + +import scala.collection.mutable + +class MissingAnnotationArrayBuilder extends Serializable { + private var len = 0 + private var elements = new BoxedArrayBuilder[Annotation]() + private var isMissing = new mutable.BitSet() + + def addMissing() { + isMissing.add(len) + len += 1 + } + + def add(x: Annotation) { + elements += x + len += 1 + } + + def length(): Int = len + + def foreach(whenMissing: (Int) => Unit)(whenPresent: (Int, Annotation) => Unit) { + var i = 0 + var j = 0 + while (i < len) { + if (isMissing(i)) + whenMissing(i) + else { + whenPresent(i, elements(j)) + j += 1 + } + i += 1 + } + } + + def write(rvb: RegionValueBuilder, t: Type) { + rvb.startArray(len) + var i = 0 + var j = 0 + while (i < len) { + if (isMissing(i)) + rvb.setMissing() + else { + rvb.addAnnotation(t, elements(j)) + j += 1 + } + i += 1 + } + rvb.endArray() + } + + def clear() { + len = 0 + elements.clear() + isMissing.clear() + } + + override def clone(): MissingAnnotationArrayBuilder = { + val ab = new MissingAnnotationArrayBuilder() + ab.len = len + ab.elements = elements.clone() + ab.isMissing = isMissing.clone() + ab + } +} diff --git a/hail/src/main/scala/is/hail/utils/MissingBooleanArrayBuilder.scala b/hail/src/main/scala/is/hail/utils/MissingBooleanArrayBuilder.scala new file mode 100644 index 00000000000..31506c4d155 --- /dev/null +++ b/hail/src/main/scala/is/hail/utils/MissingBooleanArrayBuilder.scala @@ -0,0 +1,65 @@ +package is.hail.utils + +import is.hail.annotations._ +import is.hail.types.virtual.{TArray, TBoolean} + +import scala.collection.mutable + +class MissingBooleanArrayBuilder extends Serializable { + private var len = 0 + private var elements = new mutable.BitSet() + private var isMissing = new mutable.BitSet() + + def addMissing() { + isMissing.add(len) + len += 1 + } + + def add(x: Boolean) { + if (x) + elements.add(len) + len += 1 + } + + def length(): Int = len + + def foreach(whenMissing: (Int) => Unit)(whenPresent: (Int, Boolean) => Unit) { + var i = 0 + while (i < len) { + if (isMissing(i)) + whenMissing(i) + else + whenPresent(i, elements(i)) + i += 1 + } + } + + val typ = TArray(TBoolean) + + def write(rvb: RegionValueBuilder) { + rvb.startArray(len) + var i = 0 + while (i < len) { + if (isMissing(i)) + rvb.setMissing() + else + rvb.addBoolean(elements(i)) + i += 1 + } + rvb.endArray() + } + + def clear() { + len = 0 + elements.clear() + isMissing.clear() + } + + override def clone(): MissingBooleanArrayBuilder = { + val ab = new MissingBooleanArrayBuilder() + ab.len 
= len + ab.elements = elements.clone() + ab.isMissing = isMissing.clone() + ab + } +} diff --git a/hail/src/main/scala/is/hail/utils/MissingDoubleArrayBuilder.scala b/hail/src/main/scala/is/hail/utils/MissingDoubleArrayBuilder.scala new file mode 100644 index 00000000000..a300f915d51 --- /dev/null +++ b/hail/src/main/scala/is/hail/utils/MissingDoubleArrayBuilder.scala @@ -0,0 +1,70 @@ +package is.hail.utils + +import is.hail.annotations._ +import is.hail.types.virtual.{TArray, TFloat64} + +import scala.collection.mutable + +class MissingDoubleArrayBuilder extends Serializable { + private var len = 0 + private var elements = new BoxedArrayBuilder[Double]() + private var isMissing = new mutable.BitSet() + + def addMissing() { + isMissing.add(len) + len += 1 + } + + def add(x: Double) { + elements += x + len += 1 + } + + def length(): Int = len + + def foreach(whenMissing: (Int) => Unit)(whenPresent: (Int, Double) => Unit) { + var i = 0 + var j = 0 + while (i < len) { + if (isMissing(i)) + whenMissing(i) + else { + whenPresent(i, elements(j)) + j += 1 + } + i += 1 + } + } + + val typ = TArray(TFloat64) + + def write(rvb: RegionValueBuilder) { + rvb.startArray(len) + var i = 0 + var j = 0 + while (i < len) { + if (isMissing(i)) + rvb.setMissing() + else { + rvb.addDouble(elements(j)) + j += 1 + } + i += 1 + } + rvb.endArray() + } + + def clear() { + len = 0 + elements.clear() + isMissing.clear() + } + + override def clone(): MissingDoubleArrayBuilder = { + val ab = new MissingDoubleArrayBuilder() + ab.len = len + ab.elements = elements.clone() + ab.isMissing = isMissing.clone() + ab + } +} diff --git a/hail/src/main/scala/is/hail/utils/MissingFloatArrayBuilder.scala b/hail/src/main/scala/is/hail/utils/MissingFloatArrayBuilder.scala new file mode 100644 index 00000000000..e8dca65c818 --- /dev/null +++ b/hail/src/main/scala/is/hail/utils/MissingFloatArrayBuilder.scala @@ -0,0 +1,70 @@ +package is.hail.utils + +import is.hail.annotations._ +import is.hail.types.virtual.{TArray, TFloat32} + +import scala.collection.mutable + +class MissingFloatArrayBuilder extends Serializable { + private var len = 0 + private var elements = new BoxedArrayBuilder[Float]() + private var isMissing = new mutable.BitSet() + + def addMissing() { + isMissing.add(len) + len += 1 + } + + def add(x: Float) { + elements += x + len += 1 + } + + def length(): Int = len + + def foreach(whenMissing: (Int) => Unit)(whenPresent: (Int, Float) => Unit) { + var i = 0 + var j = 0 + while (i < len) { + if (isMissing(i)) + whenMissing(i) + else { + whenPresent(i, elements(j)) + j += 1 + } + i += 1 + } + } + + val typ = TArray(TFloat32) + + def write(rvb: RegionValueBuilder) { + rvb.startArray(len) + var i = 0 + var j = 0 + while (i < len) { + if (isMissing(i)) + rvb.setMissing() + else { + rvb.addFloat(elements(j)) + j += 1 + } + i += 1 + } + rvb.endArray() + } + + def clear() { + len = 0 + elements.clear() + isMissing.clear() + } + + override def clone(): MissingFloatArrayBuilder = { + val ab = new MissingFloatArrayBuilder() + ab.len = len + ab.elements = elements.clone() + ab.isMissing = isMissing.clone() + ab + } +} diff --git a/hail/src/main/scala/is/hail/utils/MissingIntArrayBuilder.scala b/hail/src/main/scala/is/hail/utils/MissingIntArrayBuilder.scala new file mode 100644 index 00000000000..4557cc01c95 --- /dev/null +++ b/hail/src/main/scala/is/hail/utils/MissingIntArrayBuilder.scala @@ -0,0 +1,70 @@ +package is.hail.utils + +import is.hail.annotations._ +import is.hail.types.virtual.{TArray, TInt32} + +import 
scala.collection.mutable + +class MissingIntArrayBuilder extends Serializable { + private var len = 0 + private var elements = new BoxedArrayBuilder[Int]() + private var isMissing = new mutable.BitSet() + + def addMissing() { + isMissing.add(len) + len += 1 + } + + def add(x: Int) { + elements += x + len += 1 + } + + def length(): Int = len + + def foreach(whenMissing: (Int) => Unit)(whenPresent: (Int, Int) => Unit) { + var i = 0 + var j = 0 + while (i < len) { + if (isMissing(i)) + whenMissing(i) + else { + whenPresent(i, elements(j)) + j += 1 + } + i += 1 + } + } + + val typ = TArray(TInt32) + + def write(rvb: RegionValueBuilder) { + rvb.startArray(len) + var i = 0 + var j = 0 + while (i < len) { + if (isMissing(i)) + rvb.setMissing() + else { + rvb.addInt(elements(j)) + j += 1 + } + i += 1 + } + rvb.endArray() + } + + def clear() { + len = 0 + elements.clear() + isMissing.clear() + } + + override def clone(): MissingIntArrayBuilder = { + val ab = new MissingIntArrayBuilder() + ab.len = len + ab.elements = elements.clone() + ab.isMissing = isMissing.clone() + ab + } +} diff --git a/hail/src/main/scala/is/hail/utils/MissingLongArrayBuilder.scala b/hail/src/main/scala/is/hail/utils/MissingLongArrayBuilder.scala new file mode 100644 index 00000000000..522da7c67e4 --- /dev/null +++ b/hail/src/main/scala/is/hail/utils/MissingLongArrayBuilder.scala @@ -0,0 +1,70 @@ +package is.hail.utils + +import is.hail.annotations._ +import is.hail.types.virtual.{TArray, TInt64} + +import scala.collection.mutable + +class MissingLongArrayBuilder extends Serializable { + private var len = 0 + private var elements = new BoxedArrayBuilder[Long]() + private var isMissing = new mutable.BitSet() + + def addMissing() { + isMissing.add(len) + len += 1 + } + + def add(x: Long) { + elements += x + len += 1 + } + + def length(): Int = len + + def foreach(whenMissing: (Int) => Unit)(whenPresent: (Int, Long) => Unit) { + var i = 0 + var j = 0 + while (i < len) { + if (isMissing(i)) + whenMissing(i) + else { + whenPresent(i, elements(j)) + j += 1 + } + i += 1 + } + } + + val typ = TArray(TInt64) + + def write(rvb: RegionValueBuilder) { + rvb.startArray(len) + var i = 0 + var j = 0 + while (i < len) { + if (isMissing(i)) + rvb.setMissing() + else { + rvb.addLong(elements(j)) + j += 1 + } + i += 1 + } + rvb.endArray() + } + + def clear() { + len = 0 + elements.clear() + isMissing.clear() + } + + override def clone(): MissingLongArrayBuilder = { + val ab = new MissingLongArrayBuilder() + ab.len = len + ab.elements = elements.clone() + ab.isMissing = isMissing.clone() + ab + } +} diff --git a/hail/src/main/scala/is/hail/utils/TextTableReader.scala b/hail/src/main/scala/is/hail/utils/TextTableReader.scala index ec6267505b5..9dc05818379 100644 --- a/hail/src/main/scala/is/hail/utils/TextTableReader.scala +++ b/hail/src/main/scala/is/hail/utils/TextTableReader.scala @@ -333,8 +333,6 @@ class TextTableReader( PCanonicalStruct.empty(required = true) } - def renderShort(): String = defaultRender() - def executeGeneric(ctx: ExecuteContext): GenericTableValue = { val fs = ctx.fs diff --git a/hail/src/main/scala/is/hail/utils/package.scala b/hail/src/main/scala/is/hail/utils/package.scala index dd3267e07c2..8719cb9459a 100644 --- a/hail/src/main/scala/is/hail/utils/package.scala +++ b/hail/src/main/scala/is/hail/utils/package.scala @@ -7,9 +7,9 @@ import java.security.SecureRandom import java.text.SimpleDateFormat import java.util.{Base64, Date} import java.util.zip.{Deflater, Inflater} + import is.hail.annotations.ExtendedOrdering 
import is.hail.check.Gen -import is.hail.expr.ir.ByteArrayBuilder import org.apache.commons.io.output.TeeOutputStream import org.apache.commons.lang3.StringUtils import org.apache.hadoop.fs.PathIOException @@ -801,7 +801,7 @@ package object utils extends Logging } } - def compress(bb: ByteArrayBuilder, input: Array[Byte]): Int = { + def compress(bb: BoxedArrayBuilder[Byte], input: Array[Byte]): Int = { val compressor = new Deflater() compressor.setInput(input) compressor.finish() diff --git a/hail/src/main/scala/is/hail/utils/richUtils/RichCodeInputBuffer.scala b/hail/src/main/scala/is/hail/utils/richUtils/RichCodeInputBuffer.scala index 55c53767cf4..3455d14a56d 100644 --- a/hail/src/main/scala/is/hail/utils/richUtils/RichCodeInputBuffer.scala +++ b/hail/src/main/scala/is/hail/utils/richUtils/RichCodeInputBuffer.scala @@ -6,9 +6,6 @@ import is.hail.io.InputBuffer import is.hail.utils._ import is.hail.asm4s._ import is.hail.types.physical._ -import is.hail.types.physical.stypes.SCode -import is.hail.types.physical.stypes.interfaces._ -import is.hail.types.virtual._ class RichCodeInputBuffer( val ib: Value[InputBuffer] @@ -90,11 +87,11 @@ class RichCodeInputBuffer( ib.invoke[Region, Long, Int, Unit]("readBytes", toRegion, toOff, n) } - def readPrimitive(t: Type): SCode = t match { - case TBoolean => primitive(readBoolean()) - case TInt32 => primitive(readInt()) - case TInt64 => primitive(readLong()) - case TFloat32 => primitive(readFloat()) - case TFloat64 => primitive(readDouble()) + def readPrimitive(typ: PType): Code[_] = typ match { + case _: PBoolean => readBoolean() + case _: PInt32 => readInt() + case _: PInt64 => readLong() + case _: PFloat32 => readFloat() + case _: PFloat64 => readDouble() } } diff --git a/hail/src/main/scala/is/hail/utils/richUtils/RichCodeOutputBuffer.scala b/hail/src/main/scala/is/hail/utils/richUtils/RichCodeOutputBuffer.scala index be5c2f5c114..01179cccd9b 100644 --- a/hail/src/main/scala/is/hail/utils/richUtils/RichCodeOutputBuffer.scala +++ b/hail/src/main/scala/is/hail/utils/richUtils/RichCodeOutputBuffer.scala @@ -2,11 +2,8 @@ package is.hail.utils.richUtils import is.hail.annotations.Region import is.hail.asm4s._ -import is.hail.expr.ir.EmitCodeBuilder import is.hail.types.physical._ import is.hail.io.OutputBuffer -import is.hail.types.physical.stypes.SCode -import is.hail.types.virtual._ class RichCodeOutputBuffer( val ob: Value[OutputBuffer] @@ -59,11 +56,11 @@ class RichCodeOutputBuffer( def writeUTF(s: Code[String]): Code[Unit] = ob.invoke[String, Unit]("writeUTF", s) - def writePrimitive(cb: EmitCodeBuilder, pc: SCode): Unit = pc.st.virtualType match { - case TBoolean => cb += writeBoolean(pc.asBoolean.boolCode(cb)) - case TInt32 => cb += writeInt(pc.asInt.intCode(cb)) - case TInt64 => cb += writeLong(pc.asLong.longCode(cb)) - case TFloat32 => cb += writeFloat(pc.asFloat.floatCode(cb)) - case TFloat64 => cb += writeDouble(pc.asDouble.doubleCode(cb)) + def writePrimitive(typ: PType): Code[_] => Code[Unit] = typ match { + case _: PBoolean => v => writeBoolean(coerce[Boolean](v)) + case _: PInt32 => v => writeInt(coerce[Int](v)) + case _: PInt64 => v => writeLong(coerce[Long](v)) + case _: PFloat32 => v => writeFloat(coerce[Float](v)) + case _: PFloat64 => v => writeDouble(coerce[Double](v)) } } diff --git a/hail/src/main/scala/is/hail/utils/richUtils/RichRow.scala b/hail/src/main/scala/is/hail/utils/richUtils/RichRow.scala index 94bb63fe33e..52899f482af 100644 --- a/hail/src/main/scala/is/hail/utils/richUtils/RichRow.scala +++ 
b/hail/src/main/scala/is/hail/utils/richUtils/RichRow.scala @@ -3,8 +3,6 @@ package is.hail.utils.richUtils import is.hail.utils.BoxedArrayBuilder import org.apache.spark.sql.Row -import scala.collection.mutable - class RichRow(r: Row) { def update(i: Int, a: Any): Row = { @@ -21,18 +19,18 @@ class RichRow(r: Row) { } def append(a: Any): Row = { - val ab = new mutable.ArrayBuffer[Any]() + val ab = new BoxedArrayBuilder[Any]() ab ++= r.toSeq ab += a - Row.fromSeq(ab) + Row.fromSeq(ab.result()) } def insertBefore(i: Int, a: Any): Row = { - val ab = new mutable.ArrayBuffer[Any]() + val ab = new BoxedArrayBuilder[Any]() (0 until i).foreach(ab += r.get(_)) ab += a (i until r.size).foreach(ab += r.get(_)) - Row.fromSeq(ab) + Row.fromSeq(ab.result()) } def truncate(newSize: Int): Row = { diff --git a/hail/src/main/scala/is/hail/utils/richUtils/RichString.scala b/hail/src/main/scala/is/hail/utils/richUtils/RichString.scala index e871170e64f..ff57815dfea 100644 --- a/hail/src/main/scala/is/hail/utils/richUtils/RichString.scala +++ b/hail/src/main/scala/is/hail/utils/richUtils/RichString.scala @@ -9,14 +9,13 @@ class RichString(val str: String) extends AnyVal { def strings: (String, String) = (truncate, str) } - def equalsCaseInsensitive(other: String): Boolean = + def equalsCI(other: String): Boolean = if (str.length == other.length) { - var i = 0 - while (i < str.length) { + for (i <- 0 until str.length) if ((str charAt i).toLower != (other charAt i).toLower) return false - i += 1 - } true - } else false + } + else + false } diff --git a/hail/src/main/scala/is/hail/variant/RegionValueVariant.scala b/hail/src/main/scala/is/hail/variant/RegionValueVariant.scala index 958b997241a..0584d17edf8 100644 --- a/hail/src/main/scala/is/hail/variant/RegionValueVariant.scala +++ b/hail/src/main/scala/is/hail/variant/RegionValueVariant.scala @@ -1,12 +1,12 @@ package is.hail.variant import is.hail.annotations._ -import is.hail.types.physical.{PArray, PInt32, PLocus, PString, PStruct} +import is.hail.types.physical.{PArray, PLocus, PString, PStruct} import is.hail.utils._ class RegionValueVariant(rowType: PStruct) extends View { private val locusField = rowType.fieldByName("locus") - private val locusPType = locusField.typ + private val locusPType = locusField.typ.asInstanceOf[PLocus] private val allelesField = rowType.fieldByName("alleles") private val locusIdx = locusField.index private val allelesIdx = allelesField.index @@ -31,23 +31,12 @@ class RegionValueVariant(rowType: PStruct) extends View { } def contig(): String = { - if (cachedContig == null) { - locusPType match { - case pl: PLocus => - cachedContig = pl.contig(locusAddress) - case s: PStruct => - cachedContig = s.types(0).asInstanceOf[PString].loadString(s.loadField(locusAddress, 0)) - } - } + if (cachedContig == null) + cachedContig = locusPType.contig(locusAddress) cachedContig } - def position(): Int = locusPType match { - case pl: PLocus => - pl.position(locusAddress) - case s: PStruct => - s.types(1).asInstanceOf[PInt32].unstagedLoadFromAddress(s.loadField(locusAddress, 1)) - } + def position(): Int = locusPType.position(locusAddress) def alleles(): Array[String] = { if (cachedAlleles == null) { diff --git a/hail/src/test/resources/bad_flag_number.vcf b/hail/src/test/resources/bad_flag_number.vcf deleted file mode 100644 index 0ce2984ae83..00000000000 --- a/hail/src/test/resources/bad_flag_number.vcf +++ /dev/null @@ -1,91 +0,0 @@ -##fileformat=VCFv4.2 -##hailversion=0.2.67-40d373134612 -##INFO= -##contig= -##contig= -##contig= -##contig= 
-##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -#CHROM POS ID REF ALT QUAL FILTER INFO -1 1 . A C . . BAD_FLAG -1 2 . A C . . BAD_FLAG -1 3 . A C . . BAD_FLAG diff --git a/hail/src/test/scala/is/hail/TestUtils.scala b/hail/src/test/scala/is/hail/TestUtils.scala index 5929df29285..d448da1a4c3 100644 --- a/hail/src/test/scala/is/hail/TestUtils.scala +++ b/hail/src/test/scala/is/hail/TestUtils.scala @@ -1,6 +1,7 @@ package is.hail import java.io.{File, PrintWriter} + import breeze.linalg.{DenseMatrix, Matrix, Vector} import is.hail.ExecStrategy.ExecStrategy import is.hail.annotations.{Region, RegionValueBuilder, SafeRow} @@ -9,17 +10,14 @@ import is.hail.backend.spark.SparkBackend import is.hail.expr.ir._ import is.hail.expr.ir.{BindingEnv, MakeTuple, Subst} import is.hail.expr.ir.lowering.LowererUnsupportedOperation -import is.hail.types.physical.{PBaseStruct, PCanonicalArray, PType, stypes} +import is.hail.types.physical.{PBaseStruct, PCanonicalArray, PType, PTypeReferenceSingleCodeType} import is.hail.types.virtual._ import is.hail.io.vcf.MatrixVCFReader -import is.hail.types.physical.stypes.PTypeReferenceSingleCodeType import is.hail.utils._ import is.hail.variant._ import org.apache.spark.SparkException import org.apache.spark.sql.Row -import scala.collection.mutable - object ExecStrategy extends Enumeration { type ExecStrategy = Value val Interpret, InterpretUnoptimized, JvmCompile, LoweredJVMCompile, JvmCompileUnoptimized = Value @@ -177,7 +175,7 @@ object TestUtils { ctx: ExecuteContext ): Any = { val inputTypesB = new BoxedArrayBuilder[Type]() - val inputsB = new mutable.ArrayBuffer[Any]() + val inputsB = new BoxedArrayBuilder[Any]() args.foreach { case (v, t) => inputsB += v @@ -348,6 +346,7 @@ object TestUtils { } filteredExecStrats.foreach { strat => + InferPType.clearPTypes(x) try { val res = strat match { case ExecStrategy.Interpret => diff --git a/hail/src/test/scala/is/hail/annotations/StagedConstructorSuite.scala b/hail/src/test/scala/is/hail/annotations/StagedConstructorSuite.scala index 14578eabbd8..69e85051979 100644 --- a/hail/src/test/scala/is/hail/annotations/StagedConstructorSuite.scala +++ b/hail/src/test/scala/is/hail/annotations/StagedConstructorSuite.scala @@ -3,7 +3,7 @@ package is.hail.annotations import is.hail.HailSuite import is.hail.asm4s._ import is.hail.check.{Gen, Prop} -import is.hail.expr.ir.{EmitCode, EmitFunctionBuilder, IEmitCode, RequirednessSuite} +import is.hail.expr.ir.{EmitCode, EmitFunctionBuilder, IEmitCode} import is.hail.types.physical._ import is.hail.types.physical.stypes.concrete.SStringPointer import is.hail.types.physical.stypes.interfaces._ @@ -362,7 +362,7 @@ class StagedConstructorSuite extends HailSuite { 
fb.emitWithBuilder { cb => val region = fb.emb.getCodeParam[Region](1) rt.constructFromElements(cb, region, const(2), deepCopy = false) { (cb, idx) => - IEmitCode(cb, idx > 0, new SInt32Code(fb.getCodeParam[Int](2))) + IEmitCode(cb, idx > 0, new SInt32Code(false, fb.getCodeParam[Int](2))) }.a } @@ -436,7 +436,7 @@ class StagedConstructorSuite extends HailSuite { val fb = EmitFunctionBuilder[Region, Long, Long](ctx, "deep_copy") fb.emitWithBuilder[Long](cb => t.store(cb, fb.apply_method.getCodeParam[Region](1), - t.loadCheapSCode(cb, fb.apply_method.getCodeParam[Long](2)), + t.loadCheapPCode(cb, fb.apply_method.getCodeParam[Long](2)), deepCopy = true)) val copyF = fb.resultWithIndex()(ctx.fs, 0, region) val newOff = copyF(region, src) @@ -462,7 +462,7 @@ class StagedConstructorSuite extends HailSuite { "x3" -> PCanonicalArray(PInt32(true), required = true), "x4" -> PCanonicalSet(PCanonicalStruct(true, "y" -> PCanonicalString(true)), required = false) ), required = false) - val t2 = RequirednessSuite.deepInnerRequired(t1, false) + val t2 = t1.deepInnerRequired(false) val value = IndexedSeq( Row(1, IndexedSeq(1,2,3), IndexedSeq(0, -1), Set(Row("asdasdasd"), Row(""))), @@ -490,7 +490,7 @@ class StagedConstructorSuite extends HailSuite { "x3" -> PCanonicalArray(PInt32(true), required = true), "x4" -> PCanonicalSet(PCanonicalStruct(true, "y" -> PCanonicalString(true)), required = false) ), required = false)) - val t2 = RequirednessSuite.deepInnerRequired(t1, false).asInstanceOf[PCanonicalStruct] + val t2 = t1.deepInnerRequired(false).asInstanceOf[PCanonicalStruct] val value = IndexedSeq( Row(1, IndexedSeq(1,2,3), IndexedSeq(0, -1), Set(Row("asdasdasd"), Row(""))), @@ -505,7 +505,7 @@ class StagedConstructorSuite extends HailSuite { val f1 = EmitFunctionBuilder[Long](ctx, "stagedCopy1") f1.emitWithBuilder { cb => val region = f1.partitionRegion - t2.constructFromFields(cb, region, FastIndexedSeq(EmitCode.present(cb.emb, t2.types(0).loadCheapSCode(cb, v1))), deepCopy = false).a + t2.constructFromFields(cb, region, FastIndexedSeq(EmitCode.present(cb.emb, t2.types(0).loadCheapPCode(cb, v1))), deepCopy = false).a } val cp1 = f1.resultWithIndex()(ctx.fs, 0, r)() assert(SafeRow.read(t2, cp1) == Row(value)) @@ -513,7 +513,7 @@ class StagedConstructorSuite extends HailSuite { val f2 = EmitFunctionBuilder[Long](ctx, "stagedCopy2") f2.emitWithBuilder { cb => val region = f2.partitionRegion - t1.constructFromFields(cb, region, FastIndexedSeq(EmitCode.present(cb.emb, t2.types(0).loadCheapSCode(cb, v1))), deepCopy = false).a + t1.constructFromFields(cb, region, FastIndexedSeq(EmitCode.present(cb.emb, t2.types(0).loadCheapPCode(cb, v1))), deepCopy = false).a } val cp2 = f2.resultWithIndex()(ctx.fs, 0, r)() assert(SafeRow.read(t1, cp2) == Row(value)) diff --git a/hail/src/test/scala/is/hail/asm4s/ASM4SSuite.scala b/hail/src/test/scala/is/hail/asm4s/ASM4SSuite.scala index 5c67dd79ad5..6e901204adf 100644 --- a/hail/src/test/scala/is/hail/asm4s/ASM4SSuite.scala +++ b/hail/src/test/scala/is/hail/asm4s/ASM4SSuite.scala @@ -345,7 +345,7 @@ class ASM4SSuite extends TestNGSuite { intField.store(fb.getArg[Int](1)), longField.store(fb.getArg[Long](2)), booleanField.store(fb.getArg[Boolean](3))) - + typeInfo[T] match { case IntInfo => fb.emit(Code(c, intField.load())) case LongInfo => fb.emit(Code(c, longField.load())) @@ -401,6 +401,48 @@ class ASM4SSuite extends TestNGSuite { assert(fb.result()()() == 1) } + @Test def fbFunctionsCanBeNested(): Unit = { + val fb = FunctionBuilder[Boolean]("F") + val fb2 = 
fb.cb.genDependentFunction[Int, Boolean]("DepF") + val localF = fb.genFieldThisRef[AsmFunction1[Int, Boolean]]() + + val wrappedInt = Code.invokeStatic1[java.lang.Integer, Int, java.lang.Integer]("valueOf", 0) + val rawOut = localF.load().invoke[java.lang.Object, java.lang.Object]("apply", wrappedInt) + + fb2.emit(true) + fb.emit(Code( + localF := fb2.newInstance(fb.apply_method), + checkcast[java.lang.Boolean](rawOut).invoke[Boolean]("booleanValue") + )) + + val f = fb.result()() + assert(f()) + } + + @Test def dependentFunctionsCanUseParentsFields(): Unit = { + val fb = FunctionBuilder[Int, Int, Int]("F") + val fb2 = fb.cb.genDependentFunction[Int, Int]("DepF") + + val localF = fb.genFieldThisRef[AsmFunction1[Int, Int]]() + + val field1 = fb.genFieldThisRef[Int]() + val field2 = fb2.newDepField[Int](field1.load()) + + def wrappedCall(c: Code[Int]) = + localF.load().invoke[java.lang.Object, java.lang.Object]("apply", + Code.invokeStatic1[java.lang.Integer, Int, java.lang.Integer]("valueOf", c)) + + fb2.emit(field2 + fb2.getArg[Int](1)) + fb.emit(Code( + field1 := fb.getArg[Int](1), + localF := fb2.newInstance(fb.apply_method), + checkcast[java.lang.Integer](wrappedCall(fb.getArg[Int](2))).invoke[Int]("intValue") + )) + + val f = fb.result()() + assert(f(1, 2) == 3) + } + @Test def testInitialize(): Unit = { val fb = FunctionBuilder[Boolean, Int]("F") val l = fb.newLocal[Int]() diff --git a/hail/src/test/scala/is/hail/expr/ir/Aggregators2Suite.scala b/hail/src/test/scala/is/hail/expr/ir/Aggregators2Suite.scala index 10c9e558feb..33e0da58d9d 100644 --- a/hail/src/test/scala/is/hail/expr/ir/Aggregators2Suite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/Aggregators2Suite.scala @@ -6,10 +6,9 @@ import is.hail.annotations._ import is.hail.asm4s._ import is.hail.expr.ir.agg._ import is.hail.types.{MatrixType, RPrimitive, TypeWithRequiredness, VirtualTypeWithReq} -import is.hail.types.physical.{stypes, _} +import is.hail.types.physical._ import is.hail.types.virtual._ import is.hail.io.BufferSpec -import is.hail.types.physical.stypes.PTypeReferenceSingleCodeType import is.hail.utils._ import is.hail.variant.{Call0, Call1, Call2} import is.hail.{ExecStrategy, HailSuite} diff --git a/hail/src/test/scala/is/hail/expr/ir/ArrayFunctionsSuite.scala b/hail/src/test/scala/is/hail/expr/ir/ArrayFunctionsSuite.scala index b1c34a16341..a00ad9d4c64 100644 --- a/hail/src/test/scala/is/hail/expr/ir/ArrayFunctionsSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/ArrayFunctionsSuite.scala @@ -186,9 +186,9 @@ class ArrayFunctionsSuite extends HailSuite { @Test(dataProvider = "arrayOpsData") def arrayOpsFPDiv(a: IndexedSeq[Integer], b: IndexedSeq[Integer]) { - assertEvalsTo(invoke("div", TArray(TFloat64), toIRArray(a), toIRArray(b)), + assertEvalsTo(invoke("div", TArray(TFloat32), toIRArray(a), toIRArray(b)), Option(a).zip(Option(b)).headOption.map { case (a0, b0) => - a0.zip(b0).map { case (i, j) => Option(i).zip(Option(j)).headOption.map[java.lang.Double] { case (m, n) => m.toDouble / n }.orNull } + a0.zip(b0).map { case (i, j) => Option(i).zip(Option(j)).headOption.map[java.lang.Float] { case (m, n) => m.toFloat / n }.orNull } }.orNull ) } diff --git a/hail/src/test/scala/is/hail/expr/ir/ETypeSuite.scala b/hail/src/test/scala/is/hail/expr/ir/ETypeSuite.scala index 3490d95f16d..6fca2c9bef3 100644 --- a/hail/src/test/scala/is/hail/expr/ir/ETypeSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/ETypeSuite.scala @@ -67,7 +67,7 @@ class ETypeSuite extends HailSuite { val ibArg = 
fb2.apply_method.getCodeParam[InputBuffer](2) val dec = eType.buildDecoderMethod(outPType.virtualType, fb2.apply_method.ecb) fb2.emitWithBuilder[Long] { cb => - val decoded = cb.invokeSCode(dec, regArg, ibArg) + val decoded = cb.invokePCode(dec, regArg, ibArg) outPType.store(cb, regArg, decoded, deepCopy = false) } diff --git a/hail/src/test/scala/is/hail/expr/ir/EmitStreamSuite.scala b/hail/src/test/scala/is/hail/expr/ir/EmitStreamSuite.scala index 9eb9c440d45..294878b4c5c 100644 --- a/hail/src/test/scala/is/hail/expr/ir/EmitStreamSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/EmitStreamSuite.scala @@ -2,7 +2,7 @@ package is.hail.expr.ir import is.hail.annotations.{Region, RegionValue, RegionValueBuilder, SafeRow, ScalaToRegionValue} import is.hail.asm4s._ -import is.hail.types.physical.{stypes, _} +import is.hail.types.physical._ import is.hail.types.virtual._ import is.hail.utils._ import is.hail.variant.Call2 @@ -12,7 +12,6 @@ import is.hail.expr.ir.streams.{EmitStream, StreamArgType, StreamUtils} import is.hail.types.physical.stypes.interfaces.SStreamCode import org.apache.spark.sql.Row import is.hail.TestUtils._ -import is.hail.types.physical.stypes.{PTypeReferenceSingleCodeType, SingleCodeSCode, StreamSingleCodeType} import org.testng.annotations.Test class EmitStreamSuite extends HailSuite { @@ -53,8 +52,11 @@ class EmitStreamSuite extends HailSuite { val fb = EmitFunctionBuilder[F](ctx, "F", (classInfo[Region]: ParamType) +: inputTypes.map(pt => pt: ParamType), LongInfo) val mb = fb.apply_method val ir = streamIR.deepCopy() + val usesAndDefs = ComputeUsesAndDefs(ir, errorIfFreeVariables = false) + val requiredness = Requiredness.apply(ir, usesAndDefs, null, Env.empty) // Value IR inference doesn't need context + InferPType(ir, Env.empty, requiredness, usesAndDefs) - val emitContext = EmitContext.analyze(ctx, ir) + val emitContext = new EmitContext(ctx, requiredness) var arrayType: PType = null mb.emit(EmitCodeBuilder.scopedCode(mb) { cb => @@ -64,10 +66,10 @@ class EmitStreamSuite extends HailSuite { case s => s } TypeCheck(s) - EmitStream.produce(new Emit(emitContext, fb.ecb), s, cb, region, EmitEnv(Env.empty, inputTypes.indices.map(i => mb.storeEmitParam(i + 2, cb))), None) + EmitStream.produce(new Emit(emitContext, fb.ecb), s, cb, region, Env.empty, None) .consumeCode[Long](cb, 0L, { s => val arr = StreamUtils.toArray(cb, s.asStream.producer, region) - val scp = SingleCodeSCode.fromSCode(cb, arr, region, false) + val scp = SingleCodePCode.fromPCode(cb, arr, region, false) arrayType = scp.typ.asInstanceOf[PTypeReferenceSingleCodeType].pt coerce[Long](scp.code) @@ -124,14 +126,18 @@ class EmitStreamSuite extends HailSuite { val mb = fb.apply_method val region = mb.getCodeParam[Region](1) val ir = streamIR.deepCopy() - val emitContext = EmitContext.analyze(ctx, ir) + val usesAndDefs = ComputeUsesAndDefs(ir, errorIfFreeVariables = false) + val requiredness = Requiredness.apply(ir, usesAndDefs, null, Env.empty) // Value IR inference doesn't need context + InferPType(ir, Env.empty, requiredness, usesAndDefs) + + val emitContext = new EmitContext(ctx, requiredness) fb.emitWithBuilder { cb => TypeCheck(ir) val len = cb.newLocal[Int]("len", 0) val len2 = cb.newLocal[Int]("len2", -1) - EmitStream.produce(new Emit(emitContext, fb.ecb), ir, cb, region, EmitEnv(Env.empty, FastIndexedSeq()), None) + EmitStream.produce(new Emit(emitContext, fb.ecb), ir, cb, region, Env.empty, None) .consume(cb, {}, { case stream: SStreamCode => diff --git 
a/hail/src/test/scala/is/hail/expr/ir/FunctionSuite.scala b/hail/src/test/scala/is/hail/expr/ir/FunctionSuite.scala index 1aef3d03d39..6e388feb936 100644 --- a/hail/src/test/scala/is/hail/expr/ir/FunctionSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/FunctionSuite.scala @@ -7,7 +7,6 @@ import is.hail.annotations._ import is.hail.asm4s._ import is.hail.expr.ir.functions.{IRFunctionRegistry, RegistryFunctions} import is.hail.types.virtual._ -import is.hail.types.physical.stypes.interfaces._ import is.hail.utils.{FastIndexedSeq, FastSeq} import is.hail.variant.Call2 import org.testng.annotations.Test @@ -32,10 +31,10 @@ object TestRegisterFunctions extends RegistryFunctions { registerJavaStaticFunction("compare", Array(TInt32, TInt32), TInt32, null)(classOf[java.lang.Integer], "compare") registerScalaFunction("foobar1", Array(), TInt32, null)(ScalaTestObject.getClass, "testFunction") registerScalaFunction("foobar2", Array(), TInt32, null)(ScalaTestCompanion.getClass, "testFunction") - registerSCode2("testCodeUnification", tnum("x"), tv("x", "int32"), tv("x"), null) { - case (_, cb, rt, a, b) => primitive(a.asInt.intCode(cb) + b.asInt.intCode(cb)) + registerCode2[Int, Int]("testCodeUnification", tnum("x"), tv("x", "int32"), tv("x"), null) { + case (_, rt, (aT, a: Code[Int]), (bT, b: Code[Int])) => a + b } - registerSCode1("testCodeUnification2", tv("x"), tv("x"), null) { case (_, cb, rt, a) => a } + registerCode1("testCodeUnification2", tv("x"), tv("x"), null) { case (_, rt, (aT, a: Code[Long])) => a } } } diff --git a/hail/src/test/scala/is/hail/expr/ir/IRSuite.scala b/hail/src/test/scala/is/hail/expr/ir/IRSuite.scala index 9be2b14f0f9..4c59ce80950 100644 --- a/hail/src/test/scala/is/hail/expr/ir/IRSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/IRSuite.scala @@ -19,7 +19,6 @@ import is.hail.io.{BufferSpec, TypedCodecSpec} import is.hail.linalg.BlockMatrix import is.hail.methods._ import is.hail.rvd.{RVD, RVDPartitioner, RVDSpecMaker} -import is.hail.types.physical.stypes.{EmitType, Float32SingleCodeType, Float64SingleCodeType, Int32SingleCodeType, Int64SingleCodeType, PTypeReferenceSingleCodeType, SType, SingleCodeType} import is.hail.utils.{FastIndexedSeq, _} import is.hail.variant.{Call2, Locus} import is.hail.{ExecStrategy, HailContext, HailSuite, utils} @@ -43,40 +42,40 @@ object IRSuite { name: String, valueParameterTypes: Array[Type], returnType: Type, - calculateReturnType: (Type, Seq[EmitType]) => EmitType + calculateReturnType: (Type, Seq[PType]) => PType )( - impl: (EmitCodeBuilder, Value[Region], SType, Long, Array[EmitCode]) => IEmitCode + impl: (EmitCodeBuilder, Value[Region], PType, Long, Array[EmitCode]) => IEmitCode ) { IRFunctionRegistry.addJVMFunction( new SeededMissingnessAwareJVMFunction(name, valueParameterTypes, returnType, calculateReturnType) { val isDeterministic: Boolean = false - def applySeededI(seed: Long, cb: EmitCodeBuilder, r: Value[Region], returnPType: SType, args: EmitCode*): IEmitCode = { - assert(unify(FastSeq(), args.map(_.st.virtualType), returnPType.virtualType)) - impl(cb, r, returnPType, seed, args.toArray) + def applySeededI(seed: Long, cb: EmitCodeBuilder, r: Value[Region], returnPType: PType, args: (PType, EmitCode)*): IEmitCode = { + assert(unify(FastSeq(), args.map(_._1.virtualType), returnPType.virtualType)) + impl(cb, r, returnPType, seed, args.map(a => a._2).toArray) } } ) } - def registerSeededWithMissingness1( + def registerSeededWithMissingness( name: String, valueParameterType: Type, returnType: Type, - calculateReturnType: 
(Type, EmitType) => EmitType + calculateReturnType: (Type, PType) => PType )( - impl: (EmitCodeBuilder, Value[Region], SType, Long, EmitCode) => IEmitCode + impl: (EmitCodeBuilder, Value[Region], PType, Long, EmitCode) => IEmitCode ): Unit = registerSeededWithMissingness(name, Array(valueParameterType), returnType, unwrappedApply(calculateReturnType)) { case (cb, r, rt, seed, Array(a1)) => impl(cb, r, rt, seed, a1) } def registerAll() { - registerSeededWithMissingness1("incr_s", TBoolean, TBoolean, { (ret: Type, pt: EmitType) => pt }) { case (cb, r, _, _, l) => + registerSeededWithMissingness("incr_s", TBoolean, TBoolean, { (ret: Type, pt: PType) => pt }) { case (cb, mb, rt, _, l) => cb += Code.invokeScalaObject0[Unit](outer.getClass, "incr") l.toI(cb) } - registerSeededWithMissingness1("incr_v", TBoolean, TBoolean, { (ret: Type, pt: EmitType) => pt }) { case (cb, _, _, _, l) => + registerSeededWithMissingness("incr_v", TBoolean, TBoolean, { (ret: Type, pt: PType) => pt }) { case (cb, mb, rt, _, l) => l.toI(cb).map(cb) { pc => cb += Code.invokeScalaObject0[Unit](outer.getClass, "incr") pc @@ -90,6 +89,11 @@ object IRSuite { class IRSuite extends HailSuite { implicit val execStrats = ExecStrategy.nonLowering + def assertPType(node: IR, expected: PType) { + InferPType(node) + assert(node.pType == expected) + } + @Test def testI32() { assertEvalsTo(I32(5), 5) } @@ -118,6 +122,17 @@ class IRSuite extends HailSuite { @Test def testFalse() { assertEvalsTo(False(), false) } + + @Test def testScalarInferPType() { + assertPType(I32(5), PInt32(true)) + assertPType(I64(5), PInt64(true)) + assertPType(F32(3.1415f), PFloat32(true)) + assertPType(F64(3.1415926589793238462643383), PFloat64(true)) + assertPType(Str("HELLO WORLD"), PCanonicalString(true)) + assertPType(True(), PBoolean(true)) + assertPType(False(), PBoolean(true)) + } + // FIXME Void() doesn't work because we can't handle a void type in a tuple @Test def testCast() { @@ -143,6 +158,31 @@ class IRSuite extends HailSuite { (Cast(F64(3.14), TFloat64), 3.14)) } + @Test def testCastInferPType() { + assertPType(Cast(I32(5), TInt32), PInt32(true)) + assertPType(Cast(I32(5), TInt64), PInt64(true)) + assertPType(Cast(I32(5), TFloat32), PFloat32(true)) + assertPType(Cast(I32(5), TFloat64), PFloat64(true)) + + assertPType(Cast(I64(5), TInt32), PInt32(true)) + assertPType(Cast(I64(0xf29fb5c9af12107dL), TInt32), PInt32(true)) // truncate + assertPType(Cast(I64(5), TInt64), PInt64(true)) + assertPType(Cast(I64(5), TFloat32), PFloat32(true)) + assertPType(Cast(I64(5), TFloat64), PFloat64(true)) + + assertPType(Cast(F32(3.14f), TInt32), PInt32(true)) + assertPType(Cast(F32(3.99f), TInt32), PInt32(true)) // truncate + assertPType(Cast(F32(3.14f), TInt64), PInt64(true)) + assertPType(Cast(F32(3.14f), TFloat32), PFloat32(true)) + assertPType(Cast(F32(3.14f), TFloat64), PFloat64(true)) + + assertPType(Cast(F64(3.14), TInt32), PInt32(true)) + assertPType(Cast(F64(3.99), TInt32), PInt32(true)) // truncate + assertPType(Cast(F64(3.14), TInt64), PInt64(true)) + assertPType(Cast(F64(3.14), TFloat32), PFloat32(true)) + assertPType(Cast(F64(3.14), TFloat64), PFloat64(true)) + } + @Test def testCastRename() { assertEvalsTo(CastRename(MakeStruct(FastSeq(("x", I32(1)))), TStruct("foo" -> TInt32)), Row(1)) assertEvalsTo(CastRename(MakeArray(FastSeq(MakeStruct(FastSeq(("x", I32(1))))), @@ -150,10 +190,64 @@ class IRSuite extends HailSuite { FastIndexedSeq(Row(1))) } + @Test def testCastRenameIR() { + var expectedPType: PType = PCanonicalStruct(true, "foo" -> 
PInt32(true)) + var childPType: PType = PCanonicalStruct(true, "x" -> PInt32(true)) + var targetType: Type = TStruct("foo" -> TInt32) + assertPType(CastRename(In(0, SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(childPType))), targetType), expectedPType) + + expectedPType = PCanonicalArray(PCanonicalStruct(true, "foo" -> PInt64(true))) + childPType = PCanonicalArray(PCanonicalStruct(true, "c" -> PInt64(true))) + targetType = TArray(TStruct("foo" -> TInt64)) + assertPType(CastRename(In(0, SingleCodeEmitParamType(false, PTypeReferenceSingleCodeType(childPType))), targetType), expectedPType) + + expectedPType = PCanonicalArray(PCanonicalStruct("foo" -> PCanonicalString(true))) + childPType = PCanonicalArray(PCanonicalStruct("q" -> PCanonicalString(true))) + targetType = TArray(TStruct("foo" -> TString)) + assertPType(CastRename(In(0, SingleCodeEmitParamType(false, PTypeReferenceSingleCodeType(childPType))), targetType), expectedPType) + + expectedPType = PCanonicalArray(PCanonicalStruct(true, "foo" -> PCanonicalStruct("baz" -> PBoolean(true)))) + childPType = PCanonicalArray(PCanonicalStruct(true, "b" -> PCanonicalStruct("a" -> PBoolean(true)))) + targetType = TArray(TStruct("foo" -> TStruct("baz" -> TBoolean))) + assertPType(CastRename(In(0, SingleCodeEmitParamType(false, PTypeReferenceSingleCodeType(childPType))), targetType), expectedPType) + + expectedPType = PCanonicalArray(PCanonicalStruct("foo" -> PCanonicalArray(PFloat64(true), true), "bar" -> PCanonicalBinary())) + childPType = PCanonicalArray(PCanonicalStruct("x" -> PCanonicalArray(PFloat64(true), true), "y" -> PCanonicalBinary())) + targetType = TArray(TStruct("foo" -> TArray(TFloat64), "bar" -> TBinary)) + assertPType(CastRename(In(0, SingleCodeEmitParamType(false, PTypeReferenceSingleCodeType(childPType))), targetType), expectedPType) + + expectedPType = PCanonicalTuple(true, PCanonicalStruct(true, "foo" -> PCanonicalInterval(PFloat32())), PCanonicalStruct(false, "bar" -> PFloat64(true))) + childPType = PCanonicalTuple(true, PCanonicalStruct(true, "v" -> PCanonicalInterval(PFloat32())), PCanonicalStruct(false, "q" -> PFloat64(true))) + targetType = TTuple(TStruct("foo" -> TInterval(TFloat32)), TStruct("bar" -> TFloat64)) + assertPType(CastRename(In(0, SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(childPType))), targetType), expectedPType) + + expectedPType = PCanonicalDict(PCanonicalString(), PCanonicalTuple(false, + PCanonicalStruct("foo" -> PCanonicalStruct("bar" -> PCanonicalNDArray(PInt32(true), 3, true))), + PCanonicalStruct(false, "bar" -> PCanonicalBinary(true)))) + childPType = PCanonicalDict(PCanonicalString(), PCanonicalTuple(false, + PCanonicalStruct("xxxxxx" -> PCanonicalStruct("qqq" -> PCanonicalNDArray(PInt32(true), 3, true))), + PCanonicalStruct(false, "ddd" -> PCanonicalBinary(true)))) + targetType = TDict(TString, TTuple(TStruct("foo" -> TStruct("bar" -> TNDArray(TInt32, Nat(3)))), + TStruct("bar" -> TBinary))) + assertPType(CastRename(In(0, SingleCodeEmitParamType(false, PTypeReferenceSingleCodeType(childPType))), targetType), expectedPType) + + expectedPType = PCanonicalStream(PCanonicalStruct("foo2a" -> PCanonicalArray(PFloat64(true), true), "bar2a" -> PCanonicalBinary())) + childPType = PCanonicalStream(PCanonicalStruct("q" -> PCanonicalArray(PFloat64(true), true), "yxxx" -> PCanonicalBinary())) + targetType = TStream(TStruct("foo2a" -> TArray(TFloat64), "bar2a" -> TBinary)) + assertPType(CastRename(In(0, SingleCodeEmitParamType(false, PTypeReferenceSingleCodeType(childPType))), 
targetType), expectedPType) + } + @Test def testNA() { assertEvalsTo(NA(TInt32), null) } + @Test def testNAIsNAInferPType() { + assertPType(NA(TInt32), PInt32(false)) + + assertPType(IsNA(NA(TInt32)), PBoolean(true)) + assertPType(IsNA(I32(5)), PBoolean(true)) + } + @Test def testCoalesce() { assertEvalsTo(Coalesce(FastSeq(In(0, TInt32))), FastIndexedSeq((null, TInt32)), null) assertEvalsTo(Coalesce(FastSeq(In(0, TInt32))), FastIndexedSeq((1, TInt32)), 1) @@ -172,6 +266,22 @@ class IRSuite extends HailSuite { assertEvalsTo(Coalesce(FastSeq(t1, t2)), FastIndexedSeq((value, TArray(TInt32))), value) } + @Test def testCoalesceInferPType() { + assertPType(Coalesce(FastSeq(In(0, SingleCodeEmitParamType(false, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32()))))), + In(0, SingleCodeEmitParamType(false, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32(true)))))))), PCanonicalArray(PCanonicalArray(PInt32()))) + assertPType(Coalesce(FastSeq(In(0, SingleCodeEmitParamType(false, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32()))))), + In(0, SingleCodeEmitParamType(false, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32(true), true))))))), PCanonicalArray(PCanonicalArray(PInt32()))) + assertPType(Coalesce(FastSeq(In(0, SingleCodeEmitParamType(false, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32()))))), + In(0, SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32(true), true), true)))))), PCanonicalArray(PCanonicalArray(PInt32()), true)) + assertPType(Coalesce(FastSeq(In(0, SingleCodeEmitParamType(false, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32()))))), + In(0, SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32(true), true), true)))))), PCanonicalArray(PCanonicalArray(PInt32()), true)) + assertPType(Coalesce(FastSeq( + In(0, SingleCodeEmitParamType(false, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32()))))), + In(0, SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32(), true))))), + In(0, SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32(true)), true)))) + )), PCanonicalArray(PCanonicalArray(PInt32()), true)) + } + val i32na = NA(TInt32) val i64na = NA(TInt64) val f32na = NA(TFloat32) @@ -208,6 +318,84 @@ class IRSuite extends HailSuite { ) } + @Test def testApplyUnaryPrimOpInferPType() { + val i32na = NA(TInt32) + def i64na = NA(TInt64) + def f32na = NA(TFloat32) + def f64na = NA(TFloat64) + def bna = NA(TBoolean) + + var node = ApplyUnaryPrimOp(Negate(), I32(5)) + assertPType(node, PInt32(true)) + node = ApplyUnaryPrimOp(Negate(), i32na) + assertPType(node, PInt32(false)) + + // should not be able to infer physical type twice on one IR (i32na) + node = ApplyUnaryPrimOp(Negate(), i32na) + intercept[RuntimeException](InferPType(node)) + + node = ApplyUnaryPrimOp(Negate(), I64(5)) + assertPType(node, PInt64(true)) + + node = ApplyUnaryPrimOp(Negate(), i64na) + assertPType(node, PInt64(false)) + + node = ApplyUnaryPrimOp(Negate(), F32(5)) + assertPType(node, PFloat32(true)) + + node = ApplyUnaryPrimOp(Negate(), f32na) + assertPType(node, PFloat32(false)) + + node = ApplyUnaryPrimOp(Negate(), F64(5)) + assertPType(node, PFloat64(true)) + + node = ApplyUnaryPrimOp(Negate(), f64na) + assertPType(node, PFloat64(false)) + + node = ApplyUnaryPrimOp(Bang(), False()) + 
assertPType(node, PBoolean(true)) + + node = ApplyUnaryPrimOp(Bang(), True()) + assertPType(node, PBoolean(true)) + + node = ApplyUnaryPrimOp(Bang(), bna) + assertPType(node, PBoolean(false)) + + node = ApplyUnaryPrimOp(BitNot(), I32(0xdeadbeef)) + assertPType(node, PInt32(true)) + + node = ApplyUnaryPrimOp(BitNot(), I64(0xdeadbeef12345678L)) + assertPType(node, PInt64(true)) + + node = ApplyUnaryPrimOp(BitNot(), I64(-0xdeadbeef12345678L)) + assertPType(node, PInt64(true)) + + node = ApplyUnaryPrimOp(BitNot(), i64na) + assertPType(node, PInt64(false)) + } + + @Test def testComplexInferPType() { + // InferPType expects array->stream lowered ir + val ir = ToArray(StreamMap( + Let( + "q", + I32(2), + StreamMap( + Let( + "v", + Ref("q", TInt32) + I32(3), + StreamRange(0, Ref("v", TInt32), 1) + ), + "x", + Ref("x", TInt32) + Ref("q", TInt32) + ) + ), + "y", + Ref("y", TInt32) + I32(3))) + + assertPType(ir, PCanonicalArray(PInt32(true), true)) + } + @Test def testApplyBinaryPrimOpAdd() { def assertSumsTo(t: Type, x: Any, y: Any, sum: Any) { assertEvalsTo(ApplyBinaryPrimOp(Add(), In(0, t), In(1, t)), FastIndexedSeq(x -> t, y -> t), sum) @@ -290,12 +478,12 @@ class IRSuite extends HailSuite { assertEvalsTo(ApplyBinaryPrimOp(FloatingPointDivide(), In(0, t), In(1, t)), FastIndexedSeq(x -> t, y -> t), expected) } - assertExpected(TInt32, 5, 2, 2.5) + assertExpected(TInt32, 5, 2, 2.5f) assertExpected(TInt32, 5, null, null) assertExpected(TInt32, null, 2, null) assertExpected(TInt32, null, null, null) - assertExpected(TInt64, 5L, 2L, 2.5) + assertExpected(TInt64, 5L, 2L, 2.5f) assertExpected(TInt64, 5L, null, null) assertExpected(TInt64, null, 2L, null) assertExpected(TInt64, null, null, null) @@ -603,6 +791,30 @@ class IRSuite extends HailSuite { assertEvalsTo(If(True(), NA(TInt32), I32(7)), null) } + @Test def testIfInferPType() { + assertPType(If(True(), In(0, SingleCodeEmitParamType(true, Int32SingleCodeType)), In(1, SingleCodeEmitParamType(true, Int32SingleCodeType))), PInt32(true)) + assertPType(If(True(), In(0, SingleCodeEmitParamType(false, Int32SingleCodeType)), In(1, SingleCodeEmitParamType(true, Int32SingleCodeType))), PInt32(false)) + assertPType(If(NA(TBoolean), In(0, SingleCodeEmitParamType(true, Int32SingleCodeType)), In(1, SingleCodeEmitParamType(true, Int32SingleCodeType))), PInt32(false)) + + var cnsqBranch = In(0, SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32(true), true), true)))) + var altrBranch = In(1, SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32(true), true), true)))) + + var ir = If(True(), cnsqBranch, altrBranch) + assertPType(ir, PCanonicalArray(PCanonicalArray(PInt32(true), true), true)) + + cnsqBranch = In(0, SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32(true), true), true)))) + altrBranch = In(1, SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32(false), true), true)))) + + ir = If(True(), cnsqBranch, altrBranch) + assertPType(ir, PCanonicalArray(PCanonicalArray(PInt32(false), true), true)) + + cnsqBranch = In(0, SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32(true), false), true)))) + altrBranch = In(1, SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32(false), true), true)))) + + ir = If(True(), cnsqBranch, altrBranch) + assertPType(ir, 
PCanonicalArray(PCanonicalArray(PInt32(false), false), true)) + } + @Test def testLet() { assertEvalsTo(Let("v", I32(5), Ref("v", TInt32)), 5) assertEvalsTo(Let("v", NA(TInt32), Ref("v", TInt32)), null) @@ -634,6 +846,43 @@ class IRSuite extends HailSuite { assertEvalsTo(MakeArray(FastSeq(), TArray(TInt32)), FastIndexedSeq()) } + @Test def testMakeArrayInferPTypeFromNestedRef() { + var ir = MakeArray(FastSeq(), TArray(TInt32)) + assertPType(ir, PCanonicalArray(PInt32(true), true)) + + val eltType = TStruct("a" -> TArray(TArray(TInt32)), "b" -> TInt32, "c" -> TDict(TInt32, TString)) + + val pTypes = Array[PType]( + PCanonicalStruct(true, + "a" -> PCanonicalArray(PCanonicalArray(PInt32(false), true), false), + "b" -> PInt32(true), + "c" -> PCanonicalDict(PInt32(false), PCanonicalString(false), false)), + PCanonicalStruct(true, + "a" -> PCanonicalArray(PCanonicalArray(PInt32(true), true), true), + "b" -> PInt32(true), + "c" -> PCanonicalDict(PInt32(true), PCanonicalString(true), true))) + + val unified = PCanonicalStruct(true, + "a" -> PCanonicalArray(PCanonicalArray(PInt32(false), true), false), + "b" -> PInt32(true), + "c" -> PCanonicalDict(PInt32(false), PCanonicalString(false), false)) + + assertPType(MakeArray(Array(In(0, SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(pTypes(0))))), TArray(eltType)), PCanonicalArray(pTypes(0), true)) + assertPType(MakeArray(Array(In(0, SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(pTypes(0)))), In(1, SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(pTypes(1))))), TArray(eltType)), PCanonicalArray(pTypes(0), true)) + } + + @Test def testMakeArrayInferPType() { + var ir = MakeArray(FastSeq(I32(5), NA(TInt32), I32(-3)), TArray(TInt32)) + + assertPType(ir, PCanonicalArray(PInt32(false), true)) + + ir = MakeArray(FastSeq(I32(5), I32(1), I32(-3)), TArray(TInt32)) + + assertPType(ir, PCanonicalArray(PInt32(true), true)) + + ir = MakeArray(FastSeq(I32(5), I32(1), I32(-3)), TArray(TInt32)) + } + @Test def testGetNestedElementPTypesI32() { var types = Seq(PInt32(true)) var res = InferPType.getCompatiblePType(types) @@ -1204,6 +1453,29 @@ class IRSuite extends HailSuite { assert(res == PCanonicalInterval(PCanonicalInterval(PInt32(false), false), true)) } + @Test def testToDictInferPtype() { + val allRequired = ToDict(MakeStream(FastIndexedSeq( + MakeTuple.ordered(FastIndexedSeq(I32(5), Str("a"))), + MakeTuple.ordered(FastIndexedSeq(I32(10), Str("b"))) + ), TStream(TTuple(TInt32, TString)))) + + assertPType(allRequired, PCanonicalDict(PInt32(true), PCanonicalString(true), true)) + + var notAllRequired = ToDict(MakeStream(FastIndexedSeq( + MakeTuple.ordered(FastIndexedSeq(NA(TInt32), Str("a"))), + MakeTuple.ordered(FastIndexedSeq(I32(10), Str("b"))) + ), TStream(TTuple(TInt32, TString)))) + + assertPType(notAllRequired, PCanonicalDict(PInt32(false), PCanonicalString(true), true)) + + notAllRequired = ToDict(MakeStream(FastIndexedSeq( + MakeTuple.ordered(FastIndexedSeq(NA(TInt32), Str("a"))), + MakeTuple.ordered(FastIndexedSeq(I32(10), NA(TString)) + )), TStream(TTuple(TInt32, TString)))) + + assertPType(notAllRequired, PCanonicalDict(PInt32(false), PCanonicalString(false), true)) + } + @Test def testMakeStruct() { assertEvalsTo(MakeStruct(FastSeq()), Row()) assertEvalsTo(MakeStruct(FastSeq("a" -> NA(TInt32), "b" -> 4, "c" -> 0.5)), Row(null, 4, 0.5)) @@ -1211,6 +1483,17 @@ class IRSuite extends HailSuite { assertEvalsTo(GetField(MakeStruct((0 until 20000).map(i => s"foo$i" -> I32(1))), "foo1"), 1) } + @Test def 
testMakeStructInferPType() { + var ir = MakeStruct(FastSeq()) + assertPType(ir, PCanonicalStruct(true)) + + ir = MakeStruct(FastSeq("a" -> NA(TInt32), "b" -> 4, "c" -> 0.5)) + assertPType(ir, PCanonicalStruct(true, "a" -> PInt32(false), "b" -> PInt32(true), "c" -> PFloat64(true))) + + val ir2 = GetField(MakeStruct((0 until 20000).map(i => s"foo$i" -> I32(1))), "foo1") + assertPType(ir2, PInt32(true)) + } + @Test def testMakeArrayWithDifferentRequiredness(): Unit = { val pt1 = PCanonicalArray(PCanonicalStruct("a" -> PInt32(), "b" -> PCanonicalArray(PInt32()))) val pt2 = PCanonicalArray(PCanonicalStruct(true, "a" -> PInt32(true), "b" -> PCanonicalArray(PInt32(), true))) @@ -2420,6 +2703,11 @@ class IRSuite extends HailSuite { assertFatal(Die(NA(TString), TFloat64, -1), "message missing") } + @Test def testDieInferPType() { + assertPType(Die("mumblefoo", TFloat64), PFloat64(true)) + assertPType(Die("mumblefoo", TArray(TFloat64)), PCanonicalArray(PFloat64(true), true)) + } + @Test def testStreamRange() { def assertEquals(start: Integer, stop: Integer, step: Integer, expected: IndexedSeq[Int]) { assertEvalsTo(ToArray(StreamRange(In(0, TInt32), In(1, TInt32), In(2, TInt32))), @@ -2871,7 +3159,6 @@ class IRSuite extends HailSuite { MakeTuple(FastIndexedSeq(2 -> i, 4 -> b)), GetTupleElement(t, 1), Die("mumblefoo", TFloat64), - Trap(Die("mumblefoo", TFloat64)), invoke("land", TBoolean, b, c), // ApplySpecial invoke("toFloat64", TFloat64, i), // Apply Literal(TStruct("x" -> TInt32), Row(1)), diff --git a/hail/src/test/scala/is/hail/expr/ir/OrderingSuite.scala b/hail/src/test/scala/is/hail/expr/ir/OrderingSuite.scala index 0734f8d545f..b8481bd1ba9 100644 --- a/hail/src/test/scala/is/hail/expr/ir/OrderingSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/OrderingSuite.scala @@ -9,7 +9,6 @@ import is.hail.TestUtils._ import is.hail.expr.ir.orderings.CodeOrdering import is.hail.rvd.RVDType import is.hail.types.physical._ -import is.hail.types.physical.stypes.EmitType import is.hail.types.virtual._ import is.hail.utils._ import org.apache.spark.sql.Row @@ -39,8 +38,8 @@ class OrderingSuite extends HailSuite { implicit val x = op.rtti val fb = EmitFunctionBuilder[Region, Long, Long, op.ReturnType](ctx, "lifted") fb.emitWithBuilder { cb => - val cv1 = t.loadCheapSCode(cb, fb.getCodeParam[Long](2)) - val cv2 = t.loadCheapSCode(cb, fb.getCodeParam[Long](3)) + val cv1 = t.loadCheapPCode(cb, fb.getCodeParam[Long](2)) + val cv2 = t.loadCheapPCode(cb, fb.getCodeParam[Long](3)) fb.ecb.getOrderingFunction(cv1.st, cv2.st, op) .apply(cb, EmitCode.present(cb.emb, cv1), EmitCode.present(cb.emb, cv2)) } @@ -58,9 +57,9 @@ class OrderingSuite extends HailSuite { val fb = EmitFunctionBuilder[Region, Boolean, Long, Boolean, Long, op.ReturnType](ctx, "lifted") fb.emitWithBuilder { cb => val m1 = fb.getCodeParam[Boolean](2) - val cv1 = t.loadCheapSCode(cb, fb.getCodeParam[Long](3)) + val cv1 = t.loadCheapPCode(cb, fb.getCodeParam[Long](3)) val m2 = fb.getCodeParam[Boolean](4) - val cv2 = t.loadCheapSCode(cb, fb.getCodeParam[Long](5)) + val cv2 = t.loadCheapPCode(cb, fb.getCodeParam[Long](5)) val ev1 = EmitCode(Code._empty, m1, cv1) val ev2 = EmitCode(Code._empty, m2, cv2) fb.ecb.getOrderingFunction(ev1.st, ev2.st, op) @@ -458,10 +457,8 @@ class OrderingSuite extends HailSuite { val cset = fb.getCodeParam[Long](2) val cetuple = fb.getCodeParam[Long](3) - val bs = new BinarySearch(fb.apply_method, pset.sType, EmitType(pset.elementType.sType, true), keyOnly = false) - fb.emitWithBuilder(cb => - bs.getClosestIndex(cb, 
pset.loadCheapSCode(cb, cset), - EmitCode.fromI(fb.apply_method)(cb => IEmitCode.present(cb, pt.loadCheapSCode(cb, pTuple.loadField(cetuple, 0)))))) + val bs = new BinarySearch(fb.apply_method, pset, pset.elementType, keyOnly = false) + fb.emitWithBuilder(cb => bs.getClosestIndex(cset, false, pt.loadCheapPCode(cb, pTuple.loadField(cetuple, 0)).code)) val asArray = SafeIndexedSeq(pArray, soff) @@ -496,12 +493,9 @@ class OrderingSuite extends HailSuite { val cdict = fb.getCodeParam[Long](2) val cktuple = fb.getCodeParam[Long](3) - val bs = new BinarySearch(fb.apply_method, pDict.sType, EmitType(pDict.keyType.sType, false), keyOnly = true) - + val bs = new BinarySearch(fb.apply_method, pDict, pDict.keyType, keyOnly = true) val m = ptuple.isFieldMissing(cktuple, 0) - fb.emitWithBuilder(cb => - bs.getClosestIndex(cb, pDict.loadCheapSCode(cb, cdict), - EmitCode.fromI(fb.apply_method)(cb => IEmitCode.present(cb, pDict.keyType.loadCheapSCode(cb, ptuple.loadField(cktuple, 0)))))) + fb.emitWithBuilder(cb => bs.getClosestIndex(cdict, m, pDict.keyType.loadCheapPCode(cb, ptuple.loadField(cktuple, 0)).code)) val asArray = SafeIndexedSeq(PCanonicalArray(pDict.elementType), soff) diff --git a/hail/src/test/scala/is/hail/expr/ir/PruneSuite.scala b/hail/src/test/scala/is/hail/expr/ir/PruneSuite.scala index 8e9fca074d8..9df2846eb8d 100644 --- a/hail/src/test/scala/is/hail/expr/ir/PruneSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/PruneSuite.scala @@ -101,7 +101,6 @@ class PruneSuite extends HailSuite { false).execute(ctx)) lazy val tr = TableRead(tab.typ, false, new TableReader { - override def renderShort(): String = ??? def pathsUsed: Seq[String] = FastSeq() @@ -139,8 +138,6 @@ class PruneSuite extends HailSuite { def lower(mr: MatrixRead): TableIR = ??? def toJValue: JValue = ??? - - override def renderShort(): String = ??? 
}) lazy val emptyTableDep = TableType(TStruct.empty, FastIndexedSeq(), TStruct.empty) diff --git a/hail/src/test/scala/is/hail/expr/ir/RandomFunctionsSuite.scala b/hail/src/test/scala/is/hail/expr/ir/RandomFunctionsSuite.scala index 6764550ff16..5af28d6b42e 100644 --- a/hail/src/test/scala/is/hail/expr/ir/RandomFunctionsSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/RandomFunctionsSuite.scala @@ -4,11 +4,10 @@ import is.hail.TestUtils._ import is.hail.expr.ir.TestUtils._ import is.hail.asm4s.Code import is.hail.expr.ir.functions.{IRRandomness, RegistryFunctions} -import is.hail.types.physical.stypes.interfaces._ -import is.hail.types.physical.stypes.primitives.{SInt32, SInt64} +import is.hail.types.physical.{PCode, PInt32, PInt64} import is.hail.types.virtual.{TArray, TFloat64, TInt32, TInt64, TStream} import is.hail.utils._ -import is.hail.{ExecStrategy, HailSuite} +import is.hail.{ExecStrategy, HailContext, HailSuite} import org.apache.spark.sql.Row import org.testng.annotations.{BeforeClass, Test} @@ -36,16 +35,16 @@ object TestRandomFunctions extends RegistryFunctions { } def registerAll() { - registerSeeded0("counter_seeded", TInt32, SInt32) { case (cb, r, rt, seed) => - primitive(getTestRNG(cb.emb, seed).invoke[Int]("counter")) + registerSeeded0("counter_seeded", TInt32, PInt32(true)) { case (cb, r, rt, seed) => + PCode(rt, getTestRNG(cb.emb, seed).invoke[Int]("counter")) } - registerSeeded0("seed_seeded", TInt64, SInt64) { case (cb, r, rt, seed) => - primitive(getTestRNG(cb.emb, seed).invoke[Long]("seed")) + registerSeeded0("seed_seeded", TInt64, PInt64(true)) { case (cb, r, rt, seed) => + PCode(rt, getTestRNG(cb.emb, seed).invoke[Long]("seed")) } - registerSeeded0("pi_seeded", TInt32, SInt32) { case (cb, r, rt, seed) => - primitive(getTestRNG(cb.emb, seed).invoke[Int]("partitionIndex")) + registerSeeded0("pi_seeded", TInt32, PInt32(true)) { case (cb, r, rt, seed) => + PCode(rt, getTestRNG(cb.emb, seed).invoke[Int]("partitionIndex")) } } } diff --git a/hail/src/test/scala/is/hail/expr/ir/RequirednessSuite.scala b/hail/src/test/scala/is/hail/expr/ir/RequirednessSuite.scala index c1c68862b64..4ced01d7fad 100644 --- a/hail/src/test/scala/is/hail/expr/ir/RequirednessSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/RequirednessSuite.scala @@ -51,13 +51,12 @@ class RequirednessSuite extends HailSuite { def nd(r: Boolean): IR = if (r) MakeNDArray.fill(int(optional), FastIndexedSeq(1, 2), True()) else NA(tnd) -// FIXME: Currently ndarrays don't support elements that have pointers. 
-// def nestednd(r: Boolean, aelt: Boolean): IR = { -// if (r) -// MakeNDArray.fill(array(optional, aelt), FastIndexedSeq(1, 2), True()) -// else -// NDArrayMap(NA(tnestednd), genUID(), array(optional, aelt)) -// } + def nestednd(r: Boolean, aelt: Boolean): IR = { + if (r) + MakeNDArray.fill(array(optional, aelt), FastIndexedSeq(1, 2), True()) + else + NDArrayMap(NA(tnestednd), genUID(), array(optional, aelt)) + } def nestedstream(r: Boolean, a: Boolean, aelt: Boolean): IR = { if (r) @@ -106,7 +105,7 @@ class RequirednessSuite extends HailSuite { MakeTuple.ordered(FastIndexedSeq(I32(5), Str("bar")))) allRequired.foreach { n => - nodes += Array(n, RequirednessSuite.deepInnerRequired(PType.canonical(n.typ, required), required)) + nodes += Array(n, PType.canonical(n.typ, required).deepInnerRequired(required)) } val bools = Array(true, false) @@ -115,6 +114,7 @@ class RequirednessSuite extends HailSuite { nodes += Array(nd(r1), pnd(r1)) for (r2 <- bools) { nodes += Array(array(r2, r1), parray(r2, r1)) + nodes += Array(nestednd(r2, r1), pnestednd(r2, r1)) for (r3 <- bools) { nodes += Array(nestedarray(r3, r2, r1), pnestedarray(r3, r2, r1)) for (r4 <- bools) { @@ -519,22 +519,3 @@ class RequirednessSuite extends HailSuite { assert(actual == expected) } } - -object RequirednessSuite { - def deepInnerRequired(t: PType, required: Boolean): PType = - t match { - case t: PCanonicalArray => PCanonicalArray(deepInnerRequired(t.elementType, true), required) - case t: PCanonicalSet => PCanonicalSet(deepInnerRequired(t.elementType, true), required) - case t: PCanonicalDict => PCanonicalDict(deepInnerRequired(t.keyType, true), deepInnerRequired(t.valueType, true), required) - case t: PCanonicalStruct => - PCanonicalStruct(t.fields.map(f => PField(f.name, deepInnerRequired(f.typ, true), f.index)), required) - case t: PCanonicalTuple => - PCanonicalTuple(t._types.map { f => f.copy(typ = deepInnerRequired(f.typ, true)) }, required) - case t: PCanonicalInterval => - PCanonicalInterval(deepInnerRequired(t.pointType, true), required) - case t: PCanonicalStream => - PCanonicalStream(deepInnerRequired(t.elementType, true), required = required) - case t => - t.setRequired(required) - } -} diff --git a/hail/src/test/scala/is/hail/expr/ir/StagedBTreeSuite.scala b/hail/src/test/scala/is/hail/expr/ir/StagedBTreeSuite.scala index 329eb261908..20825569446 100644 --- a/hail/src/test/scala/is/hail/expr/ir/StagedBTreeSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/StagedBTreeSuite.scala @@ -1,6 +1,7 @@ package is.hail.expr.ir import java.io.{ByteArrayInputStream, ByteArrayOutputStream} + import is.hail.HailSuite import is.hail.annotations.Region import is.hail.asm4s._ @@ -9,15 +10,13 @@ import is.hail.expr.ir.agg._ import is.hail.expr.ir.orderings.CodeOrdering import is.hail.types.physical._ import is.hail.io.{InputBuffer, OutputBuffer, StreamBufferSpec} -import is.hail.types.physical.stypes.Int64SingleCodeType -import is.hail.types.physical.stypes.interfaces.primitive import is.hail.types.physical.stypes.primitives.SInt64 import is.hail.utils._ import org.testng.annotations.Test import scala.collection.mutable class TestBTreeKey(mb: EmitMethodBuilder[_]) extends BTreeKey { - private val comp = mb.ecb.getOrderingFunction(SInt64, SInt64, CodeOrdering.Compare()) + private val comp = mb.ecb.getOrderingFunction(SInt64(false), SInt64(false), CodeOrdering.Compare()) def storageType: PTuple = PCanonicalTuple(required = true, PInt64(), PCanonicalTuple(false)) def compType: PType = PInt64() def isEmpty(cb: EmitCodeBuilder, 
off: Code[Long]): Code[Boolean] = @@ -42,7 +41,7 @@ class TestBTreeKey(mb: EmitMethodBuilder[_]) extends BTreeKey { def compKeys(cb: EmitCodeBuilder, k1: EmitCode, k2: EmitCode): Code[Int] = comp(cb, k1, k2) def loadCompKey(cb: EmitCodeBuilder, off: Value[Long]): EmitCode = - EmitCode(Code._empty, storageType.isFieldMissing(off, 0), primitive(Region.loadLong(storageType.fieldOffset(off, 0)))) + EmitCode(Code._empty, storageType.isFieldMissing(off, 0), PCode(compType, Region.loadLong(storageType.fieldOffset(off, 0)))) } object BTreeBackedSet { @@ -111,7 +110,7 @@ class BTreeBackedSet(ctx: ExecuteContext, region: Region, n: Int) { val btree = new AppendOnlyBTree(cb, key, r, root, maxElements = n) fb.emitWithBuilder { cb => - val ec = EmitCode(Code._empty, m, primitive(v)) + val ec = EmitCode(Code._empty, m, PCode(PInt64Optional, v)) cb.assign(r, fb.getCodeParam[Region](1)) cb.assign(root, fb.getCodeParam[Long](2)) cb.assign(elt, btree.getOrElseInitialize(cb, ec)) @@ -132,7 +131,7 @@ class BTreeBackedSet(ctx: ExecuteContext, region: Region, n: Int) { val key = new TestBTreeKey(fb.apply_method) val btree = new AppendOnlyBTree(cb, key, r, root, maxElements = n) - val sab = new StagedArrayBuilder(Int64SingleCodeType, true, fb.apply_method, 16) + val sab = new StagedArrayBuilder(PInt64(), fb.apply_method, 16) val idx = fb.newLocal[Int]() val returnArray = fb.newLocal[Array[java.lang.Long]]() @@ -144,7 +143,7 @@ class BTreeBackedSet(ctx: ExecuteContext, region: Region, n: Int) { cb += Code.memoize(koff, "koff") { koff => val ec = key.loadCompKey(cb, koff) ec.m.mux(sab.addMissing(), - sab.add(ec.pv.asInt64.longCode(cb))) + sab.add(ec.v)) } } cb += (returnArray := Code.newArray[java.lang.Long](sab.size)) diff --git a/hail/src/test/scala/is/hail/expr/ir/TableIRSuite.scala b/hail/src/test/scala/is/hail/expr/ir/TableIRSuite.scala index e501a4c6487..798af683148 100644 --- a/hail/src/test/scala/is/hail/expr/ir/TableIRSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/TableIRSuite.scala @@ -761,8 +761,6 @@ class TableIRSuite extends HailSuite { @Test def testPartitionCountsWithDropRows() { val tr = new TableReader { - override def renderShort(): String = ??? - def pathsUsed: Seq[String] = FastSeq() override def apply(tr: TableRead, ctx: ExecuteContext): TableValue = ??? 
diff --git a/hail/src/test/scala/is/hail/expr/ir/TakeByAggregatorSuite.scala b/hail/src/test/scala/is/hail/expr/ir/TakeByAggregatorSuite.scala index dd9070d6577..6b51186d5c7 100644 --- a/hail/src/test/scala/is/hail/expr/ir/TakeByAggregatorSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/TakeByAggregatorSuite.scala @@ -104,7 +104,7 @@ class TakeByAggregatorSuite extends HailSuite { cb.whileLoop(i < n, { cb += (random := rng.invoke[Double, Double, Double]("runif", -10000d, 10000d).toI) tba.seqOp(cb, false, random, false, random) - ab.append(cb, new SInt32Code(random)) + ab.append(cb, new SInt32Code(true, random)) cb += (i := i + 1) }) cb += ab.size.cne(n).orEmpty(Code._fatal[Unit]("bad size!")) diff --git a/hail/src/test/scala/is/hail/expr/ir/TestUtils.scala b/hail/src/test/scala/is/hail/expr/ir/TestUtils.scala index 0527df4874e..93b08c1a7b5 100644 --- a/hail/src/test/scala/is/hail/expr/ir/TestUtils.scala +++ b/hail/src/test/scala/is/hail/expr/ir/TestUtils.scala @@ -88,7 +88,7 @@ object TestUtils { def IRSet(a: Integer*): IR = toIRSet(a) - def IRCall(c: Call): IR = invoke("callFromRepr", TCall, I32(c)) + def IRCall(c: Call): IR = Cast(I32(c), TCall) def IRAggCount: IR = { val aggSig = AggSignature(Count(), FastSeq.empty, FastSeq.empty) diff --git a/hail/src/test/scala/is/hail/expr/ir/TrapNodeSuite.scala b/hail/src/test/scala/is/hail/expr/ir/TrapNodeSuite.scala deleted file mode 100644 index 9e4970a4953..00000000000 --- a/hail/src/test/scala/is/hail/expr/ir/TrapNodeSuite.scala +++ /dev/null @@ -1,37 +0,0 @@ -package is.hail.expr.ir - -import is.hail.TestUtils._ -import is.hail.types.virtual._ -import is.hail.utils._ -import is.hail.{ExecStrategy, HailSuite} -import org.apache.spark.sql.Row -import org.testng.annotations.Test - -class TrapNodeSuite extends HailSuite { - implicit val execStrats = ExecStrategy.javaOnly - - @Test def testTrapNode() { - assertEvalsTo(Trap(ArrayRef(Literal(TArray(TInt32), FastIndexedSeq(0, 1, 2)), I32(1))), Row(null, 1)) - val res = eval(Trap(ArrayRef(Literal(TArray(TInt32), FastIndexedSeq(0, 1, 2)), I32(-1)))) - res match { - case Row(Row(msg: String, id: Int), null) => - assert(id == -1) - assert(msg.contains("array index out of bounds")) - } - - assertEvalsTo(Trap(Die(Str("foo bar"), TInt32, 5)), Row(Row("foo bar", 5), null)) - } - - @Test def testTrapNodeInLargerContext() { - def resultByIdx(idx: Int): IR = bindIR(Trap(ArrayRef(Literal(TArray(TInt32), FastIndexedSeq(100, 200, 300)), I32(idx)))) { value => - If(IsNA(GetTupleElement(value, 0)), - GetTupleElement(value, 1), - I32(-1) - ) - } - - assertEvalsTo(resultByIdx(-100), -1) - assertEvalsTo(resultByIdx(2), 300) - assertEvalsTo(resultByIdx(4), -1) - } -} diff --git a/hail/src/test/scala/is/hail/expr/ir/agg/DownsampleSuite.scala b/hail/src/test/scala/is/hail/expr/ir/agg/DownsampleSuite.scala index 8d81fc26dca..196b093a600 100644 --- a/hail/src/test/scala/is/hail/expr/ir/agg/DownsampleSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/agg/DownsampleSuite.scala @@ -6,7 +6,7 @@ import is.hail.asm4s._ import is.hail.expr.ir.{EmitCode, EmitFunctionBuilder} import is.hail.types.VirtualTypeWithReq import is.hail.types.physical.stypes.primitives.{SFloat64Code, SInt32Code} -import is.hail.types.physical.{PCanonicalArray, PCanonicalString} +import is.hail.types.physical.{PCanonicalArray, PCanonicalString, PCode} import is.hail.utils.FastIndexedSeq import org.testng.annotations.Test @@ -40,9 +40,9 @@ class DownsampleSuite extends HailSuite { cb.assign(y, rng.invoke[Double, Double, Double]("runif", 0d, 1d)) 
ds1.insert(cb, - EmitCode.present(cb.emb, new SFloat64Code(x)), - EmitCode.present(cb.emb, new SFloat64Code(y)), - EmitCode.missing(cb.emb, PCanonicalArray(PCanonicalString()).sType)) + EmitCode.present(cb.emb, new SFloat64Code(true, x)), + EmitCode.present(cb.emb, new SFloat64Code(true, y)), + EmitCode.missing(cb.emb, PCanonicalArray(PCanonicalString()))) cb.assign(i, i + const(1)) }) ds1.merge(cb, ds2) diff --git a/hail/src/test/scala/is/hail/expr/ir/agg/StagedBlockLinkedListSuite.scala b/hail/src/test/scala/is/hail/expr/ir/agg/StagedBlockLinkedListSuite.scala index fabf026ebd8..39e5b11d85e 100644 --- a/hail/src/test/scala/is/hail/expr/ir/agg/StagedBlockLinkedListSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/agg/StagedBlockLinkedListSuite.scala @@ -47,7 +47,7 @@ class StagedBlockLinkedListSuite extends HailSuite { sbll.load(cb, ptr) sbll.push(cb, r, EmitCode(Code._empty, eltOff.get.ceq(0L), - elemPType.loadCheapSCode(cb, eltOff))) + elemPType.loadCheapPCode(cb, eltOff))) sbll.store(cb, ptr) Code._empty } diff --git a/hail/src/test/scala/is/hail/methods/SkatSuite.scala b/hail/src/test/scala/is/hail/methods/SkatSuite.scala index 067b295060b..acd1a4bbc8d 100644 --- a/hail/src/test/scala/is/hail/methods/SkatSuite.scala +++ b/hail/src/test/scala/is/hail/methods/SkatSuite.scala @@ -3,10 +3,9 @@ package is.hail.methods import is.hail.{HailSuite, TestUtils} import is.hail.utils._ import breeze.linalg._ -import is.hail.expr.ir.DoubleArrayBuilder import org.testng.annotations.Test -case class SkatAggForR(xs: BoxedArrayBuilder[DenseVector[Double]], weights: DoubleArrayBuilder) +case class SkatAggForR(xs: BoxedArrayBuilder[DenseVector[Double]], weights: BoxedArrayBuilder[Double]) class SkatSuite extends HailSuite { diff --git a/hail/src/test/scala/is/hail/services/shuffler/ShuffleSuite.scala b/hail/src/test/scala/is/hail/services/shuffler/ShuffleSuite.scala index cac61e3c4ae..51b7293079e 100644 --- a/hail/src/test/scala/is/hail/services/shuffler/ShuffleSuite.scala +++ b/hail/src/test/scala/is/hail/services/shuffler/ShuffleSuite.scala @@ -14,7 +14,6 @@ import is.hail.services.shuffler.ShufflerTestUtils._ import is.hail.io._ import is.hail.utils._ import is.hail._ -import is.hail.types.physical.stypes.PTypeReferenceSingleCodeType import org.scalatest.testng.TestNGSuite import org.testng.annotations.Test @@ -41,7 +40,7 @@ class ShuffleSuite extends HailSuite { using(new ShuffleClient(shuffleType, rowPType, keyPType)) { c => val rowDecodedPType = c.codecs.rowDecodedPType - val values = new LongArrayBuilder() + val values = new BoxedArrayBuilder[Long]() pool.scopedRegion { region => val rvb = new RegionValueBuilder(region) val nElements = 1000000 @@ -77,7 +76,7 @@ class ShuffleSuite extends HailSuite { c.get(region, left, true, right, false)) i = 0 - val ab = new LongArrayBuilder() + val ab = new BoxedArrayBuilder[Long]() while (i < nPartitions) { ab ++= c.get(region, partitionBounds(i).offset, true, diff --git a/hail/src/test/scala/is/hail/types/physical/PNDArraySuite.scala b/hail/src/test/scala/is/hail/types/physical/PNDArraySuite.scala index 549bb23711b..d8986268eab 100644 --- a/hail/src/test/scala/is/hail/types/physical/PNDArraySuite.scala +++ b/hail/src/test/scala/is/hail/types/physical/PNDArraySuite.scala @@ -110,17 +110,16 @@ class PNDArraySuite extends PhysicalTestUtils { assert(PNDArray.getReferenceCount(addr1) == 1) // Deep copy with elements that contain pointers, so have to actually do a full copy - // FIXME: Currently ndarrays do not support this, reference counting needs to account 
for this. -// val pNDOfArrays = PCanonicalNDArray(PCanonicalArray(PInt32Required, true), 1) -// val annotationNDOfArrays = new SafeNDArray(IndexedSeq(3L), (0 until 3).map(idx => (0 to idx).toArray.toIndexedSeq)) -// val addr3 = pNDOfArrays.unstagedStoreJavaObject(annotationNDOfArrays, region=region1) -// val unsafe3 = UnsafeRow.read(pNDOfArrays, region1, addr3) -// val addr4 = pNDOfArrays.copyFromAddress(region2, pNDOfArrays, addr3, true) -// val unsafe4 = UnsafeRow.read(pNDOfArrays, region2, addr4) -// assert(addr3 != addr4) -// assert(unsafe3 == unsafe4) -// assert(PNDArray.getReferenceCount(addr3) == 1L) -// assert(PNDArray.getReferenceCount(addr4) == 1L) + val pNDOfArrays = PCanonicalNDArray(PCanonicalArray(PInt32Required, true), 1) + val annotationNDOfArrays = new SafeNDArray(IndexedSeq(3L), (0 until 3).map(idx => (0 to idx).toArray.toIndexedSeq)) + val addr3 = pNDOfArrays.unstagedStoreJavaObject(annotationNDOfArrays, region=region1) + val unsafe3 = UnsafeRow.read(pNDOfArrays, region1, addr3) + val addr4 = pNDOfArrays.copyFromAddress(region2, pNDOfArrays, addr3, true) + val unsafe4 = UnsafeRow.read(pNDOfArrays, region2, addr4) + assert(addr3 != addr4) + assert(unsafe3 == unsafe4) + assert(PNDArray.getReferenceCount(addr3) == 1L) + assert(PNDArray.getReferenceCount(addr4) == 1L) // Deep copy with PTypes with different requirements val pNDOfStructs1 = PCanonicalNDArray(PCanonicalStruct(true, ("x", PInt32Required), ("y", PInt32())), 1) diff --git a/hail/src/test/scala/is/hail/types/physical/PhysicalTestUtils.scala b/hail/src/test/scala/is/hail/types/physical/PhysicalTestUtils.scala index e3302279ad4..132ddb8d691 100644 --- a/hail/src/test/scala/is/hail/types/physical/PhysicalTestUtils.scala +++ b/hail/src/test/scala/is/hail/types/physical/PhysicalTestUtils.scala @@ -54,7 +54,7 @@ abstract class PhysicalTestUtils extends HailSuite { val value = fb.getCodeParam[Long](2) try { - fb.emitWithBuilder(cb => destType.store(cb, codeRegion, sourceType.loadCheapSCode(cb, value), deepCopy = deepCopy)) + fb.emitWithBuilder(cb => destType.store(cb, codeRegion, sourceType.loadCheapPCode(cb, value), deepCopy = deepCopy)) compileSuccess = true } catch { case e: Throwable => diff --git a/hail/src/test/scala/is/hail/utils/ArrayBuilderSuite.scala b/hail/src/test/scala/is/hail/utils/ArrayBuilderSuite.scala index f33c4ce89e9..e957fa7b88b 100644 --- a/hail/src/test/scala/is/hail/utils/ArrayBuilderSuite.scala +++ b/hail/src/test/scala/is/hail/utils/ArrayBuilderSuite.scala @@ -1,12 +1,11 @@ package is.hail.utils -import is.hail.expr.ir.IntArrayBuilder import org.scalatest.testng.TestNGSuite import org.testng.annotations.Test class ArrayBuilderSuite extends TestNGSuite { @Test def addOneElement() { - val ab = new IntArrayBuilder(0) + val ab = new BoxedArrayBuilder[Int](0) ab += 3 val a = ab.result() assert(a.length == 1) @@ -14,13 +13,13 @@ class ArrayBuilderSuite extends TestNGSuite { } @Test def addArray() { - val ab = new IntArrayBuilder(0) + val ab = new BoxedArrayBuilder[Int](0) ab ++= Array.fill[Int](5)(2) val a = ab.result() assert(a.length == 5) assert(a.forall(_ == 2)) - val ab2 = new IntArrayBuilder(0) + val ab2 = new BoxedArrayBuilder[Int](0) ab2 ++= (Array.fill[Int](4)(3), 2) val a2 = ab2.result() assert(a2.length == 2) @@ -30,7 +29,7 @@ class ArrayBuilderSuite extends TestNGSuite { val ab2Update = ab2.result() assert(ab2Update sameElements Array(5, 3)) - val ab3 = new IntArrayBuilder + val ab3 = new BoxedArrayBuilder[Int] ab3 += 1 ab3 += 5 ab3 ++= Array.fill[Int](2)(3) diff --git 
a/internal-gateway/Makefile b/internal-gateway/Makefile index af77f678db8..bd9d510742c 100644 --- a/internal-gateway/Makefile +++ b/internal-gateway/Makefile @@ -2,16 +2,22 @@ include ../config.mk .PHONY: build push deploy -TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) - -INTERNAL_GATEWAY_IMAGE := $(DOCKER_PREFIX)/internal-gateway:$(TOKEN) +INTERNAL_GATEWAY_LATEST = $(DOCKER_PREFIX)/internal-gateway:latest +INTERNAL_GATEWAY_IMAGE = $(DOCKER_PREFIX)/internal-gateway:$(shell docker images -q --no-trunc internal-gateway | sed -e 's,[^:]*:,,') build: $(MAKE) -C ../docker hail-ubuntu - python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"'$$(cat ../docker/hail-ubuntu-image-ref)'"}}' Dockerfile Dockerfile.out - ../docker-build.sh . Dockerfile.out $(INTERNAL_GATEWAY_IMAGE) + -docker pull $(INTERNAL_GATEWAY_LATEST) + python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"hail-ubuntu"}}' Dockerfile Dockerfile.out + docker build -t internal-gateway -f Dockerfile.out --cache-from internal-gateway,$(INTERNAL_GATEWAY_LATEST),hail-ubuntu . + +push: build + docker tag internal-gateway $(INTERNAL_GATEWAY_LATEST) + docker push $(INTERNAL_GATEWAY_LATEST) + docker tag internal-gateway $(INTERNAL_GATEWAY_IMAGE) + docker push $(INTERNAL_GATEWAY_IMAGE) -deploy: build +deploy: push python3 ../ci/jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"global":{"internal_ip":"$(INTERNAL_IP)"}}' service.yaml service.yaml.out kubectl -n default apply -f service.yaml.out python3 ../ci/jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"internal_gateway_image":{"image":"$(INTERNAL_GATEWAY_IMAGE)"}}' deployment.yaml deployment.yaml.out diff --git a/js_common/.gitignore b/js_common/.gitignore deleted file mode 100644 index d451ff16c10..00000000000 --- a/js_common/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -node_modules -.DS_Store -dist -dist-ssr -*.local diff --git a/js_common/batch-client.ts b/js_common/batch-client.ts deleted file mode 100644 index f9e2bfd334a..00000000000 --- a/js_common/batch-client.ts +++ /dev/null @@ -1,7 +0,0 @@ -import type { Job, Batch } from './types' - -export type GetJobsResult = { jobs: Job[] } -export type GetBatchesResult = { - batches: Batch[], - last_batch_id: number, -} diff --git a/js_common/hail.css b/js_common/hail.css deleted file mode 100644 index c468b4288a4..00000000000 --- a/js_common/hail.css +++ /dev/null @@ -1,26 +0,0 @@ -.App { - min-height: 100vh; - display: flex; - flex-direction: column; - align-items: center; - font-size: calc(10px + 2vmin); - color: #2b2d2f; -} - -.List { - margin-left: 50px; -} - -body { - margin: 0; - font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen', - 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue', - sans-serif; - -webkit-font-smoothing: antialiased; - -moz-osx-font-smoothing: grayscale; -} - -code { - font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New', - monospace; -} diff --git a/js_common/package-lock.json b/js_common/package-lock.json deleted file mode 100644 index 7814c678a7c..00000000000 --- a/js_common/package-lock.json +++ /dev/null @@ -1,199 +0,0 @@ -{ - "name": "@hail/common", - "version": "0.0.0", - "lockfileVersion": 2, - "requires": true, - "packages": { - "": { - "name": "@hail/common", - "version": "0.0.0", - "license": "ISC", - "dependencies": { - "axios": "^0.21.1", - "react": "^17.0.2" - }, - "devDependencies": { - "@types/react": "^17.0.5", 
- "svelte": "^3.38.2" - } - }, - "node_modules/@types/prop-types": { - "version": "15.7.3", - "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.3.tgz", - "integrity": "sha512-KfRL3PuHmqQLOG+2tGpRO26Ctg+Cq1E01D2DMriKEATHgWLfeNDmq9e29Q9WIky0dQ3NPkd1mzYH8Lm936Z9qw==", - "dev": true - }, - "node_modules/@types/react": { - "version": "17.0.5", - "resolved": "https://registry.npmjs.org/@types/react/-/react-17.0.5.tgz", - "integrity": "sha512-bj4biDB9ZJmGAYTWSKJly6bMr4BLUiBrx9ujiJEoP9XIDY9CTaPGxE5QWN/1WjpPLzYF7/jRNnV2nNxNe970sw==", - "dev": true, - "dependencies": { - "@types/prop-types": "*", - "@types/scheduler": "*", - "csstype": "^3.0.2" - } - }, - "node_modules/@types/scheduler": { - "version": "0.16.1", - "resolved": "https://registry.npmjs.org/@types/scheduler/-/scheduler-0.16.1.tgz", - "integrity": "sha512-EaCxbanVeyxDRTQBkdLb3Bvl/HK7PBK6UJjsSixB0iHKoWxE5uu2Q/DgtpOhPIojN0Zl1whvOd7PoHs2P0s5eA==", - "dev": true - }, - "node_modules/axios": { - "version": "0.21.1", - "resolved": "https://registry.npmjs.org/axios/-/axios-0.21.1.tgz", - "integrity": "sha512-dKQiRHxGD9PPRIUNIWvZhPTPpl1rf/OxTYKsqKUDjBwYylTvV7SjSHJb9ratfyzM6wCdLCOYLzs73qpg5c4iGA==", - "dependencies": { - "follow-redirects": "^1.10.0" - } - }, - "node_modules/csstype": { - "version": "3.0.8", - "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.0.8.tgz", - "integrity": "sha512-jXKhWqXPmlUeoQnF/EhTtTl4C9SnrxSH/jZUih3jmO6lBKr99rP3/+FmrMj4EFpOXzMtXHAZkd3x0E6h6Fgflw==", - "dev": true - }, - "node_modules/follow-redirects": { - "version": "1.14.1", - "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.14.1.tgz", - "integrity": "sha512-HWqDgT7ZEkqRzBvc2s64vSZ/hfOceEol3ac/7tKwzuvEyWx3/4UegXh5oBOIotkGsObyk3xznnSRVADBgWSQVg==", - "funding": [ - { - "type": "individual", - "url": "https://github.com/sponsors/RubenVerborgh" - } - ], - "engines": { - "node": ">=4.0" - }, - "peerDependenciesMeta": { - "debug": { - "optional": true - } - } - }, - "node_modules/js-tokens": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", - "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==" - }, - "node_modules/loose-envify": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", - "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==", - "dependencies": { - "js-tokens": "^3.0.0 || ^4.0.0" - }, - "bin": { - "loose-envify": "cli.js" - } - }, - "node_modules/object-assign": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", - "integrity": "sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM=", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/react": { - "version": "17.0.2", - "resolved": "https://registry.npmjs.org/react/-/react-17.0.2.tgz", - "integrity": "sha512-gnhPt75i/dq/z3/6q/0asP78D0u592D5L1pd7M8P+dck6Fu/jJeL6iVVK23fptSUZj8Vjf++7wXA8UNclGQcbA==", - "dependencies": { - "loose-envify": "^1.1.0", - "object-assign": "^4.1.1" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/svelte": { - "version": "3.38.2", - "resolved": "https://registry.npmjs.org/svelte/-/svelte-3.38.2.tgz", - "integrity": "sha512-q5Dq0/QHh4BLJyEVWGe7Cej5NWs040LWjMbicBGZ+3qpFWJ1YObRmUDZKbbovddLC9WW7THTj3kYbTOFmU9fbg==", - "dev": true, - "engines": { - "node": ">= 8" - } - } - }, - "dependencies": { - "@types/prop-types": { - "version": "15.7.3", - 
"resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.3.tgz", - "integrity": "sha512-KfRL3PuHmqQLOG+2tGpRO26Ctg+Cq1E01D2DMriKEATHgWLfeNDmq9e29Q9WIky0dQ3NPkd1mzYH8Lm936Z9qw==", - "dev": true - }, - "@types/react": { - "version": "17.0.5", - "resolved": "https://registry.npmjs.org/@types/react/-/react-17.0.5.tgz", - "integrity": "sha512-bj4biDB9ZJmGAYTWSKJly6bMr4BLUiBrx9ujiJEoP9XIDY9CTaPGxE5QWN/1WjpPLzYF7/jRNnV2nNxNe970sw==", - "dev": true, - "requires": { - "@types/prop-types": "*", - "@types/scheduler": "*", - "csstype": "^3.0.2" - } - }, - "@types/scheduler": { - "version": "0.16.1", - "resolved": "https://registry.npmjs.org/@types/scheduler/-/scheduler-0.16.1.tgz", - "integrity": "sha512-EaCxbanVeyxDRTQBkdLb3Bvl/HK7PBK6UJjsSixB0iHKoWxE5uu2Q/DgtpOhPIojN0Zl1whvOd7PoHs2P0s5eA==", - "dev": true - }, - "axios": { - "version": "0.21.1", - "resolved": "https://registry.npmjs.org/axios/-/axios-0.21.1.tgz", - "integrity": "sha512-dKQiRHxGD9PPRIUNIWvZhPTPpl1rf/OxTYKsqKUDjBwYylTvV7SjSHJb9ratfyzM6wCdLCOYLzs73qpg5c4iGA==", - "requires": { - "follow-redirects": "^1.10.0" - } - }, - "csstype": { - "version": "3.0.8", - "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.0.8.tgz", - "integrity": "sha512-jXKhWqXPmlUeoQnF/EhTtTl4C9SnrxSH/jZUih3jmO6lBKr99rP3/+FmrMj4EFpOXzMtXHAZkd3x0E6h6Fgflw==", - "dev": true - }, - "follow-redirects": { - "version": "1.14.1", - "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.14.1.tgz", - "integrity": "sha512-HWqDgT7ZEkqRzBvc2s64vSZ/hfOceEol3ac/7tKwzuvEyWx3/4UegXh5oBOIotkGsObyk3xznnSRVADBgWSQVg==" - }, - "js-tokens": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", - "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==" - }, - "loose-envify": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", - "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==", - "requires": { - "js-tokens": "^3.0.0 || ^4.0.0" - } - }, - "object-assign": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", - "integrity": "sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM=" - }, - "react": { - "version": "17.0.2", - "resolved": "https://registry.npmjs.org/react/-/react-17.0.2.tgz", - "integrity": "sha512-gnhPt75i/dq/z3/6q/0asP78D0u592D5L1pd7M8P+dck6Fu/jJeL6iVVK23fptSUZj8Vjf++7wXA8UNclGQcbA==", - "requires": { - "loose-envify": "^1.1.0", - "object-assign": "^4.1.1" - } - }, - "svelte": { - "version": "3.38.2", - "resolved": "https://registry.npmjs.org/svelte/-/svelte-3.38.2.tgz", - "integrity": "sha512-q5Dq0/QHh4BLJyEVWGe7Cej5NWs040LWjMbicBGZ+3qpFWJ1YObRmUDZKbbovddLC9WW7THTj3kYbTOFmU9fbg==", - "dev": true - } - } -} diff --git a/js_common/package.json b/js_common/package.json deleted file mode 100644 index 88f1ebf210e..00000000000 --- a/js_common/package.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "name": "@hail/common", - "version": "0.0.0", - "description": "", - "main": "index.js", - "scripts": {}, - "author": "", - "license": "ISC", - "dependencies": { - "axios": "^0.21.1", - "react": "^17.0.2" - }, - "devDependencies": { - "@types/react": "^17.0.5", - "svelte": "^3.38.2" - } -} diff --git a/js_common/react/batch-client.ts b/js_common/react/batch-client.ts deleted file mode 100644 index 5b68b1ab632..00000000000 --- a/js_common/react/batch-client.ts +++ /dev/null @@ -1,11 +0,0 @@ -import { 
usePollingApi } from './hooks' -import { Maybe } from '../types' -import { GetJobsResult, GetBatchesResult } from '../batch-client' - -export function useJobs(id: number): Maybe<GetJobsResult> { - return usePollingApi(`/api/v1alpha/batches/${id}/jobs`) -} - -export function useBatches(): Maybe<GetBatchesResult> { - return usePollingApi('/api/v1alpha/batches') -} diff --git a/js_common/react/hooks.ts deleted file mode 100644 index 5d06fd3f510..00000000000 --- a/js_common/react/hooks.ts +++ /dev/null @@ -1,32 +0,0 @@ -import { useState, useEffect } from 'react' -import { Maybe } from '../types' -import axios from 'axios' - -const POLL_INTERVAL_MILLIS = 1000 - -export function usePollingApi<T>(apiPath: string): Maybe<T> { - const [value, setValue] = useState<Maybe<T>>(undefined) - - const fetchData = () => axios.get(apiPath).then(res => setValue(res.data)) - useEffect(() => { - fetchData() - const pollInterval = setInterval(fetchData, POLL_INTERVAL_MILLIS) - - return () => clearInterval(pollInterval) - }, []) - - return value -} - -export function useStreamingApi<T>(apiPath: string): Maybe<T> { - const [value, setValue] = useState<Maybe<T>>(undefined) - - useEffect(() => { - const ws = new WebSocket(`ws://localhost:5050${apiPath}`) - ws.onmessage = ev => setValue(JSON.parse(ev.data)) - - return () => ws.close() - }, []) - - return value -} diff --git a/js_common/svelte/batch-client.ts deleted file mode 100644 index b5c197526b6..00000000000 --- a/js_common/svelte/batch-client.ts +++ /dev/null @@ -1,11 +0,0 @@ -import { pollingApiStore } from './store' -import type { StoreApiResult } from './store' -import { GetJobsResult, GetBatchesResult } from '../batch-client' - -export function getJobsStore(id: number): StoreApiResult<GetJobsResult> { - return pollingApiStore(`/api/v1alpha/batches/${id}/jobs`) -} - -export function getBatchesStore(): StoreApiResult<GetBatchesResult> { - return pollingApiStore('/api/v1alpha/batches') -} diff --git a/js_common/svelte/store.ts deleted file mode 100644 index f5b7711d24a..00000000000 --- a/js_common/svelte/store.ts +++ /dev/null @@ -1,27 +0,0 @@ -import { writable, Writable } from 'svelte/store' -import type { Maybe } from '../types' -import axios from 'axios' - -const POLL_INTERVAL_MILLIS = 1000 - -export type StoreApiResult<T> = { - store: Writable<Maybe<T>>, - destroy: () => void, -} - -export function pollingApiStore<T>(apiPath: string): StoreApiResult<T> { - const store = writable(undefined) - const fetchData = () => axios.get(apiPath).then(res => store.set(res.data)) - fetchData() - const interval = setInterval(fetchData, POLL_INTERVAL_MILLIS) - - return { store, destroy: () => clearInterval(interval) } -} - -export function streamingApiStore<T>(apiPath: string): StoreApiResult<T> { - const store = writable(undefined) - const ws = new WebSocket(`ws://localhost:5050${apiPath}`) - ws.onmessage = ev => store.set(JSON.parse(ev.data)) - - return { store, destroy: () => ws.close() } -} diff --git a/js_common/types.ts deleted file mode 100644 index 0b101b4ab99..00000000000 --- a/js_common/types.ts +++ /dev/null @@ -1,36 +0,0 @@ -export type Maybe<T> = T | undefined; - -export type Batch = { - id: number, - user: string, - billing_project: string, - token: string, - state: string, - complete: boolean, - closed: boolean, - n_jobs: number, - n_completed: number, - n_succeeded: number, - n_failed: number, - n_cancelled: number, - time_created: string, - time_closed: string, - time_completed: string, - duration: string, - attributes: any, - msec_mcpu: number, - cost: string, -}
- -export type Job = { - batch_id: number, - billing_project: string, - cost: number, - duration: number, - exit_code: Maybe, - job_id: number, - msec_mcpu: number, - name: Maybe, - state: string, - user: string, -} diff --git a/letsencrypt/Makefile b/letsencrypt/Makefile index 9446ddb034c..51ced96d199 100644 --- a/letsencrypt/Makefile +++ b/letsencrypt/Makefile @@ -1,19 +1,25 @@ include ../config.mk -TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) +LETSENCRYPT_LATEST = $(DOCKER_PREFIX)/letsencrypt:latest +LETSENCRYPT_IMAGE = $(DOCKER_PREFIX)/letsencrypt:$(shell docker images -q --no-trunc letsencrypt | sed -e 's,[^:]*:,,') -LETSENCRYPT_IMAGE := $(DOCKER_PREFIX)/letsencrypt:$(TOKEN) - -.PHONY: build start-service run clean +.PHONY: build push start-service run clean build: - ../docker-build.sh . Dockerfile $(LETSENCRYPT_IMAGE) + docker build -f Dockerfile -t letsencrypt --cache-from letsencrypt,$(LETSENCRYPT_LATEST) . + +.PHONY: push +push: build + docker tag letsencrypt $(LETSENCRYPT_LATEST) + docker push $(LETSENCRYPT_LATEST) + docker tag letsencrypt $(LETSENCRYPT_IMAGE) + docker push $(LETSENCRYPT_IMAGE) start-service: kubectl -n default apply -f service.yaml DRY_RUN ?= false -run: build +run: push echo $(DOMAIN) > domains.txt.out echo internal.$(DOMAIN) >> domains.txt.out sed 's/$$/.$(DOMAIN)/g' subdomains.txt >> domains.txt.out diff --git a/memory/Makefile b/memory/Makefile index 6f6342ce62b..1744f5a6111 100644 --- a/memory/Makefile +++ b/memory/Makefile @@ -3,9 +3,8 @@ include ../config.mk PYTHONPATH := $${PYTHONPATH:+$${PYTHONPATH}:}../hail/python:../gear PYTHON := PYTHONPATH=$(PYTHONPATH) python3 -TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) - -MEMORY_IMAGE := $(DOCKER_PREFIX)/memory:$(TOKEN) +MEMORY_LATEST = $(DOCKER_PREFIX)/memory:latest +MEMORY_IMAGE = $(DOCKER_PREFIX)/memory:$(shell docker images -q --no-trunc memory | sed -e 's,[^:]*:,,') .PHONY: check check: @@ -14,12 +13,21 @@ check: .PHONY: build build: - $(MAKE) -C ../docker service-base - python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"'$$(cat ../docker/service-base-image-ref)'"}}' Dockerfile Dockerfile.out - ../docker-build.sh .. memory/Dockerfile.out $(MEMORY_IMAGE) + $(MAKE) -C ../docker build + -docker pull $(MEMORY_LATEST) + python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"service-base"}}' Dockerfile Dockerfile.out + docker build -t memory -f Dockerfile.out --cache-from memory,$(MEMORY_LATEST),service-base .. + +.PHONY: push +push: build + docker tag memory $(MEMORY_LATEST) + docker push $(MEMORY_LATEST) + docker tag memory $(MEMORY_IMAGE) + docker push $(MEMORY_IMAGE) .PHONY: deploy -deploy: build + +deploy: push ! 
[ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default kubectl -n $(NAMESPACE) apply -f service-account.yaml python3 ../ci/jinja2_render.py '{"default_ns":{"name":"$(NAMESPACE)"}}' service-account-batch-pods.yaml service-account-batch-pods.yaml.out diff --git a/monitoring/Makefile b/monitoring/Makefile index e3b5dcf1eec..a740693de7f 100644 --- a/monitoring/Makefile +++ b/monitoring/Makefile @@ -1,8 +1,7 @@ include ../config.mk -TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) - -MONITORING_IMAGE := $(DOCKER_PREFIX)/monitoring:$(TOKEN) +MONITORING_LATEST = $(DOCKER_PREFIX)/monitoring:latest +MONITORING_IMAGE = $(DOCKER_PREFIX)/monitoring:$(shell docker images -q --no-trunc monitoring:latest | sed -e 's,[^:]*:,,') PYTHONPATH := $${PYTHONPATH:+$${PYTHONPATH}:} PYTHON := PYTHONPATH=$(PYTHONPATH)../hail/python:../gear:../web_common python3 @@ -16,12 +15,20 @@ check: .PHONY: build build: - $(MAKE) -C ../docker service-base - python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"'$$(cat ../docker/service-base-image-ref)'"}}' Dockerfile Dockerfile.out - ../docker-build.sh .. monitoring/Dockerfile.out $(MONITORING_IMAGE) + $(MAKE) -C ../docker build + -docker pull $(MONITORING_LATEST) + python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"service-base"}}' Dockerfile Dockerfile.out + docker build -f Dockerfile.out -t monitoring --cache-from monitoring,$(MONITORING_LATEST),service-base .. + +.PHONY: push +push: build + docker tag monitoring $(MONITORING_LATEST) + docker push $(MONITORING_LATEST) + docker tag monitoring $(MONITORING_IMAGE) + docker push $(MONITORING_IMAGE) .PHONY: deploy -deploy: build +deploy: push ! [ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default python3 ../ci/jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"default_ns":{"name":"$(NAMESPACE)"},"monitoring_image":{"image":"$(MONITORING_IMAGE)"},"monitoring_database":{"user_secret_name":"sql-monitoring-user-config"},"global":{"project":"$(PROJECT)","zone":"$(ZONE)","domain":"$(DOMAIN)"}}' deployment.yaml deployment.yaml.out kubectl -n $(NAMESPACE) apply -f deployment.yaml.out diff --git a/notebook/Makefile b/notebook/Makefile index c3653fe0d17..c3081759e7a 100644 --- a/notebook/Makefile +++ b/notebook/Makefile @@ -1,9 +1,9 @@ include ../config.mk -TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) - -NOTEBOOK_IMAGE := $(DOCKER_PREFIX)/notebook:$(TOKEN) -NOTEBOOK_NGINX_IMAGE := $(DOCKER_PREFIX)/notebook_nginx:$(TOKEN) +NOTEBOOK_LATEST = $(DOCKER_PREFIX)/notebook:latest +NOTEBOOK_IMAGE = $(DOCKER_PREFIX)/notebook:$(shell docker images -q --no-trunc notebook | sed -e 's,[^:]*:,,') +NOTEBOOK_NGINX_LATEST = $(DOCKER_PREFIX)/notebook_nginx:latest +NOTEBOOK_NGINX_IMAGE = $(DOCKER_PREFIX)/notebook_nginx:$(shell docker images -q --no-trunc notebook_nginx | sed -e 's,[^:]*:,,') EXTRA_PYTHONPATH := ../hail/python:../gear:../web_common PYTHON := PYTHONPATH=$${PYTHONPATH:+$${PYTHONPATH}:}$(EXTRA_PYTHONPATH) python3 @@ -17,24 +17,35 @@ check: .PHONY: build-notebook build-notebook: - $(MAKE) -C ../docker service-base - python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"'$$(cat ../docker/service-base-image-ref)'"}}' Dockerfile Dockerfile.out - ../docker-build.sh .. 
notebook/Dockerfile.out $(NOTEBOOK_IMAGE) + $(MAKE) -C ../docker build + -docker pull $(NOTEBOOK_LATEST) + python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"service-base"}}' Dockerfile Dockerfile.out + docker build -t notebook -f Dockerfile.out --cache-from notebook,$(NOTEBOOK_LATEST),service-base .. .PHONY: build-nginx build-nginx: $(MAKE) -C ../docker hail-ubuntu - python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"'$$(cat ../docker/hail-ubuntu-image-ref)'"}}' Dockerfile.nginx Dockerfile.nginx.out + -docker pull $(NOTEBOOK_NGINX_LATEST) + python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"hail-ubuntu"}}' Dockerfile.nginx Dockerfile.nginx.out python3 ../ci/jinja2_render.py '{"deploy": $(DEPLOY), "default_ns": {"name": "$(NAMESPACE)"}}' nginx.conf nginx.conf.out - ../docker-build.sh . Dockerfile.nginx.out $(NOTEBOOK_NGINX_IMAGE) + docker build -t notebook_nginx -f Dockerfile.nginx.out --cache-from notebook_nginx,$(NOTEBOOK_NGINX_LATEST),hail-ubuntu . + +.PHONY: push +push: build-notebook build-nginx + docker tag notebook $(NOTEBOOK_LATEST) + docker push $(NOTEBOOK_LATEST) + docker tag notebook $(NOTEBOOK_IMAGE) + docker push $(NOTEBOOK_IMAGE) + docker tag notebook_nginx $(NOTEBOOK_NGINX_LATEST) + docker push $(NOTEBOOK_NGINX_LATEST) + docker tag notebook_nginx $(NOTEBOOK_NGINX_IMAGE) + docker push $(NOTEBOOK_NGINX_IMAGE) -.PHONY: build -build: build-notebook build-nginx JINJA_ENVIRONMENT = '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"notebook_image":{"image":"$(NOTEBOOK_IMAGE)"},"notebook_nginx_image":{"image":"$(NOTEBOOK_NGINX_IMAGE)"},"default_ns":{"name":"$(NAMESPACE)"},"notebook_database":{"user_secret_name":"sql-notebook-user-config"},"global":{"project":"$(PROJECT)","domain":"$(DOMAIN)","k8s_server_url":"$(KUBERNETES_SERVER_URL)"},"scope":"$(SCOPE)"}' .PHONY: deploy -deploy: build +deploy: push ! [ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default E=$(JINJA_ENVIRONMENT) && \ python3 ../ci/jinja2_render.py $$E deployment.yaml deployment.yaml.out && \ diff --git a/notebook/notebook/templates/workshop/resources.html b/notebook/notebook/templates/workshop/resources.html index 2b22921e7c4..fd2f4729388 100644 --- a/notebook/notebook/templates/workshop/resources.html +++ b/notebook/notebook/templates/workshop/resources.html @@ -3,10 +3,6 @@ {% block content %}

 Workshop Resources
-Institute for Behavioral Genetics Statistical Genetics Workshop 2021
-Dates: June 16th, 2021
-Hail version: 0.2.69
-Notebooks, slides, and data
 BroadE Workshop 2021
 Dates: April 8th, 2021
 Hail version: 0.2.64

diff --git a/package-lock.json b/package-lock.json deleted file mode 100644 index 3025fbfca27..00000000000 --- a/package-lock.json +++ /dev/null @@ -1,216 +0,0 @@ -{ - "name": "hail", - "lockfileVersion": 2, - "requires": true, - "packages": { - "": { - "workspaces": [ - "js_common" - ] - }, - "js_common": { - "name": "@hail/common", - "version": "0.0.0", - "license": "ISC", - "dependencies": { - "axios": "^0.21.1", - "react": "^17.0.2" - }, - "devDependencies": { - "@types/react": "^17.0.5", - "svelte": "^3.38.2" - } - }, - "node_modules/@hail/common": { - "resolved": "js_common", - "link": true - }, - "node_modules/@types/prop-types": { - "version": "15.7.3", - "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.3.tgz", - "integrity": "sha512-KfRL3PuHmqQLOG+2tGpRO26Ctg+Cq1E01D2DMriKEATHgWLfeNDmq9e29Q9WIky0dQ3NPkd1mzYH8Lm936Z9qw==", - "dev": true - }, - "node_modules/@types/react": { - "version": "17.0.6", - "resolved": "https://registry.npmjs.org/@types/react/-/react-17.0.6.tgz", - "integrity": "sha512-u/TtPoF/hrvb63LdukET6ncaplYsvCvmkceasx8oG84/ZCsoLxz9Z/raPBP4lTAiWW1Jb889Y9svHmv8R26dWw==", - "dev": true, - "dependencies": { - "@types/prop-types": "*", - "@types/scheduler": "*", - "csstype": "^3.0.2" - } - }, - "node_modules/@types/scheduler": { - "version": "0.16.1", - "resolved": "https://registry.npmjs.org/@types/scheduler/-/scheduler-0.16.1.tgz", - "integrity": "sha512-EaCxbanVeyxDRTQBkdLb3Bvl/HK7PBK6UJjsSixB0iHKoWxE5uu2Q/DgtpOhPIojN0Zl1whvOd7PoHs2P0s5eA==", - "dev": true - }, - "node_modules/axios": { - "version": "0.21.1", - "resolved": "https://registry.npmjs.org/axios/-/axios-0.21.1.tgz", - "integrity": "sha512-dKQiRHxGD9PPRIUNIWvZhPTPpl1rf/OxTYKsqKUDjBwYylTvV7SjSHJb9ratfyzM6wCdLCOYLzs73qpg5c4iGA==", - "dependencies": { - "follow-redirects": "^1.10.0" - } - }, - "node_modules/csstype": { - "version": "3.0.8", - "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.0.8.tgz", - "integrity": "sha512-jXKhWqXPmlUeoQnF/EhTtTl4C9SnrxSH/jZUih3jmO6lBKr99rP3/+FmrMj4EFpOXzMtXHAZkd3x0E6h6Fgflw==", - "dev": true - }, - "node_modules/follow-redirects": { - "version": "1.14.1", - "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.14.1.tgz", - "integrity": "sha512-HWqDgT7ZEkqRzBvc2s64vSZ/hfOceEol3ac/7tKwzuvEyWx3/4UegXh5oBOIotkGsObyk3xznnSRVADBgWSQVg==", - "funding": [ - { - "type": "individual", - "url": "https://github.com/sponsors/RubenVerborgh" - } - ], - "engines": { - "node": ">=4.0" - }, - "peerDependenciesMeta": { - "debug": { - "optional": true - } - } - }, - "node_modules/js-tokens": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", - "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==" - }, - "node_modules/loose-envify": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", - "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==", - "dependencies": { - "js-tokens": "^3.0.0 || ^4.0.0" - }, - "bin": { - "loose-envify": "cli.js" - } - }, - "node_modules/object-assign": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", - "integrity": "sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM=", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/react": { - "version": "17.0.2", - "resolved": "https://registry.npmjs.org/react/-/react-17.0.2.tgz", - "integrity": 
"sha512-gnhPt75i/dq/z3/6q/0asP78D0u592D5L1pd7M8P+dck6Fu/jJeL6iVVK23fptSUZj8Vjf++7wXA8UNclGQcbA==", - "dependencies": { - "loose-envify": "^1.1.0", - "object-assign": "^4.1.1" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/svelte": { - "version": "3.38.2", - "resolved": "https://registry.npmjs.org/svelte/-/svelte-3.38.2.tgz", - "integrity": "sha512-q5Dq0/QHh4BLJyEVWGe7Cej5NWs040LWjMbicBGZ+3qpFWJ1YObRmUDZKbbovddLC9WW7THTj3kYbTOFmU9fbg==", - "dev": true, - "engines": { - "node": ">= 8" - } - } - }, - "dependencies": { - "@hail/common": { - "version": "file:js_common", - "requires": { - "@types/react": "^17.0.5", - "axios": "^0.21.1", - "react": "^17.0.2", - "svelte": "^3.38.2" - } - }, - "@types/prop-types": { - "version": "15.7.3", - "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.3.tgz", - "integrity": "sha512-KfRL3PuHmqQLOG+2tGpRO26Ctg+Cq1E01D2DMriKEATHgWLfeNDmq9e29Q9WIky0dQ3NPkd1mzYH8Lm936Z9qw==", - "dev": true - }, - "@types/react": { - "version": "17.0.6", - "resolved": "https://registry.npmjs.org/@types/react/-/react-17.0.6.tgz", - "integrity": "sha512-u/TtPoF/hrvb63LdukET6ncaplYsvCvmkceasx8oG84/ZCsoLxz9Z/raPBP4lTAiWW1Jb889Y9svHmv8R26dWw==", - "dev": true, - "requires": { - "@types/prop-types": "*", - "@types/scheduler": "*", - "csstype": "^3.0.2" - } - }, - "@types/scheduler": { - "version": "0.16.1", - "resolved": "https://registry.npmjs.org/@types/scheduler/-/scheduler-0.16.1.tgz", - "integrity": "sha512-EaCxbanVeyxDRTQBkdLb3Bvl/HK7PBK6UJjsSixB0iHKoWxE5uu2Q/DgtpOhPIojN0Zl1whvOd7PoHs2P0s5eA==", - "dev": true - }, - "axios": { - "version": "0.21.1", - "resolved": "https://registry.npmjs.org/axios/-/axios-0.21.1.tgz", - "integrity": "sha512-dKQiRHxGD9PPRIUNIWvZhPTPpl1rf/OxTYKsqKUDjBwYylTvV7SjSHJb9ratfyzM6wCdLCOYLzs73qpg5c4iGA==", - "requires": { - "follow-redirects": "^1.10.0" - } - }, - "csstype": { - "version": "3.0.8", - "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.0.8.tgz", - "integrity": "sha512-jXKhWqXPmlUeoQnF/EhTtTl4C9SnrxSH/jZUih3jmO6lBKr99rP3/+FmrMj4EFpOXzMtXHAZkd3x0E6h6Fgflw==", - "dev": true - }, - "follow-redirects": { - "version": "1.14.1", - "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.14.1.tgz", - "integrity": "sha512-HWqDgT7ZEkqRzBvc2s64vSZ/hfOceEol3ac/7tKwzuvEyWx3/4UegXh5oBOIotkGsObyk3xznnSRVADBgWSQVg==" - }, - "js-tokens": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", - "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==" - }, - "loose-envify": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", - "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==", - "requires": { - "js-tokens": "^3.0.0 || ^4.0.0" - } - }, - "object-assign": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", - "integrity": "sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM=" - }, - "react": { - "version": "17.0.2", - "resolved": "https://registry.npmjs.org/react/-/react-17.0.2.tgz", - "integrity": "sha512-gnhPt75i/dq/z3/6q/0asP78D0u592D5L1pd7M8P+dck6Fu/jJeL6iVVK23fptSUZj8Vjf++7wXA8UNclGQcbA==", - "requires": { - "loose-envify": "^1.1.0", - "object-assign": "^4.1.1" - } - }, - "svelte": { - "version": "3.38.2", - "resolved": "https://registry.npmjs.org/svelte/-/svelte-3.38.2.tgz", - "integrity": 
"sha512-q5Dq0/QHh4BLJyEVWGe7Cej5NWs040LWjMbicBGZ+3qpFWJ1YObRmUDZKbbovddLC9WW7THTj3kYbTOFmU9fbg==", - "dev": true - } - } -} diff --git a/package.json b/package.json deleted file mode 100644 index 363635989b8..00000000000 --- a/package.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "workspaces": [ - "js_common" - ] -} diff --git a/prometheus/Makefile b/prometheus/Makefile index 09dcc5cd820..8d972b49c93 100644 --- a/prometheus/Makefile +++ b/prometheus/Makefile @@ -2,17 +2,23 @@ include ../config.mk .PHONY: build push deploy -TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) - -PROM_NGINX_IMAGE := $(DOCKER_PREFIX)/prom_nginx:$(TOKEN) +PROM_NGINX_LATEST = $(DOCKER_PREFIX)/prom_nginx:latest +PROM_NGINX_IMAGE = $(DOCKER_PREFIX)/prom_nginx:$(shell docker images -q --no-trunc prom_nginx | sed -e 's,[^:]*:,,') build: $(MAKE) -C ../docker hail-ubuntu - python3 ../ci/jinja2_render.py '{"hail_ubuntu_image": {"image": "'$$(cat ../docker/hail-ubuntu-image-ref)'"}}' Dockerfile.nginx Dockerfile.nginx.out + -docker pull $(PROM_NGINX_LATEST) + python3 ../ci/jinja2_render.py '{"hail_ubuntu_image": {"image": "hail-ubuntu"}}' Dockerfile.nginx Dockerfile.nginx.out python3 ../ci/jinja2_render.py '{"deploy": $(DEPLOY), "default_ns": {"name": "$(NAMESPACE)"}}' nginx.conf nginx.conf.out - ../docker-build.sh . Dockerfile.nginx.out $(PROM_NGINX_IMAGE) + docker build -t prom_nginx -f Dockerfile.nginx.out --cache-from prom_nginx,$(PROM_NGINX_LATEST),hail-ubuntu . + +push: build + docker tag prom_nginx $(PROM_NGINX_LATEST) + docker push $(PROM_NGINX_LATEST) + docker tag prom_nginx $(PROM_NGINX_IMAGE) + docker push $(PROM_NGINX_IMAGE) -deploy: build +deploy: push ! [ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default python3 ../ci/jinja2_render.py '{"deploy":$(DEPLOY),"default_ns":{"name":"$(NAMESPACE)"}, "prom_nginx_image": {"image": "$(PROM_NGINX_IMAGE)"}}' prometheus.yaml prometheus.yaml.out kubectl -n $(NAMESPACE) apply -f prometheus.yaml.out diff --git a/pylintrc b/pylintrc index f6ffe559d8a..f7c3092e140 100644 --- a/pylintrc +++ b/pylintrc @@ -11,7 +11,7 @@ # C1801 Do not use len(SEQUENCE) as condition value # W0221 Parameters differ from overridden method -disable=C0111,W1203,W1202,C0111,R0913,W0622,W0212,W0621,R0914,W0603,R0902,R0801,C1801,W0221,line-too-long,too-few-public-methods,fixme,too-many-function-args,too-many-branches,too-many-lines,too-many-boolean-expressions,too-many-statements,too-many-nested-blocks,wrong-import-order,logging-not-lazy,unnecessary-lambda,too-many-public-methods,broad-except,too-many-return-statements,bare-except,invalid-name,unsubscriptable-object +disable=C0111,W1203,W1202,C0111,R0913,W0622,W0212,W0621,R0914,W0603,R0902,R0801,C1801,W0221,line-too-long,too-few-public-methods,fixme,too-many-function-args,too-many-branches,too-many-lines,too-many-boolean-expressions,too-many-statements,too-many-nested-blocks,wrong-import-order,logging-not-lazy,unnecessary-lambda,too-many-public-methods,broad-except,too-many-return-statements,bare-except [FORMAT] diff --git a/query/Makefile b/query/Makefile index 4740927c5be..3f01a55f6b2 100644 --- a/query/Makefile +++ b/query/Makefile @@ -3,9 +3,8 @@ include ../config.mk EXTRA_PYTHONPATH := ../hail/python:../gear PYTHON := PYTHONPATH=$${PYTHONPATH:+$${PYTHONPATH}:}$(EXTRA_PYTHONPATH) python3 -TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) - -QUERY_IMAGE := $(DOCKER_PREFIX)/query:$(TOKEN) +QUERY_LATEST = $(DOCKER_PREFIX)/query:latest +QUERY_IMAGE = $(DOCKER_PREFIX)/query:$(shell docker images 
-q --no-trunc query | sed -e 's,[^:]*:,,') .PHONY: check check: @@ -18,14 +17,22 @@ build: $(MAKE) -C ../hail shadowJar # janky cp ../hail/build/libs/hail-all-spark.jar ./hail.jar - python3 ../ci/jinja2_render.py '{"service_java_run_base_image":{"image":"'$$(cat ../docker/service-java-run-base-image-ref)'"}}' Dockerfile Dockerfile.out - ../docker-build.sh . Dockerfile.out $(QUERY_IMAGE) + -docker pull $(QUERY_LATEST) + python3 ../ci/jinja2_render.py '{"service_java_run_base_image":{"image":"service-java-run-base"}}' Dockerfile Dockerfile.out + docker build -t query -f Dockerfile.out --cache-from query,$(QUERY_LATEST),service-base . + +.PHONY: push +push: build + docker tag query $(QUERY_LATEST) + docker push $(QUERY_LATEST) + docker tag query $(QUERY_IMAGE) + docker push $(QUERY_IMAGE) UPLOAD_QUERY_JAR_TOKEN := $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) HAIL_REVISION := $(shell git rev-parse HEAD) .PHONY: deploy -deploy: build +deploy: push ! [ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default kubectl -n $(NAMESPACE) apply -f service-account.yaml gsutil cp ./hail.jar gs://hail-test-dmk9z/$(UPLOAD_QUERY_JAR_TOKEN)/jars/$(HAIL_REVISION).jar diff --git a/shuffler/Makefile b/shuffler/Makefile index 82d60a3e7bf..ac29f3d32b7 100644 --- a/shuffler/Makefile +++ b/shuffler/Makefile @@ -1,22 +1,29 @@ include ../config.mk -TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) - -SHUFFLER_IMAGE := $(DOCKER_PREFIX)/shuffler:$(TOKEN) +SHUFFLER_LATEST = $(DOCKER_PREFIX)/shuffler:latest +SHUFFLER_IMAGE = $(DOCKER_PREFIX)/shuffler:$(shell docker images -q --no-trunc shuffler | sed -e 's,[^:]*:,,') PYTHONPATH := $${PYTHONPATH:+$${PYTHONPATH}:}../hail/python:../gear:../web_common PYTHON := PYTHONPATH=$(PYTHONPATH) python3 .PHONY: build build: - $(MAKE) -C ../docker service-base - python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"'$$(cat ../docker/service-base-image-ref)'"}}' Dockerfile Dockerfile.out + $(MAKE) -C ../docker build + -docker pull $(SHUFFLER_LATEST) + python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"service-base"}}' Dockerfile Dockerfile.out $(MAKE) -C ../hail shadowJar cp ../hail/build/libs/hail-all-spark.jar hail.jar - ../docker-build.sh . Dockerfile.out $(SHUFFLER_IMAGE) + docker build -t shuffler -f Dockerfile.out --cache-from shuffler,$(SHUFFLER_LATEST),service-base . + +.PHONY: push +push: build + docker tag shuffler $(SHUFFLER_LATEST) + docker push $(SHUFFLER_LATEST) + docker tag shuffler $(SHUFFLER_IMAGE) + docker push $(SHUFFLER_IMAGE) .PHONY: deploy -deploy: build +deploy: push ! [ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default python3 ../ci/jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"shuffler_image":{"image":"$(SHUFFLER_IMAGE)"},"default_ns":{"name":"$(NAMESPACE)"},"global":{"project":"$(PROJECT)","domain":"$(DOMAIN)","k8s_server_url":"$(KUBERNETES_SERVER_URL)"}}' deployment.yaml deployment.yaml.out kubectl -n $(NAMESPACE) apply -f deployment.yaml.out diff --git a/site/Makefile b/site/Makefile index 49e83ad2a3e..4fc3bf9cfde 100644 --- a/site/Makefile +++ b/site/Makefile @@ -49,7 +49,7 @@ push: build deploy: push ! 
[ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default $(MAKE) -C ../docker hail-ubuntu - python3 ../ci/jinja2_render.py '{"global": {"domain":"$(DOMAIN)"},"default_ns":{"name":"$(NAMESPACE)"},"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"site_image":{"image":"$(IMAGE)"},"hail_ubuntu_image":{"image":"'$$(cat ../docker/hail-ubuntu-image-ref)'"}' deployment.yaml deployment.yaml.out + python3 ../ci/jinja2_render.py '{"global": {"domain":"$(DOMAIN)"},"default_ns":{"name":"$(NAMESPACE)"},"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"site_image":{"image":"$(IMAGE)"},"hail_ubuntu_image":{"image":"hail-ubuntu"}' deployment.yaml deployment.yaml.out kubectl -n $(NAMESPACE) apply -f deployment.yaml.out clean: diff --git a/tls/Dockerfile b/tls/Dockerfile index a116655a430..8971c02836f 100644 --- a/tls/Dockerfile +++ b/tls/Dockerfile @@ -1,17 +1,8 @@ -FROM {{ hail_ubuntu_image.image }} +FROM {{ service_base_image.image }} -# source: https://cloud.google.com/storage/docs/gsutil_install#linux # re: RANDFILE, https://github.com/openssl/openssl/issues/7754#issuecomment-444063355 -# jdk not strictly necessary, but we want keytool -RUN curl -sSLO https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-334.0.0-linux-x86_64.tar.gz && \ - tar -xf google-cloud-sdk-334.0.0-linux-x86_64.tar.gz && \ - curl -sSLO https://dl.k8s.io/release/v1.19.7/bin/linux/amd64/kubectl && \ - install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl && \ - hail-apt-get-install openssl openjdk-8-jdk-headless && \ - sed -i 's/^RANDFILE/#RANDFILE/' /etc/ssl/openssl.cnf && \ - hail-pip-install pyyaml - -ENV PATH $PATH:/google-cloud-sdk/bin +RUN hail-apt-get-install openssl && \ + sed -i 's/^RANDFILE/#RANDFILE/' /etc/ssl/openssl.cnf COPY config.yaml . COPY create_certs.py . 
diff --git a/tls/create_certs.py b/tls/create_certs.py index 4fcde6e1382..c362fb7d215 100644 --- a/tls/create_certs.py +++ b/tls/create_certs.py @@ -5,7 +5,7 @@ import subprocess as sp import tempfile -# gear, hailtop, and web_common are not available in the create_certs image +from hailtop.utils import sync_check_shell parser = argparse.ArgumentParser(prog='create_certs.py', description='create hail certs') parser.add_argument('namespace', type=str, help='kubernetes namespace') @@ -22,8 +22,7 @@ def echo_check_call(cmd): - print(cmd) - sp.run(cmd, check=True) + sync_check_shell(' '.join(cmd), echo=True) def create_key_and_cert(p): diff --git a/ukbb-rg/Makefile b/ukbb-rg/Makefile index e0a9183967e..ab8b4f7d2bf 100644 --- a/ukbb-rg/Makefile +++ b/ukbb-rg/Makefile @@ -1,22 +1,36 @@ include ../config.mk -TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) +UKBB_RG_STATIC_LATEST = $(DOCKER_PREFIX)/ukbb-rg-static:latest +UKBB_RG_STATIC_IMAGE = $(DOCKER_PREFIX)/ukbb-rg-static:$(shell docker images -q --no-trunc ukbb-rg-static | sed -e 's,[^:]*:,,') -UKBB_RG_STATIC_IMAGE := $(DOCKER_PREFIX)/ukbb-rg-static:$(TOKEN) -UKBB_RG_BROWSER_IMAGE := $(DOCKER_PREFIX)/ukbb-rg-browser:$(TOKEN) +UKBB_RG_BROWSER_LATEST = $(DOCKER_PREFIX)/ukbb-rg-browser:latest +UKBB_RG_BROWSER_IMAGE = $(DOCKER_PREFIX)/ukbb-rg-browser:$(shell docker images -q --no-trunc ukbb-rg-browser | sed -e 's,[^:]*:,,') .PHONY: build build: ls app/app.R app/www/rainbowvis.js # read the README - + docker pull $(DOCKER_PREFIX)/ubuntu:18.04 + -docker pull $(UKBB_RG_STATIC_LATEST) python3 ../ci/jinja2_render.py '{"docker_prefix":"$(DOCKER_PREFIX)"}' Dockerfile.static Dockerfile.static.out - ../docker-build.sh . Dockerfile.static.out $(UKBB_RG_STATIC_IMAGE) - + docker build -t ukbb-rg-static -f Dockerfile.static.out --cache-from ukbb-rg-static,$(UKBB_RG_STATIC_LATEST),$(DOCKER_PREFIX)/ubuntu:18.04 . + docker pull $(DOCKER_PREFIX)/ubuntu:19.04 + -docker pull $(UKBB_RG_BROWSER_LATEST) python3 ../ci/jinja2_render.py '{"docker_prefix":"$(DOCKER_PREFIX)"}' Dockerfile.browser Dockerfile.browser.out - ../docker-build.sh . Dockerfile.browser.out $(UKBB_RG_BROWSER_IMAGE) + docker build -t ukbb-rg-browser -f Dockerfile.browser.out --cache-from ukbb-rg-browser,$(UKBB_RG_BROWSER_LATEST),$(DOCKER_PREFIX)/ubuntu:19.04 . 
+ +.PHONY: push +push: build + docker tag ukbb-rg-static $(UKBB_RG_STATIC_LATEST) + docker push $(UKBB_RG_STATIC_LATEST) + docker tag ukbb-rg-static $(UKBB_RG_STATIC_IMAGE) + docker push $(UKBB_RG_STATIC_IMAGE) + docker tag ukbb-rg-browser $(UKBB_RG_BROWSER_LATEST) + docker push $(UKBB_RG_BROWSER_LATEST) + docker tag ukbb-rg-browser $(UKBB_RG_BROWSER_IMAGE) + docker push $(UKBB_RG_BROWSER_IMAGE) .PHONY: deploy -deploy: build +deploy: push python3 ../ci/jinja2_render.py '{"docker_prefix":"$(DOCKER_PREFIX)"}' deployment.yaml deployment.yaml.out kubectl apply -f deployment.yaml.out diff --git a/web_common/web_common/styles/main.scss b/web_common/web_common/styles/main.scss index 44cbb8c2360..f6faf33a59a 100644 --- a/web_common/web_common/styles/main.scss +++ b/web_common/web_common/styles/main.scss @@ -243,25 +243,9 @@ a { } tr { - &:nth-of-type(7n+1) { - background-color: #fff4f4; - } - &:nth-of-type(7n+2) { - background-color: #fff9f1; - } - &:nth-of-type(7n+3) { - background-color: #fdfdf1; - } - &:nth-of-type(7n+4) { - background-color: #f4fff4; - } - &:nth-of-type(7n+5) { - background-color: #f2f4ff; - } - &:nth-of-type(7n+6) { - background-color: #fff6ff; + &:nth-of-type(even) { + background-color: #f2f2f2; } - td.data-table-bad { color: red; border-color: red; diff --git a/website/Makefile b/website/Makefile index 2cbafcae65c..84039d6e705 100644 --- a/website/Makefile +++ b/website/Makefile @@ -1,10 +1,8 @@ include ../config.mk -.PHONY: docs build run run-docker deploy clean +.PHONY: docs build run run-docker push deploy clean -TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) - -WEBSITE_IMAGE := $(DOCKER_PREFIX)/website:$(TOKEN) +IMAGE = $(DOCKER_PREFIX)/website:$(shell docker images -q --no-trunc website | sed -e 's,[^:]*:,,') check: curlylint . @@ -15,8 +13,8 @@ docs: build: docs $(MAKE) -C ../docker service-base - python3 ../ci/jinja2_render.py '{"service_base_image": {"image":"'$$(cat ../docker/service-base-image-ref)'"}}' Dockerfile Dockerfile.out - ../docker-build.sh . Dockerfile.out $(WEBSITE_IMAGE) + python3 ../ci/jinja2_render.py '{"service_base_image": {"image":"service-base"}}' Dockerfile Dockerfile.out + docker build -f Dockerfile.out -t website . run: docs cd website && tar -xvzf ../docs.tar.gz @@ -25,10 +23,14 @@ run: docs run-docker: build docker run -e HAIL_DOMAIN=localhost:5000 -p 5000:5000 website python3 -m website local -deploy: build +push: build + docker tag website $(IMAGE) + docker push $(IMAGE) + +deploy: push ! [ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default $(MAKE) -C ../docker hail-ubuntu - python3 ../ci/jinja2_render.py '{"global": {"domain":"$(DOMAIN)"},"default_ns":{"name":"$(NAMESPACE)"},"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"website_image":{"image":"$(WEBSITE_IMAGE)"}}' deployment.yaml deployment.yaml.out + python3 ../ci/jinja2_render.py '{"global": {"domain":"$(DOMAIN)"},"default_ns":{"name":"$(NAMESPACE)"},"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"website_image":{"image":"$(IMAGE)"}}' deployment.yaml deployment.yaml.out kubectl -n $(NAMESPACE) apply -f deployment.yaml.out clean: From 0fcee31a86e1ce562f9db2ef8cdb547b235b36bf Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Mon, 5 Jul 2021 15:58:49 +1000 Subject: [PATCH 264/501] Revert "Revert "Merge from Upstream"" This reverts commit 7d3b7bce312e85e4ac5ba89336f9fb8999d17000. 
--- .git-blame-ignore-revs | 2 + .gitignore | 1 + address/Makefile | 21 +- admin-pod/Makefile | 6 +- auth/Makefile | 21 +- batch/Makefile | 37 +- batch/batch/batch_configuration.py | 1 + batch/batch/driver/create_instance.py | 12 +- batch/batch/driver/gce.py | 50 +- batch/batch/driver/instance_collection.py | 28 +- batch/batch/driver/job.py | 7 +- batch/batch/driver/pool.py | 69 +- batch/batch/front_end/front_end.py | 46 +- batch/batch/front_end/templates/job.html | 37 +- batch/batch/front_end/validate.py | 6 +- batch/batch/utils.py | 11 +- batch/batch/worker/disk.py | 25 +- batch/batch/worker/flock.py | 51 - batch/batch/worker/worker.py | 91 +- batch/deployment.yaml | 4 + batch/test/test_batch.py | 37 +- batch2/proxy.py | 19 + batch2/react-batch/.eslintrc.json | 35 + batch2/react-batch/.gitignore | 5 + batch2/react-batch/index.html | 13 + batch2/react-batch/package-lock.json | 6591 +++++++++++++++++ batch2/react-batch/package.json | 30 + batch2/react-batch/src/App.tsx | 24 + .../react-batch/src/components/BatchTable.tsx | 20 + .../react-batch/src/components/JobTable.tsx | 21 + batch2/react-batch/src/main.tsx | 13 + batch2/react-batch/src/pages/BatchPage.tsx | 14 + batch2/react-batch/src/pages/BatchesPage.tsx | 14 + batch2/react-batch/tsconfig.json | 20 + batch2/react-batch/vite.config.ts | 14 + batch2/svelte-batch/.gitignore | 4 + batch2/svelte-batch/index.html | 13 + batch2/svelte-batch/package-lock.json | 940 +++ batch2/svelte-batch/package.json | 17 + batch2/svelte-batch/public/favicon.ico | Bin 0 -> 1150 bytes batch2/svelte-batch/src/App.svelte | 22 + batch2/svelte-batch/src/assets/svelte.png | Bin 0 -> 5185 bytes .../src/components/BatchTable.svelte | 15 + .../src/components/JobTable.svelte | 14 + batch2/svelte-batch/src/global.d.ts | 2 + batch2/svelte-batch/src/main.ts | 7 + .../svelte-batch/src/pages/BatchPage.svelte | 25 + .../svelte-batch/src/pages/BatchesPage.svelte | 24 + batch2/svelte-batch/svelte.config.cjs | 7 + batch2/svelte-batch/tsconfig.json | 37 + batch2/svelte-batch/vite.config.js | 13 + benchmark-service/Makefile | 21 +- .../python/benchmark_hail/compare/compare.py | 4 +- .../python/benchmark_hail/run/__init__.py | 4 +- .../benchmark_hail/run/methods_benchmarks.py | 34 +- .../benchmark_hail/run/sentinel_benchmarks.py | 50 + benchmark/scripts/benchmark_in_batch.py | 6 +- bootstrap-gateway/Makefile | 20 +- build.yaml | 72 +- ci/Dockerfile.ci-utils | 1 - ci/Makefile | 64 +- ci/buildkit/Dockerfile | 6 + ...ication-credentials-to-docker-auth-config} | 4 +- ci/ci/build.py | 52 +- ci/ci/constants.py | 1 + ci/ci/environment.py | 2 +- ci/ci/templates/pr-table.html | 4 + ci/ci/templates/pr.html | 1 + ci/deployment.yaml | 4 +- ci/kaniko/Dockerfile | 29 - ci/test/resources/build.yaml | 8 +- ...extract_1000_Genomes_30x_GRCh38_samples.sh | 5 + .../extract_1000_Genomes_NYGC_30x_GRCh38.py | 32 + datasets/extract/extract_CADD.py | 35 + datasets/extract/extract_dbSNP.py | 29 + .../notebooks/1kg_NYGC_30x_datasets.ipynb | 814 ++ datasets/notebooks/CADD_datasets.ipynb | 124 + datasets/notebooks/dbSNP_datasets.ipynb | 685 ++ datasets/notebooks/reformat_buckets.ipynb | 195 + datasets/notebooks/reformat_buckets.txt | 201 + .../notebooks/reformat_buckets_mappings.json | 605 ++ dev-docs/batch-operation.md | 38 +- dev-docs/compiler-team/development_tools.md | 79 + dev-docs/development_process.md | 6 +- dev-docs/google-cloud-cookbook.md | 30 + docker-build.sh | 20 + docker/Dockerfile.base | 40 +- docker/Dockerfile.service-base | 14 +- docker/Makefile | 114 +- docker/publish-public-images.sh | 34 
+- docker/python-dill/push.sh | 21 +- docker/requirements.txt | 7 +- gateway/Makefile | 18 +- grafana/Makefile | 18 +- grafana/deployment.yaml | 2 +- hail/Makefile | 6 +- hail/build.gradle | 5 +- hail/python/MANIFEST.in | 1 + hail/python/hail/__init__.py | 3 +- hail/python/hail/backend/spark_backend.py | 17 +- hail/python/hail/context.py | 5 +- hail/python/hail/docs/change_log.md | 35 +- hail/python/hail/docs/conf.py | 4 +- .../1000_Genomes_HighCov_autosomes.rst | 214 + .../schemas/1000_Genomes_HighCov_chrX.rst | 214 + .../schemas/1000_Genomes_HighCov_chrY.rst | 175 + .../1000_Genomes_Retracted_autosomes.rst | 128 + .../schemas/1000_Genomes_Retracted_chrX.rst | 128 + .../schemas/1000_Genomes_Retracted_chrY.rst | 117 + .../schemas/1000_Genomes_autosomes.rst | 3 +- .../datasets/schemas/1000_Genomes_chrX.rst | 3 +- .../datasets/schemas/1000_Genomes_chrY.rst | 3 +- .../hail/docs/datasets/schemas/CADD.rst | 3 +- ...ubcutaneous_all_snp_gene_associations.rst} | 4 +- ...eral_Omentum_all_snp_gene_associations.rst | 39 + ...renal_Gland_all_snp_gene_associations.rst} | 4 +- ...Artery_Aorta_all_snp_gene_associations.rst | 39 + ...ery_Coronary_all_snp_gene_associations.rst | 39 + ...tery_Tibial_all_snp_gene_associations.rst} | 4 +- ...in_Amygdala_all_snp_gene_associations.rst} | 4 +- ..._cortex_BA24_all_snp_gene_associations.rst | 39 + ...asal_ganglia_all_snp_gene_associations.rst | 39 + ...r_Hemisphere_all_snp_gene_associations.rst | 39 + ..._Cerebellum_all_snp_gene_associations.rst} | 4 +- ...Brain_Cortex_all_snp_gene_associations.rst | 39 + ...l_Cortex_BA9_all_snp_gene_associations.rst | 39 + ..._Hippocampus_all_snp_gene_associations.rst | 39 + ...ypothalamus_all_snp_gene_associations.rst} | 4 +- ...asal_ganglia_all_snp_gene_associations.rst | 39 + ...asal_ganglia_all_snp_gene_associations.rst | 39 + ...cervical_c-1_all_snp_gene_associations.rst | 39 + ...tantia_nigra_all_snp_gene_associations.rst | 39 + ...mmary_Tissue_all_snp_gene_associations.rst | 39 + ..._fibroblasts_all_snp_gene_associations.rst | 39 + ..._lymphocytes_all_snp_gene_associations.rst | 39 + ...olon_Sigmoid_all_snp_gene_associations.rst | 39 + ...n_Transverse_all_snp_gene_associations.rst | 39 + ...eal_Junction_all_snp_gene_associations.rst | 39 + ...hagus_Mucosa_all_snp_gene_associations.rst | 39 + ...s_Muscularis_all_snp_gene_associations.rst | 39 + ...al_Appendage_all_snp_gene_associations.rst | 39 + ...ft_Ventricle_all_snp_gene_associations.rst | 39 + ...idney_Cortex_all_snp_gene_associations.rst | 39 + ..._eQTL_Liver_all_snp_gene_associations.rst} | 4 +- ...x_eQTL_Lung_all_snp_gene_associations.rst} | 4 +- ...livary_Gland_all_snp_gene_associations.rst | 39 + ...cle_Skeletal_all_snp_gene_associations.rst | 39 + ...Nerve_Tibial_all_snp_gene_associations.rst | 39 + ..._eQTL_Ovary_all_snp_gene_associations.rst} | 4 +- ...QTL_Pancreas_all_snp_gene_associations.rst | 39 + ...L_Pituitary_all_snp_gene_associations.rst} | 4 +- ...QTL_Prostate_all_snp_gene_associations.rst | 39 + ...d_Suprapubic_all_snp_gene_associations.rst | 39 + ...ed_Lower_leg_all_snp_gene_associations.rst | 39 + ...rminal_Ileum_all_snp_gene_associations.rst | 39 + ...eQTL_Spleen_all_snp_gene_associations.rst} | 6 +- ...QTL_Stomach_all_snp_gene_associations.rst} | 4 +- ...eQTL_Testis_all_snp_gene_associations.rst} | 6 +- ...QTL_Thyroid_all_snp_gene_associations.rst} | 4 +- ...eQTL_Uterus_all_snp_gene_associations.rst} | 6 +- ...eQTL_Vagina_all_snp_gene_associations.rst} | 6 +- ..._Whole_Blood_all_snp_gene_associations.rst | 39 + 
.../GTEx_eQTL_allpairs_Artery_Tibial.rst | 39 - .../GTEx_eQTL_allpairs_Brain_Amygdala.rst | 39 - ...s_Brain_Anterior_cingulate_cortex_BA24.rst | 39 - ...L_allpairs_Brain_Caudate_basal_ganglia.rst | 39 - ...L_allpairs_Brain_Cerebellar_Hemisphere.rst | 39 - .../GTEx_eQTL_allpairs_Brain_Cerebellum.rst | 39 - .../GTEx_eQTL_allpairs_Brain_Cortex.rst | 39 - .../GTEx_eQTL_allpairs_Brain_Hippocampus.rst | 39 - .../GTEx_eQTL_allpairs_Brain_Hypothalamus.rst | 39 - ...L_allpairs_Brain_Putamen_basal_ganglia.rst | 39 - .../GTEx_eQTL_allpairs_Colon_Sigmoid.rst | 39 - .../GTEx_eQTL_allpairs_Colon_Transverse.rst | 39 - .../GTEx_eQTL_allpairs_Esophagus_Mucosa.rst | 39 - ...TEx_eQTL_allpairs_Esophagus_Muscularis.rst | 39 - ...TEx_eQTL_allpairs_Heart_Left_Ventricle.rst | 39 - .../GTEx_eQTL_allpairs_Kidney_Cortex.rst | 39 - .../schemas/GTEx_eQTL_allpairs_Liver.rst | 39 - ...TEx_eQTL_allpairs_Minor_Salivary_Gland.rst | 39 - .../GTEx_eQTL_allpairs_Muscle_Skeletal.rst | 39 - .../GTEx_eQTL_allpairs_Nerve_Tibial.rst | 39 - .../schemas/GTEx_eQTL_allpairs_Ovary.rst | 39 - .../schemas/GTEx_eQTL_allpairs_Pancreas.rst | 39 - .../schemas/GTEx_eQTL_allpairs_Pituitary.rst | 39 - .../schemas/GTEx_eQTL_allpairs_Prostate.rst | 39 - ...TL_allpairs_Skin_Sun_Exposed_Lower_leg.rst | 39 - .../schemas/GTEx_eQTL_allpairs_Spleen.rst | 39 - .../schemas/GTEx_eQTL_allpairs_Stomach.rst | 39 - .../schemas/GTEx_eQTL_allpairs_Testis.rst | 39 - .../schemas/GTEx_eQTL_allpairs_Thyroid.rst | 39 - .../schemas/GTEx_eQTL_allpairs_Uterus.rst | 39 - .../schemas/GTEx_eQTL_allpairs_Vagina.rst | 39 - .../GTEx_eQTL_allpairs_Whole_Blood.rst | 39 - ...ubcutaneous_all_snp_gene_associations.rst} | 4 +- ...eral_Omentum_all_snp_gene_associations.rst | 42 + ...renal_Gland_all_snp_gene_associations.rst} | 4 +- ...Artery_Aorta_all_snp_gene_associations.rst | 42 + ...ery_Coronary_all_snp_gene_associations.rst | 42 + ...tery_Tibial_all_snp_gene_associations.rst} | 4 +- ...in_Amygdala_all_snp_gene_associations.rst} | 4 +- ..._cortex_BA24_all_snp_gene_associations.rst | 42 + ...asal_ganglia_all_snp_gene_associations.rst | 42 + ...r_Hemisphere_all_snp_gene_associations.rst | 42 + ..._Cerebellum_all_snp_gene_associations.rst} | 4 +- ...Brain_Cortex_all_snp_gene_associations.rst | 42 + ...l_Cortex_BA9_all_snp_gene_associations.rst | 42 + ..._Hippocampus_all_snp_gene_associations.rst | 42 + ...ypothalamus_all_snp_gene_associations.rst} | 4 +- ...asal_ganglia_all_snp_gene_associations.rst | 42 + ...asal_ganglia_all_snp_gene_associations.rst | 42 + ...cervical_c-1_all_snp_gene_associations.rst | 42 + ...tantia_nigra_all_snp_gene_associations.rst | 42 + ...mmary_Tissue_all_snp_gene_associations.rst | 42 + ..._fibroblasts_all_snp_gene_associations.rst | 42 + ..._lymphocytes_all_snp_gene_associations.rst | 42 + ...olon_Sigmoid_all_snp_gene_associations.rst | 42 + ...n_Transverse_all_snp_gene_associations.rst | 42 + ...eal_Junction_all_snp_gene_associations.rst | 42 + ...hagus_Mucosa_all_snp_gene_associations.rst | 42 + ...s_Muscularis_all_snp_gene_associations.rst | 42 + ...al_Appendage_all_snp_gene_associations.rst | 42 + ...ft_Ventricle_all_snp_gene_associations.rst | 42 + ...idney_Cortex_all_snp_gene_associations.rst | 42 + ..._sQTL_Liver_all_snp_gene_associations.rst} | 4 +- ...x_sQTL_Lung_all_snp_gene_associations.rst} | 4 +- ...livary_Gland_all_snp_gene_associations.rst | 42 + ...cle_Skeletal_all_snp_gene_associations.rst | 42 + ...Nerve_Tibial_all_snp_gene_associations.rst | 42 + ..._sQTL_Ovary_all_snp_gene_associations.rst} | 4 +- 
...QTL_Pancreas_all_snp_gene_associations.rst | 42 + ...L_Pituitary_all_snp_gene_associations.rst} | 4 +- ...QTL_Prostate_all_snp_gene_associations.rst | 42 + ...d_Suprapubic_all_snp_gene_associations.rst | 42 + ...ed_Lower_leg_all_snp_gene_associations.rst | 42 + ...rminal_Ileum_all_snp_gene_associations.rst | 42 + ...sQTL_Spleen_all_snp_gene_associations.rst} | 6 +- ...QTL_Stomach_all_snp_gene_associations.rst} | 4 +- ...sQTL_Testis_all_snp_gene_associations.rst} | 6 +- ...QTL_Thyroid_all_snp_gene_associations.rst} | 4 +- ...sQTL_Uterus_all_snp_gene_associations.rst} | 6 +- ...sQTL_Vagina_all_snp_gene_associations.rst} | 6 +- ..._Whole_Blood_all_snp_gene_associations.rst | 42 + .../GTEx_sQTL_allpairs_Artery_Tibial.rst | 42 - .../GTEx_sQTL_allpairs_Brain_Amygdala.rst | 42 - ...s_Brain_Anterior_cingulate_cortex_BA24.rst | 42 - ...L_allpairs_Brain_Caudate_basal_ganglia.rst | 42 - ...L_allpairs_Brain_Cerebellar_Hemisphere.rst | 42 - .../GTEx_sQTL_allpairs_Brain_Cerebellum.rst | 42 - .../GTEx_sQTL_allpairs_Brain_Cortex.rst | 42 - .../GTEx_sQTL_allpairs_Brain_Hippocampus.rst | 42 - .../GTEx_sQTL_allpairs_Brain_Hypothalamus.rst | 42 - ...L_allpairs_Brain_Putamen_basal_ganglia.rst | 42 - .../GTEx_sQTL_allpairs_Colon_Sigmoid.rst | 42 - .../GTEx_sQTL_allpairs_Colon_Transverse.rst | 42 - .../GTEx_sQTL_allpairs_Esophagus_Mucosa.rst | 42 - ...TEx_sQTL_allpairs_Esophagus_Muscularis.rst | 42 - ...TEx_sQTL_allpairs_Heart_Left_Ventricle.rst | 42 - .../GTEx_sQTL_allpairs_Kidney_Cortex.rst | 42 - .../schemas/GTEx_sQTL_allpairs_Liver.rst | 42 - .../schemas/GTEx_sQTL_allpairs_Lung.rst | 42 - ...TEx_sQTL_allpairs_Minor_Salivary_Gland.rst | 42 - .../GTEx_sQTL_allpairs_Muscle_Skeletal.rst | 42 - .../GTEx_sQTL_allpairs_Nerve_Tibial.rst | 42 - .../schemas/GTEx_sQTL_allpairs_Ovary.rst | 42 - .../schemas/GTEx_sQTL_allpairs_Pancreas.rst | 42 - .../schemas/GTEx_sQTL_allpairs_Pituitary.rst | 42 - .../schemas/GTEx_sQTL_allpairs_Prostate.rst | 42 - ...TL_allpairs_Skin_Sun_Exposed_Lower_leg.rst | 42 - .../schemas/GTEx_sQTL_allpairs_Spleen.rst | 42 - .../schemas/GTEx_sQTL_allpairs_Stomach.rst | 42 - .../schemas/GTEx_sQTL_allpairs_Testis.rst | 42 - .../schemas/GTEx_sQTL_allpairs_Thyroid.rst | 42 - .../schemas/GTEx_sQTL_allpairs_Uterus.rst | 42 - .../schemas/GTEx_sQTL_allpairs_Vagina.rst | 42 - .../GTEx_sQTL_allpairs_Whole_Blood.rst | 42 - .../hail/docs/datasets/schemas/dbSNP.rst | 99 + ..._eQTL_allpairs_Lung.rst => dbSNP_rsid.rst} | 26 +- .../hail/docs/functions/collections.rst | 2 + hail/python/hail/docs/utils/index.rst | 4 + hail/python/hail/experimental/datasets.json | 1381 ++-- hail/python/hail/experimental/datasets.py | 29 +- hail/python/hail/experimental/db.py | 13 +- .../hail/experimental/haplotype_freq_em.py | 2 +- hail/python/hail/experimental/plots.py | 2 +- .../experimental/vcf_combiner/__main__.py | 2 +- .../experimental/vcf_combiner/vcf_combiner.py | 88 +- hail/python/hail/expr/__init__.py | 3 +- .../hail/expr/expressions/base_expression.py | 6 +- .../expr/expressions/typed_expressions.py | 116 +- hail/python/hail/expr/functions.py | 39 +- hail/python/hail/fs/fs.py | 4 + hail/python/hail/fs/google_fs.py | 3 + hail/python/hail/fs/hadoop_fs.py | 3 + hail/python/hail/fs/local_fs.py | 3 + hail/python/hail/ir/ir.py | 7 +- hail/python/hail/methods/__init__.py | 3 +- hail/python/hail/methods/impex.py | 28 +- hail/python/hail/methods/misc.py | 9 +- hail/python/hail/methods/statgen.py | 417 +- hail/python/hail/nd/__init__.py | 4 +- hail/python/hail/nd/nd.py | 20 +- hail/python/hail/table.py | 6 +- 
hail/python/hail/utils/__init__.py | 6 +- hail/python/hail/utils/hadoop_utils.py | 20 + hail/python/hail/utils/misc.py | 4 +- hail/python/hail/utils/tutorial.py | 69 +- hail/python/hailtop/aiogoogle/auth/session.py | 6 +- .../aiogoogle/client/compute_client.py | 3 + .../aiogoogle/client/storage_client.py | 70 +- hail/python/hailtop/aiotools/fs.py | 101 +- hail/python/hailtop/aiotools/s3asyncfs.py | 423 ++ hail/python/hailtop/aiotools/stream.py | 84 +- hail/python/hailtop/batch/backend.py | 326 +- hail/python/hailtop/batch/batch.py | 42 +- .../hailtop/batch/batch_pool_executor.py | 3 +- hail/python/hailtop/batch/docs/api.rst | 1 + hail/python/hailtop/batch/docs/change_log.rst | 15 + hail/python/hailtop/batch/docs/conf.py | 1 + hail/python/hailtop/batch/job.py | 16 +- hail/python/hailtop/batch_client/aioclient.py | 5 +- hail/python/hailtop/batch_client/client.py | 5 +- hail/python/hailtop/google_storage.py | 2 +- .../dataproc/resources/init_notebook.py | 1 + hail/python/hailtop/httpx.py | 58 +- hail/python/hailtop/utils/__init__.py | 5 +- hail/python/hailtop/utils/utils.py | 173 +- .../python/hailtop/utils/validate/__init__.py | 3 +- .../python/hailtop/utils/validate/validate.py | 9 +- hail/python/requirements.txt | 5 +- hail/python/setup.py | 1 + hail/python/test/hail/expr/test_expr.py | 73 +- hail/python/test/hail/expr/test_ndarrays.py | 66 +- hail/python/test/hail/helpers.py | 10 +- hail/python/test/hail/linalg/test_linalg.py | 77 +- .../hail/matrixtable/test_matrix_table.py | 9 +- hail/python/test/hail/methods/test_impex.py | 25 +- hail/python/test/hail/methods/test_misc.py | 6 + hail/python/test/hail/methods/test_pca.py | 93 +- hail/python/test/hail/methods/test_statgen.py | 277 +- hail/python/test/hail/table/test_table.py | 7 +- .../python/test/hailtop/aiotools/test_copy.py | 199 +- hail/python/test/hailtop/batch/test_batch.py | 38 +- hail/python/test/hailtop/test_aiogoogle.py | 299 +- hail/python/test/hailtop/test_fs.py | 338 + hail/src/main/scala/is/hail/HailContext.scala | 3 +- .../scala/is/hail/asm4s/ClassBuilder.scala | 85 +- hail/src/main/scala/is/hail/asm4s/Code.scala | 20 + .../main/scala/is/hail/asm4s/package.scala | 28 +- .../is/hail/backend/local/LocalBackend.scala | 3 +- .../hail/backend/service/ServiceBackend.scala | 1 + .../is/hail/backend/spark/SparkBackend.scala | 29 +- .../experimental/ExperimentalFunctions.scala | 6 +- .../scala/is/hail/expr/ir/ArraySorter.scala | 243 +- .../main/scala/is/hail/expr/ir/BinaryOp.scala | 20 +- .../scala/is/hail/expr/ir/BinarySearch.scala | 89 +- .../scala/is/hail/expr/ir/BlockMatrixIR.scala | 11 +- .../main/scala/is/hail/expr/ir/Casts.scala | 41 +- .../main/scala/is/hail/expr/ir/Children.scala | 1 + .../main/scala/is/hail/expr/ir/Compile.scala | 56 +- .../is/hail/expr/ir/CompileAndEvaluate.scala | 3 +- .../src/main/scala/is/hail/expr/ir/Copy.scala | 3 + .../src/main/scala/is/hail/expr/ir/Emit.scala | 1818 ++--- .../is/hail/expr/ir/EmitClassBuilder.scala | 431 +- .../is/hail/expr/ir/EmitCodeBuilder.scala | 96 +- .../scala/is/hail/expr/ir/FoldConstants.scala | 1 + .../is/hail/expr/ir/GenericTableValue.scala | 7 +- hail/src/main/scala/is/hail/expr/ir/IR.scala | 73 +- .../scala/is/hail/expr/ir/InferPType.scala | 336 +- .../scala/is/hail/expr/ir/InferType.scala | 3 +- .../scala/is/hail/expr/ir/Interpret.scala | 19 +- .../main/scala/is/hail/expr/ir/MatrixIR.scala | 13 + .../scala/is/hail/expr/ir/MatrixValue.scala | 2 +- .../scala/is/hail/expr/ir/MatrixWriter.scala | 213 +- .../is/hail/expr/ir/NativeReaderOptions.scala | 2 + 
.../main/scala/is/hail/expr/ir/Param.scala | 20 +- .../main/scala/is/hail/expr/ir/Parser.scala | 18 +- .../main/scala/is/hail/expr/ir/Pretty.scala | 6 +- ...imitiveTypeToIRIntermediateClassTag.scala} | 4 +- .../is/hail/expr/ir/PruneDeadFields.scala | 14 + .../scala/is/hail/expr/ir/Requiredness.scala | 37 +- .../main/scala/is/hail/expr/ir/Simplify.scala | 14 +- .../expr/ir/SpecializedArrayBuilders.scala | 237 +- .../main/scala/is/hail/expr/ir/TableIR.scala | 66 +- .../scala/is/hail/expr/ir/TableWriter.scala | 80 +- .../scala/is/hail/expr/ir/TypeCheck.scala | 3 + .../main/scala/is/hail/expr/ir/UnaryOp.scala | 9 +- .../is/hail/expr/ir/agg/AggregatorState.scala | 47 +- .../is/hail/expr/ir/agg/AppendOnlyBTree.scala | 2 +- .../expr/ir/agg/ApproxCDFAggregator.scala | 2 +- .../ArrayElementLengthCheckAggregator.scala | 2 +- .../expr/ir/agg/CallStatsAggregator.scala | 15 +- .../expr/ir/agg/CollectAsSetAggregator.scala | 7 +- .../is/hail/expr/ir/agg/CountAggregator.scala | 7 +- .../hail/expr/ir/agg/DensifyAggregator.scala | 10 +- .../expr/ir/agg/DownsampleAggregator.scala | 31 +- .../hail/expr/ir/agg/GroupedAggregator.scala | 13 +- .../expr/ir/agg/ImputeTypeAggregator.scala | 9 +- .../ir/agg/LinearRegressionAggregator.scala | 2 +- .../hail/expr/ir/agg/MonoidAggregator.scala | 12 +- .../expr/ir/agg/NDArraySumAggregator.scala | 28 +- .../hail/expr/ir/agg/StagedArrayBuilder.scala | 8 +- .../expr/ir/agg/StagedBlockLinkedList.scala | 7 +- .../hail/expr/ir/agg/TakeByAggregator.scala | 25 +- .../ir/analyses/ComputeMethodSplits.scala | 45 + .../analyses/ControlFlowPreventsSplit.scala | 33 + .../expr/ir/analyses/ParentPointers.scala | 17 + .../expr/ir/functions/ArrayFunctions.scala | 12 +- .../expr/ir/functions/CallFunctions.scala | 86 +- .../is/hail/expr/ir/functions/Functions.scala | 379 +- .../expr/ir/functions/GenotypeFunctions.scala | 17 +- .../hail/expr/ir/functions/GetElement.scala | 2 +- .../expr/ir/functions/IntervalFunctions.scala | 78 +- .../expr/ir/functions/LocusFunctions.scala | 103 +- .../expr/ir/functions/MathFunctions.scala | 117 +- .../expr/ir/functions/NDArrayFunctions.scala | 100 +- .../ir/functions/RandomSeededFunctions.scala | 42 +- .../functions/ReferenceGenomeFunctions.scala | 36 +- .../expr/ir/functions/StringFunctions.scala | 135 +- .../expr/ir/functions/UtilFunctions.scala | 208 +- .../expr/ir/lowering/LowerBlockMatrixIR.scala | 164 +- .../ir/lowering/LowerDistributedSort.scala | 3 +- .../hail/expr/ir/lowering/LowerTableIR.scala | 40 +- .../is/hail/expr/ir/lowering/LowerToCDA.scala | 3 +- .../expr/ir/lowering/RVDToTableStage.scala | 4 +- .../hail/expr/ir/ndarrays/EmitNDArray.scala | 661 ++ .../expr/ir/orderings/BinaryOrdering.scala | 4 +- .../hail/expr/ir/orderings/CallOrdering.scala | 5 +- .../hail/expr/ir/orderings/CodeOrdering.scala | 88 +- .../expr/ir/orderings/IntervalOrdering.scala | 68 +- .../expr/ir/orderings/IterableOrdering.scala | 24 +- .../expr/ir/orderings/LocusOrdering.scala | 14 +- .../expr/ir/orderings/PrimitiveOrdering.scala | 82 +- .../expr/ir/orderings/ShuffleOrdering.scala | 4 +- .../expr/ir/orderings/StringOrdering.scala | 4 +- .../expr/ir/orderings/StructOrdering.scala | 42 +- .../main/scala/is/hail/expr/ir/package.scala | 15 +- .../is/hail/expr/ir/streams/EmitStream.scala | 219 +- .../is/hail/expr/ir/streams/StreamUtils.scala | 16 +- .../src/main/scala/is/hail/io/CodecSpec.scala | 7 +- .../scala/is/hail/io/TextMatrixReader.scala | 29 +- .../is/hail/io/bgen/BgenRDDPartitions.scala | 19 +- .../main/scala/is/hail/io/bgen/LoadBgen.scala | 2 + 
.../main/scala/is/hail/io/fs/HadoopFS.scala | 14 + .../scala/is/hail/io/gen/ExportBGEN.scala | 26 +- .../main/scala/is/hail/io/gen/LoadGen.scala | 8 +- .../scala/is/hail/io/index/IndexWriter.scala | 27 +- .../hail/io/index/InternalNodeBuilder.scala | 34 +- .../is/hail/io/index/LeafNodeBuilder.scala | 18 +- .../scala/is/hail/io/plink/LoadPlink.scala | 6 +- .../scala/is/hail/io/tabix/TabixReader.scala | 4 +- .../main/scala/is/hail/io/vcf/LoadVCF.scala | 17 +- .../scala/is/hail/linalg/BlockMatrix.scala | 7 +- .../is/hail/linalg/LinalgCodeUtils.scala | 82 +- hail/src/main/scala/is/hail/lir/PST.scala | 25 +- hail/src/main/scala/is/hail/lir/X.scala | 6 + hail/src/main/scala/is/hail/lir/package.scala | 6 +- .../is/hail/methods/LinearRegression.scala | 6 +- .../is/hail/methods/LogisticRegression.scala | 4 +- .../is/hail/methods/PoissonRegression.scala | 4 +- .../src/main/scala/is/hail/methods/Skat.scala | 4 +- .../scala/is/hail/rvd/AbstractRVDSpec.scala | 17 +- .../services/batch_client/BatchClient.scala | 5 +- .../main/scala/is/hail/services/package.scala | 4 +- .../is/hail/services/shuffler/package.scala | 7 +- .../scala/is/hail/stats/RegressionUtils.scala | 4 +- .../is/hail/types/TypeWithRequiredness.scala | 9 + .../scala/is/hail/types/encoded/EArray.scala | 12 +- .../is/hail/types/encoded/EBaseStruct.scala | 27 +- .../scala/is/hail/types/encoded/EBinary.scala | 10 +- .../types/encoded/EBlockMatrixNDArray.scala | 15 +- .../is/hail/types/encoded/EBoolean.scala | 10 +- .../is/hail/types/encoded/EFloat32.scala | 10 +- .../is/hail/types/encoded/EFloat64.scala | 10 +- .../scala/is/hail/types/encoded/EInt32.scala | 18 +- .../scala/is/hail/types/encoded/EInt64.scala | 10 +- .../types/encoded/ENDArrayColumnMajor.scala | 20 +- .../is/hail/types/encoded/EShuffle.scala | 8 +- .../scala/is/hail/types/encoded/EType.scala | 21 +- .../physical/PArrayBackedContainer.scala | 4 +- .../is/hail/types/physical/PBaseStruct.scala | 16 +- .../is/hail/types/physical/PBinary.scala | 20 - .../is/hail/types/physical/PBoolean.scala | 4 +- .../scala/is/hail/types/physical/PCall.scala | 25 +- .../hail/types/physical/PCanonicalArray.scala | 48 +- .../types/physical/PCanonicalBaseStruct.scala | 23 +- .../types/physical/PCanonicalBinary.scala | 4 +- .../hail/types/physical/PCanonicalCall.scala | 6 +- .../hail/types/physical/PCanonicalDict.scala | 19 +- .../types/physical/PCanonicalInterval.scala | 10 +- .../hail/types/physical/PCanonicalLocus.scala | 12 +- .../types/physical/PCanonicalNDArray.scala | 62 +- .../hail/types/physical/PCanonicalSet.scala | 13 +- .../types/physical/PCanonicalShuffle.scala | 6 +- .../types/physical/PCanonicalStream.scala | 6 +- .../types/physical/PCanonicalString.scala | 4 +- .../scala/is/hail/types/physical/PCode.scala | 318 - .../is/hail/types/physical/PContainer.scala | 12 +- .../scala/is/hail/types/physical/PDict.scala | 3 + .../is/hail/types/physical/PFloat32.scala | 4 +- .../is/hail/types/physical/PFloat64.scala | 4 +- .../scala/is/hail/types/physical/PInt32.scala | 6 +- .../scala/is/hail/types/physical/PInt64.scala | 4 +- .../is/hail/types/physical/PInterval.scala | 14 +- .../scala/is/hail/types/physical/PLocus.scala | 12 +- .../is/hail/types/physical/PNDArray.scala | 14 +- .../is/hail/types/physical/PPrimitive.scala | 2 +- .../is/hail/types/physical/PShuffle.scala | 16 +- .../is/hail/types/physical/PStream.scala | 6 +- .../is/hail/types/physical/PString.scala | 14 +- .../is/hail/types/physical/PStruct.scala | 2 +- .../hail/types/physical/PSubsetStruct.scala | 4 +- 
.../scala/is/hail/types/physical/PType.scala | 43 +- .../hail/types/physical/PUnrealizable.scala | 15 +- .../is/hail/types/physical/package.scala | 3 +- .../is/hail/types/physical/stypes/SCode.scala | 86 +- .../is/hail/types/physical/stypes/SType.scala | 104 +- .../physical/stypes/SingleCodeSCode.scala | 173 + .../stypes/concrete/SBaseStructPointer.scala | 42 +- .../stypes/concrete/SBinaryPointer.scala | 26 +- .../stypes/concrete/SCanonicalCall.scala | 65 +- .../concrete/SCanonicalLocusPointer.scala | 36 +- .../concrete/SCanonicalShufflePointer.scala | 36 +- .../stypes/concrete/SIndexablePointer.scala | 42 +- .../stypes/concrete/SInsertFieldsStruct.scala | 164 + .../stypes/concrete/SIntervalPointer.scala | 48 +- .../stypes/concrete/SNDArrayPointer.scala | 46 +- .../stypes/concrete/SStackStruct.scala | 161 + .../stypes/concrete/SStringPointer.scala | 38 +- .../stypes/concrete/SSubsetStruct.scala | 83 +- .../stypes/interfaces/SBaseStruct.scala | 63 +- .../physical/stypes/interfaces/SBinary.scala | 1 - .../physical/stypes/interfaces/SCall.scala | 1 - .../stypes/interfaces/SContainer.scala | 4 +- .../stypes/interfaces/SInterval.scala | 9 +- .../physical/stypes/interfaces/SLocus.scala | 3 + .../physical/stypes/interfaces/SNDArray.scala | 166 +- .../physical/stypes/interfaces/SStream.scala | 30 +- .../physical/stypes/interfaces/SVoid.scala | 23 +- .../physical/stypes/interfaces/package.scala | 23 +- .../physical/stypes/primitives/SBoolean.scala | 53 +- .../physical/stypes/primitives/SFloat32.scala | 62 +- .../physical/stypes/primitives/SFloat64.scala | 59 +- .../physical/stypes/primitives/SInt32.scala | 62 +- .../physical/stypes/primitives/SInt64.scala | 63 +- .../scala/is/hail/types/virtual/TStruct.scala | 4 +- .../main/scala/is/hail/utils/ArrayStack.scala | 2 +- .../is/hail/utils/BoxedArrayBuilder.scala | 2 +- hail/src/main/scala/is/hail/utils/Graph.scala | 3 +- .../scala/is/hail/utils/HailIterator.scala | 5 +- .../utils/MissingAnnotationArrayBuilder.scala | 68 - .../utils/MissingBooleanArrayBuilder.scala | 65 - .../utils/MissingDoubleArrayBuilder.scala | 70 - .../hail/utils/MissingFloatArrayBuilder.scala | 70 - .../hail/utils/MissingIntArrayBuilder.scala | 70 - .../hail/utils/MissingLongArrayBuilder.scala | 70 - .../scala/is/hail/utils/TextTableReader.scala | 2 + .../main/scala/is/hail/utils/package.scala | 4 +- .../utils/richUtils/RichCodeInputBuffer.scala | 15 +- .../richUtils/RichCodeOutputBuffer.scala | 15 +- .../is/hail/utils/richUtils/RichRow.scala | 10 +- .../is/hail/utils/richUtils/RichString.scala | 11 +- .../is/hail/variant/RegionValueVariant.scala | 21 +- hail/src/test/resources/bad_flag_number.vcf | 91 + hail/src/test/scala/is/hail/TestUtils.scala | 9 +- .../annotations/StagedConstructorSuite.scala | 14 +- .../test/scala/is/hail/asm4s/ASM4SSuite.scala | 44 +- .../is/hail/expr/ir/Aggregators2Suite.scala | 3 +- .../is/hail/expr/ir/ArrayFunctionsSuite.scala | 4 +- .../scala/is/hail/expr/ir/ETypeSuite.scala | 2 +- .../is/hail/expr/ir/EmitStreamSuite.scala | 20 +- .../scala/is/hail/expr/ir/FunctionSuite.scala | 7 +- .../test/scala/is/hail/expr/ir/IRSuite.scala | 315 +- .../scala/is/hail/expr/ir/OrderingSuite.scala | 22 +- .../scala/is/hail/expr/ir/PruneSuite.scala | 3 + .../hail/expr/ir/RandomFunctionsSuite.scala | 17 +- .../is/hail/expr/ir/RequirednessSuite.scala | 35 +- .../is/hail/expr/ir/StagedBTreeSuite.scala | 13 +- .../scala/is/hail/expr/ir/TableIRSuite.scala | 2 + .../hail/expr/ir/TakeByAggregatorSuite.scala | 2 +- .../scala/is/hail/expr/ir/TestUtils.scala | 2 +- 
.../scala/is/hail/expr/ir/TrapNodeSuite.scala | 37 + .../is/hail/expr/ir/agg/DownsampleSuite.scala | 8 +- .../ir/agg/StagedBlockLinkedListSuite.scala | 2 +- .../scala/is/hail/methods/SkatSuite.scala | 3 +- .../hail/services/shuffler/ShuffleSuite.scala | 5 +- .../hail/types/physical/PNDArraySuite.scala | 21 +- .../types/physical/PhysicalTestUtils.scala | 2 +- .../is/hail/utils/ArrayBuilderSuite.scala | 9 +- internal-gateway/Makefile | 18 +- js_common/.gitignore | 5 + js_common/batch-client.ts | 7 + js_common/hail.css | 26 + js_common/package-lock.json | 199 + js_common/package.json | 17 + js_common/react/batch-client.ts | 11 + js_common/react/hooks.ts | 32 + js_common/svelte/batch-client.ts | 11 + js_common/svelte/store.ts | 27 + js_common/types.ts | 36 + letsencrypt/Makefile | 18 +- memory/Makefile | 22 +- monitoring/Makefile | 21 +- notebook/Makefile | 35 +- .../templates/workshop/resources.html | 4 + package-lock.json | 216 + package.json | 5 + prometheus/Makefile | 18 +- pylintrc | 2 +- query/Makefile | 19 +- shuffler/Makefile | 21 +- site/Makefile | 2 +- tls/Dockerfile | 15 +- tls/create_certs.py | 5 +- ukbb-rg/Makefile | 30 +- web_common/web_common/styles/main.scss | 20 +- website/Makefile | 18 +- 617 files changed, 25752 insertions(+), 10587 deletions(-) delete mode 100644 batch/batch/worker/flock.py create mode 100644 batch2/proxy.py create mode 100644 batch2/react-batch/.eslintrc.json create mode 100644 batch2/react-batch/.gitignore create mode 100644 batch2/react-batch/index.html create mode 100644 batch2/react-batch/package-lock.json create mode 100644 batch2/react-batch/package.json create mode 100644 batch2/react-batch/src/App.tsx create mode 100644 batch2/react-batch/src/components/BatchTable.tsx create mode 100644 batch2/react-batch/src/components/JobTable.tsx create mode 100644 batch2/react-batch/src/main.tsx create mode 100644 batch2/react-batch/src/pages/BatchPage.tsx create mode 100644 batch2/react-batch/src/pages/BatchesPage.tsx create mode 100644 batch2/react-batch/tsconfig.json create mode 100644 batch2/react-batch/vite.config.ts create mode 100644 batch2/svelte-batch/.gitignore create mode 100644 batch2/svelte-batch/index.html create mode 100644 batch2/svelte-batch/package-lock.json create mode 100644 batch2/svelte-batch/package.json create mode 100644 batch2/svelte-batch/public/favicon.ico create mode 100644 batch2/svelte-batch/src/App.svelte create mode 100644 batch2/svelte-batch/src/assets/svelte.png create mode 100644 batch2/svelte-batch/src/components/BatchTable.svelte create mode 100644 batch2/svelte-batch/src/components/JobTable.svelte create mode 100644 batch2/svelte-batch/src/global.d.ts create mode 100644 batch2/svelte-batch/src/main.ts create mode 100644 batch2/svelte-batch/src/pages/BatchPage.svelte create mode 100644 batch2/svelte-batch/src/pages/BatchesPage.svelte create mode 100644 batch2/svelte-batch/svelte.config.cjs create mode 100644 batch2/svelte-batch/tsconfig.json create mode 100644 batch2/svelte-batch/vite.config.js create mode 100644 benchmark/python/benchmark_hail/run/sentinel_benchmarks.py create mode 100644 ci/buildkit/Dockerfile rename ci/{kaniko/convert-google-application-credentials-to-kaniko-auth-config => buildkit/convert-google-application-credentials-to-docker-auth-config} (71%) delete mode 100644 ci/kaniko/Dockerfile create mode 100644 datasets/extract/extract_1000_Genomes_30x_GRCh38_samples.sh create mode 100644 datasets/extract/extract_1000_Genomes_NYGC_30x_GRCh38.py create mode 100644 datasets/extract/extract_CADD.py create mode 
100644 datasets/extract/extract_dbSNP.py create mode 100644 datasets/notebooks/1kg_NYGC_30x_datasets.ipynb create mode 100644 datasets/notebooks/CADD_datasets.ipynb create mode 100644 datasets/notebooks/dbSNP_datasets.ipynb create mode 100644 datasets/notebooks/reformat_buckets.ipynb create mode 100644 datasets/notebooks/reformat_buckets.txt create mode 100644 datasets/notebooks/reformat_buckets_mappings.json create mode 100644 dev-docs/compiler-team/development_tools.md create mode 100644 dev-docs/google-cloud-cookbook.md create mode 100755 docker-build.sh create mode 100644 hail/python/hail/docs/datasets/schemas/1000_Genomes_HighCov_autosomes.rst create mode 100644 hail/python/hail/docs/datasets/schemas/1000_Genomes_HighCov_chrX.rst create mode 100644 hail/python/hail/docs/datasets/schemas/1000_Genomes_HighCov_chrY.rst create mode 100644 hail/python/hail/docs/datasets/schemas/1000_Genomes_Retracted_autosomes.rst create mode 100644 hail/python/hail/docs/datasets/schemas/1000_Genomes_Retracted_chrX.rst create mode 100644 hail/python/hail/docs/datasets/schemas/1000_Genomes_Retracted_chrY.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia.rst => GTEx_eQTL_Adipose_Subcutaneous_all_snp_gene_associations.rst} (88%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Adipose_Visceral_Omentum_all_snp_gene_associations.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_allpairs_Brain_Spinal_cord_cervical_c-1.rst => GTEx_eQTL_Adrenal_Gland_all_snp_gene_associations.rst} (89%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Artery_Aorta_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Artery_Coronary_all_snp_gene_associations.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_allpairs_Small_Intestine_Terminal_Ileum.rst => GTEx_eQTL_Artery_Tibial_all_snp_gene_associations.rst} (89%) rename hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic.rst => GTEx_eQTL_Brain_Amygdala_all_snp_gene_associations.rst} (89%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Caudate_basal_ganglia_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Cerebellar_Hemisphere_all_snp_gene_associations.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_allpairs_Cells_EBV-transformed_lymphocytes.rst => GTEx_eQTL_Brain_Cerebellum_all_snp_gene_associations.rst} (89%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Cortex_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Frontal_Cortex_BA9_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Hippocampus_all_snp_gene_associations.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_allpairs_Esophagus_Gastroesophageal_Junction.rst => GTEx_eQTL_Brain_Hypothalamus_all_snp_gene_associations.rst} (89%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Putamen_basal_ganglia_all_snp_gene_associations.rst create mode 100644 
hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Substantia_nigra_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Breast_Mammary_Tissue_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Cells_Cultured_fibroblasts_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Colon_Sigmoid_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Colon_Transverse_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Esophagus_Mucosa_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Esophagus_Muscularis_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Heart_Atrial_Appendage_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Heart_Left_Ventricle_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Kidney_Cortex_all_snp_gene_associations.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_allpairs_Brain_Substantia_nigra.rst => GTEx_eQTL_Liver_all_snp_gene_associations.rst} (91%) rename hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_allpairs_Breast_Mammary_Tissue.rst => GTEx_eQTL_Lung_all_snp_gene_associations.rst} (91%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Minor_Salivary_Gland_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Muscle_Skeletal_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Nerve_Tibial_all_snp_gene_associations.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_allpairs_Heart_Atrial_Appendage.rst => GTEx_eQTL_Ovary_all_snp_gene_associations.rst} (91%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Pancreas_all_snp_gene_associations.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_allpairs_Cells_Cultured_fibroblasts.rst => GTEx_eQTL_Pituitary_all_snp_gene_associations.rst} (90%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Prostate_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Small_Intestine_Terminal_Ileum_all_snp_gene_associations.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_allpairs_Artery_Aorta.rst => GTEx_eQTL_Spleen_all_snp_gene_associations.rst} (86%) rename hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_allpairs_Brain_Frontal_Cortex_BA9.rst => GTEx_eQTL_Stomach_all_snp_gene_associations.rst} (90%) rename hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_allpairs_Adrenal_Gland.rst => GTEx_eQTL_Testis_all_snp_gene_associations.rst} (86%) rename 
hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_allpairs_Adipose_Visceral_Omentum.rst => GTEx_eQTL_Thyroid_all_snp_gene_associations.rst} (90%) rename hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_allpairs_Artery_Coronary.rst => GTEx_eQTL_Uterus_all_snp_gene_associations.rst} (86%) rename hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_allpairs_Adipose_Subcutaneous.rst => GTEx_eQTL_Vagina_all_snp_gene_associations.rst} (86%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Whole_Blood_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Artery_Tibial.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Amygdala.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Caudate_basal_ganglia.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Cerebellar_Hemisphere.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Cerebellum.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Cortex.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Hippocampus.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Hypothalamus.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Putamen_basal_ganglia.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Colon_Sigmoid.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Colon_Transverse.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Esophagus_Mucosa.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Esophagus_Muscularis.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Heart_Left_Ventricle.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Kidney_Cortex.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Liver.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Minor_Salivary_Gland.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Muscle_Skeletal.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Nerve_Tibial.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Ovary.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Pancreas.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Pituitary.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Prostate.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Skin_Sun_Exposed_Lower_leg.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Spleen.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Stomach.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Testis.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Thyroid.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Uterus.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Vagina.rst delete mode 
100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Whole_Blood.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_sQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia.rst => GTEx_sQTL_Adipose_Subcutaneous_all_snp_gene_associations.rst} (89%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Adipose_Visceral_Omentum_all_snp_gene_associations.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_sQTL_allpairs_Brain_Spinal_cord_cervical_c-1.rst => GTEx_sQTL_Adrenal_Gland_all_snp_gene_associations.rst} (90%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Artery_Aorta_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Artery_Coronary_all_snp_gene_associations.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_sQTL_allpairs_Small_Intestine_Terminal_Ileum.rst => GTEx_sQTL_Artery_Tibial_all_snp_gene_associations.rst} (90%) rename hail/python/hail/docs/datasets/schemas/{GTEx_sQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic.rst => GTEx_sQTL_Brain_Amygdala_all_snp_gene_associations.rst} (90%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Caudate_basal_ganglia_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Cerebellar_Hemisphere_all_snp_gene_associations.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_sQTL_allpairs_Cells_EBV-transformed_lymphocytes.rst => GTEx_sQTL_Brain_Cerebellum_all_snp_gene_associations.rst} (90%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Cortex_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Frontal_Cortex_BA9_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Hippocampus_all_snp_gene_associations.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_sQTL_allpairs_Esophagus_Gastroesophageal_Junction.rst => GTEx_sQTL_Brain_Hypothalamus_all_snp_gene_associations.rst} (89%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Putamen_basal_ganglia_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Substantia_nigra_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Breast_Mammary_Tissue_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Cells_Cultured_fibroblasts_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Colon_Sigmoid_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Colon_Transverse_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Esophagus_Mucosa_all_snp_gene_associations.rst 
create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Esophagus_Muscularis_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Heart_Atrial_Appendage_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Heart_Left_Ventricle_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Kidney_Cortex_all_snp_gene_associations.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_sQTL_allpairs_Brain_Substantia_nigra.rst => GTEx_sQTL_Liver_all_snp_gene_associations.rst} (91%) rename hail/python/hail/docs/datasets/schemas/{GTEx_sQTL_allpairs_Breast_Mammary_Tissue.rst => GTEx_sQTL_Lung_all_snp_gene_associations.rst} (92%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Minor_Salivary_Gland_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Muscle_Skeletal_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Nerve_Tibial_all_snp_gene_associations.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_sQTL_allpairs_Heart_Atrial_Appendage.rst => GTEx_sQTL_Ovary_all_snp_gene_associations.rst} (91%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Pancreas_all_snp_gene_associations.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_sQTL_allpairs_Cells_Cultured_fibroblasts.rst => GTEx_sQTL_Pituitary_all_snp_gene_associations.rst} (91%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Prostate_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Small_Intestine_Terminal_Ileum_all_snp_gene_associations.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_sQTL_allpairs_Adipose_Subcutaneous.rst => GTEx_sQTL_Spleen_all_snp_gene_associations.rst} (87%) rename hail/python/hail/docs/datasets/schemas/{GTEx_sQTL_allpairs_Brain_Frontal_Cortex_BA9.rst => GTEx_sQTL_Stomach_all_snp_gene_associations.rst} (91%) rename hail/python/hail/docs/datasets/schemas/{GTEx_sQTL_allpairs_Artery_Coronary.rst => GTEx_sQTL_Testis_all_snp_gene_associations.rst} (87%) rename hail/python/hail/docs/datasets/schemas/{GTEx_sQTL_allpairs_Adipose_Visceral_Omentum.rst => GTEx_sQTL_Thyroid_all_snp_gene_associations.rst} (91%) rename hail/python/hail/docs/datasets/schemas/{GTEx_sQTL_allpairs_Adrenal_Gland.rst => GTEx_sQTL_Uterus_all_snp_gene_associations.rst} (87%) rename hail/python/hail/docs/datasets/schemas/{GTEx_sQTL_allpairs_Artery_Aorta.rst => GTEx_sQTL_Vagina_all_snp_gene_associations.rst} (87%) create mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Whole_Blood_all_snp_gene_associations.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Artery_Tibial.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Amygdala.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Caudate_basal_ganglia.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Cerebellar_Hemisphere.rst delete mode 100644 
hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Cerebellum.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Cortex.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Hippocampus.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Hypothalamus.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Putamen_basal_ganglia.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Colon_Sigmoid.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Colon_Transverse.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Esophagus_Mucosa.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Esophagus_Muscularis.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Heart_Left_Ventricle.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Kidney_Cortex.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Liver.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Lung.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Minor_Salivary_Gland.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Muscle_Skeletal.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Nerve_Tibial.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Ovary.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Pancreas.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Pituitary.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Prostate.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Skin_Sun_Exposed_Lower_leg.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Spleen.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Stomach.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Testis.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Thyroid.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Uterus.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Vagina.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Whole_Blood.rst create mode 100644 hail/python/hail/docs/datasets/schemas/dbSNP.rst rename hail/python/hail/docs/datasets/schemas/{GTEx_eQTL_allpairs_Lung.rst => dbSNP_rsid.rst} (53%) create mode 100644 hail/python/hailtop/aiotools/s3asyncfs.py create mode 100644 hail/python/test/hailtop/test_fs.py rename hail/src/main/scala/is/hail/expr/ir/{TypeToIRIntermediateClassTag.scala => PrimitiveTypeToIRIntermediateClassTag.scala} (73%) create mode 100644 hail/src/main/scala/is/hail/expr/ir/analyses/ComputeMethodSplits.scala create mode 100644 hail/src/main/scala/is/hail/expr/ir/analyses/ControlFlowPreventsSplit.scala create mode 100644 hail/src/main/scala/is/hail/expr/ir/analyses/ParentPointers.scala create mode 100644 hail/src/main/scala/is/hail/expr/ir/ndarrays/EmitNDArray.scala delete mode 100644 hail/src/main/scala/is/hail/types/physical/PCode.scala create mode 100644 
hail/src/main/scala/is/hail/types/physical/stypes/SingleCodeSCode.scala create mode 100644 hail/src/main/scala/is/hail/types/physical/stypes/concrete/SInsertFieldsStruct.scala create mode 100644 hail/src/main/scala/is/hail/types/physical/stypes/concrete/SStackStruct.scala delete mode 100644 hail/src/main/scala/is/hail/utils/MissingAnnotationArrayBuilder.scala delete mode 100644 hail/src/main/scala/is/hail/utils/MissingBooleanArrayBuilder.scala delete mode 100644 hail/src/main/scala/is/hail/utils/MissingDoubleArrayBuilder.scala delete mode 100644 hail/src/main/scala/is/hail/utils/MissingFloatArrayBuilder.scala delete mode 100644 hail/src/main/scala/is/hail/utils/MissingIntArrayBuilder.scala delete mode 100644 hail/src/main/scala/is/hail/utils/MissingLongArrayBuilder.scala create mode 100644 hail/src/test/resources/bad_flag_number.vcf create mode 100644 hail/src/test/scala/is/hail/expr/ir/TrapNodeSuite.scala create mode 100644 js_common/.gitignore create mode 100644 js_common/batch-client.ts create mode 100644 js_common/hail.css create mode 100644 js_common/package-lock.json create mode 100644 js_common/package.json create mode 100644 js_common/react/batch-client.ts create mode 100644 js_common/react/hooks.ts create mode 100644 js_common/svelte/batch-client.ts create mode 100644 js_common/svelte/store.ts create mode 100644 js_common/types.ts create mode 100644 package-lock.json create mode 100644 package.json diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 96d91bb687e..f68453039ab 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -1,2 +1,4 @@ # YAML formatting 1a861505c1fc2ea3c9d7b32a47be7af10d13907c +# black format services code +4fccbe2d18c6d2f4059036d61489467c780bbc0e diff --git a/.gitignore b/.gitignore index 03eb40dec3a..7f413f1d869 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,7 @@ hs_err_pid*.log *hail/python/hail/docs/tutorials/data* *hail/python/hailtop/pipeline/docs/output* .mypy_cache/ +node_modules *.out GPATH GRTAGS diff --git a/address/Makefile b/address/Makefile index 315eb864b60..f38fcc5cfa7 100644 --- a/address/Makefile +++ b/address/Makefile @@ -1,7 +1,8 @@ include ../config.mk -ADDRESS_LATEST = $(DOCKER_PREFIX)/address:latest -ADDRESS_IMAGE = $(DOCKER_PREFIX)/address:$(shell docker images -q --no-trunc address | sed -e 's,[^:]*:,,') +TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) + +ADDRESS_IMAGE := $(DOCKER_PREFIX)/address:$(TOKEN) PYTHONPATH := $${PYTHONPATH:+$${PYTHONPATH}:}../hail/python:../gear:../web_common PYTHON := PYTHONPATH=$(PYTHONPATH) python3 @@ -14,20 +15,12 @@ check: .PHONY: build build: - $(MAKE) -C ../docker build - -docker pull $(ADDRESS_LATEST) - python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"service-base"}}' Dockerfile Dockerfile.out - docker build -t address -f Dockerfile.out --cache-from address,$(ADDRESS_LATEST),service-base .. - -.PHONY: push -push: build - docker tag address $(ADDRESS_LATEST) - docker push $(ADDRESS_LATEST) - docker tag address $(ADDRESS_IMAGE) - docker push $(ADDRESS_IMAGE) + $(MAKE) -C ../docker service-base + python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"'$$(cat ../docker/service-base-image-ref)'"}}' Dockerfile Dockerfile.out + ../docker-build.sh .. address/Dockerfile.out $(ADDRESS_IMAGE) .PHONY: deploy -deploy: push +deploy: build ! 
[ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default python3 ../ci/jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"address_image":{"image":"$(ADDRESS_IMAGE)"},"default_ns":{"name":"$(NAMESPACE)"},"global":{"project":"$(PROJECT)","domain":"$(DOMAIN)"}}' deployment.yaml deployment.yaml.out kubectl -n $(NAMESPACE) apply -f service-account.yaml diff --git a/admin-pod/Makefile b/admin-pod/Makefile index 1efe07336e7..baa722f688e 100644 --- a/admin-pod/Makefile +++ b/admin-pod/Makefile @@ -1,10 +1,8 @@ include ../config.mk -SERVICE_BASE_IMAGE = $(DOCKER_PREFIX)/service-base:$(shell docker images -q --no-trunc service-base:latest | sed -e 's,[^:]*:,,') - .PHONY: deploy deploy: ! [ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default - $(MAKE) -C ../docker push - python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"$(SERVICE_BASE_IMAGE)"}}' admin-pod.yaml admin-pod.yaml.out + $(MAKE) -C ../docker service-base + python3 ../ci/jinja2_render.py '{"deploy":$(DEPLOY),"service_base_image":{"image":"'$$(cat ../docker/service-base-image-ref)'"}}' admin-pod.yaml admin-pod.yaml.out kubectl -n $(NAMESPACE) apply -f admin-pod.yaml.out diff --git a/auth/Makefile b/auth/Makefile index ee04c6773a8..d7f7cb7d589 100644 --- a/auth/Makefile +++ b/auth/Makefile @@ -1,7 +1,8 @@ include ../config.mk -AUTH_LATEST = $(DOCKER_PREFIX)/auth:latest -AUTH_IMAGE = $(DOCKER_PREFIX)/auth:$(shell docker images -q --no-trunc auth:latest | sed -e 's,[^:]*:,,') +TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) + +AUTH_IMAGE := $(DOCKER_PREFIX)/auth:$(TOKEN) EXTRA_PYTHONPATH := ../hail/python:../gear:../web_common PYTHON := PYTHONPATH=$${PYTHONPATH:+$${PYTHONPATH}:}$(EXTRA_PYTHONPATH) python3 @@ -15,20 +16,12 @@ check: .PHONY: build build: - $(MAKE) -C ../docker build - -docker pull $(AUTH_LATEST) - python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"service-base"}}' Dockerfile Dockerfile.out - docker build -f Dockerfile.out -t auth --cache-from auth,$(AUTH_LATEST),base .. - -.PHONY: push -push: build - docker tag auth $(AUTH_LATEST) - docker push $(AUTH_LATEST) - docker tag auth $(AUTH_IMAGE) - docker push $(AUTH_IMAGE) + $(MAKE) -C ../docker service-base + python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"'$$(cat ../docker/service-base-image-ref)'"}}' Dockerfile Dockerfile.out + ../docker-build.sh .. auth/Dockerfile.out $(AUTH_IMAGE) .PHONY: deploy -deploy: push +deploy: build ! 
[ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default kubectl -n $(NAMESPACE) apply -f auth-driver-service-account.yaml python3 ../ci/jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"default_ns":{"name":"$(NAMESPACE)"},"auth_image":{"image":"$(AUTH_IMAGE)"},"auth_database":{"user_secret_name":"sql-auth-user-config"},"global":{"project":"$(PROJECT)","zone":"$(ZONE)","domain":"$(DOMAIN)"}}' deployment.yaml deployment.yaml.out diff --git a/batch/Makefile b/batch/Makefile index 6e013a9ac72..b928a2f346d 100644 --- a/batch/Makefile +++ b/batch/Makefile @@ -1,10 +1,9 @@ include ../config.mk -BATCH_LATEST = $(DOCKER_PREFIX)/batch:latest -BATCH_IMAGE = $(DOCKER_PREFIX)/batch:$(shell docker images -q --no-trunc batch | sed -e 's,[^:]*:,,') +TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) -BATCH_WORKER_LATEST = $(DOCKER_PREFIX)/batch-worker:latest -BATCH_WORKER_IMAGE = $(DOCKER_PREFIX)/batch-worker:$(shell docker images -q --no-trunc batch-worker | sed -e 's,[^:]*:,,') +BATCH_IMAGE := $(DOCKER_PREFIX)/batch:$(TOKEN) +BATCH_WORKER_IMAGE := $(DOCKER_PREFIX)/batch-worker:$(TOKEN) EXTRA_PYTHONPATH := ../hail/python:../gear:../web_common PYTHON := PYTHONPATH=$${PYTHONPATH:+$${PYTHONPATH}:}$(EXTRA_PYTHONPATH) python3 @@ -16,40 +15,24 @@ check: curlylint . bash ../check-sql.sh -.PHONY: build-prereqs -build-prereqs: - $(MAKE) -C ../docker build - .PHONY: build-batch -build-batch: build-prereqs - -docker pull $(BATCH_LATEST) - python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"service-base"}}' Dockerfile Dockerfile.out - docker build -t batch -f Dockerfile.out --cache-from batch,$(BATCH_LATEST),service-base . +build-batch: + $(MAKE) -C ../docker service-base + python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"'$$(cat ../docker/service-base-image-ref)'"}}' Dockerfile Dockerfile.out + ../docker-build.sh . Dockerfile.out $(BATCH_IMAGE) .PHONY: build-worker -build-worker: build-prereqs - -docker pull $(BATCH_WORKER_LATEST) +build-worker: src/main/java/is/hail/JVMEntryway.class jars/junixsocket-selftest-2.3.3-jar-with-dependencies.jar python3 ../ci/jinja2_render.py '{"global":{"docker_prefix":"$(DOCKER_PREFIX)"}}' Dockerfile.worker Dockerfile.worker.out - docker build -t batch-worker -f Dockerfile.worker.out --cache-from batch-worker,$(BATCH_WORKER_LATEST),service-base .. + ../docker-build.sh .. batch/Dockerfile.worker.out $(BATCH_WORKER_IMAGE) .PHONY: build build: build-batch build-worker -.PHONY: push -push: build - docker tag batch $(BATCH_LATEST) - docker push $(BATCH_LATEST) - docker tag batch $(BATCH_IMAGE) - docker push $(BATCH_IMAGE) - docker tag batch-worker $(BATCH_WORKER_LATEST) - docker push $(BATCH_WORKER_LATEST) - docker tag batch-worker $(BATCH_WORKER_IMAGE) - docker push $(BATCH_WORKER_IMAGE) - JINJA_ENVIRONMENT = '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"batch_image":{"image":"$(BATCH_IMAGE)"},"batch_worker_image":{"image":"$(BATCH_WORKER_IMAGE)"},"default_ns":{"name":"$(NAMESPACE)"},"batch_database":{"user_secret_name":"sql-batch-user-config"},"global":{"project":"$(PROJECT)","domain":"$(DOMAIN)","k8s_server_url":"$(KUBERNETES_SERVER_URL)","docker_prefix":"$(DOCKER_PREFIX)","docker_root_image":"$(DOCKER_ROOT_IMAGE)"},"scope":"$(SCOPE)"}' .PHONY: deploy -deploy: push +deploy: build ! 
[ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default E=$(JINJA_ENVIRONMENT) && \ python3 ../ci/jinja2_render.py $$E deployment.yaml deployment.yaml.out && \ diff --git a/batch/batch/batch_configuration.py b/batch/batch/batch_configuration.py index bf0b036eb59..067ff5703cb 100644 --- a/batch/batch/batch_configuration.py +++ b/batch/batch/batch_configuration.py @@ -5,6 +5,7 @@ REFRESH_INTERVAL_IN_SECONDS = int(os.environ.get('REFRESH_INTERVAL_IN_SECONDS', 5 * 60)) DEFAULT_NAMESPACE = os.environ['HAIL_DEFAULT_NAMESPACE'] PROJECT = os.environ['PROJECT'] +SCOPE = os.environ['HAIL_SCOPE'] GCP_REGION = os.environ['HAIL_GCP_REGION'] GCP_ZONE = os.environ['HAIL_GCP_ZONE'] diff --git a/batch/batch/driver/create_instance.py b/batch/batch/driver/create_instance.py index fce1133e1fa..2f22eba6a40 100644 --- a/batch/batch/driver/create_instance.py +++ b/batch/batch/driver/create_instance.py @@ -161,15 +161,6 @@ async def create_instance( sudo mkdir -p /mnt/disks/$WORKER_DATA_DISK_NAME/gcsfuse/ sudo ln -s /mnt/disks/$WORKER_DATA_DISK_NAME/gcsfuse /gcsfuse -sudo mkdir -p /mnt/disks/$WORKER_DATA_DISK_NAME/xfsquota/ -sudo ln -s /mnt/disks/$WORKER_DATA_DISK_NAME/xfsquota /xfsquota - -touch /xfsquota/projects -touch /xfsquota/projid - -ln -s /xfsquota/projects /etc/projects -ln -s /xfsquota/projid /etc/projid - export HOME=/root CORES=$(nproc) @@ -206,7 +197,7 @@ async def create_instance( EOF -sudo tee /etc/google-fluentd/config.d/worker-log.conf < @type tail format json @@ -278,7 +269,6 @@ async def create_instance( -v /batch:/batch:shared \ -v /logs:/logs \ -v /gcsfuse:/gcsfuse:shared \ --v /xfsquota:/xfsquota \ --mount type=bind,source=/mnt/disks/$WORKER_DATA_DISK_NAME,target=/host \ --mount type=bind,source=/dev,target=/dev,bind-propagation=rshared \ -p 5000:5000 \ diff --git a/batch/batch/driver/gce.py b/batch/batch/driver/gce.py index a868202bcee..c78686c189c 100644 --- a/batch/batch/driver/gce.py +++ b/batch/batch/driver/gce.py @@ -1,17 +1,17 @@ import re import json import logging -import dateutil.parser import datetime import aiohttp from gear import Database from hailtop import aiotools, aiogoogle -from hailtop.utils import periodically_call +from hailtop.utils import periodically_call, time_msecs from ..batch_configuration import PROJECT, DEFAULT_NAMESPACE from .zone_monitor import ZoneMonitor from .instance_collection_manager import InstanceCollectionManager +from ..utils import parse_timestamp_msecs log = logging.getLogger('gce_event_monitor') @@ -58,7 +58,7 @@ async def handle_event(self, event): log.warning(f'event has no payload {json.dumps(event)}') return - timestamp = dateutil.parser.isoparse(event['timestamp']).timestamp() * 1000 + timestamp_msecs = parse_timestamp_msecs(event['timestamp']) resource_type = event['resource']['type'] if resource_type != 'gce_instance': @@ -101,16 +101,16 @@ async def handle_event(self, event): log.error(f'event for unknown instance {name}: {json.dumps(event)}') return - if event_subtype == 'v1.compute.instances.preempted': + if event_subtype == 'compute.instances.preempted': log.info(f'event handler: handle preempt {instance}') - await self.handle_preempt_event(instance, timestamp) + await self.handle_preempt_event(instance, timestamp_msecs) elif event_subtype == 'v1.compute.instances.delete': if event_type == 'COMPLETED': log.info(f'event handler: delete {instance} done') - await self.handle_delete_done_event(instance, timestamp) + await self.handle_delete_done_event(instance, timestamp_msecs) elif event_type == 'STARTED': log.info(f'event 
handler: handle call delete {instance}') - await self.handle_call_delete_event(instance, timestamp) + await self.handle_call_delete_event(instance, timestamp_msecs) async def handle_events(self): row = await self.db.select_and_fetchone('SELECT * FROM `gevents_mark`;') @@ -120,7 +120,9 @@ async def handle_events(self): await self.db.execute_update('UPDATE `gevents_mark` SET mark = %s;', (mark,)) filter = f''' -logName="projects/{PROJECT}/logs/cloudaudit.googleapis.com%2Factivity" AND +(logName="projects/{PROJECT}/logs/cloudaudit.googleapis.com%2Factivity" OR +logName="projects/{PROJECT}/logs/cloudaudit.googleapis.com%2Fsystem_event" +) AND resource.type=gce_instance AND protoPayload.resourceName:"{self.machine_name_prefix}" AND timestamp >= "{mark}" @@ -152,16 +154,34 @@ async def delete_orphaned_disks(self): params = {'filter': f'(labels.namespace = {DEFAULT_NAMESPACE})'} for zone in self.zone_monitor.zones: + log.info(f'deleting orphaned disks for zone {zone}') async for disk in await self.compute_client.list(f'/zones/{zone}/disks', params=params): + disk_name = disk['name'] instance_name = disk['labels']['instance-name'] instance = self.inst_coll_manager.get_instance(instance_name) + + creation_timestamp_msecs = parse_timestamp_msecs(disk.get('creationTimestamp')) + last_attach_timestamp_msecs = parse_timestamp_msecs(disk.get('lastAttachTimestamp')) + last_detach_timestamp_msecs = parse_timestamp_msecs(disk.get('lastDetachTimestamp')) + + now_msecs = time_msecs() if instance is None: - try: - await self.compute_client.delete_disk(f'/zones/{zone}/disks/{disk["name"]}') - except aiohttp.ClientResponseError as e: - if e.status == 404: - continue - raise + log.exception(f'deleting disk {disk_name} from instance that no longer exists') + elif (last_attach_timestamp_msecs is None + and now_msecs - creation_timestamp_msecs > 10 * 60 * 1000): + log.exception(f'deleting disk {disk_name} that has not attached within 10 minutes') + elif (last_detach_timestamp_msecs is not None + and now_msecs - last_detach_timestamp_msecs > 5 * 60 * 1000): + log.exception(f'deleting detached disk {disk_name} that has not been cleaned up within 5 minutes') + else: + continue + + try: + await self.compute_client.delete_disk(f'/zones/{zone}/disks/{disk_name}') + except aiohttp.ClientResponseError as e: + if e.status == 404: + continue + log.exception(f'error while deleting orphaned disk {disk_name}') async def delete_orphaned_disks_loop(self): - await periodically_call(300, self.delete_orphaned_disks) + await periodically_call(15, self.delete_orphaned_disks) diff --git a/batch/batch/driver/instance_collection.py b/batch/batch/driver/instance_collection.py index e4747050e7d..08021c2b5d8 100644 --- a/batch/batch/driver/instance_collection.py +++ b/batch/batch/driver/instance_collection.py @@ -1,7 +1,9 @@ +import asyncio import aiohttp import sortedcontainers import logging import dateutil.parser +import collections from typing import Dict from hailtop.utils import time_msecs, secret_alnum_string, periodically_call @@ -26,6 +28,7 @@ def __init__(self, app, name, machine_name_prefix, is_pool): self.is_pool = is_pool self.name_instance: Dict[str, Instance] = {} + self.live_free_cores_mcpu_by_zone: Dict[str, int] = collections.defaultdict(int) self.instances_by_last_updated = sortedcontainers.SortedSet(key=lambda instance: instance.last_updated) @@ -70,6 +73,7 @@ def adjust_for_remove_instance(self, instance): if instance.state in ('pending', 'active'): self.live_free_cores_mcpu -= max(0, instance.free_cores_mcpu) 
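# The per-zone counter added below mirrors live_free_cores_mcpu, keyed by zone:
# adjust_for_remove_instance decrements it here, adjust_for_add_instance applies the
# matching increment, and Pool.create_instances later checks whether
# live_free_cores_mcpu_by_zone[GCP_ZONE] is zero to decide if a worker must be
# created in GCP_ZONE for ready CI cores.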
self.live_total_cores_mcpu -= instance.cores_mcpu + self.live_free_cores_mcpu_by_zone[instance.zone] -= max(0, instance.free_cores_mcpu) async def remove_instance(self, instance, reason, timestamp=None): await instance.deactivate(reason, timestamp) @@ -88,6 +92,7 @@ def adjust_for_add_instance(self, instance): if instance.state in ('pending', 'active'): self.live_free_cores_mcpu += max(0, instance.free_cores_mcpu) self.live_total_cores_mcpu += instance.cores_mcpu + self.live_free_cores_mcpu_by_zone[instance.zone] += max(0, instance.free_cores_mcpu) def add_instance(self, instance): assert instance.name not in self.name_instance @@ -123,6 +128,13 @@ async def check_on_instance(self, instance): return raise + if (instance.state == 'active' + and instance.failed_request_count > 5 + and time_msecs() - instance.last_updated > 5 * 60 * 1000): + log.exception(f'deleting {instance} with {instance.failed_request_count} failed request counts after more than 5 minutes') + await self.call_delete_instance(instance, 'not_responding') + return + # PROVISIONING, STAGING, RUNNING, STOPPING, TERMINATED gce_state = spec['status'] @@ -157,12 +169,16 @@ async def check_on_instance(self, instance): async def monitor_instances(self): if self.instances_by_last_updated: - # 0 is the smallest (oldest) - instance = self.instances_by_last_updated[0] - since_last_updated = time_msecs() - instance.last_updated - if since_last_updated > 60 * 1000: - log.info(f'checking on {instance}, last updated {since_last_updated / 1000}s ago') - await self.check_on_instance(instance) + # [:50] are the fifty smallest (oldest) + instances = self.instances_by_last_updated[:50] + + async def check(instance): + since_last_updated = time_msecs() - instance.last_updated + if since_last_updated > 60 * 1000: + log.info(f'checking on {instance}, last updated {since_last_updated / 1000}s ago') + await self.check_on_instance(instance) + + await asyncio.gather(*[check(instance) for instance in instances]) async def monitor_instances_loop(self): await periodically_call(1, self.monitor_instances) diff --git a/batch/batch/driver/job.py b/batch/batch/driver/job.py index ec4dd800250..e2854c5a372 100644 --- a/batch/batch/driver/job.py +++ b/batch/batch/driver/job.py @@ -248,9 +248,12 @@ async def make_request(): if instance.state in ('inactive', 'deleted'): return try: - async with aiohttp.ClientSession(raise_for_status=True, timeout=aiohttp.ClientTimeout(total=60)) as session: + async with aiohttp.ClientSession(raise_for_status=True, timeout=aiohttp.ClientTimeout(total=5)) as session: await session.delete(url) await instance.mark_healthy() + except asyncio.TimeoutError: + await instance.incr_failed_request_count() + return except aiohttp.ClientResponseError as err: if err.status == 404: await instance.mark_healthy() @@ -425,6 +428,8 @@ async def schedule_job(app, record, instance): await instance.mark_healthy() if e.status == 403: log.info(f'attempt already exists for job {id} on {instance}, aborting') + if e.status == 503: + log.info(f'job {id} cannot be scheduled because {instance} is shutting down, aborting') raise e except Exception: await instance.incr_failed_request_count() diff --git a/batch/batch/driver/pool.py b/batch/batch/driver/pool.py index c0c19a3a97b..e9118246d76 100644 --- a/batch/batch/driver/pool.py +++ b/batch/batch/driver/pool.py @@ -18,7 +18,7 @@ periodically_call, ) -from ..batch_configuration import STANDING_WORKER_MAX_IDLE_TIME_MSECS, WORKER_MAX_IDLE_TIME_MSECS +from ..batch_configuration import 
STANDING_WORKER_MAX_IDLE_TIME_MSECS, WORKER_MAX_IDLE_TIME_MSECS, GCP_ZONE from ..inst_coll_config import PoolConfig from ..utils import ( Box, @@ -165,7 +165,7 @@ def adjust_for_add_instance(self, instance): if instance.state == 'active' and instance.failed_request_count <= 1: self.healthy_instances_by_free_cores.add(instance) - async def create_instance(self, cores=None, max_idle_time_msecs=None): + async def create_instance(self, cores=None, max_idle_time_msecs=None, zone=None): if cores is None: cores = self.worker_cores @@ -174,9 +174,10 @@ async def create_instance(self, cores=None, max_idle_time_msecs=None): machine_name = self.generate_machine_name() - zone = self.zone_monitor.get_zone(cores, self.worker_local_ssd_data_disk, self.worker_pd_ssd_data_disk_size_gb) if zone is None: - return + zone = self.zone_monitor.get_zone(cores, self.worker_local_ssd_data_disk, self.worker_pd_ssd_data_disk_size_gb) + if zone is None: + return machine_type = f'n1-{self.worker_type}-{cores}' @@ -209,18 +210,45 @@ async def create_instance(self, cores=None, max_idle_time_msecs=None): job_private=False, ) + async def create_instances_from_ready_cores(self, ready_cores_mcpu, zone=None): + n_live_instances = self.n_instances_by_state['pending'] + self.n_instances_by_state['active'] + + instances_needed = (ready_cores_mcpu - self.live_free_cores_mcpu + (self.worker_cores * 1000) - 1) // ( + self.worker_cores * 1000 + ) + instances_needed = min( + instances_needed, + self.max_live_instances - n_live_instances, + self.max_instances - self.n_instances, + # 20 queries/s; our GCE long-run quota + 300, + # n * 16 cores / 15s = excess_scheduling_rate/s = 10/s => n ~= 10 + 10, + ) + + if instances_needed > 0: + log.info(f'creating {instances_needed} new instances') + # parallelism will be bounded by thread pool + await asyncio.gather(*[self.create_instance(zone=zone) for _ in range(instances_needed)]) + async def create_instances(self): - ready_cores = await self.db.select_and_fetchone( + ready_cores_mcpu_per_user = self.db.select_and_fetchall( ''' -SELECT CAST(COALESCE(SUM(ready_cores_mcpu), 0) AS SIGNED) AS ready_cores_mcpu +SELECT user, + CAST(COALESCE(SUM(ready_cores_mcpu), 0) AS SIGNED) AS ready_cores_mcpu FROM user_inst_coll_resources WHERE inst_coll = %s -LOCK IN SHARE MODE; +GROUP BY user; ''', (self.name,), ) - ready_cores_mcpu = ready_cores['ready_cores_mcpu'] + if ready_cores_mcpu_per_user is None: + ready_cores_mcpu_per_user = {} + else: + ready_cores_mcpu_per_user = {r['user']: r['ready_cores_mcpu'] async for r in ready_cores_mcpu_per_user} + + ready_cores_mcpu = sum(ready_cores_mcpu_per_user.values()) free_cores_mcpu = sum([worker.free_cores_mcpu for worker in self.healthy_instances_by_free_cores]) free_cores = free_cores_mcpu / 1000 @@ -232,29 +260,17 @@ async def create_instances(self): ) if ready_cores_mcpu > 0 and free_cores < 500: - n_live_instances = self.n_instances_by_state['pending'] + self.n_instances_by_state['active'] + await self.create_instances_from_ready_cores(ready_cores_mcpu) - instances_needed = (ready_cores_mcpu - self.live_free_cores_mcpu + (self.worker_cores * 1000) - 1) // ( - self.worker_cores * 1000 - ) - instances_needed = min( - instances_needed, - self.max_live_instances - n_live_instances, - self.max_instances - self.n_instances, - # 20 queries/s; our GCE long-run quota - 300, - # n * 16 cores / 15s = excess_scheduling_rate/s = 10/s => n ~= 10 - 10, - ) - if instances_needed > 0: - log.info(f'creating {instances_needed} new instances') - # parallelism will be bounded by 
thread pool - await asyncio.gather(*[self.create_instance() for _ in range(instances_needed)]) + ci_ready_cores_mcpu = ready_cores_mcpu_per_user.get('ci', 0) + if ci_ready_cores_mcpu > 0 and self.live_free_cores_mcpu_by_zone[GCP_ZONE] == 0: + await self.create_instances_from_ready_cores(ci_ready_cores_mcpu, zone=GCP_ZONE) n_live_instances = self.n_instances_by_state['pending'] + self.n_instances_by_state['active'] if self.enable_standing_worker and n_live_instances == 0 and self.max_instances > 0: await self.create_instance( - cores=self.standing_worker_cores, max_idle_time_msecs=STANDING_WORKER_MAX_IDLE_TIME_MSECS + cores=self.standing_worker_cores, + max_idle_time_msecs=STANDING_WORKER_MAX_IDLE_TIME_MSECS ) async def control_loop(self): @@ -433,6 +449,7 @@ def get_instance(user, cores_mcpu): for instance in self.pool.healthy_instances_by_free_cores: histogram[instance.free_cores_mcpu] += 1 log.info(f'schedule {self.pool}: no viable instances for {cores_mcpu}: {histogram}') + return None should_wait = True diff --git a/batch/batch/front_end/front_end.py b/batch/batch/front_end/front_end.py index 389d17dfa7f..6a0e4ee63a6 100644 --- a/batch/batch/front_end/front_end.py +++ b/batch/batch/front_end/front_end.py @@ -16,6 +16,7 @@ import google.oauth2.service_account import google.api_core.exceptions import humanize +import traceback from prometheus_async.aio.web import server_stats # type: ignore from hailtop.utils import ( time_msecs, @@ -67,7 +68,7 @@ from ..inst_coll_config import InstanceCollectionConfigs from ..log_store import LogStore from ..database import CallError, check_call_procedure -from ..batch_configuration import BATCH_BUCKET_NAME, DEFAULT_NAMESPACE +from ..batch_configuration import BATCH_BUCKET_NAME, DEFAULT_NAMESPACE, SCOPE from ..globals import HTTP_CLIENT_MAX_SIZE, BATCH_FORMAT_VERSION, memory_to_worker_type from ..spec_writer import SpecWriter from ..batch_format_version import BatchFormatVersion @@ -99,6 +100,21 @@ async def wrapped(request, userdata, *args, **kwargs): return wrapped +def catch_ui_error_in_dev(fun): + @wraps(fun) + async def wrapped(request, userdata, *args, **kwargs): + try: + return await fun(request, userdata, *args, **kwargs) + except aiohttp.web_exceptions.HTTPFound as e: + raise e + except Exception as e: + if SCOPE == 'dev': + log.exception('error while populating ui page') + raise web.HTTPInternalServerError(text=traceback.format_exc()) from e + raise + return wrapped + + async def _user_can_access(db: Database, batch_id: int, user: str): record = await db.select_and_fetchone( ''' @@ -156,12 +172,15 @@ async def _handle_ui_error(session, f, *args, **kwargs): await f(*args, **kwargs) except KeyError as e: set_message(session, str(e), 'error') + log.info(f'ui error: KeyError {e}') return True except BatchOperationAlreadyCompletedError as e: set_message(session, e.message, e.ui_error_type) + log.info(f'ui error: BatchOperationAlreadyCompletedError {e.message}') return True except BatchUserError as e: set_message(session, e.message, e.ui_error_type) + log.info(f'ui error: BatchUserError {e.message}') return True else: return False @@ -322,7 +341,7 @@ async def _read_log_from_gcs(task): except google.api_core.exceptions.NotFound: id = (batch_id, job_id) log.exception(f'missing log file for {id} and task {task}') - data = 'ERROR: could not read log file' + data = 'ERROR: could not find log file' return task, data spec = json.loads(record['spec']) @@ -832,6 +851,10 @@ async def create_jobs(request, userdata): if user != 'ci' and not (network is None or 
network == 'public'): raise web.HTTPBadRequest(reason=f'unauthorized network {network}') + unconfined = spec.get('unconfined') + if user != 'ci' and unconfined: + raise web.HTTPBadRequest(reason=f'unauthorized use of unconfined={unconfined}') + spec_writer.add(json.dumps(spec)) db_spec = batch_format_version.db_spec(spec) @@ -1178,6 +1201,7 @@ async def delete_batch(request, userdata, batch_id): # pylint: disable=unused-a @routes.get('/batches/{batch_id}') @monitor_endpoint @web_billing_project_users_only() +@catch_ui_error_in_dev async def ui_batch(request, userdata, batch_id): app = request.app batch = await _get_batch(app, batch_id) @@ -1198,6 +1222,7 @@ async def ui_batch(request, userdata, batch_id): @monitor_endpoint @check_csrf_token @web_billing_project_users_only(redirect=False) +@catch_ui_error_in_dev async def ui_cancel_batch(request, userdata, batch_id): # pylint: disable=unused-argument post = await request.post() q = post.get('q') @@ -1216,6 +1241,7 @@ async def ui_cancel_batch(request, userdata, batch_id): # pylint: disable=unuse @monitor_endpoint @check_csrf_token @web_billing_project_users_only(redirect=False) +@catch_ui_error_in_dev async def ui_delete_batch(request, userdata, batch_id): # pylint: disable=unused-argument post = await request.post() q = post.get('q') @@ -1232,6 +1258,7 @@ async def ui_delete_batch(request, userdata, batch_id): # pylint: disable=unuse @routes.get('/batches', name='batches') @monitor_endpoint @web_authenticated_users_only() +@catch_ui_error_in_dev async def ui_batches(request, userdata): user = userdata['username'] q = request.query.get('q', f'user:{user}') @@ -1346,6 +1373,7 @@ async def get_job(request, userdata, batch_id): # pylint: disable=unused-argume @routes.get('/batches/{batch_id}/jobs/{job_id}') @monitor_endpoint @web_billing_project_users_only() +@catch_ui_error_in_dev async def ui_get_job(request, userdata, batch_id): app = request.app job_id = int(request.match_info['job_id']) @@ -1366,6 +1394,7 @@ async def ui_get_job(request, userdata, batch_id): 'running': dictfix.NoneOr({'duration': dictfix.NoneOr(Number)}), }, 'short_error': dictfix.NoneOr(str), + 'error': dictfix.NoneOr(str), 'container_status': {'out_of_memory': dictfix.NoneOr(bool)}, 'state': str, } @@ -1380,6 +1409,7 @@ async def ui_get_job(request, userdata, batch_id): job_status = dictfix.dictfix(job_status, job_status_spec) container_statuses = job_status['container_statuses'] step_statuses = [container_statuses['input'], container_statuses['main'], container_statuses['output']] + step_errors = {step: status['error'] for step, status in container_statuses.items() if status is not None} for status in step_statuses: # backwards compatibility @@ -1418,6 +1448,8 @@ async def ui_get_job(request, userdata, batch_id): 'step_statuses': step_statuses, 'job_specification': job_specification, 'job_status_str': json.dumps(job, indent=2), + 'step_errors': step_errors, + 'error': job_status.get('error') } return await render_template('batch', request, userdata, 'job.html', page_context) @@ -1425,6 +1457,7 @@ async def ui_get_job(request, userdata, batch_id): @routes.get('/billing_limits') @monitor_endpoint @web_authenticated_users_only() +@catch_ui_error_in_dev async def ui_get_billing_limits(request, userdata): app = request.app db: Database = app['db'] @@ -1499,6 +1532,7 @@ async def post_edit_billing_limits(request, userdata): # pylint: disable=unused @monitor_endpoint @check_csrf_token @web_authenticated_developers_only(redirect=False) +@catch_ui_error_in_dev async def 
post_edit_billing_limits_ui(request, userdata): # pylint: disable=unused-argument db: Database = request.app['db'] billing_project = request.match_info['billing_project'] @@ -1580,6 +1614,7 @@ def billing_record_to_dict(record): @routes.get('/billing') @monitor_endpoint @web_authenticated_developers_only() +@catch_ui_error_in_dev async def ui_get_billing(request, userdata): billing, start, end = await _query_billing(request) @@ -1620,6 +1655,7 @@ async def ui_get_billing(request, userdata): @routes.get('/billing_projects') @monitor_endpoint @web_authenticated_developers_only() +@catch_ui_error_in_dev async def ui_get_billing_projects(request, userdata): db: Database = request.app['db'] billing_projects = await query_billing_projects(db) @@ -1711,6 +1747,7 @@ async def delete(tx): @monitor_endpoint @check_csrf_token @web_authenticated_developers_only(redirect=False) +@catch_ui_error_in_dev async def post_billing_projects_remove_user(request, userdata): # pylint: disable=unused-argument db: Database = request.app['db'] billing_project = request.match_info['billing_project'] @@ -1775,6 +1812,7 @@ async def insert(tx): @monitor_endpoint @check_csrf_token @web_authenticated_developers_only(redirect=False) +@catch_ui_error_in_dev async def post_billing_projects_add_user(request, userdata): # pylint: disable=unused-argument db: Database = request.app['db'] post = await request.post() @@ -1830,6 +1868,7 @@ async def insert(tx): @monitor_endpoint @check_csrf_token @web_authenticated_developers_only(redirect=False) +@catch_ui_error_in_dev async def post_create_billing_projects(request, userdata): # pylint: disable=unused-argument db: Database = request.app['db'] post = await request.post() @@ -1890,6 +1929,7 @@ async def close_project(tx): @monitor_endpoint @check_csrf_token @web_authenticated_developers_only(redirect=False) +@catch_ui_error_in_dev async def post_close_billing_projects(request, userdata): # pylint: disable=unused-argument db: Database = request.app['db'] billing_project = request.match_info['billing_project'] @@ -1935,6 +1975,7 @@ async def open_project(tx): @monitor_endpoint @check_csrf_token @web_authenticated_developers_only(redirect=False) +@catch_ui_error_in_dev async def post_reopen_billing_projects(request, userdata): # pylint: disable=unused-argument db: Database = request.app['db'] billing_project = request.match_info['billing_project'] @@ -1997,6 +2038,7 @@ async def refresh_inst_colls(request): @routes.get('') @routes.get('/') @web_authenticated_users_only() +@catch_ui_error_in_dev async def index(request, userdata): # pylint: disable=unused-argument location = request.app.router['batches'].url_for() raise web.HTTPFound(location=location) diff --git a/batch/batch/front_end/templates/job.html b/batch/batch/front_end/templates/job.html index 3dc1785b07b..597fbe0b7ef 100644 --- a/batch/batch/front_end/templates/job.html +++ b/batch/batch/front_end/templates/job.html @@ -101,23 +101,50 @@

Step Status

-{% if job_log %} -

Log

+{% if error is not none %} +

Error

+
{{ error }}
+{% endif %} -{% if 'input' in job_log %} +{% if job_log or step_errors %} +

Logs

+ +{% if 'input' in job_log or 'input' in step_errors %}

Input

+{% if 'input' in job_log %} +

Log

{{ job_log['input'] }}
{% endif %} +{% if 'input' in step_errors and step_errors['input'] is not none %} +

Error

+
{{ step_errors['input'] }}
+{% endif %} +{% endif %} -{% if 'main' in job_log %} +{% if 'main' in job_log or 'main' in step_errors %}

Main

+{% if 'main' in job_log %} +

Log

{{ job_log['main'] }}
{% endif %} +{% if 'main' in step_errors and step_errors['main'] is not none %} +

Error

+
{{ step_errors['main'] }}
+{% endif %} +{% endif %} -{% if 'output' in job_log %} +{% if 'output' in job_log or 'output' in step_errors %}

Output

+{% if 'output' in job_log %} +

Log

{{ job_log['output'] }}
{% endif %} +{% if 'output' in step_errors and step_errors['output'] is not none %} +

Error

+
{{ step_errors['output'] }}
+{% endif %} +{% endif %} + {% endif %}

Job Specification

diff --git a/batch/batch/front_end/validate.py b/batch/batch/front_end/validate.py index 62518ca0b2a..a8a004901b4 100644 --- a/batch/batch/front_end/validate.py +++ b/batch/batch/front_end/validate.py @@ -20,6 +20,7 @@ regex, required, str_type, + non_empty_str_type, switch, ValidationError, ) @@ -48,8 +49,8 @@ 'gcsfuse': listof( keyed( { - required('bucket'): str_type, - required('mount_path'): str_type, + required('bucket'): non_empty_str_type, + required('mount_path'): non_empty_str_type, required('read_only'): bool_type, } ) @@ -58,6 +59,7 @@ required('job_id'): int_type, 'mount_tokens': bool_type, 'network': oneof('public', 'private'), + 'unconfined': bool_type, 'output_files': listof(keyed({required('from'): str_type, required('to'): str_type})), required('parent_ids'): listof(int_type), 'port': int_type, diff --git a/batch/batch/utils.py b/batch/batch/utils.py index 58c2189de56..13f7dcd793d 100644 --- a/batch/batch/utils.py +++ b/batch/batch/utils.py @@ -2,11 +2,13 @@ import math import json import secrets +import dateutil.parser from aiohttp import web from functools import wraps from collections import deque from gear import maybe_parse_bearer_header +from hailtop.utils import secret_alnum_string from .globals import RESERVED_STORAGE_GB_PER_CORE @@ -194,6 +196,12 @@ def is_valid_cores_mcpu(cores_mcpu: int): return quarter_cores & (quarter_cores - 1) == 0 +def parse_timestamp_msecs(ts): + if ts is None: + return ts + return dateutil.parser.isoparse(ts).timestamp() * 1000 + + class Box: def __init__(self, value): self.value = value @@ -248,7 +256,8 @@ def __init__(self): self._global_counter = WindowFractionCounter(10) def push(self, success: bool): - self._global_counter.push('exceeded_shares', success) + token = secret_alnum_string(6) + self._global_counter.push(token, success) def rate(self) -> float: return self._global_counter.fraction() diff --git a/batch/batch/worker/disk.py b/batch/batch/worker/disk.py index b5833666c5e..ebef846a02e 100644 --- a/batch/batch/worker/disk.py +++ b/batch/batch/worker/disk.py @@ -1,6 +1,6 @@ import logging -from hailtop.utils import check_shell_output, LoggingTimer +from hailtop.utils import check_shell_output, LoggingTimer, retry_all_errors_n_times log = logging.getLogger('disk') @@ -37,15 +37,26 @@ async def create(self, labels=None): async def delete(self): try: - await self._detach() + await self._unmount() finally: - await self._delete() + try: + await self._detach() + finally: + await self._delete() + + async def _unmount(self): + await retry_all_errors_n_times(max_errors=10, msg=f'error while unmounting disk {self.name}', error_logging_interval=3)( + check_shell_output, f'umount -v {self.disk_path} {self.mount_path}' + ) async def _format(self): - await check_shell_output(f'mkfs.ext4 -m 0 -E lazy_itable_init=0,lazy_journal_init=0,discard {self.disk_path}') - await check_shell_output(f'mkdir -p {self.mount_path}') - await check_shell_output(f'mount -o discard,defaults {self.disk_path} {self.mount_path}') - await check_shell_output(f'chmod a+w {self.mount_path}') + async def format_disk(): + await check_shell_output(f'mkfs.ext4 -m 0 -E lazy_itable_init=0,lazy_journal_init=0,discard {self.disk_path}') + await check_shell_output(f'mkdir -p {self.mount_path}') + await check_shell_output(f'mount -o discard,defaults {self.disk_path} {self.mount_path}') + await check_shell_output(f'chmod a+w {self.mount_path}') + + await retry_all_errors_n_times(max_errors=10, msg=f'error while formatting disk {self.name}', error_logging_interval=3)(format_disk) 
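# retry_all_errors_n_times(...) from hailtop.utils wraps a coroutine call so that it is
# retried on any exception; with max_errors=10 it presumably gives up after ten failed
# attempts, logging at the given error_logging_interval. Both the umount in _unmount()
# (called from delete()) and format_disk above are wrapped this way so transient device
# errors do not leak disks.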
async def _create(self, labels=None): async with LoggingTimer(f'creating disk {self.name}'): diff --git a/batch/batch/worker/flock.py b/batch/batch/worker/flock.py deleted file mode 100644 index 30e409b45c1..00000000000 --- a/batch/batch/worker/flock.py +++ /dev/null @@ -1,51 +0,0 @@ -import fcntl -import os -import argparse -import subprocess as sp - -from pathlib import Path -from hailtop.utils import blocking_to_async - - -class Flock: - def __init__(self, path, pool=None, nonblock=False): - self.path = Path(path).resolve() - self.lock_path = self.path.parent - self.pool = pool - self.flock_flags = fcntl.LOCK_EX - if nonblock: - self.flock_flags |= fcntl.LOCK_NB - self.fd = -1 - - def __enter__(self): - self.lock_path.mkdir(parents=True, exist_ok=True) - self.fd = os.open(self.lock_path, os.O_RDONLY) - fcntl.flock(self.fd, self.flock_flags) - return self - - def __exit__(self, type, value, traceback): - fcntl.flock(self.fd, fcntl.LOCK_UN) - os.close(self.fd) - - async def __aenter__(self): - assert self.pool - return await blocking_to_async(self.pool, self.__enter__) - - async def __aexit__(self, exc_type, exc_val, exc_tb): - assert self.pool - return await blocking_to_async(self.pool, self.__exit__, exc_type, exc_val, exc_tb) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('path', type=str) - parser.add_argument('-c', dest='command', type=str, required=True) - parser.add_argument('-n', dest='nonblock', action='store_true') - args = parser.parse_args() - - with Flock(args.path): - try: - sp.check_output(args.command, stderr=sp.STDOUT, shell=True) - except sp.CalledProcessError as e: - print(e.output) - raise e diff --git a/batch/batch/worker/worker.py b/batch/batch/worker/worker.py index e57933dc38c..f21c227cc03 100644 --- a/batch/batch/worker/worker.py +++ b/batch/batch/worker/worker.py @@ -1,4 +1,4 @@ -from typing import Optional, Dict, Callable +from typing import Optional, Dict, Callable, Tuple import os import json import sys @@ -68,7 +68,6 @@ from ..utils import storage_gib_to_bytes, Box from .disk import Disk -from .flock import Flock # uvloop.install() @@ -281,6 +280,8 @@ def user_error(e): if isinstance(e, DockerError): if e.status == 404 and 'pull access denied' in e.message: return True + if e.status == 404 and 'not found: manifest unknown' in e.message: + return True if e.status == 400 and 'executable file not found' in e.message: return True return False @@ -360,6 +361,10 @@ def container_config(self): network = 'public' host_config['NetworkMode'] = network # not documented, I used strace to inspect the packets + unconfined = self.spec.get('unconfined') + if unconfined: + host_config['SecurityOpt'] = ["seccomp:unconfined", "apparmor:unconfined"] + config['HostConfig'] = host_config return config @@ -442,8 +447,11 @@ async def run(self, worker): docker.images.pull, self.image_ref_str, auth=auth ) except DockerError as e: - if e.status == 404 and 'pull access denied' in e.message: - self.short_error = 'image cannot be pulled' + if e.status == 404: + if 'pull access denied' in e.message: + self.short_error = 'image cannot be pulled' + elif 'not found: manifest unknown' in e.message: + self.short_error = 'image not found' raise if self.port is not None: @@ -464,12 +472,8 @@ async def run(self, worker): self.overlay_path = merged_overlay_path[:-7].replace(WORKER_DATA_DISK_MOUNT, '/host') os.makedirs(f'{self.overlay_path}/', exist_ok=True) - async with Flock('/xfsquota/projects', pool=worker.pool): - with open('/xfsquota/projects', 'a') 
as f: - f.write(f'{self.job.project_id}:{self.overlay_path}\n') - await check_shell_output( - f'xfs_quota -x -D /xfsquota/projects -P /xfsquota/projid -c "project -s {self.job.project_name}" /host/' + f'xfs_quota -x -c "project -s -p {self.overlay_path} {self.job.project_id}" /host/' ) with self.step('starting'): @@ -535,12 +539,6 @@ async def get_log(self): return self.log async def delete_container(self): - if self.overlay_path: - path = self.overlay_path.replace('/', r'\/') - - async with Flock('/xfsquota/projects', pool=worker.pool): - await check_shell(f"sed -i '/:{path}/d' /xfsquota/projects") - if self.container: try: log.info(f'{self}: deleting container') @@ -606,6 +604,7 @@ def populate_secret_host_path(host_path, secret_data): async def add_gcsfuse_bucket(mount_path, bucket, key_file, read_only): + assert bucket os.makedirs(mount_path) options = ['allow_other'] if read_only: @@ -778,7 +777,6 @@ def __init__( self.secrets = secrets self.env = job_spec.get('env', []) - self.project_name = f'batch-{self.batch_id}-job-{self.job_id}' self.project_id = Job.get_next_xfsquota_project_id() self.task_manager = task_manager @@ -908,6 +906,9 @@ def __init__( if network: assert network in ('public', 'private') main_spec['network'] = network + unconfined = job_spec.get('unconfined') + if unconfined: + main_spec['unconfined'] = unconfined containers['main'] = Container(self, 'main', main_spec) if output_files: @@ -970,25 +971,16 @@ async def run(self, worker): await self.setup_io() - async with Flock('/xfsquota/projid', pool=worker.pool): - with open('/xfsquota/projid', 'a') as f: - f.write(f'{self.project_name}:{self.project_id}\n') - if not self.disk: - async with Flock('/xfsquota/projects', pool=worker.pool): - with open('/xfsquota/projects', 'a') as f: - f.write(f'{self.project_id}:{self.scratch}\n') data_disk_storage_in_bytes = storage_gib_to_bytes( self.external_storage_in_gib + self.data_disk_storage_in_gib ) else: data_disk_storage_in_bytes = storage_gib_to_bytes(self.data_disk_storage_in_gib) + await check_shell_output(f'xfs_quota -x -c "project -s -p {self.scratch} {self.project_id}" /host/') await check_shell_output( - f'xfs_quota -x -D /xfsquota/projects -P /xfsquota/projid -c "project -s {self.project_name}" /host/' - ) - await check_shell_output( - f'xfs_quota -x -D /xfsquota/projects -P /xfsquota/projid -c "limit -p bsoft={data_disk_storage_in_bytes} bhard={data_disk_storage_in_bytes} {self.project_name}" /host/' + f'xfs_quota -x -c "limit -p bsoft={data_disk_storage_in_bytes} bhard={data_disk_storage_in_bytes} {self.project_id}" /host/' ) if self.secrets: @@ -1072,15 +1064,7 @@ async def cleanup(self): await check_shell(f'fusermount -u {mount_path}') log.info(f'unmounted gcsfuse bucket {bucket} from {mount_path}') - await check_shell( - f'xfs_quota -x -D /xfsquota/projects -P /xfsquota/projid -c "limit -p bsoft=0 bhard=0 {self.project_name}" /host' - ) - - async with Flock('/xfsquota/projid', pool=worker.pool): - await check_shell(f"sed -i '/{self.project_name}:{self.project_id}/d' /xfsquota/projid") - - async with Flock('/xfsquota/projects', pool=worker.pool): - await check_shell(f"sed -i '/{self.project_id}:/d' /xfsquota/projects") + await check_shell(f'xfs_quota -x -c "limit -p bsoft=0 bhard=0 {self.project_id}" /host') await blocking_to_async(self.pool, shutil.rmtree, self.scratch, ignore_errors=True) except asyncio.CancelledError: @@ -1199,19 +1183,9 @@ async def run(self, worker): os.makedirs(f'{self.scratch}/') - async with Flock('/xfsquota/projid', pool=worker.pool): - 
with open('/xfsquota/projid', 'a') as f: - f.write(f'{self.project_name}:{self.project_id}\n') - - async with Flock('/xfsquota/projects', pool=worker.pool): - with open('/xfsquota/projects', 'a') as f: - f.write(f'{self.project_id}:{self.scratch}\n') - - await check_shell_output( - f'xfs_quota -x -D /xfsquota/projects -P /xfsquota/projid -c "project -s {self.project_name}" /host/' - ) + await check_shell_output(f'xfs_quota -x -c "project -s -p {self.scratch} {self.project_id}" /host/') await check_shell_output( - f'xfs_quota -x -D /xfsquota/projects -P /xfsquota/projid -c "limit -p bsoft={self.data_disk_storage_in_gib} bhard={self.data_disk_storage_in_gib} {self.project_name}" /host/' + f'xfs_quota -x -c "limit -p bsoft={self.data_disk_storage_in_gib} bhard={self.data_disk_storage_in_gib} {self.project_id}" /host/' ) if self.secrets: @@ -1287,15 +1261,7 @@ async def cleanup(self): log.info(f'{self}: cleaning up') try: - await check_shell( - f'xfs_quota -x -D /xfsquota/projects -P /xfsquota/projid -c "limit -p bsoft=0 bhard=0 {self.project_name}" /host' - ) - - async with Flock('/xfsquota/projid', pool=worker.pool): - await check_shell(f"sed -i '/{self.project_name}:{self.project_id}/d' /xfsquota/projid") - - async with Flock('/xfsquota/projects', pool=worker.pool): - await check_shell(f"sed -i '/{self.project_id}:/d' /xfsquota/projects") + await check_shell(f'xfs_quota -x -c "limit -p bsoft=0 bhard=0 {self.project_id}" /host') await blocking_to_async(self.pool, shutil.rmtree, self.scratch, ignore_errors=True) except asyncio.CancelledError: @@ -1341,12 +1307,13 @@ def __str__(self): class Worker: def __init__(self): + self.active = False self.cores_mcpu = CORES * 1000 self.last_updated = time_msecs() self.cpu_sem = FIFOWeightedSemaphore(self.cores_mcpu) self.data_disk_space_remaining = Box(UNRESERVED_WORKER_DATA_DISK_SIZE_GB) self.pool = concurrent.futures.ThreadPoolExecutor() - self.jobs = {} + self.jobs: Dict[Tuple[int, int], Job] = {} self.stop_event = asyncio.Event() self.task_manager = aiotools.BackgroundTaskManager() self.jar_download_locks = defaultdict(asyncio.Lock) @@ -1403,6 +1370,10 @@ async def create_job_1(self, request): if id in self.jobs: return web.HTTPForbidden() + # check worker hasn't started shutting down + if not self.active: + return web.HTTPServiceUnavailable() + job = Job.create( batch_id, body['user'], body['gsa_key'], job_spec, format_version, self.task_manager, self.pool ) @@ -1499,6 +1470,7 @@ async def run(self): f'free worker data disk storage {self.data_disk_space_remaining.value}Gi' ) finally: + self.active = False log.info('shutting down') await site.stop() log.info('stopped site') @@ -1655,6 +1627,7 @@ async def activate(self): resp_json = await resp.json() self.headers = {'X-Hail-Instance-Name': NAME, 'Authorization': f'Bearer {resp_json["token"]}'} + self.active = True async def async_main(): @@ -1676,7 +1649,7 @@ async def async_main(): asyncio.get_event_loop().set_debug(True) log.debug('Tasks immediately after docker close') dump_all_stacktraces() - other_tasks = [t for t in asyncio.tasks() if t != asyncio.current_task()] + other_tasks = [t for t in asyncio.all_tasks() if t != asyncio.current_task()] if other_tasks: _, pending = await asyncio.wait(other_tasks, timeout=10 * 60, return_when=asyncio.ALL_COMPLETED) for t in pending: diff --git a/batch/deployment.yaml b/batch/deployment.yaml index cb1f3a88c68..8af559344fa 100644 --- a/batch/deployment.yaml +++ b/batch/deployment.yaml @@ -86,6 +86,8 @@ spec: - name: HAIL_SHOULD_PROFILE value: "1" {% endif %} 
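# HAIL_SCOPE (added below) is read as batch_configuration.SCOPE; the front end's
# catch_ui_error_in_dev decorator only returns full tracebacks to the UI when SCOPE == 'dev'.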
+ - name: HAIL_SCOPE + value: "{{ scope }}" {% if deploy %} - name: HAIL_BATCH_BUCKET_NAME valueFrom: @@ -222,6 +224,8 @@ spec: value: "{{ global.k8s_server_url }}" - name: HAIL_SHA value: "{{ code.sha }}" + - name: HAIL_SCOPE + value: "{{ scope }}" {% if deploy %} - name: HAIL_BATCH_BUCKET_NAME valueFrom: diff --git a/batch/test/test_batch.py b/batch/test/test_batch.py index 7771927dd04..021561b78f3 100644 --- a/batch/test/test_batch.py +++ b/batch/test/test_batch.py @@ -18,23 +18,11 @@ deploy_config = get_deploy_config() +DOCKER_PREFIX = os.environ.get('DOCKER_PREFIX') DOCKER_ROOT_IMAGE = os.environ.get('DOCKER_ROOT_IMAGE', 'gcr.io/hail-vdc/ubuntu:18.04') SCOPE = os.environ.get('HAIL_SCOPE', 'test') -def poll_until(p, max_polls=None): - i = 0 - while True and (max_polls is None or i < max_polls): - x = p() - if x: - return x - # max 4.5s - j = random.randrange(math.floor(1.1 ** min(i, 40))) - time.sleep(0.100 * j) - i = i + 1 - raise ValueError(f'poll_until: exceeded max polls: {i} {max_polls}') - - @pytest.fixture def client(): client = BatchClient('test') @@ -269,6 +257,15 @@ def test_fail(client): assert j._get_exit_code(status, 'main') == 1, str(status) +def test_unknown_image(client): + b = client.create_batch() + j = b.create_job(f'{DOCKER_PREFIX}/does-not-exist', ['echo', 'test']) + b.submit() + status = j.wait() + assert j._get_exit_code(status, 'main') is None + assert status['status']['container_statuses']['main']['short_error'] == 'image not found' + + def test_running_job_log_and_status(client): b = client.create_batch() j = b.create_job(DOCKER_ROOT_IMAGE, ['sleep', '300']) @@ -566,6 +563,20 @@ def test_batch_create_validation(): # token None/missing {'billing_project': 'foo', 'n_jobs': 5, 'token': None}, {'billing_project': 'foo', 'n_jobs': 5}, + # empty gcsfuse bucket name + { + 'billing_project': 'foo', + 'n_jobs': 5, + 'token': 'baz', + 'gcsfuse': [{'bucket': '', 'mount_path': '/bucket', 'read_only': False}], + }, + # empty gcsfuse mount_path name + { + 'billing_project': 'foo', + 'n_jobs': 5, + 'token': 'baz', + 'gcsfuse': [{'bucket': 'foo', 'mount_path': '', 'read_only': False}], + }, # attribute key/value None {'attributes': {'k': None}, 'billing_project': 'foo', 'n_jobs': 5, 'token': 'baz'}, ] diff --git a/batch2/proxy.py b/batch2/proxy.py new file mode 100644 index 00000000000..0a3427237d9 --- /dev/null +++ b/batch2/proxy.py @@ -0,0 +1,19 @@ +import asyncio +from aiohttp import web +from hailtop.batch_client.aioclient import BatchClient + +routes = web.RouteTableDef() + +client = BatchClient('test') + + +@routes.get('/api/{route:.*}') +async def proxy_api(request): + route = request.match_info['route'] + data = await client._get(f'/api/{route}') + return web.json_response(await data.json()) + + +app = web.Application() +app.add_routes(routes) +web.run_app(app, host='0.0.0.0', port=5050) diff --git a/batch2/react-batch/.eslintrc.json b/batch2/react-batch/.eslintrc.json new file mode 100644 index 00000000000..854d0d491cc --- /dev/null +++ b/batch2/react-batch/.eslintrc.json @@ -0,0 +1,35 @@ +{ + "env": { + "browser": true, + "es2021": true + }, + "extends": [ + "airbnb" + ], + "parser": "@typescript-eslint/parser", + "parserOptions": { + "ecmaFeatures": { + "jsx": true + }, + "ecmaVersion": 12, + "sourceType": "module" + }, + "plugins": [ + "react", + "@typescript-eslint" + ], + "rules": { + "no-use-before-define": "off", + "@typescript-eslint/no-use-before-define": "off", + "react/jsx-filename-extension": [ + 2, + { + "extensions": [".tsx"] + } + ], + 
"react/jsx-one-expression-per-line": "off", + "import/extensions": "off", + "import/no-unresolved": "off", + "import/no-extraneous-dependencies": "off" + } +} diff --git a/batch2/react-batch/.gitignore b/batch2/react-batch/.gitignore new file mode 100644 index 00000000000..d451ff16c10 --- /dev/null +++ b/batch2/react-batch/.gitignore @@ -0,0 +1,5 @@ +node_modules +.DS_Store +dist +dist-ssr +*.local diff --git a/batch2/react-batch/index.html b/batch2/react-batch/index.html new file mode 100644 index 00000000000..3377ffaec2d --- /dev/null +++ b/batch2/react-batch/index.html @@ -0,0 +1,13 @@ + + + + + + + Batch2 React + + +
+ + + diff --git a/batch2/react-batch/package-lock.json b/batch2/react-batch/package-lock.json new file mode 100644 index 00000000000..74b6fd85ed7 --- /dev/null +++ b/batch2/react-batch/package-lock.json @@ -0,0 +1,6591 @@ +{ + "name": "react-batch", + "version": "0.0.0", + "lockfileVersion": 2, + "requires": true, + "packages": { + "": { + "version": "0.0.0", + "dependencies": { + "react": "^17.0.2", + "react-dom": "^17.0.0", + "react-router-dom": "^5.2.0" + }, + "devDependencies": { + "@types/react": "^17.0.5", + "@types/react-dom": "^17.0.0", + "@types/react-router-dom": "^5.1.7", + "@typescript-eslint/eslint-plugin": "^4.23.0", + "@typescript-eslint/parser": "^4.23.0", + "@vitejs/plugin-react-refresh": "^1.3.1", + "eslint": "^7.26.0", + "eslint-config-airbnb": "^18.2.1", + "eslint-plugin-import": "^2.23.0", + "eslint-plugin-jsx-a11y": "^6.4.1", + "eslint-plugin-react": "^7.23.2", + "eslint-plugin-react-hooks": "^4.2.0", + "typescript": "^4.1.2", + "vite": "^2.2.3" + } + }, + "node_modules/@babel/code-frame": { + "version": "7.12.13", + "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.12.13.tgz", + "integrity": "sha512-HV1Cm0Q3ZrpCR93tkWOYiuYIgLxZXZFVG2VgK+MBWjUqZTundupbfx2aXarXuw5Ko5aMcjtJgbSs4vUGBS5v6g==", + "dev": true, + "dependencies": { + "@babel/highlight": "^7.12.13" + } + }, + "node_modules/@babel/compat-data": { + "version": "7.14.0", + "resolved": "https://registry.npmjs.org/@babel/compat-data/-/compat-data-7.14.0.tgz", + "integrity": "sha512-vu9V3uMM/1o5Hl5OekMUowo3FqXLJSw+s+66nt0fSWVWTtmosdzn45JHOB3cPtZoe6CTBDzvSw0RdOY85Q37+Q==", + "dev": true + }, + "node_modules/@babel/core": { + "version": "7.14.3", + "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.14.3.tgz", + "integrity": "sha512-jB5AmTKOCSJIZ72sd78ECEhuPiDMKlQdDI/4QRI6lzYATx5SSogS1oQA2AoPecRCknm30gHi2l+QVvNUu3wZAg==", + "dev": true, + "dependencies": { + "@babel/code-frame": "^7.12.13", + "@babel/generator": "^7.14.3", + "@babel/helper-compilation-targets": "^7.13.16", + "@babel/helper-module-transforms": "^7.14.2", + "@babel/helpers": "^7.14.0", + "@babel/parser": "^7.14.3", + "@babel/template": "^7.12.13", + "@babel/traverse": "^7.14.2", + "@babel/types": "^7.14.2", + "convert-source-map": "^1.7.0", + "debug": "^4.1.0", + "gensync": "^1.0.0-beta.2", + "json5": "^2.1.2", + "semver": "^6.3.0", + "source-map": "^0.5.0" + }, + "engines": { + "node": ">=6.9.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/babel" + } + }, + "node_modules/@babel/core/node_modules/semver": { + "version": "6.3.0", + "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.0.tgz", + "integrity": "sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==", + "dev": true, + "bin": { + "semver": "bin/semver.js" + } + }, + "node_modules/@babel/generator": { + "version": "7.14.3", + "resolved": "https://registry.npmjs.org/@babel/generator/-/generator-7.14.3.tgz", + "integrity": "sha512-bn0S6flG/j0xtQdz3hsjJ624h3W0r3llttBMfyHX3YrZ/KtLYr15bjA0FXkgW7FpvrDuTuElXeVjiKlYRpnOFA==", + "dev": true, + "dependencies": { + "@babel/types": "^7.14.2", + "jsesc": "^2.5.1", + "source-map": "^0.5.0" + } + }, + "node_modules/@babel/helper-compilation-targets": { + "version": "7.13.16", + "resolved": "https://registry.npmjs.org/@babel/helper-compilation-targets/-/helper-compilation-targets-7.13.16.tgz", + "integrity": "sha512-3gmkYIrpqsLlieFwjkGgLaSHmhnvlAYzZLlYVjlW+QwI+1zE17kGxuJGmIqDQdYp56XdmGeD+Bswx0UTyG18xA==", + "dev": true, + "dependencies": { + 
"@babel/compat-data": "^7.13.15", + "@babel/helper-validator-option": "^7.12.17", + "browserslist": "^4.14.5", + "semver": "^6.3.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0" + } + }, + "node_modules/@babel/helper-compilation-targets/node_modules/semver": { + "version": "6.3.0", + "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.0.tgz", + "integrity": "sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==", + "dev": true, + "bin": { + "semver": "bin/semver.js" + } + }, + "node_modules/@babel/helper-function-name": { + "version": "7.14.2", + "resolved": "https://registry.npmjs.org/@babel/helper-function-name/-/helper-function-name-7.14.2.tgz", + "integrity": "sha512-NYZlkZRydxw+YT56IlhIcS8PAhb+FEUiOzuhFTfqDyPmzAhRge6ua0dQYT/Uh0t/EDHq05/i+e5M2d4XvjgarQ==", + "dev": true, + "dependencies": { + "@babel/helper-get-function-arity": "^7.12.13", + "@babel/template": "^7.12.13", + "@babel/types": "^7.14.2" + } + }, + "node_modules/@babel/helper-get-function-arity": { + "version": "7.12.13", + "resolved": "https://registry.npmjs.org/@babel/helper-get-function-arity/-/helper-get-function-arity-7.12.13.tgz", + "integrity": "sha512-DjEVzQNz5LICkzN0REdpD5prGoidvbdYk1BVgRUOINaWJP2t6avB27X1guXK1kXNrX0WMfsrm1A/ZBthYuIMQg==", + "dev": true, + "dependencies": { + "@babel/types": "^7.12.13" + } + }, + "node_modules/@babel/helper-member-expression-to-functions": { + "version": "7.13.12", + "resolved": "https://registry.npmjs.org/@babel/helper-member-expression-to-functions/-/helper-member-expression-to-functions-7.13.12.tgz", + "integrity": "sha512-48ql1CLL59aKbU94Y88Xgb2VFy7a95ykGRbJJaaVv+LX5U8wFpLfiGXJJGUozsmA1oEh/o5Bp60Voq7ACyA/Sw==", + "dev": true, + "dependencies": { + "@babel/types": "^7.13.12" + } + }, + "node_modules/@babel/helper-module-imports": { + "version": "7.13.12", + "resolved": "https://registry.npmjs.org/@babel/helper-module-imports/-/helper-module-imports-7.13.12.tgz", + "integrity": "sha512-4cVvR2/1B693IuOvSI20xqqa/+bl7lqAMR59R4iu39R9aOX8/JoYY1sFaNvUMyMBGnHdwvJgUrzNLoUZxXypxA==", + "dev": true, + "dependencies": { + "@babel/types": "^7.13.12" + } + }, + "node_modules/@babel/helper-module-transforms": { + "version": "7.14.2", + "resolved": "https://registry.npmjs.org/@babel/helper-module-transforms/-/helper-module-transforms-7.14.2.tgz", + "integrity": "sha512-OznJUda/soKXv0XhpvzGWDnml4Qnwp16GN+D/kZIdLsWoHj05kyu8Rm5kXmMef+rVJZ0+4pSGLkeixdqNUATDA==", + "dev": true, + "dependencies": { + "@babel/helper-module-imports": "^7.13.12", + "@babel/helper-replace-supers": "^7.13.12", + "@babel/helper-simple-access": "^7.13.12", + "@babel/helper-split-export-declaration": "^7.12.13", + "@babel/helper-validator-identifier": "^7.14.0", + "@babel/template": "^7.12.13", + "@babel/traverse": "^7.14.2", + "@babel/types": "^7.14.2" + } + }, + "node_modules/@babel/helper-optimise-call-expression": { + "version": "7.12.13", + "resolved": "https://registry.npmjs.org/@babel/helper-optimise-call-expression/-/helper-optimise-call-expression-7.12.13.tgz", + "integrity": "sha512-BdWQhoVJkp6nVjB7nkFWcn43dkprYauqtk++Py2eaf/GRDFm5BxRqEIZCiHlZUGAVmtwKcsVL1dC68WmzeFmiA==", + "dev": true, + "dependencies": { + "@babel/types": "^7.12.13" + } + }, + "node_modules/@babel/helper-plugin-utils": { + "version": "7.13.0", + "resolved": "https://registry.npmjs.org/@babel/helper-plugin-utils/-/helper-plugin-utils-7.13.0.tgz", + "integrity": "sha512-ZPafIPSwzUlAoWT8DKs1W2VyF2gOWthGd5NGFMsBcMMol+ZhK+EQY/e6V96poa6PA/Bh+C9plWN0hXO1uB8AfQ==", + "dev": true + }, + 
"node_modules/@babel/helper-replace-supers": { + "version": "7.14.3", + "resolved": "https://registry.npmjs.org/@babel/helper-replace-supers/-/helper-replace-supers-7.14.3.tgz", + "integrity": "sha512-Rlh8qEWZSTfdz+tgNV/N4gz1a0TMNwCUcENhMjHTHKp3LseYH5Jha0NSlyTQWMnjbYcwFt+bqAMqSLHVXkQ6UA==", + "dev": true, + "dependencies": { + "@babel/helper-member-expression-to-functions": "^7.13.12", + "@babel/helper-optimise-call-expression": "^7.12.13", + "@babel/traverse": "^7.14.2", + "@babel/types": "^7.14.2" + } + }, + "node_modules/@babel/helper-simple-access": { + "version": "7.13.12", + "resolved": "https://registry.npmjs.org/@babel/helper-simple-access/-/helper-simple-access-7.13.12.tgz", + "integrity": "sha512-7FEjbrx5SL9cWvXioDbnlYTppcZGuCY6ow3/D5vMggb2Ywgu4dMrpTJX0JdQAIcRRUElOIxF3yEooa9gUb9ZbA==", + "dev": true, + "dependencies": { + "@babel/types": "^7.13.12" + } + }, + "node_modules/@babel/helper-split-export-declaration": { + "version": "7.12.13", + "resolved": "https://registry.npmjs.org/@babel/helper-split-export-declaration/-/helper-split-export-declaration-7.12.13.tgz", + "integrity": "sha512-tCJDltF83htUtXx5NLcaDqRmknv652ZWCHyoTETf1CXYJdPC7nohZohjUgieXhv0hTJdRf2FjDueFehdNucpzg==", + "dev": true, + "dependencies": { + "@babel/types": "^7.12.13" + } + }, + "node_modules/@babel/helper-validator-identifier": { + "version": "7.14.0", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.14.0.tgz", + "integrity": "sha512-V3ts7zMSu5lfiwWDVWzRDGIN+lnCEUdaXgtVHJgLb1rGaA6jMrtB9EmE7L18foXJIE8Un/A/h6NJfGQp/e1J4A==", + "dev": true + }, + "node_modules/@babel/helper-validator-option": { + "version": "7.12.17", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-option/-/helper-validator-option-7.12.17.tgz", + "integrity": "sha512-TopkMDmLzq8ngChwRlyjR6raKD6gMSae4JdYDB8bByKreQgG0RBTuKe9LRxW3wFtUnjxOPRKBDwEH6Mg5KeDfw==", + "dev": true + }, + "node_modules/@babel/helpers": { + "version": "7.14.0", + "resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.14.0.tgz", + "integrity": "sha512-+ufuXprtQ1D1iZTO/K9+EBRn+qPWMJjZSw/S0KlFrxCw4tkrzv9grgpDHkY9MeQTjTY8i2sp7Jep8DfU6tN9Mg==", + "dev": true, + "dependencies": { + "@babel/template": "^7.12.13", + "@babel/traverse": "^7.14.0", + "@babel/types": "^7.14.0" + } + }, + "node_modules/@babel/highlight": { + "version": "7.14.0", + "resolved": "https://registry.npmjs.org/@babel/highlight/-/highlight-7.14.0.tgz", + "integrity": "sha512-YSCOwxvTYEIMSGaBQb5kDDsCopDdiUGsqpatp3fOlI4+2HQSkTmEVWnVuySdAC5EWCqSWWTv0ib63RjR7dTBdg==", + "dev": true, + "dependencies": { + "@babel/helper-validator-identifier": "^7.14.0", + "chalk": "^2.0.0", + "js-tokens": "^4.0.0" + } + }, + "node_modules/@babel/parser": { + "version": "7.14.3", + "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.14.3.tgz", + "integrity": "sha512-7MpZDIfI7sUC5zWo2+foJ50CSI5lcqDehZ0lVgIhSi4bFEk94fLAKlF3Q0nzSQQ+ca0lm+O6G9ztKVBeu8PMRQ==", + "dev": true, + "bin": { + "parser": "bin/babel-parser.js" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@babel/plugin-transform-react-jsx-self": { + "version": "7.12.13", + "resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-jsx-self/-/plugin-transform-react-jsx-self-7.12.13.tgz", + "integrity": "sha512-FXYw98TTJ125GVCCkFLZXlZ1qGcsYqNQhVBQcZjyrwf8FEUtVfKIoidnO8S0q+KBQpDYNTmiGo1gn67Vti04lQ==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.12.13" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + 
"node_modules/@babel/plugin-transform-react-jsx-source": { + "version": "7.14.2", + "resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-jsx-source/-/plugin-transform-react-jsx-source-7.14.2.tgz", + "integrity": "sha512-OMorspVyjxghAjzgeAWc6O7W7vHbJhV69NeTGdl9Mxgz6PaweAuo7ffB9T5A1OQ9dGcw0As4SYMUhyNC4u7mVg==", + "dev": true, + "dependencies": { + "@babel/helper-plugin-utils": "^7.13.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/runtime": { + "version": "7.14.0", + "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.14.0.tgz", + "integrity": "sha512-JELkvo/DlpNdJ7dlyw/eY7E0suy5i5GQH+Vlxaq1nsNJ+H7f4Vtv3jMeCEgRhZZQFXTjldYfQgv2qmM6M1v5wA==", + "dependencies": { + "regenerator-runtime": "^0.13.4" + } + }, + "node_modules/@babel/runtime-corejs3": { + "version": "7.14.0", + "resolved": "https://registry.npmjs.org/@babel/runtime-corejs3/-/runtime-corejs3-7.14.0.tgz", + "integrity": "sha512-0R0HTZWHLk6G8jIk0FtoX+AatCtKnswS98VhXwGImFc759PJRp4Tru0PQYZofyijTFUr+gT8Mu7sgXVJLQ0ceg==", + "dev": true, + "dependencies": { + "core-js-pure": "^3.0.0", + "regenerator-runtime": "^0.13.4" + } + }, + "node_modules/@babel/template": { + "version": "7.12.13", + "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.12.13.tgz", + "integrity": "sha512-/7xxiGA57xMo/P2GVvdEumr8ONhFOhfgq2ihK3h1e6THqzTAkHbkXgB0xI9yeTfIUoH3+oAeHhqm/I43OTbbjA==", + "dev": true, + "dependencies": { + "@babel/code-frame": "^7.12.13", + "@babel/parser": "^7.12.13", + "@babel/types": "^7.12.13" + } + }, + "node_modules/@babel/traverse": { + "version": "7.14.2", + "resolved": "https://registry.npmjs.org/@babel/traverse/-/traverse-7.14.2.tgz", + "integrity": "sha512-TsdRgvBFHMyHOOzcP9S6QU0QQtjxlRpEYOy3mcCO5RgmC305ki42aSAmfZEMSSYBla2oZ9BMqYlncBaKmD/7iA==", + "dev": true, + "dependencies": { + "@babel/code-frame": "^7.12.13", + "@babel/generator": "^7.14.2", + "@babel/helper-function-name": "^7.14.2", + "@babel/helper-split-export-declaration": "^7.12.13", + "@babel/parser": "^7.14.2", + "@babel/types": "^7.14.2", + "debug": "^4.1.0", + "globals": "^11.1.0" + } + }, + "node_modules/@babel/types": { + "version": "7.14.2", + "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.14.2.tgz", + "integrity": "sha512-SdjAG/3DikRHpUOjxZgnkbR11xUlyDMUFJdvnIgZEE16mqmY0BINMmc4//JMJglEmn6i7sq6p+mGrFWyZ98EEw==", + "dev": true, + "dependencies": { + "@babel/helper-validator-identifier": "^7.14.0", + "to-fast-properties": "^2.0.0" + } + }, + "node_modules/@eslint/eslintrc": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-0.4.1.tgz", + "integrity": "sha512-5v7TDE9plVhvxQeWLXDTvFvJBdH6pEsdnl2g/dAptmuFEPedQ4Erq5rsDsX+mvAM610IhNaO2W5V1dOOnDKxkQ==", + "dev": true, + "dependencies": { + "ajv": "^6.12.4", + "debug": "^4.1.1", + "espree": "^7.3.0", + "globals": "^12.1.0", + "ignore": "^4.0.6", + "import-fresh": "^3.2.1", + "js-yaml": "^3.13.1", + "minimatch": "^3.0.4", + "strip-json-comments": "^3.1.1" + }, + "engines": { + "node": "^10.12.0 || >=12.0.0" + } + }, + "node_modules/@eslint/eslintrc/node_modules/globals": { + "version": "12.4.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-12.4.0.tgz", + "integrity": "sha512-BWICuzzDvDoH54NHKCseDanAhE3CeDorgDL5MT6LMXXj2WCnd9UC2szdk4AWLfjdgNBCXLUanXYcpBBKOSWGwg==", + "dev": true, + "dependencies": { + "type-fest": "^0.8.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + 
"node_modules/@nodelib/fs.scandir": { + "version": "2.1.4", + "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.4.tgz", + "integrity": "sha512-33g3pMJk3bg5nXbL/+CY6I2eJDzZAni49PfJnL5fghPTggPvBd/pFNSgJsdAgWptuFu7qq/ERvOYFlhvsLTCKA==", + "dev": true, + "dependencies": { + "@nodelib/fs.stat": "2.0.4", + "run-parallel": "^1.1.9" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/@nodelib/fs.stat": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/@nodelib/fs.stat/-/fs.stat-2.0.4.tgz", + "integrity": "sha512-IYlHJA0clt2+Vg7bccq+TzRdJvv19c2INqBSsoOLp1je7xjtr7J26+WXR72MCdvU9q1qTzIWDfhMf+DRvQJK4Q==", + "dev": true, + "engines": { + "node": ">= 8" + } + }, + "node_modules/@nodelib/fs.walk": { + "version": "1.2.6", + "resolved": "https://registry.npmjs.org/@nodelib/fs.walk/-/fs.walk-1.2.6.tgz", + "integrity": "sha512-8Broas6vTtW4GIXTAHDoE32hnN2M5ykgCpWGbuXHQ15vEMqr23pB76e/GZcYsZCHALv50ktd24qhEyKr6wBtow==", + "dev": true, + "dependencies": { + "@nodelib/fs.scandir": "2.1.4", + "fastq": "^1.6.0" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/@types/history": { + "version": "4.7.8", + "resolved": "https://registry.npmjs.org/@types/history/-/history-4.7.8.tgz", + "integrity": "sha512-S78QIYirQcUoo6UJZx9CSP0O2ix9IaeAXwQi26Rhr/+mg7qqPy8TzaxHSUut7eGjL8WmLccT7/MXf304WjqHcA==", + "dev": true + }, + "node_modules/@types/json-schema": { + "version": "7.0.7", + "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.7.tgz", + "integrity": "sha512-cxWFQVseBm6O9Gbw1IWb8r6OS4OhSt3hPZLkFApLjM8TEXROBuQGLAH2i2gZpcXdLBIrpXuTDhH7Vbm1iXmNGA==", + "dev": true + }, + "node_modules/@types/json5": { + "version": "0.0.29", + "resolved": "https://registry.npmjs.org/@types/json5/-/json5-0.0.29.tgz", + "integrity": "sha1-7ihweulOEdK4J7y+UnC86n8+ce4=", + "dev": true + }, + "node_modules/@types/prop-types": { + "version": "15.7.3", + "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.3.tgz", + "integrity": "sha512-KfRL3PuHmqQLOG+2tGpRO26Ctg+Cq1E01D2DMriKEATHgWLfeNDmq9e29Q9WIky0dQ3NPkd1mzYH8Lm936Z9qw==", + "dev": true + }, + "node_modules/@types/react": { + "version": "17.0.6", + "resolved": "https://registry.npmjs.org/@types/react/-/react-17.0.6.tgz", + "integrity": "sha512-u/TtPoF/hrvb63LdukET6ncaplYsvCvmkceasx8oG84/ZCsoLxz9Z/raPBP4lTAiWW1Jb889Y9svHmv8R26dWw==", + "dev": true, + "dependencies": { + "@types/prop-types": "*", + "@types/scheduler": "*", + "csstype": "^3.0.2" + } + }, + "node_modules/@types/react-dom": { + "version": "17.0.5", + "resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-17.0.5.tgz", + "integrity": "sha512-ikqukEhH4H9gr4iJCmQVNzTB307kROe3XFfHAOTxOXPOw7lAoEXnM5KWTkzeANGL5Ce6ABfiMl/zJBYNi7ObmQ==", + "dev": true, + "dependencies": { + "@types/react": "*" + } + }, + "node_modules/@types/react-router": { + "version": "5.1.14", + "resolved": "https://registry.npmjs.org/@types/react-router/-/react-router-5.1.14.tgz", + "integrity": "sha512-LAJpqYUaCTMT2anZheoidiIymt8MuX286zoVFPM3DVb23aQBH0mAkFvzpd4LKqiolV8bBtZWT5Qp7hClCNDENw==", + "dev": true, + "dependencies": { + "@types/history": "*", + "@types/react": "*" + } + }, + "node_modules/@types/react-router-dom": { + "version": "5.1.7", + "resolved": "https://registry.npmjs.org/@types/react-router-dom/-/react-router-dom-5.1.7.tgz", + "integrity": "sha512-D5mHD6TbdV/DNHYsnwBTv+y73ei+mMjrkGrla86HthE4/PVvL1J94Bu3qABU+COXzpL23T1EZapVVpwHuBXiUg==", + "dev": true, + "dependencies": { + "@types/history": "*", + "@types/react": "*", + 
"@types/react-router": "*" + } + }, + "node_modules/@types/scheduler": { + "version": "0.16.1", + "resolved": "https://registry.npmjs.org/@types/scheduler/-/scheduler-0.16.1.tgz", + "integrity": "sha512-EaCxbanVeyxDRTQBkdLb3Bvl/HK7PBK6UJjsSixB0iHKoWxE5uu2Q/DgtpOhPIojN0Zl1whvOd7PoHs2P0s5eA==", + "dev": true + }, + "node_modules/@typescript-eslint/eslint-plugin": { + "version": "4.24.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-4.24.0.tgz", + "integrity": "sha512-qbCgkPM7DWTsYQGjx9RTuQGswi+bEt0isqDBeo+CKV0953zqI0Tp7CZ7Fi9ipgFA6mcQqF4NOVNwS/f2r6xShw==", + "dev": true, + "dependencies": { + "@typescript-eslint/experimental-utils": "4.24.0", + "@typescript-eslint/scope-manager": "4.24.0", + "debug": "^4.1.1", + "functional-red-black-tree": "^1.0.1", + "lodash": "^4.17.15", + "regexpp": "^3.0.0", + "semver": "^7.3.2", + "tsutils": "^3.17.1" + }, + "engines": { + "node": "^10.12.0 || >=12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependencies": { + "@typescript-eslint/parser": "^4.0.0", + "eslint": "^5.0.0 || ^6.0.0 || ^7.0.0" + }, + "peerDependenciesMeta": { + "typescript": { + "optional": true + } + } + }, + "node_modules/@typescript-eslint/experimental-utils": { + "version": "4.24.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/experimental-utils/-/experimental-utils-4.24.0.tgz", + "integrity": "sha512-IwTT2VNDKH1h8RZseMH4CcYBz6lTvRoOLDuuqNZZoThvfHEhOiZPQCow+5El3PtyxJ1iDr6UXZwYtE3yZQjhcw==", + "dev": true, + "dependencies": { + "@types/json-schema": "^7.0.3", + "@typescript-eslint/scope-manager": "4.24.0", + "@typescript-eslint/types": "4.24.0", + "@typescript-eslint/typescript-estree": "4.24.0", + "eslint-scope": "^5.0.0", + "eslint-utils": "^2.0.0" + }, + "engines": { + "node": "^10.12.0 || >=12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependencies": { + "eslint": "*" + } + }, + "node_modules/@typescript-eslint/parser": { + "version": "4.24.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-4.24.0.tgz", + "integrity": "sha512-dj1ZIh/4QKeECLb2f/QjRwMmDArcwc2WorWPRlB8UNTZlY1KpTVsbX7e3ZZdphfRw29aTFUSNuGB8w9X5sS97w==", + "dev": true, + "dependencies": { + "@typescript-eslint/scope-manager": "4.24.0", + "@typescript-eslint/types": "4.24.0", + "@typescript-eslint/typescript-estree": "4.24.0", + "debug": "^4.1.1" + }, + "engines": { + "node": "^10.12.0 || >=12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependencies": { + "eslint": "^5.0.0 || ^6.0.0 || ^7.0.0" + }, + "peerDependenciesMeta": { + "typescript": { + "optional": true + } + } + }, + "node_modules/@typescript-eslint/scope-manager": { + "version": "4.24.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-4.24.0.tgz", + "integrity": "sha512-9+WYJGDnuC9VtYLqBhcSuM7du75fyCS/ypC8c5g7Sdw7pGL4NDTbeH38eJPfzIydCHZDoOgjloxSAA3+4l/zsA==", + "dev": true, + "dependencies": { + "@typescript-eslint/types": "4.24.0", + "@typescript-eslint/visitor-keys": "4.24.0" + }, + "engines": { + "node": "^8.10.0 || ^10.13.0 || >=11.10.1" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + } + }, + "node_modules/@typescript-eslint/types": { + "version": "4.24.0", + "resolved": 
"https://registry.npmjs.org/@typescript-eslint/types/-/types-4.24.0.tgz", + "integrity": "sha512-tkZUBgDQKdvfs8L47LaqxojKDE+mIUmOzdz7r+u+U54l3GDkTpEbQ1Jp3cNqqAU9vMUCBA1fitsIhm7yN0vx9Q==", + "dev": true, + "engines": { + "node": "^8.10.0 || ^10.13.0 || >=11.10.1" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + } + }, + "node_modules/@typescript-eslint/typescript-estree": { + "version": "4.24.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-4.24.0.tgz", + "integrity": "sha512-kBDitL/by/HK7g8CYLT7aKpAwlR8doshfWz8d71j97n5kUa5caHWvY0RvEUEanL/EqBJoANev8Xc/mQ6LLwXGA==", + "dev": true, + "dependencies": { + "@typescript-eslint/types": "4.24.0", + "@typescript-eslint/visitor-keys": "4.24.0", + "debug": "^4.1.1", + "globby": "^11.0.1", + "is-glob": "^4.0.1", + "semver": "^7.3.2", + "tsutils": "^3.17.1" + }, + "engines": { + "node": "^10.12.0 || >=12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependenciesMeta": { + "typescript": { + "optional": true + } + } + }, + "node_modules/@typescript-eslint/visitor-keys": { + "version": "4.24.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-4.24.0.tgz", + "integrity": "sha512-4ox1sjmGHIxjEDBnMCtWFFhErXtKA1Ec0sBpuz0fqf3P+g3JFGyTxxbF06byw0FRsPnnbq44cKivH7Ks1/0s6g==", + "dev": true, + "dependencies": { + "@typescript-eslint/types": "4.24.0", + "eslint-visitor-keys": "^2.0.0" + }, + "engines": { + "node": "^8.10.0 || ^10.13.0 || >=11.10.1" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + } + }, + "node_modules/@vitejs/plugin-react-refresh": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/@vitejs/plugin-react-refresh/-/plugin-react-refresh-1.3.3.tgz", + "integrity": "sha512-J3KFwSQKrEK7fgOwTx0PMTlsolZORUch6BswjsM50q+Y7zSvX1ROIRn+tK2VE8SCvbYRHtzEKFlYW3vsWyTosQ==", + "dev": true, + "dependencies": { + "@babel/core": "^7.12.13", + "@babel/plugin-transform-react-jsx-self": "^7.12.13", + "@babel/plugin-transform-react-jsx-source": "^7.12.13", + "react-refresh": "^0.9.0" + }, + "engines": { + "node": ">=12.0.0" + } + }, + "node_modules/acorn": { + "version": "7.4.1", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-7.4.1.tgz", + "integrity": "sha512-nQyp0o1/mNdbTO1PO6kHkwSrmgZ0MT/jCCpNiwbUjGoRN4dlBhqJtoQuCnEOKzgTVwg0ZWiCoQy6SxMebQVh8A==", + "dev": true, + "bin": { + "acorn": "bin/acorn" + }, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/acorn-jsx": { + "version": "5.3.1", + "resolved": "https://registry.npmjs.org/acorn-jsx/-/acorn-jsx-5.3.1.tgz", + "integrity": "sha512-K0Ptm/47OKfQRpNQ2J/oIN/3QYiK6FwW+eJbILhsdxh2WTLdl+30o8aGdTbm5JbffpFFAg/g+zi1E+jvJha5ng==", + "dev": true, + "peerDependencies": { + "acorn": "^6.0.0 || ^7.0.0 || ^8.0.0" + } + }, + "node_modules/ajv": { + "version": "6.12.6", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", + "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "dev": true, + "dependencies": { + "fast-deep-equal": "^3.1.1", + "fast-json-stable-stringify": "^2.0.0", + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/ansi-colors": { + "version": "4.1.1", + "resolved": 
"https://registry.npmjs.org/ansi-colors/-/ansi-colors-4.1.1.tgz", + "integrity": "sha512-JoX0apGbHaUJBNl6yF+p6JAFYZ666/hhCGKN5t9QFjbJQKUU/g8MNbFDbvfrgKXvI1QpZplPOnwIo99lX/AAmA==", + "dev": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/ansi-regex": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.0.tgz", + "integrity": "sha512-bY6fj56OUQ0hU1KjFNDQuJFezqKdrAyFdIevADiqrWHwSlbmBNMHp5ak2f40Pm8JTFyM2mqxkG6ngkHO11f/lg==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/ansi-styles": { + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz", + "integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==", + "dev": true, + "dependencies": { + "color-convert": "^1.9.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/argparse": { + "version": "1.0.10", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", + "integrity": "sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==", + "dev": true, + "dependencies": { + "sprintf-js": "~1.0.2" + } + }, + "node_modules/aria-query": { + "version": "4.2.2", + "resolved": "https://registry.npmjs.org/aria-query/-/aria-query-4.2.2.tgz", + "integrity": "sha512-o/HelwhuKpTj/frsOsbNLNgnNGVIFsVP/SW2BSF14gVl7kAfMOJ6/8wUAUvG1R1NHKrfG+2sHZTu0yauT1qBrA==", + "dev": true, + "dependencies": { + "@babel/runtime": "^7.10.2", + "@babel/runtime-corejs3": "^7.10.2" + }, + "engines": { + "node": ">=6.0" + } + }, + "node_modules/array-includes": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/array-includes/-/array-includes-3.1.3.tgz", + "integrity": "sha512-gcem1KlBU7c9rB+Rq8/3PPKsK2kjqeEBa3bD5kkQo4nYlOHQCJqIJFqBXDEfwaRuYTT4E+FxA9xez7Gf/e3Q7A==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.2", + "define-properties": "^1.1.3", + "es-abstract": "^1.18.0-next.2", + "get-intrinsic": "^1.1.1", + "is-string": "^1.0.5" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/array-union": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/array-union/-/array-union-2.1.0.tgz", + "integrity": "sha512-HGyxoOTYUyCM6stUe6EJgnd4EoewAI7zMdfqO+kGjnlZmBDz/cR5pf8r/cR4Wq60sL/p0IkcjUEEPwS3GFrIyw==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/array.prototype.flat": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/array.prototype.flat/-/array.prototype.flat-1.2.4.tgz", + "integrity": "sha512-4470Xi3GAPAjZqFcljX2xzckv1qeKPizoNkiS0+O4IoPR2ZNpcjE0pkhdihlDouK+x6QOast26B4Q/O9DJnwSg==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.0", + "define-properties": "^1.1.3", + "es-abstract": "^1.18.0-next.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/array.prototype.flatmap": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/array.prototype.flatmap/-/array.prototype.flatmap-1.2.4.tgz", + "integrity": "sha512-r9Z0zYoxqHz60vvQbWEdXIEtCwHF0yxaWfno9qzXeNHvfyl3BZqygmGzb84dsubyaXLH4husF+NFgMSdpZhk2Q==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.0", + "define-properties": "^1.1.3", + "es-abstract": "^1.18.0-next.1", + "function-bind": "^1.1.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/ast-types-flow": 
{ + "version": "0.0.7", + "resolved": "https://registry.npmjs.org/ast-types-flow/-/ast-types-flow-0.0.7.tgz", + "integrity": "sha1-9wtzXGvKGlycItmCw+Oef+ujva0=", + "dev": true + }, + "node_modules/astral-regex": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/astral-regex/-/astral-regex-2.0.0.tgz", + "integrity": "sha512-Z7tMw1ytTXt5jqMcOP+OQteU1VuNK9Y02uuJtKQ1Sv69jXQKKg5cibLwGJow8yzZP+eAc18EmLGPal0bp36rvQ==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/axe-core": { + "version": "4.2.1", + "resolved": "https://registry.npmjs.org/axe-core/-/axe-core-4.2.1.tgz", + "integrity": "sha512-evY7DN8qSIbsW2H/TWQ1bX3sXN1d4MNb5Vb4n7BzPuCwRHdkZ1H2eNLuSh73EoQqkGKUtju2G2HCcjCfhvZIAA==", + "dev": true, + "engines": { + "node": ">=4" + } + }, + "node_modules/axobject-query": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/axobject-query/-/axobject-query-2.2.0.tgz", + "integrity": "sha512-Td525n+iPOOyUQIeBfcASuG6uJsDOITl7Mds5gFyerkWiX7qhUTdYUBlSgNMyVqtSJqwpt1kXGLdUt6SykLMRA==", + "dev": true + }, + "node_modules/balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "dev": true + }, + "node_modules/brace-expansion": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", + "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "dev": true, + "dependencies": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "node_modules/braces": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz", + "integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==", + "dev": true, + "dependencies": { + "fill-range": "^7.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/browserslist": { + "version": "4.16.6", + "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.16.6.tgz", + "integrity": "sha512-Wspk/PqO+4W9qp5iUTJsa1B/QrYn1keNCcEP5OvP7WBwT4KaDly0uONYmC6Xa3Z5IqnUgS0KcgLYu1l74x0ZXQ==", + "dev": true, + "dependencies": { + "caniuse-lite": "^1.0.30001219", + "colorette": "^1.2.2", + "electron-to-chromium": "^1.3.723", + "escalade": "^3.1.1", + "node-releases": "^1.1.71" + }, + "bin": { + "browserslist": "cli.js" + }, + "engines": { + "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + } + }, + "node_modules/call-bind": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.2.tgz", + "integrity": "sha512-7O+FbCihrB5WGbFYesctwmTKae6rOiIzmz1icreWJ+0aA7LJfuqhEso2T9ncpcFtzMQtzXf2QGGueWJGTYsqrA==", + "dev": true, + "dependencies": { + "function-bind": "^1.1.1", + "get-intrinsic": "^1.0.2" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/callsites": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", + "integrity": "sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==", + "dev": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/caniuse-lite": { + "version": "1.0.30001228", + "resolved": 
"https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001228.tgz", + "integrity": "sha512-QQmLOGJ3DEgokHbMSA8cj2a+geXqmnpyOFT0lhQV6P3/YOJvGDEwoedcwxEQ30gJIwIIunHIicunJ2rzK5gB2A==", + "dev": true, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + } + }, + "node_modules/chalk": { + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz", + "integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==", + "dev": true, + "dependencies": { + "ansi-styles": "^3.2.1", + "escape-string-regexp": "^1.0.5", + "supports-color": "^5.3.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/color-convert": { + "version": "1.9.3", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", + "integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==", + "dev": true, + "dependencies": { + "color-name": "1.1.3" + } + }, + "node_modules/color-name": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", + "integrity": "sha1-p9BVi9icQveV3UIyj3QIMcpTvCU=", + "dev": true + }, + "node_modules/colorette": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/colorette/-/colorette-1.2.2.tgz", + "integrity": "sha512-MKGMzyfeuutC/ZJ1cba9NqcNpfeqMUcYmyF1ZFY6/Cn7CNSAKx6a+s48sqLqyAiZuaP2TcqMhoo+dlwFnVxT9w==", + "dev": true + }, + "node_modules/concat-map": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", + "integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=", + "dev": true + }, + "node_modules/confusing-browser-globals": { + "version": "1.0.10", + "resolved": "https://registry.npmjs.org/confusing-browser-globals/-/confusing-browser-globals-1.0.10.tgz", + "integrity": "sha512-gNld/3lySHwuhaVluJUKLePYirM3QNCKzVxqAdhJII9/WXKVX5PURzMVJspS1jTslSqjeuG4KMVTSouit5YPHA==", + "dev": true + }, + "node_modules/contains-path": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/contains-path/-/contains-path-1.0.0.tgz", + "integrity": "sha1-NFizMhhWA+ju0Y9RjUoQiIo6vJE=", + "dev": true, + "dependencies": { + "normalize-path": "^2.1.1", + "path-starts-with": "^1.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/convert-source-map": { + "version": "1.7.0", + "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-1.7.0.tgz", + "integrity": "sha512-4FJkXzKXEDB1snCFZlLP4gpC3JILicCpGbzG9f9G7tGqGCzETQ2hWPrcinA9oU4wtf2biUaEH5065UnMeR33oA==", + "dev": true, + "dependencies": { + "safe-buffer": "~5.1.1" + } + }, + "node_modules/core-js-pure": { + "version": "3.12.1", + "resolved": "https://registry.npmjs.org/core-js-pure/-/core-js-pure-3.12.1.tgz", + "integrity": "sha512-1cch+qads4JnDSWsvc7d6nzlKAippwjUlf6vykkTLW53VSV+NkE6muGBToAjEA8pG90cSfcud3JgVmW2ds5TaQ==", + "dev": true, + "hasInstallScript": true, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/core-js" + } + }, + "node_modules/cross-spawn": { + "version": "7.0.3", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", + "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==", + "dev": true, + "dependencies": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/csstype": { + "version": "3.0.8", + "resolved": 
"https://registry.npmjs.org/csstype/-/csstype-3.0.8.tgz", + "integrity": "sha512-jXKhWqXPmlUeoQnF/EhTtTl4C9SnrxSH/jZUih3jmO6lBKr99rP3/+FmrMj4EFpOXzMtXHAZkd3x0E6h6Fgflw==", + "dev": true + }, + "node_modules/damerau-levenshtein": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/damerau-levenshtein/-/damerau-levenshtein-1.0.7.tgz", + "integrity": "sha512-VvdQIPGdWP0SqFXghj79Wf/5LArmreyMsGLa6FG6iC4t3j7j5s71TrwWmT/4akbDQIqjfACkLZmjXhA7g2oUZw==", + "dev": true + }, + "node_modules/debug": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.1.tgz", + "integrity": "sha512-doEwdvm4PCeK4K3RQN2ZC2BYUBaxwLARCqZmMjtF8a51J2Rb0xpVloFRnCODwqjpwnAoao4pelN8l3RJdv3gRQ==", + "dev": true, + "dependencies": { + "ms": "2.1.2" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/deep-is": { + "version": "0.1.3", + "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.3.tgz", + "integrity": "sha1-s2nW+128E+7PUk+RsHD+7cNXzzQ=", + "dev": true + }, + "node_modules/define-properties": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/define-properties/-/define-properties-1.1.3.tgz", + "integrity": "sha512-3MqfYKj2lLzdMSf8ZIZE/V+Zuy+BgD6f164e8K2w7dgnpKArBDerGYpM46IYYcjnkdPNMjPk9A6VFB8+3SKlXQ==", + "dev": true, + "dependencies": { + "object-keys": "^1.0.12" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/dir-glob": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/dir-glob/-/dir-glob-3.0.1.tgz", + "integrity": "sha512-WkrWp9GR4KXfKGYzOLmTuGVi1UWFfws377n9cc55/tb6DuqyF6pcQ5AbiHEshaDpY9v6oaSr2XCDidGmMwdzIA==", + "dev": true, + "dependencies": { + "path-type": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/doctrine": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-3.0.0.tgz", + "integrity": "sha512-yS+Q5i3hBf7GBkd4KG8a7eBNNWNGLTaEwwYWUijIYM7zrlYDM0BFXHjjPWlWZ1Rg7UaddZeIDmi9jF3HmqiQ2w==", + "dev": true, + "dependencies": { + "esutils": "^2.0.2" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/electron-to-chromium": { + "version": "1.3.732", + "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.3.732.tgz", + "integrity": "sha512-qKD5Pbq+QMk4nea4lMuncUMhpEiQwaJyCW7MrvissnRcBDENhVfDmAqQYRQ3X525oTzhar9Zh1cK0L2d1UKYcw==", + "dev": true + }, + "node_modules/emoji-regex": { + "version": "9.2.2", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz", + "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==", + "dev": true + }, + "node_modules/enquirer": { + "version": "2.3.6", + "resolved": "https://registry.npmjs.org/enquirer/-/enquirer-2.3.6.tgz", + "integrity": "sha512-yjNnPr315/FjS4zIsUxYguYUPP2e1NK4d7E7ZOLiyYCcbFBiTMyID+2wvm2w6+pZ/odMA7cRkjhsPbltwBOrLg==", + "dev": true, + "dependencies": { + "ansi-colors": "^4.1.1" + }, + "engines": { + "node": ">=8.6" + } + }, + "node_modules/error-ex": { + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/error-ex/-/error-ex-1.3.2.tgz", + "integrity": "sha512-7dFHNmqeFSEt2ZBsCriorKnn3Z2pj+fd9kmI6QoWw4//DL+icEBfc0U7qJCisqrTsKTjw4fNFy2pW9OqStD84g==", + "dev": true, + "dependencies": { + "is-arrayish": "^0.2.1" + } + }, + "node_modules/es-abstract": { + "version": "1.18.0", + "resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.18.0.tgz", + "integrity": 
"sha512-LJzK7MrQa8TS0ja2w3YNLzUgJCGPdPOV1yVvezjNnS89D+VR08+Szt2mz3YB2Dck/+w5tfIq/RoUAFqJJGM2yw==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.2", + "es-to-primitive": "^1.2.1", + "function-bind": "^1.1.1", + "get-intrinsic": "^1.1.1", + "has": "^1.0.3", + "has-symbols": "^1.0.2", + "is-callable": "^1.2.3", + "is-negative-zero": "^2.0.1", + "is-regex": "^1.1.2", + "is-string": "^1.0.5", + "object-inspect": "^1.9.0", + "object-keys": "^1.1.1", + "object.assign": "^4.1.2", + "string.prototype.trimend": "^1.0.4", + "string.prototype.trimstart": "^1.0.4", + "unbox-primitive": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/es-to-primitive": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/es-to-primitive/-/es-to-primitive-1.2.1.tgz", + "integrity": "sha512-QCOllgZJtaUo9miYBcLChTUaHNjJF3PYs1VidD7AwiEj1kYxKeQTctLAezAOH5ZKRH0g2IgPn6KwB4IT8iRpvA==", + "dev": true, + "dependencies": { + "is-callable": "^1.1.4", + "is-date-object": "^1.0.1", + "is-symbol": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/esbuild": { + "version": "0.11.23", + "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.11.23.tgz", + "integrity": "sha512-iaiZZ9vUF5wJV8ob1tl+5aJTrwDczlvGP0JoMmnpC2B0ppiMCu8n8gmy5ZTGl5bcG081XBVn+U+jP+mPFm5T5Q==", + "dev": true, + "hasInstallScript": true, + "bin": { + "esbuild": "bin/esbuild" + } + }, + "node_modules/escalade": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.1.tgz", + "integrity": "sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==", + "dev": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/escape-string-regexp": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", + "integrity": "sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ=", + "dev": true, + "engines": { + "node": ">=0.8.0" + } + }, + "node_modules/eslint": { + "version": "7.26.0", + "resolved": "https://registry.npmjs.org/eslint/-/eslint-7.26.0.tgz", + "integrity": "sha512-4R1ieRf52/izcZE7AlLy56uIHHDLT74Yzz2Iv2l6kDaYvEu9x+wMB5dZArVL8SYGXSYV2YAg70FcW5Y5nGGNIg==", + "dev": true, + "dependencies": { + "@babel/code-frame": "7.12.11", + "@eslint/eslintrc": "^0.4.1", + "ajv": "^6.10.0", + "chalk": "^4.0.0", + "cross-spawn": "^7.0.2", + "debug": "^4.0.1", + "doctrine": "^3.0.0", + "enquirer": "^2.3.5", + "eslint-scope": "^5.1.1", + "eslint-utils": "^2.1.0", + "eslint-visitor-keys": "^2.0.0", + "espree": "^7.3.1", + "esquery": "^1.4.0", + "esutils": "^2.0.2", + "file-entry-cache": "^6.0.1", + "functional-red-black-tree": "^1.0.1", + "glob-parent": "^5.0.0", + "globals": "^13.6.0", + "ignore": "^4.0.6", + "import-fresh": "^3.0.0", + "imurmurhash": "^0.1.4", + "is-glob": "^4.0.0", + "js-yaml": "^3.13.1", + "json-stable-stringify-without-jsonify": "^1.0.1", + "levn": "^0.4.1", + "lodash": "^4.17.21", + "minimatch": "^3.0.4", + "natural-compare": "^1.4.0", + "optionator": "^0.9.1", + "progress": "^2.0.0", + "regexpp": "^3.1.0", + "semver": "^7.2.1", + "strip-ansi": "^6.0.0", + "strip-json-comments": "^3.1.0", + "table": "^6.0.4", + "text-table": "^0.2.0", + "v8-compile-cache": "^2.0.3" + }, + "bin": { + "eslint": "bin/eslint.js" + }, + "engines": { + "node": "^10.12.0 || >=12.0.0" + }, + "funding": { + "url": "https://opencollective.com/eslint" + } + }, + 
"node_modules/eslint-config-airbnb": { + "version": "18.2.1", + "resolved": "https://registry.npmjs.org/eslint-config-airbnb/-/eslint-config-airbnb-18.2.1.tgz", + "integrity": "sha512-glZNDEZ36VdlZWoxn/bUR1r/sdFKPd1mHPbqUtkctgNG4yT2DLLtJ3D+yCV+jzZCc2V1nBVkmdknOJBZ5Hc0fg==", + "dev": true, + "dependencies": { + "eslint-config-airbnb-base": "^14.2.1", + "object.assign": "^4.1.2", + "object.entries": "^1.1.2" + }, + "engines": { + "node": ">= 6" + }, + "peerDependencies": { + "eslint": "^5.16.0 || ^6.8.0 || ^7.2.0", + "eslint-plugin-import": "^2.22.1", + "eslint-plugin-jsx-a11y": "^6.4.1", + "eslint-plugin-react": "^7.21.5", + "eslint-plugin-react-hooks": "^4 || ^3 || ^2.3.0 || ^1.7.0" + } + }, + "node_modules/eslint-config-airbnb-base": { + "version": "14.2.1", + "resolved": "https://registry.npmjs.org/eslint-config-airbnb-base/-/eslint-config-airbnb-base-14.2.1.tgz", + "integrity": "sha512-GOrQyDtVEc1Xy20U7vsB2yAoB4nBlfH5HZJeatRXHleO+OS5Ot+MWij4Dpltw4/DyIkqUfqz1epfhVR5XWWQPA==", + "dev": true, + "dependencies": { + "confusing-browser-globals": "^1.0.10", + "object.assign": "^4.1.2", + "object.entries": "^1.1.2" + }, + "engines": { + "node": ">= 6" + }, + "peerDependencies": { + "eslint": "^5.16.0 || ^6.8.0 || ^7.2.0", + "eslint-plugin-import": "^2.22.1" + } + }, + "node_modules/eslint-import-resolver-node": { + "version": "0.3.4", + "resolved": "https://registry.npmjs.org/eslint-import-resolver-node/-/eslint-import-resolver-node-0.3.4.tgz", + "integrity": "sha512-ogtf+5AB/O+nM6DIeBUNr2fuT7ot9Qg/1harBfBtaP13ekEWFQEEMP94BCB7zaNW3gyY+8SHYF00rnqYwXKWOA==", + "dev": true, + "dependencies": { + "debug": "^2.6.9", + "resolve": "^1.13.1" + } + }, + "node_modules/eslint-import-resolver-node/node_modules/debug": { + "version": "2.6.9", + "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", + "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "dev": true, + "dependencies": { + "ms": "2.0.0" + } + }, + "node_modules/eslint-import-resolver-node/node_modules/ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=", + "dev": true + }, + "node_modules/eslint-module-utils": { + "version": "2.6.1", + "resolved": "https://registry.npmjs.org/eslint-module-utils/-/eslint-module-utils-2.6.1.tgz", + "integrity": "sha512-ZXI9B8cxAJIH4nfkhTwcRTEAnrVfobYqwjWy/QMCZ8rHkZHFjf9yO4BzpiF9kCSfNlMG54eKigISHpX0+AaT4A==", + "dev": true, + "dependencies": { + "debug": "^3.2.7", + "pkg-dir": "^2.0.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/eslint-module-utils/node_modules/debug": { + "version": "3.2.7", + "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.7.tgz", + "integrity": "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==", + "dev": true, + "dependencies": { + "ms": "^2.1.1" + } + }, + "node_modules/eslint-plugin-import": { + "version": "2.23.2", + "resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.23.2.tgz", + "integrity": "sha512-LmNoRptHBxOP+nb0PIKz1y6OSzCJlB+0g0IGS3XV4KaKk2q4szqQ6s6F1utVf5ZRkxk/QOTjdxe7v4VjS99Bsg==", + "dev": true, + "dependencies": { + "array-includes": "^3.1.3", + "array.prototype.flat": "^1.2.4", + "contains-path": "^1.0.0", + "debug": "^2.6.9", + "doctrine": "^2.1.0", + "eslint-import-resolver-node": "^0.3.4", + "eslint-module-utils": "^2.6.1", + "find-up": "^2.0.0", + "has": "^1.0.3", + "is-core-module": "^2.4.0", + "minimatch": 
"^3.0.4", + "object.values": "^1.1.3", + "pkg-up": "^2.0.0", + "read-pkg-up": "^3.0.0", + "resolve": "^1.20.0", + "tsconfig-paths": "^3.9.0" + }, + "engines": { + "node": ">=4" + }, + "peerDependencies": { + "eslint": "^2 || ^3 || ^4 || ^5 || ^6 || ^7.2.0" + } + }, + "node_modules/eslint-plugin-import/node_modules/debug": { + "version": "2.6.9", + "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", + "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "dev": true, + "dependencies": { + "ms": "2.0.0" + } + }, + "node_modules/eslint-plugin-import/node_modules/doctrine": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-2.1.0.tgz", + "integrity": "sha512-35mSku4ZXK0vfCuHEDAwt55dg2jNajHZ1odvF+8SSr82EsZY4QmXfuWso8oEd8zRhVObSN18aM0CjSdoBX7zIw==", + "dev": true, + "dependencies": { + "esutils": "^2.0.2" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/eslint-plugin-import/node_modules/ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=", + "dev": true + }, + "node_modules/eslint-plugin-jsx-a11y": { + "version": "6.4.1", + "resolved": "https://registry.npmjs.org/eslint-plugin-jsx-a11y/-/eslint-plugin-jsx-a11y-6.4.1.tgz", + "integrity": "sha512-0rGPJBbwHoGNPU73/QCLP/vveMlM1b1Z9PponxO87jfr6tuH5ligXbDT6nHSSzBC8ovX2Z+BQu7Bk5D/Xgq9zg==", + "dev": true, + "dependencies": { + "@babel/runtime": "^7.11.2", + "aria-query": "^4.2.2", + "array-includes": "^3.1.1", + "ast-types-flow": "^0.0.7", + "axe-core": "^4.0.2", + "axobject-query": "^2.2.0", + "damerau-levenshtein": "^1.0.6", + "emoji-regex": "^9.0.0", + "has": "^1.0.3", + "jsx-ast-utils": "^3.1.0", + "language-tags": "^1.0.5" + }, + "engines": { + "node": ">=4.0" + }, + "peerDependencies": { + "eslint": "^3 || ^4 || ^5 || ^6 || ^7" + } + }, + "node_modules/eslint-plugin-react": { + "version": "7.23.2", + "resolved": "https://registry.npmjs.org/eslint-plugin-react/-/eslint-plugin-react-7.23.2.tgz", + "integrity": "sha512-AfjgFQB+nYszudkxRkTFu0UR1zEQig0ArVMPloKhxwlwkzaw/fBiH0QWcBBhZONlXqQC51+nfqFrkn4EzHcGBw==", + "dev": true, + "dependencies": { + "array-includes": "^3.1.3", + "array.prototype.flatmap": "^1.2.4", + "doctrine": "^2.1.0", + "has": "^1.0.3", + "jsx-ast-utils": "^2.4.1 || ^3.0.0", + "minimatch": "^3.0.4", + "object.entries": "^1.1.3", + "object.fromentries": "^2.0.4", + "object.values": "^1.1.3", + "prop-types": "^15.7.2", + "resolve": "^2.0.0-next.3", + "string.prototype.matchall": "^4.0.4" + }, + "engines": { + "node": ">=4" + }, + "peerDependencies": { + "eslint": "^3 || ^4 || ^5 || ^6 || ^7" + } + }, + "node_modules/eslint-plugin-react-hooks": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/eslint-plugin-react-hooks/-/eslint-plugin-react-hooks-4.2.0.tgz", + "integrity": "sha512-623WEiZJqxR7VdxFCKLI6d6LLpwJkGPYKODnkH3D7WpOG5KM8yWueBd8TLsNAetEJNF5iJmolaAKO3F8yzyVBQ==", + "dev": true, + "engines": { + "node": ">=10" + }, + "peerDependencies": { + "eslint": "^3.0.0 || ^4.0.0 || ^5.0.0 || ^6.0.0 || ^7.0.0" + } + }, + "node_modules/eslint-plugin-react/node_modules/doctrine": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-2.1.0.tgz", + "integrity": "sha512-35mSku4ZXK0vfCuHEDAwt55dg2jNajHZ1odvF+8SSr82EsZY4QmXfuWso8oEd8zRhVObSN18aM0CjSdoBX7zIw==", + "dev": true, + "dependencies": { + "esutils": "^2.0.2" + }, + "engines": { + "node": ">=0.10.0" + } + }, + 
"node_modules/eslint-plugin-react/node_modules/resolve": { + "version": "2.0.0-next.3", + "resolved": "https://registry.npmjs.org/resolve/-/resolve-2.0.0-next.3.tgz", + "integrity": "sha512-W8LucSynKUIDu9ylraa7ueVZ7hc0uAgJBxVsQSKOXOyle8a93qXhcz+XAXZ8bIq2d6i4Ehddn6Evt+0/UwKk6Q==", + "dev": true, + "dependencies": { + "is-core-module": "^2.2.0", + "path-parse": "^1.0.6" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/eslint-scope": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-5.1.1.tgz", + "integrity": "sha512-2NxwbF/hZ0KpepYN0cNbo+FN6XoK7GaHlQhgx/hIZl6Va0bF45RQOOwhLIy8lQDbuCiadSLCBnH2CFYquit5bw==", + "dev": true, + "dependencies": { + "esrecurse": "^4.3.0", + "estraverse": "^4.1.1" + }, + "engines": { + "node": ">=8.0.0" + } + }, + "node_modules/eslint-utils": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/eslint-utils/-/eslint-utils-2.1.0.tgz", + "integrity": "sha512-w94dQYoauyvlDc43XnGB8lU3Zt713vNChgt4EWwhXAP2XkBvndfxF0AgIqKOOasjPIPzj9JqgwkwbCYD0/V3Zg==", + "dev": true, + "dependencies": { + "eslint-visitor-keys": "^1.1.0" + }, + "engines": { + "node": ">=6" + }, + "funding": { + "url": "https://github.com/sponsors/mysticatea" + } + }, + "node_modules/eslint-utils/node_modules/eslint-visitor-keys": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-1.3.0.tgz", + "integrity": "sha512-6J72N8UNa462wa/KFODt/PJ3IU60SDpC3QXC1Hjc1BXXpfL2C9R5+AU7jhe0F6GREqVMh4Juu+NY7xn+6dipUQ==", + "dev": true, + "engines": { + "node": ">=4" + } + }, + "node_modules/eslint-visitor-keys": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-2.1.0.tgz", + "integrity": "sha512-0rSmRBzXgDzIsD6mGdJgevzgezI534Cer5L/vyMX0kHzT/jiB43jRhd9YUlMGYLQy2zprNmoT8qasCGtY+QaKw==", + "dev": true, + "engines": { + "node": ">=10" + } + }, + "node_modules/eslint/node_modules/@babel/code-frame": { + "version": "7.12.11", + "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.12.11.tgz", + "integrity": "sha512-Zt1yodBx1UcyiePMSkWnU4hPqhwq7hGi2nFL1LeA3EUl+q2LQx16MISgJ0+z7dnmgvP9QtIleuETGOiOH1RcIw==", + "dev": true, + "dependencies": { + "@babel/highlight": "^7.10.4" + } + }, + "node_modules/eslint/node_modules/ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/eslint/node_modules/chalk": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.1.tgz", + "integrity": "sha512-diHzdDKxcU+bAsUboHLPEDQiw0qEe0qd7SYUn3HgcFlWgbDcfLGswOHYeGrHKzG9z6UYf01d9VFMfZxPM1xZSg==", + "dev": true, + "dependencies": { + "ansi-styles": "^4.1.0", + "supports-color": "^7.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" + } + }, + "node_modules/eslint/node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dev": true, + "dependencies": { + "color-name": "~1.1.4" + 
}, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/eslint/node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "dev": true + }, + "node_modules/eslint/node_modules/globals": { + "version": "13.8.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-13.8.0.tgz", + "integrity": "sha512-rHtdA6+PDBIjeEvA91rpqzEvk/k3/i7EeNQiryiWuJH0Hw9cpyJMAt2jtbAwUaRdhD+573X4vWw6IcjKPasi9Q==", + "dev": true, + "dependencies": { + "type-fest": "^0.20.2" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/eslint/node_modules/has-flag": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/eslint/node_modules/supports-color": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", + "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "dev": true, + "dependencies": { + "has-flag": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/eslint/node_modules/type-fest": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.20.2.tgz", + "integrity": "sha512-Ne+eE4r0/iWnpAxD852z3A+N0Bt5RN//NjJwRd2VFHEmrywxf5vsZlh4R6lixl6B+wz/8d+maTSAkN1FIkI3LQ==", + "dev": true, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/espree": { + "version": "7.3.1", + "resolved": "https://registry.npmjs.org/espree/-/espree-7.3.1.tgz", + "integrity": "sha512-v3JCNCE64umkFpmkFGqzVKsOT0tN1Zr+ueqLZfpV1Ob8e+CEgPWa+OxCoGH3tnhimMKIaBm4m/vaRpJ/krRz2g==", + "dev": true, + "dependencies": { + "acorn": "^7.4.0", + "acorn-jsx": "^5.3.1", + "eslint-visitor-keys": "^1.3.0" + }, + "engines": { + "node": "^10.12.0 || >=12.0.0" + } + }, + "node_modules/espree/node_modules/eslint-visitor-keys": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-1.3.0.tgz", + "integrity": "sha512-6J72N8UNa462wa/KFODt/PJ3IU60SDpC3QXC1Hjc1BXXpfL2C9R5+AU7jhe0F6GREqVMh4Juu+NY7xn+6dipUQ==", + "dev": true, + "engines": { + "node": ">=4" + } + }, + "node_modules/esprima": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz", + "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==", + "dev": true, + "bin": { + "esparse": "bin/esparse.js", + "esvalidate": "bin/esvalidate.js" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/esquery": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/esquery/-/esquery-1.4.0.tgz", + "integrity": "sha512-cCDispWt5vHHtwMY2YrAQ4ibFkAL8RbH5YGBnZBc90MolvvfkkQcJro/aZiAQUlQ3qgrYS6D6v8Gc5G5CQsc9w==", + "dev": true, + "dependencies": { + "estraverse": "^5.1.0" + }, + "engines": { + "node": ">=0.10" + } + }, + "node_modules/esquery/node_modules/estraverse": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.2.0.tgz", + "integrity": 
"sha512-BxbNGGNm0RyRYvUdHpIwv9IWzeM9XClbOxwoATuFdOE7ZE6wHL+HQ5T8hoPM+zHvmKzzsEqhgy0GrQ5X13afiQ==", + "dev": true, + "engines": { + "node": ">=4.0" + } + }, + "node_modules/esrecurse": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/esrecurse/-/esrecurse-4.3.0.tgz", + "integrity": "sha512-KmfKL3b6G+RXvP8N1vr3Tq1kL/oCFgn2NYXEtqP8/L3pKapUA4G8cFVaoF3SU323CD4XypR/ffioHmkti6/Tag==", + "dev": true, + "dependencies": { + "estraverse": "^5.2.0" + }, + "engines": { + "node": ">=4.0" + } + }, + "node_modules/esrecurse/node_modules/estraverse": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.2.0.tgz", + "integrity": "sha512-BxbNGGNm0RyRYvUdHpIwv9IWzeM9XClbOxwoATuFdOE7ZE6wHL+HQ5T8hoPM+zHvmKzzsEqhgy0GrQ5X13afiQ==", + "dev": true, + "engines": { + "node": ">=4.0" + } + }, + "node_modules/estraverse": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-4.3.0.tgz", + "integrity": "sha512-39nnKffWz8xN1BU/2c79n9nB9HDzo0niYUqx6xyqUnyoAnQyyWpOTdZEeiCch8BBu515t4wp9ZmgVfVhn9EBpw==", + "dev": true, + "engines": { + "node": ">=4.0" + } + }, + "node_modules/esutils": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", + "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/fast-deep-equal": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", + "dev": true + }, + "node_modules/fast-glob": { + "version": "3.2.5", + "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.2.5.tgz", + "integrity": "sha512-2DtFcgT68wiTTiwZ2hNdJfcHNke9XOfnwmBRWXhmeKM8rF0TGwmC/Qto3S7RoZKp5cilZbxzO5iTNTQsJ+EeDg==", + "dev": true, + "dependencies": { + "@nodelib/fs.stat": "^2.0.2", + "@nodelib/fs.walk": "^1.2.3", + "glob-parent": "^5.1.0", + "merge2": "^1.3.0", + "micromatch": "^4.0.2", + "picomatch": "^2.2.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/fast-json-stable-stringify": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", + "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==", + "dev": true + }, + "node_modules/fast-levenshtein": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz", + "integrity": "sha1-PYpcZog6FqMMqGQ+hR8Zuqd5eRc=", + "dev": true + }, + "node_modules/fastq": { + "version": "1.11.0", + "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.11.0.tgz", + "integrity": "sha512-7Eczs8gIPDrVzT+EksYBcupqMyxSHXXrHOLRRxU2/DicV8789MRBRR8+Hc2uWzUupOs4YS4JzBmBxjjCVBxD/g==", + "dev": true, + "dependencies": { + "reusify": "^1.0.4" + } + }, + "node_modules/file-entry-cache": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-6.0.1.tgz", + "integrity": "sha512-7Gps/XWymbLk2QLYK4NzpMOrYjMhdIxXuIvy2QBsLE6ljuodKvdkWs/cpyJJ3CVIVpH0Oi1Hvg1ovbMzLdFBBg==", + "dev": true, + "dependencies": { + "flat-cache": "^3.0.4" + }, + "engines": { + "node": "^10.12.0 || >=12.0.0" + } + }, + "node_modules/fill-range": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", + "integrity": 
"sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==", + "dev": true, + "dependencies": { + "to-regex-range": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/find-up": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-2.1.0.tgz", + "integrity": "sha1-RdG35QbHF93UgndaK3eSCjwMV6c=", + "dev": true, + "dependencies": { + "locate-path": "^2.0.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/flat-cache": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/flat-cache/-/flat-cache-3.0.4.tgz", + "integrity": "sha512-dm9s5Pw7Jc0GvMYbshN6zchCA9RgQlzzEZX3vylR9IqFfS8XciblUXOKfW6SiuJ0e13eDYZoZV5wdrev7P3Nwg==", + "dev": true, + "dependencies": { + "flatted": "^3.1.0", + "rimraf": "^3.0.2" + }, + "engines": { + "node": "^10.12.0 || >=12.0.0" + } + }, + "node_modules/flatted": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.1.1.tgz", + "integrity": "sha512-zAoAQiudy+r5SvnSw3KJy5os/oRJYHzrzja/tBDqrZtNhUw8bt6y8OBzMWcjWr+8liV8Eb6yOhw8WZ7VFZ5ZzA==", + "dev": true + }, + "node_modules/fs.realpath": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", + "integrity": "sha1-FQStJSMVjKpA20onh8sBQRmU6k8=", + "dev": true + }, + "node_modules/fsevents": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", + "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", + "dev": true, + "hasInstallScript": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/function-bind": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.1.tgz", + "integrity": "sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A==", + "dev": true + }, + "node_modules/functional-red-black-tree": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/functional-red-black-tree/-/functional-red-black-tree-1.0.1.tgz", + "integrity": "sha1-GwqzvVU7Kg1jmdKcDj6gslIHgyc=", + "dev": true + }, + "node_modules/gensync": { + "version": "1.0.0-beta.2", + "resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz", + "integrity": "sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==", + "dev": true, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/get-intrinsic": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.1.1.tgz", + "integrity": "sha512-kWZrnVM42QCiEA2Ig1bG8zjoIMOgxWwYCEeNdwY6Tv/cOSeGpcoX4pXHfKUxNKVoArnrEr2e9srnAxxGIraS9Q==", + "dev": true, + "dependencies": { + "function-bind": "^1.1.1", + "has": "^1.0.3", + "has-symbols": "^1.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/glob": { + "version": "7.1.7", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.7.tgz", + "integrity": "sha512-OvD9ENzPLbegENnYP5UUfJIirTg4+XwMWGaQfQTY0JenxNvvIKP3U3/tAQSPIu/lHxXYSZmpXlUHeqAIdKzBLQ==", + "dev": true, + "dependencies": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.0.4", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + }, + "engines": { + "node": "*" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/glob-parent": { + 
"version": "5.1.2", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", + "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", + "dev": true, + "dependencies": { + "is-glob": "^4.0.1" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/globals": { + "version": "11.12.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-11.12.0.tgz", + "integrity": "sha512-WOBp/EEGUiIsJSp7wcv/y6MO+lV9UoncWqxuFfm8eBwzWNgyfBd6Gz+IeKQ9jCmyhoH99g15M3T+QaVHFjizVA==", + "dev": true, + "engines": { + "node": ">=4" + } + }, + "node_modules/globby": { + "version": "11.0.3", + "resolved": "https://registry.npmjs.org/globby/-/globby-11.0.3.tgz", + "integrity": "sha512-ffdmosjA807y7+lA1NM0jELARVmYul/715xiILEjo3hBLPTcirgQNnXECn5g3mtR8TOLCVbkfua1Hpen25/Xcg==", + "dev": true, + "dependencies": { + "array-union": "^2.1.0", + "dir-glob": "^3.0.1", + "fast-glob": "^3.1.1", + "ignore": "^5.1.4", + "merge2": "^1.3.0", + "slash": "^3.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/globby/node_modules/ignore": { + "version": "5.1.8", + "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.1.8.tgz", + "integrity": "sha512-BMpfD7PpiETpBl/A6S498BaIJ6Y/ABT93ETbby2fP00v4EbvPBXWEoaR1UBPKs3iR53pJY7EtZk5KACI57i1Uw==", + "dev": true, + "engines": { + "node": ">= 4" + } + }, + "node_modules/graceful-fs": { + "version": "4.2.6", + "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.6.tgz", + "integrity": "sha512-nTnJ528pbqxYanhpDYsi4Rd8MAeaBA67+RZ10CM1m3bTAVFEDcd5AuA4a6W5YkGZ1iNXHzZz8T6TBKLeBuNriQ==", + "dev": true + }, + "node_modules/has": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/has/-/has-1.0.3.tgz", + "integrity": "sha512-f2dvO0VU6Oej7RkWJGrehjbzMAjFp5/VKPp5tTpWIV4JHHZK1/BxbFRtf/siA2SWTe09caDmVtYYzWEIbBS4zw==", + "dev": true, + "dependencies": { + "function-bind": "^1.1.1" + }, + "engines": { + "node": ">= 0.4.0" + } + }, + "node_modules/has-bigints": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/has-bigints/-/has-bigints-1.0.1.tgz", + "integrity": "sha512-LSBS2LjbNBTf6287JEbEzvJgftkF5qFkmCo9hDRpAzKhUOlJ+hx8dd4USs00SgsUNwc4617J9ki5YtEClM2ffA==", + "dev": true, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-flag": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", + "integrity": "sha1-tdRU3CGZriJWmfNGfloH87lVuv0=", + "dev": true, + "engines": { + "node": ">=4" + } + }, + "node_modules/has-symbols": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.0.2.tgz", + "integrity": "sha512-chXa79rL/UC2KlX17jo3vRGz0azaWEx5tGqZg5pO3NUyEJVB17dMruQlzCCOfUvElghKcm5194+BCRvi2Rv/Gw==", + "dev": true, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/history": { + "version": "4.10.1", + "resolved": "https://registry.npmjs.org/history/-/history-4.10.1.tgz", + "integrity": "sha512-36nwAD620w12kuzPAsyINPWJqlNbij+hpK1k9XRloDtym8mxzGYl2c17LnV6IAGB2Dmg4tEa7G7DlawS0+qjew==", + "dependencies": { + "@babel/runtime": "^7.1.2", + "loose-envify": "^1.2.0", + "resolve-pathname": "^3.0.0", + "tiny-invariant": "^1.0.2", + "tiny-warning": "^1.0.0", + "value-equal": "^1.0.1" + } + }, + "node_modules/hoist-non-react-statics": { + "version": "3.3.2", + "resolved": 
"https://registry.npmjs.org/hoist-non-react-statics/-/hoist-non-react-statics-3.3.2.tgz", + "integrity": "sha512-/gGivxi8JPKWNm/W0jSmzcMPpfpPLc3dY/6GxhX2hQ9iGj3aDfklV4ET7NjKpSinLpJ5vafa9iiGIEZg10SfBw==", + "dependencies": { + "react-is": "^16.7.0" + } + }, + "node_modules/hosted-git-info": { + "version": "2.8.9", + "resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-2.8.9.tgz", + "integrity": "sha512-mxIDAb9Lsm6DoOJ7xH+5+X4y1LU/4Hi50L9C5sIswK3JzULS4bwk1FvjdBgvYR4bzT4tuUQiC15FE2f5HbLvYw==", + "dev": true + }, + "node_modules/ignore": { + "version": "4.0.6", + "resolved": "https://registry.npmjs.org/ignore/-/ignore-4.0.6.tgz", + "integrity": "sha512-cyFDKrqc/YdcWFniJhzI42+AzS+gNwmUzOSFcRCQYwySuBBBy/KjuxWLZ/FHEH6Moq1NizMOBWyTcv8O4OZIMg==", + "dev": true, + "engines": { + "node": ">= 4" + } + }, + "node_modules/import-fresh": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.0.tgz", + "integrity": "sha512-veYYhQa+D1QBKznvhUHxb8faxlrwUnxseDAbAp457E0wLNio2bOSKnjYDhMj+YiAq61xrMGhQk9iXVk5FzgQMw==", + "dev": true, + "dependencies": { + "parent-module": "^1.0.0", + "resolve-from": "^4.0.0" + }, + "engines": { + "node": ">=6" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/imurmurhash": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/imurmurhash/-/imurmurhash-0.1.4.tgz", + "integrity": "sha1-khi5srkoojixPcT7a21XbyMUU+o=", + "dev": true, + "engines": { + "node": ">=0.8.19" + } + }, + "node_modules/inflight": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", + "integrity": "sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk=", + "dev": true, + "dependencies": { + "once": "^1.3.0", + "wrappy": "1" + } + }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "dev": true + }, + "node_modules/internal-slot": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/internal-slot/-/internal-slot-1.0.3.tgz", + "integrity": "sha512-O0DB1JC/sPyZl7cIo78n5dR7eUSwwpYPiXRhTzNxZVAMUuB8vlnRFyLxdrVToks6XPLVnFfbzaVd5WLjhgg+vA==", + "dev": true, + "dependencies": { + "get-intrinsic": "^1.1.0", + "has": "^1.0.3", + "side-channel": "^1.0.4" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/is-arrayish": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.2.1.tgz", + "integrity": "sha1-d8mYQFJ6qOyxqLppe4BkWnqSap0=", + "dev": true + }, + "node_modules/is-bigint": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/is-bigint/-/is-bigint-1.0.2.tgz", + "integrity": "sha512-0JV5+SOCQkIdzjBK9buARcV804Ddu7A0Qet6sHi3FimE9ne6m4BGQZfRn+NZiXbBk4F4XmHfDZIipLj9pX8dSA==", + "dev": true, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-boolean-object": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/is-boolean-object/-/is-boolean-object-1.1.1.tgz", + "integrity": "sha512-bXdQWkECBUIAcCkeH1unwJLIpZYaa5VvuygSyS/c2lf719mTKZDU5UdDRlpd01UjADgmW8RfqaP+mRaVPdr/Ng==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-callable": { + "version": "1.2.3", + "resolved": 
"https://registry.npmjs.org/is-callable/-/is-callable-1.2.3.tgz", + "integrity": "sha512-J1DcMe8UYTBSrKezuIUTUwjXsho29693unXM2YhJUTR2txK/eG47bvNa/wipPFmZFgr/N6f1GA66dv0mEyTIyQ==", + "dev": true, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-core-module": { + "version": "2.4.0", + "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.4.0.tgz", + "integrity": "sha512-6A2fkfq1rfeQZjxrZJGerpLCTHRNEBiSgnu0+obeJpEPZRUooHgsizvzv0ZjJwOz3iWIHdJtVWJ/tmPr3D21/A==", + "dev": true, + "dependencies": { + "has": "^1.0.3" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-date-object": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/is-date-object/-/is-date-object-1.0.4.tgz", + "integrity": "sha512-/b4ZVsG7Z5XVtIxs/h9W8nvfLgSAyKYdtGWQLbqy6jA1icmgjf8WCoTKgeS4wy5tYaPePouzFMANbnj94c2Z+A==", + "dev": true, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-extglob": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", + "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/is-glob": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.1.tgz", + "integrity": "sha512-5G0tKtBTFImOqDnLB2hG6Bp2qcKEFduo4tZu9MT/H6NQv/ghhy30o55ufafxJ/LdH79LLs2Kfrn85TLKyA7BUg==", + "dev": true, + "dependencies": { + "is-extglob": "^2.1.1" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-negative-zero": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-negative-zero/-/is-negative-zero-2.0.1.tgz", + "integrity": "sha512-2z6JzQvZRa9A2Y7xC6dQQm4FSTSTNWjKIYYTt4246eMTJmIo0Q+ZyOsU66X8lxK1AbB92dFeglPLrhwpeRKO6w==", + "dev": true, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-number": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", + "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", + "dev": true, + "engines": { + "node": ">=0.12.0" + } + }, + "node_modules/is-number-object": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/is-number-object/-/is-number-object-1.0.5.tgz", + "integrity": "sha512-RU0lI/n95pMoUKu9v1BZP5MBcZuNSVJkMkAG2dJqC4z2GlkGUNeH68SuHuBKBD/XFe+LHZ+f9BKkLET60Niedw==", + "dev": true, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-regex": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/is-regex/-/is-regex-1.1.3.tgz", + "integrity": "sha512-qSVXFz28HM7y+IWX6vLCsexdlvzT1PJNFSBuaQLQ5o0IEw8UDYW6/2+eCMVyIsbM8CNLX2a/QWmSpyxYEHY7CQ==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.2", + "has-symbols": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-string": { + 
"version": "1.0.6", + "resolved": "https://registry.npmjs.org/is-string/-/is-string-1.0.6.tgz", + "integrity": "sha512-2gdzbKUuqtQ3lYNrUTQYoClPhm7oQu4UdpSZMp1/DGgkHBT8E2Z1l0yMdb6D4zNAxwDiMv8MdulKROJGNl0Q0w==", + "dev": true, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-symbol": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/is-symbol/-/is-symbol-1.0.4.tgz", + "integrity": "sha512-C/CPBqKWnvdcxqIARxyOh4v1UUEOCHpgDa0WYgpKDFMszcrPcffg5uhwSgPCLD2WWxmq6isisz87tzT01tuGhg==", + "dev": true, + "dependencies": { + "has-symbols": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/isarray": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/isarray/-/isarray-0.0.1.tgz", + "integrity": "sha1-ihis/Kmo9Bd+Cav8YDiTmwXR7t8=" + }, + "node_modules/isexe": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha1-6PvzdNxVb/iUehDcsFctYz8s+hA=", + "dev": true + }, + "node_modules/js-tokens": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", + "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==" + }, + "node_modules/js-yaml": { + "version": "3.14.1", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.1.tgz", + "integrity": "sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==", + "dev": true, + "dependencies": { + "argparse": "^1.0.7", + "esprima": "^4.0.0" + }, + "bin": { + "js-yaml": "bin/js-yaml.js" + } + }, + "node_modules/jsesc": { + "version": "2.5.2", + "resolved": "https://registry.npmjs.org/jsesc/-/jsesc-2.5.2.tgz", + "integrity": "sha512-OYu7XEzjkCQ3C5Ps3QIZsQfNpqoJyZZA99wd9aWd05NCtC5pWOkShK2mkL6HXQR6/Cy2lbNdPlZBpuQHXE63gA==", + "dev": true, + "bin": { + "jsesc": "bin/jsesc" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/json-parse-better-errors": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/json-parse-better-errors/-/json-parse-better-errors-1.0.2.tgz", + "integrity": "sha512-mrqyZKfX5EhL7hvqcV6WG1yYjnjeuYDzDhhcAAUrq8Po85NBQBJP+ZDUT75qZQ98IkUoBqdkExkukOU7Ts2wrw==", + "dev": true + }, + "node_modules/json-schema-traverse": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", + "dev": true + }, + "node_modules/json-stable-stringify-without-jsonify": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz", + "integrity": "sha1-nbe1lJatPzz+8wp1FC0tkwrXJlE=", + "dev": true + }, + "node_modules/json5": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.0.tgz", + "integrity": "sha512-f+8cldu7X/y7RAJurMEJmdoKXGB/X550w2Nr3tTbezL6RwEE/iMcm+tZnXeoZtKuOq6ft8+CqzEkrIgx1fPoQA==", + "dev": true, + "dependencies": { + "minimist": "^1.2.5" + }, + "bin": { + "json5": "lib/cli.js" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/jsx-ast-utils": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/jsx-ast-utils/-/jsx-ast-utils-3.2.0.tgz", + "integrity": 
"sha512-EIsmt3O3ljsU6sot/J4E1zDRxfBNrhjyf/OKjlydwgEimQuznlM4Wv7U+ueONJMyEn1WRE0K8dhi3dVAXYT24Q==", + "dev": true, + "dependencies": { + "array-includes": "^3.1.2", + "object.assign": "^4.1.2" + }, + "engines": { + "node": ">=4.0" + } + }, + "node_modules/language-subtag-registry": { + "version": "0.3.21", + "resolved": "https://registry.npmjs.org/language-subtag-registry/-/language-subtag-registry-0.3.21.tgz", + "integrity": "sha512-L0IqwlIXjilBVVYKFT37X9Ih11Um5NEl9cbJIuU/SwP/zEEAbBPOnEeeuxVMf45ydWQRDQN3Nqc96OgbH1K+Pg==", + "dev": true + }, + "node_modules/language-tags": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/language-tags/-/language-tags-1.0.5.tgz", + "integrity": "sha1-0yHbxNowuovzAk4ED6XBRmH5GTo=", + "dev": true, + "dependencies": { + "language-subtag-registry": "~0.3.2" + } + }, + "node_modules/levn": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz", + "integrity": "sha512-+bT2uH4E5LGE7h/n3evcS/sQlJXCpIp6ym8OWJ5eV6+67Dsql/LaaT7qJBAt2rzfoa/5QBGBhxDix1dMt2kQKQ==", + "dev": true, + "dependencies": { + "prelude-ls": "^1.2.1", + "type-check": "~0.4.0" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/load-json-file": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/load-json-file/-/load-json-file-4.0.0.tgz", + "integrity": "sha1-L19Fq5HjMhYjT9U62rZo607AmTs=", + "dev": true, + "dependencies": { + "graceful-fs": "^4.1.2", + "parse-json": "^4.0.0", + "pify": "^3.0.0", + "strip-bom": "^3.0.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/locate-path": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-2.0.0.tgz", + "integrity": "sha1-K1aLJl7slExtnA3pw9u7ygNUzY4=", + "dev": true, + "dependencies": { + "p-locate": "^2.0.0", + "path-exists": "^3.0.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/lodash": { + "version": "4.17.21", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", + "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==", + "dev": true + }, + "node_modules/lodash.clonedeep": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz", + "integrity": "sha1-4j8/nE+Pvd6HJSnBBxhXoIblzO8=", + "dev": true + }, + "node_modules/lodash.truncate": { + "version": "4.4.2", + "resolved": "https://registry.npmjs.org/lodash.truncate/-/lodash.truncate-4.4.2.tgz", + "integrity": "sha1-WjUNoLERO4N+z//VgSy+WNbq4ZM=", + "dev": true + }, + "node_modules/loose-envify": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", + "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==", + "dependencies": { + "js-tokens": "^3.0.0 || ^4.0.0" + }, + "bin": { + "loose-envify": "cli.js" + } + }, + "node_modules/lru-cache": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", + "integrity": "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==", + "dev": true, + "dependencies": { + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/merge2": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz", + "integrity": "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==", + "dev": true, + "engines": { + "node": ">= 8" + } + 
}, + "node_modules/micromatch": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.4.tgz", + "integrity": "sha512-pRmzw/XUcwXGpD9aI9q/0XOwLNygjETJ8y0ao0wdqprrzDa4YnxLcz7fQRZr8voh8V10kGhABbNcHVk5wHgWwg==", + "dev": true, + "dependencies": { + "braces": "^3.0.1", + "picomatch": "^2.2.3" + }, + "engines": { + "node": ">=8.6" + } + }, + "node_modules/mini-create-react-context": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/mini-create-react-context/-/mini-create-react-context-0.4.1.tgz", + "integrity": "sha512-YWCYEmd5CQeHGSAKrYvXgmzzkrvssZcuuQDDeqkT+PziKGMgE+0MCCtcKbROzocGBG1meBLl2FotlRwf4gAzbQ==", + "dependencies": { + "@babel/runtime": "^7.12.1", + "tiny-warning": "^1.0.3" + }, + "peerDependencies": { + "prop-types": "^15.0.0", + "react": "^0.14.0 || ^15.0.0 || ^16.0.0 || ^17.0.0" + } + }, + "node_modules/minimatch": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz", + "integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==", + "dev": true, + "dependencies": { + "brace-expansion": "^1.1.7" + }, + "engines": { + "node": "*" + } + }, + "node_modules/minimist": { + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz", + "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==", + "dev": true + }, + "node_modules/ms": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", + "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", + "dev": true + }, + "node_modules/nanoid": { + "version": "3.1.23", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.1.23.tgz", + "integrity": "sha512-FiB0kzdP0FFVGDKlRLEQ1BgDzU87dy5NnzjeW9YZNt+/c3+q82EQDUwniSAUxp/F0gFNI1ZhKU1FqYsMuqZVnw==", + "dev": true, + "bin": { + "nanoid": "bin/nanoid.cjs" + }, + "engines": { + "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" + } + }, + "node_modules/natural-compare": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz", + "integrity": "sha1-Sr6/7tdUHywnrPspvbvRXI1bpPc=", + "dev": true + }, + "node_modules/node-releases": { + "version": "1.1.72", + "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-1.1.72.tgz", + "integrity": "sha512-LLUo+PpH3dU6XizX3iVoubUNheF/owjXCZZ5yACDxNnPtgFuludV1ZL3ayK1kVep42Rmm0+R9/Y60NQbZ2bifw==", + "dev": true + }, + "node_modules/normalize-package-data": { + "version": "2.5.0", + "resolved": "https://registry.npmjs.org/normalize-package-data/-/normalize-package-data-2.5.0.tgz", + "integrity": "sha512-/5CMN3T0R4XTj4DcGaexo+roZSdSFW/0AOOTROrjxzCG1wrWXEsGbRKevjlIL+ZDE4sZlJr5ED4YW0yqmkK+eA==", + "dev": true, + "dependencies": { + "hosted-git-info": "^2.1.4", + "resolve": "^1.10.0", + "semver": "2 || 3 || 4 || 5", + "validate-npm-package-license": "^3.0.1" + } + }, + "node_modules/normalize-package-data/node_modules/semver": { + "version": "5.7.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-5.7.1.tgz", + "integrity": "sha512-sauaDf/PZdVgrLTNYHRtpXa1iRiKcaebiKQ1BJdpQlWH2lCvexQdX55snPFyK7QzpudqbCI0qXFfOasHdyNDGQ==", + "dev": true, + "bin": { + "semver": "bin/semver" + } + }, + "node_modules/normalize-path": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-2.1.1.tgz", + "integrity": "sha1-GrKLVW4Zg2Oowab35vogE3/mrtk=", + 
"dev": true, + "dependencies": { + "remove-trailing-separator": "^1.0.1" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/object-assign": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", + "integrity": "sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM=", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/object-inspect": { + "version": "1.10.3", + "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.10.3.tgz", + "integrity": "sha512-e5mCJlSH7poANfC8z8S9s9S2IN5/4Zb3aZ33f5s8YqoazCFzNLloLU8r5VCG+G7WoqLvAAZoVMcy3tp/3X0Plw==", + "dev": true, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/object-keys": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/object-keys/-/object-keys-1.1.1.tgz", + "integrity": "sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA==", + "dev": true, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/object.assign": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/object.assign/-/object.assign-4.1.2.tgz", + "integrity": "sha512-ixT2L5THXsApyiUPYKmW+2EHpXXe5Ii3M+f4e+aJFAHao5amFRW6J0OO6c/LU8Be47utCx2GL89hxGB6XSmKuQ==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.0", + "define-properties": "^1.1.3", + "has-symbols": "^1.0.1", + "object-keys": "^1.1.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/object.entries": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/object.entries/-/object.entries-1.1.3.tgz", + "integrity": "sha512-ym7h7OZebNS96hn5IJeyUmaWhaSM4SVtAPPfNLQEI2MYWCO2egsITb9nab2+i/Pwibx+R0mtn+ltKJXRSeTMGg==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.0", + "define-properties": "^1.1.3", + "es-abstract": "^1.18.0-next.1", + "has": "^1.0.3" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/object.fromentries": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/object.fromentries/-/object.fromentries-2.0.4.tgz", + "integrity": "sha512-EsFBshs5RUUpQEY1D4q/m59kMfz4YJvxuNCJcv/jWwOJr34EaVnG11ZrZa0UHB3wnzV1wx8m58T4hQL8IuNXlQ==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.2", + "define-properties": "^1.1.3", + "es-abstract": "^1.18.0-next.2", + "has": "^1.0.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/object.values": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/object.values/-/object.values-1.1.3.tgz", + "integrity": "sha512-nkF6PfDB9alkOUxpf1HNm/QlkeW3SReqL5WXeBLpEJJnlPSvRaDQpW3gQTksTN3fgJX4hL42RzKyOin6ff3tyw==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.2", + "define-properties": "^1.1.3", + "es-abstract": "^1.18.0-next.2", + "has": "^1.0.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha1-WDsap3WWHUsROsF9nFC6753Xa9E=", + "dev": true, + "dependencies": { + "wrappy": "1" + } + }, + "node_modules/optionator": { + "version": "0.9.1", + "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.1.tgz", + "integrity": "sha512-74RlY5FCnhq4jRxVUPKDaRwrVNXMqsGsiW6AJw4XK8hmtm10wC0ypZBLw5IIp85NZMr91+qd1RvvENwg7jjRFw==", + "dev": true, + "dependencies": { + "deep-is": "^0.1.3", + 
"fast-levenshtein": "^2.0.6", + "levn": "^0.4.1", + "prelude-ls": "^1.2.1", + "type-check": "^0.4.0", + "word-wrap": "^1.2.3" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/p-limit": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-1.3.0.tgz", + "integrity": "sha512-vvcXsLAJ9Dr5rQOPk7toZQZJApBl2K4J6dANSsEuh6QI41JYcsS/qhTGa9ErIUUgK3WNQoJYvylxvjqmiqEA9Q==", + "dev": true, + "dependencies": { + "p-try": "^1.0.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/p-locate": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-2.0.0.tgz", + "integrity": "sha1-IKAQOyIqcMj9OcwuWAaA893l7EM=", + "dev": true, + "dependencies": { + "p-limit": "^1.1.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/p-try": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/p-try/-/p-try-1.0.0.tgz", + "integrity": "sha1-y8ec26+P1CKOE/Yh8rGiN8GyB7M=", + "dev": true, + "engines": { + "node": ">=4" + } + }, + "node_modules/parent-module": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz", + "integrity": "sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==", + "dev": true, + "dependencies": { + "callsites": "^3.0.0" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/parse-json": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-4.0.0.tgz", + "integrity": "sha1-vjX1Qlvh9/bHRxhPmKeIy5lHfuA=", + "dev": true, + "dependencies": { + "error-ex": "^1.3.1", + "json-parse-better-errors": "^1.0.1" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/path-exists": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-3.0.0.tgz", + "integrity": "sha1-zg6+ql94yxiSXqfYENe1mwEP1RU=", + "dev": true, + "engines": { + "node": ">=4" + } + }, + "node_modules/path-is-absolute": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", + "integrity": "sha1-F0uSaHNVNP+8es5r9TpanhtcX18=", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/path-key": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", + "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/path-parse": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.6.tgz", + "integrity": "sha512-GSmOT2EbHrINBf9SR7CDELwlJ8AENk3Qn7OikK4nFYAu3Ote2+JYNVvkpAEQm3/TLNEJFD/xZJjzyxg3KBWOzw==", + "dev": true + }, + "node_modules/path-starts-with": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/path-starts-with/-/path-starts-with-1.0.0.tgz", + "integrity": "sha1-soJDAV6LE43lcmgqxS2kLmRq2E4=", + "dev": true, + "dependencies": { + "normalize-path": "^2.1.1" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/path-to-regexp": { + "version": "1.8.0", + "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-1.8.0.tgz", + "integrity": "sha512-n43JRhlUKUAlibEJhPeir1ncUID16QnEjNpwzNdO3Lm4ywrBpBZ5oLD0I6br9evr1Y9JTqwRtAh7JLoOzAQdVA==", + "dependencies": { + "isarray": "0.0.1" + } + }, + "node_modules/path-type": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/path-type/-/path-type-4.0.0.tgz", + "integrity": 
"sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/picomatch": { + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.2.3.tgz", + "integrity": "sha512-KpELjfwcCDUb9PeigTs2mBJzXUPzAuP2oPcA989He8Rte0+YUAjw1JVedDhuTKPkHjSYzMN3npC9luThGYEKdg==", + "dev": true, + "engines": { + "node": ">=8.6" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/pify": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pify/-/pify-3.0.0.tgz", + "integrity": "sha1-5aSs0sEB/fPZpNB/DbxNtJ3SgXY=", + "dev": true, + "engines": { + "node": ">=4" + } + }, + "node_modules/pkg-dir": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/pkg-dir/-/pkg-dir-2.0.0.tgz", + "integrity": "sha1-9tXREJ4Z1j7fQo4L1X4Sd3YVM0s=", + "dev": true, + "dependencies": { + "find-up": "^2.1.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/pkg-up": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/pkg-up/-/pkg-up-2.0.0.tgz", + "integrity": "sha1-yBmscoBZpGHKscOImivjxJoATX8=", + "dev": true, + "dependencies": { + "find-up": "^2.1.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/postcss": { + "version": "8.2.15", + "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.2.15.tgz", + "integrity": "sha512-2zO3b26eJD/8rb106Qu2o7Qgg52ND5HPjcyQiK2B98O388h43A448LCslC0dI2P97wCAQRJsFvwTRcXxTKds+Q==", + "dev": true, + "dependencies": { + "colorette": "^1.2.2", + "nanoid": "^3.1.23", + "source-map": "^0.6.1" + }, + "engines": { + "node": "^10 || ^12 || >=14" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/postcss/" + } + }, + "node_modules/postcss/node_modules/source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/prelude-ls": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz", + "integrity": "sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g==", + "dev": true, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/progress": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz", + "integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==", + "dev": true, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/prop-types": { + "version": "15.7.2", + "resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.7.2.tgz", + "integrity": "sha512-8QQikdH7//R2vurIJSutZ1smHYTcLpRWEOlHnzcWHmBYrOGUysKwSsrC89BCiFj3CbrfJ/nXFdJepOVrY1GCHQ==", + "dependencies": { + "loose-envify": "^1.4.0", + "object-assign": "^4.1.1", + "react-is": "^16.8.1" + } + }, + "node_modules/punycode": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.1.1.tgz", + "integrity": "sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==", + "dev": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/queue-microtask": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz", + "integrity": 
"sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ] + }, + "node_modules/react": { + "version": "17.0.2", + "resolved": "https://registry.npmjs.org/react/-/react-17.0.2.tgz", + "integrity": "sha512-gnhPt75i/dq/z3/6q/0asP78D0u592D5L1pd7M8P+dck6Fu/jJeL6iVVK23fptSUZj8Vjf++7wXA8UNclGQcbA==", + "dependencies": { + "loose-envify": "^1.1.0", + "object-assign": "^4.1.1" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/react-dom": { + "version": "17.0.2", + "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-17.0.2.tgz", + "integrity": "sha512-s4h96KtLDUQlsENhMn1ar8t2bEa+q/YAtj8pPPdIjPDGBDIVNsrD9aXNWqspUe6AzKCIG0C1HZZLqLV7qpOBGA==", + "dependencies": { + "loose-envify": "^1.1.0", + "object-assign": "^4.1.1", + "scheduler": "^0.20.2" + }, + "peerDependencies": { + "react": "17.0.2" + } + }, + "node_modules/react-is": { + "version": "16.13.1", + "resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz", + "integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==" + }, + "node_modules/react-refresh": { + "version": "0.9.0", + "resolved": "https://registry.npmjs.org/react-refresh/-/react-refresh-0.9.0.tgz", + "integrity": "sha512-Gvzk7OZpiqKSkxsQvO/mbTN1poglhmAV7gR/DdIrRrSMXraRQQlfikRJOr3Nb9GTMPC5kof948Zy6jJZIFtDvQ==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/react-router": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/react-router/-/react-router-5.2.0.tgz", + "integrity": "sha512-smz1DUuFHRKdcJC0jobGo8cVbhO3x50tCL4icacOlcwDOEQPq4TMqwx3sY1TP+DvtTgz4nm3thuo7A+BK2U0Dw==", + "dependencies": { + "@babel/runtime": "^7.1.2", + "history": "^4.9.0", + "hoist-non-react-statics": "^3.1.0", + "loose-envify": "^1.3.1", + "mini-create-react-context": "^0.4.0", + "path-to-regexp": "^1.7.0", + "prop-types": "^15.6.2", + "react-is": "^16.6.0", + "tiny-invariant": "^1.0.2", + "tiny-warning": "^1.0.0" + }, + "peerDependencies": { + "react": ">=15" + } + }, + "node_modules/react-router-dom": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/react-router-dom/-/react-router-dom-5.2.0.tgz", + "integrity": "sha512-gxAmfylo2QUjcwxI63RhQ5G85Qqt4voZpUXSEqCwykV0baaOTQDR1f0PmY8AELqIyVc0NEZUj0Gov5lNGcXgsA==", + "dependencies": { + "@babel/runtime": "^7.1.2", + "history": "^4.9.0", + "loose-envify": "^1.3.1", + "prop-types": "^15.6.2", + "react-router": "5.2.0", + "tiny-invariant": "^1.0.2", + "tiny-warning": "^1.0.0" + }, + "peerDependencies": { + "react": ">=15" + } + }, + "node_modules/read-pkg": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/read-pkg/-/read-pkg-3.0.0.tgz", + "integrity": "sha1-nLxoaXj+5l0WwA4rGcI3/Pbjg4k=", + "dev": true, + "dependencies": { + "load-json-file": "^4.0.0", + "normalize-package-data": "^2.3.2", + "path-type": "^3.0.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/read-pkg-up": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/read-pkg-up/-/read-pkg-up-3.0.0.tgz", + "integrity": "sha1-PtSWaF26D4/hGNBpHcUfSh/5bwc=", + "dev": true, + "dependencies": { + "find-up": "^2.0.0", + "read-pkg": "^3.0.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/read-pkg/node_modules/path-type": { + 
"version": "3.0.0", + "resolved": "https://registry.npmjs.org/path-type/-/path-type-3.0.0.tgz", + "integrity": "sha512-T2ZUsdZFHgA3u4e5PfPbjd7HDDpxPnQb5jN0SrDsjNSuVXHJqtwTnWqG0B1jZrgmJ/7lj1EmVIByWt1gxGkWvg==", + "dev": true, + "dependencies": { + "pify": "^3.0.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/regenerator-runtime": { + "version": "0.13.7", + "resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.13.7.tgz", + "integrity": "sha512-a54FxoJDIr27pgf7IgeQGxmqUNYrcV338lf/6gH456HZ/PhX+5BcwHXG9ajESmwe6WRO0tAzRUrRmNONWgkrew==" + }, + "node_modules/regexp.prototype.flags": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/regexp.prototype.flags/-/regexp.prototype.flags-1.3.1.tgz", + "integrity": "sha512-JiBdRBq91WlY7uRJ0ds7R+dU02i6LKi8r3BuQhNXn+kmeLN+EfHhfjqMRis1zJxnlu88hq/4dx0P2OP3APRTOA==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.2", + "define-properties": "^1.1.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/regexpp": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/regexpp/-/regexpp-3.1.0.tgz", + "integrity": "sha512-ZOIzd8yVsQQA7j8GCSlPGXwg5PfmA1mrq0JP4nGhh54LaKN3xdai/vHUDu74pKwV8OxseMS65u2NImosQcSD0Q==", + "dev": true, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/mysticatea" + } + }, + "node_modules/remove-trailing-separator": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/remove-trailing-separator/-/remove-trailing-separator-1.1.0.tgz", + "integrity": "sha1-wkvOKig62tW8P1jg1IJJuSN52O8=", + "dev": true + }, + "node_modules/require-from-string": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz", + "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/resolve": { + "version": "1.20.0", + "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.20.0.tgz", + "integrity": "sha512-wENBPt4ySzg4ybFQW2TT1zMQucPK95HSh/nq2CFTZVOGut2+pQvSsgtda4d26YrYcr067wjbmzOG8byDPBX63A==", + "dev": true, + "dependencies": { + "is-core-module": "^2.2.0", + "path-parse": "^1.0.6" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/resolve-from": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz", + "integrity": "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==", + "dev": true, + "engines": { + "node": ">=4" + } + }, + "node_modules/resolve-pathname": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/resolve-pathname/-/resolve-pathname-3.0.0.tgz", + "integrity": "sha512-C7rARubxI8bXFNB/hqcp/4iUeIXJhJZvFPFPiSPRnhU5UPxzMFIl+2E6yY6c4k9giDJAhtV+enfA+G89N6Csng==" + }, + "node_modules/reusify": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.0.4.tgz", + "integrity": "sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw==", + "dev": true, + "engines": { + "iojs": ">=1.0.0", + "node": ">=0.10.0" + } + }, + "node_modules/rimraf": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz", + "integrity": "sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==", + "dev": true, 
+ "dependencies": { + "glob": "^7.1.3" + }, + "bin": { + "rimraf": "bin.js" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/rollup": { + "version": "2.48.0", + "resolved": "https://registry.npmjs.org/rollup/-/rollup-2.48.0.tgz", + "integrity": "sha512-wl9ZSSSsi5579oscSDYSzGn092tCS076YB+TQrzsGuSfYyJeep8eEWj0eaRjuC5McuMNmcnR8icBqiE/FWNB1A==", + "dev": true, + "dependencies": { + "fsevents": "~2.3.1" + }, + "bin": { + "rollup": "dist/bin/rollup" + }, + "engines": { + "node": ">=10.0.0" + }, + "optionalDependencies": { + "fsevents": "~2.3.1" + } + }, + "node_modules/run-parallel": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz", + "integrity": "sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "dependencies": { + "queue-microtask": "^1.2.2" + } + }, + "node_modules/safe-buffer": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", + "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==", + "dev": true + }, + "node_modules/scheduler": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.20.2.tgz", + "integrity": "sha512-2eWfGgAqqWFGqtdMmcL5zCMK1U8KlXv8SQFGglL3CEtd0aDVDWgeF/YoCmvln55m5zSk3J/20hTaSBeSObsQDQ==", + "dependencies": { + "loose-envify": "^1.1.0", + "object-assign": "^4.1.1" + } + }, + "node_modules/semver": { + "version": "7.3.5", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.3.5.tgz", + "integrity": "sha512-PoeGJYh8HK4BTO/a9Tf6ZG3veo/A7ZVsYrSA6J8ny9nb3B1VrpkuN+z9OE5wfE5p6H4LchYZsegiQgbJD94ZFQ==", + "dev": true, + "dependencies": { + "lru-cache": "^6.0.0" + }, + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/shebang-command": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "dev": true, + "dependencies": { + "shebang-regex": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/shebang-regex": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/side-channel": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.0.4.tgz", + "integrity": "sha512-q5XPytqFEIKHkGdiMIrY10mvLRvnQh42/+GoBlFW3b2LXLE2xxJpZFdm94we0BaoV3RwJyGqg5wS7epxTv0Zvw==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.0", + "get-intrinsic": "^1.0.2", + "object-inspect": "^1.9.0" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/slash": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz", + "integrity": "sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + 
"node_modules/slice-ansi": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/slice-ansi/-/slice-ansi-4.0.0.tgz", + "integrity": "sha512-qMCMfhY040cVHT43K9BFygqYbUPFZKHOg7K73mtTWJRb8pyP3fzf4Ixd5SzdEJQ6MRUg/WBnOLxghZtKKurENQ==", + "dev": true, + "dependencies": { + "ansi-styles": "^4.0.0", + "astral-regex": "^2.0.0", + "is-fullwidth-code-point": "^3.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/slice-ansi?sponsor=1" + } + }, + "node_modules/slice-ansi/node_modules/ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/slice-ansi/node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dev": true, + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/slice-ansi/node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "dev": true + }, + "node_modules/source-map": { + "version": "0.5.7", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.5.7.tgz", + "integrity": "sha1-igOdLRAh0i0eoUyA2OpGi6LvP8w=", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/spdx-correct": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/spdx-correct/-/spdx-correct-3.1.1.tgz", + "integrity": "sha512-cOYcUWwhCuHCXi49RhFRCyJEK3iPj1Ziz9DpViV3tbZOwXD49QzIN3MpOLJNxh2qwq2lJJZaKMVw9qNi4jTC0w==", + "dev": true, + "dependencies": { + "spdx-expression-parse": "^3.0.0", + "spdx-license-ids": "^3.0.0" + } + }, + "node_modules/spdx-exceptions": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/spdx-exceptions/-/spdx-exceptions-2.3.0.tgz", + "integrity": "sha512-/tTrYOC7PPI1nUAgx34hUpqXuyJG+DTHJTnIULG4rDygi4xu/tfgmq1e1cIRwRzwZgo4NLySi+ricLkZkw4i5A==", + "dev": true + }, + "node_modules/spdx-expression-parse": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/spdx-expression-parse/-/spdx-expression-parse-3.0.1.tgz", + "integrity": "sha512-cbqHunsQWnJNE6KhVSMsMeH5H/L9EpymbzqTQ3uLwNCLZ1Q481oWaofqH7nO6V07xlXwY6PhQdQ2IedWx/ZK4Q==", + "dev": true, + "dependencies": { + "spdx-exceptions": "^2.1.0", + "spdx-license-ids": "^3.0.0" + } + }, + "node_modules/spdx-license-ids": { + "version": "3.0.8", + "resolved": "https://registry.npmjs.org/spdx-license-ids/-/spdx-license-ids-3.0.8.tgz", + "integrity": "sha512-NDgA96EnaLSvtbM7trJj+t1LUR3pirkDCcz9nOUlPb5DMBGsH7oES6C3hs3j7R9oHEa1EMvReS/BUAIT5Tcr0g==", + "dev": true + }, + "node_modules/sprintf-js": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", + "integrity": "sha1-BOaSb2YolTVPPdAVIDYzuFcpfiw=", + "dev": true + }, + "node_modules/string-width": { + "version": "4.2.2", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.2.tgz", + "integrity": 
"sha512-XBJbT3N4JhVumXE0eoLU9DCjcaF92KLNqTmFCnG1pf8duUxFGwtP6AD6nkjw9a3IdiRtL3E2w3JDiE/xi3vOeA==", + "dev": true, + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/string-width/node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "dev": true + }, + "node_modules/string.prototype.matchall": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/string.prototype.matchall/-/string.prototype.matchall-4.0.4.tgz", + "integrity": "sha512-pknFIWVachNcyqRfaQSeu/FUfpvJTe4uskUSZ9Wc1RijsPuzbZ8TyYT8WCNnntCjUEqQ3vUHMAfVj2+wLAisPQ==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.2", + "define-properties": "^1.1.3", + "es-abstract": "^1.18.0-next.2", + "has-symbols": "^1.0.1", + "internal-slot": "^1.0.3", + "regexp.prototype.flags": "^1.3.1", + "side-channel": "^1.0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/string.prototype.trimend": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/string.prototype.trimend/-/string.prototype.trimend-1.0.4.tgz", + "integrity": "sha512-y9xCjw1P23Awk8EvTpcyL2NIr1j7wJ39f+k6lvRnSMz+mz9CGz9NYPelDk42kOz6+ql8xjfK8oYzy3jAP5QU5A==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.2", + "define-properties": "^1.1.3" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/string.prototype.trimstart": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/string.prototype.trimstart/-/string.prototype.trimstart-1.0.4.tgz", + "integrity": "sha512-jh6e984OBfvxS50tdY2nRZnoC5/mLFKOREQfw8t5yytkoUsJRNxvI/E39qu1sD0OtWI3OC0XgKSmcWwziwYuZw==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.2", + "define-properties": "^1.1.3" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/strip-ansi": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.0.tgz", + "integrity": "sha512-AuvKTrTfQNYNIctbR1K/YGTR1756GycPsg7b9bdV9Duqur4gv6aKqHXah67Z8ImS7WEz5QVcOtlfW2rZEugt6w==", + "dev": true, + "dependencies": { + "ansi-regex": "^5.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-bom": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-3.0.0.tgz", + "integrity": "sha1-IzTBjpx1n3vdVv3vfprj1YjmjtM=", + "dev": true, + "engines": { + "node": ">=4" + } + }, + "node_modules/strip-json-comments": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz", + "integrity": "sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==", + "dev": true, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/supports-color": { + "version": "5.5.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", + "integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==", + "dev": true, + "dependencies": { + "has-flag": "^3.0.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/table": { + "version": "6.7.1", + "resolved": "https://registry.npmjs.org/table/-/table-6.7.1.tgz", + 
"integrity": "sha512-ZGum47Yi6KOOFDE8m223td53ath2enHcYLgOCjGr5ngu8bdIARQk6mN/wRMv4yMRcHnCSnHbCEha4sobQx5yWg==", + "dev": true, + "dependencies": { + "ajv": "^8.0.1", + "lodash.clonedeep": "^4.5.0", + "lodash.truncate": "^4.4.2", + "slice-ansi": "^4.0.0", + "string-width": "^4.2.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=10.0.0" + } + }, + "node_modules/table/node_modules/ajv": { + "version": "8.4.0", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.4.0.tgz", + "integrity": "sha512-7QD2l6+KBSLwf+7MuYocbWvRPdOu63/trReTLu2KFwkgctnub1auoF+Y1WYcm09CTM7quuscrzqmASaLHC/K4Q==", + "dev": true, + "dependencies": { + "fast-deep-equal": "^3.1.1", + "json-schema-traverse": "^1.0.0", + "require-from-string": "^2.0.2", + "uri-js": "^4.2.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/table/node_modules/json-schema-traverse": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", + "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==", + "dev": true + }, + "node_modules/text-table": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz", + "integrity": "sha1-f17oI66AUgfACvLfSoTsP8+lcLQ=", + "dev": true + }, + "node_modules/tiny-invariant": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/tiny-invariant/-/tiny-invariant-1.1.0.tgz", + "integrity": "sha512-ytxQvrb1cPc9WBEI/HSeYYoGD0kWnGEOR8RY6KomWLBVhqz0RgTwVO9dLrGz7dC+nN9llyI7OKAgRq8Vq4ZBSw==" + }, + "node_modules/tiny-warning": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/tiny-warning/-/tiny-warning-1.0.3.tgz", + "integrity": "sha512-lBN9zLN/oAf68o3zNXYrdCt1kP8WsiGW8Oo2ka41b2IM5JL/S1CTyX1rW0mb/zSuJun0ZUrDxx4sqvYS2FWzPA==" + }, + "node_modules/to-fast-properties": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/to-fast-properties/-/to-fast-properties-2.0.0.tgz", + "integrity": "sha1-3F5pjL0HkmW8c+A3doGk5Og/YW4=", + "dev": true, + "engines": { + "node": ">=4" + } + }, + "node_modules/to-regex-range": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", + "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", + "dev": true, + "dependencies": { + "is-number": "^7.0.0" + }, + "engines": { + "node": ">=8.0" + } + }, + "node_modules/tsconfig-paths": { + "version": "3.9.0", + "resolved": "https://registry.npmjs.org/tsconfig-paths/-/tsconfig-paths-3.9.0.tgz", + "integrity": "sha512-dRcuzokWhajtZWkQsDVKbWyY+jgcLC5sqJhg2PSgf4ZkH2aHPvaOY8YWGhmjb68b5qqTfasSsDO9k7RUiEmZAw==", + "dev": true, + "dependencies": { + "@types/json5": "^0.0.29", + "json5": "^1.0.1", + "minimist": "^1.2.0", + "strip-bom": "^3.0.0" + } + }, + "node_modules/tsconfig-paths/node_modules/json5": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/json5/-/json5-1.0.1.tgz", + "integrity": "sha512-aKS4WQjPenRxiQsC93MNfjx+nbF4PAdYzmd/1JIj8HYzqfbu86beTuNgXDzPknWk0n0uARlyewZo4s++ES36Ow==", + "dev": true, + "dependencies": { + "minimist": "^1.2.0" + }, + "bin": { + "json5": "lib/cli.js" + } + }, + "node_modules/tslib": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-1.14.1.tgz", + "integrity": "sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==", + "dev": true + }, + 
"node_modules/tsutils": { + "version": "3.21.0", + "resolved": "https://registry.npmjs.org/tsutils/-/tsutils-3.21.0.tgz", + "integrity": "sha512-mHKK3iUXL+3UF6xL5k0PEhKRUBKPBCv/+RkEOpjRWxxx27KKRBmmA60A9pgOUvMi8GKhRMPEmjBRPzs2W7O1OA==", + "dev": true, + "dependencies": { + "tslib": "^1.8.1" + }, + "engines": { + "node": ">= 6" + }, + "peerDependencies": { + "typescript": ">=2.8.0 || >= 3.2.0-dev || >= 3.3.0-dev || >= 3.4.0-dev || >= 3.5.0-dev || >= 3.6.0-dev || >= 3.6.0-beta || >= 3.7.0-dev || >= 3.7.0-beta" + } + }, + "node_modules/type-check": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.4.0.tgz", + "integrity": "sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==", + "dev": true, + "dependencies": { + "prelude-ls": "^1.2.1" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/type-fest": { + "version": "0.8.1", + "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.8.1.tgz", + "integrity": "sha512-4dbzIzqvjtgiM5rw1k5rEHtBANKmdudhGyBEajN01fEyhaAIhsoKNy6y7+IN93IfpFtwY9iqi7kD+xwKhQsNJA==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/typescript": { + "version": "4.2.4", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.2.4.tgz", + "integrity": "sha512-V+evlYHZnQkaz8TRBuxTA92yZBPotr5H+WhQ7bD3hZUndx5tGOa1fuCgeSjxAzM1RiN5IzvadIXTVefuuwZCRg==", + "dev": true, + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=4.2.0" + } + }, + "node_modules/unbox-primitive": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/unbox-primitive/-/unbox-primitive-1.0.1.tgz", + "integrity": "sha512-tZU/3NqK3dA5gpE1KtyiJUrEB0lxnGkMFHptJ7q6ewdZ8s12QrODwNbhIJStmJkd1QDXa1NRA8aF2A1zk/Ypyw==", + "dev": true, + "dependencies": { + "function-bind": "^1.1.1", + "has-bigints": "^1.0.1", + "has-symbols": "^1.0.2", + "which-boxed-primitive": "^1.0.2" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/uri-js": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz", + "integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==", + "dev": true, + "dependencies": { + "punycode": "^2.1.0" + } + }, + "node_modules/v8-compile-cache": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/v8-compile-cache/-/v8-compile-cache-2.3.0.tgz", + "integrity": "sha512-l8lCEmLcLYZh4nbunNZvQCJc5pv7+RCwa8q/LdUx8u7lsWvPDKmpodJAJNwkAhJC//dFY48KuIEmjtd4RViDrA==", + "dev": true + }, + "node_modules/validate-npm-package-license": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/validate-npm-package-license/-/validate-npm-package-license-3.0.4.tgz", + "integrity": "sha512-DpKm2Ui/xN7/HQKCtpZxoRWBhZ9Z0kqtygG8XCgNQ8ZlDnxuQmWhj566j8fN4Cu3/JmbhsDo7fcAJq4s9h27Ew==", + "dev": true, + "dependencies": { + "spdx-correct": "^3.0.0", + "spdx-expression-parse": "^3.0.0" + } + }, + "node_modules/value-equal": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/value-equal/-/value-equal-1.0.1.tgz", + "integrity": "sha512-NOJ6JZCAWr0zlxZt+xqCHNTEKOsrks2HQd4MqhP1qy4z1SkbEP467eNx6TgDKXMvUOb+OENfJCZwM+16n7fRfw==" + }, + "node_modules/vite": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/vite/-/vite-2.3.3.tgz", + "integrity": "sha512-eO1iwRbn3/BfkNVMNJDeANAFCZ5NobYOFPu7IqfY7DcI7I9nFGjJIZid0EViTmLDGwwSUPmRAq3cRBbO3+DsMA==", + "dev": true, + "dependencies": { + "esbuild": 
"^0.11.23", + "fsevents": "~2.3.1", + "postcss": "^8.2.10", + "resolve": "^1.19.0", + "rollup": "^2.38.5" + }, + "bin": { + "vite": "bin/vite.js" + }, + "engines": { + "node": ">=12.0.0" + }, + "optionalDependencies": { + "fsevents": "~2.3.1" + } + }, + "node_modules/which": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "dev": true, + "dependencies": { + "isexe": "^2.0.0" + }, + "bin": { + "node-which": "bin/node-which" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/which-boxed-primitive": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/which-boxed-primitive/-/which-boxed-primitive-1.0.2.tgz", + "integrity": "sha512-bwZdv0AKLpplFY2KZRX6TvyuN7ojjr7lwkg6ml0roIy9YeuSr7JS372qlNW18UQYzgYK9ziGcerWqZOmEn9VNg==", + "dev": true, + "dependencies": { + "is-bigint": "^1.0.1", + "is-boolean-object": "^1.1.0", + "is-number-object": "^1.0.4", + "is-string": "^1.0.5", + "is-symbol": "^1.0.3" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/word-wrap": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.3.tgz", + "integrity": "sha512-Hz/mrNwitNRh/HUAtM/VT/5VH+ygD6DV7mYKZAtHOrbs8U7lvPS6xf7EJKMF0uW1KJCl0H701g3ZGus+muE5vQ==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=", + "dev": true + }, + "node_modules/yallist": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", + "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", + "dev": true + } + }, + "dependencies": { + "@babel/code-frame": { + "version": "7.12.13", + "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.12.13.tgz", + "integrity": "sha512-HV1Cm0Q3ZrpCR93tkWOYiuYIgLxZXZFVG2VgK+MBWjUqZTundupbfx2aXarXuw5Ko5aMcjtJgbSs4vUGBS5v6g==", + "dev": true, + "requires": { + "@babel/highlight": "^7.12.13" + } + }, + "@babel/compat-data": { + "version": "7.14.0", + "resolved": "https://registry.npmjs.org/@babel/compat-data/-/compat-data-7.14.0.tgz", + "integrity": "sha512-vu9V3uMM/1o5Hl5OekMUowo3FqXLJSw+s+66nt0fSWVWTtmosdzn45JHOB3cPtZoe6CTBDzvSw0RdOY85Q37+Q==", + "dev": true + }, + "@babel/core": { + "version": "7.14.3", + "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.14.3.tgz", + "integrity": "sha512-jB5AmTKOCSJIZ72sd78ECEhuPiDMKlQdDI/4QRI6lzYATx5SSogS1oQA2AoPecRCknm30gHi2l+QVvNUu3wZAg==", + "dev": true, + "requires": { + "@babel/code-frame": "^7.12.13", + "@babel/generator": "^7.14.3", + "@babel/helper-compilation-targets": "^7.13.16", + "@babel/helper-module-transforms": "^7.14.2", + "@babel/helpers": "^7.14.0", + "@babel/parser": "^7.14.3", + "@babel/template": "^7.12.13", + "@babel/traverse": "^7.14.2", + "@babel/types": "^7.14.2", + "convert-source-map": "^1.7.0", + "debug": "^4.1.0", + "gensync": "^1.0.0-beta.2", + "json5": "^2.1.2", + "semver": "^6.3.0", + "source-map": "^0.5.0" + }, + "dependencies": { + "semver": { + "version": "6.3.0", + "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.0.tgz", + "integrity": "sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==", + "dev": true + } + } + }, + "@babel/generator": { 
+ "version": "7.14.3", + "resolved": "https://registry.npmjs.org/@babel/generator/-/generator-7.14.3.tgz", + "integrity": "sha512-bn0S6flG/j0xtQdz3hsjJ624h3W0r3llttBMfyHX3YrZ/KtLYr15bjA0FXkgW7FpvrDuTuElXeVjiKlYRpnOFA==", + "dev": true, + "requires": { + "@babel/types": "^7.14.2", + "jsesc": "^2.5.1", + "source-map": "^0.5.0" + } + }, + "@babel/helper-compilation-targets": { + "version": "7.13.16", + "resolved": "https://registry.npmjs.org/@babel/helper-compilation-targets/-/helper-compilation-targets-7.13.16.tgz", + "integrity": "sha512-3gmkYIrpqsLlieFwjkGgLaSHmhnvlAYzZLlYVjlW+QwI+1zE17kGxuJGmIqDQdYp56XdmGeD+Bswx0UTyG18xA==", + "dev": true, + "requires": { + "@babel/compat-data": "^7.13.15", + "@babel/helper-validator-option": "^7.12.17", + "browserslist": "^4.14.5", + "semver": "^6.3.0" + }, + "dependencies": { + "semver": { + "version": "6.3.0", + "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.0.tgz", + "integrity": "sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==", + "dev": true + } + } + }, + "@babel/helper-function-name": { + "version": "7.14.2", + "resolved": "https://registry.npmjs.org/@babel/helper-function-name/-/helper-function-name-7.14.2.tgz", + "integrity": "sha512-NYZlkZRydxw+YT56IlhIcS8PAhb+FEUiOzuhFTfqDyPmzAhRge6ua0dQYT/Uh0t/EDHq05/i+e5M2d4XvjgarQ==", + "dev": true, + "requires": { + "@babel/helper-get-function-arity": "^7.12.13", + "@babel/template": "^7.12.13", + "@babel/types": "^7.14.2" + } + }, + "@babel/helper-get-function-arity": { + "version": "7.12.13", + "resolved": "https://registry.npmjs.org/@babel/helper-get-function-arity/-/helper-get-function-arity-7.12.13.tgz", + "integrity": "sha512-DjEVzQNz5LICkzN0REdpD5prGoidvbdYk1BVgRUOINaWJP2t6avB27X1guXK1kXNrX0WMfsrm1A/ZBthYuIMQg==", + "dev": true, + "requires": { + "@babel/types": "^7.12.13" + } + }, + "@babel/helper-member-expression-to-functions": { + "version": "7.13.12", + "resolved": "https://registry.npmjs.org/@babel/helper-member-expression-to-functions/-/helper-member-expression-to-functions-7.13.12.tgz", + "integrity": "sha512-48ql1CLL59aKbU94Y88Xgb2VFy7a95ykGRbJJaaVv+LX5U8wFpLfiGXJJGUozsmA1oEh/o5Bp60Voq7ACyA/Sw==", + "dev": true, + "requires": { + "@babel/types": "^7.13.12" + } + }, + "@babel/helper-module-imports": { + "version": "7.13.12", + "resolved": "https://registry.npmjs.org/@babel/helper-module-imports/-/helper-module-imports-7.13.12.tgz", + "integrity": "sha512-4cVvR2/1B693IuOvSI20xqqa/+bl7lqAMR59R4iu39R9aOX8/JoYY1sFaNvUMyMBGnHdwvJgUrzNLoUZxXypxA==", + "dev": true, + "requires": { + "@babel/types": "^7.13.12" + } + }, + "@babel/helper-module-transforms": { + "version": "7.14.2", + "resolved": "https://registry.npmjs.org/@babel/helper-module-transforms/-/helper-module-transforms-7.14.2.tgz", + "integrity": "sha512-OznJUda/soKXv0XhpvzGWDnml4Qnwp16GN+D/kZIdLsWoHj05kyu8Rm5kXmMef+rVJZ0+4pSGLkeixdqNUATDA==", + "dev": true, + "requires": { + "@babel/helper-module-imports": "^7.13.12", + "@babel/helper-replace-supers": "^7.13.12", + "@babel/helper-simple-access": "^7.13.12", + "@babel/helper-split-export-declaration": "^7.12.13", + "@babel/helper-validator-identifier": "^7.14.0", + "@babel/template": "^7.12.13", + "@babel/traverse": "^7.14.2", + "@babel/types": "^7.14.2" + } + }, + "@babel/helper-optimise-call-expression": { + "version": "7.12.13", + "resolved": "https://registry.npmjs.org/@babel/helper-optimise-call-expression/-/helper-optimise-call-expression-7.12.13.tgz", + "integrity": 
"sha512-BdWQhoVJkp6nVjB7nkFWcn43dkprYauqtk++Py2eaf/GRDFm5BxRqEIZCiHlZUGAVmtwKcsVL1dC68WmzeFmiA==", + "dev": true, + "requires": { + "@babel/types": "^7.12.13" + } + }, + "@babel/helper-plugin-utils": { + "version": "7.13.0", + "resolved": "https://registry.npmjs.org/@babel/helper-plugin-utils/-/helper-plugin-utils-7.13.0.tgz", + "integrity": "sha512-ZPafIPSwzUlAoWT8DKs1W2VyF2gOWthGd5NGFMsBcMMol+ZhK+EQY/e6V96poa6PA/Bh+C9plWN0hXO1uB8AfQ==", + "dev": true + }, + "@babel/helper-replace-supers": { + "version": "7.14.3", + "resolved": "https://registry.npmjs.org/@babel/helper-replace-supers/-/helper-replace-supers-7.14.3.tgz", + "integrity": "sha512-Rlh8qEWZSTfdz+tgNV/N4gz1a0TMNwCUcENhMjHTHKp3LseYH5Jha0NSlyTQWMnjbYcwFt+bqAMqSLHVXkQ6UA==", + "dev": true, + "requires": { + "@babel/helper-member-expression-to-functions": "^7.13.12", + "@babel/helper-optimise-call-expression": "^7.12.13", + "@babel/traverse": "^7.14.2", + "@babel/types": "^7.14.2" + } + }, + "@babel/helper-simple-access": { + "version": "7.13.12", + "resolved": "https://registry.npmjs.org/@babel/helper-simple-access/-/helper-simple-access-7.13.12.tgz", + "integrity": "sha512-7FEjbrx5SL9cWvXioDbnlYTppcZGuCY6ow3/D5vMggb2Ywgu4dMrpTJX0JdQAIcRRUElOIxF3yEooa9gUb9ZbA==", + "dev": true, + "requires": { + "@babel/types": "^7.13.12" + } + }, + "@babel/helper-split-export-declaration": { + "version": "7.12.13", + "resolved": "https://registry.npmjs.org/@babel/helper-split-export-declaration/-/helper-split-export-declaration-7.12.13.tgz", + "integrity": "sha512-tCJDltF83htUtXx5NLcaDqRmknv652ZWCHyoTETf1CXYJdPC7nohZohjUgieXhv0hTJdRf2FjDueFehdNucpzg==", + "dev": true, + "requires": { + "@babel/types": "^7.12.13" + } + }, + "@babel/helper-validator-identifier": { + "version": "7.14.0", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.14.0.tgz", + "integrity": "sha512-V3ts7zMSu5lfiwWDVWzRDGIN+lnCEUdaXgtVHJgLb1rGaA6jMrtB9EmE7L18foXJIE8Un/A/h6NJfGQp/e1J4A==", + "dev": true + }, + "@babel/helper-validator-option": { + "version": "7.12.17", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-option/-/helper-validator-option-7.12.17.tgz", + "integrity": "sha512-TopkMDmLzq8ngChwRlyjR6raKD6gMSae4JdYDB8bByKreQgG0RBTuKe9LRxW3wFtUnjxOPRKBDwEH6Mg5KeDfw==", + "dev": true + }, + "@babel/helpers": { + "version": "7.14.0", + "resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.14.0.tgz", + "integrity": "sha512-+ufuXprtQ1D1iZTO/K9+EBRn+qPWMJjZSw/S0KlFrxCw4tkrzv9grgpDHkY9MeQTjTY8i2sp7Jep8DfU6tN9Mg==", + "dev": true, + "requires": { + "@babel/template": "^7.12.13", + "@babel/traverse": "^7.14.0", + "@babel/types": "^7.14.0" + } + }, + "@babel/highlight": { + "version": "7.14.0", + "resolved": "https://registry.npmjs.org/@babel/highlight/-/highlight-7.14.0.tgz", + "integrity": "sha512-YSCOwxvTYEIMSGaBQb5kDDsCopDdiUGsqpatp3fOlI4+2HQSkTmEVWnVuySdAC5EWCqSWWTv0ib63RjR7dTBdg==", + "dev": true, + "requires": { + "@babel/helper-validator-identifier": "^7.14.0", + "chalk": "^2.0.0", + "js-tokens": "^4.0.0" + } + }, + "@babel/parser": { + "version": "7.14.3", + "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.14.3.tgz", + "integrity": "sha512-7MpZDIfI7sUC5zWo2+foJ50CSI5lcqDehZ0lVgIhSi4bFEk94fLAKlF3Q0nzSQQ+ca0lm+O6G9ztKVBeu8PMRQ==", + "dev": true + }, + "@babel/plugin-transform-react-jsx-self": { + "version": "7.12.13", + "resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-jsx-self/-/plugin-transform-react-jsx-self-7.12.13.tgz", + "integrity": 
"sha512-FXYw98TTJ125GVCCkFLZXlZ1qGcsYqNQhVBQcZjyrwf8FEUtVfKIoidnO8S0q+KBQpDYNTmiGo1gn67Vti04lQ==", + "dev": true, + "requires": { + "@babel/helper-plugin-utils": "^7.12.13" + } + }, + "@babel/plugin-transform-react-jsx-source": { + "version": "7.14.2", + "resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-jsx-source/-/plugin-transform-react-jsx-source-7.14.2.tgz", + "integrity": "sha512-OMorspVyjxghAjzgeAWc6O7W7vHbJhV69NeTGdl9Mxgz6PaweAuo7ffB9T5A1OQ9dGcw0As4SYMUhyNC4u7mVg==", + "dev": true, + "requires": { + "@babel/helper-plugin-utils": "^7.13.0" + } + }, + "@babel/runtime": { + "version": "7.14.0", + "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.14.0.tgz", + "integrity": "sha512-JELkvo/DlpNdJ7dlyw/eY7E0suy5i5GQH+Vlxaq1nsNJ+H7f4Vtv3jMeCEgRhZZQFXTjldYfQgv2qmM6M1v5wA==", + "requires": { + "regenerator-runtime": "^0.13.4" + } + }, + "@babel/runtime-corejs3": { + "version": "7.14.0", + "resolved": "https://registry.npmjs.org/@babel/runtime-corejs3/-/runtime-corejs3-7.14.0.tgz", + "integrity": "sha512-0R0HTZWHLk6G8jIk0FtoX+AatCtKnswS98VhXwGImFc759PJRp4Tru0PQYZofyijTFUr+gT8Mu7sgXVJLQ0ceg==", + "dev": true, + "requires": { + "core-js-pure": "^3.0.0", + "regenerator-runtime": "^0.13.4" + } + }, + "@babel/template": { + "version": "7.12.13", + "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.12.13.tgz", + "integrity": "sha512-/7xxiGA57xMo/P2GVvdEumr8ONhFOhfgq2ihK3h1e6THqzTAkHbkXgB0xI9yeTfIUoH3+oAeHhqm/I43OTbbjA==", + "dev": true, + "requires": { + "@babel/code-frame": "^7.12.13", + "@babel/parser": "^7.12.13", + "@babel/types": "^7.12.13" + } + }, + "@babel/traverse": { + "version": "7.14.2", + "resolved": "https://registry.npmjs.org/@babel/traverse/-/traverse-7.14.2.tgz", + "integrity": "sha512-TsdRgvBFHMyHOOzcP9S6QU0QQtjxlRpEYOy3mcCO5RgmC305ki42aSAmfZEMSSYBla2oZ9BMqYlncBaKmD/7iA==", + "dev": true, + "requires": { + "@babel/code-frame": "^7.12.13", + "@babel/generator": "^7.14.2", + "@babel/helper-function-name": "^7.14.2", + "@babel/helper-split-export-declaration": "^7.12.13", + "@babel/parser": "^7.14.2", + "@babel/types": "^7.14.2", + "debug": "^4.1.0", + "globals": "^11.1.0" + } + }, + "@babel/types": { + "version": "7.14.2", + "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.14.2.tgz", + "integrity": "sha512-SdjAG/3DikRHpUOjxZgnkbR11xUlyDMUFJdvnIgZEE16mqmY0BINMmc4//JMJglEmn6i7sq6p+mGrFWyZ98EEw==", + "dev": true, + "requires": { + "@babel/helper-validator-identifier": "^7.14.0", + "to-fast-properties": "^2.0.0" + } + }, + "@eslint/eslintrc": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-0.4.1.tgz", + "integrity": "sha512-5v7TDE9plVhvxQeWLXDTvFvJBdH6pEsdnl2g/dAptmuFEPedQ4Erq5rsDsX+mvAM610IhNaO2W5V1dOOnDKxkQ==", + "dev": true, + "requires": { + "ajv": "^6.12.4", + "debug": "^4.1.1", + "espree": "^7.3.0", + "globals": "^12.1.0", + "ignore": "^4.0.6", + "import-fresh": "^3.2.1", + "js-yaml": "^3.13.1", + "minimatch": "^3.0.4", + "strip-json-comments": "^3.1.1" + }, + "dependencies": { + "globals": { + "version": "12.4.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-12.4.0.tgz", + "integrity": "sha512-BWICuzzDvDoH54NHKCseDanAhE3CeDorgDL5MT6LMXXj2WCnd9UC2szdk4AWLfjdgNBCXLUanXYcpBBKOSWGwg==", + "dev": true, + "requires": { + "type-fest": "^0.8.1" + } + } + } + }, + "@nodelib/fs.scandir": { + "version": "2.1.4", + "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.4.tgz", + "integrity": 
"sha512-33g3pMJk3bg5nXbL/+CY6I2eJDzZAni49PfJnL5fghPTggPvBd/pFNSgJsdAgWptuFu7qq/ERvOYFlhvsLTCKA==", + "dev": true, + "requires": { + "@nodelib/fs.stat": "2.0.4", + "run-parallel": "^1.1.9" + } + }, + "@nodelib/fs.stat": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/@nodelib/fs.stat/-/fs.stat-2.0.4.tgz", + "integrity": "sha512-IYlHJA0clt2+Vg7bccq+TzRdJvv19c2INqBSsoOLp1je7xjtr7J26+WXR72MCdvU9q1qTzIWDfhMf+DRvQJK4Q==", + "dev": true + }, + "@nodelib/fs.walk": { + "version": "1.2.6", + "resolved": "https://registry.npmjs.org/@nodelib/fs.walk/-/fs.walk-1.2.6.tgz", + "integrity": "sha512-8Broas6vTtW4GIXTAHDoE32hnN2M5ykgCpWGbuXHQ15vEMqr23pB76e/GZcYsZCHALv50ktd24qhEyKr6wBtow==", + "dev": true, + "requires": { + "@nodelib/fs.scandir": "2.1.4", + "fastq": "^1.6.0" + } + }, + "@types/history": { + "version": "4.7.8", + "resolved": "https://registry.npmjs.org/@types/history/-/history-4.7.8.tgz", + "integrity": "sha512-S78QIYirQcUoo6UJZx9CSP0O2ix9IaeAXwQi26Rhr/+mg7qqPy8TzaxHSUut7eGjL8WmLccT7/MXf304WjqHcA==", + "dev": true + }, + "@types/json-schema": { + "version": "7.0.7", + "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.7.tgz", + "integrity": "sha512-cxWFQVseBm6O9Gbw1IWb8r6OS4OhSt3hPZLkFApLjM8TEXROBuQGLAH2i2gZpcXdLBIrpXuTDhH7Vbm1iXmNGA==", + "dev": true + }, + "@types/json5": { + "version": "0.0.29", + "resolved": "https://registry.npmjs.org/@types/json5/-/json5-0.0.29.tgz", + "integrity": "sha1-7ihweulOEdK4J7y+UnC86n8+ce4=", + "dev": true + }, + "@types/prop-types": { + "version": "15.7.3", + "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.3.tgz", + "integrity": "sha512-KfRL3PuHmqQLOG+2tGpRO26Ctg+Cq1E01D2DMriKEATHgWLfeNDmq9e29Q9WIky0dQ3NPkd1mzYH8Lm936Z9qw==", + "dev": true + }, + "@types/react": { + "version": "17.0.6", + "resolved": "https://registry.npmjs.org/@types/react/-/react-17.0.6.tgz", + "integrity": "sha512-u/TtPoF/hrvb63LdukET6ncaplYsvCvmkceasx8oG84/ZCsoLxz9Z/raPBP4lTAiWW1Jb889Y9svHmv8R26dWw==", + "dev": true, + "requires": { + "@types/prop-types": "*", + "@types/scheduler": "*", + "csstype": "^3.0.2" + } + }, + "@types/react-dom": { + "version": "17.0.5", + "resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-17.0.5.tgz", + "integrity": "sha512-ikqukEhH4H9gr4iJCmQVNzTB307kROe3XFfHAOTxOXPOw7lAoEXnM5KWTkzeANGL5Ce6ABfiMl/zJBYNi7ObmQ==", + "dev": true, + "requires": { + "@types/react": "*" + } + }, + "@types/react-router": { + "version": "5.1.14", + "resolved": "https://registry.npmjs.org/@types/react-router/-/react-router-5.1.14.tgz", + "integrity": "sha512-LAJpqYUaCTMT2anZheoidiIymt8MuX286zoVFPM3DVb23aQBH0mAkFvzpd4LKqiolV8bBtZWT5Qp7hClCNDENw==", + "dev": true, + "requires": { + "@types/history": "*", + "@types/react": "*" + } + }, + "@types/react-router-dom": { + "version": "5.1.7", + "resolved": "https://registry.npmjs.org/@types/react-router-dom/-/react-router-dom-5.1.7.tgz", + "integrity": "sha512-D5mHD6TbdV/DNHYsnwBTv+y73ei+mMjrkGrla86HthE4/PVvL1J94Bu3qABU+COXzpL23T1EZapVVpwHuBXiUg==", + "dev": true, + "requires": { + "@types/history": "*", + "@types/react": "*", + "@types/react-router": "*" + } + }, + "@types/scheduler": { + "version": "0.16.1", + "resolved": "https://registry.npmjs.org/@types/scheduler/-/scheduler-0.16.1.tgz", + "integrity": "sha512-EaCxbanVeyxDRTQBkdLb3Bvl/HK7PBK6UJjsSixB0iHKoWxE5uu2Q/DgtpOhPIojN0Zl1whvOd7PoHs2P0s5eA==", + "dev": true + }, + "@typescript-eslint/eslint-plugin": { + "version": "4.24.0", + "resolved": 
"https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-4.24.0.tgz", + "integrity": "sha512-qbCgkPM7DWTsYQGjx9RTuQGswi+bEt0isqDBeo+CKV0953zqI0Tp7CZ7Fi9ipgFA6mcQqF4NOVNwS/f2r6xShw==", + "dev": true, + "requires": { + "@typescript-eslint/experimental-utils": "4.24.0", + "@typescript-eslint/scope-manager": "4.24.0", + "debug": "^4.1.1", + "functional-red-black-tree": "^1.0.1", + "lodash": "^4.17.15", + "regexpp": "^3.0.0", + "semver": "^7.3.2", + "tsutils": "^3.17.1" + } + }, + "@typescript-eslint/experimental-utils": { + "version": "4.24.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/experimental-utils/-/experimental-utils-4.24.0.tgz", + "integrity": "sha512-IwTT2VNDKH1h8RZseMH4CcYBz6lTvRoOLDuuqNZZoThvfHEhOiZPQCow+5El3PtyxJ1iDr6UXZwYtE3yZQjhcw==", + "dev": true, + "requires": { + "@types/json-schema": "^7.0.3", + "@typescript-eslint/scope-manager": "4.24.0", + "@typescript-eslint/types": "4.24.0", + "@typescript-eslint/typescript-estree": "4.24.0", + "eslint-scope": "^5.0.0", + "eslint-utils": "^2.0.0" + } + }, + "@typescript-eslint/parser": { + "version": "4.24.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-4.24.0.tgz", + "integrity": "sha512-dj1ZIh/4QKeECLb2f/QjRwMmDArcwc2WorWPRlB8UNTZlY1KpTVsbX7e3ZZdphfRw29aTFUSNuGB8w9X5sS97w==", + "dev": true, + "requires": { + "@typescript-eslint/scope-manager": "4.24.0", + "@typescript-eslint/types": "4.24.0", + "@typescript-eslint/typescript-estree": "4.24.0", + "debug": "^4.1.1" + } + }, + "@typescript-eslint/scope-manager": { + "version": "4.24.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-4.24.0.tgz", + "integrity": "sha512-9+WYJGDnuC9VtYLqBhcSuM7du75fyCS/ypC8c5g7Sdw7pGL4NDTbeH38eJPfzIydCHZDoOgjloxSAA3+4l/zsA==", + "dev": true, + "requires": { + "@typescript-eslint/types": "4.24.0", + "@typescript-eslint/visitor-keys": "4.24.0" + } + }, + "@typescript-eslint/types": { + "version": "4.24.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-4.24.0.tgz", + "integrity": "sha512-tkZUBgDQKdvfs8L47LaqxojKDE+mIUmOzdz7r+u+U54l3GDkTpEbQ1Jp3cNqqAU9vMUCBA1fitsIhm7yN0vx9Q==", + "dev": true + }, + "@typescript-eslint/typescript-estree": { + "version": "4.24.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-4.24.0.tgz", + "integrity": "sha512-kBDitL/by/HK7g8CYLT7aKpAwlR8doshfWz8d71j97n5kUa5caHWvY0RvEUEanL/EqBJoANev8Xc/mQ6LLwXGA==", + "dev": true, + "requires": { + "@typescript-eslint/types": "4.24.0", + "@typescript-eslint/visitor-keys": "4.24.0", + "debug": "^4.1.1", + "globby": "^11.0.1", + "is-glob": "^4.0.1", + "semver": "^7.3.2", + "tsutils": "^3.17.1" + } + }, + "@typescript-eslint/visitor-keys": { + "version": "4.24.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-4.24.0.tgz", + "integrity": "sha512-4ox1sjmGHIxjEDBnMCtWFFhErXtKA1Ec0sBpuz0fqf3P+g3JFGyTxxbF06byw0FRsPnnbq44cKivH7Ks1/0s6g==", + "dev": true, + "requires": { + "@typescript-eslint/types": "4.24.0", + "eslint-visitor-keys": "^2.0.0" + } + }, + "@vitejs/plugin-react-refresh": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/@vitejs/plugin-react-refresh/-/plugin-react-refresh-1.3.3.tgz", + "integrity": "sha512-J3KFwSQKrEK7fgOwTx0PMTlsolZORUch6BswjsM50q+Y7zSvX1ROIRn+tK2VE8SCvbYRHtzEKFlYW3vsWyTosQ==", + "dev": true, + "requires": { + "@babel/core": "^7.12.13", + "@babel/plugin-transform-react-jsx-self": "^7.12.13", + 
"@babel/plugin-transform-react-jsx-source": "^7.12.13", + "react-refresh": "^0.9.0" + } + }, + "acorn": { + "version": "7.4.1", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-7.4.1.tgz", + "integrity": "sha512-nQyp0o1/mNdbTO1PO6kHkwSrmgZ0MT/jCCpNiwbUjGoRN4dlBhqJtoQuCnEOKzgTVwg0ZWiCoQy6SxMebQVh8A==", + "dev": true + }, + "acorn-jsx": { + "version": "5.3.1", + "resolved": "https://registry.npmjs.org/acorn-jsx/-/acorn-jsx-5.3.1.tgz", + "integrity": "sha512-K0Ptm/47OKfQRpNQ2J/oIN/3QYiK6FwW+eJbILhsdxh2WTLdl+30o8aGdTbm5JbffpFFAg/g+zi1E+jvJha5ng==", + "dev": true, + "requires": {} + }, + "ajv": { + "version": "6.12.6", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", + "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "dev": true, + "requires": { + "fast-deep-equal": "^3.1.1", + "fast-json-stable-stringify": "^2.0.0", + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" + } + }, + "ansi-colors": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-4.1.1.tgz", + "integrity": "sha512-JoX0apGbHaUJBNl6yF+p6JAFYZ666/hhCGKN5t9QFjbJQKUU/g8MNbFDbvfrgKXvI1QpZplPOnwIo99lX/AAmA==", + "dev": true + }, + "ansi-regex": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.0.tgz", + "integrity": "sha512-bY6fj56OUQ0hU1KjFNDQuJFezqKdrAyFdIevADiqrWHwSlbmBNMHp5ak2f40Pm8JTFyM2mqxkG6ngkHO11f/lg==", + "dev": true + }, + "ansi-styles": { + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz", + "integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==", + "dev": true, + "requires": { + "color-convert": "^1.9.0" + } + }, + "argparse": { + "version": "1.0.10", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", + "integrity": "sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==", + "dev": true, + "requires": { + "sprintf-js": "~1.0.2" + } + }, + "aria-query": { + "version": "4.2.2", + "resolved": "https://registry.npmjs.org/aria-query/-/aria-query-4.2.2.tgz", + "integrity": "sha512-o/HelwhuKpTj/frsOsbNLNgnNGVIFsVP/SW2BSF14gVl7kAfMOJ6/8wUAUvG1R1NHKrfG+2sHZTu0yauT1qBrA==", + "dev": true, + "requires": { + "@babel/runtime": "^7.10.2", + "@babel/runtime-corejs3": "^7.10.2" + } + }, + "array-includes": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/array-includes/-/array-includes-3.1.3.tgz", + "integrity": "sha512-gcem1KlBU7c9rB+Rq8/3PPKsK2kjqeEBa3bD5kkQo4nYlOHQCJqIJFqBXDEfwaRuYTT4E+FxA9xez7Gf/e3Q7A==", + "dev": true, + "requires": { + "call-bind": "^1.0.2", + "define-properties": "^1.1.3", + "es-abstract": "^1.18.0-next.2", + "get-intrinsic": "^1.1.1", + "is-string": "^1.0.5" + } + }, + "array-union": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/array-union/-/array-union-2.1.0.tgz", + "integrity": "sha512-HGyxoOTYUyCM6stUe6EJgnd4EoewAI7zMdfqO+kGjnlZmBDz/cR5pf8r/cR4Wq60sL/p0IkcjUEEPwS3GFrIyw==", + "dev": true + }, + "array.prototype.flat": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/array.prototype.flat/-/array.prototype.flat-1.2.4.tgz", + "integrity": "sha512-4470Xi3GAPAjZqFcljX2xzckv1qeKPizoNkiS0+O4IoPR2ZNpcjE0pkhdihlDouK+x6QOast26B4Q/O9DJnwSg==", + "dev": true, + "requires": { + "call-bind": "^1.0.0", + "define-properties": "^1.1.3", + "es-abstract": "^1.18.0-next.1" + } + }, + "array.prototype.flatmap": { + "version": "1.2.4", + 
"resolved": "https://registry.npmjs.org/array.prototype.flatmap/-/array.prototype.flatmap-1.2.4.tgz", + "integrity": "sha512-r9Z0zYoxqHz60vvQbWEdXIEtCwHF0yxaWfno9qzXeNHvfyl3BZqygmGzb84dsubyaXLH4husF+NFgMSdpZhk2Q==", + "dev": true, + "requires": { + "call-bind": "^1.0.0", + "define-properties": "^1.1.3", + "es-abstract": "^1.18.0-next.1", + "function-bind": "^1.1.1" + } + }, + "ast-types-flow": { + "version": "0.0.7", + "resolved": "https://registry.npmjs.org/ast-types-flow/-/ast-types-flow-0.0.7.tgz", + "integrity": "sha1-9wtzXGvKGlycItmCw+Oef+ujva0=", + "dev": true + }, + "astral-regex": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/astral-regex/-/astral-regex-2.0.0.tgz", + "integrity": "sha512-Z7tMw1ytTXt5jqMcOP+OQteU1VuNK9Y02uuJtKQ1Sv69jXQKKg5cibLwGJow8yzZP+eAc18EmLGPal0bp36rvQ==", + "dev": true + }, + "axe-core": { + "version": "4.2.1", + "resolved": "https://registry.npmjs.org/axe-core/-/axe-core-4.2.1.tgz", + "integrity": "sha512-evY7DN8qSIbsW2H/TWQ1bX3sXN1d4MNb5Vb4n7BzPuCwRHdkZ1H2eNLuSh73EoQqkGKUtju2G2HCcjCfhvZIAA==", + "dev": true + }, + "axobject-query": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/axobject-query/-/axobject-query-2.2.0.tgz", + "integrity": "sha512-Td525n+iPOOyUQIeBfcASuG6uJsDOITl7Mds5gFyerkWiX7qhUTdYUBlSgNMyVqtSJqwpt1kXGLdUt6SykLMRA==", + "dev": true + }, + "balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "dev": true + }, + "brace-expansion": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", + "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "dev": true, + "requires": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "braces": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz", + "integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==", + "dev": true, + "requires": { + "fill-range": "^7.0.1" + } + }, + "browserslist": { + "version": "4.16.6", + "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.16.6.tgz", + "integrity": "sha512-Wspk/PqO+4W9qp5iUTJsa1B/QrYn1keNCcEP5OvP7WBwT4KaDly0uONYmC6Xa3Z5IqnUgS0KcgLYu1l74x0ZXQ==", + "dev": true, + "requires": { + "caniuse-lite": "^1.0.30001219", + "colorette": "^1.2.2", + "electron-to-chromium": "^1.3.723", + "escalade": "^3.1.1", + "node-releases": "^1.1.71" + } + }, + "call-bind": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.2.tgz", + "integrity": "sha512-7O+FbCihrB5WGbFYesctwmTKae6rOiIzmz1icreWJ+0aA7LJfuqhEso2T9ncpcFtzMQtzXf2QGGueWJGTYsqrA==", + "dev": true, + "requires": { + "function-bind": "^1.1.1", + "get-intrinsic": "^1.0.2" + } + }, + "callsites": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", + "integrity": "sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==", + "dev": true + }, + "caniuse-lite": { + "version": "1.0.30001228", + "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001228.tgz", + "integrity": "sha512-QQmLOGJ3DEgokHbMSA8cj2a+geXqmnpyOFT0lhQV6P3/YOJvGDEwoedcwxEQ30gJIwIIunHIicunJ2rzK5gB2A==", + "dev": true + }, + "chalk": { + "version": "2.4.2", + 
"resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz", + "integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==", + "dev": true, + "requires": { + "ansi-styles": "^3.2.1", + "escape-string-regexp": "^1.0.5", + "supports-color": "^5.3.0" + } + }, + "color-convert": { + "version": "1.9.3", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", + "integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==", + "dev": true, + "requires": { + "color-name": "1.1.3" + } + }, + "color-name": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", + "integrity": "sha1-p9BVi9icQveV3UIyj3QIMcpTvCU=", + "dev": true + }, + "colorette": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/colorette/-/colorette-1.2.2.tgz", + "integrity": "sha512-MKGMzyfeuutC/ZJ1cba9NqcNpfeqMUcYmyF1ZFY6/Cn7CNSAKx6a+s48sqLqyAiZuaP2TcqMhoo+dlwFnVxT9w==", + "dev": true + }, + "concat-map": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", + "integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=", + "dev": true + }, + "confusing-browser-globals": { + "version": "1.0.10", + "resolved": "https://registry.npmjs.org/confusing-browser-globals/-/confusing-browser-globals-1.0.10.tgz", + "integrity": "sha512-gNld/3lySHwuhaVluJUKLePYirM3QNCKzVxqAdhJII9/WXKVX5PURzMVJspS1jTslSqjeuG4KMVTSouit5YPHA==", + "dev": true + }, + "contains-path": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/contains-path/-/contains-path-1.0.0.tgz", + "integrity": "sha1-NFizMhhWA+ju0Y9RjUoQiIo6vJE=", + "dev": true, + "requires": { + "normalize-path": "^2.1.1", + "path-starts-with": "^1.0.0" + } + }, + "convert-source-map": { + "version": "1.7.0", + "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-1.7.0.tgz", + "integrity": "sha512-4FJkXzKXEDB1snCFZlLP4gpC3JILicCpGbzG9f9G7tGqGCzETQ2hWPrcinA9oU4wtf2biUaEH5065UnMeR33oA==", + "dev": true, + "requires": { + "safe-buffer": "~5.1.1" + } + }, + "core-js-pure": { + "version": "3.12.1", + "resolved": "https://registry.npmjs.org/core-js-pure/-/core-js-pure-3.12.1.tgz", + "integrity": "sha512-1cch+qads4JnDSWsvc7d6nzlKAippwjUlf6vykkTLW53VSV+NkE6muGBToAjEA8pG90cSfcud3JgVmW2ds5TaQ==", + "dev": true + }, + "cross-spawn": { + "version": "7.0.3", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", + "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==", + "dev": true, + "requires": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + } + }, + "csstype": { + "version": "3.0.8", + "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.0.8.tgz", + "integrity": "sha512-jXKhWqXPmlUeoQnF/EhTtTl4C9SnrxSH/jZUih3jmO6lBKr99rP3/+FmrMj4EFpOXzMtXHAZkd3x0E6h6Fgflw==", + "dev": true + }, + "damerau-levenshtein": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/damerau-levenshtein/-/damerau-levenshtein-1.0.7.tgz", + "integrity": "sha512-VvdQIPGdWP0SqFXghj79Wf/5LArmreyMsGLa6FG6iC4t3j7j5s71TrwWmT/4akbDQIqjfACkLZmjXhA7g2oUZw==", + "dev": true + }, + "debug": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.1.tgz", + "integrity": "sha512-doEwdvm4PCeK4K3RQN2ZC2BYUBaxwLARCqZmMjtF8a51J2Rb0xpVloFRnCODwqjpwnAoao4pelN8l3RJdv3gRQ==", + "dev": true, + "requires": { + "ms": "2.1.2" + } + }, + 
"deep-is": { + "version": "0.1.3", + "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.3.tgz", + "integrity": "sha1-s2nW+128E+7PUk+RsHD+7cNXzzQ=", + "dev": true + }, + "define-properties": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/define-properties/-/define-properties-1.1.3.tgz", + "integrity": "sha512-3MqfYKj2lLzdMSf8ZIZE/V+Zuy+BgD6f164e8K2w7dgnpKArBDerGYpM46IYYcjnkdPNMjPk9A6VFB8+3SKlXQ==", + "dev": true, + "requires": { + "object-keys": "^1.0.12" + } + }, + "dir-glob": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/dir-glob/-/dir-glob-3.0.1.tgz", + "integrity": "sha512-WkrWp9GR4KXfKGYzOLmTuGVi1UWFfws377n9cc55/tb6DuqyF6pcQ5AbiHEshaDpY9v6oaSr2XCDidGmMwdzIA==", + "dev": true, + "requires": { + "path-type": "^4.0.0" + } + }, + "doctrine": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-3.0.0.tgz", + "integrity": "sha512-yS+Q5i3hBf7GBkd4KG8a7eBNNWNGLTaEwwYWUijIYM7zrlYDM0BFXHjjPWlWZ1Rg7UaddZeIDmi9jF3HmqiQ2w==", + "dev": true, + "requires": { + "esutils": "^2.0.2" + } + }, + "electron-to-chromium": { + "version": "1.3.732", + "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.3.732.tgz", + "integrity": "sha512-qKD5Pbq+QMk4nea4lMuncUMhpEiQwaJyCW7MrvissnRcBDENhVfDmAqQYRQ3X525oTzhar9Zh1cK0L2d1UKYcw==", + "dev": true + }, + "emoji-regex": { + "version": "9.2.2", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz", + "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==", + "dev": true + }, + "enquirer": { + "version": "2.3.6", + "resolved": "https://registry.npmjs.org/enquirer/-/enquirer-2.3.6.tgz", + "integrity": "sha512-yjNnPr315/FjS4zIsUxYguYUPP2e1NK4d7E7ZOLiyYCcbFBiTMyID+2wvm2w6+pZ/odMA7cRkjhsPbltwBOrLg==", + "dev": true, + "requires": { + "ansi-colors": "^4.1.1" + } + }, + "error-ex": { + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/error-ex/-/error-ex-1.3.2.tgz", + "integrity": "sha512-7dFHNmqeFSEt2ZBsCriorKnn3Z2pj+fd9kmI6QoWw4//DL+icEBfc0U7qJCisqrTsKTjw4fNFy2pW9OqStD84g==", + "dev": true, + "requires": { + "is-arrayish": "^0.2.1" + } + }, + "es-abstract": { + "version": "1.18.0", + "resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.18.0.tgz", + "integrity": "sha512-LJzK7MrQa8TS0ja2w3YNLzUgJCGPdPOV1yVvezjNnS89D+VR08+Szt2mz3YB2Dck/+w5tfIq/RoUAFqJJGM2yw==", + "dev": true, + "requires": { + "call-bind": "^1.0.2", + "es-to-primitive": "^1.2.1", + "function-bind": "^1.1.1", + "get-intrinsic": "^1.1.1", + "has": "^1.0.3", + "has-symbols": "^1.0.2", + "is-callable": "^1.2.3", + "is-negative-zero": "^2.0.1", + "is-regex": "^1.1.2", + "is-string": "^1.0.5", + "object-inspect": "^1.9.0", + "object-keys": "^1.1.1", + "object.assign": "^4.1.2", + "string.prototype.trimend": "^1.0.4", + "string.prototype.trimstart": "^1.0.4", + "unbox-primitive": "^1.0.0" + } + }, + "es-to-primitive": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/es-to-primitive/-/es-to-primitive-1.2.1.tgz", + "integrity": "sha512-QCOllgZJtaUo9miYBcLChTUaHNjJF3PYs1VidD7AwiEj1kYxKeQTctLAezAOH5ZKRH0g2IgPn6KwB4IT8iRpvA==", + "dev": true, + "requires": { + "is-callable": "^1.1.4", + "is-date-object": "^1.0.1", + "is-symbol": "^1.0.2" + } + }, + "esbuild": { + "version": "0.11.23", + "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.11.23.tgz", + "integrity": "sha512-iaiZZ9vUF5wJV8ob1tl+5aJTrwDczlvGP0JoMmnpC2B0ppiMCu8n8gmy5ZTGl5bcG081XBVn+U+jP+mPFm5T5Q==", + "dev": true + 
}, + "escalade": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.1.tgz", + "integrity": "sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==", + "dev": true + }, + "escape-string-regexp": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", + "integrity": "sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ=", + "dev": true + }, + "eslint": { + "version": "7.26.0", + "resolved": "https://registry.npmjs.org/eslint/-/eslint-7.26.0.tgz", + "integrity": "sha512-4R1ieRf52/izcZE7AlLy56uIHHDLT74Yzz2Iv2l6kDaYvEu9x+wMB5dZArVL8SYGXSYV2YAg70FcW5Y5nGGNIg==", + "dev": true, + "requires": { + "@babel/code-frame": "7.12.11", + "@eslint/eslintrc": "^0.4.1", + "ajv": "^6.10.0", + "chalk": "^4.0.0", + "cross-spawn": "^7.0.2", + "debug": "^4.0.1", + "doctrine": "^3.0.0", + "enquirer": "^2.3.5", + "eslint-scope": "^5.1.1", + "eslint-utils": "^2.1.0", + "eslint-visitor-keys": "^2.0.0", + "espree": "^7.3.1", + "esquery": "^1.4.0", + "esutils": "^2.0.2", + "file-entry-cache": "^6.0.1", + "functional-red-black-tree": "^1.0.1", + "glob-parent": "^5.0.0", + "globals": "^13.6.0", + "ignore": "^4.0.6", + "import-fresh": "^3.0.0", + "imurmurhash": "^0.1.4", + "is-glob": "^4.0.0", + "js-yaml": "^3.13.1", + "json-stable-stringify-without-jsonify": "^1.0.1", + "levn": "^0.4.1", + "lodash": "^4.17.21", + "minimatch": "^3.0.4", + "natural-compare": "^1.4.0", + "optionator": "^0.9.1", + "progress": "^2.0.0", + "regexpp": "^3.1.0", + "semver": "^7.2.1", + "strip-ansi": "^6.0.0", + "strip-json-comments": "^3.1.0", + "table": "^6.0.4", + "text-table": "^0.2.0", + "v8-compile-cache": "^2.0.3" + }, + "dependencies": { + "@babel/code-frame": { + "version": "7.12.11", + "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.12.11.tgz", + "integrity": "sha512-Zt1yodBx1UcyiePMSkWnU4hPqhwq7hGi2nFL1LeA3EUl+q2LQx16MISgJ0+z7dnmgvP9QtIleuETGOiOH1RcIw==", + "dev": true, + "requires": { + "@babel/highlight": "^7.10.4" + } + }, + "ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, + "requires": { + "color-convert": "^2.0.1" + } + }, + "chalk": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.1.tgz", + "integrity": "sha512-diHzdDKxcU+bAsUboHLPEDQiw0qEe0qd7SYUn3HgcFlWgbDcfLGswOHYeGrHKzG9z6UYf01d9VFMfZxPM1xZSg==", + "dev": true, + "requires": { + "ansi-styles": "^4.1.0", + "supports-color": "^7.1.0" + } + }, + "color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dev": true, + "requires": { + "color-name": "~1.1.4" + } + }, + "color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "dev": true + }, + "globals": { + "version": "13.8.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-13.8.0.tgz", + "integrity": "sha512-rHtdA6+PDBIjeEvA91rpqzEvk/k3/i7EeNQiryiWuJH0Hw9cpyJMAt2jtbAwUaRdhD+573X4vWw6IcjKPasi9Q==", + "dev": true, + "requires": { + "type-fest": "^0.20.2" + } + }, + "has-flag": { + 
"version": "4.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "dev": true + }, + "supports-color": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", + "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "dev": true, + "requires": { + "has-flag": "^4.0.0" + } + }, + "type-fest": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.20.2.tgz", + "integrity": "sha512-Ne+eE4r0/iWnpAxD852z3A+N0Bt5RN//NjJwRd2VFHEmrywxf5vsZlh4R6lixl6B+wz/8d+maTSAkN1FIkI3LQ==", + "dev": true + } + } + }, + "eslint-config-airbnb": { + "version": "18.2.1", + "resolved": "https://registry.npmjs.org/eslint-config-airbnb/-/eslint-config-airbnb-18.2.1.tgz", + "integrity": "sha512-glZNDEZ36VdlZWoxn/bUR1r/sdFKPd1mHPbqUtkctgNG4yT2DLLtJ3D+yCV+jzZCc2V1nBVkmdknOJBZ5Hc0fg==", + "dev": true, + "requires": { + "eslint-config-airbnb-base": "^14.2.1", + "object.assign": "^4.1.2", + "object.entries": "^1.1.2" + } + }, + "eslint-config-airbnb-base": { + "version": "14.2.1", + "resolved": "https://registry.npmjs.org/eslint-config-airbnb-base/-/eslint-config-airbnb-base-14.2.1.tgz", + "integrity": "sha512-GOrQyDtVEc1Xy20U7vsB2yAoB4nBlfH5HZJeatRXHleO+OS5Ot+MWij4Dpltw4/DyIkqUfqz1epfhVR5XWWQPA==", + "dev": true, + "requires": { + "confusing-browser-globals": "^1.0.10", + "object.assign": "^4.1.2", + "object.entries": "^1.1.2" + } + }, + "eslint-import-resolver-node": { + "version": "0.3.4", + "resolved": "https://registry.npmjs.org/eslint-import-resolver-node/-/eslint-import-resolver-node-0.3.4.tgz", + "integrity": "sha512-ogtf+5AB/O+nM6DIeBUNr2fuT7ot9Qg/1harBfBtaP13ekEWFQEEMP94BCB7zaNW3gyY+8SHYF00rnqYwXKWOA==", + "dev": true, + "requires": { + "debug": "^2.6.9", + "resolve": "^1.13.1" + }, + "dependencies": { + "debug": { + "version": "2.6.9", + "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", + "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "dev": true, + "requires": { + "ms": "2.0.0" + } + }, + "ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=", + "dev": true + } + } + }, + "eslint-module-utils": { + "version": "2.6.1", + "resolved": "https://registry.npmjs.org/eslint-module-utils/-/eslint-module-utils-2.6.1.tgz", + "integrity": "sha512-ZXI9B8cxAJIH4nfkhTwcRTEAnrVfobYqwjWy/QMCZ8rHkZHFjf9yO4BzpiF9kCSfNlMG54eKigISHpX0+AaT4A==", + "dev": true, + "requires": { + "debug": "^3.2.7", + "pkg-dir": "^2.0.0" + }, + "dependencies": { + "debug": { + "version": "3.2.7", + "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.7.tgz", + "integrity": "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==", + "dev": true, + "requires": { + "ms": "^2.1.1" + } + } + } + }, + "eslint-plugin-import": { + "version": "2.23.2", + "resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.23.2.tgz", + "integrity": "sha512-LmNoRptHBxOP+nb0PIKz1y6OSzCJlB+0g0IGS3XV4KaKk2q4szqQ6s6F1utVf5ZRkxk/QOTjdxe7v4VjS99Bsg==", + "dev": true, + "requires": { + "array-includes": "^3.1.3", + "array.prototype.flat": "^1.2.4", + "contains-path": "^1.0.0", + "debug": "^2.6.9", + "doctrine": "^2.1.0", + "eslint-import-resolver-node": 
"^0.3.4", + "eslint-module-utils": "^2.6.1", + "find-up": "^2.0.0", + "has": "^1.0.3", + "is-core-module": "^2.4.0", + "minimatch": "^3.0.4", + "object.values": "^1.1.3", + "pkg-up": "^2.0.0", + "read-pkg-up": "^3.0.0", + "resolve": "^1.20.0", + "tsconfig-paths": "^3.9.0" + }, + "dependencies": { + "debug": { + "version": "2.6.9", + "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", + "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "dev": true, + "requires": { + "ms": "2.0.0" + } + }, + "doctrine": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-2.1.0.tgz", + "integrity": "sha512-35mSku4ZXK0vfCuHEDAwt55dg2jNajHZ1odvF+8SSr82EsZY4QmXfuWso8oEd8zRhVObSN18aM0CjSdoBX7zIw==", + "dev": true, + "requires": { + "esutils": "^2.0.2" + } + }, + "ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=", + "dev": true + } + } + }, + "eslint-plugin-jsx-a11y": { + "version": "6.4.1", + "resolved": "https://registry.npmjs.org/eslint-plugin-jsx-a11y/-/eslint-plugin-jsx-a11y-6.4.1.tgz", + "integrity": "sha512-0rGPJBbwHoGNPU73/QCLP/vveMlM1b1Z9PponxO87jfr6tuH5ligXbDT6nHSSzBC8ovX2Z+BQu7Bk5D/Xgq9zg==", + "dev": true, + "requires": { + "@babel/runtime": "^7.11.2", + "aria-query": "^4.2.2", + "array-includes": "^3.1.1", + "ast-types-flow": "^0.0.7", + "axe-core": "^4.0.2", + "axobject-query": "^2.2.0", + "damerau-levenshtein": "^1.0.6", + "emoji-regex": "^9.0.0", + "has": "^1.0.3", + "jsx-ast-utils": "^3.1.0", + "language-tags": "^1.0.5" + } + }, + "eslint-plugin-react": { + "version": "7.23.2", + "resolved": "https://registry.npmjs.org/eslint-plugin-react/-/eslint-plugin-react-7.23.2.tgz", + "integrity": "sha512-AfjgFQB+nYszudkxRkTFu0UR1zEQig0ArVMPloKhxwlwkzaw/fBiH0QWcBBhZONlXqQC51+nfqFrkn4EzHcGBw==", + "dev": true, + "requires": { + "array-includes": "^3.1.3", + "array.prototype.flatmap": "^1.2.4", + "doctrine": "^2.1.0", + "has": "^1.0.3", + "jsx-ast-utils": "^2.4.1 || ^3.0.0", + "minimatch": "^3.0.4", + "object.entries": "^1.1.3", + "object.fromentries": "^2.0.4", + "object.values": "^1.1.3", + "prop-types": "^15.7.2", + "resolve": "^2.0.0-next.3", + "string.prototype.matchall": "^4.0.4" + }, + "dependencies": { + "doctrine": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-2.1.0.tgz", + "integrity": "sha512-35mSku4ZXK0vfCuHEDAwt55dg2jNajHZ1odvF+8SSr82EsZY4QmXfuWso8oEd8zRhVObSN18aM0CjSdoBX7zIw==", + "dev": true, + "requires": { + "esutils": "^2.0.2" + } + }, + "resolve": { + "version": "2.0.0-next.3", + "resolved": "https://registry.npmjs.org/resolve/-/resolve-2.0.0-next.3.tgz", + "integrity": "sha512-W8LucSynKUIDu9ylraa7ueVZ7hc0uAgJBxVsQSKOXOyle8a93qXhcz+XAXZ8bIq2d6i4Ehddn6Evt+0/UwKk6Q==", + "dev": true, + "requires": { + "is-core-module": "^2.2.0", + "path-parse": "^1.0.6" + } + } + } + }, + "eslint-plugin-react-hooks": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/eslint-plugin-react-hooks/-/eslint-plugin-react-hooks-4.2.0.tgz", + "integrity": "sha512-623WEiZJqxR7VdxFCKLI6d6LLpwJkGPYKODnkH3D7WpOG5KM8yWueBd8TLsNAetEJNF5iJmolaAKO3F8yzyVBQ==", + "dev": true, + "requires": {} + }, + "eslint-scope": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-5.1.1.tgz", + "integrity": "sha512-2NxwbF/hZ0KpepYN0cNbo+FN6XoK7GaHlQhgx/hIZl6Va0bF45RQOOwhLIy8lQDbuCiadSLCBnH2CFYquit5bw==", + "dev": true, + "requires": { + 
"esrecurse": "^4.3.0", + "estraverse": "^4.1.1" + } + }, + "eslint-utils": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/eslint-utils/-/eslint-utils-2.1.0.tgz", + "integrity": "sha512-w94dQYoauyvlDc43XnGB8lU3Zt713vNChgt4EWwhXAP2XkBvndfxF0AgIqKOOasjPIPzj9JqgwkwbCYD0/V3Zg==", + "dev": true, + "requires": { + "eslint-visitor-keys": "^1.1.0" + }, + "dependencies": { + "eslint-visitor-keys": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-1.3.0.tgz", + "integrity": "sha512-6J72N8UNa462wa/KFODt/PJ3IU60SDpC3QXC1Hjc1BXXpfL2C9R5+AU7jhe0F6GREqVMh4Juu+NY7xn+6dipUQ==", + "dev": true + } + } + }, + "eslint-visitor-keys": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-2.1.0.tgz", + "integrity": "sha512-0rSmRBzXgDzIsD6mGdJgevzgezI534Cer5L/vyMX0kHzT/jiB43jRhd9YUlMGYLQy2zprNmoT8qasCGtY+QaKw==", + "dev": true + }, + "espree": { + "version": "7.3.1", + "resolved": "https://registry.npmjs.org/espree/-/espree-7.3.1.tgz", + "integrity": "sha512-v3JCNCE64umkFpmkFGqzVKsOT0tN1Zr+ueqLZfpV1Ob8e+CEgPWa+OxCoGH3tnhimMKIaBm4m/vaRpJ/krRz2g==", + "dev": true, + "requires": { + "acorn": "^7.4.0", + "acorn-jsx": "^5.3.1", + "eslint-visitor-keys": "^1.3.0" + }, + "dependencies": { + "eslint-visitor-keys": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-1.3.0.tgz", + "integrity": "sha512-6J72N8UNa462wa/KFODt/PJ3IU60SDpC3QXC1Hjc1BXXpfL2C9R5+AU7jhe0F6GREqVMh4Juu+NY7xn+6dipUQ==", + "dev": true + } + } + }, + "esprima": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz", + "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==", + "dev": true + }, + "esquery": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/esquery/-/esquery-1.4.0.tgz", + "integrity": "sha512-cCDispWt5vHHtwMY2YrAQ4ibFkAL8RbH5YGBnZBc90MolvvfkkQcJro/aZiAQUlQ3qgrYS6D6v8Gc5G5CQsc9w==", + "dev": true, + "requires": { + "estraverse": "^5.1.0" + }, + "dependencies": { + "estraverse": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.2.0.tgz", + "integrity": "sha512-BxbNGGNm0RyRYvUdHpIwv9IWzeM9XClbOxwoATuFdOE7ZE6wHL+HQ5T8hoPM+zHvmKzzsEqhgy0GrQ5X13afiQ==", + "dev": true + } + } + }, + "esrecurse": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/esrecurse/-/esrecurse-4.3.0.tgz", + "integrity": "sha512-KmfKL3b6G+RXvP8N1vr3Tq1kL/oCFgn2NYXEtqP8/L3pKapUA4G8cFVaoF3SU323CD4XypR/ffioHmkti6/Tag==", + "dev": true, + "requires": { + "estraverse": "^5.2.0" + }, + "dependencies": { + "estraverse": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.2.0.tgz", + "integrity": "sha512-BxbNGGNm0RyRYvUdHpIwv9IWzeM9XClbOxwoATuFdOE7ZE6wHL+HQ5T8hoPM+zHvmKzzsEqhgy0GrQ5X13afiQ==", + "dev": true + } + } + }, + "estraverse": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-4.3.0.tgz", + "integrity": "sha512-39nnKffWz8xN1BU/2c79n9nB9HDzo0niYUqx6xyqUnyoAnQyyWpOTdZEeiCch8BBu515t4wp9ZmgVfVhn9EBpw==", + "dev": true + }, + "esutils": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", + "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", + "dev": true + }, + "fast-deep-equal": { + "version": "3.1.3", + "resolved": 
"https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", + "dev": true + }, + "fast-glob": { + "version": "3.2.5", + "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.2.5.tgz", + "integrity": "sha512-2DtFcgT68wiTTiwZ2hNdJfcHNke9XOfnwmBRWXhmeKM8rF0TGwmC/Qto3S7RoZKp5cilZbxzO5iTNTQsJ+EeDg==", + "dev": true, + "requires": { + "@nodelib/fs.stat": "^2.0.2", + "@nodelib/fs.walk": "^1.2.3", + "glob-parent": "^5.1.0", + "merge2": "^1.3.0", + "micromatch": "^4.0.2", + "picomatch": "^2.2.1" + } + }, + "fast-json-stable-stringify": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", + "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==", + "dev": true + }, + "fast-levenshtein": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz", + "integrity": "sha1-PYpcZog6FqMMqGQ+hR8Zuqd5eRc=", + "dev": true + }, + "fastq": { + "version": "1.11.0", + "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.11.0.tgz", + "integrity": "sha512-7Eczs8gIPDrVzT+EksYBcupqMyxSHXXrHOLRRxU2/DicV8789MRBRR8+Hc2uWzUupOs4YS4JzBmBxjjCVBxD/g==", + "dev": true, + "requires": { + "reusify": "^1.0.4" + } + }, + "file-entry-cache": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-6.0.1.tgz", + "integrity": "sha512-7Gps/XWymbLk2QLYK4NzpMOrYjMhdIxXuIvy2QBsLE6ljuodKvdkWs/cpyJJ3CVIVpH0Oi1Hvg1ovbMzLdFBBg==", + "dev": true, + "requires": { + "flat-cache": "^3.0.4" + } + }, + "fill-range": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", + "integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==", + "dev": true, + "requires": { + "to-regex-range": "^5.0.1" + } + }, + "find-up": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-2.1.0.tgz", + "integrity": "sha1-RdG35QbHF93UgndaK3eSCjwMV6c=", + "dev": true, + "requires": { + "locate-path": "^2.0.0" + } + }, + "flat-cache": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/flat-cache/-/flat-cache-3.0.4.tgz", + "integrity": "sha512-dm9s5Pw7Jc0GvMYbshN6zchCA9RgQlzzEZX3vylR9IqFfS8XciblUXOKfW6SiuJ0e13eDYZoZV5wdrev7P3Nwg==", + "dev": true, + "requires": { + "flatted": "^3.1.0", + "rimraf": "^3.0.2" + } + }, + "flatted": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.1.1.tgz", + "integrity": "sha512-zAoAQiudy+r5SvnSw3KJy5os/oRJYHzrzja/tBDqrZtNhUw8bt6y8OBzMWcjWr+8liV8Eb6yOhw8WZ7VFZ5ZzA==", + "dev": true + }, + "fs.realpath": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", + "integrity": "sha1-FQStJSMVjKpA20onh8sBQRmU6k8=", + "dev": true + }, + "fsevents": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", + "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", + "dev": true, + "optional": true + }, + "function-bind": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.1.tgz", + "integrity": "sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A==", + "dev": true + }, + 
"functional-red-black-tree": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/functional-red-black-tree/-/functional-red-black-tree-1.0.1.tgz", + "integrity": "sha1-GwqzvVU7Kg1jmdKcDj6gslIHgyc=", + "dev": true + }, + "gensync": { + "version": "1.0.0-beta.2", + "resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz", + "integrity": "sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==", + "dev": true + }, + "get-intrinsic": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.1.1.tgz", + "integrity": "sha512-kWZrnVM42QCiEA2Ig1bG8zjoIMOgxWwYCEeNdwY6Tv/cOSeGpcoX4pXHfKUxNKVoArnrEr2e9srnAxxGIraS9Q==", + "dev": true, + "requires": { + "function-bind": "^1.1.1", + "has": "^1.0.3", + "has-symbols": "^1.0.1" + } + }, + "glob": { + "version": "7.1.7", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.7.tgz", + "integrity": "sha512-OvD9ENzPLbegENnYP5UUfJIirTg4+XwMWGaQfQTY0JenxNvvIKP3U3/tAQSPIu/lHxXYSZmpXlUHeqAIdKzBLQ==", + "dev": true, + "requires": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.0.4", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + } + }, + "glob-parent": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", + "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", + "dev": true, + "requires": { + "is-glob": "^4.0.1" + } + }, + "globals": { + "version": "11.12.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-11.12.0.tgz", + "integrity": "sha512-WOBp/EEGUiIsJSp7wcv/y6MO+lV9UoncWqxuFfm8eBwzWNgyfBd6Gz+IeKQ9jCmyhoH99g15M3T+QaVHFjizVA==", + "dev": true + }, + "globby": { + "version": "11.0.3", + "resolved": "https://registry.npmjs.org/globby/-/globby-11.0.3.tgz", + "integrity": "sha512-ffdmosjA807y7+lA1NM0jELARVmYul/715xiILEjo3hBLPTcirgQNnXECn5g3mtR8TOLCVbkfua1Hpen25/Xcg==", + "dev": true, + "requires": { + "array-union": "^2.1.0", + "dir-glob": "^3.0.1", + "fast-glob": "^3.1.1", + "ignore": "^5.1.4", + "merge2": "^1.3.0", + "slash": "^3.0.0" + }, + "dependencies": { + "ignore": { + "version": "5.1.8", + "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.1.8.tgz", + "integrity": "sha512-BMpfD7PpiETpBl/A6S498BaIJ6Y/ABT93ETbby2fP00v4EbvPBXWEoaR1UBPKs3iR53pJY7EtZk5KACI57i1Uw==", + "dev": true + } + } + }, + "graceful-fs": { + "version": "4.2.6", + "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.6.tgz", + "integrity": "sha512-nTnJ528pbqxYanhpDYsi4Rd8MAeaBA67+RZ10CM1m3bTAVFEDcd5AuA4a6W5YkGZ1iNXHzZz8T6TBKLeBuNriQ==", + "dev": true + }, + "has": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/has/-/has-1.0.3.tgz", + "integrity": "sha512-f2dvO0VU6Oej7RkWJGrehjbzMAjFp5/VKPp5tTpWIV4JHHZK1/BxbFRtf/siA2SWTe09caDmVtYYzWEIbBS4zw==", + "dev": true, + "requires": { + "function-bind": "^1.1.1" + } + }, + "has-bigints": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/has-bigints/-/has-bigints-1.0.1.tgz", + "integrity": "sha512-LSBS2LjbNBTf6287JEbEzvJgftkF5qFkmCo9hDRpAzKhUOlJ+hx8dd4USs00SgsUNwc4617J9ki5YtEClM2ffA==", + "dev": true + }, + "has-flag": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", + "integrity": "sha1-tdRU3CGZriJWmfNGfloH87lVuv0=", + "dev": true + }, + "has-symbols": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.0.2.tgz", + 
"integrity": "sha512-chXa79rL/UC2KlX17jo3vRGz0azaWEx5tGqZg5pO3NUyEJVB17dMruQlzCCOfUvElghKcm5194+BCRvi2Rv/Gw==", + "dev": true + }, + "history": { + "version": "4.10.1", + "resolved": "https://registry.npmjs.org/history/-/history-4.10.1.tgz", + "integrity": "sha512-36nwAD620w12kuzPAsyINPWJqlNbij+hpK1k9XRloDtym8mxzGYl2c17LnV6IAGB2Dmg4tEa7G7DlawS0+qjew==", + "requires": { + "@babel/runtime": "^7.1.2", + "loose-envify": "^1.2.0", + "resolve-pathname": "^3.0.0", + "tiny-invariant": "^1.0.2", + "tiny-warning": "^1.0.0", + "value-equal": "^1.0.1" + } + }, + "hoist-non-react-statics": { + "version": "3.3.2", + "resolved": "https://registry.npmjs.org/hoist-non-react-statics/-/hoist-non-react-statics-3.3.2.tgz", + "integrity": "sha512-/gGivxi8JPKWNm/W0jSmzcMPpfpPLc3dY/6GxhX2hQ9iGj3aDfklV4ET7NjKpSinLpJ5vafa9iiGIEZg10SfBw==", + "requires": { + "react-is": "^16.7.0" + } + }, + "hosted-git-info": { + "version": "2.8.9", + "resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-2.8.9.tgz", + "integrity": "sha512-mxIDAb9Lsm6DoOJ7xH+5+X4y1LU/4Hi50L9C5sIswK3JzULS4bwk1FvjdBgvYR4bzT4tuUQiC15FE2f5HbLvYw==", + "dev": true + }, + "ignore": { + "version": "4.0.6", + "resolved": "https://registry.npmjs.org/ignore/-/ignore-4.0.6.tgz", + "integrity": "sha512-cyFDKrqc/YdcWFniJhzI42+AzS+gNwmUzOSFcRCQYwySuBBBy/KjuxWLZ/FHEH6Moq1NizMOBWyTcv8O4OZIMg==", + "dev": true + }, + "import-fresh": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.0.tgz", + "integrity": "sha512-veYYhQa+D1QBKznvhUHxb8faxlrwUnxseDAbAp457E0wLNio2bOSKnjYDhMj+YiAq61xrMGhQk9iXVk5FzgQMw==", + "dev": true, + "requires": { + "parent-module": "^1.0.0", + "resolve-from": "^4.0.0" + } + }, + "imurmurhash": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/imurmurhash/-/imurmurhash-0.1.4.tgz", + "integrity": "sha1-khi5srkoojixPcT7a21XbyMUU+o=", + "dev": true + }, + "inflight": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", + "integrity": "sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk=", + "dev": true, + "requires": { + "once": "^1.3.0", + "wrappy": "1" + } + }, + "inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "dev": true + }, + "internal-slot": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/internal-slot/-/internal-slot-1.0.3.tgz", + "integrity": "sha512-O0DB1JC/sPyZl7cIo78n5dR7eUSwwpYPiXRhTzNxZVAMUuB8vlnRFyLxdrVToks6XPLVnFfbzaVd5WLjhgg+vA==", + "dev": true, + "requires": { + "get-intrinsic": "^1.1.0", + "has": "^1.0.3", + "side-channel": "^1.0.4" + } + }, + "is-arrayish": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.2.1.tgz", + "integrity": "sha1-d8mYQFJ6qOyxqLppe4BkWnqSap0=", + "dev": true + }, + "is-bigint": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/is-bigint/-/is-bigint-1.0.2.tgz", + "integrity": "sha512-0JV5+SOCQkIdzjBK9buARcV804Ddu7A0Qet6sHi3FimE9ne6m4BGQZfRn+NZiXbBk4F4XmHfDZIipLj9pX8dSA==", + "dev": true + }, + "is-boolean-object": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/is-boolean-object/-/is-boolean-object-1.1.1.tgz", + "integrity": "sha512-bXdQWkECBUIAcCkeH1unwJLIpZYaa5VvuygSyS/c2lf719mTKZDU5UdDRlpd01UjADgmW8RfqaP+mRaVPdr/Ng==", + "dev": true, + "requires": { + "call-bind": "^1.0.2" + } + }, + "is-callable": { + "version": "1.2.3", + 
"resolved": "https://registry.npmjs.org/is-callable/-/is-callable-1.2.3.tgz", + "integrity": "sha512-J1DcMe8UYTBSrKezuIUTUwjXsho29693unXM2YhJUTR2txK/eG47bvNa/wipPFmZFgr/N6f1GA66dv0mEyTIyQ==", + "dev": true + }, + "is-core-module": { + "version": "2.4.0", + "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.4.0.tgz", + "integrity": "sha512-6A2fkfq1rfeQZjxrZJGerpLCTHRNEBiSgnu0+obeJpEPZRUooHgsizvzv0ZjJwOz3iWIHdJtVWJ/tmPr3D21/A==", + "dev": true, + "requires": { + "has": "^1.0.3" + } + }, + "is-date-object": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/is-date-object/-/is-date-object-1.0.4.tgz", + "integrity": "sha512-/b4ZVsG7Z5XVtIxs/h9W8nvfLgSAyKYdtGWQLbqy6jA1icmgjf8WCoTKgeS4wy5tYaPePouzFMANbnj94c2Z+A==", + "dev": true + }, + "is-extglob": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", + "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=", + "dev": true + }, + "is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "dev": true + }, + "is-glob": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.1.tgz", + "integrity": "sha512-5G0tKtBTFImOqDnLB2hG6Bp2qcKEFduo4tZu9MT/H6NQv/ghhy30o55ufafxJ/LdH79LLs2Kfrn85TLKyA7BUg==", + "dev": true, + "requires": { + "is-extglob": "^2.1.1" + } + }, + "is-negative-zero": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-negative-zero/-/is-negative-zero-2.0.1.tgz", + "integrity": "sha512-2z6JzQvZRa9A2Y7xC6dQQm4FSTSTNWjKIYYTt4246eMTJmIo0Q+ZyOsU66X8lxK1AbB92dFeglPLrhwpeRKO6w==", + "dev": true + }, + "is-number": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", + "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", + "dev": true + }, + "is-number-object": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/is-number-object/-/is-number-object-1.0.5.tgz", + "integrity": "sha512-RU0lI/n95pMoUKu9v1BZP5MBcZuNSVJkMkAG2dJqC4z2GlkGUNeH68SuHuBKBD/XFe+LHZ+f9BKkLET60Niedw==", + "dev": true + }, + "is-regex": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/is-regex/-/is-regex-1.1.3.tgz", + "integrity": "sha512-qSVXFz28HM7y+IWX6vLCsexdlvzT1PJNFSBuaQLQ5o0IEw8UDYW6/2+eCMVyIsbM8CNLX2a/QWmSpyxYEHY7CQ==", + "dev": true, + "requires": { + "call-bind": "^1.0.2", + "has-symbols": "^1.0.2" + } + }, + "is-string": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/is-string/-/is-string-1.0.6.tgz", + "integrity": "sha512-2gdzbKUuqtQ3lYNrUTQYoClPhm7oQu4UdpSZMp1/DGgkHBT8E2Z1l0yMdb6D4zNAxwDiMv8MdulKROJGNl0Q0w==", + "dev": true + }, + "is-symbol": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/is-symbol/-/is-symbol-1.0.4.tgz", + "integrity": "sha512-C/CPBqKWnvdcxqIARxyOh4v1UUEOCHpgDa0WYgpKDFMszcrPcffg5uhwSgPCLD2WWxmq6isisz87tzT01tuGhg==", + "dev": true, + "requires": { + "has-symbols": "^1.0.2" + } + }, + "isarray": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/isarray/-/isarray-0.0.1.tgz", + "integrity": "sha1-ihis/Kmo9Bd+Cav8YDiTmwXR7t8=" + }, + "isexe": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha1-6PvzdNxVb/iUehDcsFctYz8s+hA=", + "dev": true + }, + "js-tokens": { + 
"version": "4.0.0", + "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", + "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==" + }, + "js-yaml": { + "version": "3.14.1", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.1.tgz", + "integrity": "sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==", + "dev": true, + "requires": { + "argparse": "^1.0.7", + "esprima": "^4.0.0" + } + }, + "jsesc": { + "version": "2.5.2", + "resolved": "https://registry.npmjs.org/jsesc/-/jsesc-2.5.2.tgz", + "integrity": "sha512-OYu7XEzjkCQ3C5Ps3QIZsQfNpqoJyZZA99wd9aWd05NCtC5pWOkShK2mkL6HXQR6/Cy2lbNdPlZBpuQHXE63gA==", + "dev": true + }, + "json-parse-better-errors": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/json-parse-better-errors/-/json-parse-better-errors-1.0.2.tgz", + "integrity": "sha512-mrqyZKfX5EhL7hvqcV6WG1yYjnjeuYDzDhhcAAUrq8Po85NBQBJP+ZDUT75qZQ98IkUoBqdkExkukOU7Ts2wrw==", + "dev": true + }, + "json-schema-traverse": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", + "dev": true + }, + "json-stable-stringify-without-jsonify": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz", + "integrity": "sha1-nbe1lJatPzz+8wp1FC0tkwrXJlE=", + "dev": true + }, + "json5": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.0.tgz", + "integrity": "sha512-f+8cldu7X/y7RAJurMEJmdoKXGB/X550w2Nr3tTbezL6RwEE/iMcm+tZnXeoZtKuOq6ft8+CqzEkrIgx1fPoQA==", + "dev": true, + "requires": { + "minimist": "^1.2.5" + } + }, + "jsx-ast-utils": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/jsx-ast-utils/-/jsx-ast-utils-3.2.0.tgz", + "integrity": "sha512-EIsmt3O3ljsU6sot/J4E1zDRxfBNrhjyf/OKjlydwgEimQuznlM4Wv7U+ueONJMyEn1WRE0K8dhi3dVAXYT24Q==", + "dev": true, + "requires": { + "array-includes": "^3.1.2", + "object.assign": "^4.1.2" + } + }, + "language-subtag-registry": { + "version": "0.3.21", + "resolved": "https://registry.npmjs.org/language-subtag-registry/-/language-subtag-registry-0.3.21.tgz", + "integrity": "sha512-L0IqwlIXjilBVVYKFT37X9Ih11Um5NEl9cbJIuU/SwP/zEEAbBPOnEeeuxVMf45ydWQRDQN3Nqc96OgbH1K+Pg==", + "dev": true + }, + "language-tags": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/language-tags/-/language-tags-1.0.5.tgz", + "integrity": "sha1-0yHbxNowuovzAk4ED6XBRmH5GTo=", + "dev": true, + "requires": { + "language-subtag-registry": "~0.3.2" + } + }, + "levn": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz", + "integrity": "sha512-+bT2uH4E5LGE7h/n3evcS/sQlJXCpIp6ym8OWJ5eV6+67Dsql/LaaT7qJBAt2rzfoa/5QBGBhxDix1dMt2kQKQ==", + "dev": true, + "requires": { + "prelude-ls": "^1.2.1", + "type-check": "~0.4.0" + } + }, + "load-json-file": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/load-json-file/-/load-json-file-4.0.0.tgz", + "integrity": "sha1-L19Fq5HjMhYjT9U62rZo607AmTs=", + "dev": true, + "requires": { + "graceful-fs": "^4.1.2", + "parse-json": "^4.0.0", + "pify": "^3.0.0", + "strip-bom": "^3.0.0" + } + }, + "locate-path": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-2.0.0.tgz", + "integrity": 
"sha1-K1aLJl7slExtnA3pw9u7ygNUzY4=", + "dev": true, + "requires": { + "p-locate": "^2.0.0", + "path-exists": "^3.0.0" + } + }, + "lodash": { + "version": "4.17.21", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", + "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==", + "dev": true + }, + "lodash.clonedeep": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz", + "integrity": "sha1-4j8/nE+Pvd6HJSnBBxhXoIblzO8=", + "dev": true + }, + "lodash.truncate": { + "version": "4.4.2", + "resolved": "https://registry.npmjs.org/lodash.truncate/-/lodash.truncate-4.4.2.tgz", + "integrity": "sha1-WjUNoLERO4N+z//VgSy+WNbq4ZM=", + "dev": true + }, + "loose-envify": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", + "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==", + "requires": { + "js-tokens": "^3.0.0 || ^4.0.0" + } + }, + "lru-cache": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", + "integrity": "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==", + "dev": true, + "requires": { + "yallist": "^4.0.0" + } + }, + "merge2": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz", + "integrity": "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==", + "dev": true + }, + "micromatch": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.4.tgz", + "integrity": "sha512-pRmzw/XUcwXGpD9aI9q/0XOwLNygjETJ8y0ao0wdqprrzDa4YnxLcz7fQRZr8voh8V10kGhABbNcHVk5wHgWwg==", + "dev": true, + "requires": { + "braces": "^3.0.1", + "picomatch": "^2.2.3" + } + }, + "mini-create-react-context": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/mini-create-react-context/-/mini-create-react-context-0.4.1.tgz", + "integrity": "sha512-YWCYEmd5CQeHGSAKrYvXgmzzkrvssZcuuQDDeqkT+PziKGMgE+0MCCtcKbROzocGBG1meBLl2FotlRwf4gAzbQ==", + "requires": { + "@babel/runtime": "^7.12.1", + "tiny-warning": "^1.0.3" + } + }, + "minimatch": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz", + "integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==", + "dev": true, + "requires": { + "brace-expansion": "^1.1.7" + } + }, + "minimist": { + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz", + "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==", + "dev": true + }, + "ms": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", + "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", + "dev": true + }, + "nanoid": { + "version": "3.1.23", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.1.23.tgz", + "integrity": "sha512-FiB0kzdP0FFVGDKlRLEQ1BgDzU87dy5NnzjeW9YZNt+/c3+q82EQDUwniSAUxp/F0gFNI1ZhKU1FqYsMuqZVnw==", + "dev": true + }, + "natural-compare": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz", + "integrity": "sha1-Sr6/7tdUHywnrPspvbvRXI1bpPc=", + "dev": true + }, + "node-releases": { + "version": "1.1.72", + 
"resolved": "https://registry.npmjs.org/node-releases/-/node-releases-1.1.72.tgz", + "integrity": "sha512-LLUo+PpH3dU6XizX3iVoubUNheF/owjXCZZ5yACDxNnPtgFuludV1ZL3ayK1kVep42Rmm0+R9/Y60NQbZ2bifw==", + "dev": true + }, + "normalize-package-data": { + "version": "2.5.0", + "resolved": "https://registry.npmjs.org/normalize-package-data/-/normalize-package-data-2.5.0.tgz", + "integrity": "sha512-/5CMN3T0R4XTj4DcGaexo+roZSdSFW/0AOOTROrjxzCG1wrWXEsGbRKevjlIL+ZDE4sZlJr5ED4YW0yqmkK+eA==", + "dev": true, + "requires": { + "hosted-git-info": "^2.1.4", + "resolve": "^1.10.0", + "semver": "2 || 3 || 4 || 5", + "validate-npm-package-license": "^3.0.1" + }, + "dependencies": { + "semver": { + "version": "5.7.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-5.7.1.tgz", + "integrity": "sha512-sauaDf/PZdVgrLTNYHRtpXa1iRiKcaebiKQ1BJdpQlWH2lCvexQdX55snPFyK7QzpudqbCI0qXFfOasHdyNDGQ==", + "dev": true + } + } + }, + "normalize-path": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-2.1.1.tgz", + "integrity": "sha1-GrKLVW4Zg2Oowab35vogE3/mrtk=", + "dev": true, + "requires": { + "remove-trailing-separator": "^1.0.1" + } + }, + "object-assign": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", + "integrity": "sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM=" + }, + "object-inspect": { + "version": "1.10.3", + "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.10.3.tgz", + "integrity": "sha512-e5mCJlSH7poANfC8z8S9s9S2IN5/4Zb3aZ33f5s8YqoazCFzNLloLU8r5VCG+G7WoqLvAAZoVMcy3tp/3X0Plw==", + "dev": true + }, + "object-keys": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/object-keys/-/object-keys-1.1.1.tgz", + "integrity": "sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA==", + "dev": true + }, + "object.assign": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/object.assign/-/object.assign-4.1.2.tgz", + "integrity": "sha512-ixT2L5THXsApyiUPYKmW+2EHpXXe5Ii3M+f4e+aJFAHao5amFRW6J0OO6c/LU8Be47utCx2GL89hxGB6XSmKuQ==", + "dev": true, + "requires": { + "call-bind": "^1.0.0", + "define-properties": "^1.1.3", + "has-symbols": "^1.0.1", + "object-keys": "^1.1.1" + } + }, + "object.entries": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/object.entries/-/object.entries-1.1.3.tgz", + "integrity": "sha512-ym7h7OZebNS96hn5IJeyUmaWhaSM4SVtAPPfNLQEI2MYWCO2egsITb9nab2+i/Pwibx+R0mtn+ltKJXRSeTMGg==", + "dev": true, + "requires": { + "call-bind": "^1.0.0", + "define-properties": "^1.1.3", + "es-abstract": "^1.18.0-next.1", + "has": "^1.0.3" + } + }, + "object.fromentries": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/object.fromentries/-/object.fromentries-2.0.4.tgz", + "integrity": "sha512-EsFBshs5RUUpQEY1D4q/m59kMfz4YJvxuNCJcv/jWwOJr34EaVnG11ZrZa0UHB3wnzV1wx8m58T4hQL8IuNXlQ==", + "dev": true, + "requires": { + "call-bind": "^1.0.2", + "define-properties": "^1.1.3", + "es-abstract": "^1.18.0-next.2", + "has": "^1.0.3" + } + }, + "object.values": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/object.values/-/object.values-1.1.3.tgz", + "integrity": "sha512-nkF6PfDB9alkOUxpf1HNm/QlkeW3SReqL5WXeBLpEJJnlPSvRaDQpW3gQTksTN3fgJX4hL42RzKyOin6ff3tyw==", + "dev": true, + "requires": { + "call-bind": "^1.0.2", + "define-properties": "^1.1.3", + "es-abstract": "^1.18.0-next.2", + "has": "^1.0.3" + } + }, + "once": { + "version": "1.4.0", + "resolved": 
"https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha1-WDsap3WWHUsROsF9nFC6753Xa9E=", + "dev": true, + "requires": { + "wrappy": "1" + } + }, + "optionator": { + "version": "0.9.1", + "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.1.tgz", + "integrity": "sha512-74RlY5FCnhq4jRxVUPKDaRwrVNXMqsGsiW6AJw4XK8hmtm10wC0ypZBLw5IIp85NZMr91+qd1RvvENwg7jjRFw==", + "dev": true, + "requires": { + "deep-is": "^0.1.3", + "fast-levenshtein": "^2.0.6", + "levn": "^0.4.1", + "prelude-ls": "^1.2.1", + "type-check": "^0.4.0", + "word-wrap": "^1.2.3" + } + }, + "p-limit": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-1.3.0.tgz", + "integrity": "sha512-vvcXsLAJ9Dr5rQOPk7toZQZJApBl2K4J6dANSsEuh6QI41JYcsS/qhTGa9ErIUUgK3WNQoJYvylxvjqmiqEA9Q==", + "dev": true, + "requires": { + "p-try": "^1.0.0" + } + }, + "p-locate": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-2.0.0.tgz", + "integrity": "sha1-IKAQOyIqcMj9OcwuWAaA893l7EM=", + "dev": true, + "requires": { + "p-limit": "^1.1.0" + } + }, + "p-try": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/p-try/-/p-try-1.0.0.tgz", + "integrity": "sha1-y8ec26+P1CKOE/Yh8rGiN8GyB7M=", + "dev": true + }, + "parent-module": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz", + "integrity": "sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==", + "dev": true, + "requires": { + "callsites": "^3.0.0" + } + }, + "parse-json": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-4.0.0.tgz", + "integrity": "sha1-vjX1Qlvh9/bHRxhPmKeIy5lHfuA=", + "dev": true, + "requires": { + "error-ex": "^1.3.1", + "json-parse-better-errors": "^1.0.1" + } + }, + "path-exists": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-3.0.0.tgz", + "integrity": "sha1-zg6+ql94yxiSXqfYENe1mwEP1RU=", + "dev": true + }, + "path-is-absolute": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", + "integrity": "sha1-F0uSaHNVNP+8es5r9TpanhtcX18=", + "dev": true + }, + "path-key": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", + "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", + "dev": true + }, + "path-parse": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.6.tgz", + "integrity": "sha512-GSmOT2EbHrINBf9SR7CDELwlJ8AENk3Qn7OikK4nFYAu3Ote2+JYNVvkpAEQm3/TLNEJFD/xZJjzyxg3KBWOzw==", + "dev": true + }, + "path-starts-with": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/path-starts-with/-/path-starts-with-1.0.0.tgz", + "integrity": "sha1-soJDAV6LE43lcmgqxS2kLmRq2E4=", + "dev": true, + "requires": { + "normalize-path": "^2.1.1" + } + }, + "path-to-regexp": { + "version": "1.8.0", + "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-1.8.0.tgz", + "integrity": "sha512-n43JRhlUKUAlibEJhPeir1ncUID16QnEjNpwzNdO3Lm4ywrBpBZ5oLD0I6br9evr1Y9JTqwRtAh7JLoOzAQdVA==", + "requires": { + "isarray": "0.0.1" + } + }, + "path-type": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/path-type/-/path-type-4.0.0.tgz", + "integrity": "sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw==", + "dev": true + }, + "picomatch": { + 
"version": "2.2.3", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.2.3.tgz", + "integrity": "sha512-KpELjfwcCDUb9PeigTs2mBJzXUPzAuP2oPcA989He8Rte0+YUAjw1JVedDhuTKPkHjSYzMN3npC9luThGYEKdg==", + "dev": true + }, + "pify": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pify/-/pify-3.0.0.tgz", + "integrity": "sha1-5aSs0sEB/fPZpNB/DbxNtJ3SgXY=", + "dev": true + }, + "pkg-dir": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/pkg-dir/-/pkg-dir-2.0.0.tgz", + "integrity": "sha1-9tXREJ4Z1j7fQo4L1X4Sd3YVM0s=", + "dev": true, + "requires": { + "find-up": "^2.1.0" + } + }, + "pkg-up": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/pkg-up/-/pkg-up-2.0.0.tgz", + "integrity": "sha1-yBmscoBZpGHKscOImivjxJoATX8=", + "dev": true, + "requires": { + "find-up": "^2.1.0" + } + }, + "postcss": { + "version": "8.2.15", + "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.2.15.tgz", + "integrity": "sha512-2zO3b26eJD/8rb106Qu2o7Qgg52ND5HPjcyQiK2B98O388h43A448LCslC0dI2P97wCAQRJsFvwTRcXxTKds+Q==", + "dev": true, + "requires": { + "colorette": "^1.2.2", + "nanoid": "^3.1.23", + "source-map": "^0.6.1" + }, + "dependencies": { + "source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "dev": true + } + } + }, + "prelude-ls": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz", + "integrity": "sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g==", + "dev": true + }, + "progress": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz", + "integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==", + "dev": true + }, + "prop-types": { + "version": "15.7.2", + "resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.7.2.tgz", + "integrity": "sha512-8QQikdH7//R2vurIJSutZ1smHYTcLpRWEOlHnzcWHmBYrOGUysKwSsrC89BCiFj3CbrfJ/nXFdJepOVrY1GCHQ==", + "requires": { + "loose-envify": "^1.4.0", + "object-assign": "^4.1.1", + "react-is": "^16.8.1" + } + }, + "punycode": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.1.1.tgz", + "integrity": "sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==", + "dev": true + }, + "queue-microtask": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz", + "integrity": "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==", + "dev": true + }, + "react": { + "version": "17.0.2", + "resolved": "https://registry.npmjs.org/react/-/react-17.0.2.tgz", + "integrity": "sha512-gnhPt75i/dq/z3/6q/0asP78D0u592D5L1pd7M8P+dck6Fu/jJeL6iVVK23fptSUZj8Vjf++7wXA8UNclGQcbA==", + "requires": { + "loose-envify": "^1.1.0", + "object-assign": "^4.1.1" + } + }, + "react-dom": { + "version": "17.0.2", + "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-17.0.2.tgz", + "integrity": "sha512-s4h96KtLDUQlsENhMn1ar8t2bEa+q/YAtj8pPPdIjPDGBDIVNsrD9aXNWqspUe6AzKCIG0C1HZZLqLV7qpOBGA==", + "requires": { + "loose-envify": "^1.1.0", + "object-assign": "^4.1.1", + "scheduler": "^0.20.2" + } + }, + "react-is": { + "version": "16.13.1", + "resolved": 
"https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz", + "integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==" + }, + "react-refresh": { + "version": "0.9.0", + "resolved": "https://registry.npmjs.org/react-refresh/-/react-refresh-0.9.0.tgz", + "integrity": "sha512-Gvzk7OZpiqKSkxsQvO/mbTN1poglhmAV7gR/DdIrRrSMXraRQQlfikRJOr3Nb9GTMPC5kof948Zy6jJZIFtDvQ==", + "dev": true + }, + "react-router": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/react-router/-/react-router-5.2.0.tgz", + "integrity": "sha512-smz1DUuFHRKdcJC0jobGo8cVbhO3x50tCL4icacOlcwDOEQPq4TMqwx3sY1TP+DvtTgz4nm3thuo7A+BK2U0Dw==", + "requires": { + "@babel/runtime": "^7.1.2", + "history": "^4.9.0", + "hoist-non-react-statics": "^3.1.0", + "loose-envify": "^1.3.1", + "mini-create-react-context": "^0.4.0", + "path-to-regexp": "^1.7.0", + "prop-types": "^15.6.2", + "react-is": "^16.6.0", + "tiny-invariant": "^1.0.2", + "tiny-warning": "^1.0.0" + } + }, + "react-router-dom": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/react-router-dom/-/react-router-dom-5.2.0.tgz", + "integrity": "sha512-gxAmfylo2QUjcwxI63RhQ5G85Qqt4voZpUXSEqCwykV0baaOTQDR1f0PmY8AELqIyVc0NEZUj0Gov5lNGcXgsA==", + "requires": { + "@babel/runtime": "^7.1.2", + "history": "^4.9.0", + "loose-envify": "^1.3.1", + "prop-types": "^15.6.2", + "react-router": "5.2.0", + "tiny-invariant": "^1.0.2", + "tiny-warning": "^1.0.0" + } + }, + "read-pkg": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/read-pkg/-/read-pkg-3.0.0.tgz", + "integrity": "sha1-nLxoaXj+5l0WwA4rGcI3/Pbjg4k=", + "dev": true, + "requires": { + "load-json-file": "^4.0.0", + "normalize-package-data": "^2.3.2", + "path-type": "^3.0.0" + }, + "dependencies": { + "path-type": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/path-type/-/path-type-3.0.0.tgz", + "integrity": "sha512-T2ZUsdZFHgA3u4e5PfPbjd7HDDpxPnQb5jN0SrDsjNSuVXHJqtwTnWqG0B1jZrgmJ/7lj1EmVIByWt1gxGkWvg==", + "dev": true, + "requires": { + "pify": "^3.0.0" + } + } + } + }, + "read-pkg-up": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/read-pkg-up/-/read-pkg-up-3.0.0.tgz", + "integrity": "sha1-PtSWaF26D4/hGNBpHcUfSh/5bwc=", + "dev": true, + "requires": { + "find-up": "^2.0.0", + "read-pkg": "^3.0.0" + } + }, + "regenerator-runtime": { + "version": "0.13.7", + "resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.13.7.tgz", + "integrity": "sha512-a54FxoJDIr27pgf7IgeQGxmqUNYrcV338lf/6gH456HZ/PhX+5BcwHXG9ajESmwe6WRO0tAzRUrRmNONWgkrew==" + }, + "regexp.prototype.flags": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/regexp.prototype.flags/-/regexp.prototype.flags-1.3.1.tgz", + "integrity": "sha512-JiBdRBq91WlY7uRJ0ds7R+dU02i6LKi8r3BuQhNXn+kmeLN+EfHhfjqMRis1zJxnlu88hq/4dx0P2OP3APRTOA==", + "dev": true, + "requires": { + "call-bind": "^1.0.2", + "define-properties": "^1.1.3" + } + }, + "regexpp": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/regexpp/-/regexpp-3.1.0.tgz", + "integrity": "sha512-ZOIzd8yVsQQA7j8GCSlPGXwg5PfmA1mrq0JP4nGhh54LaKN3xdai/vHUDu74pKwV8OxseMS65u2NImosQcSD0Q==", + "dev": true + }, + "remove-trailing-separator": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/remove-trailing-separator/-/remove-trailing-separator-1.1.0.tgz", + "integrity": "sha1-wkvOKig62tW8P1jg1IJJuSN52O8=", + "dev": true + }, + "require-from-string": { + "version": "2.0.2", + "resolved": 
"https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz", + "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==", + "dev": true + }, + "resolve": { + "version": "1.20.0", + "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.20.0.tgz", + "integrity": "sha512-wENBPt4ySzg4ybFQW2TT1zMQucPK95HSh/nq2CFTZVOGut2+pQvSsgtda4d26YrYcr067wjbmzOG8byDPBX63A==", + "dev": true, + "requires": { + "is-core-module": "^2.2.0", + "path-parse": "^1.0.6" + } + }, + "resolve-from": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz", + "integrity": "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==", + "dev": true + }, + "resolve-pathname": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/resolve-pathname/-/resolve-pathname-3.0.0.tgz", + "integrity": "sha512-C7rARubxI8bXFNB/hqcp/4iUeIXJhJZvFPFPiSPRnhU5UPxzMFIl+2E6yY6c4k9giDJAhtV+enfA+G89N6Csng==" + }, + "reusify": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.0.4.tgz", + "integrity": "sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw==", + "dev": true + }, + "rimraf": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz", + "integrity": "sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==", + "dev": true, + "requires": { + "glob": "^7.1.3" + } + }, + "rollup": { + "version": "2.48.0", + "resolved": "https://registry.npmjs.org/rollup/-/rollup-2.48.0.tgz", + "integrity": "sha512-wl9ZSSSsi5579oscSDYSzGn092tCS076YB+TQrzsGuSfYyJeep8eEWj0eaRjuC5McuMNmcnR8icBqiE/FWNB1A==", + "dev": true, + "requires": { + "fsevents": "~2.3.1" + } + }, + "run-parallel": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz", + "integrity": "sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==", + "dev": true, + "requires": { + "queue-microtask": "^1.2.2" + } + }, + "safe-buffer": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", + "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==", + "dev": true + }, + "scheduler": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.20.2.tgz", + "integrity": "sha512-2eWfGgAqqWFGqtdMmcL5zCMK1U8KlXv8SQFGglL3CEtd0aDVDWgeF/YoCmvln55m5zSk3J/20hTaSBeSObsQDQ==", + "requires": { + "loose-envify": "^1.1.0", + "object-assign": "^4.1.1" + } + }, + "semver": { + "version": "7.3.5", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.3.5.tgz", + "integrity": "sha512-PoeGJYh8HK4BTO/a9Tf6ZG3veo/A7ZVsYrSA6J8ny9nb3B1VrpkuN+z9OE5wfE5p6H4LchYZsegiQgbJD94ZFQ==", + "dev": true, + "requires": { + "lru-cache": "^6.0.0" + } + }, + "shebang-command": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "dev": true, + "requires": { + "shebang-regex": "^3.0.0" + } + }, + "shebang-regex": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": 
"sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", + "dev": true + }, + "side-channel": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.0.4.tgz", + "integrity": "sha512-q5XPytqFEIKHkGdiMIrY10mvLRvnQh42/+GoBlFW3b2LXLE2xxJpZFdm94we0BaoV3RwJyGqg5wS7epxTv0Zvw==", + "dev": true, + "requires": { + "call-bind": "^1.0.0", + "get-intrinsic": "^1.0.2", + "object-inspect": "^1.9.0" + } + }, + "slash": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz", + "integrity": "sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==", + "dev": true + }, + "slice-ansi": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/slice-ansi/-/slice-ansi-4.0.0.tgz", + "integrity": "sha512-qMCMfhY040cVHT43K9BFygqYbUPFZKHOg7K73mtTWJRb8pyP3fzf4Ixd5SzdEJQ6MRUg/WBnOLxghZtKKurENQ==", + "dev": true, + "requires": { + "ansi-styles": "^4.0.0", + "astral-regex": "^2.0.0", + "is-fullwidth-code-point": "^3.0.0" + }, + "dependencies": { + "ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, + "requires": { + "color-convert": "^2.0.1" + } + }, + "color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dev": true, + "requires": { + "color-name": "~1.1.4" + } + }, + "color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "dev": true + } + } + }, + "source-map": { + "version": "0.5.7", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.5.7.tgz", + "integrity": "sha1-igOdLRAh0i0eoUyA2OpGi6LvP8w=", + "dev": true + }, + "spdx-correct": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/spdx-correct/-/spdx-correct-3.1.1.tgz", + "integrity": "sha512-cOYcUWwhCuHCXi49RhFRCyJEK3iPj1Ziz9DpViV3tbZOwXD49QzIN3MpOLJNxh2qwq2lJJZaKMVw9qNi4jTC0w==", + "dev": true, + "requires": { + "spdx-expression-parse": "^3.0.0", + "spdx-license-ids": "^3.0.0" + } + }, + "spdx-exceptions": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/spdx-exceptions/-/spdx-exceptions-2.3.0.tgz", + "integrity": "sha512-/tTrYOC7PPI1nUAgx34hUpqXuyJG+DTHJTnIULG4rDygi4xu/tfgmq1e1cIRwRzwZgo4NLySi+ricLkZkw4i5A==", + "dev": true + }, + "spdx-expression-parse": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/spdx-expression-parse/-/spdx-expression-parse-3.0.1.tgz", + "integrity": "sha512-cbqHunsQWnJNE6KhVSMsMeH5H/L9EpymbzqTQ3uLwNCLZ1Q481oWaofqH7nO6V07xlXwY6PhQdQ2IedWx/ZK4Q==", + "dev": true, + "requires": { + "spdx-exceptions": "^2.1.0", + "spdx-license-ids": "^3.0.0" + } + }, + "spdx-license-ids": { + "version": "3.0.8", + "resolved": "https://registry.npmjs.org/spdx-license-ids/-/spdx-license-ids-3.0.8.tgz", + "integrity": "sha512-NDgA96EnaLSvtbM7trJj+t1LUR3pirkDCcz9nOUlPb5DMBGsH7oES6C3hs3j7R9oHEa1EMvReS/BUAIT5Tcr0g==", + "dev": true + }, + "sprintf-js": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", + "integrity": 
"sha1-BOaSb2YolTVPPdAVIDYzuFcpfiw=", + "dev": true + }, + "string-width": { + "version": "4.2.2", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.2.tgz", + "integrity": "sha512-XBJbT3N4JhVumXE0eoLU9DCjcaF92KLNqTmFCnG1pf8duUxFGwtP6AD6nkjw9a3IdiRtL3E2w3JDiE/xi3vOeA==", + "dev": true, + "requires": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.0" + }, + "dependencies": { + "emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "dev": true + } + } + }, + "string.prototype.matchall": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/string.prototype.matchall/-/string.prototype.matchall-4.0.4.tgz", + "integrity": "sha512-pknFIWVachNcyqRfaQSeu/FUfpvJTe4uskUSZ9Wc1RijsPuzbZ8TyYT8WCNnntCjUEqQ3vUHMAfVj2+wLAisPQ==", + "dev": true, + "requires": { + "call-bind": "^1.0.2", + "define-properties": "^1.1.3", + "es-abstract": "^1.18.0-next.2", + "has-symbols": "^1.0.1", + "internal-slot": "^1.0.3", + "regexp.prototype.flags": "^1.3.1", + "side-channel": "^1.0.4" + } + }, + "string.prototype.trimend": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/string.prototype.trimend/-/string.prototype.trimend-1.0.4.tgz", + "integrity": "sha512-y9xCjw1P23Awk8EvTpcyL2NIr1j7wJ39f+k6lvRnSMz+mz9CGz9NYPelDk42kOz6+ql8xjfK8oYzy3jAP5QU5A==", + "dev": true, + "requires": { + "call-bind": "^1.0.2", + "define-properties": "^1.1.3" + } + }, + "string.prototype.trimstart": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/string.prototype.trimstart/-/string.prototype.trimstart-1.0.4.tgz", + "integrity": "sha512-jh6e984OBfvxS50tdY2nRZnoC5/mLFKOREQfw8t5yytkoUsJRNxvI/E39qu1sD0OtWI3OC0XgKSmcWwziwYuZw==", + "dev": true, + "requires": { + "call-bind": "^1.0.2", + "define-properties": "^1.1.3" + } + }, + "strip-ansi": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.0.tgz", + "integrity": "sha512-AuvKTrTfQNYNIctbR1K/YGTR1756GycPsg7b9bdV9Duqur4gv6aKqHXah67Z8ImS7WEz5QVcOtlfW2rZEugt6w==", + "dev": true, + "requires": { + "ansi-regex": "^5.0.0" + } + }, + "strip-bom": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-3.0.0.tgz", + "integrity": "sha1-IzTBjpx1n3vdVv3vfprj1YjmjtM=", + "dev": true + }, + "strip-json-comments": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz", + "integrity": "sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==", + "dev": true + }, + "supports-color": { + "version": "5.5.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", + "integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==", + "dev": true, + "requires": { + "has-flag": "^3.0.0" + } + }, + "table": { + "version": "6.7.1", + "resolved": "https://registry.npmjs.org/table/-/table-6.7.1.tgz", + "integrity": "sha512-ZGum47Yi6KOOFDE8m223td53ath2enHcYLgOCjGr5ngu8bdIARQk6mN/wRMv4yMRcHnCSnHbCEha4sobQx5yWg==", + "dev": true, + "requires": { + "ajv": "^8.0.1", + "lodash.clonedeep": "^4.5.0", + "lodash.truncate": "^4.4.2", + "slice-ansi": "^4.0.0", + "string-width": "^4.2.0", + "strip-ansi": "^6.0.0" + }, + "dependencies": { + "ajv": { + "version": "8.4.0", + "resolved": 
"https://registry.npmjs.org/ajv/-/ajv-8.4.0.tgz", + "integrity": "sha512-7QD2l6+KBSLwf+7MuYocbWvRPdOu63/trReTLu2KFwkgctnub1auoF+Y1WYcm09CTM7quuscrzqmASaLHC/K4Q==", + "dev": true, + "requires": { + "fast-deep-equal": "^3.1.1", + "json-schema-traverse": "^1.0.0", + "require-from-string": "^2.0.2", + "uri-js": "^4.2.2" + } + }, + "json-schema-traverse": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", + "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==", + "dev": true + } + } + }, + "text-table": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz", + "integrity": "sha1-f17oI66AUgfACvLfSoTsP8+lcLQ=", + "dev": true + }, + "tiny-invariant": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/tiny-invariant/-/tiny-invariant-1.1.0.tgz", + "integrity": "sha512-ytxQvrb1cPc9WBEI/HSeYYoGD0kWnGEOR8RY6KomWLBVhqz0RgTwVO9dLrGz7dC+nN9llyI7OKAgRq8Vq4ZBSw==" + }, + "tiny-warning": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/tiny-warning/-/tiny-warning-1.0.3.tgz", + "integrity": "sha512-lBN9zLN/oAf68o3zNXYrdCt1kP8WsiGW8Oo2ka41b2IM5JL/S1CTyX1rW0mb/zSuJun0ZUrDxx4sqvYS2FWzPA==" + }, + "to-fast-properties": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/to-fast-properties/-/to-fast-properties-2.0.0.tgz", + "integrity": "sha1-3F5pjL0HkmW8c+A3doGk5Og/YW4=", + "dev": true + }, + "to-regex-range": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", + "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", + "dev": true, + "requires": { + "is-number": "^7.0.0" + } + }, + "tsconfig-paths": { + "version": "3.9.0", + "resolved": "https://registry.npmjs.org/tsconfig-paths/-/tsconfig-paths-3.9.0.tgz", + "integrity": "sha512-dRcuzokWhajtZWkQsDVKbWyY+jgcLC5sqJhg2PSgf4ZkH2aHPvaOY8YWGhmjb68b5qqTfasSsDO9k7RUiEmZAw==", + "dev": true, + "requires": { + "@types/json5": "^0.0.29", + "json5": "^1.0.1", + "minimist": "^1.2.0", + "strip-bom": "^3.0.0" + }, + "dependencies": { + "json5": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/json5/-/json5-1.0.1.tgz", + "integrity": "sha512-aKS4WQjPenRxiQsC93MNfjx+nbF4PAdYzmd/1JIj8HYzqfbu86beTuNgXDzPknWk0n0uARlyewZo4s++ES36Ow==", + "dev": true, + "requires": { + "minimist": "^1.2.0" + } + } + } + }, + "tslib": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-1.14.1.tgz", + "integrity": "sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==", + "dev": true + }, + "tsutils": { + "version": "3.21.0", + "resolved": "https://registry.npmjs.org/tsutils/-/tsutils-3.21.0.tgz", + "integrity": "sha512-mHKK3iUXL+3UF6xL5k0PEhKRUBKPBCv/+RkEOpjRWxxx27KKRBmmA60A9pgOUvMi8GKhRMPEmjBRPzs2W7O1OA==", + "dev": true, + "requires": { + "tslib": "^1.8.1" + } + }, + "type-check": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.4.0.tgz", + "integrity": "sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==", + "dev": true, + "requires": { + "prelude-ls": "^1.2.1" + } + }, + "type-fest": { + "version": "0.8.1", + "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.8.1.tgz", + "integrity": "sha512-4dbzIzqvjtgiM5rw1k5rEHtBANKmdudhGyBEajN01fEyhaAIhsoKNy6y7+IN93IfpFtwY9iqi7kD+xwKhQsNJA==", + 
"dev": true + }, + "typescript": { + "version": "4.2.4", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.2.4.tgz", + "integrity": "sha512-V+evlYHZnQkaz8TRBuxTA92yZBPotr5H+WhQ7bD3hZUndx5tGOa1fuCgeSjxAzM1RiN5IzvadIXTVefuuwZCRg==", + "dev": true + }, + "unbox-primitive": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/unbox-primitive/-/unbox-primitive-1.0.1.tgz", + "integrity": "sha512-tZU/3NqK3dA5gpE1KtyiJUrEB0lxnGkMFHptJ7q6ewdZ8s12QrODwNbhIJStmJkd1QDXa1NRA8aF2A1zk/Ypyw==", + "dev": true, + "requires": { + "function-bind": "^1.1.1", + "has-bigints": "^1.0.1", + "has-symbols": "^1.0.2", + "which-boxed-primitive": "^1.0.2" + } + }, + "uri-js": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz", + "integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==", + "dev": true, + "requires": { + "punycode": "^2.1.0" + } + }, + "v8-compile-cache": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/v8-compile-cache/-/v8-compile-cache-2.3.0.tgz", + "integrity": "sha512-l8lCEmLcLYZh4nbunNZvQCJc5pv7+RCwa8q/LdUx8u7lsWvPDKmpodJAJNwkAhJC//dFY48KuIEmjtd4RViDrA==", + "dev": true + }, + "validate-npm-package-license": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/validate-npm-package-license/-/validate-npm-package-license-3.0.4.tgz", + "integrity": "sha512-DpKm2Ui/xN7/HQKCtpZxoRWBhZ9Z0kqtygG8XCgNQ8ZlDnxuQmWhj566j8fN4Cu3/JmbhsDo7fcAJq4s9h27Ew==", + "dev": true, + "requires": { + "spdx-correct": "^3.0.0", + "spdx-expression-parse": "^3.0.0" + } + }, + "value-equal": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/value-equal/-/value-equal-1.0.1.tgz", + "integrity": "sha512-NOJ6JZCAWr0zlxZt+xqCHNTEKOsrks2HQd4MqhP1qy4z1SkbEP467eNx6TgDKXMvUOb+OENfJCZwM+16n7fRfw==" + }, + "vite": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/vite/-/vite-2.3.3.tgz", + "integrity": "sha512-eO1iwRbn3/BfkNVMNJDeANAFCZ5NobYOFPu7IqfY7DcI7I9nFGjJIZid0EViTmLDGwwSUPmRAq3cRBbO3+DsMA==", + "dev": true, + "requires": { + "esbuild": "^0.11.23", + "fsevents": "~2.3.1", + "postcss": "^8.2.10", + "resolve": "^1.19.0", + "rollup": "^2.38.5" + } + }, + "which": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "dev": true, + "requires": { + "isexe": "^2.0.0" + } + }, + "which-boxed-primitive": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/which-boxed-primitive/-/which-boxed-primitive-1.0.2.tgz", + "integrity": "sha512-bwZdv0AKLpplFY2KZRX6TvyuN7ojjr7lwkg6ml0roIy9YeuSr7JS372qlNW18UQYzgYK9ziGcerWqZOmEn9VNg==", + "dev": true, + "requires": { + "is-bigint": "^1.0.1", + "is-boolean-object": "^1.1.0", + "is-number-object": "^1.0.4", + "is-string": "^1.0.5", + "is-symbol": "^1.0.3" + } + }, + "word-wrap": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.3.tgz", + "integrity": "sha512-Hz/mrNwitNRh/HUAtM/VT/5VH+ygD6DV7mYKZAtHOrbs8U7lvPS6xf7EJKMF0uW1KJCl0H701g3ZGus+muE5vQ==", + "dev": true + }, + "wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=", + "dev": true + }, + "yallist": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", + "integrity": 
"sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", + "dev": true + } + } +} diff --git a/batch2/react-batch/package.json b/batch2/react-batch/package.json new file mode 100644 index 00000000000..e2d5e5c9778 --- /dev/null +++ b/batch2/react-batch/package.json @@ -0,0 +1,30 @@ +{ + "name": "react-batch", + "version": "0.0.0", + "scripts": { + "dev": "vite", + "build": "tsc && vite build", + "serve": "vite preview" + }, + "dependencies": { + "react": "^17.0.2", + "react-dom": "^17.0.0", + "react-router-dom": "^5.2.0" + }, + "devDependencies": { + "@types/react": "^17.0.5", + "@types/react-dom": "^17.0.0", + "@types/react-router-dom": "^5.1.7", + "@typescript-eslint/eslint-plugin": "^4.23.0", + "@typescript-eslint/parser": "^4.23.0", + "@vitejs/plugin-react-refresh": "^1.3.1", + "eslint": "^7.26.0", + "eslint-config-airbnb": "^18.2.1", + "eslint-plugin-import": "^2.23.0", + "eslint-plugin-jsx-a11y": "^6.4.1", + "eslint-plugin-react": "^7.23.2", + "eslint-plugin-react-hooks": "^4.2.0", + "typescript": "^4.1.2", + "vite": "^2.2.3" + } +} diff --git a/batch2/react-batch/src/App.tsx b/batch2/react-batch/src/App.tsx new file mode 100644 index 00000000000..cb1d1f4a03c --- /dev/null +++ b/batch2/react-batch/src/App.tsx @@ -0,0 +1,24 @@ +import React from 'react'; +import { Switch, Route } from 'react-router-dom'; +import BatchPage from './pages/BatchPage'; +import BatchesPage from './pages/BatchesPage'; + +import '@hail/common/hail.css'; + +export default function App() { + return ( +
+    <Switch>
+      <Route exact path="/batches">
+        <BatchesPage />
+      </Route>
+      <Route path="/batches/:id">
+        <BatchPage />
+      </Route>
+      <Route path="*" component={BadRoute} />
+    </Switch>
+  );
+}
+
+function BadRoute() {
+  return <div>Uh oh! That page does not exist...</div>;
+}
diff --git a/batch2/react-batch/src/components/BatchTable.tsx b/batch2/react-batch/src/components/BatchTable.tsx
new file mode 100644
index 00000000000..5d2225ce185
--- /dev/null
+++ b/batch2/react-batch/src/components/BatchTable.tsx
@@ -0,0 +1,20 @@
+import React from 'react';
+import { Link } from 'react-router-dom';
+import type { Batch } from '@hail/common/types';
+
+function BatchTableRow({ batch }: { batch: Batch }) {
+  return (
+    <>
+      <Link to={`/batches/${batch.id}`}>{batch.id}</Link>
+      {batch.state}
+    </>
+  );
+}
+
+export default function BatchTable({ batches }: { batches: Batch[] }) {
+  return (
+    <ul>
+      {batches.map((b) => <li key={b.id}><BatchTableRow batch={b} /></li>)}
+    </ul>
+  );
+}
diff --git a/batch2/react-batch/src/components/JobTable.tsx b/batch2/react-batch/src/components/JobTable.tsx
new file mode 100644
index 00000000000..f4185b833df
--- /dev/null
+++ b/batch2/react-batch/src/components/JobTable.tsx
@@ -0,0 +1,21 @@
+import React from 'react';
+import type { Job } from '@hail/common/types';
+
+function JobTableRow({ job }: { job: Job }) {
+  return <pre>{JSON.stringify(job, null, 2)}</pre>;
+}
+
+type JobTableProps = {
+  batchId: number,
+  jobs: Job[],
+}
+export default function JobTable({ batchId, jobs }: JobTableProps) {
+  return (
+    <>
+      <h1>Batch #{batchId}</h1>
+      <ol>
+        {jobs.map((j) => <li><JobTableRow job={j} /></li>)}
+      </ol>
+    </>
+  );
+}
diff --git a/batch2/react-batch/src/main.tsx b/batch2/react-batch/src/main.tsx
new file mode 100644
index 00000000000..dabadc36c77
--- /dev/null
+++ b/batch2/react-batch/src/main.tsx
@@ -0,0 +1,13 @@
+import React from 'react';
+import ReactDOM from 'react-dom';
+import { BrowserRouter } from 'react-router-dom';
+import App from './App';
+
+import '@hail/common/hail.css';
+
+ReactDOM.render(
+  <BrowserRouter>
+    <App />
+  </BrowserRouter>,
+  document.getElementById('root'),
+);
diff --git a/batch2/react-batch/src/pages/BatchPage.tsx b/batch2/react-batch/src/pages/BatchPage.tsx
new file mode 100644
index 00000000000..2cf8611894f
--- /dev/null
+++ b/batch2/react-batch/src/pages/BatchPage.tsx
@@ -0,0 +1,14 @@
+import React from 'react';
+import { useParams } from 'react-router-dom';
+import { useJobs } from '@hail/common/react/batch-client';
+import JobTable from '../components/JobTable';
+
+import '@hail/common/hail.css';
+
+type BatchPageParams = { id?: string };
+export default function BatchPage() {
+  const id = parseInt(useParams<BatchPageParams>().id!, 10);
+  const jobs = useJobs(id);
+
+  return jobs ? <JobTable batchId={id} jobs={jobs} /> : <div>Loading...</div>;
+}
diff --git a/batch2/react-batch/src/pages/BatchesPage.tsx b/batch2/react-batch/src/pages/BatchesPage.tsx
new file mode 100644
index 00000000000..e1116a3f896
--- /dev/null
+++ b/batch2/react-batch/src/pages/BatchesPage.tsx
@@ -0,0 +1,14 @@
+import React from 'react';
+import { useBatches } from '@hail/common/react/batch-client';
+import BatchTable from '../components/BatchTable';
+
+export default function BatchesPage() {
+  const batches = useBatches();
+
+  return (
+    <>
+      <h1>Batches</h1>
+      {batches ? <BatchTable batches={batches} /> : <div>Loading...</div>}
+    </>
+  );
+}
diff --git a/batch2/react-batch/tsconfig.json b/batch2/react-batch/tsconfig.json
new file mode 100644
index 00000000000..51efd413cde
--- /dev/null
+++ b/batch2/react-batch/tsconfig.json
@@ -0,0 +1,20 @@
+{
+  "compilerOptions": {
+    "target": "ESNext",
+    "lib": ["DOM", "DOM.Iterable", "ESNext"],
+    "types": ["vite/client"],
+    "allowJs": false,
+    "skipLibCheck": false,
+    "esModuleInterop": false,
+    "allowSyntheticDefaultImports": true,
+    "strict": true,
+    "forceConsistentCasingInFileNames": true,
+    "module": "ESNext",
+    "moduleResolution": "Node",
+    "resolveJsonModule": true,
+    "isolatedModules": true,
+    "noEmit": true,
+    "jsx": "react"
+  },
+  "include": ["./src"]
+}
diff --git a/batch2/react-batch/vite.config.ts b/batch2/react-batch/vite.config.ts
new file mode 100644
index 00000000000..b874b79498b
--- /dev/null
+++ b/batch2/react-batch/vite.config.ts
@@ -0,0 +1,14 @@
+/* eslint-disable */
+import { defineConfig } from 'vite';
+import reactRefresh from '@vitejs/plugin-react-refresh';
+
+// https://vitejs.dev/config/
+export default defineConfig({
+  plugins: [reactRefresh()],
+  server: {
+    proxy: {
+      '/api': 'http://localhost:5050'
+    },
+    cors: true,
+  }
+})
diff --git a/batch2/svelte-batch/.gitignore b/batch2/svelte-batch/.gitignore
new file mode 100644
index 00000000000..126fe84d533
--- /dev/null
+++ b/batch2/svelte-batch/.gitignore
@@ -0,0 +1,4 @@
+/node_modules/
+/dist/
+/.vscode/
+.DS_Store
diff --git a/batch2/svelte-batch/index.html b/batch2/svelte-batch/index.html
new file mode 100644
index 00000000000..b794a404672
--- /dev/null
+++ b/batch2/svelte-batch/index.html
@@ -0,0 +1,13 @@
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Batch2 Svelte</title>
+  </head>
+  <body>
+    <div id="app"></div>
+ + + diff --git a/batch2/svelte-batch/package-lock.json b/batch2/svelte-batch/package-lock.json new file mode 100644 index 00000000000..041eee60caa --- /dev/null +++ b/batch2/svelte-batch/package-lock.json @@ -0,0 +1,940 @@ +{ + "name": "svelte-batch", + "version": "0.0.0", + "lockfileVersion": 2, + "requires": true, + "packages": { + "": { + "version": "0.0.0", + "devDependencies": { + "@sveltejs/vite-plugin-svelte": "^1.0.0-next.7", + "svelte": "^3.37.0", + "svelte-navigator": "^3.1.5", + "svelte-preprocess": "^4.7.2", + "typescript": "^4.2.4", + "vite": "^2.2.3" + } + }, + "node_modules/@rollup/pluginutils": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/@rollup/pluginutils/-/pluginutils-4.1.0.tgz", + "integrity": "sha512-TrBhfJkFxA+ER+ew2U2/fHbebhLT/l/2pRk0hfj9KusXUuRXd2v0R58AfaZK9VXDQ4TogOSEmICVrQAA3zFnHQ==", + "dev": true, + "dependencies": { + "estree-walker": "^2.0.1", + "picomatch": "^2.2.2" + }, + "engines": { + "node": ">= 8.0.0" + }, + "peerDependencies": { + "rollup": "^1.20.0||^2.0.0" + } + }, + "node_modules/@sveltejs/vite-plugin-svelte": { + "version": "1.0.0-next.10", + "resolved": "https://registry.npmjs.org/@sveltejs/vite-plugin-svelte/-/vite-plugin-svelte-1.0.0-next.10.tgz", + "integrity": "sha512-ImvxbhPePm2hWNTKBSA3LHAYGwiEjHjvvgfPLXm4R87sfZ+BMXql9jBmDpzUC/URBLT4BB3Jxos/i523qkJBHg==", + "dev": true, + "dependencies": { + "@rollup/pluginutils": "^4.1.0", + "chalk": "^4.1.1", + "debug": "^4.3.2", + "hash-sum": "^2.0.0", + "require-relative": "^0.8.7", + "slash": "^4.0.0", + "source-map": "^0.7.3", + "svelte-hmr": "^0.14.2" + }, + "engines": { + "node": ">=12.0.0" + }, + "peerDependencies": { + "svelte": "^3.37.0", + "vite": "^2.2.3" + } + }, + "node_modules/@types/node": { + "version": "15.0.3", + "resolved": "https://registry.npmjs.org/@types/node/-/node-15.0.3.tgz", + "integrity": "sha512-/WbxFeBU+0F79z9RdEOXH4CsDga+ibi5M8uEYr91u3CkT/pdWcV8MCook+4wDPnZBexRdwWS+PiVZ2xJviAzcQ==", + "dev": true + }, + "node_modules/@types/pug": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/@types/pug/-/pug-2.0.4.tgz", + "integrity": "sha1-h3L80EGOPNLMFxVV1zAHQVBR9LI=", + "dev": true + }, + "node_modules/@types/sass": { + "version": "1.16.0", + "resolved": "https://registry.npmjs.org/@types/sass/-/sass-1.16.0.tgz", + "integrity": "sha512-2XZovu4NwcqmtZtsBR5XYLw18T8cBCnU2USFHTnYLLHz9fkhnoEMoDsqShJIOFsFhn5aJHjweiUUdTrDGujegA==", + "dev": true, + "dependencies": { + "@types/node": "*" + } + }, + "node_modules/ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/chalk": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.1.tgz", + "integrity": "sha512-diHzdDKxcU+bAsUboHLPEDQiw0qEe0qd7SYUn3HgcFlWgbDcfLGswOHYeGrHKzG9z6UYf01d9VFMfZxPM1xZSg==", + "dev": true, + "dependencies": { + "ansi-styles": "^4.1.0", + "supports-color": "^7.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" + } + }, + "node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": 
"sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dev": true, + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "dev": true + }, + "node_modules/colorette": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/colorette/-/colorette-1.2.2.tgz", + "integrity": "sha512-MKGMzyfeuutC/ZJ1cba9NqcNpfeqMUcYmyF1ZFY6/Cn7CNSAKx6a+s48sqLqyAiZuaP2TcqMhoo+dlwFnVxT9w==", + "dev": true + }, + "node_modules/debug": { + "version": "4.3.2", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.2.tgz", + "integrity": "sha512-mOp8wKcvj7XxC78zLgw/ZA+6TSgkoE2C/ienthhRD298T7UNwAg9diBpLRxC0mOezLl4B0xV7M0cCO6P/O0Xhw==", + "dev": true, + "dependencies": { + "ms": "2.1.2" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/dedent-js": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/dedent-js/-/dedent-js-1.0.1.tgz", + "integrity": "sha1-vuX7fJ5yfYXf+iRZDRDsGrElUwU=", + "dev": true + }, + "node_modules/detect-indent": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/detect-indent/-/detect-indent-6.0.0.tgz", + "integrity": "sha512-oSyFlqaTHCItVRGK5RmrmjB+CmaMOW7IaNA/kdxqhoa6d17j/5ce9O9eWXmV/KEdRwqpQA+Vqe8a8Bsybu4YnA==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/esbuild": { + "version": "0.11.20", + "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.11.20.tgz", + "integrity": "sha512-QOZrVpN/Yz74xfat0H6euSgn3RnwLevY1mJTEXneukz1ln9qB+ieaerRMzSeETpz/UJWsBMzRVR/andBht5WKw==", + "dev": true, + "hasInstallScript": true, + "bin": { + "esbuild": "bin/esbuild" + } + }, + "node_modules/estree-walker": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-2.0.2.tgz", + "integrity": "sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w==", + "dev": true + }, + "node_modules/fsevents": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", + "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", + "dev": true, + "hasInstallScript": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/function-bind": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.1.tgz", + "integrity": "sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A==", + "dev": true + }, + "node_modules/has": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/has/-/has-1.0.3.tgz", + "integrity": "sha512-f2dvO0VU6Oej7RkWJGrehjbzMAjFp5/VKPp5tTpWIV4JHHZK1/BxbFRtf/siA2SWTe09caDmVtYYzWEIbBS4zw==", + "dev": true, + "dependencies": { + "function-bind": "^1.1.1" + }, + "engines": { + "node": ">= 0.4.0" + } + }, + "node_modules/has-flag": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "dev": true, + "engines": { + "node": ">=8" + } + }, + 
"node_modules/hash-sum": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/hash-sum/-/hash-sum-2.0.0.tgz", + "integrity": "sha512-WdZTbAByD+pHfl/g9QSsBIIwy8IT+EsPiKDs0KNX+zSHhdDLFKdZu0BQHljvO+0QI/BasbMSUa8wYNCZTvhslg==", + "dev": true + }, + "node_modules/is-core-module": { + "version": "2.4.0", + "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.4.0.tgz", + "integrity": "sha512-6A2fkfq1rfeQZjxrZJGerpLCTHRNEBiSgnu0+obeJpEPZRUooHgsizvzv0ZjJwOz3iWIHdJtVWJ/tmPr3D21/A==", + "dev": true, + "dependencies": { + "has": "^1.0.3" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/lower-case": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/lower-case/-/lower-case-2.0.2.tgz", + "integrity": "sha512-7fm3l3NAF9WfN6W3JOmf5drwpVqX78JtoGJ3A6W0a6ZnldM41w2fV5D490psKFTpMds8TJse/eHLFFsNHHjHgg==", + "dev": true, + "dependencies": { + "tslib": "^2.0.3" + } + }, + "node_modules/min-indent": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/min-indent/-/min-indent-1.0.1.tgz", + "integrity": "sha512-I9jwMn07Sy/IwOj3zVkVik2JTvgpaykDZEigL6Rx6N9LbMywwUSMtxET+7lVoDLLd3O3IXwJwvuuns8UB/HeAg==", + "dev": true, + "engines": { + "node": ">=4" + } + }, + "node_modules/ms": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", + "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", + "dev": true + }, + "node_modules/nanoid": { + "version": "3.1.23", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.1.23.tgz", + "integrity": "sha512-FiB0kzdP0FFVGDKlRLEQ1BgDzU87dy5NnzjeW9YZNt+/c3+q82EQDUwniSAUxp/F0gFNI1ZhKU1FqYsMuqZVnw==", + "dev": true, + "bin": { + "nanoid": "bin/nanoid.cjs" + }, + "engines": { + "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" + } + }, + "node_modules/no-case": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/no-case/-/no-case-3.0.4.tgz", + "integrity": "sha512-fgAN3jGAh+RoxUGZHTSOLJIqUc2wmoBwGR4tbpNAKmmovFoWq0OdRkb0VkldReO2a2iBT/OEulG9XSUc10r3zg==", + "dev": true, + "dependencies": { + "lower-case": "^2.0.2", + "tslib": "^2.0.3" + } + }, + "node_modules/pascal-case": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/pascal-case/-/pascal-case-3.1.2.tgz", + "integrity": "sha512-uWlGT3YSnK9x3BQJaOdcZwrnV6hPpd8jFH1/ucpiLRPh/2zCVJKS19E4GvYHvaCcACn3foXZ0cLB9Wrx1KGe5g==", + "dev": true, + "dependencies": { + "no-case": "^3.0.4", + "tslib": "^2.0.3" + } + }, + "node_modules/path-parse": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.6.tgz", + "integrity": "sha512-GSmOT2EbHrINBf9SR7CDELwlJ8AENk3Qn7OikK4nFYAu3Ote2+JYNVvkpAEQm3/TLNEJFD/xZJjzyxg3KBWOzw==", + "dev": true + }, + "node_modules/picomatch": { + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.2.3.tgz", + "integrity": "sha512-KpELjfwcCDUb9PeigTs2mBJzXUPzAuP2oPcA989He8Rte0+YUAjw1JVedDhuTKPkHjSYzMN3npC9luThGYEKdg==", + "dev": true, + "engines": { + "node": ">=8.6" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/postcss": { + "version": "8.2.15", + "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.2.15.tgz", + "integrity": "sha512-2zO3b26eJD/8rb106Qu2o7Qgg52ND5HPjcyQiK2B98O388h43A448LCslC0dI2P97wCAQRJsFvwTRcXxTKds+Q==", + "dev": true, + "dependencies": { + "colorette": "^1.2.2", + "nanoid": "^3.1.23", + "source-map": "^0.6.1" + }, + "engines": { + "node": "^10 || ^12 || >=14" 
+ }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/postcss/" + } + }, + "node_modules/postcss/node_modules/source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/require-relative": { + "version": "0.8.7", + "resolved": "https://registry.npmjs.org/require-relative/-/require-relative-0.8.7.tgz", + "integrity": "sha1-eZlTn8ngR6N5KPoZb44VY9q9Nt4=", + "dev": true + }, + "node_modules/resolve": { + "version": "1.20.0", + "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.20.0.tgz", + "integrity": "sha512-wENBPt4ySzg4ybFQW2TT1zMQucPK95HSh/nq2CFTZVOGut2+pQvSsgtda4d26YrYcr067wjbmzOG8byDPBX63A==", + "dev": true, + "dependencies": { + "is-core-module": "^2.2.0", + "path-parse": "^1.0.6" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/rollup": { + "version": "2.47.0", + "resolved": "https://registry.npmjs.org/rollup/-/rollup-2.47.0.tgz", + "integrity": "sha512-rqBjgq9hQfW0vRmz+0S062ORRNJXvwRpzxhFXORvar/maZqY6za3rgQ/p1Glg+j1hnc1GtYyQCPiAei95uTElg==", + "dev": true, + "dependencies": { + "fsevents": "~2.3.1" + }, + "bin": { + "rollup": "dist/bin/rollup" + }, + "engines": { + "node": ">=10.0.0" + }, + "optionalDependencies": { + "fsevents": "~2.3.1" + } + }, + "node_modules/slash": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/slash/-/slash-4.0.0.tgz", + "integrity": "sha512-3dOsAHXXUkQTpOYcoAxLIorMTp4gIQr5IW3iVb7A7lFIp0VHhnynm9izx6TssdrIcVIESAlVjtnO2K8bg+Coew==", + "dev": true, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/source-map": { + "version": "0.7.3", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.7.3.tgz", + "integrity": "sha512-CkCj6giN3S+n9qrYiBTX5gystlENnRW5jZeNLHpe6aue+SrHcG5VYwujhW9s4dY31mEGsxBDrHR6oI69fTXsaQ==", + "dev": true, + "engines": { + "node": ">= 8" + } + }, + "node_modules/strip-indent": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/strip-indent/-/strip-indent-3.0.0.tgz", + "integrity": "sha512-laJTa3Jb+VQpaC6DseHhF7dXVqHTfJPCRDaEbid/drOhgitgYku/letMUqOXFoWV0zIIUbjpdH2t+tYj4bQMRQ==", + "dev": true, + "dependencies": { + "min-indent": "^1.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/supports-color": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", + "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "dev": true, + "dependencies": { + "has-flag": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/svelte": { + "version": "3.38.2", + "resolved": "https://registry.npmjs.org/svelte/-/svelte-3.38.2.tgz", + "integrity": "sha512-q5Dq0/QHh4BLJyEVWGe7Cej5NWs040LWjMbicBGZ+3qpFWJ1YObRmUDZKbbovddLC9WW7THTj3kYbTOFmU9fbg==", + "dev": true, + "engines": { + "node": ">= 8" + } + }, + "node_modules/svelte-hmr": { + "version": "0.14.3", + "resolved": "https://registry.npmjs.org/svelte-hmr/-/svelte-hmr-0.14.3.tgz", + "integrity": "sha512-N56xX405zLMw2tpGHKRx5h+kmdeZwxI21pvyC6OyBHJDCF6DlwWBm9TifdQmSD4dloWSmpDPzHWYa3CSjfopUg==", + "dev": true, + "peerDependencies": { + "svelte": ">=3.19.0" + } + }, + "node_modules/svelte-navigator": { + "version": "3.1.5", + "resolved": 
"https://registry.npmjs.org/svelte-navigator/-/svelte-navigator-3.1.5.tgz", + "integrity": "sha512-CGTaexasSLpUaTSN2AlYqii0JeisIgg7uZbm8XCLKlpM9Qv3IltlJ7Nvh90Xw9ND97KqtGOjNJ3LNwMN1ABV0w==", + "dev": true, + "dependencies": { + "svelte2tsx": "^0.1.151" + }, + "peerDependencies": { + "svelte": "3.x" + } + }, + "node_modules/svelte-preprocess": { + "version": "4.7.3", + "resolved": "https://registry.npmjs.org/svelte-preprocess/-/svelte-preprocess-4.7.3.tgz", + "integrity": "sha512-Zx1/xLeGOIBlZMGPRCaXtlMe4ZA0faato5Dc3CosEqwu75MIEPuOstdkH6cy+RYTUYynoxzNaDxkPX4DbrPwRA==", + "dev": true, + "hasInstallScript": true, + "dependencies": { + "@types/pug": "^2.0.4", + "@types/sass": "^1.16.0", + "detect-indent": "^6.0.0", + "strip-indent": "^3.0.0" + }, + "engines": { + "node": ">= 9.11.2" + }, + "peerDependencies": { + "@babel/core": "^7.10.2", + "coffeescript": "^2.5.1", + "less": "^3.11.3", + "postcss": "^7 || ^8", + "postcss-load-config": "^2.1.0 || ^3.0.0", + "pug": "^3.0.0", + "sass": "^1.26.8", + "stylus": "^0.54.7", + "sugarss": "^2.0.0", + "svelte": "^3.23.0", + "typescript": "^3.9.5 || ^4.0.0" + }, + "peerDependenciesMeta": { + "@babel/core": { + "optional": true + }, + "coffeescript": { + "optional": true + }, + "less": { + "optional": true + }, + "node-sass": { + "optional": true + }, + "postcss": { + "optional": true + }, + "postcss-load-config": { + "optional": true + }, + "pug": { + "optional": true + }, + "sass": { + "optional": true + }, + "stylus": { + "optional": true + }, + "sugarss": { + "optional": true + }, + "typescript": { + "optional": true + } + } + }, + "node_modules/svelte2tsx": { + "version": "0.1.191", + "resolved": "https://registry.npmjs.org/svelte2tsx/-/svelte2tsx-0.1.191.tgz", + "integrity": "sha512-pSciOMX9LAmvirtCilFRqrtHJxml4pdyMKq7Tjpac4EaWsO89Cfsjl04wBZ7YZKzW8UotbPOjCaJEoC/XDc/QQ==", + "dev": true, + "dependencies": { + "dedent-js": "^1.0.1", + "pascal-case": "^3.1.1" + }, + "peerDependencies": { + "svelte": "^3.24", + "typescript": "^4.1.2" + } + }, + "node_modules/tslib": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.2.0.tgz", + "integrity": "sha512-gS9GVHRU+RGn5KQM2rllAlR3dU6m7AcpJKdtH8gFvQiC4Otgk98XnmMU+nZenHt/+VhnBPWwgrJsyrdcw6i23w==", + "dev": true + }, + "node_modules/typescript": { + "version": "4.2.4", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.2.4.tgz", + "integrity": "sha512-V+evlYHZnQkaz8TRBuxTA92yZBPotr5H+WhQ7bD3hZUndx5tGOa1fuCgeSjxAzM1RiN5IzvadIXTVefuuwZCRg==", + "dev": true, + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=4.2.0" + } + }, + "node_modules/vite": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/vite/-/vite-2.3.2.tgz", + "integrity": "sha512-QhLdOompDrfkyryCNTts9HE+eJhvhN9ibKNJ5Q8DpQai+6nOsuIlaveZNg67e1O/2QaWqXeBo82eHnAs1De2bQ==", + "dev": true, + "dependencies": { + "esbuild": "^0.11.20", + "fsevents": "~2.3.1", + "postcss": "^8.2.10", + "resolve": "^1.19.0", + "rollup": "^2.38.5" + }, + "bin": { + "vite": "bin/vite.js" + }, + "engines": { + "node": ">=12.0.0" + }, + "optionalDependencies": { + "fsevents": "~2.3.1" + } + } + }, + "dependencies": { + "@rollup/pluginutils": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/@rollup/pluginutils/-/pluginutils-4.1.0.tgz", + "integrity": "sha512-TrBhfJkFxA+ER+ew2U2/fHbebhLT/l/2pRk0hfj9KusXUuRXd2v0R58AfaZK9VXDQ4TogOSEmICVrQAA3zFnHQ==", + "dev": true, + "requires": { + "estree-walker": "^2.0.1", + "picomatch": "^2.2.2" + } + }, + 
"@sveltejs/vite-plugin-svelte": { + "version": "1.0.0-next.10", + "resolved": "https://registry.npmjs.org/@sveltejs/vite-plugin-svelte/-/vite-plugin-svelte-1.0.0-next.10.tgz", + "integrity": "sha512-ImvxbhPePm2hWNTKBSA3LHAYGwiEjHjvvgfPLXm4R87sfZ+BMXql9jBmDpzUC/URBLT4BB3Jxos/i523qkJBHg==", + "dev": true, + "requires": { + "@rollup/pluginutils": "^4.1.0", + "chalk": "^4.1.1", + "debug": "^4.3.2", + "hash-sum": "^2.0.0", + "require-relative": "^0.8.7", + "slash": "^4.0.0", + "source-map": "^0.7.3", + "svelte-hmr": "^0.14.2" + } + }, + "@types/node": { + "version": "15.0.3", + "resolved": "https://registry.npmjs.org/@types/node/-/node-15.0.3.tgz", + "integrity": "sha512-/WbxFeBU+0F79z9RdEOXH4CsDga+ibi5M8uEYr91u3CkT/pdWcV8MCook+4wDPnZBexRdwWS+PiVZ2xJviAzcQ==", + "dev": true + }, + "@types/pug": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/@types/pug/-/pug-2.0.4.tgz", + "integrity": "sha1-h3L80EGOPNLMFxVV1zAHQVBR9LI=", + "dev": true + }, + "@types/sass": { + "version": "1.16.0", + "resolved": "https://registry.npmjs.org/@types/sass/-/sass-1.16.0.tgz", + "integrity": "sha512-2XZovu4NwcqmtZtsBR5XYLw18T8cBCnU2USFHTnYLLHz9fkhnoEMoDsqShJIOFsFhn5aJHjweiUUdTrDGujegA==", + "dev": true, + "requires": { + "@types/node": "*" + } + }, + "ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, + "requires": { + "color-convert": "^2.0.1" + } + }, + "chalk": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.1.tgz", + "integrity": "sha512-diHzdDKxcU+bAsUboHLPEDQiw0qEe0qd7SYUn3HgcFlWgbDcfLGswOHYeGrHKzG9z6UYf01d9VFMfZxPM1xZSg==", + "dev": true, + "requires": { + "ansi-styles": "^4.1.0", + "supports-color": "^7.1.0" + } + }, + "color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dev": true, + "requires": { + "color-name": "~1.1.4" + } + }, + "color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "dev": true + }, + "colorette": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/colorette/-/colorette-1.2.2.tgz", + "integrity": "sha512-MKGMzyfeuutC/ZJ1cba9NqcNpfeqMUcYmyF1ZFY6/Cn7CNSAKx6a+s48sqLqyAiZuaP2TcqMhoo+dlwFnVxT9w==", + "dev": true + }, + "debug": { + "version": "4.3.2", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.2.tgz", + "integrity": "sha512-mOp8wKcvj7XxC78zLgw/ZA+6TSgkoE2C/ienthhRD298T7UNwAg9diBpLRxC0mOezLl4B0xV7M0cCO6P/O0Xhw==", + "dev": true, + "requires": { + "ms": "2.1.2" + } + }, + "dedent-js": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/dedent-js/-/dedent-js-1.0.1.tgz", + "integrity": "sha1-vuX7fJ5yfYXf+iRZDRDsGrElUwU=", + "dev": true + }, + "detect-indent": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/detect-indent/-/detect-indent-6.0.0.tgz", + "integrity": "sha512-oSyFlqaTHCItVRGK5RmrmjB+CmaMOW7IaNA/kdxqhoa6d17j/5ce9O9eWXmV/KEdRwqpQA+Vqe8a8Bsybu4YnA==", + "dev": true + }, + "esbuild": { + "version": "0.11.20", + "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.11.20.tgz", + "integrity": 
"sha512-QOZrVpN/Yz74xfat0H6euSgn3RnwLevY1mJTEXneukz1ln9qB+ieaerRMzSeETpz/UJWsBMzRVR/andBht5WKw==", + "dev": true + }, + "estree-walker": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-2.0.2.tgz", + "integrity": "sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w==", + "dev": true + }, + "fsevents": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", + "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", + "dev": true, + "optional": true + }, + "function-bind": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.1.tgz", + "integrity": "sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A==", + "dev": true + }, + "has": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/has/-/has-1.0.3.tgz", + "integrity": "sha512-f2dvO0VU6Oej7RkWJGrehjbzMAjFp5/VKPp5tTpWIV4JHHZK1/BxbFRtf/siA2SWTe09caDmVtYYzWEIbBS4zw==", + "dev": true, + "requires": { + "function-bind": "^1.1.1" + } + }, + "has-flag": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "dev": true + }, + "hash-sum": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/hash-sum/-/hash-sum-2.0.0.tgz", + "integrity": "sha512-WdZTbAByD+pHfl/g9QSsBIIwy8IT+EsPiKDs0KNX+zSHhdDLFKdZu0BQHljvO+0QI/BasbMSUa8wYNCZTvhslg==", + "dev": true + }, + "is-core-module": { + "version": "2.4.0", + "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.4.0.tgz", + "integrity": "sha512-6A2fkfq1rfeQZjxrZJGerpLCTHRNEBiSgnu0+obeJpEPZRUooHgsizvzv0ZjJwOz3iWIHdJtVWJ/tmPr3D21/A==", + "dev": true, + "requires": { + "has": "^1.0.3" + } + }, + "lower-case": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/lower-case/-/lower-case-2.0.2.tgz", + "integrity": "sha512-7fm3l3NAF9WfN6W3JOmf5drwpVqX78JtoGJ3A6W0a6ZnldM41w2fV5D490psKFTpMds8TJse/eHLFFsNHHjHgg==", + "dev": true, + "requires": { + "tslib": "^2.0.3" + } + }, + "min-indent": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/min-indent/-/min-indent-1.0.1.tgz", + "integrity": "sha512-I9jwMn07Sy/IwOj3zVkVik2JTvgpaykDZEigL6Rx6N9LbMywwUSMtxET+7lVoDLLd3O3IXwJwvuuns8UB/HeAg==", + "dev": true + }, + "ms": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", + "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", + "dev": true + }, + "nanoid": { + "version": "3.1.23", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.1.23.tgz", + "integrity": "sha512-FiB0kzdP0FFVGDKlRLEQ1BgDzU87dy5NnzjeW9YZNt+/c3+q82EQDUwniSAUxp/F0gFNI1ZhKU1FqYsMuqZVnw==", + "dev": true + }, + "no-case": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/no-case/-/no-case-3.0.4.tgz", + "integrity": "sha512-fgAN3jGAh+RoxUGZHTSOLJIqUc2wmoBwGR4tbpNAKmmovFoWq0OdRkb0VkldReO2a2iBT/OEulG9XSUc10r3zg==", + "dev": true, + "requires": { + "lower-case": "^2.0.2", + "tslib": "^2.0.3" + } + }, + "pascal-case": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/pascal-case/-/pascal-case-3.1.2.tgz", + "integrity": "sha512-uWlGT3YSnK9x3BQJaOdcZwrnV6hPpd8jFH1/ucpiLRPh/2zCVJKS19E4GvYHvaCcACn3foXZ0cLB9Wrx1KGe5g==", + "dev": true, + 
"requires": { + "no-case": "^3.0.4", + "tslib": "^2.0.3" + } + }, + "path-parse": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.6.tgz", + "integrity": "sha512-GSmOT2EbHrINBf9SR7CDELwlJ8AENk3Qn7OikK4nFYAu3Ote2+JYNVvkpAEQm3/TLNEJFD/xZJjzyxg3KBWOzw==", + "dev": true + }, + "picomatch": { + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.2.3.tgz", + "integrity": "sha512-KpELjfwcCDUb9PeigTs2mBJzXUPzAuP2oPcA989He8Rte0+YUAjw1JVedDhuTKPkHjSYzMN3npC9luThGYEKdg==", + "dev": true + }, + "postcss": { + "version": "8.2.15", + "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.2.15.tgz", + "integrity": "sha512-2zO3b26eJD/8rb106Qu2o7Qgg52ND5HPjcyQiK2B98O388h43A448LCslC0dI2P97wCAQRJsFvwTRcXxTKds+Q==", + "dev": true, + "requires": { + "colorette": "^1.2.2", + "nanoid": "^3.1.23", + "source-map": "^0.6.1" + }, + "dependencies": { + "source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "dev": true + } + } + }, + "require-relative": { + "version": "0.8.7", + "resolved": "https://registry.npmjs.org/require-relative/-/require-relative-0.8.7.tgz", + "integrity": "sha1-eZlTn8ngR6N5KPoZb44VY9q9Nt4=", + "dev": true + }, + "resolve": { + "version": "1.20.0", + "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.20.0.tgz", + "integrity": "sha512-wENBPt4ySzg4ybFQW2TT1zMQucPK95HSh/nq2CFTZVOGut2+pQvSsgtda4d26YrYcr067wjbmzOG8byDPBX63A==", + "dev": true, + "requires": { + "is-core-module": "^2.2.0", + "path-parse": "^1.0.6" + } + }, + "rollup": { + "version": "2.47.0", + "resolved": "https://registry.npmjs.org/rollup/-/rollup-2.47.0.tgz", + "integrity": "sha512-rqBjgq9hQfW0vRmz+0S062ORRNJXvwRpzxhFXORvar/maZqY6za3rgQ/p1Glg+j1hnc1GtYyQCPiAei95uTElg==", + "dev": true, + "requires": { + "fsevents": "~2.3.1" + } + }, + "slash": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/slash/-/slash-4.0.0.tgz", + "integrity": "sha512-3dOsAHXXUkQTpOYcoAxLIorMTp4gIQr5IW3iVb7A7lFIp0VHhnynm9izx6TssdrIcVIESAlVjtnO2K8bg+Coew==", + "dev": true + }, + "source-map": { + "version": "0.7.3", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.7.3.tgz", + "integrity": "sha512-CkCj6giN3S+n9qrYiBTX5gystlENnRW5jZeNLHpe6aue+SrHcG5VYwujhW9s4dY31mEGsxBDrHR6oI69fTXsaQ==", + "dev": true + }, + "strip-indent": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/strip-indent/-/strip-indent-3.0.0.tgz", + "integrity": "sha512-laJTa3Jb+VQpaC6DseHhF7dXVqHTfJPCRDaEbid/drOhgitgYku/letMUqOXFoWV0zIIUbjpdH2t+tYj4bQMRQ==", + "dev": true, + "requires": { + "min-indent": "^1.0.0" + } + }, + "supports-color": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", + "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "dev": true, + "requires": { + "has-flag": "^4.0.0" + } + }, + "svelte": { + "version": "3.38.2", + "resolved": "https://registry.npmjs.org/svelte/-/svelte-3.38.2.tgz", + "integrity": "sha512-q5Dq0/QHh4BLJyEVWGe7Cej5NWs040LWjMbicBGZ+3qpFWJ1YObRmUDZKbbovddLC9WW7THTj3kYbTOFmU9fbg==", + "dev": true + }, + "svelte-hmr": { + "version": "0.14.3", + "resolved": "https://registry.npmjs.org/svelte-hmr/-/svelte-hmr-0.14.3.tgz", + "integrity": 
"sha512-N56xX405zLMw2tpGHKRx5h+kmdeZwxI21pvyC6OyBHJDCF6DlwWBm9TifdQmSD4dloWSmpDPzHWYa3CSjfopUg==", + "dev": true, + "requires": {} + }, + "svelte-navigator": { + "version": "3.1.5", + "resolved": "https://registry.npmjs.org/svelte-navigator/-/svelte-navigator-3.1.5.tgz", + "integrity": "sha512-CGTaexasSLpUaTSN2AlYqii0JeisIgg7uZbm8XCLKlpM9Qv3IltlJ7Nvh90Xw9ND97KqtGOjNJ3LNwMN1ABV0w==", + "dev": true, + "requires": { + "svelte2tsx": "^0.1.151" + } + }, + "svelte-preprocess": { + "version": "4.7.3", + "resolved": "https://registry.npmjs.org/svelte-preprocess/-/svelte-preprocess-4.7.3.tgz", + "integrity": "sha512-Zx1/xLeGOIBlZMGPRCaXtlMe4ZA0faato5Dc3CosEqwu75MIEPuOstdkH6cy+RYTUYynoxzNaDxkPX4DbrPwRA==", + "dev": true, + "requires": { + "@types/pug": "^2.0.4", + "@types/sass": "^1.16.0", + "detect-indent": "^6.0.0", + "strip-indent": "^3.0.0" + } + }, + "svelte2tsx": { + "version": "0.1.191", + "resolved": "https://registry.npmjs.org/svelte2tsx/-/svelte2tsx-0.1.191.tgz", + "integrity": "sha512-pSciOMX9LAmvirtCilFRqrtHJxml4pdyMKq7Tjpac4EaWsO89Cfsjl04wBZ7YZKzW8UotbPOjCaJEoC/XDc/QQ==", + "dev": true, + "requires": { + "dedent-js": "^1.0.1", + "pascal-case": "^3.1.1" + } + }, + "tslib": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.2.0.tgz", + "integrity": "sha512-gS9GVHRU+RGn5KQM2rllAlR3dU6m7AcpJKdtH8gFvQiC4Otgk98XnmMU+nZenHt/+VhnBPWwgrJsyrdcw6i23w==", + "dev": true + }, + "typescript": { + "version": "4.2.4", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.2.4.tgz", + "integrity": "sha512-V+evlYHZnQkaz8TRBuxTA92yZBPotr5H+WhQ7bD3hZUndx5tGOa1fuCgeSjxAzM1RiN5IzvadIXTVefuuwZCRg==", + "dev": true + }, + "vite": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/vite/-/vite-2.3.2.tgz", + "integrity": "sha512-QhLdOompDrfkyryCNTts9HE+eJhvhN9ibKNJ5Q8DpQai+6nOsuIlaveZNg67e1O/2QaWqXeBo82eHnAs1De2bQ==", + "dev": true, + "requires": { + "esbuild": "^0.11.20", + "fsevents": "~2.3.1", + "postcss": "^8.2.10", + "resolve": "^1.19.0", + "rollup": "^2.38.5" + } + } + } +} diff --git a/batch2/svelte-batch/package.json b/batch2/svelte-batch/package.json new file mode 100644 index 00000000000..e256d65e9f4 --- /dev/null +++ b/batch2/svelte-batch/package.json @@ -0,0 +1,17 @@ +{ + "name": "svelte-batch", + "version": "0.0.0", + "scripts": { + "dev": "vite", + "build": "vite build", + "serve": "vite preview" + }, + "devDependencies": { + "@sveltejs/vite-plugin-svelte": "^1.0.0-next.7", + "svelte": "^3.37.0", + "svelte-navigator": "^3.1.5", + "svelte-preprocess": "^4.7.2", + "typescript": "^4.2.4", + "vite": "^2.2.3" + } +} diff --git a/batch2/svelte-batch/public/favicon.ico b/batch2/svelte-batch/public/favicon.ico new file mode 100644 index 0000000000000000000000000000000000000000..d75d248ef0b15096a95054643a4d97f5d9b60846 GIT binary patch literal 1150 zcmaKqSxA*Z7>4KMS_;~x;8i!JU{-`tpyx!I(n2NFMU)w2L{WiMS3#Lcp@vrpA*5Yp zSy6|`AkYfDD(e{`n8yX0pLf20X1@3RmKkGw`Vte3=0)aUq%ldx zm^49K+Hw0b#^`KboP)QXJOwbuVUFxlAs{RfqJ+twGylWfOp{Hc$s#253LlN1nsFVc zKa>40?h5(7PTC6ltDx)(Y&Ze2xggCq(kK? 
zTA`;gAfKD!+uFjpxc_A3+Ma(L28W=z4Gvs@r*ECk`;c45=S#;=oA|abt`f&j5&uJO z3Dn+&^gZ%h4JidsaTR{{!_Y8PUx(-%PosPy2gi@qIvBMMYz;e3L1{f~mrd9RdB>pZ zD}4R|sk_C`;=cT&r)c=8u>7h9)u32*SbL`xiq3(pq5C^5-sSOw;<|fv@nfXfl&U`2 z81K5ExDp;bf#DISW%IY%k&2-noShOoz-;kb(u?5RFX-ro?87j3GZdCXrFc8bTx}jd zz_n@djWnxc*TbbCjEq80FPyG}1zQwvjq7R6ZSWuQ@_#A*LN5n<3$BI?X}q%iD!B-s zdSFcNp!EgpJr6CAK?klug4>=)Tv z+F#{yt>6EK)3NU=L&y_W3UNaC?Tg=6YE0)^V;(0Mb0$WJ7>7@Lg0~+3x9d)!Pd + import { Router, Route } from 'svelte-navigator' + import '@hail/common/hail.css' + import BatchesPage from './pages/BatchesPage.svelte' + import BatchPage from './pages/BatchPage.svelte' + + +
+ +
+ + + + + + + +

Uh oh! That page does not exist...

+
+
+
+
diff --git a/batch2/svelte-batch/src/assets/svelte.png b/batch2/svelte-batch/src/assets/svelte.png new file mode 100644 index 0000000000000000000000000000000000000000..e673c91c7bcb0e3a1be737745af56990613a641e GIT binary patch literal 5185 zcmb7Ic{r5c+ka+Z#*(pQFEeFJ2}O~Z8EF@d5F#cN<(nC6vWyuCh3ZR|LZ*_XWJ_TN zGm@>cg|g2KDNlX&N=se&V8QiJoo*%Kj+*cI2_v~tsxBn zz@`(&51#=5sJICQkT7x8Ql!%%u6zlnoR5ga1q=NDo|E#Tvlx+&i6{s!iYADXh@uR# zsDLVZaZglB7qwK1df1}TUeOF!w7eiTzrmZlAzn^C?2LmGAdfM@6NqH$J$fa(m%HH1 zEfIf;QtXMtHqMbFKSx~OKEuf3c~rB^bdVIWHs`$YVN>_&XMCrPgxJLYDO?fn5VAhz zS{B*|nZ)foWa$5LZCB%jF2cAcUORK-k8ut2urUfK=zcD`G@zLOQwDqfy#AkE*PAJx z4GhXXimv`pa!)O#G7HtL5)-th2wK70>Ye}Gbc4OQY3E&j(YUf>x;${qk(kCxEbmWI zRa1Ok9w9+fDE)D8K*t0V9-I9LPEuhSu@$-e+FCf5be=t#I@-)=37iq+*2{ba2H2FWiIdr6?Kc=LDOLd-zI-=UBUAUEa*oP{^!lu7LH2;!g18V=DQ5^+iQ!k z_q?5*CAo2QiN^^sS&F$uuNyr&q(NB8NGlnH{spB704y!@*#_GxnoJ8qq88l_0H z+N{Dd%ic8-6zjYP(|CfMwWz_vgae*Bibc6^4}Og8iZd$Yf4Repz2wpP>3;iml^>LE z`w;i4F4)5cz@2j~(2rZE^7n+Zt|0ArFaOnDB?vsW`og-;ZCkJ^5x)HRA?fCWgW)zB zg1~Q;P$%t_;4=ablxaey+KXQ#Xw*;6TBXLuGrh`S!3$3}W!F+Ez<6C=C$36`#$<4o z2Aq=F0bzwdNlU@mYD4k}PCy`=ROKjuMP9x;^KGmGwMRYm8*QDRWTM^$Gyh8QP44y# zw7$mydNNyM=`F6N=&QmP3(t%#k5_LV-qq&p!=wBhv8E=5kjvE3$O+~yx7&~UyC8_ zdv9csIBh?UT&>PkUg{VHHzZYoe}Xg?@|i;L__UJe=IPTwWY0%%dk#LMf0}Ac5k#XfN13Ts3vSg+4s*G0A2*i-!;o3ErBBhw2|*>K@EQww znf^f!xTE_@s7_PkuJ)~8rI}A;&6ld&a}7i3?1U)Pp-(-9EcnGvwz|YS&0_(h0e;dA zbBSOC`|;P9$%`iGmcT>9E6uKAPw4|J&SX)_6gE+>4gyy-1TB~UZUyw+;Zu=gr(wiZ z3HoBGc;BZ{)UPu5>~4^37zY%30f`CxB&WtPibuS|Y;D{aNIqr05-Z7eA%3ip5Su`- zSb#;)f^dqDc*mX?iLbEYa6E2NXN!=vFjGqjlm0fb%^zS;P-09~OdLn5d+7u9B8sZt zDL|(kE>dqXUPu>ov_Zx%jiZV+&c1+Ihn#>UE$`-B&VaOxE62#Es?vlP)aJgZDTVj= zYWcOyQ@GP-k72ie-G*$-V4@$%xbXoC=>+XyTwdF5t6j@^whHV|O!P*{YaUiQ5{b8; zr>x}Uo|yQW(=2Dw$3$c2=-K9-L`0=H1X&@y9nn@R*QmES;KDVBhKA1kI0RX&@Q&U( zZEv*fLeDCmj&40dS7Jl!^`ReE>(J!YL1Z|NP~R#`4!ZbzK&cLf6f*H`{#?q+dWJ)Z zE;le*hCP6kdU-5@x~nDj9$bd1to2-K2-4KyL^Xm5TB`CJJ|M13oBU>apA(C+IN+xc z{dvi-b$)i1jKBt;$rAG9&0t))j(N&03`^cbiCIttM9R5|C-^kg6(HsYK|Ho@j{1s$ zZhJ*9hkd?v%zE*6SFHZW=R#Uch#l2#bgAofCx}fDgHC-23)O2VYAEIdr&Iz4L6eh9 zvvdbLoEqmVgbVAi^EtCGjvb&p!z#3t`l%xw9*8i%i6)oV+COulKRG@iqiD17y!;yP zd!+y9?X@j{zP;Sg%Zxbl9Cy&Jl7X z1#?Mo4FtI~z0*VQWA%&DgYK2Z||2J*(0x8`gi> zxV0QcKX>)4YA2SUC3fkQyFdLjogxe(wgSJUofsu5w57^ z3+#?&yX#h36xC^deink;;{E+nyg};Nmpb9Ix4HJ?(rwoZ)#Odo$G|gtq~7YPqRh4( zh1ZA?z7enrUBo~5d>1fHwEuL8Y`nQ(^KeV-eyUKR7$WdAqkGklSBG49RabVZ@|_$U z5(RUUylOpjFk=d%4o#g01a`M7_MU_p8+dQZ^FB(UhhLaWUAB#1G$h2hB~+O%As$lX z;5DnxFjV|J1k)ejZQoz><{B+wxYAp$#rsZK%cH90XTbV+rNK`HD^$aDIy~$`kL=1V z`DjIA%#f)v6T$5{CSbt*co0r72lYjlUKk|PVo%7XI_b4T#PSd=@}MpzD6m6YMqxmg zog14%H-elu+8&v4tu$t6kCV{}wmPe-@$`>V=~P>Td7p3i__?d2W?didI7KO0`AtDS zNkYFh{fi?q_87+Zuy(-sy>bf*vYQb2Zu$O-%G;w6LaQy~^@6 zi%!2m+^_dUu`8tYw+hDBoVCb>vvT?YvVi1wJd0XA;TNQDu?xVxPSOf7n?0s5$TrhD4#!Ej8RWHotCK$T>pJr<6W}ft zs2=&E!~c=f`Z4B`3$P}ftU2Efp@%slfc-J;xRRfVU{RNDpRBms=jB%j5mx;R-|v;vEX+_-hII!_*f};KVAN?G&KRX2GAP z@M-P#1(Lu}Vf%(uI#n;@WUr&j6T9yeKm(vc3$0bvQVrP+0>Gj(#Mx=P07kC*HFfwN zL@_McO}h|6=EYg>1Wid!yHn^8@{Wrac4o6d;9D$$eI)Dq^iw7pk3j;75`Y_=EP$1W zV@}mQsr#6i*6kMpfC>Qgw};`VlrIpn0(C`5t*y2QT|UXZ83+LaJPXTFRLcbf&;$?? 
z*o01LS#cm2mpPaQt^Q6K4)<7a_aXez;t12qY*}+D5Y(;1-=Wkwzuh}`7!Jd@I*TP< z{kaqVyWCNRCgT21z|n_T{krVdCM4`SutmqRNR#5u{Qmfb-+6{vSI7Eyw!BMVJ_^_V z=e)8FLDBy9)HQtG^Qy*B9zxH2=uOs+Fi7E~92GST6s^KC-+fiaTdfwdNsskFo15Aw z>Y0)goNAwX{kFLGl+yEV)Wm3qF_(yxO)113`bU1q^?tmduw|-0m;uYduI4Y_u*6%Q zD_HN#Ir9SFY2xda>Rz&Y!FC)~sCq?a{nIB@6U;;a8yAD{C0-UVtlm}gpx(Jv#iCS5 za~|tC=IwX7Ce%$se?DYzGp13*Dcw74EzW6C4fnsgQ1_ftW(glh zYR`vEVWs!4#3U~BlYDPlNkkH3?^}zBVx;XO=;oPdo>SK>Wmc7%E)<{7oEXQ)P_97y zW^Mys9}K7)M**F*?y+#TLcw6>1W3pOwun;-HlY$c!d|P?OP0jdwL{H#Ju41xj#=wQ zK1%#&e@95andgyN3Xp->QqM`sS$Hr$>(OL$g~x$7q;xwy^sp4bD$|?g$X<~}&jbCG z!mwp&N@N864PGXd{FIENON#LY4&g3Hb68}-^3p7<7|&i7!qYv82c zWzcl^2op_+0jl*Z)ll9|^7uIEu}Vo`l`?kH>gC>=20o%p1Sho>_*hqbcTI!%!uka) zm37F1BxUAQlmHfdlujuuchBZ$u^?W4Db}C;@aS>HzF2dqzyMOy*Sh z(5Wv}OKL;O7>XObV}F;DhLVKI!>&4SlHa~ZNj{@va7%gk!tN9yH)f`)Y>BNNee-wqA@-P7 zmo+fE1fDFDy5jJ;Xx%Vphi<8q*sE+o6j#svA+b8COA9Tb>VG}kVH{;4npU-WV@SN> z7h5iYHXpu;bW`YCjvKbdZ+RuWyp}W%apAIAI#7XabEo}8k*lC(H12@_m>L8(PF&v^ zaNz#Z{+A36u5PQePx%t|DWl-{b)%94C(3iFnQCKqB@UdvUJ&t}uRrZ-(~}LzHt>s? zI4^1WJ-_da&#$`sHM;;m#u)`M=-XB+@(Dr3e1V1XFj+N$#+uG$EhjA+$Y(InEUE1| zzr;{K2u|<}LNm zeA;QzyA%d`Y%7x3CQmytPLj~7MjBV}+Y1oeosBMhsAZtpM^q-K2SK$1RuY)*r>Ac) zyx&D(@M4P!OS?bxb&=*qsLrp#$aL5l~B@cgqSn$l)9a+Ej#0$9I`r}~GR>lgGJLL0AYHaiMz z57?PKj3e0X-KfnMGScNGpI}CopnjI306}!4=8YMK!NNC_o5B*XvJ~Q7gN|s#j?BxH z&pqp-7!uE}Lf;N#&_OrAd-W3Ju4q6>@mIUVW8H-gbD950f3-t{IF#cVf1gTT#;Fi% zL3ztx?fKh2{6f@fl5oybzmlxNPrT}|$H{0{B)$ED+1bc(~OSM{-l{1dmLsMzh(PL+# z^-QYsfRKLw0CxvyusMaFRAGzu=X-Ta&i1yewRWmEXKzr^arb{88cLjS{NPaL18a*Igysgcdvt!TEjakV5xkVE<*{Q0J4)t!~JyB2ikK)7;hr{KEi1Gggj~dWS literal 0 HcmV?d00001 diff --git a/batch2/svelte-batch/src/components/BatchTable.svelte b/batch2/svelte-batch/src/components/BatchTable.svelte new file mode 100644 index 00000000000..eb60c9391e1 --- /dev/null +++ b/batch2/svelte-batch/src/components/BatchTable.svelte @@ -0,0 +1,15 @@ + + +

Batches

+
    + {#each batches as batch} +
  • + {batch.id} + {batch.state} +
  • + {/each} +
diff --git a/batch2/svelte-batch/src/components/JobTable.svelte b/batch2/svelte-batch/src/components/JobTable.svelte new file mode 100644 index 00000000000..32ec0e8f6ba --- /dev/null +++ b/batch2/svelte-batch/src/components/JobTable.svelte @@ -0,0 +1,14 @@ + + +

Batch #{batchId}

+
    + {#each jobs as job} +
  1. {JSON.stringify(job, null, 2)}
  2. + {/each} +
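The two table components above are purely presentational: BatchTable lists `batch.id` and `batch.state` for each entry, JobTable pretty-prints each job, and both receive their data as props from the pages that follow. During development those pages reach the REST API through the Vite proxy configured further down in vite.config.js, which forwards `/api` to `http://localhost:5050`. The sketch below is a hypothetical local stub for that port, handy for hacking on the UI without a running Batch deployment; the route path and the response shape are assumptions (the components only read `id` and `state`), not the real service's API.

```python
# Hypothetical stand-in for the service behind the Vite '/api' proxy (port 5050).
# The route path and payload shape are assumptions for local UI development only;
# BatchTable.svelte above reads just `id` and `state` from each record.
from aiohttp import web

FAKE_BATCHES = [
    {'id': 1, 'state': 'success'},
    {'id': 2, 'state': 'running'},
]

async def list_batches(request: web.Request) -> web.Response:
    return web.json_response({'batches': FAKE_BATCHES})

app = web.Application()
app.add_routes([web.get('/api/v1alpha/batches', list_batches)])

if __name__ == '__main__':
    web.run_app(app, port=5050)  # the port vite.config.js proxies '/api' to
```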
diff --git a/batch2/svelte-batch/src/global.d.ts b/batch2/svelte-batch/src/global.d.ts new file mode 100644 index 00000000000..4078e7476a2 --- /dev/null +++ b/batch2/svelte-batch/src/global.d.ts @@ -0,0 +1,2 @@ +/// +/// diff --git a/batch2/svelte-batch/src/main.ts b/batch2/svelte-batch/src/main.ts new file mode 100644 index 00000000000..d8200ac4fe3 --- /dev/null +++ b/batch2/svelte-batch/src/main.ts @@ -0,0 +1,7 @@ +import App from './App.svelte' + +const app = new App({ + target: document.getElementById('app') +}) + +export default app diff --git a/batch2/svelte-batch/src/pages/BatchPage.svelte b/batch2/svelte-batch/src/pages/BatchPage.svelte new file mode 100644 index 00000000000..4af01ffeb68 --- /dev/null +++ b/batch2/svelte-batch/src/pages/BatchPage.svelte @@ -0,0 +1,25 @@ + + +{#if jobs} + +{:else} +
Loading...
+{/if} diff --git a/batch2/svelte-batch/src/pages/BatchesPage.svelte b/batch2/svelte-batch/src/pages/BatchesPage.svelte new file mode 100644 index 00000000000..99fd07e791f --- /dev/null +++ b/batch2/svelte-batch/src/pages/BatchesPage.svelte @@ -0,0 +1,24 @@ + + +{#if batches} + +{:else} +
Loading...
+{/if} diff --git a/batch2/svelte-batch/svelte.config.cjs b/batch2/svelte-batch/svelte.config.cjs new file mode 100644 index 00000000000..0b32783a0c9 --- /dev/null +++ b/batch2/svelte-batch/svelte.config.cjs @@ -0,0 +1,7 @@ +const sveltePreprocess = require('svelte-preprocess') + +module.exports = { + // Consult https://github.com/sveltejs/svelte-preprocess + // for more information about preprocessors + preprocess: sveltePreprocess() +} diff --git a/batch2/svelte-batch/tsconfig.json b/batch2/svelte-batch/tsconfig.json new file mode 100644 index 00000000000..15d55f64d58 --- /dev/null +++ b/batch2/svelte-batch/tsconfig.json @@ -0,0 +1,37 @@ +{ + "compilerOptions": { + "moduleResolution": "node", + "target": "esnext", + "module": "esnext", + /** + * svelte-preprocess cannot figure out whether you have + * a value or a type, so tell TypeScript to enforce using + * `import type` instead of `import` for Types. + */ + "importsNotUsedAsValues": "error", + "isolatedModules": true, + "resolveJsonModule": true, + /** + * To have warnings / errors of the Svelte compiler at the + * correct position, enable source maps by default. + */ + "sourceMap": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "baseUrl": ".", + /** + * Typecheck JS in `.svelte` and `.js` files by default. + * Disable checkJs if you'd like to use dynamic types in JS. + * Note that setting allowJs false does not prevent the use + * of JS in `.svelte` files. + */ + "allowJs": true, + "checkJs": true + }, + /** + * Use global.d.ts instead of compilerOptions.types + * to avoid limiting type declarations. + */ + "include": ["src/**/*.d.ts", "src/**/*.ts", "src/**/*.js", "src/**/*.svelte"] +} diff --git a/batch2/svelte-batch/vite.config.js b/batch2/svelte-batch/vite.config.js new file mode 100644 index 00000000000..bc4d2e689af --- /dev/null +++ b/batch2/svelte-batch/vite.config.js @@ -0,0 +1,13 @@ +import { defineConfig } from 'vite' +import svelte from '@sveltejs/vite-plugin-svelte' + +// https://vitejs.dev/config/ +export default defineConfig({ + plugins: [svelte()], + server: { + proxy: { + '/api': 'http://localhost:5050' + }, + cors: true, + } +}) diff --git a/benchmark-service/Makefile b/benchmark-service/Makefile index 6098045eff5..ca075066cea 100644 --- a/benchmark-service/Makefile +++ b/benchmark-service/Makefile @@ -1,7 +1,8 @@ include ../config.mk -BENCHMARK_LATEST = $(DOCKER_PREFIX)/benchmark:latest -BENCHMARK_IMAGE = $(DOCKER_PREFIX)/benchmark:$(shell docker images -q --no-trunc benchmark:latest | sed -e 's,[^:]*:,,') +TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) + +BENCHMARK_IMAGE := $(DOCKER_PREFIX)/benchmark:$(TOKEN) EXTRA_PYTHONPATH := ../hail/python:../gear:../web_common PYTHON := PYTHONPATH=$${PYTHONPATH:+$${PYTHONPATH}:}$(EXTRA_PYTHONPATH) python3 @@ -14,20 +15,12 @@ check: .PHONY: build build: - $(MAKE) -C ../docker build - -docker pull $(BENCHMARK_LATEST) - python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"service-base"}}' Dockerfile Dockerfile.out - docker build -f Dockerfile.out -t benchmark --cache-from benchmark,$(BENCHMARK_LATEST),service-base .. 
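The Makefile changes here (and in bootstrap-gateway and ci below) replace the `:latest` tag/push cycle with a per-invocation tag: TOKEN is twelve random `[a-z0-9]` characters read from /dev/urandom, and `../docker-build.sh` builds and pushes `$(DOCKER_PREFIX)/benchmark:$(TOKEN)` directly, so every build produces a uniquely named image. For reference, roughly the same tag generation in Python (the DOCKER_PREFIX value here is illustrative only):

```python
# Rough equivalent of the Makefile rule:
#   TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12)
import secrets
import string

def image_token(length: int = 12) -> str:
    alphabet = string.ascii_lowercase + string.digits  # same [a-z0-9] alphabet as tr -dc
    return ''.join(secrets.choice(alphabet) for _ in range(length))

docker_prefix = 'gcr.io/example-project'  # illustrative; the real value comes from config.mk
print(f'{docker_prefix}/benchmark:{image_token()}')
```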
- -.PHONY: push -push: build - docker tag benchmark $(BENCHMARK_LATEST) - docker push $(BENCHMARK_LATEST) - docker tag benchmark $(BENCHMARK_IMAGE) - docker push $(BENCHMARK_IMAGE) + $(MAKE) -C ../docker service-base + python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"'$$(cat ../docker/service-base-image-ref)'"}}' Dockerfile Dockerfile.out + ../docker-build.sh .. benchmark/Dockerfile.out $(BENCHMARK_IMAGE) .PHONY: deploy -deploy: push +deploy: build ! [ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default python3 ../ci/jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"default_ns":{"name":"$(NAMESPACE)"}, "benchmark_image":{"image":"$(BENCHMARK_IMAGE)"},"benchmark_database":{"user_secret_name":"sql-benchmark-user-config"},"global":{"project":"$(PROJECT)","zone":"$(ZONE)","domain":"$(DOMAIN)"}}' deployment.yaml deployment.yaml.out kubectl -n $(NAMESPACE) apply -f deployment.yaml.out diff --git a/benchmark/python/benchmark_hail/compare/compare.py b/benchmark/python/benchmark_hail/compare/compare.py index 2484f7c2d8c..b099400d93a 100644 --- a/benchmark/python/benchmark_hail/compare/compare.py +++ b/benchmark/python/benchmark_hail/compare/compare.py @@ -113,10 +113,12 @@ def format(name, ratio, t1, t2, memory_ratio, mem1, mem2): print(format('Benchmark Name', 'Ratio', 'Time 1', 'Time 2', 'Mem Ratio', 'Mem 1 (MB)', 'Mem 2 (MB)')) print(format('--------------', '-----', '------', '------', '---------', '----------', '----------')) for name, r1, r2, m1, m2 in comparison: - comps.append(r2 / r1) print(format(name, fmt_diff(r2 / r1), fmt_time(r1, 8), fmt_time(r2, 8), fmt_mem_ratio(m2, m1), fmt_mem(m1), fmt_mem(m2))) + if name.startswith('sentinel'): + continue + comps.append(r2 / r1) print('----------------------') print(f'Harmonic mean: {fmt_diff(hmean(comps))}') diff --git a/benchmark/python/benchmark_hail/run/__init__.py b/benchmark/python/benchmark_hail/run/__init__.py index b7f5f6cea92..5e6846830ec 100644 --- a/benchmark/python/benchmark_hail/run/__init__.py +++ b/benchmark/python/benchmark_hail/run/__init__.py @@ -5,6 +5,7 @@ from . import linalg_benchmarks from . import shuffle_benchmarks from . import combiner_benchmarks +from . 
import sentinel_benchmarks __all__ = [ 'run_all', @@ -16,4 +17,5 @@ 'linalg_benchmarks', 'methods_benchmarks', 'shuffle_benchmarks', - 'combiner_benchmarks'] + 'combiner_benchmarks', + 'sentinel_benchmarks'] diff --git a/benchmark/python/benchmark_hail/run/methods_benchmarks.py b/benchmark/python/benchmark_hail/run/methods_benchmarks.py index 10a81b0414a..8b33a95546e 100644 --- a/benchmark/python/benchmark_hail/run/methods_benchmarks.py +++ b/benchmark/python/benchmark_hail/run/methods_benchmarks.py @@ -212,4 +212,36 @@ def linear_regression_rows_nd(mt_path): res = hl._linear_regression_rows_nd(y=[mt[key] for key in pheno_dict.keys()], x=mt.x, covariates=[mt[key] for key in cov_dict.keys()]) - res._force_count() \ No newline at end of file + res._force_count() + +@benchmark(args=random_doubles.handle('mt')) +def logistic_regression_rows_wald(mt_path): + mt = hl.read_matrix_table(mt_path) + mt = mt.head(2000) + num_phenos = 5 + num_covs = 2 + pheno_dict = {f"pheno_{i}": hl.rand_bool(.5, seed=i) for i in range(num_phenos)} + cov_dict = {f"cov_{i}": hl.rand_unif(0, 1, seed=i) for i in range(num_covs)} + mt = mt.annotate_cols(**pheno_dict) + mt = mt.annotate_cols(**cov_dict) + res = hl.logistic_regression_rows(test='wald', + y=[mt[key] for key in pheno_dict.keys()], + x=mt.x, + covariates=[mt[key] for key in cov_dict.keys()]) + res._force_count() + +@benchmark(args=random_doubles.handle('mt')) +def logistic_regression_rows_wald_nd(mt_path): + mt = hl.read_matrix_table(mt_path) + mt = mt.head(2000) + num_phenos = 5 + num_covs = 2 + pheno_dict = {f"pheno_{i}": hl.rand_bool(.5, seed=i) for i in range(num_phenos)} + cov_dict = {f"cov_{i}": hl.rand_unif(0, 1, seed=i) for i in range(num_covs)} + mt = mt.annotate_cols(**pheno_dict) + mt = mt.annotate_cols(**cov_dict) + res = hl._logistic_regression_rows_nd(test='wald', + y=[mt[key] for key in pheno_dict.keys()], + x=mt.x, + covariates=[mt[key] for key in cov_dict.keys()]) + res._force_count() diff --git a/benchmark/python/benchmark_hail/run/sentinel_benchmarks.py b/benchmark/python/benchmark_hail/run/sentinel_benchmarks.py new file mode 100644 index 00000000000..296ec4e6104 --- /dev/null +++ b/benchmark/python/benchmark_hail/run/sentinel_benchmarks.py @@ -0,0 +1,50 @@ +import hail as hl + +from .resources import * +from .utils import benchmark +import gzip + + +def read_gunzip(path): + with gzip.open(path) as f: + for line in f: + pass + + +@benchmark(args=many_ints_table.handle('tsv')) +def sentinel_read_gunzip_1(path): + read_gunzip(path) + + +@benchmark(args=many_ints_table.handle('tsv')) +def sentinel_read_gunzip_2(path): + read_gunzip(path) + + +@benchmark(args=many_ints_table.handle('tsv')) +def sentinel_read_gunzip_3(path): + read_gunzip(path) + + +def iter_hash(m, n): + x = 0 + for i in range(m): + y = 0 + for j in range(n): + y = hash(y + j) + x += y + + +@benchmark() +def sentinel_cpu_hash_1(): + iter_hash(10000, 25000) + + +@benchmark() +def sentinel_cpu_hash_2(): + iter_hash(10000, 25000) + + +@benchmark() +def sentinel_cpu_hash_3(): + iter_hash(10000, 25000) diff --git a/benchmark/scripts/benchmark_in_batch.py b/benchmark/scripts/benchmark_in_batch.py index c3e023924a5..2ccd900ccb8 100644 --- a/benchmark/scripts/benchmark_in_batch.py +++ b/benchmark/scripts/benchmark_in_batch.py @@ -1,7 +1,8 @@ import os import random -import sys import re +import sys +import time from benchmark_hail.run.resources import all_resources from benchmark_hail.run.utils import list_benchmarks @@ -18,9 +19,10 @@ labeled_sha = SHA label = 
os.environ.get('BENCHMARK_LABEL') + timestamp = time.strftime('%Y-%m-%d') if label: labeled_sha = f'{labeled_sha}-{label}' - output_file = os.path.join(BUCKET_BASE, f'{labeled_sha}.json') + output_file = os.path.join(BUCKET_BASE, f'{timestamp}-{labeled_sha}.json') permissions_test_file = os.path.join(BUCKET_BASE, f'permissions-test') b = hb.Batch( diff --git a/bootstrap-gateway/Makefile b/bootstrap-gateway/Makefile index dbb9616a73a..c535ccd9800 100644 --- a/bootstrap-gateway/Makefile +++ b/bootstrap-gateway/Makefile @@ -1,23 +1,17 @@ include ../config.mk -.PHONY: build push deploy +.PHONY: build deploy -GATEWAY_LATEST = $(DOCKER_PREFIX)/gateway:latest -GATEWAY_IMAGE = $(DOCKER_PREFIX)/gateway:$(shell docker images -q --no-trunc gateway | sed -e 's,[^:]*:,,') +TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) + +GATEWAY_IMAGE := $(DOCKER_PREFIX)/gateway:$(TOKEN) build: $(MAKE) -C ../docker hail-ubuntu - -docker pull $(GATEWAY_LATEST) - python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"hail-ubuntu"}}' Dockerfile Dockerfile.out - docker build -t gateway -f Dockerfile.out --cache-from gateway,$(GATEWAY_LATEST),hail-ubuntu . - -push: build - docker tag gateway $(GATEWAY_LATEST) - docker push $(GATEWAY_LATEST) - docker tag gateway $(GATEWAY_IMAGE) - docker push $(GATEWAY_IMAGE) + python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"'$$(cat ../docker/hail-ubuntu-image-ref)'"}}' Dockerfile Dockerfile.out + ../docker-build.sh . Dockerfile.out $(GATEWAY_IMAGE) -deploy: push +deploy: build python3 ../ci/jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":true,"global":{"ip":"$(IP)"}}' service.yaml service.yaml.out kubectl -n default apply -f service.yaml.out python3 ../ci/jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":true,"gateway_image":{"image":"$(GATEWAY_IMAGE)"},"global":{"project":"$(PROJECT)"}}' deployment.yaml deployment.yaml.out diff --git a/build.yaml b/build.yaml index 53e84ab088f..8c7c9b0cf84 100644 --- a/build.yaml +++ b/build.yaml @@ -46,6 +46,7 @@ steps: - gcr-pull-key - gcr-push-service-account-key - test-gsa-key + - test-aws-key - auth-oauth2-client-secret - benchmark-gsa-key - kind: buildImage2 @@ -156,7 +157,7 @@ steps: - from: /repo/tls to: /io/tls dependsOn: - - service_base_image + - hail_ubuntu_image - merge_code - kind: runImage name: create_ssl_config_hail_root @@ -627,10 +628,10 @@ steps: - service_base_image - merge_code - kind: buildImage2 - name: hail_kaniko_image - dockerFile: /io/repo/ci/kaniko/Dockerfile + name: hail_buildkit_image + dockerFile: /io/repo/ci/buildkit/Dockerfile contextPath: /io/repo/ci - publishAs: hail-kaniko + publishAs: hail-buildkit inputs: - from: /repo/ci to: /io/repo/ci @@ -1252,8 +1253,10 @@ steps: python3 -m pip install --no-dependencies hail-*-py3-none-any.whl export HAIL_TEST_RESOURCES_DIR=./resources export HAIL_DOCTEST_DATA_DIR=./data - export HAIL_TEST_BUCKET=cpg-hail-test + export HAIL_TEST_GCS_BUCKET=cpg-hail-test export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json + export HAIL_TEST_S3_BUCKET=hail-test-dy5rg + export AWS_SHARED_CREDENTIALS_FILE=/test-aws-key/credentials export PYSPARK_SUBMIT_ARGS="--driver-memory 6g pyspark-shell" export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=0 @@ -1272,6 +1275,10 @@ steps: namespace: valueFrom: default_ns.name mountPath: /test-gsa-key + - name: test-aws-key + namespace: + valueFrom: default_ns.name + mountPath: /test-aws-key dependsOn: - default_ns - hail_run_tests_image @@ 
-1293,8 +1300,10 @@ steps: python3 -m pip install --no-dependencies hail-*-py3-none-any.whl export HAIL_TEST_RESOURCES_DIR=./resources export HAIL_DOCTEST_DATA_DIR=./data - export HAIL_TEST_BUCKET=cpg-hail-test + export HAIL_TEST_GCS_BUCKET=cpg-hail-test export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json + export HAIL_TEST_S3_BUCKET=hail-test-dy5rg + export AWS_SHARED_CREDENTIALS_FILE=/test-aws-key/credentials export PYSPARK_SUBMIT_ARGS="--driver-memory 6g pyspark-shell" export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=1 @@ -1313,6 +1322,10 @@ steps: namespace: valueFrom: default_ns.name mountPath: /test-gsa-key + - name: test-aws-key + namespace: + valueFrom: default_ns.name + mountPath: /test-aws-key dependsOn: - default_ns - hail_run_tests_image @@ -1334,8 +1347,10 @@ steps: python3 -m pip install --no-dependencies hail-*-py3-none-any.whl export HAIL_TEST_RESOURCES_DIR=./resources export HAIL_DOCTEST_DATA_DIR=./data - export HAIL_TEST_BUCKET=cpg-hail-test + export HAIL_TEST_GCS_BUCKET=cpg-hail-test export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json + export HAIL_TEST_S3_BUCKET=hail-test-dy5rg + export AWS_SHARED_CREDENTIALS_FILE=/test-aws-key/credentials export PYSPARK_SUBMIT_ARGS="--driver-memory 6g pyspark-shell" export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=2 @@ -1354,6 +1369,10 @@ steps: namespace: valueFrom: default_ns.name mountPath: /test-gsa-key + - name: test-aws-key + namespace: + valueFrom: default_ns.name + mountPath: /test-aws-key dependsOn: - default_ns - hail_run_tests_image @@ -1375,8 +1394,10 @@ steps: python3 -m pip install --no-dependencies hail-*-py3-none-any.whl export HAIL_TEST_RESOURCES_DIR=./resources export HAIL_DOCTEST_DATA_DIR=./data - export HAIL_TEST_BUCKET=cpg-hail-test + export HAIL_TEST_GCS_BUCKET=cpg-hail-test export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json + export HAIL_TEST_S3_BUCKET=hail-test-dy5rg + export AWS_SHARED_CREDENTIALS_FILE=/test-aws-key/credentials export PYSPARK_SUBMIT_ARGS="--driver-memory 6g pyspark-shell" export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=3 @@ -1395,6 +1416,10 @@ steps: namespace: valueFrom: default_ns.name mountPath: /test-gsa-key + - name: test-aws-key + namespace: + valueFrom: default_ns.name + mountPath: /test-aws-key dependsOn: - default_ns - hail_run_tests_image @@ -1416,8 +1441,10 @@ steps: python3 -m pip install --no-dependencies hail-*-py3-none-any.whl export HAIL_TEST_RESOURCES_DIR=./resources export HAIL_DOCTEST_DATA_DIR=./data - export HAIL_TEST_BUCKET=cpg-hail-test + export HAIL_TEST_GCS_BUCKET=cpg-hail-test export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json + export HAIL_TEST_S3_BUCKET=hail-test-dy5rg + export AWS_SHARED_CREDENTIALS_FILE=/test-aws-key/credentials export PYSPARK_SUBMIT_ARGS="--driver-memory 6g pyspark-shell" export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=4 @@ -1436,6 +1463,10 @@ steps: namespace: valueFrom: default_ns.name mountPath: /test-gsa-key + - name: test-aws-key + namespace: + valueFrom: default_ns.name + mountPath: /test-aws-key dependsOn: - default_ns - hail_run_tests_image @@ -1453,8 +1484,10 @@ steps: tar xzf test.tar.gz tar xvf debug-wheel-container.tar python3 -m pip install --no-dependencies hail-*-py3-none-any.whl - export HAIL_TEST_BUCKET=cpg-hail-test + export HAIL_TEST_GCS_BUCKET=cpg-hail-test export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json + export HAIL_TEST_S3_BUCKET=hail-test-dy5rg + export AWS_SHARED_CREDENTIALS_FILE=/test-aws-key/credentials python3 -m pytest --log-cli-level=INFO -s -vv --instafail 
--durations=50 -n 10 test/hailtop/aiotools/test_copy.py inputs: - from: /debug-wheel-container.tar @@ -1466,6 +1499,10 @@ steps: namespace: valueFrom: default_ns.name mountPath: /test-gsa-key + - name: test-aws-key + namespace: + valueFrom: default_ns.name + mountPath: /test-aws-key dependsOn: - default_ns - hail_run_image @@ -1487,7 +1524,7 @@ steps: python3 -m pip install --no-dependencies hail-*-py3-none-any.whl export HAIL_TEST_RESOURCES_DIR=./resources export HAIL_DOCTEST_DATA_DIR=./data - export HAIL_TEST_BUCKET=cpg-hail-test + export HAIL_TEST_GCS_BUCKET=cpg-hail-test export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export PYSPARK_SUBMIT_ARGS="--driver-memory 6g pyspark-shell" python3 -m pytest -m unchecked_allocator --ignore=test/hailtop/batch/ --log-cli-level=INFO -s -vv --instafail --durations=50 test @@ -1604,7 +1641,7 @@ steps: python3 -m pip install --no-dependencies hail-*-py3-none-any.whl export HAIL_TEST_RESOURCES_DIR=./resources export HAIL_DOCTEST_DATA_DIR=./data - export HAIL_TEST_BUCKET=cpg-hail-test + export HAIL_TEST_GCS_BUCKET=cpg-hail-test export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=2 export HAIL_QUERY_BACKEND=local @@ -1726,7 +1763,7 @@ steps: script: | set -ex export HAIL_SHORT_VERSION='0.2' - export SPHINXOPTS='-tchecktutorial' + export SPHINXOPTS='-tgenerate_notebook_outputs' mkdir -p {{ token }}/python cd {{ token }} @@ -2464,6 +2501,7 @@ steps: - base_image - build_hail_jar_only - merge_code + - create_test_gsa_keys - kind: deploy name: deploy_query namespace: @@ -3033,7 +3071,7 @@ steps: - deploy_batch - deploy_ci_agent - create_certs - - hail_kaniko_image + - hail_buildkit_image - kind: runImage name: test_ci image: @@ -3081,6 +3119,7 @@ steps: cd /io/hailtop set -ex export HAIL_GSA_KEY_FILE=/test-gsa-key/key.json + export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=0 export DOCKER_PREFIX="{{ global.docker_prefix }}" @@ -3133,6 +3172,7 @@ steps: cd /io/hailtop set -ex export HAIL_GSA_KEY_FILE=/test-gsa-key/key.json + export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=1 export DOCKER_PREFIX="{{ global.docker_prefix }}" @@ -3185,6 +3225,7 @@ steps: cd /io/hailtop set -ex export HAIL_GSA_KEY_FILE=/test-gsa-key/key.json + export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=2 export DOCKER_PREFIX="{{ global.docker_prefix }}" @@ -3237,6 +3278,7 @@ steps: cd /io/hailtop set -ex export HAIL_GSA_KEY_FILE=/test-gsa-key/key.json + export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=3 export DOCKER_PREFIX="{{ global.docker_prefix }}" @@ -3289,6 +3331,7 @@ steps: cd /io/hailtop set -ex export HAIL_GSA_KEY_FILE=/test-gsa-key/key.json + export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=4 export DOCKER_PREFIX="{{ global.docker_prefix }}" @@ -3340,6 +3383,7 @@ steps: script: | set -ex export HAIL_GSA_KEY_FILE=/test-gsa-key/key.json + export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json cd /io/hailtop/batch hailctl config set batch/billing_project test hailctl config set batch/bucket cpg-hail-test diff --git a/ci/Dockerfile.ci-utils b/ci/Dockerfile.ci-utils index 557a30c0dbc..f8cf1c7b6ca 100644 --- a/ci/Dockerfile.ci-utils +++ b/ci/Dockerfile.ci-utils @@ -1,6 +1,5 @@ FROM {{ service_base_image.image }} -RUN hail-apt-get-install docker.io RUN hail-pip-install 
twine COPY jinja2_render.py . COPY wait-for.py . diff --git a/ci/Makefile b/ci/Makefile index 40c2067d193..fc155da4a7e 100644 --- a/ci/Makefile +++ b/ci/Makefile @@ -1,13 +1,10 @@ include ../config.mk -CI_UTILS_LATEST = $(DOCKER_PREFIX)/ci-utils:latest -CI_UTILS_IMAGE = $(DOCKER_PREFIX)/ci-utils:$(shell docker images -q --no-trunc ci-utils | sed -e 's,[^:]*:,,') +TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) -CI_LATEST = $(DOCKER_PREFIX)/ci:latest -CI_IMAGE = $(DOCKER_PREFIX)/ci:$(shell docker images -q --no-trunc ci | sed -e 's,[^:]*:,,') - -HAIL_KANIKO_LATEST = $(DOCKER_PREFIX)/hail_kaniko:latest -HAIL_KANIKO_IMAGE = $(DOCKER_PREFIX)/hail_kaniko:$(shell docker images -q --no-trunc hail_kaniko | sed -e 's,[^:]*:,,') +CI_UTILS_IMAGE := $(DOCKER_PREFIX)/ci-utils:$(TOKEN) +CI_IMAGE := $(DOCKER_PREFIX)/ci:$(TOKEN) +HAIL_BUILDKIT_IMAGE := $(DOCKER_PREFIX)/hail-buildkit:$(TOKEN) EXTRA_PYTHONPATH := ../batch:../hail/python:../gear:../web_common PYTHON := PYTHONPATH=$${PYTHONPATH:+$${PYTHONPATH}:}$(EXTRA_PYTHONPATH) python3 @@ -26,48 +23,27 @@ check: blacken: $(BLACK) -.PHONY: build-ci-utils -build-ci-utils: - $(MAKE) -C ../docker build - -docker pull $(CI_UTILS_LATEST) - python3 jinja2_render.py '{"service_base_image":{"image":"service-base"}}' Dockerfile.ci-utils Dockerfile.ci-utils.out - docker build -t ci-utils -f Dockerfile.ci-utils.out --cache-from ci-utils,$(CI_UTILS_LATEST),service-base . - -.PHONY: push-ci-utils -push-ci-utils: build-ci-utils - docker tag ci-utils $(CI_UTILS_LATEST) - docker push $(CI_UTILS_LATEST) - docker tag ci-utils $(CI_UTILS_IMAGE) - docker push $(CI_UTILS_IMAGE) +.PHONY: service-base +service-base: + $(MAKE) -C ../docker service-base -.PHONY: hail-kaniko-build -hail-kaniko-build: - -docker pull $(HAIL_KANIKO_LATEST) - docker build -t hail_kaniko -f kaniko/Dockerfile --cache-from hail_kaniko,$(HAIL_KANIKO_LATEST),service-base . +.PHONY: build-ci-utils +build-ci-utils: service-base + python3 jinja2_render.py '{"service_base_image":{"image":"'$$(cat ../docker/service-base-image-ref)'"}}' Dockerfile.ci-utils Dockerfile.ci-utils.out + ../docker-build.sh . Dockerfile.ci-utils.out $(CI_UTILS_IMAGE) -.PHONY: push -hail-kaniko-push: hail-kaniko-build - docker tag hail_kaniko $(HAIL_KANIKO_LATEST) - docker push $(HAIL_KANIKO_LATEST) - docker tag hail_kaniko $(HAIL_KANIKO_IMAGE) - docker push $(HAIL_KANIKO_IMAGE) +.PHONY: build-hail-buildkit +build-hail-buildkit: + ../docker-build.sh . buildkit/Dockerfile $(HAIL_BUILDKIT_IMAGE) .PHONY: build -build: - $(MAKE) -C ../docker build - -docker pull $(CI_LATEST) - python3 jinja2_render.py '{"service_base_image":{"image":"service-base"}}' Dockerfile Dockerfile.out - docker build -t ci -f Dockerfile.out --cache-from ci,$(CI_LATEST),service-base .. - -.PHONY: push -push: build - docker tag ci $(CI_LATEST) - docker push $(CI_LATEST) - docker tag ci $(CI_IMAGE) - docker push $(CI_IMAGE) +build: service-base + python3 jinja2_render.py '{"service_base_image":{"image":"'$$(cat ../docker/service-base-image-ref)'"}}' Dockerfile Dockerfile.out + ../docker-build.sh .. ci/Dockerfile.out $(CI_IMAGE) .PHONY: deploy -deploy: push push-ci-utils hail-kaniko-push +deploy: build build-ci-utils build-hail-buildkit ! 
[ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default - python3 jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"default_ns":{"name":"$(NAMESPACE)"},"ci_image":{"image":"$(CI_IMAGE)"},"global":{"project":"$(PROJECT)","zone":"$(ZONE)","ip":"$(IP)","domain":"$(DOMAIN)", "k8s_server_url":"$(KUBERNETES_SERVER_URL)"},"ci_utils_image":{"image":"$(CI_UTILS_IMAGE)"},"ci_database":{"user_secret_name":"sql-ci-user-config"},"hail_kaniko_image":{"image":"$(HAIL_KANIKO_IMAGE)"}}' deployment.yaml deployment.yaml.out + ! [ -z "$(CI_DEVELOPER_TEST_REPO_TOKEN)" -a $(NAMESPACE) != "default" ] # for dev namespaces, you must specify a github repo by its token, check your currently running CI's value for HAIL_WATCHED_BRANCHES + python3 jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"default_ns":{"name":"$(NAMESPACE)"},"ci_image":{"image":"$(CI_IMAGE)"},"global":{"project":"$(PROJECT)","zone":"$(ZONE)","ip":"$(IP)","domain":"$(DOMAIN)", "k8s_server_url":"$(KUBERNETES_SERVER_URL)"},"ci_utils_image":{"image":"$(CI_UTILS_IMAGE)"},"ci_database":{"user_secret_name":"sql-ci-user-config"},"hail_buildkit_image":{"image":"$(HAIL_BUILDKIT_IMAGE)"},"create_ci_test_repo":{"token":"$(CI_DEVELOPER_TEST_REPO_TOKEN)"}}' deployment.yaml deployment.yaml.out kubectl -n $(NAMESPACE) apply -f deployment.yaml.out diff --git a/ci/buildkit/Dockerfile b/ci/buildkit/Dockerfile new file mode 100644 index 00000000000..85a346933c0 --- /dev/null +++ b/ci/buildkit/Dockerfile @@ -0,0 +1,6 @@ +FROM moby/buildkit:v0.8.3-rootless +USER root +RUN apk add python3 py-pip && pip3 install jinja2 +USER user +COPY --chown=user:user jinja2_render.py /home/user/jinja2_render.py +COPY --chown=user:user buildkit/convert-google-application-credentials-to-docker-auth-config /home/user/convert-google-application-credentials-to-docker-auth-config diff --git a/ci/kaniko/convert-google-application-credentials-to-kaniko-auth-config b/ci/buildkit/convert-google-application-credentials-to-docker-auth-config similarity index 71% rename from ci/kaniko/convert-google-application-credentials-to-kaniko-auth-config rename to ci/buildkit/convert-google-application-credentials-to-docker-auth-config index e5f976696a0..e75b628e800 100644 --- a/ci/kaniko/convert-google-application-credentials-to-kaniko-auth-config +++ b/ci/buildkit/convert-google-application-credentials-to-docker-auth-config @@ -1,4 +1,6 @@ set +e +mkdir -p $HOME/.docker + echo '{"auths": { "'$REGISTRY'": { "auth": "'$(echo -n "_json_key:$(cat $GOOGLE_APPLICATION_CREDENTIALS)" | base64 | tr -d \\n )'"}}}' \ - > /kaniko/.docker/config.json + > $HOME/.docker/config.json diff --git a/ci/ci/build.py b/ci/ci/build.py index cd711108320..e07dcc6526f 100644 --- a/ci/ci/build.py +++ b/ci/ci/build.py @@ -1,5 +1,4 @@ import abc -import os.path import json import logging from collections import defaultdict, Counter @@ -17,7 +16,7 @@ DOMAIN, IP, CI_UTILS_IMAGE, - KANIKO_IMAGE, + BUILDKIT_IMAGE, DEFAULT_NAMESPACE, KUBERNETES_SERVER_URL, BUCKET, @@ -248,11 +247,18 @@ def __init__( self.publish_as = publish_as self.inputs = inputs self.resources = resources + self.extra_cache_repository = None + if publish_as: + self.extra_cache_repository = f'{DOCKER_PREFIX}/{self.publish_as}' if params.scope == 'deploy' and publish_as and not is_test_deployment: self.base_image = f'{DOCKER_PREFIX}/{self.publish_as}' else: self.base_image = f'{DOCKER_PREFIX}/ci-intermediate' self.image = f'{self.base_image}:{self.token}' + if 
publish_as: + self.cache_repository = f'{DOCKER_PREFIX}/{self.publish_as}:cache' + else: + self.cache_repository = f'{DOCKER_PREFIX}/ci-intermediate:cache' self.job = None def wrapped_job(self): @@ -291,43 +297,48 @@ def build(self, batch, code, scope): if isinstance(self.dockerfile, dict): assert ['inline'] == list(self.dockerfile.keys()) - unrendered_dockerfile = f'/io/Dockerfile.in.{self.token}' + unrendered_dockerfile = f'/home/user/Dockerfile.in.{self.token}' create_inline_dockerfile_if_present = f'echo {shq(self.dockerfile["inline"])} > {unrendered_dockerfile};\n' else: assert isinstance(self.dockerfile, str) unrendered_dockerfile = self.dockerfile create_inline_dockerfile_if_present = '' - dockerfile_in_context = os.path.join(context, 'Dockerfile.' + self.token) - cache_repo = DOCKER_PREFIX + '/cache' script = f''' set -ex {create_inline_dockerfile_if_present} -cp {unrendered_dockerfile} /python3.7-slim-stretch/Dockerfile.in - -time chroot /python3.7-slim-stretch /usr/local/bin/python3 \ - jinja2_render.py \ +time python3 \ + ~/jinja2_render.py \ {shq(json.dumps(config))} \ - /Dockerfile.in \ - /Dockerfile.out + {unrendered_dockerfile} \ + /home/user/Dockerfile -mv /python3.7-slim-stretch/Dockerfile.out {shq(dockerfile_in_context)} - -set +e -/busybox/sh /convert-google-application-credentials-to-kaniko-auth-config -set -e +set +x +/bin/sh /home/user/convert-google-application-credentials-to-docker-auth-config +set -x -exec /kaniko/executor --dockerfile={shq(dockerfile_in_context)} --context=dir://{shq(context)} --destination={shq(self.image)} --cache=true --cache-repo={shq(cache_repo)} --snapshotMode=redo --use-new-run''' +export BUILDKITD_FLAGS=--oci-worker-no-process-sandbox +export BUILDCTL_CONNECT_RETRIES_MAX=100 # https://github.com/moby/buildkit/issues/1423 +buildctl-daemonless.sh \ + build \ + --frontend dockerfile.v0 \ + --local context={shq(context)} \ + --local dockerfile=/home/user \ + --output 'type=image,"name={shq(self.image)},{shq(self.cache_repository)}",push=true' \ + --export-cache type=inline \ + --import-cache type=registry,ref={shq(self.cache_repository)} \ + --trace=/home/user/trace +cat /home/user/trace +''' log.info(f'step {self.name}, script:\n{script}') docker_registry = DOCKER_PREFIX.split('/')[0] - self.job = batch.create_job( - KANIKO_IMAGE, - command=['/busybox/sh', '-c', script], + BUILDKIT_IMAGE, + command=['/bin/sh', '-c', script], secrets=[ { 'namespace': DEFAULT_NAMESPACE, @@ -343,6 +354,7 @@ def build(self, batch, code, scope): resources=self.resources, input_files=input_files, parents=self.deps_parents(), + unconfined=True, ) def cleanup(self, batch, scope, parents): diff --git a/ci/ci/constants.py b/ci/ci/constants.py index 08e33cad8cd..54907db995e 100644 --- a/ci/ci/constants.py +++ b/ci/ci/constants.py @@ -32,4 +32,5 @@ def __init__(self, gh_username: str, hail_username: Optional[str] = None, teams: User('lgruen'), User('CDiaz96', 'carolin', [SERVICES_TEAM]), User('daniel-goldstein', 'dgoldste', [SERVICES_TEAM]), + User('ammekk', 'ammekk'), ] diff --git a/ci/ci/environment.py b/ci/ci/environment.py index d94a6fbd0dd..bc69229efdb 100644 --- a/ci/ci/environment.py +++ b/ci/ci/environment.py @@ -13,7 +13,7 @@ assert DOMAIN != '' IP = os.environ.get('HAIL_IP') CI_UTILS_IMAGE = os.environ.get('HAIL_CI_UTILS_IMAGE', f'{DOCKER_PREFIX}/ci-utils:latest') -KANIKO_IMAGE = os.environ['HAIL_KANIKO_IMAGE'] +BUILDKIT_IMAGE = os.environ['HAIL_BUILDKIT_IMAGE'] DEFAULT_NAMESPACE = os.environ['HAIL_DEFAULT_NAMESPACE'] KUBERNETES_SERVER_URL = 
os.environ['KUBERNETES_SERVER_URL'] BUCKET = os.environ['HAIL_CI_BUCKET_NAME'] diff --git a/ci/ci/templates/pr-table.html b/ci/ci/templates/pr-table.html index 405392a45db..e4bc4624487 100644 --- a/ci/ci/templates/pr-table.html +++ b/ci/ci/templates/pr-table.html @@ -6,6 +6,7 @@ PR Build State + Labels Review State Author @@ -28,6 +29,9 @@ * {% endif %} + + {{ pr.labels|join(", ") }} + {% if pr.review_state %} {{ pr.review_state }} diff --git a/ci/ci/templates/pr.html b/ci/ci/templates/pr.html index 5b6f34f8e8a..c18191aed86 100644 --- a/ci/ci/templates/pr.html +++ b/ci/ci/templates/pr.html @@ -17,6 +17,7 @@

{{ pr.title }} {% for name, value in batch['attributes'].items() %}
{{ name }}: {{ value }}
{% endfor %} +
labels: {{ pr.labels|join(", ") }}

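Returning to the BuildKit changes above: CI image builds now run inside the rootless moby/buildkit image, and before calling `buildctl-daemonless.sh` each job converts the mounted Google service-account key into the `~/.docker/config.json` that BuildKit consults when pushing to the registry (the convert-google-application-credentials-to-docker-auth-config script). A rough Python rendering of that shell script, assuming REGISTRY and GOOGLE_APPLICATION_CREDENTIALS are provided in the job's environment:

```python
# Rough Python rendering of
# ci/buildkit/convert-google-application-credentials-to-docker-auth-config:
# GCR accepts HTTP basic auth with user `_json_key` and the service-account JSON
# as the password, stored base64-encoded in ~/.docker/config.json for BuildKit.
import base64
import json
import os
from pathlib import Path

registry = os.environ['REGISTRY']  # e.g. gcr.io (assumed to be set by the CI job)
key = Path(os.environ['GOOGLE_APPLICATION_CREDENTIALS']).read_text()
auth = base64.b64encode(f'_json_key:{key}'.encode()).decode()

docker_dir = Path.home() / '.docker'
docker_dir.mkdir(parents=True, exist_ok=True)  # mirrors `mkdir -p $HOME/.docker`
(docker_dir / 'config.json').write_text(
    json.dumps({'auths': {registry: {'auth': auth}}})
)
```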
diff --git a/ci/deployment.yaml b/ci/deployment.yaml index 602ed75dab6..c12b07aeff1 100644 --- a/ci/deployment.yaml +++ b/ci/deployment.yaml @@ -60,8 +60,8 @@ spec: value: "{{ global.zone }}" - name: HAIL_CI_UTILS_IMAGE value: "{{ ci_utils_image.image }}" - - name: HAIL_KANIKO_IMAGE - value: "{{ hail_kaniko_image.image }}" + - name: HAIL_BUILDKIT_IMAGE + value: "{{ hail_buildkit_image.image }}" - name: HAIL_IP value: "{{ global.ip }}" - name: HAIL_DOMAIN diff --git a/ci/kaniko/Dockerfile b/ci/kaniko/Dockerfile deleted file mode 100644 index 5f06cd43e50..00000000000 --- a/ci/kaniko/Dockerfile +++ /dev/null @@ -1,29 +0,0 @@ -FROM python:3.7-slim-stretch -RUN pip3 install jinja2 - -FROM gcr.io/kaniko-project/executor:debug - -COPY --from=0 /bin/ /python3.7-slim-stretch/bin -# boot is empty -# cannot copy dev in kaniko -# etc is too big in kaniko, ld is necessary for python to dynamically link -COPY --from=0 /etc/ld.so.cache /python3.7-slim-stretch/etc/ld.so.cache -COPY --from=0 /etc/ld.so.conf /python3.7-slim-stretch/etc/ld.so.conf -# home is empty -COPY --from=0 /lib/ /python3.7-slim-stretch/lib -COPY --from=0 /lib64/ /python3.7-slim-stretch/lib64 -# media is empty -# mnt is empty -# opt is empty -# cannot copy proc in kaniko -COPY --from=0 /root/ /python3.7-slim-stretch/root -COPY --from=0 /run/ /python3.7-slim-stretch/run -COPY --from=0 /sbin/ /python3.7-slim-stretch/sbin -# srv is empty -# cannot copy sys in kaniko -# ignore tmp -COPY --from=0 /usr/ /python3.7-slim-stretch/usr -COPY --from=0 /var/ /python3.7-slim-stretch/var - -COPY jinja2_render.py /python3.7-slim-stretch/jinja2_render.py -COPY kaniko/convert-google-application-credentials-to-kaniko-auth-config /convert-google-application-credentials-to-kaniko-auth-config diff --git a/ci/test/resources/build.yaml b/ci/test/resources/build.yaml index ce45331b4c7..bf8f1b377f9 100644 --- a/ci/test/resources/build.yaml +++ b/ci/test/resources/build.yaml @@ -111,7 +111,7 @@ steps: - from: /repo/ci/test/resources to: /io/ci/test/resources dependsOn: - - service_base_image + - hail_ubuntu_image - merge_code - kind: runImage name: create_certs @@ -137,10 +137,10 @@ steps: - default_ns - create_certs_image - kind: buildImage2 - name: hail_kaniko_image - dockerFile: /io/repo/ci/kaniko/Dockerfile + name: hail_buildkit_image + dockerFile: /io/repo/ci/buildkit/Dockerfile contextPath: /io/repo/ci - publishAs: hail-kaniko + publishAs: hail-buildkit inputs: - from: /repo/ci to: /io/repo/ci diff --git a/datasets/extract/extract_1000_Genomes_30x_GRCh38_samples.sh b/datasets/extract/extract_1000_Genomes_30x_GRCh38_samples.sh new file mode 100644 index 00000000000..3c3687f416d --- /dev/null +++ b/datasets/extract/extract_1000_Genomes_30x_GRCh38_samples.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +wget -c -O - "ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000G_2504_high_coverage/20130606_g1k_3202_samples_ped_population.txt" | +bgzip -c | +gsutil cp - gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/1000_Genomes_NYGC_30x_samples_ped_population.txt.bgz diff --git a/datasets/extract/extract_1000_Genomes_NYGC_30x_GRCh38.py b/datasets/extract/extract_1000_Genomes_NYGC_30x_GRCh38.py new file mode 100644 index 00000000000..e121e1388f7 --- /dev/null +++ b/datasets/extract/extract_1000_Genomes_NYGC_30x_GRCh38.py @@ -0,0 +1,32 @@ +import hailtop.batch as hb + +phased_url_root = "ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/" \ + "1000G_2504_high_coverage/working/20201028_3202_phased" +gt_url_root = 
"ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/" \ + "1000G_2504_high_coverage/working/20201028_3202_raw_GT_with_annot" + +backend = hb.ServiceBackend(billing_project="hail-datasets-api") +batch = hb.Batch(backend=backend, name="1kg-highcov") +for i in [str(x) for x in range(1,23)]: + j = batch.new_job(name=i) + j.image("gcr.io/broad-ctsa/datasets:041421") + j.command(f"wget -c -O - {phased_url_root}/CCDG_14151_B01_GRM_WGS_2020-08-05_chr{i}.filtered.shapeit2-duohmm-phased.vcf.gz | " + f"zcat | " + f"bgzip -c | " + f"gsutil cp - gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/1000_Genomes_NYGC_30x_phased_chr{i}_GRCh38.vcf.bgz") +for i in ["X"]: + j = batch.new_job(name=i) + j.image("gcr.io/broad-ctsa/datasets:041421") + j.command(f"wget -c -O - {phased_url_root}/CCDG_14151_B01_GRM_WGS_2020-08-05_chr{i}.filtered.eagle2-phased.vcf.gz | " + f"zcat | " + f"bgzip -c | " + f"gsutil cp - gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/1000_Genomes_NYGC_30x_phased_chr{i}_GRCh38.vcf.bgz") +for i in ["Y"]: + j = batch.new_job(name=i) + j.image("gcr.io/broad-ctsa/datasets:041421") + j.command(f"wget -c -O - {gt_url_root}/20201028_CCDG_14151_B01_GRM_WGS_2020-08-05_chr{i}.recalibrated_variants.vcf.gz | " + f"zcat | " + f"bgzip -c | " + f"gsutil cp - gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/1000_Genomes_NYGC_30x_chr{i}_GRCh38.vcf.bgz") +batch.run(open=True, wait=False) +backend.close() diff --git a/datasets/extract/extract_CADD.py b/datasets/extract/extract_CADD.py new file mode 100644 index 00000000000..c8d74c373fa --- /dev/null +++ b/datasets/extract/extract_CADD.py @@ -0,0 +1,35 @@ +import hailtop.batch as hb + +name = "CADD" +tmp_bucket = "gs://hail-datasets-tmp" +builds = { + "GRCh37": { + "snvs_url": "https://krishna.gs.washington.edu/download/CADD/v1.6/GRCh37/whole_genome_SNVs.tsv.gz", + "indels_url": "https://krishna.gs.washington.edu/download/CADD/v1.6/GRCh37/InDels.tsv.gz", + "version": "v1.6" + }, + "GRCh38": { + "snvs_url": "https://krishna.gs.washington.edu/download/CADD/v1.6/GRCh38/whole_genome_SNVs.tsv.gz", + "indels_url": "https://krishna.gs.washington.edu/download/CADD/v1.6/GRCh38/gnomad.genomes.r3.0.indel.tsv.gz", + "version": "v1.6" + } +} + +backend = hb.ServiceBackend(billing_project="hail-datasets-api") +batch = hb.Batch(backend=backend, name=name) +for build in ["GRCh37", "GRCh38"]: + snvs_url = builds[build]["snvs_url"] + indels_url = builds[build]["indels_url"] + version = builds[build]["version"] + + j = batch.new_job(name=f"{name}_{version}_{build}") + j.image("gcr.io/broad-ctsa/datasets:050521") + j.command("gcloud -q auth activate-service-account --key-file=/gsa-key/key.json") + j.command(f"wget -c -O - {snvs_url} {indels_url} | " + "zcat | " + "grep -v '^#' | " + """awk -v FS=$'\t' -v OFS=$'\t' 'BEGIN {print "chromosome","position","ref","alt","raw_score","PHRED_score"} {print $0}' | """ + "bgzip -c | " + f"gsutil cp - {tmp_bucket}/{name}/{name}_{version}_{build}.tsv.bgz") +batch.run(open=True, wait=False) +backend.close() diff --git a/datasets/extract/extract_dbSNP.py b/datasets/extract/extract_dbSNP.py new file mode 100644 index 00000000000..c1401ebe029 --- /dev/null +++ b/datasets/extract/extract_dbSNP.py @@ -0,0 +1,29 @@ +import hailtop.batch as hb + +name = "dbSNP" +tmp_bucket = "gs://hail-datasets-tmp" +builds = { + "GRCh37": { + "url": "https://ftp.ncbi.nih.gov/snp/latest_release/VCF/GCF_000001405.25.gz", + "version": "154" + }, + "GRCh38": { + "url": "https://ftp.ncbi.nih.gov/snp/latest_release/VCF/GCF_000001405.38.gz", + "version": "154" + } +} + +backend 
= hb.ServiceBackend(billing_project="hail-datasets-api") +batch = hb.Batch(backend=backend, name=name) +for build in ["GRCh37", "GRCh38"]: + vcf = builds[build]["url"] + version = builds[build]["version"] + j = batch.new_job(name=f"{name}_{version}_{build}") + j.image("gcr.io/broad-ctsa/datasets:050521") + j.command("gcloud -q auth activate-service-account --key-file=/gsa-key/key.json") + j.command(f"wget -c -O - {vcf} | " + "zcat | " + "bgzip -c | " + f"gsutil cp - {tmp_bucket}/{name}/{name}_{version}_{build}.vcf.bgz") +batch.run(open=True, wait=False) +backend.close() diff --git a/datasets/notebooks/1kg_NYGC_30x_datasets.ipynb b/datasets/notebooks/1kg_NYGC_30x_datasets.ipynb new file mode 100644 index 00000000000..1a13c9e4854 --- /dev/null +++ b/datasets/notebooks/1kg_NYGC_30x_datasets.ipynb @@ -0,0 +1,814 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "social-parcel", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "import hail as hl\n", + "hl.init()" + ] + }, + { + "cell_type": "markdown", + "id": "portuguese-enclosure", + "metadata": {}, + "source": [ + "NYGC 30x HighCov samples Hail Table:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "hollywood-princess", + "metadata": {}, + "outputs": [], + "source": [ + "ht_samples = hl.import_table(\n", + " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/1000_Genomes_NYGC_30x_samples_ped_population.txt.bgz\", \n", + " delimiter=\"\\s+\",\n", + " impute=True\n", + ")\n", + "\n", + "ht_samples = ht_samples.annotate(\n", + " FatherID = hl.if_else(ht_samples.FatherID == \"0\", \n", + " hl.missing(hl.tstr), \n", + " ht_samples.FatherID), \n", + " MotherID = hl.if_else(ht_samples.MotherID == \"0\", \n", + " hl.missing(hl.tstr), \n", + " ht_samples.MotherID),\n", + " Sex = hl.if_else(ht_samples.Sex == 1, \"male\", \"female\")\n", + ")\n", + "ht_samples = ht_samples.key_by(\"SampleID\")\n", + "\n", + "n_rows = ht_samples.count()\n", + "n_partitions = ht_samples.n_partitions()\n", + "\n", + "ht_samples = ht_samples.annotate_globals(\n", + " metadata=hl.struct(\n", + " name=\"1000_Genomes_HighCov_samples\",\n", + " n_rows=n_rows,\n", + " n_partitions=n_partitions)\n", + ")\n", + "\n", + "ht_samples.write(\"gs://hail-datasets-us/1000_Genomes_NYGC_30x_HighCov_samples.ht\", overwrite=False)\n", + "ht_samples = hl.read_table(\"gs://hail-datasets-us/1000_Genomes_NYGC_30x_HighCov_samples.ht\")\n", + "ht_samples.describe()" + ] + }, + { + "cell_type": "markdown", + "id": "ruled-processor", + "metadata": {}, + "source": [ + "### Phased genotypes" + ] + }, + { + "cell_type": "markdown", + "id": "elegant-maker", + "metadata": {}, + "source": [ + "Creating MTs for the phased data is straightforward, as multiallelic variants were split during phasing." 
+ ] + }, + { + "cell_type": "markdown", + "id": "increasing-component", + "metadata": {}, + "source": [ + "#### Autosomes (phased):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "excessive-library", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "mt = hl.import_vcf(\n", + " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/1000_Genomes_NYGC_30x_phased_chr{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22}_GRCh38.vcf.bgz\",\n", + " reference_genome=\"GRCh38\"\n", + ")\n", + "\n", + "n_rows, n_cols = mt.count()\n", + "n_partitions = mt.n_partitions()\n", + "\n", + "mt = mt.annotate_globals(\n", + " metadata=hl.struct(\n", + " name=\"1000_Genomes_HighCov_autosomes\",\n", + " reference_genome=\"GRCh38\",\n", + " n_rows=n_rows,\n", + " n_cols=n_cols,\n", + " n_partitions=n_partitions\n", + " )\n", + ")\n", + "\n", + "# Get list of INFO fields that are arrays\n", + "known_keys = [x[0] for x in list(mt.row.info.items()) if \"array\" in str(x[1])]\n", + "\n", + "# Extract value from INFO array fields (all arrays are length 1)\n", + "mt = mt.annotate_rows(\n", + " info = mt.info.annotate(\n", + " **{k: hl.or_missing(hl.is_defined(mt.info[k]),\n", + " mt.info[k][0])\n", + " for k in known_keys}\n", + " )\n", + ")\n", + "\n", + "mt = mt.checkpoint(\n", + " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/autosomes_phased_GRCh38.mt\",\n", + " overwrite=False,\n", + " _read_if_exists=True\n", + ")\n", + "\n", + "mt = mt.annotate_cols(**ht_samples[mt.s])\n", + "mt = hl.sample_qc(mt)\n", + "mt = hl.variant_qc(mt)\n", + "\n", + "mt.write(\"gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/autosomes_phased.mt\", overwrite=False)\n", + "mt = hl.read_matrix_table(\"gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/autosomes_phased.mt\")\n", + "mt.describe()" + ] + }, + { + "cell_type": "markdown", + "id": "leading-directory", + "metadata": {}, + "source": [ + "#### ChrX (phased):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "alien-medicaid", + "metadata": {}, + "outputs": [], + "source": [ + "mt = hl.import_vcf(\n", + " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/1000_Genomes_NYGC_30x_phased_chrX_GRCh38.vcf.bgz\",\n", + " reference_genome=\"GRCh38\"\n", + ")\n", + "\n", + "n_rows, n_cols = mt.count()\n", + "n_partitions = mt.n_partitions()\n", + "\n", + "mt = mt.annotate_globals(\n", + " metadata=hl.struct(\n", + " name=\"1000_Genomes_HighCov_chrX\",\n", + " reference_genome=\"GRCh38\",\n", + " n_rows=n_rows,\n", + " n_cols=n_cols,\n", + " n_partitions=n_partitions\n", + " )\n", + ")\n", + "\n", + "# Get list of INFO fields that are arrays\n", + "known_keys = [x[0] for x in list(mt.row.info.items()) if \"array\" in str(x[1])]\n", + "\n", + "# Extract appropriate value from INFO array fields (all arrays are length 1)\n", + "mt = mt.annotate_rows(\n", + " info = mt.info.annotate(\n", + " **{k: hl.or_missing(hl.is_defined(mt.info[k]),\n", + " mt.info[k][0])\n", + " for k in known_keys}\n", + " )\n", + ")\n", + "\n", + "mt = mt.checkpoint(\n", + " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrX_phased_GRCh38.mt\",\n", + " overwrite=False,\n", + " _read_if_exists=True\n", + ")\n", + "\n", + "mt = mt.annotate_cols(**ht_samples[mt.s])\n", + "mt = hl.sample_qc(mt)\n", + "mt = hl.variant_qc(mt)\n", + "\n", + "mt.write(\"gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/chrX_phased.mt\", overwrite=False)\n", + "mt = 
hl.read_matrix_table(\"gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/chrX_phased.mt\")\n", + "mt.describe()" + ] + }, + { + "cell_type": "markdown", + "id": "ideal-change", + "metadata": {}, + "source": [ + "### Unphased genotypes" + ] + }, + { + "cell_type": "markdown", + "id": "statutory-karaoke", + "metadata": {}, + "source": [ + "#### Autosomes (unphased):" + ] + }, + { + "cell_type": "markdown", + "id": "above-wales", + "metadata": {}, + "source": [ + "Import chr1-chr22 VCF to `MatrixTable` and checkpoint:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "painful-virtue", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "mt = hl.import_vcf(\n", + " (\"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/1000_Genomes_NYGC_30x_\"\n", + " \"chr{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22}_\"\n", + " \"GRCh38.vcf.bgz\"),\n", + " reference_genome=\"GRCh38\",\n", + " array_elements_required=False\n", + ")\n", + "mt = mt.annotate_entries(\n", + " PL = hl.if_else(mt.PL.contains(hl.missing(hl.tint32)), \n", + " hl.missing(mt.PL.dtype), \n", + " mt.PL)\n", + ")\n", + "mt = mt.checkpoint(\n", + " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/autosomes_unphased_GRCh38_imported_vcf.mt\", \n", + " overwrite=False, \n", + " _read_if_exists=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "original-admission", + "metadata": {}, + "source": [ + "Separate biallelic and multiallelic variants, split multiallelic variants with `split_multi_hts`, and then `union_rows` the split multiallelic MT back to the biallelic MT. \n", + "\n", + "For multiallelic variants we will just set `PL` to be missing, to avoid running into index out of bounds errors in `split_multi_hts`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "diverse-march", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "mt = hl.read_matrix_table(\n", + " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/autosomes_unphased_GRCh38_imported_vcf.mt\"\n", + ")\n", + "\n", + "bi = mt.filter_rows(hl.len(mt.alleles) == 2)\n", + "bi = bi.annotate_rows(a_index=1, was_split=False)\n", + "bi = bi.checkpoint(\n", + " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/autosomes_unphased_GRCh38_biallelic.mt\", \n", + " overwrite=False, \n", + " _read_if_exists=True\n", + ")\n", + "\n", + "multi = mt.filter_rows(hl.len(mt.alleles) > 2)\n", + "multi = multi.annotate_entries(PL = hl.missing(multi.PL.dtype))\n", + "multi = multi.checkpoint(\n", + " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/autosomes_unphased_GRCh38_multiallelic.mt\", \n", + " overwrite=False,\n", + " _read_if_exists=True\n", + ")\n", + "\n", + "split = hl.split_multi_hts(multi, keep_star=True, permit_shuffle=True)\n", + "split = split.checkpoint(\n", + " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/autosomes_unphased_GRCh38_multiallelic_split.mt\", \n", + " overwrite=False, \n", + " _read_if_exists=True\n", + ")\n", + "\n", + "unioned = split.union_rows(bi)\n", + "unioned = unioned.checkpoint(\n", + " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/autosomes_unphased_GRCh38_unioned.mt\", \n", + " overwrite=False, \n", + " _read_if_exists=True\n", + ")\n", + "\n", + "unioned = unioned.repartition(12000, shuffle=True)\n", + "unioned = unioned.checkpoint(\n", + " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/autosomes_unphased_GRCh38_unioned_repart.mt\", \n", + " overwrite=False, \n", + " 
_read_if_exists=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "criminal-terry", + "metadata": {}, + "source": [ + "After splitting multiallelic variants, we need to extract the appropriate values from the `INFO` array fields with `a_index`. \n", + "\n", + "Then annotate globals with metadata, annotate columns with sample relationships, perform `sample_qc` and `variant_qc`, and write final MT to `hail-datasets-us`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "conscious-society", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "unioned = hl.read_matrix_table(\n", + " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/autosomes_unphased_GRCh38_unioned_repart.mt\"\n", + ")\n", + "\n", + "# Get list of INFO fields that are arrays\n", + "known_keys = [x[0] for x in list(unioned.row.info.items()) if \"array\" in str(x[1])]\n", + "\n", + "# Extract appropriate values from INFO array fields after splitting\n", + "mt = unioned.annotate_rows(\n", + " info = unioned.info.annotate(\n", + " **{k: hl.or_missing(hl.is_defined(unioned.info[k]), \n", + " unioned.info[k][unioned.a_index - 1]) \n", + " for k in known_keys}\n", + " )\n", + ")\n", + "\n", + "n_rows, n_cols = mt.count()\n", + "n_partitions = mt.n_partitions()\n", + "\n", + "mt = mt.annotate_globals(\n", + " metadata=hl.struct(\n", + " name=\"1000_Genomes_HighCov_autosomes\",\n", + " reference_genome=\"GRCh38\",\n", + " n_rows=n_rows,\n", + " n_cols=n_cols,\n", + " n_partitions=n_partitions\n", + " )\n", + ")\n", + "\n", + "ht_samples = hl.read_table(\"gs://hail-datasets-us/1000_Genomes/NYGC_30x/samples.ht\")\n", + "mt = mt.annotate_cols(**ht_samples[mt.s])\n", + "mt = hl.sample_qc(mt)\n", + "mt = hl.variant_qc(mt)\n", + "\n", + "mt.write(\"gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/autosomes_unphased.mt\", overwrite=False)\n", + "mt = hl.read_matrix_table(\"gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/autosomes_unphased.mt\")\n", + "mt.describe()" + ] + }, + { + "cell_type": "markdown", + "id": "blank-dance", + "metadata": {}, + "source": [ + "#### ChrX (unphased):" + ] + }, + { + "cell_type": "markdown", + "id": "distributed-numbers", + "metadata": {}, + "source": [ + "Import chrX VCF to `MatrixTable` and checkpoint:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "organized-bunny", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "mt = hl.import_vcf(\n", + " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/1000_Genomes_NYGC_30x_chrX_GRCh38.vcf.bgz\",\n", + " reference_genome=\"GRCh38\", \n", + " array_elements_required=False\n", + ")\n", + "mt = mt.annotate_entries(\n", + " PL = hl.if_else(mt.PL.contains(hl.missing(hl.tint32)), \n", + " hl.missing(mt.PL.dtype), \n", + " mt.PL)\n", + ")\n", + "mt = mt.checkpoint(\n", + " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrX_unphased_GRCh38_imported_vcf.mt\", \n", + " overwrite=False, \n", + " _read_if_exists=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "official-doubt", + "metadata": {}, + "source": [ + "Separate biallelic and multiallelic variants, split multiallelic variants with `split_multi_hts`, and then `union_rows` the split multiallelic MT back to the biallelic MT. \n", + "\n", + "For multiallelic variants we will just set `PL` to be missing, to avoid running into index out of bounds errors in `split_multi_hts`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "convertible-distribution", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "mt = hl.read_matrix_table(\n", + " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrX_unphased_GRCh38_imported_vcf.mt\"\n", + ")\n", + "\n", + "bi = mt.filter_rows(hl.len(mt.alleles) == 2)\n", + "bi = bi.annotate_rows(a_index=1, was_split=False)\n", + "bi = bi.checkpoint(\n", + " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrX_unphased_GRCh38_biallelic.mt\", \n", + " overwrite=False, \n", + " _read_if_exists=True\n", + ")\n", + "\n", + "multi = mt.filter_rows(hl.len(mt.alleles) > 2)\n", + "multi = multi.annotate_entries(PL = hl.missing(multi.PL.dtype))\n", + "multi = multi.checkpoint(\n", + " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrX_unphased_GRCh38_multiallelic.mt\", \n", + " overwrite=False,\n", + " _read_if_exists=True\n", + ")\n", + "\n", + "split = hl.split_multi_hts(multi, keep_star=True, permit_shuffle=True)\n", + "split = split.checkpoint(\n", + " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrX_unphased_GRCh38_multiallelic_split.mt\", \n", + " overwrite=False, \n", + " _read_if_exists=True\n", + ")\n", + "\n", + "unioned = split.union_rows(bi)\n", + "unioned = unioned.checkpoint(\n", + " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrX_unphased_GRCh38_unioned.mt\", \n", + " overwrite=False, \n", + " _read_if_exists=True\n", + ")\n", + "\n", + "unioned = unioned.repartition(512, shuffle=True)\n", + "unioned = unioned.checkpoint(\n", + " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrX_unphased_GRCh38_unioned_repart.mt\", \n", + " overwrite=False, \n", + " _read_if_exists=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "lonely-storm", + "metadata": {}, + "source": [ + "After splitting multiallelic variants, we need to extract the appropriate values from the `INFO` array fields with `a_index`. \n", + "\n", + "Then annotate globals with metadata, annotate columns with sample relationships, perform `sample_qc` and `variant_qc`, and write final MT to `hail-datasets-us`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "beginning-outline", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "unioned = hl.read_matrix_table(\n", + " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrX_unphased_GRCh38_unioned_repart.mt\"\n", + ")\n", + "\n", + "# Get list of INFO fields that are arrays\n", + "known_keys = [x[0] for x in list(unioned.row.info.items()) if \"array\" in str(x[1])]\n", + "\n", + "# Extract appropriate values from INFO array fields after splitting\n", + "mt = unioned.annotate_rows(\n", + " info = unioned.info.annotate(\n", + " **{k: hl.or_missing(hl.is_defined(unioned.info[k]), \n", + " unioned.info[k][unioned.a_index - 1]) \n", + " for k in known_keys}\n", + " )\n", + ")\n", + "\n", + "n_rows, n_cols = mt.count()\n", + "n_partitions = mt.n_partitions()\n", + "\n", + "mt = mt.annotate_globals(\n", + " metadata=hl.struct(\n", + " name=\"1000_Genomes_HighCov_chrX\",\n", + " reference_genome=\"GRCh38\",\n", + " n_rows=n_rows,\n", + " n_cols=n_cols,\n", + " n_partitions=n_partitions\n", + " )\n", + ")\n", + "\n", + "ht_samples = hl.read_table(\"gs://hail-datasets-us/1000_Genomes/NYGC_30x/samples.ht\")\n", + "mt = mt.annotate_cols(**ht_samples[mt.s])\n", + "mt = hl.sample_qc(mt)\n", + "mt = hl.variant_qc(mt)\n", + "\n", + "mt.write(\"gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/chrX_unphased.mt\", overwrite=False)\n", + "mt = hl.read_matrix_table(\"gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/chrX_unphased.mt\")\n", + "mt.describe()" + ] + }, + { + "cell_type": "markdown", + "id": "existing-helping", + "metadata": {}, + "source": [ + "#### ChrY (unphased):" + ] + }, + { + "cell_type": "markdown", + "id": "suspected-savannah", + "metadata": {}, + "source": [ + "Import chrY VCF to `MatrixTable` and checkpoint:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "distinguished-smooth", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "mt = hl.import_vcf(\n", + " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/1000_Genomes_NYGC_30x_chrY_GRCh38.vcf.bgz\",\n", + " reference_genome=\"GRCh38\", \n", + " array_elements_required=False\n", + ")\n", + "mt = mt.annotate_entries(\n", + " PL = hl.if_else(mt.PL.contains(hl.missing(hl.tint32)), \n", + " hl.missing(mt.PL.dtype), \n", + " mt.PL)\n", + ")\n", + "mt = mt.checkpoint(\n", + " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrY_unphased_GRCh38_imported_vcf.mt\", \n", + " overwrite=False, \n", + " _read_if_exists=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "suspected-locator", + "metadata": {}, + "source": [ + "Separate biallelic and multiallelic variants, split multiallelic variants with `split_multi_hts`, and then `union_rows` the split multiallelic MT back to the biallelic MT. \n", + "\n", + "For multiallelic variants we will just set `PL` to be missing, to avoid running into index out of bounds errors in `split_multi_hts`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "answering-nitrogen", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "mt = hl.read_matrix_table(\n", + " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrY_unphased_GRCh38_imported_vcf.mt\"\n", + ")\n", + "\n", + "bi = mt.filter_rows(hl.len(mt.alleles) == 2)\n", + "bi = bi.annotate_rows(a_index=1, was_split=False)\n", + "bi = bi.checkpoint(\n", + " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrY_unphased_GRCh38_biallelic.mt\", \n", + " overwrite=False, \n", + " _read_if_exists=True\n", + ")\n", + "\n", + "multi = mt.filter_rows(hl.len(mt.alleles) > 2)\n", + "multi = multi.annotate_entries(PL = hl.missing(multi.PL.dtype))\n", + "multi = multi.checkpoint(\n", + " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrY_unphased_GRCh38_multiallelic.mt\", \n", + " overwrite=False,\n", + " _read_if_exists=True\n", + ")\n", + "\n", + "split = hl.split_multi_hts(multi, keep_star=True, permit_shuffle=True)\n", + "split = split.checkpoint(\n", + " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrY_unphased_GRCh38_multiallelic_split.mt\", \n", + " overwrite=False, \n", + " _read_if_exists=True\n", + ")\n", + "\n", + "unioned = split.union_rows(bi)\n", + "unioned = unioned.checkpoint(\n", + " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrY_unphased_GRCh38_unioned.mt\", \n", + " overwrite=False, \n", + " _read_if_exists=True\n", + ")\n", + "\n", + "unioned = unioned.repartition(8, shuffle=True)\n", + "unioned = unioned.checkpoint(\n", + " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrY_unphased_GRCh38_unioned_repart.mt\", \n", + " overwrite=False, \n", + " _read_if_exists=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "professional-cleaning", + "metadata": {}, + "source": [ + "After splitting multiallelic variants, we need to extract the appropriate values from the `INFO` array fields with `a_index`. \n", + "\n", + "Then annotate globals with metadata, annotate columns with sample relationships, perform `sample_qc` and `variant_qc`, and write final MT to `hail-datasets-us`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "alternate-motor", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "unioned = hl.read_matrix_table(\n", + " \"gs://hail-datasets-tmp/1000_Genomes_NYGC_30x/checkpoints/chrY_unphased_GRCh38_unioned_repart.mt\"\n", + ")\n", + "\n", + "# Get list of INFO fields that are arrays\n", + "known_keys = [x[0] for x in list(unioned.row.info.items()) if \"array\" in str(x[1])]\n", + "\n", + "# Extract appropriate values from INFO array fields after splitting\n", + "mt = unioned.annotate_rows(\n", + " info = unioned.info.annotate(\n", + " **{k: hl.or_missing(hl.is_defined(unioned.info[k]), \n", + " unioned.info[k][unioned.a_index - 1]) \n", + " for k in known_keys}\n", + " )\n", + ")\n", + "\n", + "n_rows, n_cols = mt.count()\n", + "n_partitions = mt.n_partitions()\n", + "\n", + "mt = mt.annotate_globals(\n", + " metadata=hl.struct(\n", + " name=\"1000_Genomes_HighCov_chrY\",\n", + " reference_genome=\"GRCh38\",\n", + " n_rows=n_rows,\n", + " n_cols=n_cols,\n", + " n_partitions=n_partitions\n", + " )\n", + ")\n", + "\n", + "ht_samples = hl.read_table(\"gs://hail-datasets-us/1000_Genomes/NYGC_30x/samples.ht\")\n", + "mt = mt.annotate_cols(**ht_samples[mt.s])\n", + "mt = hl.sample_qc(mt)\n", + "mt = hl.variant_qc(mt)\n", + "\n", + "mt.write(\"gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/chrY_unphased.mt\", overwrite=False)\n", + "mt = hl.read_matrix_table(\"gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/chrY_unphased.mt\")\n", + "mt.describe()" + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Create/update schemas" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "id": "exposed-ivory", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import json\n", + "import os\n", + "import textwrap\n", + "\n", + "output_dir = os.path.abspath(\"../../hail/python/hail/docs/datasets/schemas\")\n", + "datasets_path = os.path.abspath(\"../../hail/python/hail/experimental/datasets.json\")\n", + "with open(datasets_path, \"r\") as f:\n", + " datasets = json.load(f)\n", + "\n", + "names = datasets.keys()\n", + "for name in [name for name in names if \"1000_Genomes_HighCov\" in name]:\n", + " versions = sorted(set(dataset[\"version\"] for dataset in datasets[name][\"versions\"]))\n", + " if not versions:\n", + " versions = [None]\n", + " reference_genomes = sorted(set(dataset[\"reference_genome\"] for dataset in datasets[name][\"versions\"]))\n", + " if not reference_genomes:\n", + " reference_genomes = [None]\n", + "\n", + " print(name)\n", + " # Create schemas for unphased versions, since phased entries only have GT\n", + " if name == \"1000_Genomes_HighCov_chrY\":\n", + " v = versions[0]\n", + " else:\n", + " v = versions[1]\n", + " print(v)\n", + " print(reference_genomes[0] + \"\\n\")\n", + "\n", + " path = [dataset[\"url\"][\"gcp\"][\"us\"]\n", + " for dataset in datasets[name][\"versions\"]\n", + " if all([dataset[\"version\"] == v,\n", + " dataset[\"reference_genome\"] == reference_genomes[0]])]\n", + " assert len(path) == 1\n", + " path = path[0]\n", + " if path.endswith(\".ht\"):\n", + " table = hl.methods.read_table(path)\n", + " table_class = \"hail.Table\"\n", + " else:\n", + " table = hl.methods.read_matrix_table(path)\n", + " table_class = \"hail.MatrixTable\"\n", + "\n", + " description = table.describe(handler=lambda x: str(x)).split(\"\\n\")\n", + " 
description = \"\\n\".join([line.rstrip() for line in description])\n", + "\n", + " template = \"\"\".. _{dataset}:\n", + "\n", + "{dataset}\n", + "{underline1}\n", + "\n", + "* **Versions:** {versions}\n", + "* **Reference genome builds:** {ref_genomes}\n", + "* **Type:** :class:`{class}`\n", + "\n", + "Schema ({version0}, {ref_genome0})\n", + "{underline2}\n", + "\n", + ".. code-block:: text\n", + "\n", + "{schema}\n", + "\n", + "\"\"\"\n", + " context = {\n", + " \"dataset\": name,\n", + " \"underline1\": len(name) * \"=\",\n", + " \"version0\": v,\n", + " \"ref_genome0\": reference_genomes[0],\n", + " \"versions\": \", \".join([str(version) for version in versions]),\n", + " \"ref_genomes\": \", \".join([str(reference_genome) for reference_genome in reference_genomes]),\n", + " \"underline2\": len(\"\".join([\"Schema (\", str(v), \", \", str(reference_genomes[0]), \")\"])) * \"~\",\n", + " \"schema\": textwrap.indent(description, \" \"),\n", + " \"class\": table_class\n", + " }\n", + " with open(output_dir + f\"/{name}.rst\", \"w\") as f:\n", + " f.write(template.format(**context).strip())" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/datasets/notebooks/CADD_datasets.ipynb b/datasets/notebooks/CADD_datasets.ipynb new file mode 100644 index 00000000000..1499df66e60 --- /dev/null +++ b/datasets/notebooks/CADD_datasets.ipynb @@ -0,0 +1,124 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### CADD\n", + "\n", + "Use to create CADD Hail Tables after downloading raw data from https://cadd.gs.washington.edu/ with Hail Batch (see `datasets/extract/extract_CADD.py`)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "import hail as hl\n", + "hl.init()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "input_root = \"gs://hail-datasets-tmp\"\n", + "output_root = \"gs://hail-datasets-us\"\n", + "\n", + "name = \"CADD\"\n", + "version = \"v1.6\"\n", + "builds = [\"GRCh37\", \"GRCh38\"]\n", + "\n", + "for build in builds:\n", + " ht = hl.import_table(f\"{input_root}/{name}/{name}_{version}_{build}.tsv.bgz\",\n", + " min_partitions=2048,\n", + " types={\"position\": hl.tint,\n", + " \"raw_score\": hl.tfloat,\n", + " \"PHRED_score\": hl.tfloat})\n", + "\n", + " if build == \"GRCh37\":\n", + " ht = ht.annotate(locus = hl.locus(ht.chromosome, ht.position, build))\n", + " else:\n", + " ht = ht.annotate(locus = hl.locus(\"chr\" + ht.chromosome, ht.position, build))\n", + "\n", + " ht = ht.annotate(alleles = [ht.ref, ht.alt])\n", + " ht = ht.select(\"locus\", \"alleles\", \"raw_score\", \"PHRED_score\")\n", + " ht = ht.key_by(\"locus\", \"alleles\")\n", + " \n", + " n_rows = ht.count()\n", + " n_partitions = ht.n_partitions()\n", + " \n", + " ht = ht.annotate_globals(\n", + " metadata=hl.struct(\n", + " name=name,\n", + " version=version,\n", + " reference_genome=build,\n", + " n_rows=n_rows,\n", + " n_partitions=n_partitions\n", + " )\n", + " )\n", + " \n", + " ht.write(f\"{output_root}/{name}_{version}_{build}.ht\")\n", + " ht = hl.read_table(f\"{output_root}/{name}_{version}_{build}.ht\")\n", + " ht.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ht37 = hl.read_table(\"gs://hail-datasets-us/CADD_v1.6_GRCh37.ht\")\n", + "ht37.describe()\n", + "print(f\"GRCh37: {str(hl.eval(ht37.metadata))}\")\n", + "ht37.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ht38 = hl.read_table(\"gs://hail-datasets-us/CADD_v1.6_GRCh38.ht\")\n", + "ht38.describe()\n", + "print(f\"GRCh38: {str(hl.eval(ht38.metadata))}\")\n", + "ht38.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/datasets/notebooks/dbSNP_datasets.ipynb b/datasets/notebooks/dbSNP_datasets.ipynb new file mode 100644 index 00000000000..7d8c07602d0 --- /dev/null +++ b/datasets/notebooks/dbSNP_datasets.ipynb @@ -0,0 +1,685 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## dbSNP\n", + "\n", + "Use to create Hail Tables for dbSNP, after downloading raw data from https://ftp.ncbi.nih.gov/snp/. \n", + "\n", + "Raw data downloaded with Hail Batch, see `hail/datasets/extract/extract_dbSNP.py`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import hail as hl\n", + "hl.init()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create Hail Tables from GRCh37 and GRCh38 assembly reports\n", + "\n", + "The contigs in the VCFs are [RefSeq](https://www.ncbi.nlm.nih.gov/refseq/) accession numbers, and need to be mapped back to the appropriate chromosome for each reference genome.\n", + "\n", + "The GRCh37 assembly can be found [here](https://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.25), and the assembly report mapping chromosomes to RefSeq sequences can be found [here](https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.25_GRCh37.p13/GCF_000001405.25_GRCh37.p13_assembly_report.txt).\n", + "\n", + "The GRCh38 assembly can be found [here](https://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.39), and the assembly report mapping chromosomes to RefSeq sequences can be found [here](https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.39_GRCh38.p13/GCF_000001405.39_GRCh38.p13_assembly_report.txt)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### GRCh37" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "ht = hl.import_table(\"gs://hail-datasets-tmp/dbSNP/GCF_000001405.25_GRCh37.p13_assembly_report.txt\", \n", + " no_header=True, \n", + " comment=\"#\",\n", + " delimiter=\"\\t\", \n", + " missing=\"na\")\n", + "\n", + "field_names = ['sequence_name','sequence_role','assigned_molecule',\n", + " 'assigned_molecule_location/type', 'genbank_accn', 'relationship', \n", + " 'refseq_accn', 'assembly_unit', 'sequence_length', 'ucsc_style_name']\n", + "\n", + "name = \"dbSNP\"\n", + "version = \"154\"\n", + "build = \"GRCh37\"\n", + "n_rows = ht.count()\n", + "n_partitions = ht.n_partitions()\n", + "\n", + "ht = ht.annotate_globals(\n", + " metadata=hl.struct(\n", + " name=name,\n", + " version=version,\n", + " reference_genome=build,\n", + " n_rows=n_rows,\n", + " n_partitions=n_partitions\n", + " )\n", + ")\n", + "ht = ht.rename(dict(zip([f\"f{i}\" for i in range(10)], field_names)))\n", + "ht = ht.drop(\"relationship\").key_by(\"refseq_accn\")\n", + "\n", + "ht.write(\"gs://hail-datasets-us/NCBI_assembly_report_p13_GRCh37.ht\")\n", + "ht = hl.read_table(\"gs://hail-datasets-us/NCBI_assembly_report_p13_GRCh37.ht\")\n", + "ht.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### GRCh38" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "ht = hl.import_table(\"gs://hail-datasets-tmp/dbSNP/GCF_000001405.39_GRCh38.p13_assembly_report.txt\", \n", + " no_header=True, \n", + " comment=\"#\",\n", + " delimiter=\"\\t\", \n", + " missing=\"na\")\n", + "\n", + "field_names = ['sequence_name','sequence_role','assigned_molecule',\n", + " 'assigned_molecule_location/type', 'genbank_accn', 'relationship', \n", + " 'refseq_accn', 'assembly_unit', 'sequence_length', 'ucsc_style_name']\n", + "\n", + "name = \"dbSNP\"\n", + "version = \"154\"\n", + "build = \"GRCh38\"\n", + "n_rows = ht.count()\n", + "n_partitions = ht.n_partitions()\n", + "\n", + "ht = ht.annotate_globals(\n", + " metadata=hl.struct(\n", + " name=name,\n", + " version=version,\n", + " reference_genome=build,\n", + " n_rows=n_rows,\n", + " n_partitions=n_partitions\n", + " )\n", + 
")\n", + "ht = ht.rename(dict(zip([f\"f{i}\" for i in range(10)], field_names)))\n", + "ht = ht.drop(\"relationship\").key_by(\"refseq_accn\")\n", + "\n", + "ht.write(\"gs://hail-datasets-us/NCBI_assembly_report_p13_GRCh38.ht\")\n", + "ht = hl.read_table(\"gs://hail-datasets-us/NCBI_assembly_report_p13_GRCh38.ht\")\n", + "ht.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create Hail Tables for dbSNP\n", + "\n", + "Now we can use the assembly report for each reference genome build to map from RefSeq accession numbers to chromosomes, and create Hail Tables. There are no samples or entries in the dbSNP VCFs. Some helpful information about the dbSNP VCFs is available [here](https://www.ncbi.nlm.nih.gov/snp/docs/products/vcf/redesign/).\n", + "\n", + "We will create two Hail Tables for each reference genome build, both keyed by `[\"locus\", \"alleles\"]`:\n", + "\n", + " - Table with all fields from the imported VCF (e.g. `gs://hail-datasets-us/dbSNP_154_GRCh37.ht`)\n", + " - Table with only the rsID field (e.g. `gs://hail-datasets-us/dbSNP_rsid_154_GRCh37.ht`)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First load VCFs to get all the contigs present in each dataset so we can create a mapping to used to recode contigs from RefSeq accession numbers to GRCh37/38 builds. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mt37 = hl.import_vcf(f\"gs://hail-datasets-tmp/dbSNP/dbSNP_154_GRCh37.vcf.bgz\", \n", + " header_file=f\"gs://hail-datasets-tmp/dbSNP/dbSNP_154_GRCh37_header_only.vcf.txt\", \n", + " reference_genome=None, \n", + " min_partitions=512)\n", + "\n", + "mt38 = hl.import_vcf(f\"gs://hail-datasets-tmp/dbSNP/dbSNP_154_GRCh38.vcf.bgz\", \n", + " header_file=f\"gs://hail-datasets-tmp/dbSNP/dbSNP_154_GRCh38_header_only.vcf.txt\", \n", + " reference_genome=None, \n", + " min_partitions=512)\n", + "\n", + "mt37 = mt37.checkpoint(f\"gs://hail-datasets-tmp/checkpoints/dbSNP_154_GRCh37_no_coding.mt\", \n", + " _read_if_exists=True, \n", + " overwrite=False)\n", + "\n", + "mt38 = mt38.checkpoint(f\"gs://hail-datasets-tmp/checkpoints/dbSNP_154_GRCh38_no_coding.mt\", \n", + " _read_if_exists=True, \n", + " overwrite=False)\n", + "\n", + "# To get all contigs present for recoding to correct reference genome mapping\n", + "contigs_present37 = mt37.aggregate_rows(hl.agg.collect_as_set(mt37.locus.contig))\n", + "contigs_present38 = mt38.aggregate_rows(hl.agg.collect_as_set(mt38.locus.contig))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "# Load NCBI assembly reports with RefSeq mappings\n", + "assembly37_ht = hl.read_table(\"gs://hail-datasets-us/NCBI_assembly_report_p13_GRCh37.ht\")\n", + "assembly37_ht = assembly37_ht.annotate(\n", + " contig = hl.if_else(assembly37_ht.sequence_role == \"unlocalized-scaffold\", \n", + " assembly37_ht.genbank_accn, \n", + " assembly37_ht.assigned_molecule)\n", + ")\n", + "assembly38_ht = hl.read_table(\"gs://hail-datasets-us/NCBI_assembly_report_p13_GRCh38.ht\")\n", + "\n", + "# Map RefSeq identifiers to chromosomes for GRCh37 using the \"contig\" field we created in assembly report\n", + "rg37 = hl.get_reference(\"GRCh37\")\n", + "refseq_to_chr37 = dict(zip(assembly37_ht.refseq_accn.collect(), assembly37_ht.contig.collect()))\n", + "refseq_to_chr37 = {k: v for k, v in refseq_to_chr37.items() if k in contigs_present37 and v in 
rg37.contigs}\n", + "\n", + "# Map RefSeq identifiers to chromosomes for GRCh38 using the \"ucsc_style_name\" field in assembly report\n", + "rg38 = hl.get_reference(\"GRCh38\")\n", + "refseq_to_chr38 = dict(zip(assembly38_ht.refseq_accn.collect(), assembly38_ht.ucsc_style_name.collect()))\n", + "refseq_to_chr38 = {k: v for k, v in refseq_to_chr38.items() if k in contigs_present38 and v in rg38.contigs}\n", + "\n", + "recodings = {\n", + "    \"GRCh37\": refseq_to_chr37, \n", + "    \"GRCh38\": refseq_to_chr38\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use the function and known keys below to convert allele frequency arrays to structs:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Convert array of strings like hl.array([\"GnomAD:.,1,3.187e-05\",\"TOPMED:.,1,2.389e-05\"]) to a struct\n", + "def arr_str_to_struct(hl_array, known_keys):\n", + "    _dict = hl.dict(\n", + "        hl_array.map(\n", + "            lambda x: (\"_\" + x.split(\":\")[0], \n", + "                       x.split(\":\")[1].split(\",\").map(lambda x: hl.if_else(x == \".\", hl.missing(hl.tfloat), hl.float(x))))\n", + "        )\n", + "    )\n", + "    _struct = hl.rbind(_dict, lambda d: hl.struct(**{k: _dict.get(k) for k in known_keys}))\n", + "    return _struct\n", + "\n", + "# To get all possible keys for allele frequency arrays after loading VCF as MatrixTable\n", + "# known_keys_FREQ = mt.aggregate_rows(\n", + "#     hl.agg.explode(\n", + "#         lambda x: hl.agg.collect_as_set(x), mt.info.FREQ.split(\"\\\\|\").map(lambda x: x.split(\":\")[0])\n", + "#     )\n", + "# )\n", + "\n", + "known_keys = ['GENOME_DK','TWINSUK','dbGaP_PopFreq','Siberian','Chileans',\n", + "              'FINRISK','HapMap','Estonian','ALSPAC','GoESP',\n", + "              'TOPMED','PAGE_STUDY','1000Genomes','Korea1K','ChromosomeY',\n", + "              'ExAC','Qatari','GoNL','MGP','GnomAD',\n", + "              'Vietnamese','GnomAD_exomes','PharmGKB','KOREAN','Daghestan',\n", + "              'HGDP_Stanford','NorthernSweden','SGDP_PRJ']\n", + "known_keys_FREQ = list(map(lambda x: f\"_{x}\", known_keys))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can read the VCF files in again as MatrixTables with the correct contig recodings, and reformat the allele frequency information in `info.FREQ` and the clinical attributes in `info`.\n", + "\n", + "Note that we are specifying a separate header file in the `hl.import_vcf` calls in the cell below. \n", + "\n", + "To make parsing strings easier, the following INFO fields in the VCF headers were changed from `Number=.` to `Number=1`: FREQ, CLNHGVS, CLNVI, CLNORIGIN, CLNSIG, CLNDISDB, CLNDN, CLNREVSTAT, CLNACC. \n", + "\n", + "The modified VCF headers used are available in `gs://hail-datasets-tmp/dbSNP`."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "name = \"dbSNP\"\n", + "version = \"154\"\n", + "builds = [\"GRCh37\", \"GRCh38\"]\n", + "\n", + "for build in builds:\n", + " mt = hl.import_vcf(f\"gs://hail-datasets-tmp/{name}/{name}_{version}_{build}.vcf.bgz\", \n", + " header_file=f\"gs://hail-datasets-tmp/{name}/{name}_{version}_{build}_header_only.vcf.txt\", \n", + " contig_recoding=recodings[build], \n", + " skip_invalid_loci=True, \n", + " reference_genome=build, \n", + " min_partitions=512)\n", + "\n", + " # First annotation, go from str to array for FREQ\n", + " mt = mt.annotate_rows(\n", + " info = mt.info.annotate(\n", + " FREQ = hl.or_missing(hl.is_defined(mt.info.FREQ), mt.info.FREQ.split(\"\\\\|\"))\n", + " )\n", + " )\n", + " # Second annotation, turn array into a struct for FREQ\n", + " mt = mt.annotate_rows(\n", + " info = mt.info.annotate(\n", + " FREQ = hl.or_missing(hl.is_defined(mt.info.FREQ), \n", + " arr_str_to_struct(mt.info.FREQ, known_keys_FREQ))\n", + " )\n", + " )\n", + " # Reformat clinical attributes from str to array\n", + " mt = mt.annotate_rows(\n", + " info = mt.info.annotate(\n", + " CLNHGVS = hl.or_missing(\n", + " hl.is_defined(mt.info.CLNHGVS), \n", + " mt.info.CLNHGVS.split(\"(?:(\\|)|(\\,))\")).map(lambda x: hl.if_else((x == \".\"), hl.missing(hl.tstr), x)),\n", + " CLNVI = hl.or_missing(\n", + " hl.is_defined(mt.info.CLNVI), \n", + " mt.info.CLNVI.split(\"(?:(\\|)|(\\,))\")).filter(lambda x: x != \".\"),\n", + " CLNORIGIN = hl.or_missing(\n", + " hl.is_defined(mt.info.CLNORIGIN), \n", + " mt.info.CLNORIGIN.split(\"(?:(\\|)|(\\,))\")).filter(lambda x: x != \".\"),\n", + " CLNSIG = hl.or_missing(\n", + " hl.is_defined(mt.info.CLNSIG), \n", + " mt.info.CLNSIG.split(\"(?:(\\|)|(\\,))\")).filter(lambda x: x != \".\"),\n", + " CLNDISDB = hl.or_missing(\n", + " hl.is_defined(mt.info.CLNDISDB), \n", + " mt.info.CLNDISDB.split(\"(?:(\\|)|(\\,))\")).filter(lambda x: x != \".\"),\n", + " CLNDN = hl.or_missing(\n", + " hl.is_defined(mt.info.CLNDN), \n", + " mt.info.CLNDN.split(\"(?:(\\|)|(\\,))\")).filter(lambda x: x != \".\"),\n", + " CLNREVSTAT = hl.or_missing(\n", + " hl.is_defined(mt.info.CLNREVSTAT), \n", + " mt.info.CLNREVSTAT.split(\"(?:(\\|)|(\\,))\")).filter(lambda x: x != \".\"),\n", + " CLNACC = hl.or_missing(\n", + " hl.is_defined(mt.info.CLNACC), \n", + " mt.info.CLNACC.split(\"(?:(\\|)|(\\,))\")).filter(lambda x: x != \".\")\n", + " )\n", + " )\n", + " \n", + " mt = mt.checkpoint(f\"gs://hail-datasets-tmp/checkpoints/{name}_{version}_{build}.mt\", \n", + " _read_if_exists=True, \n", + " overwrite=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then we can just grab the `rows` table since we have no sample or entry information in the MatrixTable. \n", + "\n", + "From there, we need to filter the biallelic and multiallelic variants into separate tables, split the multiallelic variants, and then union the split multiallelic table rows back with the biallelic table rows.\n", + "\n", + "The allele frequency arrays start with the reference allele which is then followed by alternate alleles as ordered in the ALT column (from the VCF). So after splitting we can index the array with `a_index` to pull out the relevant allele frequency." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "name = \"dbSNP\"\n", + "version = \"154\"\n", + "builds = [\"GRCh37\", \"GRCh38\"]\n", + "\n", + "for build in builds:\n", + " # No samples or entries in MT, just grab table with the rows\n", + " mt = hl.read_matrix_table(f\"gs://hail-datasets-tmp/checkpoints/{name}_{version}_{build}.mt\")\n", + " ht = mt.rows()\n", + " \n", + " ht_ba = ht.filter(hl.len(ht.alleles) <= 2)\n", + " ht_ba = ht_ba.checkpoint(f\"gs://hail-datasets-tmp/checkpoints/{name}_{version}_{build}_biallelic.ht\", \n", + " _read_if_exists=True, \n", + " overwrite=False)\n", + "\n", + " ht_ma = ht.filter(hl.len(ht.alleles) > 2)\n", + " ht_ma = ht_ma.checkpoint(f\"gs://hail-datasets-tmp/checkpoints/{name}_{version}_{build}_multiallelic.ht\", \n", + " _read_if_exists=True, \n", + " overwrite=False)\n", + "\n", + " ht_split = hl.split_multi(ht_ma, keep_star=True, permit_shuffle=True)\n", + " ht_split = ht_split.repartition(64, shuffle=False)\n", + " ht_split = ht_split.checkpoint(f\"gs://hail-datasets-tmp/checkpoints/{name}_{version}_{build}_split_multiallelic.ht\", \n", + " _read_if_exists=True, \n", + " overwrite=False)\n", + " \n", + " # Next, have to fix indices and union ht_split with ht_ba\n", + " ht_union = ht_ba.union(ht_split, unify=True)\n", + " ht_union = ht_union.annotate(\n", + " a_index = hl.if_else(hl.is_missing(ht_union.a_index), 1, ht_union.a_index),\n", + " was_split = hl.if_else(hl.is_missing(ht_union.was_split), False, ht_union.was_split),\n", + " old_locus = hl.if_else(hl.is_missing(ht_union.old_locus), ht_union.locus, ht_union.old_locus),\n", + " old_alleles = hl.if_else(hl.is_missing(ht_union.old_alleles), ht_union.alleles, ht_union.old_alleles)\n", + " )\n", + " ht_union = ht_union.checkpoint(f\"gs://hail-datasets-tmp/checkpoints/{name}_{version}_{build}_unioned.ht\", \n", + " _read_if_exists=True, \n", + " overwrite=False)\n", + " \n", + " # Arrays for AFs start w/ ref allele in index 0, so just use a_index to get alternate AFs\n", + " ht = ht_union.annotate(\n", + " info = ht_union.info.annotate(\n", + " FREQ = ht_union.info.FREQ.annotate(\n", + " **{k: hl.or_missing(hl.is_defined(ht_union.info.FREQ[k]), \n", + " ht_union.info.FREQ[k][ht_union.a_index]) \n", + " for k in known_keys_FREQ}\n", + " )\n", + " )\n", + " )\n", + " ht = ht.repartition(512, shuffle=True)\n", + " ht = ht.checkpoint(f\"gs://hail-datasets-tmp/checkpoints/{name}_{version}_{build}.ht\", \n", + " _read_if_exists=True, \n", + " overwrite=False)\n", + "\n", + " n_rows = ht.count()\n", + " n_partitions = ht.n_partitions()\n", + "\n", + " ht = ht.annotate_globals(\n", + " metadata=hl.struct(\n", + " name=name,\n", + " version=version,\n", + " reference_genome=build,\n", + " n_rows=n_rows,\n", + " n_partitions=n_partitions\n", + " )\n", + " )\n", + " ht.write(f\"gs://hail-datasets-us/{name}_{version}_{build}.ht\")\n", + " ht = hl.read_table(f\"gs://hail-datasets-us/{name}_{version}_{build}.ht\")\n", + " ht.describe()\n", + " print(str(hl.eval(ht.metadata)) + \"\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Also write tables with only the rsID field, for smaller tables that just map `[locus, alleles]` to `rsID`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "name = \"dbSNP\"\n", + "version = \"154\"\n", + "builds = [\"GRCh37\", \"GRCh38\"]\n", + "\n", + "for build in builds:\n", + " # Write table with only rsid's\n", + " ht_rsid = hl.read_table(f\"gs://hail-datasets-us/{name}_{version}_{build}.ht\")\n", + " ht_rsid = ht_rsid.select(\"rsid\")\n", + "\n", + " n_rows = ht_rsid.count()\n", + " n_partitions = ht_rsid.n_partitions()\n", + "\n", + " ht_rsid = ht_rsid.annotate_globals(\n", + " metadata=hl.struct(\n", + " name=f\"{name}_rsid\",\n", + " version=version,\n", + " reference_genome=build,\n", + " n_rows=n_rows,\n", + " n_partitions=n_partitions\n", + " )\n", + " )\n", + " ht_rsid.write(f\"gs://hail-datasets-us/{name}_rsid_{version}_{build}.ht\")\n", + " ht_rsid = hl.read_table(f\"gs://hail-datasets-us/{name}_rsid_{version}_{build}.ht\")\n", + " ht_rsid.describe()\n", + " print(str(hl.eval(ht_rsid.metadata)) + \"\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# To check uniqueness of keys\n", + "tables = [\"gs://hail-datasets-us/dbSNP_rsid_154_GRCh37.ht\", \"gs://hail-datasets-us/dbSNP_rsid_154_GRCh38.ht\"]\n", + "for table in tables:\n", + " ht = hl.read_table(table)\n", + " \n", + " ht_count = ht.count()\n", + " print(f\"n = {ht_count}\")\n", + " ht_distinct_count = ht.distinct().count()\n", + " print(f\"n_distinct = {ht_distinct_count}\")\n", + " \n", + " if ht_count == ht_distinct_count:\n", + " print(f\"{table} rows unique\\n\")\n", + " else:\n", + " print(f\"{table} rows NOT unique\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Add dbSNP to datasets API and annotation DB\n", + "\n", + "Now we can add the tables we created to `hail/python/hail/experimental/datasets.json`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import json\n", + "\n", + "datasets_path = os.path.abspath(\"../../hail/python/hail/experimental/datasets.json\")\n", + "with open(datasets_path, \"r\") as f:\n", + " datasets = json.load(f)\n", + "\n", + "names = [\"dbSNP\", \"dbSNP_rsid\"]\n", + "version = \"154\"\n", + "builds = [\"GRCh37\", \"GRCh38\"]\n", + "\n", + "gcs_us_url_root = \"gs://hail-datasets-us\"\n", + "gcs_eu_url_root = \"gs://hail-datasets-eu\"\n", + "aws_us_url_root = \"s3://hail-datasets-us-east-1\"\n", + "\n", + "for name in names:\n", + " json_entry = {\n", + " \"annotation_db\": {\n", + " \"key_properties\": []\n", + " },\n", + " \"description\": \"dbSNP: Reference SNP (rs or RefSNP) Hail Table. 
The database includes both common and rare single-base nucleotide variation (SNV), short (=< 50bp) deletion/insertion polymorphisms, and other classes of small genetic variations.\",\n", + " \"url\": \"https://www.ncbi.nlm.nih.gov/snp/docs/RefSNP_about/\",\n", + " \"versions\": [\n", + " {\n", + " \"reference_genome\": builds[0],\n", + " \"url\": {\n", + " \"aws\": {\n", + " \"us\": f\"{aws_us_url_root}/{name}_{version}_{builds[0]}.ht\"\n", + " },\n", + " \"gcp\": {\n", + " \"eu\": f\"{gcs_eu_url_root}/{name}_{version}_{builds[0]}.ht\",\n", + " \"us\": f\"{gcs_us_url_root}/{name}_{version}_{builds[0]}.ht\"\n", + " }\n", + " },\n", + " \"version\": version\n", + " },\n", + " {\n", + " \"reference_genome\": builds[1],\n", + " \"url\": {\n", + " \"aws\": {\n", + " \"us\": f\"{aws_us_url_root}/{name}_{version}_{builds[1]}.ht\"\n", + " },\n", + " \"gcp\": {\n", + " \"eu\": f\"{gcs_eu_url_root}/{name}_{version}_{builds[1]}.ht\",\n", + " \"us\": f\"{gcs_us_url_root}/{name}_{version}_{builds[1]}.ht\"\n", + " }\n", + " },\n", + " \"version\": version\n", + " } \n", + " ]\n", + " }\n", + " \n", + " if name == \"dbSNP_rsid\":\n", + " json_entry[\"description\"] = \"dbSNP: This Hail Table contains a mapping from locus/allele pairs to Reference SNP IDs (rsID). For the full dataset, see dbSNP.\"\n", + " \n", + " datasets[name] = json_entry\n", + "\n", + "# Write new entries back to datasets.json config:\n", + "with open(datasets_path, \"w\") as f:\n", + " json.dump(datasets, f, sort_keys=True, ensure_ascii=False, indent=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After adding tables to `datasets.json`, create .rst schema files for docs:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Create/update schema .rst file\n", + "import textwrap\n", + "\n", + "output_dir = os.path.abspath(\"../../hail/python/hail/docs/datasets/schemas\")\n", + "datasets_path = os.path.abspath(\"../../hail/python/hail/experimental/datasets.json\")\n", + "with open(datasets_path, \"r\") as f:\n", + " datasets = json.load(f)\n", + "\n", + "names = [\"dbSNP\", \"dbSNP_rsid\"]\n", + "for name in names:\n", + " versions = sorted(set(dataset[\"version\"] for dataset in datasets[name][\"versions\"]))\n", + " if not versions:\n", + " versions = [None]\n", + " reference_genomes = sorted(set(dataset[\"reference_genome\"] for dataset in datasets[name][\"versions\"]))\n", + " if not reference_genomes:\n", + " reference_genomes = [None]\n", + "\n", + " print(name)\n", + " print(versions[0])\n", + " print(reference_genomes[0] + \"\\n\")\n", + "\n", + " path = [dataset[\"url\"][\"gcp\"][\"us\"]\n", + " for dataset in datasets[name][\"versions\"]\n", + " if all([dataset[\"version\"] == versions[0],\n", + " dataset[\"reference_genome\"] == reference_genomes[0]])]\n", + " assert len(path) == 1\n", + " path = path[0]\n", + " if path.endswith(\".ht\"):\n", + " table = hl.methods.read_table(path)\n", + " table_class = \"hail.Table\"\n", + " else:\n", + " table = hl.methods.read_matrix_table(path)\n", + " table_class = \"hail.MatrixTable\"\n", + "\n", + " description = table.describe(handler=lambda x: str(x)).split(\"\\n\")\n", + " description = \"\\n\".join([line.rstrip() for line in description])\n", + "\n", + " template = \"\"\".. 
_{dataset}:\n", + "\n", + "{dataset}\n", + "{underline1}\n", + "\n", + "* **Versions:** {versions}\n", + "* **Reference genome builds:** {ref_genomes}\n", + "* **Type:** :class:`{class}`\n", + "\n", + "Schema ({version0}, {ref_genome0})\n", + "{underline2}\n", + "\n", + ".. code-block:: text\n", + "\n", + "{schema}\n", + "\n", + "\"\"\"\n", + " context = {\n", + " \"dataset\": name,\n", + " \"underline1\": len(name) * \"=\",\n", + " \"version0\": versions[0],\n", + " \"ref_genome0\": reference_genomes[0],\n", + " \"versions\": \", \".join([str(version) for version in versions]),\n", + " \"ref_genomes\": \", \".join([str(reference_genome) for reference_genome in reference_genomes]),\n", + " \"underline2\": len(\"\".join([\"Schema (\", str(versions[0]), \", \", str(reference_genomes[0]), \")\"])) * \"~\",\n", + " \"schema\": textwrap.indent(description, \" \"),\n", + " \"class\": table_class\n", + " }\n", + " with open(output_dir + f\"/{name}.rst\", \"w\") as f:\n", + " f.write(template.format(**context).strip())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.8" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/datasets/notebooks/reformat_buckets.ipynb b/datasets/notebooks/reformat_buckets.ipynb new file mode 100644 index 00000000000..74bfbf1e864 --- /dev/null +++ b/datasets/notebooks/reformat_buckets.ipynb @@ -0,0 +1,195 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import os\n", + "import json" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "# Create list with each line containing old path followed by new path\n", + "lines = []\n", + "with open(\"reformat_buckets.txt\", \"r\") as f:\n", + " for line in f:\n", + " line = line.replace(\"gsutil -u broad-ctsa -m cp -r \", \"\").replace(\"*\", \"\")\n", + " lines.append(line)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "# Create dict mapping current urls to new urls\n", + "new_mappings = {}\n", + "\n", + "# GCS\n", + "# Mappings from old_name: new_name for hail-datasets-us\n", + "for line in lines:\n", + " line = line.strip().split(\" \")\n", + " line = [x.rstrip(\"/\") for x in line]\n", + " new_mappings[line[0]] = line[1]\n", + "# Mappings from old_name: new_name for hail-datasets-eu\n", + "for line in lines:\n", + " line = line.replace(\"hail-datasets-us/\", \"hail-datasets-eu/\")\n", + " line = line.strip().split(\" \")\n", + " line = [x.rstrip(\"/\") for x in line]\n", + " new_mappings[line[0]] = line[1]\n", + "# AWS\n", + "# Mappings from old_name: new_name for hail-datasets-us-east-1\n", + "for line in lines:\n", + " line = line.replace(\"gs://hail-datasets-us/\", \"s3://hail-datasets-us-east-1/\")\n", + " line = line.strip().split(\" \")\n", + " line = [x.rstrip(\"/\") for x in line]\n", 
+ " new_mappings[line[0]] = line[1]\n", + "\n", + "with open(\"reformat_buckets_mappings.json\", \"w\") as f:\n", + " json.dump(new_mappings, f, sort_keys=True, ensure_ascii=False, indent=2)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "# Load config file\n", + "datasets_json_path = os.path.abspath(\"../../hail/python/hail/experimental/datasets.json\")\n", + "with open(datasets_json_path) as f:\n", + " datasets_json = json.load(f)\n", + "\n", + "# Update urls for all datasets according to new mappings\n", + "dataset_names = [name for name in datasets_json]\n", + "for name in dataset_names:\n", + " versions = datasets_json[name][\"versions\"]\n", + " for version in versions:\n", + " if \"aws\" in version[\"url\"]:\n", + " if version[\"url\"][\"aws\"][\"us\"] in new_mappings.keys():\n", + " version[\"url\"][\"aws\"][\"us\"] = new_mappings[version[\"url\"][\"aws\"][\"us\"]]\n", + " if \"gcp\" in version[\"url\"]:\n", + " if \"us\" in version[\"url\"][\"gcp\"]:\n", + " if version[\"url\"][\"gcp\"][\"us\"] in new_mappings.keys():\n", + " version[\"url\"][\"gcp\"][\"us\"] = new_mappings[version[\"url\"][\"gcp\"][\"us\"]]\n", + " if \"eu\" in version[\"url\"][\"gcp\"]:\n", + " if version[\"url\"][\"gcp\"][\"eu\"] in new_mappings.keys():\n", + " version[\"url\"][\"gcp\"][\"eu\"] = new_mappings[version[\"url\"][\"gcp\"][\"eu\"]]\n", + " # Update GTEx names while we're at it\n", + " if \"GTEx_eQTL\" in name or \"GTEx_sQTL\" in name:\n", + " tissue = name.split(\"_\", 3)[-1]\n", + " qtl = name.split(\"_\", 3)[1]\n", + " updated_name = f\"GTEx_{qtl}_{tissue}_all_snp_gene_associations\"\n", + " datasets_json[updated_name] = datasets_json[name]\n", + " del datasets_json[name]\n", + "\n", + "# Write new entries to config file\n", + "with open(datasets_json_path, \"w\") as f:\n", + " json.dump(datasets_json, f, sort_keys=True, ensure_ascii=False, indent=2)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "import os\n", + "import json\n", + "import hail as hl\n", + "hl.init(spark_conf={\"spark.hadoop.fs.s3a.aws.credentials.provider\":\n", + " \"org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider\"})\n", + "\n", + "# Test that we can load datasets from GCS and AWS\n", + "datasets_json_path = os.path.abspath(\"../../hail/python/hail/experimental/datasets.json\")\n", + "with open(datasets_json_path) as f:\n", + " datasets_json = json.load(f)\n", + "\n", + "dataset_names = [name for name in datasets_json]\n", + "for name in dataset_names:\n", + " print(name)\n", + " versions = datasets_json[name][\"versions\"]\n", + " for version in versions:\n", + " if \"gcp\" in version[\"url\"]:\n", + " if \"us\" in version[\"url\"][\"gcp\"]:\n", + " url = version[\"url\"][\"gcp\"][\"us\"]\n", + " print(url)\n", + " if url.endswith(\".ht\"):\n", + " hl.read_table(url)\n", + " elif url.endswith(\".mt\"):\n", + " hl.read_matrix_table(url)\n", + " else:\n", + " hl.linalg.BlockMatrix.read(url)\n", + " if \"eu\" in version[\"url\"][\"gcp\"]:\n", + " url = version[\"url\"][\"gcp\"][\"eu\"]\n", + " print(url)\n", + " if url.endswith(\".ht\"):\n", + " hl.read_table(url)\n", + " elif url.endswith(\".mt\"):\n", + " hl.read_matrix_table(url)\n", + " else:\n", + " hl.linalg.BlockMatrix.read(url)\n", + " if \"aws\" in version[\"url\"]:\n", + " url = 
version[\"url\"][\"aws\"][\"us\"].replace(\"s3://\", \"s3a://\")\n", + " print(url)\n", + " if url.endswith(\".ht\"):\n", + " hl.read_table(url)\n", + " elif url.endswith(\".mt\"):\n", + " hl.read_matrix_table(url)\n", + " else:\n", + " hl.linalg.BlockMatrix.read(url)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/datasets/notebooks/reformat_buckets.txt b/datasets/notebooks/reformat_buckets.txt new file mode 100644 index 00000000000..1d4e644223d --- /dev/null +++ b/datasets/notebooks/reformat_buckets.txt @@ -0,0 +1,201 @@ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_autosomes.phase_3.GRCh37.mt/* gs://hail-datasets-us/1000_Genomes/phase_3/GRCh37/autosomes.mt/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_autosomes.phase_3.GRCh38.mt/* gs://hail-datasets-us/1000_Genomes/phase_3/GRCh38/autosomes.mt/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_chrX.phase_3.GRCh37.mt/* gs://hail-datasets-us/1000_Genomes/phase_3/GRCh37/chrX.mt/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_chrX.phase_3.GRCh38.mt/* gs://hail-datasets-us/1000_Genomes/phase_3/GRCh38/chrX.mt/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_chrY.phase_3.GRCh37.mt/* gs://hail-datasets-us/1000_Genomes/phase_3/GRCh37/chrY.mt/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_chrY.phase_3.GRCh38.mt/* gs://hail-datasets-us/1000_Genomes/phase_3/GRCh38/chrY.mt/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_chrMT.phase_3.GRCh37.mt/* gs://hail-datasets-us/1000_Genomes/phase_3/GRCh37/chrMT.mt/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_phase3_samples.ht/* gs://hail-datasets-us/1000_Genomes/phase_3/samples.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_phase3_sample_relationships.ht/* gs://hail-datasets-us/1000_Genomes/phase_3/sample_relationships.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_autosomes_NYGC_30x_HighCov_GRCh38.mt/* gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/autosomes.mt/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_chrX_NYGC_30x_HighCov_GRCh38.mt/* gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/chrX.mt/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_chrY_NYGC_30x_HighCov_GRCh38.mt/* gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/chrY.mt/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_NYGC_30x_HighCov_samples.ht/* gs://hail-datasets-us/1000_Genomes/NYGC_30x/samples.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_phase3_European_autosomes_maf_gt_001.bm/* gs://hail-datasets-us/1000_Genomes/phase_3/European_autosomes_maf_gt_001.bm/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_phase3_European_autosomes_maf_gt_001.metadata.he/* gs://hail-datasets-us/1000_Genomes/phase_3/European_autosomes_maf_gt_001.metadata.he/ +gsutil -u broad-ctsa -m cp -r 
gs://hail-datasets-us/1000_Genomes_phase3_European_autosomes_maf_gt_001_standardized.bm/* gs://hail-datasets-us/1000_Genomes/phase_3/European_autosomes_maf_gt_001_standardized.bm/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/1000_Genomes_phase3_European_autosomes_maf_gt_001_standardized.metadata.he/* gs://hail-datasets-us/1000_Genomes/phase_3/European_autosomes_maf_gt_001_standardized.metadata.he/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/CADD.v1.4.GRCh37.ht/* gs://hail-datasets-us/CADD/v1.4/GRCh37/table.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/CADD.v1.4.GRCh38.ht/* gs://hail-datasets-us/CADD/v1.4/GRCh38/table.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/CADD_v1.6_GRCh37.ht/* gs://hail-datasets-us/CADD/v1.6/GRCh37/table.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/CADD_v1.6_GRCh38.ht/* gs://hail-datasets-us/CADD/v1.6/GRCh38/table.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/DANN.GRCh37.ht/* gs://hail-datasets-us/DANN/GRCh37/table.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/DANN.GRCh38.ht/* gs://hail-datasets-us/DANN/GRCh38/table.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh37.ht/* gs://hail-datasets-us/Ensembl/release_95/GRCh37/homo_sapiens_low_complexity_regions.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh38.ht/* gs://hail-datasets-us/Ensembl/release_95/GRCh38/homo_sapiens_low_complexity_regions.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/Ensembl_homo_sapiens_reference_genome.release_95.GRCh37.ht/* gs://hail-datasets-us/Ensembl/release_95/GRCh37/homo_sapiens_reference_genome.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/Ensembl_homo_sapiens_reference_genome.release_95.GRCh38.ht/* gs://hail-datasets-us/Ensembl/release_95/GRCh38/homo_sapiens_reference_genome.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GERP_elements.GERP++.GRCh37.ht/* gs://hail-datasets-us/GERP/GERP++/GRCh37/elements.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GERP_elements.GERP++.GRCh38.ht/* gs://hail-datasets-us/GERP/GERP++/GRCh38/elements.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GERP_scores.GERP++.GRCh37.ht/* gs://hail-datasets-us/GERP/GERP++/GRCh37/scores.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GERP_scores.GERP++.GRCh38.ht/* gs://hail-datasets-us/GERP/GERP++/GRCh38/scores.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_RNA_seq_gene_TPMs.v7.GRCh37.mt/* gs://hail-datasets-us/GTEx/v7/GRCh37/RNA_seq_gene_TPMs.mt/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_RNA_seq_gene_read_counts.v7.GRCh37.mt/* gs://hail-datasets-us/GTEx/v7/GRCh37/RNA_seq_gene_read_counts.mt/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_RNA_seq_junction_read_counts.v7.GRCh37.mt/* gs://hail-datasets-us/GTEx/v7/GRCh37/RNA_seq_junction_read_counts.mt/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Nerve_Tibial_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Nerve_Tibial_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Nerve_Tibial_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Nerve_Tibial_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Heart_Left_Ventricle_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Heart_Left_Ventricle_all_snp_gene_associations.ht/ 
+gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Heart_Left_Ventricle_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Heart_Left_Ventricle_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Putamen_basal_ganglia_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Putamen_basal_ganglia_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Putamen_basal_ganglia_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Putamen_basal_ganglia_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Vagina_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Vagina_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Vagina_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Vagina_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Colon_Sigmoid_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Colon_Sigmoid_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Colon_Sigmoid_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Colon_Sigmoid_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Small_Intestine_Terminal_Ileum_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Small_Intestine_Terminal_Ileum_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Small_Intestine_Terminal_Ileum_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Small_Intestine_Terminal_Ileum_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Amygdala_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Amygdala_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Amygdala_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Amygdala_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Muscle_Skeletal_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Muscle_Skeletal_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Muscle_Skeletal_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Muscle_Skeletal_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Adipose_Visceral_Omentum_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Adipose_Visceral_Omentum_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Adipose_Visceral_Omentum_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Adipose_Visceral_Omentum_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Pancreas_v8_GRCh38.ht/* 
gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Pancreas_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Pancreas_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Pancreas_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Caudate_basal_ganglia_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Caudate_basal_ganglia_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Caudate_basal_ganglia_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Caudate_basal_ganglia_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Hippocampus_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Hippocampus_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Hippocampus_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Hippocampus_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Hypothalamus_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Hypothalamus_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Hypothalamus_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Hypothalamus_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Spleen_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Spleen_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Spleen_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Spleen_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Adipose_Subcutaneous_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Adipose_Subcutaneous_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Adipose_Subcutaneous_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Adipose_Subcutaneous_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Lung_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Lung_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Lung_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Lung_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Cerebellum_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Cerebellum_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Cerebellum_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Cerebellum_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Esophagus_Gastroesophageal_Junction_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Esophagus_Gastroesophageal_Junction_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Thyroid_v8_GRCh38.ht/* 
gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Thyroid_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Thyroid_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Thyroid_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Adrenal_Gland_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Adrenal_Gland_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Adrenal_Gland_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Adrenal_Gland_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Cerebellar_Hemisphere_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Cerebellar_Hemisphere_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Cerebellar_Hemisphere_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Cerebellar_Hemisphere_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Minor_Salivary_Gland_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Minor_Salivary_Gland_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Minor_Salivary_Gland_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Minor_Salivary_Gland_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Esophagus_Muscularis_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Esophagus_Muscularis_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Esophagus_Muscularis_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Esophagus_Muscularis_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Uterus_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Uterus_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Uterus_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Uterus_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Prostate_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Prostate_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Prostate_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Prostate_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r 
gs://hail-datasets-us/GTEx_eQTL_allpairs_Liver_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Liver_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Liver_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Liver_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Artery_Aorta_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Artery_Aorta_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Artery_Aorta_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Artery_Aorta_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Colon_Transverse_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Colon_Transverse_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Colon_Transverse_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Colon_Transverse_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Heart_Atrial_Appendage_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Heart_Atrial_Appendage_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Heart_Atrial_Appendage_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Heart_Atrial_Appendage_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Cortex_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Cortex_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Cortex_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Cortex_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Kidney_Cortex_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Kidney_Cortex_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Kidney_Cortex_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Kidney_Cortex_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Skin_Sun_Exposed_Lower_leg_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Skin_Sun_Exposed_Lower_leg_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Cells_EBV-transformed_lymphocytes_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Cells_EBV-transformed_lymphocytes_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Esophagus_Mucosa_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Esophagus_Mucosa_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Esophagus_Mucosa_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Esophagus_Mucosa_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r 
gs://hail-datasets-us/GTEx_eQTL_allpairs_Testis_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Testis_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Testis_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Testis_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Substantia_nigra_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Substantia_nigra_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Substantia_nigra_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Substantia_nigra_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Whole_Blood_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Whole_Blood_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Whole_Blood_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Whole_Blood_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Frontal_Cortex_BA9_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Frontal_Cortex_BA9_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Frontal_Cortex_BA9_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Frontal_Cortex_BA9_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Artery_Tibial_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Artery_Tibial_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Artery_Tibial_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Artery_Tibial_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Spinal_cord_cervical_c-1_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Spinal_cord_cervical_c-1_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Stomach_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Stomach_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Stomach_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Stomach_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Breast_Mammary_Tissue_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Breast_Mammary_Tissue_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Breast_Mammary_Tissue_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Breast_Mammary_Tissue_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Ovary_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Ovary_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Ovary_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Ovary_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Pituitary_v8_GRCh38.ht/* 
gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Pituitary_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Pituitary_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Pituitary_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Cells_Cultured_fibroblasts_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Cells_Cultured_fibroblasts_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Cells_Cultured_fibroblasts_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Cells_Cultured_fibroblasts_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_allpairs_Artery_Coronary_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Artery_Coronary_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_allpairs_Artery_Coronary_v8_GRCh38.ht/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Artery_Coronary_all_snp_gene_associations.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_eQTL_all_snp_gene_associations_v8_GRCh38.mt/* gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/all_snp_gene_associations.mt/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/GTEx_sQTL_all_snp_gene_associations_v8_GRCh38.mt/* gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/all_snp_gene_associations.mt/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/LDSC_baselineLD_v2.2_ld_scores.GRCh37.ht/* gs://hail-datasets-us/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/LDSC_baselineLD_v2.2_ld_scores.GRCh37.mt/* gs://hail-datasets-us/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.mt/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/LDSC_baseline_v1.1_ld_scores.GRCh37.mt/* gs://hail-datasets-us/LDSC/baseline_v1.1/GRCh37/ld_scores.mt/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/dbSNP_154_GRCh37.ht/* gs://hail-datasets-us/dbSNP/build_154/GRCh37/full_table.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/dbSNP_154_GRCh38.ht/* gs://hail-datasets-us/dbSNP/build_154/GRCh38/full_table.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/dbSNP_rsid_154_GRCh37.ht/* gs://hail-datasets-us/dbSNP/build_154/GRCh37/rsid_only_table.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/dbSNP_rsid_154_GRCh38.ht/* gs://hail-datasets-us/dbSNP/build_154/GRCh38/rsid_only_table.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/NCBI_assembly_report_p13_GRCh37.ht/* gs://hail-datasets-us/dbSNP/assembly_reports/GRCh37/p13.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/NCBI_assembly_report_p13_GRCh38.ht/* gs://hail-datasets-us/dbSNP/assembly_reports/GRCh38/p13.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/UK_Biobank_Rapid_GWAS_both_sexes.v2.GRCh37.mt/* gs://hail-datasets-us/UK_Biobank/Rapid_GWAS/v2/GRCh37/both_sexes.mt/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/UK_Biobank_Rapid_GWAS_female.v2.GRCh37.mt/* gs://hail-datasets-us/UK_Biobank/Rapid_GWAS/v2/GRCh37/female.mt/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/UK_Biobank_Rapid_GWAS_male.v2.GRCh37.mt/* gs://hail-datasets-us/UK_Biobank/Rapid_GWAS/v2/GRCh37/male.mt/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/dbnsfp/dbNSFP4.0_gene.complete.bgz.ht/* gs://hail-datasets-us/dbNSFP/v4.0a/gene_complete.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/dbnsfp4.0a.GRCh37.ht/* gs://hail-datasets-us/dbNSFP/v4.0a/GRCh37/variant.ht/ +gsutil -u broad-ctsa -m cp 
-r gs://hail-datasets-us/dbnsfp4.0a.GRCh38.ht/* gs://hail-datasets-us/dbNSFP/v4.0a/GRCh38/variant.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/gencode.v19.annotation.GRCh37.ht/* gs://hail-datasets-us/GENCODE/v19/GRCh37/annotation.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/gencode.v31.annotation.GRCh38.ht/* gs://hail-datasets-us/GENCODE/v31/GRCh38/annotation.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/gene_specific_summary_2019-07.txt.gz.ht/* gs://hail-datasets-us/ClinVar/2019-07/gene_specific_summary.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/variant_summary_2019-07.GRCh37.txt.gz.ht/* gs://hail-datasets-us/ClinVar/2019-07/GRCh37/variant_summary.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/variant_summary_2019-07.GRCh38.txt.gz.ht/* gs://hail-datasets-us/ClinVar/2019-07/GRCh38/variant_summary.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/gnomad_v2.1.1_lof_metrics_by_gene.ht/* gs://hail-datasets-us/gnomAD/v2.1.1/lof_metrics_by_gene.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_bmi_exome_AFR_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_AFR.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_bmi_exome_ALL_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_ALL.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_bmi_exome_AMR_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_AMR.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_bmi_exome_EAS_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_EAS.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_bmi_exome_EUR_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_EUR.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_bmi_exome_SAS_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_SAS.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_height_exome_AFR_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_AFR.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_height_exome_ALL_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_ALL.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_height_exome_AMR_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_AMR.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_height_exome_EAS_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_EAS.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_height_exome_EUR_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_EUR.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_height_exome_SAS_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_SAS.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_whr_exome_C_ALL_Add_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_C_ALL_Add.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_whr_exome_C_ALL_Rec_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_C_ALL_Rec.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_whr_exome_C_EUR_Add_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_C_EUR_Add.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_whr_exome_C_EUR_Rec_2018_GRCh37.ht/* 
gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_C_EUR_Rec.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_whr_exome_M_ALL_Add_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_M_ALL_Add.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_whr_exome_M_ALL_Rec_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_M_ALL_Rec.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_whr_exome_M_EUR_Add_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_M_EUR_Add.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_whr_exome_M_EUR_Rec_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_M_EUR_Rec.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_whr_exome_W_ALL_Add_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_W_ALL_Add.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_whr_exome_W_ALL_Rec_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_W_ALL_Rec.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_whr_exome_W_EUR_Add_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_W_EUR_Add.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/giant_whr_exome_W_EUR_Rec_2018_GRCh37.ht/* gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_W_EUR_Rec.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/CADD.v1.4.GRCh37.ht/* gs://hail-datasets-us/CADD/v1.4/GRCh37/table.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/CADD.v1.4.GRCh38.ht/* gs://hail-datasets-us/CADD/v1.4/GRCh38/table.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/DANN.GRCh37.ht/* gs://hail-datasets-us/DANN/GRCh37/table.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/DANN.GRCh38.ht/* gs://hail-datasets-us/DANN/GRCh38/table.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh37.ht/* gs://hail-datasets-us/Ensembl/release_95/GRCh37/homo_sapiens_low_complexity_regions.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh38.ht/* gs://hail-datasets-us/Ensembl/release_95/GRCh38/homo_sapiens_low_complexity_regions.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/Ensembl_homo_sapiens_reference_genome.release_95.GRCh37.ht/* gs://hail-datasets-us/Ensembl/release_95/GRCh37/homo_sapiens_reference_genome.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/Ensembl_homo_sapiens_reference_genome.release_95.GRCh38.ht/* gs://hail-datasets-us/Ensembl/release_95/GRCh38/homo_sapiens_reference_genome.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/GERP_elements.GERP++.GRCh37.ht/* gs://hail-datasets-us/GERP/GERP++/GRCh37/elements.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/GERP_elements.GERP++.GRCh38.ht/* gs://hail-datasets-us/GERP/GERP++/GRCh38/elements.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/GERP_scores.GERP++.GRCh37.ht/* gs://hail-datasets-us/GERP/GERP++/GRCh37/scores.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/GERP_scores.GERP++.GRCh38.ht/* gs://hail-datasets-us/GERP/GERP++/GRCh38/scores.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/LDSC_baselineLD_v2.2_ld_scores.GRCh37.ht/* gs://hail-datasets-us/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.ht/ 
+gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/dbnsfp/dbNSFP4.0_gene.complete.bgz.ht/* gs://hail-datasets-us/dbNSFP/v4.0a/gene_complete.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/dbnsfp4.0a.GRCh37.ht/* gs://hail-datasets-us/dbNSFP/v4.0a/GRCh37/variant.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/dbnsfp4.0a.GRCh38.ht/* gs://hail-datasets-us/dbNSFP/v4.0a/GRCh38/variant.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/gencode.v19.annotation.GRCh37.ht/* gs://hail-datasets-us/GENCODE/v19/GRCh37/annotation.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/gencode.v31.annotation.GRCh38.ht/* gs://hail-datasets-us/GENCODE/v31/GRCh38/annotation.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/gene_specific_summary_2019-07.txt.gz.ht/* gs://hail-datasets-us/ClinVar/2019-07/gene_specific_summary.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/variant_summary_2019-07.GRCh37.txt.gz.ht/* gs://hail-datasets-us/ClinVar/2019-07/GRCh37/variant_summary.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/variant_summary_2019-07.GRCh38.txt.gz.ht/* gs://hail-datasets-us/ClinVar/2019-07/GRCh38/variant_summary.ht/ +gsutil -u broad-ctsa -m cp -r gs://hail-datasets-us/annotations/gnomad_v2.1.1_lof_metrics_by_gene.ht/* gs://hail-datasets-us/gnomAD/v2.1.1/lof_metrics_by_gene.ht/ diff --git a/datasets/notebooks/reformat_buckets_mappings.json b/datasets/notebooks/reformat_buckets_mappings.json new file mode 100644 index 00000000000..97a696cd943 --- /dev/null +++ b/datasets/notebooks/reformat_buckets_mappings.json @@ -0,0 +1,605 @@ +{ + "gs://hail-datasets-eu/1000_Genomes_NYGC_30x_HighCov_samples.ht": "gs://hail-datasets-eu/1000_Genomes/NYGC_30x/samples.ht", + "gs://hail-datasets-eu/1000_Genomes_autosomes.phase_3.GRCh37.mt": "gs://hail-datasets-eu/1000_Genomes/phase_3/GRCh37/autosomes.mt", + "gs://hail-datasets-eu/1000_Genomes_autosomes.phase_3.GRCh38.mt": "gs://hail-datasets-eu/1000_Genomes/phase_3/GRCh38/autosomes.mt", + "gs://hail-datasets-eu/1000_Genomes_autosomes_NYGC_30x_HighCov_GRCh38.mt": "gs://hail-datasets-eu/1000_Genomes/NYGC_30x/GRCh38/autosomes.mt", + "gs://hail-datasets-eu/1000_Genomes_chrMT.phase_3.GRCh37.mt": "gs://hail-datasets-eu/1000_Genomes/phase_3/GRCh37/chrMT.mt", + "gs://hail-datasets-eu/1000_Genomes_chrX.phase_3.GRCh37.mt": "gs://hail-datasets-eu/1000_Genomes/phase_3/GRCh37/chrX.mt", + "gs://hail-datasets-eu/1000_Genomes_chrX.phase_3.GRCh38.mt": "gs://hail-datasets-eu/1000_Genomes/phase_3/GRCh38/chrX.mt", + "gs://hail-datasets-eu/1000_Genomes_chrX_NYGC_30x_HighCov_GRCh38.mt": "gs://hail-datasets-eu/1000_Genomes/NYGC_30x/GRCh38/chrX.mt", + "gs://hail-datasets-eu/1000_Genomes_chrY.phase_3.GRCh37.mt": "gs://hail-datasets-eu/1000_Genomes/phase_3/GRCh37/chrY.mt", + "gs://hail-datasets-eu/1000_Genomes_chrY.phase_3.GRCh38.mt": "gs://hail-datasets-eu/1000_Genomes/phase_3/GRCh38/chrY.mt", + "gs://hail-datasets-eu/1000_Genomes_chrY_NYGC_30x_HighCov_GRCh38.mt": "gs://hail-datasets-eu/1000_Genomes/NYGC_30x/GRCh38/chrY.mt", + "gs://hail-datasets-eu/1000_Genomes_phase3_European_autosomes_maf_gt_001.bm": "gs://hail-datasets-eu/1000_Genomes/phase_3/European_autosomes_maf_gt_001.bm", + "gs://hail-datasets-eu/1000_Genomes_phase3_European_autosomes_maf_gt_001.metadata.he": "gs://hail-datasets-eu/1000_Genomes/phase_3/European_autosomes_maf_gt_001.metadata.he", + "gs://hail-datasets-eu/1000_Genomes_phase3_European_autosomes_maf_gt_001_standardized.bm": 
"gs://hail-datasets-eu/1000_Genomes/phase_3/European_autosomes_maf_gt_001_standardized.bm", + "gs://hail-datasets-eu/1000_Genomes_phase3_European_autosomes_maf_gt_001_standardized.metadata.he": "gs://hail-datasets-eu/1000_Genomes/phase_3/European_autosomes_maf_gt_001_standardized.metadata.he", + "gs://hail-datasets-eu/1000_Genomes_phase3_sample_relationships.ht": "gs://hail-datasets-eu/1000_Genomes/phase_3/sample_relationships.ht", + "gs://hail-datasets-eu/1000_Genomes_phase3_samples.ht": "gs://hail-datasets-eu/1000_Genomes/phase_3/samples.ht", + "gs://hail-datasets-eu/CADD.v1.4.GRCh37.ht": "gs://hail-datasets-eu/CADD/v1.4/GRCh37/table.ht", + "gs://hail-datasets-eu/CADD.v1.4.GRCh38.ht": "gs://hail-datasets-eu/CADD/v1.4/GRCh38/table.ht", + "gs://hail-datasets-eu/CADD_v1.6_GRCh37.ht": "gs://hail-datasets-eu/CADD/v1.6/GRCh37/table.ht", + "gs://hail-datasets-eu/CADD_v1.6_GRCh38.ht": "gs://hail-datasets-eu/CADD/v1.6/GRCh38/table.ht", + "gs://hail-datasets-eu/DANN.GRCh37.ht": "gs://hail-datasets-eu/DANN/GRCh37/table.ht", + "gs://hail-datasets-eu/DANN.GRCh38.ht": "gs://hail-datasets-eu/DANN/GRCh38/table.ht", + "gs://hail-datasets-eu/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh37.ht": "gs://hail-datasets-eu/Ensembl/release_95/GRCh37/homo_sapiens_low_complexity_regions.ht", + "gs://hail-datasets-eu/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh38.ht": "gs://hail-datasets-eu/Ensembl/release_95/GRCh38/homo_sapiens_low_complexity_regions.ht", + "gs://hail-datasets-eu/Ensembl_homo_sapiens_reference_genome.release_95.GRCh37.ht": "gs://hail-datasets-eu/Ensembl/release_95/GRCh37/homo_sapiens_reference_genome.ht", + "gs://hail-datasets-eu/Ensembl_homo_sapiens_reference_genome.release_95.GRCh38.ht": "gs://hail-datasets-eu/Ensembl/release_95/GRCh38/homo_sapiens_reference_genome.ht", + "gs://hail-datasets-eu/GERP_elements.GERP++.GRCh37.ht": "gs://hail-datasets-eu/GERP/GERP++/GRCh37/elements.ht", + "gs://hail-datasets-eu/GERP_elements.GERP++.GRCh38.ht": "gs://hail-datasets-eu/GERP/GERP++/GRCh38/elements.ht", + "gs://hail-datasets-eu/GERP_scores.GERP++.GRCh37.ht": "gs://hail-datasets-eu/GERP/GERP++/GRCh37/scores.ht", + "gs://hail-datasets-eu/GERP_scores.GERP++.GRCh38.ht": "gs://hail-datasets-eu/GERP/GERP++/GRCh38/scores.ht", + "gs://hail-datasets-eu/GTEx_RNA_seq_gene_TPMs.v7.GRCh37.mt": "gs://hail-datasets-eu/GTEx/v7/GRCh37/RNA_seq_gene_TPMs.mt", + "gs://hail-datasets-eu/GTEx_RNA_seq_gene_read_counts.v7.GRCh37.mt": "gs://hail-datasets-eu/GTEx/v7/GRCh37/RNA_seq_gene_read_counts.mt", + "gs://hail-datasets-eu/GTEx_RNA_seq_junction_read_counts.v7.GRCh37.mt": "gs://hail-datasets-eu/GTEx/v7/GRCh37/RNA_seq_junction_read_counts.mt", + "gs://hail-datasets-eu/GTEx_eQTL_all_snp_gene_associations_v8_GRCh38.mt": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/all_snp_gene_associations.mt", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Adipose_Subcutaneous_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Adipose_Subcutaneous_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Adipose_Visceral_Omentum_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Adipose_Visceral_Omentum_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Adrenal_Gland_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Adrenal_Gland_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Artery_Aorta_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Artery_Aorta_all_snp_gene_associations.ht", + 
"gs://hail-datasets-eu/GTEx_eQTL_allpairs_Artery_Coronary_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Artery_Coronary_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Artery_Tibial_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Artery_Tibial_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Amygdala_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Amygdala_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Caudate_basal_ganglia_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Caudate_basal_ganglia_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Cerebellar_Hemisphere_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Cerebellar_Hemisphere_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Cerebellum_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Cerebellum_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Cortex_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Cortex_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Frontal_Cortex_BA9_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Frontal_Cortex_BA9_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Hippocampus_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Hippocampus_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Hypothalamus_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Hypothalamus_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Putamen_basal_ganglia_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Putamen_basal_ganglia_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Spinal_cord_cervical_c-1_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Substantia_nigra_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Substantia_nigra_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Breast_Mammary_Tissue_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Breast_Mammary_Tissue_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Cells_Cultured_fibroblasts_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Cells_Cultured_fibroblasts_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Cells_EBV-transformed_lymphocytes_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Colon_Sigmoid_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Colon_Sigmoid_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Colon_Transverse_v8_GRCh38.ht": 
"gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Colon_Transverse_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Esophagus_Gastroesophageal_Junction_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Esophagus_Mucosa_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Esophagus_Mucosa_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Esophagus_Muscularis_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Esophagus_Muscularis_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Heart_Atrial_Appendage_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Heart_Atrial_Appendage_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Heart_Left_Ventricle_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Heart_Left_Ventricle_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Kidney_Cortex_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Kidney_Cortex_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Liver_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Liver_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Lung_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Lung_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Minor_Salivary_Gland_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Minor_Salivary_Gland_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Muscle_Skeletal_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Muscle_Skeletal_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Nerve_Tibial_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Nerve_Tibial_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Ovary_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Ovary_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Pancreas_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Pancreas_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Pituitary_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Pituitary_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Prostate_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Prostate_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Skin_Sun_Exposed_Lower_leg_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Small_Intestine_Terminal_Ileum_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Small_Intestine_Terminal_Ileum_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Spleen_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Spleen_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Stomach_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Stomach_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Testis_v8_GRCh38.ht": 
"gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Testis_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Thyroid_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Thyroid_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Uterus_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Uterus_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Vagina_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Vagina_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Whole_Blood_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Whole_Blood_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_all_snp_gene_associations_v8_GRCh38.mt": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/all_snp_gene_associations.mt", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Adipose_Subcutaneous_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Adipose_Subcutaneous_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Adipose_Visceral_Omentum_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Adipose_Visceral_Omentum_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Adrenal_Gland_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Adrenal_Gland_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Artery_Aorta_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Artery_Aorta_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Artery_Coronary_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Artery_Coronary_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Artery_Tibial_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Artery_Tibial_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Amygdala_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Amygdala_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Caudate_basal_ganglia_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Caudate_basal_ganglia_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Cerebellar_Hemisphere_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Cerebellar_Hemisphere_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Cerebellum_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Cerebellum_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Cortex_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Cortex_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Frontal_Cortex_BA9_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Frontal_Cortex_BA9_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Hippocampus_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Hippocampus_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Hypothalamus_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Hypothalamus_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia_v8_GRCh38.ht": 
"gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Putamen_basal_ganglia_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Putamen_basal_ganglia_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Spinal_cord_cervical_c-1_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Substantia_nigra_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Substantia_nigra_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Breast_Mammary_Tissue_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Breast_Mammary_Tissue_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Cells_Cultured_fibroblasts_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Cells_Cultured_fibroblasts_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Cells_EBV-transformed_lymphocytes_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Colon_Sigmoid_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Colon_Sigmoid_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Colon_Transverse_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Colon_Transverse_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Esophagus_Gastroesophageal_Junction_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Esophagus_Mucosa_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Esophagus_Mucosa_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Esophagus_Muscularis_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Esophagus_Muscularis_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Heart_Atrial_Appendage_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Heart_Atrial_Appendage_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Heart_Left_Ventricle_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Heart_Left_Ventricle_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Kidney_Cortex_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Kidney_Cortex_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Liver_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Liver_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Lung_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Lung_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Minor_Salivary_Gland_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Minor_Salivary_Gland_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Muscle_Skeletal_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Muscle_Skeletal_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Nerve_Tibial_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Nerve_Tibial_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Ovary_v8_GRCh38.ht": 
"gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Ovary_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Pancreas_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Pancreas_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Pituitary_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Pituitary_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Prostate_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Prostate_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Skin_Sun_Exposed_Lower_leg_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Small_Intestine_Terminal_Ileum_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Small_Intestine_Terminal_Ileum_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Spleen_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Spleen_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Stomach_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Stomach_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Testis_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Testis_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Thyroid_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Thyroid_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Uterus_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Uterus_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Vagina_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Vagina_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Whole_Blood_v8_GRCh38.ht": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Whole_Blood_all_snp_gene_associations.ht", + "gs://hail-datasets-eu/LDSC_baselineLD_v2.2_ld_scores.GRCh37.ht": "gs://hail-datasets-eu/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.ht", + "gs://hail-datasets-eu/LDSC_baselineLD_v2.2_ld_scores.GRCh37.mt": "gs://hail-datasets-eu/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.mt", + "gs://hail-datasets-eu/LDSC_baseline_v1.1_ld_scores.GRCh37.mt": "gs://hail-datasets-eu/LDSC/baseline_v1.1/GRCh37/ld_scores.mt", + "gs://hail-datasets-eu/NCBI_assembly_report_p13_GRCh37.ht": "gs://hail-datasets-eu/dbSNP/assembly_reports/GRCh37/p13.ht", + "gs://hail-datasets-eu/NCBI_assembly_report_p13_GRCh38.ht": "gs://hail-datasets-eu/dbSNP/assembly_reports/GRCh38/p13.ht", + "gs://hail-datasets-eu/UK_Biobank_Rapid_GWAS_both_sexes.v2.GRCh37.mt": "gs://hail-datasets-eu/UK_Biobank/Rapid_GWAS/v2/GRCh37/both_sexes.mt", + "gs://hail-datasets-eu/UK_Biobank_Rapid_GWAS_female.v2.GRCh37.mt": "gs://hail-datasets-eu/UK_Biobank/Rapid_GWAS/v2/GRCh37/female.mt", + "gs://hail-datasets-eu/UK_Biobank_Rapid_GWAS_male.v2.GRCh37.mt": "gs://hail-datasets-eu/UK_Biobank/Rapid_GWAS/v2/GRCh37/male.mt", + "gs://hail-datasets-eu/annotations/CADD.v1.4.GRCh37.ht": "gs://hail-datasets-eu/CADD/v1.4/GRCh37/table.ht", + "gs://hail-datasets-eu/annotations/CADD.v1.4.GRCh38.ht": "gs://hail-datasets-eu/CADD/v1.4/GRCh38/table.ht", + "gs://hail-datasets-eu/annotations/DANN.GRCh37.ht": "gs://hail-datasets-eu/DANN/GRCh37/table.ht", + 
"gs://hail-datasets-eu/annotations/DANN.GRCh38.ht": "gs://hail-datasets-eu/DANN/GRCh38/table.ht", + "gs://hail-datasets-eu/annotations/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh37.ht": "gs://hail-datasets-eu/Ensembl/release_95/GRCh37/homo_sapiens_low_complexity_regions.ht", + "gs://hail-datasets-eu/annotations/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh38.ht": "gs://hail-datasets-eu/Ensembl/release_95/GRCh38/homo_sapiens_low_complexity_regions.ht", + "gs://hail-datasets-eu/annotations/Ensembl_homo_sapiens_reference_genome.release_95.GRCh37.ht": "gs://hail-datasets-eu/Ensembl/release_95/GRCh37/homo_sapiens_reference_genome.ht", + "gs://hail-datasets-eu/annotations/Ensembl_homo_sapiens_reference_genome.release_95.GRCh38.ht": "gs://hail-datasets-eu/Ensembl/release_95/GRCh38/homo_sapiens_reference_genome.ht", + "gs://hail-datasets-eu/annotations/GERP_elements.GERP++.GRCh37.ht": "gs://hail-datasets-eu/GERP/GERP++/GRCh37/elements.ht", + "gs://hail-datasets-eu/annotations/GERP_elements.GERP++.GRCh38.ht": "gs://hail-datasets-eu/GERP/GERP++/GRCh38/elements.ht", + "gs://hail-datasets-eu/annotations/GERP_scores.GERP++.GRCh37.ht": "gs://hail-datasets-eu/GERP/GERP++/GRCh37/scores.ht", + "gs://hail-datasets-eu/annotations/GERP_scores.GERP++.GRCh38.ht": "gs://hail-datasets-eu/GERP/GERP++/GRCh38/scores.ht", + "gs://hail-datasets-eu/annotations/LDSC_baselineLD_v2.2_ld_scores.GRCh37.ht": "gs://hail-datasets-eu/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.ht", + "gs://hail-datasets-eu/annotations/dbnsfp/dbNSFP4.0_gene.complete.bgz.ht": "gs://hail-datasets-eu/dbNSFP/v4.0a/gene_complete.ht", + "gs://hail-datasets-eu/annotations/dbnsfp4.0a.GRCh37.ht": "gs://hail-datasets-eu/dbNSFP/v4.0a/GRCh37/variant.ht", + "gs://hail-datasets-eu/annotations/dbnsfp4.0a.GRCh38.ht": "gs://hail-datasets-eu/dbNSFP/v4.0a/GRCh38/variant.ht", + "gs://hail-datasets-eu/annotations/gencode.v19.annotation.GRCh37.ht": "gs://hail-datasets-eu/GENCODE/v19/GRCh37/annotation.ht", + "gs://hail-datasets-eu/annotations/gencode.v31.annotation.GRCh38.ht": "gs://hail-datasets-eu/GENCODE/v31/GRCh38/annotation.ht", + "gs://hail-datasets-eu/annotations/gene_specific_summary_2019-07.txt.gz.ht": "gs://hail-datasets-eu/ClinVar/2019-07/gene_specific_summary.ht", + "gs://hail-datasets-eu/annotations/gnomad_v2.1.1_lof_metrics_by_gene.ht": "gs://hail-datasets-eu/gnomAD/v2.1.1/lof_metrics_by_gene.ht", + "gs://hail-datasets-eu/annotations/variant_summary_2019-07.GRCh37.txt.gz.ht": "gs://hail-datasets-eu/ClinVar/2019-07/GRCh37/variant_summary.ht", + "gs://hail-datasets-eu/annotations/variant_summary_2019-07.GRCh38.txt.gz.ht": "gs://hail-datasets-eu/ClinVar/2019-07/GRCh38/variant_summary.ht", + "gs://hail-datasets-eu/dbSNP_154_GRCh37.ht": "gs://hail-datasets-eu/dbSNP/build_154/GRCh37/full_table.ht", + "gs://hail-datasets-eu/dbSNP_154_GRCh38.ht": "gs://hail-datasets-eu/dbSNP/build_154/GRCh38/full_table.ht", + "gs://hail-datasets-eu/dbSNP_rsid_154_GRCh37.ht": "gs://hail-datasets-eu/dbSNP/build_154/GRCh37/rsid_only_table.ht", + "gs://hail-datasets-eu/dbSNP_rsid_154_GRCh38.ht": "gs://hail-datasets-eu/dbSNP/build_154/GRCh38/rsid_only_table.ht", + "gs://hail-datasets-eu/dbnsfp/dbNSFP4.0_gene.complete.bgz.ht": "gs://hail-datasets-eu/dbNSFP/v4.0a/gene_complete.ht", + "gs://hail-datasets-eu/dbnsfp4.0a.GRCh37.ht": "gs://hail-datasets-eu/dbNSFP/v4.0a/GRCh37/variant.ht", + "gs://hail-datasets-eu/dbnsfp4.0a.GRCh38.ht": "gs://hail-datasets-eu/dbNSFP/v4.0a/GRCh38/variant.ht", + "gs://hail-datasets-eu/gencode.v19.annotation.GRCh37.ht": 
"gs://hail-datasets-eu/GENCODE/v19/GRCh37/annotation.ht", + "gs://hail-datasets-eu/gencode.v31.annotation.GRCh38.ht": "gs://hail-datasets-eu/GENCODE/v31/GRCh38/annotation.ht", + "gs://hail-datasets-eu/gene_specific_summary_2019-07.txt.gz.ht": "gs://hail-datasets-eu/ClinVar/2019-07/gene_specific_summary.ht", + "gs://hail-datasets-eu/giant_bmi_exome_AFR_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/bmi_AFR.ht", + "gs://hail-datasets-eu/giant_bmi_exome_ALL_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/bmi_ALL.ht", + "gs://hail-datasets-eu/giant_bmi_exome_AMR_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/bmi_AMR.ht", + "gs://hail-datasets-eu/giant_bmi_exome_EAS_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/bmi_EAS.ht", + "gs://hail-datasets-eu/giant_bmi_exome_EUR_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/bmi_EUR.ht", + "gs://hail-datasets-eu/giant_bmi_exome_SAS_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/bmi_SAS.ht", + "gs://hail-datasets-eu/giant_height_exome_AFR_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/height_AFR.ht", + "gs://hail-datasets-eu/giant_height_exome_ALL_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/height_ALL.ht", + "gs://hail-datasets-eu/giant_height_exome_AMR_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/height_AMR.ht", + "gs://hail-datasets-eu/giant_height_exome_EAS_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/height_EAS.ht", + "gs://hail-datasets-eu/giant_height_exome_EUR_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/height_EUR.ht", + "gs://hail-datasets-eu/giant_height_exome_SAS_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/height_SAS.ht", + "gs://hail-datasets-eu/giant_whr_exome_C_ALL_Add_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_C_ALL_Add.ht", + "gs://hail-datasets-eu/giant_whr_exome_C_ALL_Rec_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_C_ALL_Rec.ht", + "gs://hail-datasets-eu/giant_whr_exome_C_EUR_Add_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_C_EUR_Add.ht", + "gs://hail-datasets-eu/giant_whr_exome_C_EUR_Rec_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_C_EUR_Rec.ht", + "gs://hail-datasets-eu/giant_whr_exome_M_ALL_Add_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_M_ALL_Add.ht", + "gs://hail-datasets-eu/giant_whr_exome_M_ALL_Rec_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_M_ALL_Rec.ht", + "gs://hail-datasets-eu/giant_whr_exome_M_EUR_Add_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_M_EUR_Add.ht", + "gs://hail-datasets-eu/giant_whr_exome_M_EUR_Rec_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_M_EUR_Rec.ht", + "gs://hail-datasets-eu/giant_whr_exome_W_ALL_Add_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_W_ALL_Add.ht", + "gs://hail-datasets-eu/giant_whr_exome_W_ALL_Rec_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_W_ALL_Rec.ht", + "gs://hail-datasets-eu/giant_whr_exome_W_EUR_Add_2018_GRCh37.ht": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_W_EUR_Add.ht", + "gs://hail-datasets-eu/giant_whr_exome_W_EUR_Rec_2018_GRCh37.ht": 
"gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_W_EUR_Rec.ht", + "gs://hail-datasets-eu/gnomad_v2.1.1_lof_metrics_by_gene.ht": "gs://hail-datasets-eu/gnomAD/v2.1.1/lof_metrics_by_gene.ht", + "gs://hail-datasets-eu/variant_summary_2019-07.GRCh37.txt.gz.ht": "gs://hail-datasets-eu/ClinVar/2019-07/GRCh37/variant_summary.ht", + "gs://hail-datasets-eu/variant_summary_2019-07.GRCh38.txt.gz.ht": "gs://hail-datasets-eu/ClinVar/2019-07/GRCh38/variant_summary.ht", + "gs://hail-datasets-us/1000_Genomes_NYGC_30x_HighCov_samples.ht": "gs://hail-datasets-us/1000_Genomes/NYGC_30x/samples.ht", + "gs://hail-datasets-us/1000_Genomes_autosomes.phase_3.GRCh37.mt": "gs://hail-datasets-us/1000_Genomes/phase_3/GRCh37/autosomes.mt", + "gs://hail-datasets-us/1000_Genomes_autosomes.phase_3.GRCh38.mt": "gs://hail-datasets-us/1000_Genomes/phase_3/GRCh38/autosomes.mt", + "gs://hail-datasets-us/1000_Genomes_autosomes_NYGC_30x_HighCov_GRCh38.mt": "gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/autosomes.mt", + "gs://hail-datasets-us/1000_Genomes_chrMT.phase_3.GRCh37.mt": "gs://hail-datasets-us/1000_Genomes/phase_3/GRCh37/chrMT.mt", + "gs://hail-datasets-us/1000_Genomes_chrX.phase_3.GRCh37.mt": "gs://hail-datasets-us/1000_Genomes/phase_3/GRCh37/chrX.mt", + "gs://hail-datasets-us/1000_Genomes_chrX.phase_3.GRCh38.mt": "gs://hail-datasets-us/1000_Genomes/phase_3/GRCh38/chrX.mt", + "gs://hail-datasets-us/1000_Genomes_chrX_NYGC_30x_HighCov_GRCh38.mt": "gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/chrX.mt", + "gs://hail-datasets-us/1000_Genomes_chrY.phase_3.GRCh37.mt": "gs://hail-datasets-us/1000_Genomes/phase_3/GRCh37/chrY.mt", + "gs://hail-datasets-us/1000_Genomes_chrY.phase_3.GRCh38.mt": "gs://hail-datasets-us/1000_Genomes/phase_3/GRCh38/chrY.mt", + "gs://hail-datasets-us/1000_Genomes_chrY_NYGC_30x_HighCov_GRCh38.mt": "gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/chrY.mt", + "gs://hail-datasets-us/1000_Genomes_phase3_European_autosomes_maf_gt_001.bm": "gs://hail-datasets-us/1000_Genomes/phase_3/European_autosomes_maf_gt_001.bm", + "gs://hail-datasets-us/1000_Genomes_phase3_European_autosomes_maf_gt_001.metadata.he": "gs://hail-datasets-us/1000_Genomes/phase_3/European_autosomes_maf_gt_001.metadata.he", + "gs://hail-datasets-us/1000_Genomes_phase3_European_autosomes_maf_gt_001_standardized.bm": "gs://hail-datasets-us/1000_Genomes/phase_3/European_autosomes_maf_gt_001_standardized.bm", + "gs://hail-datasets-us/1000_Genomes_phase3_European_autosomes_maf_gt_001_standardized.metadata.he": "gs://hail-datasets-us/1000_Genomes/phase_3/European_autosomes_maf_gt_001_standardized.metadata.he", + "gs://hail-datasets-us/1000_Genomes_phase3_sample_relationships.ht": "gs://hail-datasets-us/1000_Genomes/phase_3/sample_relationships.ht", + "gs://hail-datasets-us/1000_Genomes_phase3_samples.ht": "gs://hail-datasets-us/1000_Genomes/phase_3/samples.ht", + "gs://hail-datasets-us/CADD.v1.4.GRCh37.ht": "gs://hail-datasets-us/CADD/v1.4/GRCh37/table.ht", + "gs://hail-datasets-us/CADD.v1.4.GRCh38.ht": "gs://hail-datasets-us/CADD/v1.4/GRCh38/table.ht", + "gs://hail-datasets-us/CADD_v1.6_GRCh37.ht": "gs://hail-datasets-us/CADD/v1.6/GRCh37/table.ht", + "gs://hail-datasets-us/CADD_v1.6_GRCh38.ht": "gs://hail-datasets-us/CADD/v1.6/GRCh38/table.ht", + "gs://hail-datasets-us/DANN.GRCh37.ht": "gs://hail-datasets-us/DANN/GRCh37/table.ht", + "gs://hail-datasets-us/DANN.GRCh38.ht": "gs://hail-datasets-us/DANN/GRCh38/table.ht", + "gs://hail-datasets-us/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh37.ht": 
"gs://hail-datasets-us/Ensembl/release_95/GRCh37/homo_sapiens_low_complexity_regions.ht", + "gs://hail-datasets-us/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh38.ht": "gs://hail-datasets-us/Ensembl/release_95/GRCh38/homo_sapiens_low_complexity_regions.ht", + "gs://hail-datasets-us/Ensembl_homo_sapiens_reference_genome.release_95.GRCh37.ht": "gs://hail-datasets-us/Ensembl/release_95/GRCh37/homo_sapiens_reference_genome.ht", + "gs://hail-datasets-us/Ensembl_homo_sapiens_reference_genome.release_95.GRCh38.ht": "gs://hail-datasets-us/Ensembl/release_95/GRCh38/homo_sapiens_reference_genome.ht", + "gs://hail-datasets-us/GERP_elements.GERP++.GRCh37.ht": "gs://hail-datasets-us/GERP/GERP++/GRCh37/elements.ht", + "gs://hail-datasets-us/GERP_elements.GERP++.GRCh38.ht": "gs://hail-datasets-us/GERP/GERP++/GRCh38/elements.ht", + "gs://hail-datasets-us/GERP_scores.GERP++.GRCh37.ht": "gs://hail-datasets-us/GERP/GERP++/GRCh37/scores.ht", + "gs://hail-datasets-us/GERP_scores.GERP++.GRCh38.ht": "gs://hail-datasets-us/GERP/GERP++/GRCh38/scores.ht", + "gs://hail-datasets-us/GTEx_RNA_seq_gene_TPMs.v7.GRCh37.mt": "gs://hail-datasets-us/GTEx/v7/GRCh37/RNA_seq_gene_TPMs.mt", + "gs://hail-datasets-us/GTEx_RNA_seq_gene_read_counts.v7.GRCh37.mt": "gs://hail-datasets-us/GTEx/v7/GRCh37/RNA_seq_gene_read_counts.mt", + "gs://hail-datasets-us/GTEx_RNA_seq_junction_read_counts.v7.GRCh37.mt": "gs://hail-datasets-us/GTEx/v7/GRCh37/RNA_seq_junction_read_counts.mt", + "gs://hail-datasets-us/GTEx_eQTL_all_snp_gene_associations_v8_GRCh38.mt": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/all_snp_gene_associations.mt", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Adipose_Subcutaneous_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Adipose_Subcutaneous_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Adipose_Visceral_Omentum_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Adipose_Visceral_Omentum_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Adrenal_Gland_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Adrenal_Gland_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Artery_Aorta_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Artery_Aorta_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Artery_Coronary_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Artery_Coronary_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Artery_Tibial_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Artery_Tibial_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Amygdala_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Amygdala_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Caudate_basal_ganglia_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Caudate_basal_ganglia_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Cerebellar_Hemisphere_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Cerebellar_Hemisphere_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Cerebellum_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Cerebellum_all_snp_gene_associations.ht", + 
"gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Cortex_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Cortex_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Frontal_Cortex_BA9_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Frontal_Cortex_BA9_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Hippocampus_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Hippocampus_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Hypothalamus_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Hypothalamus_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Putamen_basal_ganglia_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Putamen_basal_ganglia_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Spinal_cord_cervical_c-1_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Substantia_nigra_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Substantia_nigra_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Breast_Mammary_Tissue_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Breast_Mammary_Tissue_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Cells_Cultured_fibroblasts_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Cells_Cultured_fibroblasts_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Cells_EBV-transformed_lymphocytes_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Colon_Sigmoid_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Colon_Sigmoid_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Colon_Transverse_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Colon_Transverse_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Esophagus_Gastroesophageal_Junction_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Esophagus_Mucosa_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Esophagus_Mucosa_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Esophagus_Muscularis_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Esophagus_Muscularis_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Heart_Atrial_Appendage_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Heart_Atrial_Appendage_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Heart_Left_Ventricle_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Heart_Left_Ventricle_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Kidney_Cortex_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Kidney_Cortex_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Liver_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Liver_all_snp_gene_associations.ht", + 
"gs://hail-datasets-us/GTEx_eQTL_allpairs_Lung_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Lung_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Minor_Salivary_Gland_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Minor_Salivary_Gland_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Muscle_Skeletal_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Muscle_Skeletal_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Nerve_Tibial_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Nerve_Tibial_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Ovary_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Ovary_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Pancreas_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Pancreas_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Pituitary_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Pituitary_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Prostate_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Prostate_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Skin_Sun_Exposed_Lower_leg_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Small_Intestine_Terminal_Ileum_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Small_Intestine_Terminal_Ileum_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Spleen_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Spleen_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Stomach_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Stomach_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Testis_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Testis_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Thyroid_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Thyroid_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Uterus_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Uterus_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Vagina_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Vagina_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_eQTL_allpairs_Whole_Blood_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Whole_Blood_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_all_snp_gene_associations_v8_GRCh38.mt": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/all_snp_gene_associations.mt", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Adipose_Subcutaneous_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Adipose_Subcutaneous_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Adipose_Visceral_Omentum_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Adipose_Visceral_Omentum_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Adrenal_Gland_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Adrenal_Gland_all_snp_gene_associations.ht", + 
"gs://hail-datasets-us/GTEx_sQTL_allpairs_Artery_Aorta_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Artery_Aorta_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Artery_Coronary_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Artery_Coronary_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Artery_Tibial_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Artery_Tibial_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Amygdala_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Amygdala_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Caudate_basal_ganglia_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Caudate_basal_ganglia_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Cerebellar_Hemisphere_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Cerebellar_Hemisphere_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Cerebellum_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Cerebellum_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Cortex_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Cortex_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Frontal_Cortex_BA9_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Frontal_Cortex_BA9_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Hippocampus_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Hippocampus_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Hypothalamus_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Hypothalamus_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Putamen_basal_ganglia_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Putamen_basal_ganglia_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Spinal_cord_cervical_c-1_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Substantia_nigra_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Substantia_nigra_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Breast_Mammary_Tissue_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Breast_Mammary_Tissue_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Cells_Cultured_fibroblasts_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Cells_Cultured_fibroblasts_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Cells_EBV-transformed_lymphocytes_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Colon_Sigmoid_v8_GRCh38.ht": 
"gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Colon_Sigmoid_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Colon_Transverse_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Colon_Transverse_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Esophagus_Gastroesophageal_Junction_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Esophagus_Mucosa_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Esophagus_Mucosa_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Esophagus_Muscularis_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Esophagus_Muscularis_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Heart_Atrial_Appendage_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Heart_Atrial_Appendage_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Heart_Left_Ventricle_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Heart_Left_Ventricle_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Kidney_Cortex_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Kidney_Cortex_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Liver_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Liver_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Lung_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Lung_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Minor_Salivary_Gland_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Minor_Salivary_Gland_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Muscle_Skeletal_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Muscle_Skeletal_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Nerve_Tibial_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Nerve_Tibial_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Ovary_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Ovary_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Pancreas_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Pancreas_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Pituitary_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Pituitary_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Prostate_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Prostate_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Skin_Sun_Exposed_Lower_leg_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Small_Intestine_Terminal_Ileum_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Small_Intestine_Terminal_Ileum_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Spleen_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Spleen_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Stomach_v8_GRCh38.ht": 
"gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Stomach_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Testis_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Testis_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Thyroid_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Thyroid_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Uterus_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Uterus_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Vagina_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Vagina_all_snp_gene_associations.ht", + "gs://hail-datasets-us/GTEx_sQTL_allpairs_Whole_Blood_v8_GRCh38.ht": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Whole_Blood_all_snp_gene_associations.ht", + "gs://hail-datasets-us/LDSC_baselineLD_v2.2_ld_scores.GRCh37.ht": "gs://hail-datasets-us/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.ht", + "gs://hail-datasets-us/LDSC_baselineLD_v2.2_ld_scores.GRCh37.mt": "gs://hail-datasets-us/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.mt", + "gs://hail-datasets-us/LDSC_baseline_v1.1_ld_scores.GRCh37.mt": "gs://hail-datasets-us/LDSC/baseline_v1.1/GRCh37/ld_scores.mt", + "gs://hail-datasets-us/NCBI_assembly_report_p13_GRCh37.ht": "gs://hail-datasets-us/dbSNP/assembly_reports/GRCh37/p13.ht", + "gs://hail-datasets-us/NCBI_assembly_report_p13_GRCh38.ht": "gs://hail-datasets-us/dbSNP/assembly_reports/GRCh38/p13.ht", + "gs://hail-datasets-us/UK_Biobank_Rapid_GWAS_both_sexes.v2.GRCh37.mt": "gs://hail-datasets-us/UK_Biobank/Rapid_GWAS/v2/GRCh37/both_sexes.mt", + "gs://hail-datasets-us/UK_Biobank_Rapid_GWAS_female.v2.GRCh37.mt": "gs://hail-datasets-us/UK_Biobank/Rapid_GWAS/v2/GRCh37/female.mt", + "gs://hail-datasets-us/UK_Biobank_Rapid_GWAS_male.v2.GRCh37.mt": "gs://hail-datasets-us/UK_Biobank/Rapid_GWAS/v2/GRCh37/male.mt", + "gs://hail-datasets-us/annotations/CADD.v1.4.GRCh37.ht": "gs://hail-datasets-us/CADD/v1.4/GRCh37/table.ht", + "gs://hail-datasets-us/annotations/CADD.v1.4.GRCh38.ht": "gs://hail-datasets-us/CADD/v1.4/GRCh38/table.ht", + "gs://hail-datasets-us/annotations/DANN.GRCh37.ht": "gs://hail-datasets-us/DANN/GRCh37/table.ht", + "gs://hail-datasets-us/annotations/DANN.GRCh38.ht": "gs://hail-datasets-us/DANN/GRCh38/table.ht", + "gs://hail-datasets-us/annotations/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh37.ht": "gs://hail-datasets-us/Ensembl/release_95/GRCh37/homo_sapiens_low_complexity_regions.ht", + "gs://hail-datasets-us/annotations/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh38.ht": "gs://hail-datasets-us/Ensembl/release_95/GRCh38/homo_sapiens_low_complexity_regions.ht", + "gs://hail-datasets-us/annotations/Ensembl_homo_sapiens_reference_genome.release_95.GRCh37.ht": "gs://hail-datasets-us/Ensembl/release_95/GRCh37/homo_sapiens_reference_genome.ht", + "gs://hail-datasets-us/annotations/Ensembl_homo_sapiens_reference_genome.release_95.GRCh38.ht": "gs://hail-datasets-us/Ensembl/release_95/GRCh38/homo_sapiens_reference_genome.ht", + "gs://hail-datasets-us/annotations/GERP_elements.GERP++.GRCh37.ht": "gs://hail-datasets-us/GERP/GERP++/GRCh37/elements.ht", + "gs://hail-datasets-us/annotations/GERP_elements.GERP++.GRCh38.ht": "gs://hail-datasets-us/GERP/GERP++/GRCh38/elements.ht", + "gs://hail-datasets-us/annotations/GERP_scores.GERP++.GRCh37.ht": "gs://hail-datasets-us/GERP/GERP++/GRCh37/scores.ht", + "gs://hail-datasets-us/annotations/GERP_scores.GERP++.GRCh38.ht": 
"gs://hail-datasets-us/GERP/GERP++/GRCh38/scores.ht", + "gs://hail-datasets-us/annotations/LDSC_baselineLD_v2.2_ld_scores.GRCh37.ht": "gs://hail-datasets-us/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.ht", + "gs://hail-datasets-us/annotations/dbnsfp/dbNSFP4.0_gene.complete.bgz.ht": "gs://hail-datasets-us/dbNSFP/v4.0a/gene_complete.ht", + "gs://hail-datasets-us/annotations/dbnsfp4.0a.GRCh37.ht": "gs://hail-datasets-us/dbNSFP/v4.0a/GRCh37/variant.ht", + "gs://hail-datasets-us/annotations/dbnsfp4.0a.GRCh38.ht": "gs://hail-datasets-us/dbNSFP/v4.0a/GRCh38/variant.ht", + "gs://hail-datasets-us/annotations/gencode.v19.annotation.GRCh37.ht": "gs://hail-datasets-us/GENCODE/v19/GRCh37/annotation.ht", + "gs://hail-datasets-us/annotations/gencode.v31.annotation.GRCh38.ht": "gs://hail-datasets-us/GENCODE/v31/GRCh38/annotation.ht", + "gs://hail-datasets-us/annotations/gene_specific_summary_2019-07.txt.gz.ht": "gs://hail-datasets-us/ClinVar/2019-07/gene_specific_summary.ht", + "gs://hail-datasets-us/annotations/gnomad_v2.1.1_lof_metrics_by_gene.ht": "gs://hail-datasets-us/gnomAD/v2.1.1/lof_metrics_by_gene.ht", + "gs://hail-datasets-us/annotations/variant_summary_2019-07.GRCh37.txt.gz.ht": "gs://hail-datasets-us/ClinVar/2019-07/GRCh37/variant_summary.ht", + "gs://hail-datasets-us/annotations/variant_summary_2019-07.GRCh38.txt.gz.ht": "gs://hail-datasets-us/ClinVar/2019-07/GRCh38/variant_summary.ht", + "gs://hail-datasets-us/dbSNP_154_GRCh37.ht": "gs://hail-datasets-us/dbSNP/build_154/GRCh37/full_table.ht", + "gs://hail-datasets-us/dbSNP_154_GRCh38.ht": "gs://hail-datasets-us/dbSNP/build_154/GRCh38/full_table.ht", + "gs://hail-datasets-us/dbSNP_rsid_154_GRCh37.ht": "gs://hail-datasets-us/dbSNP/build_154/GRCh37/rsid_only_table.ht", + "gs://hail-datasets-us/dbSNP_rsid_154_GRCh38.ht": "gs://hail-datasets-us/dbSNP/build_154/GRCh38/rsid_only_table.ht", + "gs://hail-datasets-us/dbnsfp/dbNSFP4.0_gene.complete.bgz.ht": "gs://hail-datasets-us/dbNSFP/v4.0a/gene_complete.ht", + "gs://hail-datasets-us/dbnsfp4.0a.GRCh37.ht": "gs://hail-datasets-us/dbNSFP/v4.0a/GRCh37/variant.ht", + "gs://hail-datasets-us/dbnsfp4.0a.GRCh38.ht": "gs://hail-datasets-us/dbNSFP/v4.0a/GRCh38/variant.ht", + "gs://hail-datasets-us/gencode.v19.annotation.GRCh37.ht": "gs://hail-datasets-us/GENCODE/v19/GRCh37/annotation.ht", + "gs://hail-datasets-us/gencode.v31.annotation.GRCh38.ht": "gs://hail-datasets-us/GENCODE/v31/GRCh38/annotation.ht", + "gs://hail-datasets-us/gene_specific_summary_2019-07.txt.gz.ht": "gs://hail-datasets-us/ClinVar/2019-07/gene_specific_summary.ht", + "gs://hail-datasets-us/giant_bmi_exome_AFR_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_AFR.ht", + "gs://hail-datasets-us/giant_bmi_exome_ALL_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_ALL.ht", + "gs://hail-datasets-us/giant_bmi_exome_AMR_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_AMR.ht", + "gs://hail-datasets-us/giant_bmi_exome_EAS_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_EAS.ht", + "gs://hail-datasets-us/giant_bmi_exome_EUR_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_EUR.ht", + "gs://hail-datasets-us/giant_bmi_exome_SAS_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_SAS.ht", + "gs://hail-datasets-us/giant_height_exome_AFR_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_AFR.ht", + "gs://hail-datasets-us/giant_height_exome_ALL_2018_GRCh37.ht": 
"gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_ALL.ht", + "gs://hail-datasets-us/giant_height_exome_AMR_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_AMR.ht", + "gs://hail-datasets-us/giant_height_exome_EAS_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_EAS.ht", + "gs://hail-datasets-us/giant_height_exome_EUR_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_EUR.ht", + "gs://hail-datasets-us/giant_height_exome_SAS_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_SAS.ht", + "gs://hail-datasets-us/giant_whr_exome_C_ALL_Add_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_C_ALL_Add.ht", + "gs://hail-datasets-us/giant_whr_exome_C_ALL_Rec_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_C_ALL_Rec.ht", + "gs://hail-datasets-us/giant_whr_exome_C_EUR_Add_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_C_EUR_Add.ht", + "gs://hail-datasets-us/giant_whr_exome_C_EUR_Rec_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_C_EUR_Rec.ht", + "gs://hail-datasets-us/giant_whr_exome_M_ALL_Add_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_M_ALL_Add.ht", + "gs://hail-datasets-us/giant_whr_exome_M_ALL_Rec_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_M_ALL_Rec.ht", + "gs://hail-datasets-us/giant_whr_exome_M_EUR_Add_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_M_EUR_Add.ht", + "gs://hail-datasets-us/giant_whr_exome_M_EUR_Rec_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_M_EUR_Rec.ht", + "gs://hail-datasets-us/giant_whr_exome_W_ALL_Add_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_W_ALL_Add.ht", + "gs://hail-datasets-us/giant_whr_exome_W_ALL_Rec_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_W_ALL_Rec.ht", + "gs://hail-datasets-us/giant_whr_exome_W_EUR_Add_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_W_EUR_Add.ht", + "gs://hail-datasets-us/giant_whr_exome_W_EUR_Rec_2018_GRCh37.ht": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_W_EUR_Rec.ht", + "gs://hail-datasets-us/gnomad_v2.1.1_lof_metrics_by_gene.ht": "gs://hail-datasets-us/gnomAD/v2.1.1/lof_metrics_by_gene.ht", + "gs://hail-datasets-us/variant_summary_2019-07.GRCh37.txt.gz.ht": "gs://hail-datasets-us/ClinVar/2019-07/GRCh37/variant_summary.ht", + "gs://hail-datasets-us/variant_summary_2019-07.GRCh38.txt.gz.ht": "gs://hail-datasets-us/ClinVar/2019-07/GRCh38/variant_summary.ht", + "s3://hail-datasets-us-east-1/1000_Genomes_NYGC_30x_HighCov_samples.ht": "s3://hail-datasets-us-east-1/1000_Genomes/NYGC_30x/samples.ht", + "s3://hail-datasets-us-east-1/1000_Genomes_autosomes.phase_3.GRCh37.mt": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/GRCh37/autosomes.mt", + "s3://hail-datasets-us-east-1/1000_Genomes_autosomes.phase_3.GRCh38.mt": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/GRCh38/autosomes.mt", + "s3://hail-datasets-us-east-1/1000_Genomes_autosomes_NYGC_30x_HighCov_GRCh38.mt": "s3://hail-datasets-us-east-1/1000_Genomes/NYGC_30x/GRCh38/autosomes.mt", + "s3://hail-datasets-us-east-1/1000_Genomes_chrMT.phase_3.GRCh37.mt": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/GRCh37/chrMT.mt", + "s3://hail-datasets-us-east-1/1000_Genomes_chrX.phase_3.GRCh37.mt": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/GRCh37/chrX.mt", + 
"s3://hail-datasets-us-east-1/1000_Genomes_chrX.phase_3.GRCh38.mt": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/GRCh38/chrX.mt", + "s3://hail-datasets-us-east-1/1000_Genomes_chrX_NYGC_30x_HighCov_GRCh38.mt": "s3://hail-datasets-us-east-1/1000_Genomes/NYGC_30x/GRCh38/chrX.mt", + "s3://hail-datasets-us-east-1/1000_Genomes_chrY.phase_3.GRCh37.mt": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/GRCh37/chrY.mt", + "s3://hail-datasets-us-east-1/1000_Genomes_chrY.phase_3.GRCh38.mt": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/GRCh38/chrY.mt", + "s3://hail-datasets-us-east-1/1000_Genomes_chrY_NYGC_30x_HighCov_GRCh38.mt": "s3://hail-datasets-us-east-1/1000_Genomes/NYGC_30x/GRCh38/chrY.mt", + "s3://hail-datasets-us-east-1/1000_Genomes_phase3_European_autosomes_maf_gt_001.bm": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/European_autosomes_maf_gt_001.bm", + "s3://hail-datasets-us-east-1/1000_Genomes_phase3_European_autosomes_maf_gt_001.metadata.he": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/European_autosomes_maf_gt_001.metadata.he", + "s3://hail-datasets-us-east-1/1000_Genomes_phase3_European_autosomes_maf_gt_001_standardized.bm": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/European_autosomes_maf_gt_001_standardized.bm", + "s3://hail-datasets-us-east-1/1000_Genomes_phase3_European_autosomes_maf_gt_001_standardized.metadata.he": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/European_autosomes_maf_gt_001_standardized.metadata.he", + "s3://hail-datasets-us-east-1/1000_Genomes_phase3_sample_relationships.ht": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/sample_relationships.ht", + "s3://hail-datasets-us-east-1/1000_Genomes_phase3_samples.ht": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/samples.ht", + "s3://hail-datasets-us-east-1/CADD.v1.4.GRCh37.ht": "s3://hail-datasets-us-east-1/CADD/v1.4/GRCh37/table.ht", + "s3://hail-datasets-us-east-1/CADD.v1.4.GRCh38.ht": "s3://hail-datasets-us-east-1/CADD/v1.4/GRCh38/table.ht", + "s3://hail-datasets-us-east-1/CADD_v1.6_GRCh37.ht": "s3://hail-datasets-us-east-1/CADD/v1.6/GRCh37/table.ht", + "s3://hail-datasets-us-east-1/CADD_v1.6_GRCh38.ht": "s3://hail-datasets-us-east-1/CADD/v1.6/GRCh38/table.ht", + "s3://hail-datasets-us-east-1/DANN.GRCh37.ht": "s3://hail-datasets-us-east-1/DANN/GRCh37/table.ht", + "s3://hail-datasets-us-east-1/DANN.GRCh38.ht": "s3://hail-datasets-us-east-1/DANN/GRCh38/table.ht", + "s3://hail-datasets-us-east-1/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh37.ht": "s3://hail-datasets-us-east-1/Ensembl/release_95/GRCh37/homo_sapiens_low_complexity_regions.ht", + "s3://hail-datasets-us-east-1/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh38.ht": "s3://hail-datasets-us-east-1/Ensembl/release_95/GRCh38/homo_sapiens_low_complexity_regions.ht", + "s3://hail-datasets-us-east-1/Ensembl_homo_sapiens_reference_genome.release_95.GRCh37.ht": "s3://hail-datasets-us-east-1/Ensembl/release_95/GRCh37/homo_sapiens_reference_genome.ht", + "s3://hail-datasets-us-east-1/Ensembl_homo_sapiens_reference_genome.release_95.GRCh38.ht": "s3://hail-datasets-us-east-1/Ensembl/release_95/GRCh38/homo_sapiens_reference_genome.ht", + "s3://hail-datasets-us-east-1/GERP_elements.GERP++.GRCh37.ht": "s3://hail-datasets-us-east-1/GERP/GERP++/GRCh37/elements.ht", + "s3://hail-datasets-us-east-1/GERP_elements.GERP++.GRCh38.ht": "s3://hail-datasets-us-east-1/GERP/GERP++/GRCh38/elements.ht", + "s3://hail-datasets-us-east-1/GERP_scores.GERP++.GRCh37.ht": 
"s3://hail-datasets-us-east-1/GERP/GERP++/GRCh37/scores.ht", + "s3://hail-datasets-us-east-1/GERP_scores.GERP++.GRCh38.ht": "s3://hail-datasets-us-east-1/GERP/GERP++/GRCh38/scores.ht", + "s3://hail-datasets-us-east-1/GTEx_RNA_seq_gene_TPMs.v7.GRCh37.mt": "s3://hail-datasets-us-east-1/GTEx/v7/GRCh37/RNA_seq_gene_TPMs.mt", + "s3://hail-datasets-us-east-1/GTEx_RNA_seq_gene_read_counts.v7.GRCh37.mt": "s3://hail-datasets-us-east-1/GTEx/v7/GRCh37/RNA_seq_gene_read_counts.mt", + "s3://hail-datasets-us-east-1/GTEx_RNA_seq_junction_read_counts.v7.GRCh37.mt": "s3://hail-datasets-us-east-1/GTEx/v7/GRCh37/RNA_seq_junction_read_counts.mt", + "s3://hail-datasets-us-east-1/GTEx_eQTL_all_snp_gene_associations_v8_GRCh38.mt": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/all_snp_gene_associations.mt", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Adipose_Subcutaneous_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Adipose_Subcutaneous_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Adipose_Visceral_Omentum_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Adipose_Visceral_Omentum_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Adrenal_Gland_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Adrenal_Gland_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Artery_Aorta_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Artery_Aorta_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Artery_Coronary_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Artery_Coronary_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Artery_Tibial_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Artery_Tibial_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Amygdala_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Amygdala_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Caudate_basal_ganglia_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Caudate_basal_ganglia_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Cerebellar_Hemisphere_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Cerebellar_Hemisphere_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Cerebellum_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Cerebellum_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Cortex_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Cortex_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Frontal_Cortex_BA9_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Frontal_Cortex_BA9_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Hippocampus_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Hippocampus_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Hypothalamus_v8_GRCh38.ht": 
"s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Hypothalamus_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Putamen_basal_ganglia_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Putamen_basal_ganglia_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Spinal_cord_cervical_c-1_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Substantia_nigra_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Substantia_nigra_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Breast_Mammary_Tissue_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Breast_Mammary_Tissue_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Cells_Cultured_fibroblasts_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Cells_Cultured_fibroblasts_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Cells_EBV-transformed_lymphocytes_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Colon_Sigmoid_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Colon_Sigmoid_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Colon_Transverse_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Colon_Transverse_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Esophagus_Gastroesophageal_Junction_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Esophagus_Mucosa_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Esophagus_Mucosa_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Esophagus_Muscularis_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Esophagus_Muscularis_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Heart_Atrial_Appendage_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Heart_Atrial_Appendage_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Heart_Left_Ventricle_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Heart_Left_Ventricle_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Kidney_Cortex_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Kidney_Cortex_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Liver_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Liver_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Lung_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Lung_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Minor_Salivary_Gland_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Minor_Salivary_Gland_all_snp_gene_associations.ht", + 
"s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Muscle_Skeletal_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Muscle_Skeletal_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Nerve_Tibial_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Nerve_Tibial_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Ovary_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Ovary_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Pancreas_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Pancreas_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Pituitary_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Pituitary_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Prostate_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Prostate_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Skin_Sun_Exposed_Lower_leg_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Small_Intestine_Terminal_Ileum_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Small_Intestine_Terminal_Ileum_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Spleen_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Spleen_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Stomach_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Stomach_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Testis_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Testis_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Thyroid_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Thyroid_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Uterus_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Uterus_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Vagina_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Vagina_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Whole_Blood_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Whole_Blood_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_all_snp_gene_associations_v8_GRCh38.mt": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/all_snp_gene_associations.mt", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Adipose_Subcutaneous_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Adipose_Subcutaneous_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Adipose_Visceral_Omentum_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Adipose_Visceral_Omentum_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Adrenal_Gland_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Adrenal_Gland_all_snp_gene_associations.ht", + 
"s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Artery_Aorta_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Artery_Aorta_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Artery_Coronary_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Artery_Coronary_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Artery_Tibial_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Artery_Tibial_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Amygdala_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Amygdala_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Caudate_basal_ganglia_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Caudate_basal_ganglia_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Cerebellar_Hemisphere_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Cerebellar_Hemisphere_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Cerebellum_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Cerebellum_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Cortex_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Cortex_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Frontal_Cortex_BA9_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Frontal_Cortex_BA9_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Hippocampus_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Hippocampus_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Hypothalamus_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Hypothalamus_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Putamen_basal_ganglia_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Putamen_basal_ganglia_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Spinal_cord_cervical_c-1_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Substantia_nigra_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Substantia_nigra_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Breast_Mammary_Tissue_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Breast_Mammary_Tissue_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Cells_Cultured_fibroblasts_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Cells_Cultured_fibroblasts_all_snp_gene_associations.ht", + 
"s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Cells_EBV-transformed_lymphocytes_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Colon_Sigmoid_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Colon_Sigmoid_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Colon_Transverse_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Colon_Transverse_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Esophagus_Gastroesophageal_Junction_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Esophagus_Mucosa_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Esophagus_Mucosa_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Esophagus_Muscularis_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Esophagus_Muscularis_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Heart_Atrial_Appendage_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Heart_Atrial_Appendage_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Heart_Left_Ventricle_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Heart_Left_Ventricle_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Kidney_Cortex_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Kidney_Cortex_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Liver_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Liver_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Lung_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Lung_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Minor_Salivary_Gland_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Minor_Salivary_Gland_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Muscle_Skeletal_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Muscle_Skeletal_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Nerve_Tibial_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Nerve_Tibial_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Ovary_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Ovary_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Pancreas_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Pancreas_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Pituitary_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Pituitary_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Prostate_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Prostate_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Skin_Sun_Exposed_Lower_leg_v8_GRCh38.ht": 
"s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Small_Intestine_Terminal_Ileum_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Small_Intestine_Terminal_Ileum_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Spleen_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Spleen_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Stomach_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Stomach_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Testis_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Testis_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Thyroid_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Thyroid_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Uterus_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Uterus_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Vagina_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Vagina_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Whole_Blood_v8_GRCh38.ht": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Whole_Blood_all_snp_gene_associations.ht", + "s3://hail-datasets-us-east-1/LDSC_baselineLD_v2.2_ld_scores.GRCh37.ht": "s3://hail-datasets-us-east-1/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.ht", + "s3://hail-datasets-us-east-1/LDSC_baselineLD_v2.2_ld_scores.GRCh37.mt": "s3://hail-datasets-us-east-1/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.mt", + "s3://hail-datasets-us-east-1/LDSC_baseline_v1.1_ld_scores.GRCh37.mt": "s3://hail-datasets-us-east-1/LDSC/baseline_v1.1/GRCh37/ld_scores.mt", + "s3://hail-datasets-us-east-1/NCBI_assembly_report_p13_GRCh37.ht": "s3://hail-datasets-us-east-1/dbSNP/assembly_reports/GRCh37/p13.ht", + "s3://hail-datasets-us-east-1/NCBI_assembly_report_p13_GRCh38.ht": "s3://hail-datasets-us-east-1/dbSNP/assembly_reports/GRCh38/p13.ht", + "s3://hail-datasets-us-east-1/UK_Biobank_Rapid_GWAS_both_sexes.v2.GRCh37.mt": "s3://hail-datasets-us-east-1/UK_Biobank/Rapid_GWAS/v2/GRCh37/both_sexes.mt", + "s3://hail-datasets-us-east-1/UK_Biobank_Rapid_GWAS_female.v2.GRCh37.mt": "s3://hail-datasets-us-east-1/UK_Biobank/Rapid_GWAS/v2/GRCh37/female.mt", + "s3://hail-datasets-us-east-1/UK_Biobank_Rapid_GWAS_male.v2.GRCh37.mt": "s3://hail-datasets-us-east-1/UK_Biobank/Rapid_GWAS/v2/GRCh37/male.mt", + "s3://hail-datasets-us-east-1/annotations/CADD.v1.4.GRCh37.ht": "s3://hail-datasets-us-east-1/CADD/v1.4/GRCh37/table.ht", + "s3://hail-datasets-us-east-1/annotations/CADD.v1.4.GRCh38.ht": "s3://hail-datasets-us-east-1/CADD/v1.4/GRCh38/table.ht", + "s3://hail-datasets-us-east-1/annotations/DANN.GRCh37.ht": "s3://hail-datasets-us-east-1/DANN/GRCh37/table.ht", + "s3://hail-datasets-us-east-1/annotations/DANN.GRCh38.ht": "s3://hail-datasets-us-east-1/DANN/GRCh38/table.ht", + "s3://hail-datasets-us-east-1/annotations/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh37.ht": "s3://hail-datasets-us-east-1/Ensembl/release_95/GRCh37/homo_sapiens_low_complexity_regions.ht", + "s3://hail-datasets-us-east-1/annotations/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh38.ht": "s3://hail-datasets-us-east-1/Ensembl/release_95/GRCh38/homo_sapiens_low_complexity_regions.ht", + 
"s3://hail-datasets-us-east-1/annotations/Ensembl_homo_sapiens_reference_genome.release_95.GRCh37.ht": "s3://hail-datasets-us-east-1/Ensembl/release_95/GRCh37/homo_sapiens_reference_genome.ht", + "s3://hail-datasets-us-east-1/annotations/Ensembl_homo_sapiens_reference_genome.release_95.GRCh38.ht": "s3://hail-datasets-us-east-1/Ensembl/release_95/GRCh38/homo_sapiens_reference_genome.ht", + "s3://hail-datasets-us-east-1/annotations/GERP_elements.GERP++.GRCh37.ht": "s3://hail-datasets-us-east-1/GERP/GERP++/GRCh37/elements.ht", + "s3://hail-datasets-us-east-1/annotations/GERP_elements.GERP++.GRCh38.ht": "s3://hail-datasets-us-east-1/GERP/GERP++/GRCh38/elements.ht", + "s3://hail-datasets-us-east-1/annotations/GERP_scores.GERP++.GRCh37.ht": "s3://hail-datasets-us-east-1/GERP/GERP++/GRCh37/scores.ht", + "s3://hail-datasets-us-east-1/annotations/GERP_scores.GERP++.GRCh38.ht": "s3://hail-datasets-us-east-1/GERP/GERP++/GRCh38/scores.ht", + "s3://hail-datasets-us-east-1/annotations/LDSC_baselineLD_v2.2_ld_scores.GRCh37.ht": "s3://hail-datasets-us-east-1/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.ht", + "s3://hail-datasets-us-east-1/annotations/dbnsfp/dbNSFP4.0_gene.complete.bgz.ht": "s3://hail-datasets-us-east-1/dbNSFP/v4.0a/gene_complete.ht", + "s3://hail-datasets-us-east-1/annotations/dbnsfp4.0a.GRCh37.ht": "s3://hail-datasets-us-east-1/dbNSFP/v4.0a/GRCh37/variant.ht", + "s3://hail-datasets-us-east-1/annotations/dbnsfp4.0a.GRCh38.ht": "s3://hail-datasets-us-east-1/dbNSFP/v4.0a/GRCh38/variant.ht", + "s3://hail-datasets-us-east-1/annotations/gencode.v19.annotation.GRCh37.ht": "s3://hail-datasets-us-east-1/GENCODE/v19/GRCh37/annotation.ht", + "s3://hail-datasets-us-east-1/annotations/gencode.v31.annotation.GRCh38.ht": "s3://hail-datasets-us-east-1/GENCODE/v31/GRCh38/annotation.ht", + "s3://hail-datasets-us-east-1/annotations/gene_specific_summary_2019-07.txt.gz.ht": "s3://hail-datasets-us-east-1/ClinVar/2019-07/gene_specific_summary.ht", + "s3://hail-datasets-us-east-1/annotations/gnomad_v2.1.1_lof_metrics_by_gene.ht": "s3://hail-datasets-us-east-1/gnomAD/v2.1.1/lof_metrics_by_gene.ht", + "s3://hail-datasets-us-east-1/annotations/variant_summary_2019-07.GRCh37.txt.gz.ht": "s3://hail-datasets-us-east-1/ClinVar/2019-07/GRCh37/variant_summary.ht", + "s3://hail-datasets-us-east-1/annotations/variant_summary_2019-07.GRCh38.txt.gz.ht": "s3://hail-datasets-us-east-1/ClinVar/2019-07/GRCh38/variant_summary.ht", + "s3://hail-datasets-us-east-1/dbSNP_154_GRCh37.ht": "s3://hail-datasets-us-east-1/dbSNP/build_154/GRCh37/full_table.ht", + "s3://hail-datasets-us-east-1/dbSNP_154_GRCh38.ht": "s3://hail-datasets-us-east-1/dbSNP/build_154/GRCh38/full_table.ht", + "s3://hail-datasets-us-east-1/dbSNP_rsid_154_GRCh37.ht": "s3://hail-datasets-us-east-1/dbSNP/build_154/GRCh37/rsid_only_table.ht", + "s3://hail-datasets-us-east-1/dbSNP_rsid_154_GRCh38.ht": "s3://hail-datasets-us-east-1/dbSNP/build_154/GRCh38/rsid_only_table.ht", + "s3://hail-datasets-us-east-1/dbnsfp/dbNSFP4.0_gene.complete.bgz.ht": "s3://hail-datasets-us-east-1/dbNSFP/v4.0a/gene_complete.ht", + "s3://hail-datasets-us-east-1/dbnsfp4.0a.GRCh37.ht": "s3://hail-datasets-us-east-1/dbNSFP/v4.0a/GRCh37/variant.ht", + "s3://hail-datasets-us-east-1/dbnsfp4.0a.GRCh38.ht": "s3://hail-datasets-us-east-1/dbNSFP/v4.0a/GRCh38/variant.ht", + "s3://hail-datasets-us-east-1/gencode.v19.annotation.GRCh37.ht": "s3://hail-datasets-us-east-1/GENCODE/v19/GRCh37/annotation.ht", + "s3://hail-datasets-us-east-1/gencode.v31.annotation.GRCh38.ht": 
"s3://hail-datasets-us-east-1/GENCODE/v31/GRCh38/annotation.ht", + "s3://hail-datasets-us-east-1/gene_specific_summary_2019-07.txt.gz.ht": "s3://hail-datasets-us-east-1/ClinVar/2019-07/gene_specific_summary.ht", + "s3://hail-datasets-us-east-1/giant_bmi_exome_AFR_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/bmi_AFR.ht", + "s3://hail-datasets-us-east-1/giant_bmi_exome_ALL_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/bmi_ALL.ht", + "s3://hail-datasets-us-east-1/giant_bmi_exome_AMR_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/bmi_AMR.ht", + "s3://hail-datasets-us-east-1/giant_bmi_exome_EAS_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/bmi_EAS.ht", + "s3://hail-datasets-us-east-1/giant_bmi_exome_EUR_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/bmi_EUR.ht", + "s3://hail-datasets-us-east-1/giant_bmi_exome_SAS_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/bmi_SAS.ht", + "s3://hail-datasets-us-east-1/giant_height_exome_AFR_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/height_AFR.ht", + "s3://hail-datasets-us-east-1/giant_height_exome_ALL_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/height_ALL.ht", + "s3://hail-datasets-us-east-1/giant_height_exome_AMR_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/height_AMR.ht", + "s3://hail-datasets-us-east-1/giant_height_exome_EAS_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/height_EAS.ht", + "s3://hail-datasets-us-east-1/giant_height_exome_EUR_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/height_EUR.ht", + "s3://hail-datasets-us-east-1/giant_height_exome_SAS_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/height_SAS.ht", + "s3://hail-datasets-us-east-1/giant_whr_exome_C_ALL_Add_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_C_ALL_Add.ht", + "s3://hail-datasets-us-east-1/giant_whr_exome_C_ALL_Rec_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_C_ALL_Rec.ht", + "s3://hail-datasets-us-east-1/giant_whr_exome_C_EUR_Add_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_C_EUR_Add.ht", + "s3://hail-datasets-us-east-1/giant_whr_exome_C_EUR_Rec_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_C_EUR_Rec.ht", + "s3://hail-datasets-us-east-1/giant_whr_exome_M_ALL_Add_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_M_ALL_Add.ht", + "s3://hail-datasets-us-east-1/giant_whr_exome_M_ALL_Rec_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_M_ALL_Rec.ht", + "s3://hail-datasets-us-east-1/giant_whr_exome_M_EUR_Add_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_M_EUR_Add.ht", + "s3://hail-datasets-us-east-1/giant_whr_exome_M_EUR_Rec_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_M_EUR_Rec.ht", + "s3://hail-datasets-us-east-1/giant_whr_exome_W_ALL_Add_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_W_ALL_Add.ht", + "s3://hail-datasets-us-east-1/giant_whr_exome_W_ALL_Rec_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_W_ALL_Rec.ht", + 
"s3://hail-datasets-us-east-1/giant_whr_exome_W_EUR_Add_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_W_EUR_Add.ht", + "s3://hail-datasets-us-east-1/giant_whr_exome_W_EUR_Rec_2018_GRCh37.ht": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_W_EUR_Rec.ht", + "s3://hail-datasets-us-east-1/gnomad_v2.1.1_lof_metrics_by_gene.ht": "s3://hail-datasets-us-east-1/gnomAD/v2.1.1/lof_metrics_by_gene.ht", + "s3://hail-datasets-us-east-1/variant_summary_2019-07.GRCh37.txt.gz.ht": "s3://hail-datasets-us-east-1/ClinVar/2019-07/GRCh37/variant_summary.ht", + "s3://hail-datasets-us-east-1/variant_summary_2019-07.GRCh38.txt.gz.ht": "s3://hail-datasets-us-east-1/ClinVar/2019-07/GRCh38/variant_summary.ht" +} diff --git a/dev-docs/batch-operation.md b/dev-docs/batch-operation.md index 224d56e6f20..752851cdd5f 100644 --- a/dev-docs/batch-operation.md +++ b/dev-docs/batch-operation.md @@ -10,14 +10,19 @@ kubectl top pods -l app=batch-driver Less readily available metrics include the request latency (which might naturally vary by request type) and scheduling latency. The goal is to operate Batch around 95% CPU. When Batch becomes overwhelmed, CPU is saturated and request latency increases. New requests inject work into the system, but time out and are retried due to latency, creating a bad feedback loop. -The incoming request rate from workers is controlled by the `internal-gateway`. The `internal-gateway` is fixed at 3 replicates and imposes a per-instance request limit: +The incoming request rate from workers is controlled by the `internal-gateway`. The `internal-gateway` is fixed at 3 replicates and imposes a per-instance, per-namespace request limit: ``` -limit_req_zone global zone=limit:1m rate=45r/s; +map $service $batch_driver_limit_key { + "batch-driver" "$namespace"; + default ""; # no key => no limit +} + +limit_req_zone $batch_driver_limit_key zone=batch_driver:1m rate=10r/s; server { location { - limit_req zone=limit burst=20 nodelay; + limit_req zone=batch_driver burst=20 nodelay; } } ``` @@ -33,3 +38,30 @@ Watch the behavior under maximum load. Normally the allocated cores increases f You can inspect the `internal-gateway` logs to determine if the request rate is maximized. When the maximum request rate is exceeded, `internal-gateway` nginx returns 503 and logs a message. To determine if the cluster size is at the maximum, check the CPU and `internal-gateway` request rate when the cluster is not growing, but just replacing preempted nodes. The CPU should not be pegged, and `internal-gateway` should reject requests at most in transient bursts. In general, the load will be much lower at equilibrium because filling an empty node requires many operations. + +## Quotas + +When using Local SSDs on preemptible machines there are only two quotas that matter: "Preemptible +Local SSD (GB)" (`PREEMPTIBLE_LOCAL_SSD_GB`) and "Preemptible CPUs" (`PREEMPTIBLE_CPUS`). The former +is measured in GB, so you'll need 375 GB of quota for every machine with a Local SSD. The latter is +measured in cores. For example, if you are using a mix of n1 and n2 machines with 8 cores and 1 +Local SSD, a 5000 machine (40,000 core) cluster will need: + +- 1,875,000 GB of Preemptible Local SSD quota, and + +- 40,000 cores of Preemptible CPUs quota. + +In practice, we use Local SSD quota much faster than CPU quota. Google will freely gives us a 5,000 +core quota in any given zone. We've also received quotas as high as 300,000 cores. 
Google is +hesitant to grant a quota of more than 400,000 GB in a zone. The largest Preemptible Local SSD quota +we have been granted in one zone is 640,000 GB. + +We recommend requesting double your quota when you're using 80-90% of the current quota. Repeating +this process will generally allow you to quickly scale the cluster. + +A higher or lower quota is requested by going to the "Quota Metric Details" page for a specific +quota (e.g. `PREEMPTIBLE_CPUS`), selecting the regions or zones of interest, and clicking "Edit +Quotas". + +Quota requests during Eastern Time business hours appear to be approved faster. We presume this is +because our Technical Account Managers work in the Cambridge, MA Google office. diff --git a/dev-docs/compiler-team/development_tools.md b/dev-docs/compiler-team/development_tools.md new file mode 100644 index 00000000000..8aeb9bb5d1d --- /dev/null +++ b/dev-docs/compiler-team/development_tools.md @@ -0,0 +1,79 @@ +# Hail development tools + +This document describes and links tools used by the Hail compiler team. +The document is written for the most common operating system used by +the team, macOS. + +## General tools + +##### Homebrew - macOS package manager + +Homebrew is hard to live without. Use it to install many of the other tools +used by the team. + +https://brew.sh/ + +##### git - version control + +It's nice to have a relatively recent version of git. Install this with +brew: + + brew install git + +It will probably be necessary to change system paths so that the +installed git is available before system git, as [described here](https://ajahne.github.io/blog/tools/2018/06/11/how-to-upgrade-git-mac.html). + +Once this is working, you should fork the hail-is/hail repository into +your own user space, then clone the repository locally: + + git clone https://github.com/username/hail.git + +Then add a remote for the main repository to pull in changes: + + git remote add hi https://github.com/hail-is/hail.git + + +##### Zulip - dev / user chat + +We use Zulip for development discussion and conversations with users +(though not typically for user support). + +Get it here: + +https://zulip.com/ + +Our Zulip server is https://hail.zulipchat.com + +##### Anaconda - manage Python installations and packages + +https://www.anaconda.com/download/#macos + +After installing Anaconda, you should create a new dev environment +for Hail with: + + conda create --name hail python=3.7 + +and + + conda activate hail + +(put the latter in a shell .rc file so this is done on shell startup) + +##### IntelliJ IDEA - IDE for java/scala/python + +https://www.jetbrains.com/idea/ + +Configuration is hard to document here, get help by asking the team. + +##### iTerm2 - terminal replacement + +iTerm2 is (subjectively) nicer to use and objectively more customizable +than the built-in macOS terminal. + +https://iterm2.com/ + +##### Google cloud utilities + +We primarily use Google Cloud for development. Get the SDK here: + +https://cloud.google.com/sdk/docs/install \ No newline at end of file diff --git a/dev-docs/development_process.md b/dev-docs/development_process.md index 7f34f116fac..066e4be23d7 100644 --- a/dev-docs/development_process.md +++ b/dev-docs/development_process.md @@ -117,7 +117,7 @@ Your namespace name is the same as your username. Submitting a dev deploy with hailctl will give you the link to a UI where you can monitor the progress of everything deploying and get the logs for any steps that fail. 
You can also see a recent history of your dev deploys at -[ci.hail.is/me](ci.hail.is/me). +[ci.hail.is/me](https://ci.hail.is/me). If the tests fail, you can then examine the Kubernetes logs for the service @@ -159,9 +159,9 @@ PRs will not have any labels. For the PR title, start the title with the name of the service(s) the changes impact. For example, if it’s a Benchmark change, then you’d write -[benchmark]. If it’s a Hail Query change, then it would be [query]. We also want +`[benchmark]`. If it’s a Hail Query change, then it would be `[query]`. We also want the title to be descriptive enough to know what the change is without being too -verbose. An example is “[batch] Added read_only option for gcsfuse”. +verbose. An example is “`[batch]` Added read_only option for gcsfuse”. For the PR commit message, we want the message to be descriptive of the complete set of changes that occurred, especially if it’s a complicated set of diff --git a/dev-docs/google-cloud-cookbook.md b/dev-docs/google-cloud-cookbook.md new file mode 100644 index 00000000000..815c8c2fd2c --- /dev/null +++ b/dev-docs/google-cloud-cookbook.md @@ -0,0 +1,30 @@ +# Google Cloud Cookbook + +Executable snippets for certain operational tasks. These snippets will likely bitrot and otherwise +be unreliable. The reader is recommended to test the snippet before use. + +## Delete All Detached Disks + +There were approximately 240,000 disks accidentally left unattached in a project. This script was +used to delete them. We only delete 990 at a time because there is a "queries per second" quota that +limits how many queries we can make every 100 seconds. We empirically determined that deleting 990 +instances at a time did not exceed the quota. + +``` +for i in $(seq 0 280) +do + gcloud compute disks list \ + --filter='LAST_ATTACH_TIMESTAMP < LAST_DETATCH_TIMESTAMP and name ~ "^batch-disk" and STATUS=Ready' \ + --limit=990 \ + | tail -n +2 \ + > /tmp/disks + for zone in $(cat /tmp/disks | awk '{print $2}' | uniq ) + do + echo $zone + awk '$2=="'$zone'" {print $1}' /tmp/disks \ + | grep -Ee '^batch-disk' \ + | xargs /bin/bash -c 'yes | gcloud compute disks delete '--zone=$zone' $*' % & + done + wait +done +``` diff --git a/docker-build.sh b/docker-build.sh new file mode 100755 index 00000000000..73367a15d9f --- /dev/null +++ b/docker-build.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +CONTEXT="$(cd $1 && pwd)" +DOCKERFILE="$CONTEXT/$2" +REMOTE_IMAGE_NAME=$3 +EXTRA_CACHE=$4 + +CACHE_NAME=$(echo $REMOTE_IMAGE_NAME | sed -E 's/(:[^:]+)(@[^@]+)?$//'):cache + +DOCKER_BUILDKIT=1 docker build \ + --file ${DOCKERFILE} \ + --cache-from ${CACHE_NAME} \ + ${EXTRA_CACHE:+--cache-from ${EXTRA_CACHE}} \ + --build-arg BUILDKIT_INLINE_CACHE=1 \ + --tag ${REMOTE_IMAGE_NAME} \ + --tag ${CACHE_NAME} \ + ${CONTEXT} + +time DOCKER_BUILDKIT=1 docker push ${REMOTE_IMAGE_NAME} +time DOCKER_BUILDKIT=1 docker push ${CACHE_NAME} diff --git a/docker/Dockerfile.base b/docker/Dockerfile.base index b1fa1fc5200..e1d969d3d64 100644 --- a/docker/Dockerfile.base +++ b/docker/Dockerfile.base @@ -1,5 +1,17 @@ FROM {{ hail_ubuntu_image.image }} +RUN hail-pip-install pyspark==3.1.1 +ENV SPARK_HOME /usr/local/lib/python3.7/dist-packages/pyspark +ENV PATH "$PATH:$SPARK_HOME/sbin:$SPARK_HOME/bin" +ENV PYSPARK_PYTHON python3 + +# source: https://cloud.google.com/storage/docs/gsutil_install#linux +RUN curl -sSLO https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-334.0.0-linux-x86_64.tar.gz && \ + tar -xf google-cloud-sdk-334.0.0-linux-x86_64.tar.gz && \ + curl 
-sSLO https://dl.k8s.io/release/v1.19.7/bin/linux/amd64/kubectl && \ + install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl +ENV PATH $PATH:/google-cloud-sdk/bin + RUN hail-apt-get-install \ git \ htop \ @@ -14,35 +26,23 @@ RUN hail-apt-get-install \ liblapack3 \ g++-10 \ gcc-10 \ - cmake - -RUN update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 10 && \ + cmake \ + && \ + update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 10 && \ update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 10 -# source: https://cloud.google.com/storage/docs/gsutil_install#linux -RUN curl -sSLO https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-334.0.0-linux-x86_64.tar.gz && \ - tar -xf google-cloud-sdk-334.0.0-linux-x86_64.tar.gz && \ - curl -sSLO https://dl.k8s.io/release/v1.19.7/bin/linux/amd64/kubectl && \ - install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl -ENV PATH $PATH:/google-cloud-sdk/bin -RUN gcloud version && kubectl version --client=true - -COPY docker/requirements.txt . -RUN hail-pip-install -r requirements.txt pyspark==3.1.1 - -ENV SPARK_HOME /usr/local/lib/python3.7/dist-packages/pyspark -ENV PATH "$PATH:$SPARK_HOME/sbin:$SPARK_HOME/bin" -ENV PYSPARK_PYTHON python3 - # Regarding explicitly selecting 2.0.1: https://github.com/hail-is/hail/issues/8343 RUN wget -nv -O ${SPARK_HOME}/jars/gcs-connector-hadoop2-2.0.1.jar https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop2-2.0.1.jar COPY docker/core-site.xml ${SPARK_HOME}/conf/core-site.xml -COPY pylintrc setup.cfg / - RUN git clone https://github.com/catchorg/Catch2.git --depth 1 --branch v2.13.3 && \ cd Catch2 && \ cmake -Bbuild -H. -DBUILD_TESTING=OFF && \ cmake --build build/ --target install && \ cd .. && \ rm -rf Catch2 + +COPY docker/requirements.txt . +RUN hail-pip-install -r requirements.txt + +COPY pylintrc setup.cfg / diff --git a/docker/Dockerfile.service-base b/docker/Dockerfile.service-base index c4c9fbfb6ca..4c53181ed61 100644 --- a/docker/Dockerfile.service-base +++ b/docker/Dockerfile.service-base @@ -1,16 +1,8 @@ FROM {{ base_image.image }} -RUN hail-apt-get-install build-essential python3-dev - COPY docker/service-base-requirements.txt . 
RUN hail-pip-install -r service-base-requirements.txt -COPY hail/python/setup-hailtop.py /hailtop/setup.py -COPY hail/python/hailtop /hailtop/hailtop/ -COPY /hail_version /hailtop/hailtop/hail_version -COPY hail/python/MANIFEST.in /hailtop/MANIFEST.in -RUN hail-pip-install /hailtop && rm -rf /hailtop - COPY gear/setup.py /gear/setup.py COPY gear/gear /gear/gear/ RUN hail-pip-install /gear && rm -rf /gear @@ -19,3 +11,9 @@ COPY web_common/setup.py web_common/MANIFEST.in /web_common/ COPY web_common/web_common /web_common/web_common/ RUN hail-pip-install /web_common && rm -rf /web_common RUN echo 'from hailtop import version; print(version());' | python3 + +COPY hail/python/setup-hailtop.py /hailtop/setup.py +COPY hail/python/hailtop /hailtop/hailtop/ +COPY /hail_version /hailtop/hailtop/hail_version +COPY hail/python/MANIFEST.in /hailtop/MANIFEST.in +RUN hail-pip-install /hailtop && rm -rf /hailtop diff --git a/docker/Makefile b/docker/Makefile index 72996926c38..484b74fa332 100644 --- a/docker/Makefile +++ b/docker/Makefile @@ -1,96 +1,58 @@ include ../config.mk .PHONY: build -build: base-stmp service-base hail-ubuntu +build: base service-base service-java-run-base hail-ubuntu -HAIL_UBUNTU_LATEST = $(DOCKER_PREFIX)/hail-ubuntu:latest -HAIL_UBUNTU_IMAGE = $(DOCKER_PREFIX)/hail-ubuntu:$(shell docker images -q --no-trunc hail-ubuntu:latest | sed -e 's,[^:]*:,,') +TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) -BASE_LATEST = $(DOCKER_PREFIX)/base:latest -BASE_IMAGE = $(DOCKER_PREFIX)/base:$(shell docker images -q --no-trunc base:latest | sed -e 's,[^:]*:,,') +HAIL_UBUNTU_IMAGE := $(DOCKER_PREFIX)/hail-ubuntu:$(TOKEN) +BASE_IMAGE := $(DOCKER_PREFIX)/base:$(TOKEN) +SERVICE_BASE_IMAGE := $(DOCKER_PREFIX)/service-base:$(TOKEN) +SERVICE_JAVA_RUN_BASE_IMAGE := $(DOCKER_PREFIX)/service-java-run-base:$(TOKEN) +HAIL_PUBLIC_IMAGE := $(DOCKER_PREFIX)/hail-public:$(TOKEN) +GENETICS_PUBLIC_IMAGE := $(DOCKER_PREFIX)/genetics-public:$(TOKEN) -SERVICE_BASE_LATEST = $(DOCKER_PREFIX)/service-base:latest -SERVICE_BASE_IMAGE = $(DOCKER_PREFIX)/service-base:$(shell docker images -q --no-trunc service-base:latest | sed -e 's,[^:]*:,,') - -SERVICE_JAVA_RUN_BASE_LATEST = $(DOCKER_PREFIX)/service-java-run-base:latest -SERVICE_JAVA_RUN_BASE_IMAGE = $(DOCKER_PREFIX)/service-java-run-base:$(shell docker images -q --no-trunc service-java-run-base:latest | sed -e 's,[^:]*:,,') - -HAIL_PUBLIC_LATEST = $(DOCKER_PREFIX)/hail-public:latest -HAIL_PUBLIC_IMAGE = $(DOCKER_PREFIX)/hail-public:$(shell docker images -q --no-trunc hail-public:latest | sed -e 's,[^:]*:,,') - -GENETICS_PUBLIC_LATEST = $(DOCKER_PREFIX)/genetics-public:latest -GENETICS_PUBLIC_IMAGE = $(DOCKER_PREFIX)/genetics-public:$(shell docker images -q --no-trunc genetics-public:latest | sed -e 's,[^:]*:,,') - -.PHONY: hail-ubuntu -hail-ubuntu: hail-ubuntu-stmp - -hail-ubuntu-stmp: hail-ubuntu/Dockerfile hail-ubuntu/hail-pip-install hail-ubuntu/pip.conf hail-ubuntu/hail-apt-get-install - -docker pull $(DOCKER_PREFIX)/ubuntu:bionic-20200921 - -docker pull $(HAIL_UBUNTU_LATEST) +.PHONY: hail-ubuntu hail-ubuntu-image-ref +hail-ubuntu: hail-ubuntu-image-ref +hail-ubuntu-image-ref: hail-ubuntu/Dockerfile hail-ubuntu/hail-pip-install hail-ubuntu/pip.conf hail-ubuntu/hail-apt-get-install python3 ../ci/jinja2_render.py '{"global":{"docker_prefix":"$(DOCKER_PREFIX)"}}' hail-ubuntu/Dockerfile hail-ubuntu/Dockerfile.out - docker build -t hail-ubuntu -f hail-ubuntu/Dockerfile.out --cache-from hail-ubuntu,$(HAIL_UBUNTU_LATEST),ubuntu:bionic-20200921 
hail-ubuntu - touch hail-ubuntu-stmp - -.PHONY: base -base: base-stmp - -base-stmp: hail-ubuntu-stmp Dockerfile.base core-site.xml requirements.txt ../pylintrc ../setup.cfg - -docker pull $(BASE_LATEST) - python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"hail-ubuntu"}}' Dockerfile.base Dockerfile.base.out - [ "$(shell bash stat-permissions.sh Dockerfile.base)" = "644" ] - [ "$(shell bash stat-permissions.sh core-site.xml)" = "644" ] - [ "$(shell bash stat-permissions.sh requirements.txt)" = "644" ] - [ "$(shell bash stat-permissions.sh ../pylintrc)" = "644" ] - [ "$(shell bash stat-permissions.sh ../setup.cfg)" = "644" ] - docker build -t base -f Dockerfile.base.out --cache-from base,$(BASE_LATEST),hail-ubuntu .. - touch base-stmp - -.PHONY: service-base -service-base: base-stmp - -docker pull $(SERVICE_BASE_LATEST) + ../docker-build.sh hail-ubuntu Dockerfile.out $(HAIL_UBUNTU_IMAGE) + echo $(HAIL_UBUNTU_IMAGE) > $@ + +.PHONY: base base-image-ref +base: base-image-ref +base-image-ref: hail-ubuntu-image-ref Dockerfile.base core-site.xml requirements.txt ../pylintrc ../setup.cfg + python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"'$$(cat hail-ubuntu-image-ref)'"}}' Dockerfile.base Dockerfile.base.out + ../docker-build.sh .. docker/Dockerfile.base.out $(BASE_IMAGE) + echo $(BASE_IMAGE) > $@ + +.PHONY: hail_version +hail_version: $(MAKE) -C ../hail python/hailtop/hail_version cp ../hail/python/hailtop/hail_version ../hail_version - python3 ../ci/jinja2_render.py '{"base_image":{"image":"base"}}' Dockerfile.service-base Dockerfile.service-base.out - [ "$(shell bash stat-permissions.sh Dockerfile.service-base.out)" = "644" ] - [ "$(shell bash stat-permissions.sh service-base-requirements.txt)" = "644" ] - docker build -t service-base -f Dockerfile.service-base.out --cache-from service-base,$(SERVICE_BASE_LATEST),base,hail-ubuntu .. -.PHONY: service-java-run-base -service-java-run-base: hail-ubuntu-stmp - -docker pull $(SERVICE_JAVA_RUN_BASE_LATEST) - $(MAKE) -C ../hail python/hailtop/hail_version - cp ../hail/python/hailtop/hail_version ../hail_version - python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"hail-ubuntu"}}' Dockerfile.service-java-run-base Dockerfile.service-java-run-base.out - [ "$(shell bash stat-permissions.sh Dockerfile.service-java-run-base.out)" = "644" ] - docker build -t service-java-run-base -f Dockerfile.service-java-run-base.out --cache-from service-java-run-base,$(SERVICE_JAVA_RUN_BASE_LATEST),base,hail-ubuntu .. +.PHONY: service-base service-base-image-ref +service-base: service-base-image-ref +service-base-image-ref: base hail_version + python3 ../ci/jinja2_render.py '{"base_image":{"image":"'$$(cat base-image-ref)'"}}' Dockerfile.service-base Dockerfile.service-base.out + ../docker-build.sh .. 
docker/Dockerfile.service-base.out $(SERVICE_BASE_IMAGE) + echo $(SERVICE_BASE_IMAGE) > $@ -.PHONY: push -push: build - docker tag hail-ubuntu $(HAIL_UBUNTU_LATEST) - docker push $(HAIL_UBUNTU_LATEST) - docker tag hail-ubuntu $(HAIL_UBUNTU_IMAGE) - docker push $(HAIL_UBUNTU_IMAGE) - docker tag base $(BASE_LATEST) - docker push $(BASE_LATEST) - docker tag base $(BASE_IMAGE) - docker push $(BASE_IMAGE) - docker tag service-base $(SERVICE_BASE_LATEST) - docker push $(SERVICE_BASE_LATEST) - docker tag service-base $(SERVICE_BASE_IMAGE) - docker push $(SERVICE_BASE_IMAGE) - docker tag service-java-run-base $(SERVICE_JAVA_RUN_BASE_LATEST) - docker push $(SERVICE_JAVA_RUN_BASE_LATEST) - docker tag service-java-run-base $(SERVICE_JAVA_RUN_BASE_IMAGE) - docker push $(SERVICE_JAVA_RUN_BASE_IMAGE) +.PHONY: service-java-run-base service-java-run-base-image-ref +service-java-run-base: service-java-run-base-image-ref +service-java-run-base-image-ref: hail-ubuntu-image-ref hail_version + python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"'$$(cat hail-ubuntu-image-ref)'"}}' Dockerfile.service-java-run-base Dockerfile.service-java-run-base.out + ../docker-build.sh .. docker/Dockerfile.service-java-run-base.out $(SERVICE_JAVA_RUN_BASE_IMAGE) + echo $(SERVICE_JAVA_RUN_BASE_IMAGE) > $@ .PHONY: deploy -deploy: push +deploy: build .PHONY: clean clean: - rm -f base-stmp hail-ubuntu-stmp + rm -f base hail-ubuntu-image-ref base service-base service-java-run-base ## Public Images ############################################################### -publish-public-images: +publish-public-images: hail-ubuntu-image-ref bash publish-public-images.sh $(DOCKER_PREFIX) diff --git a/docker/publish-public-images.sh b/docker/publish-public-images.sh index b7e8d829b09..5626e4a2e78 100644 --- a/docker/publish-public-images.sh +++ b/docker/publish-public-images.sh @@ -10,27 +10,27 @@ hail_pip_version=$(cat hail/hail_pip_version) build_and_push() { name=$1 - base=$2 versioned_short=hailgenetics/$name:$hail_pip_version versioned_full=$docker_prefix/$versioned_short - latest_full=$docker_prefix/hailgenetics/$name:latest - - docker pull $latest || true - docker build \ - $name/ \ - -f $name/Dockerfile.out \ - -t $versioned_short \ - -t $versioned_full \ - -t $latest_full \ - --cache-from $latest_full,$base - docker push $versioned_short - docker push $versioned_full - docker push $latest_full + cache=$docker_prefix/hailgenetics/$name:cache + + DOCKER_BUILDKIT=1 docker build \ + --file $name/Dockerfile.out \ + --cache-from ${cache} \ + --build-arg BUILDKIT_INLINE_CACHE=1 \ + --tag $versioned_short \ + --tag $versioned_full \ + --tag $cache \ + ${name} + + time DOCKER_BUILDKIT=1 docker push ${versioned_short} + time DOCKER_BUILDKIT=1 docker push ${versioned_full} + time DOCKER_BUILDKIT=1 docker push ${cache} } -python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"hail-ubuntu"}}' hail/Dockerfile hail/Dockerfile.out -build_and_push hail hail-ubuntu +python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"'$(cat hail-ubuntu-image-ref)'"}}' hail/Dockerfile hail/Dockerfile.out +build_and_push hail python3 ../ci/jinja2_render.py '{"hail_public_image":{"image":"'hailgenetics/hail:$hail_pip_version'"}}' genetics/Dockerfile genetics/Dockerfile.out -build_and_push genetics hailgenetics/hail:${hail_pip_version} +build_and_push genetics diff --git a/docker/python-dill/push.sh b/docker/python-dill/push.sh index 4f6bfbdaad9..82522eff021 100644 --- a/docker/python-dill/push.sh +++ b/docker/python-dill/push.sh @@ -3,8 +3,21 
@@ for version in 3.6 3.6-slim 3.7 3.7-slim 3.8 3.8-slim do sed "s/@PYTHON_VERSION@/$version/g" Dockerfile > Dockerfile.out - docker build --tag hailgenetics/python-dill:$version - 3.1: https://bugs.launchpad.net/usd-importer/+bug/1794041/comments/6 keyrings.alt>=3.1 @@ -48,13 +51,13 @@ pytest-instafail==0.4.2 pytest-xdist==2.2.1 python-dateutil==2.8.1 python-json-logger==0.1.11 -requests==2.22.0 +requests==2.25.1 setuptools>=38.6.0 sortedcontainers==2.1.0 tabulate==0.8.3 tqdm==4.42.1 twine>=1.11.0 -urllib3==1.24.3 +urllib3==1.26.5 uvloop==0.14.0 Werkzeug==0.15.4 wheel>=0.31.0 diff --git a/gateway/Makefile b/gateway/Makefile index 6ee3f846e9d..46e1dde6661 100644 --- a/gateway/Makefile +++ b/gateway/Makefile @@ -2,23 +2,17 @@ include ../config.mk .PHONY: build push deploy -GATEWAY_LATEST = $(DOCKER_PREFIX)/gateway:latest -GATEWAY_IMAGE = $(DOCKER_PREFIX)/gateway:$(shell docker images -q --no-trunc gateway | sed -e 's,[^:]*:,,') +TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) + +GATEWAY_IMAGE := $(DOCKER_PREFIX)/gateway:$(TOKEN) build: $(MAKE) -C ../docker hail-ubuntu - -docker pull $(GATEWAY_LATEST) - python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"hail-ubuntu"}}' Dockerfile Dockerfile.out + python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"'$$(cat ../docker/hail-ubuntu-image-ref)'"}}' Dockerfile Dockerfile.out python3 ../ci/jinja2_render.py '{"domain": "$(DOMAIN)","subdomains":[$(shell sed -e 's/\(.*\)/"\1"/' ../letsencrypt/subdomains.txt | paste -s -d, -)]}' gateway.nginx.conf gateway.nginx.conf.out - docker build -t gateway -f Dockerfile.out --cache-from gateway,$(GATEWAY_LATEST),hail-ubuntu . - -push: build - docker tag gateway $(GATEWAY_LATEST) - docker push $(GATEWAY_LATEST) - docker tag gateway $(GATEWAY_IMAGE) - docker push $(GATEWAY_IMAGE) + ../docker-build.sh . Dockerfile.out $(GATEWAY_IMAGE) -deploy: push +deploy: build python3 ../ci/jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"global":{"ip":"$(IP)"}}' service.yaml service.yaml.out kubectl -n default apply -f service.yaml.out python3 ../ci/jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"gateway_image":{"image":"$(GATEWAY_IMAGE)"},"global":{"project":"$(PROJECT)"}}' deployment.yaml deployment.yaml.out diff --git a/grafana/Makefile b/grafana/Makefile index cee6e4c0c32..182474f8112 100644 --- a/grafana/Makefile +++ b/grafana/Makefile @@ -2,23 +2,17 @@ include ../config.mk .PHONY: build push deploy -GRAFANA_NGINX_LATEST = $(DOCKER_PREFIX)/grafana_nginx:latest -GRAFANA_NGINX_IMAGE = $(DOCKER_PREFIX)/grafana_nginx:$(shell docker images -q --no-trunc grafana_nginx | sed -e 's,[^:]*:,,') +TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) + +GRAFANA_NGINX_IMAGE := $(DOCKER_PREFIX)/grafana_nginx:$(TOKEN) build: $(MAKE) -C ../docker hail-ubuntu - -docker pull $(GRAFANA_NGINX_LATEST) - python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"hail-ubuntu"}}' Dockerfile.nginx Dockerfile.nginx.out + python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"'$$(cat ../docker/hail-ubuntu-image-ref)'"}}' Dockerfile.nginx Dockerfile.nginx.out python3 ../ci/jinja2_render.py '{"deploy": $(DEPLOY), "default_ns": {"name": "$(NAMESPACE)"}}' nginx.conf nginx.conf.out - docker build -t grafana_nginx -f Dockerfile.nginx.out --cache-from grafana_nginx,$(GRAFANA_NGINX_LATEST),hail-ubuntu . 
- -push: build - docker tag grafana_nginx $(GRAFANA_NGINX_LATEST) - docker push $(GRAFANA_NGINX_LATEST) - docker tag grafana_nginx $(GRAFANA_NGINX_IMAGE) - docker push $(GRAFANA_NGINX_IMAGE) + ../docker-build.sh . Dockerfile.nginx.out $(GRAFANA_NGINX_IMAGE) -deploy: push +deploy: build ! [ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default python3 ../ci/jinja2_render.py '{"deploy":$(DEPLOY),"default_ns":{"name":"$(NAMESPACE)"}, "grafana_nginx_image": {"image": "$(GRAFANA_NGINX_IMAGE)"}}' deployment.yaml deployment.yaml.out kubectl -n $(NAMESPACE) apply -f deployment.yaml.out diff --git a/grafana/deployment.yaml b/grafana/deployment.yaml index 99b065fe484..292a68ea942 100644 --- a/grafana/deployment.yaml +++ b/grafana/deployment.yaml @@ -29,7 +29,7 @@ spec: secretName: ssl-config-grafana containers: - name: grafana - image: grafana/grafana:7.3.7 + image: grafana/grafana:8.0.2 env: {% if deploy %} - name: GF_SERVER_DOMAIN diff --git a/hail/Makefile b/hail/Makefile index 5595addebc1..d1da1b43436 100644 --- a/hail/Makefile +++ b/hail/Makefile @@ -14,7 +14,7 @@ BRANCH := $(shell git rev-parse --abbrev-ref HEAD) SCALA_VERSION ?= 2.12.13 SPARK_VERSION ?= 3.1.1 HAIL_MAJOR_MINOR_VERSION := 0.2 -HAIL_PATCH_VERSION := 67 +HAIL_PATCH_VERSION := 70 HAIL_PIP_VERSION := $(HAIL_MAJOR_MINOR_VERSION).$(HAIL_PATCH_VERSION) HAIL_VERSION := $(HAIL_PIP_VERSION)-$(SHORT_REVISION) ELASTIC_MAJOR_VERSION ?= 7 @@ -192,7 +192,7 @@ copy-py-files: $(PYTHON_VERSION_INFO) $(SHADOW_JAR) $(INIT_SCRIPTS) $(PY_FILES) mkdir -p build/deploy mkdir -p build/deploy/src cp ../README.md build/deploy/ - rsync -rv \ + rsync -r \ --exclude '.eggs/' \ --exclude '.pytest_cache/' \ --exclude '__pycache__/' \ @@ -336,7 +336,7 @@ HAIL_CACHE_VERSION = $(shell cat python/hail/hail_version) .PHONY: hail-docs hail-docs: $(PYTHON_VERSION_INFO) python/hail/docs/change_log.rst $(MAKE) -C python/hail/docs \ - SPHINXOPTS='-tchecktutorial' \ + SPHINXOPTS='-tgenerate_notebook_outputs' \ BUILDDIR=$(HAIL_DIR)/build/docs/hail \ html mkdir -p build/www/docs diff --git a/hail/build.gradle b/hail/build.gradle index a40f9381576..439d4dca774 100644 --- a/hail/build.gradle +++ b/hail/build.gradle @@ -27,9 +27,6 @@ repositories { mavenCentral() jcenter() maven { url "https://repository.cloudera.com/artifactory/cloudera-repos/" } - maven { url "https://repo.spring.io/plugins-release/" } - // Necessary for elasticsearch spark 3.0.1 snapshot. 
- maven { url "https://oss.sonatype.org/content/repositories/snapshots/"} } sourceSets.main.scala.srcDir "src/main/java" @@ -195,7 +192,7 @@ dependencies { } else if (sparkVersion.startsWith("3.0.")) { assert(scalaMajorVersion == "2.12") - bundled 'org.elasticsearch:elasticsearch-spark-30_2.12:7.13.0-SNAPSHOT' + bundled 'org.elasticsearch:elasticsearch-spark-30_2.12:7.13.1' } else if (sparkVersion.startsWith("2.4.")) { assert(scalaMajorVersion == "2.11") diff --git a/hail/python/MANIFEST.in b/hail/python/MANIFEST.in index 77493686b12..aa401baf489 100644 --- a/hail/python/MANIFEST.in +++ b/hail/python/MANIFEST.in @@ -5,3 +5,4 @@ include hail/backend/hail-all-spark.jar include hailtop/hail_version include hailtop/hailctl/deploy.yaml include hailtop/py.typed +include requirements.txt diff --git a/hail/python/hail/__init__.py b/hail/python/hail/__init__.py index 7634d7d5109..b62b32d6d59 100644 --- a/hail/python/hail/__init__.py +++ b/hail/python/hail/__init__.py @@ -59,7 +59,7 @@ from hail.expr import aggregators as agg # noqa: E402 from hail.utils import (Struct, Interval, hadoop_copy, hadoop_open, hadoop_ls, # noqa: E402 hadoop_stat, hadoop_exists, hadoop_is_file, - hadoop_is_dir, copy_log) + hadoop_is_dir, hadoop_scheme_supported, copy_log) from .context import (init, init_local, stop, spark_context, tmp_dir, default_reference, # noqa: E402 get_reference, set_global_seed, _set_flags, _get_flags, current_backend, @@ -94,6 +94,7 @@ 'hadoop_stat', 'hadoop_exists', 'hadoop_ls', + 'hadoop_scheme_supported', 'copy_log', 'Struct', 'Interval', diff --git a/hail/python/hail/backend/spark_backend.py b/hail/python/hail/backend/spark_backend.py index fff59fd5a29..d0ff75dd327 100644 --- a/hail/python/hail/backend/spark_backend.py +++ b/hail/python/hail/backend/spark_backend.py @@ -8,6 +8,9 @@ import py4j import pyspark +from typing import List + +import hail as hl from hail.utils.java import Env, scala_package_object, scala_object from hail.expr.types import dtype from hail.expr.table_type import ttable @@ -21,7 +24,6 @@ from .py4j_backend import Py4JBackend, handle_java_exception from ..hail_logging import Logger - if pyspark.__version__ < '3' and sys.version_info > (3, 8): raise EnvironmentError('Hail with spark {} requires Python 3.6 or 3.7, found {}.{}'.format( pyspark.__version__, sys.version_info.major, sys.version_info.minor)) @@ -308,7 +310,8 @@ def to_spark(self, t, flatten): t = t.expand_types() if flatten: t = t.flatten() - return pyspark.sql.DataFrame(self._jbackend.pyToDF(self._to_java_table_ir(t._tir)), Env.spark_session()._wrapped) + return pyspark.sql.DataFrame(self._jbackend.pyToDF(self._to_java_table_ir(t._tir)), + Env.spark_session()._wrapped) def to_pandas(self, t, flatten): return self.to_spark(t, flatten).toPandas() @@ -369,3 +372,13 @@ def register_ir_function(self, name, type_parameters, argument_names, argument_t def persist_ir(self, ir): return JavaIR(self._jhc.backend().executeLiteral(self._to_java_value_ir(ir))) + + def read_multiple_matrix_tables(self, paths: 'List[str]', intervals: 'List[hl.Interval]', intervals_type): + json_repr = { + 'paths': paths, + 'intervals': intervals_type._convert_to_json(intervals), + 'intervalPointType': intervals_type.element_type.point_type._parsable_string(), + } + + results = self._jhc.backend().pyReadMultipleMatrixTables(json.dumps(json_repr)) + return [MatrixTable._from_java(jm) for jm in results] diff --git a/hail/python/hail/context.py b/hail/python/hail/context.py index 41e83b20ffb..8746e22ca69 100644 --- a/hail/python/hail/context.py 
+++ b/hail/python/hail/context.py @@ -35,7 +35,10 @@ def _get_local_tmpdir(local_tmpdir): def _get_log(log): if log is None: py_version = version() - log = hail.utils.timestamp_path(os.path.join(os.getcwd(), 'hail'), + log_dir = os.environ.get('HAIL_LOG_DIR') + if log_dir is None: + log_dir = os.getcwd() + log = hail.utils.timestamp_path(os.path.join(log_dir, 'hail'), suffix=f'-{py_version}.log') return log diff --git a/hail/python/hail/docs/change_log.md b/hail/python/hail/docs/change_log.md index 257df758294..223e52edb02 100644 --- a/hail/python/hail/docs/change_log.md +++ b/hail/python/hail/docs/change_log.md @@ -24,13 +24,46 @@ an earlier version of Hail to read files written in a later version. --- +## Version 0.2.70 + +Released 2021-06-21 + +--- + +## Version 0.2.69 + +Released 2021-06-14 + +### New Features + +- (hail#10592) Added `hl.get_hgdp` function. +- (hail#10555) Added `hl.hadoop_scheme_supported` function. +- (hail#10551) Indexing ndarrays now supports ellipses. + +### Bug fixes + +- (hail#10553) Dividing two integers now returns a `float64`, not a `float32`. +- (hail#10595) Don't include nans in `lambda_gc_agg`. + +### hailctl dataproc + +- (hail#10574) Hail logs will now be stored in `/home/hail` by default. + +--- + +## Version 0.2.68 + +Released 2021-05-27 + +--- + ## Version 0.2.67 ### Critical performance fix Released 2021-05-06 -- (hail#10451) Fixed a memory leak / performance bug triggered by `hl.literal(...).contains(...) +- (hail#10451) Fixed a memory leak / performance bug triggered by `hl.literal(...).contains(...)` --- diff --git a/hail/python/hail/docs/conf.py b/hail/python/hail/docs/conf.py index 170977131c7..0a906b4db67 100644 --- a/hail/python/hail/docs/conf.py +++ b/hail/python/hail/docs/conf.py @@ -65,7 +65,9 @@ nbsphinx_timeout = 300 nbsphinx_allow_errors = False # F821 undefined name 'tags' -if not tags.has('checktutorial'): # noqa: F821 +if not tags.has('checktutorial') and not tags.has('generate_notebook_outputs'): # noqa: F821 + # these flags have the same effect: they run the notebook and save the output in the generated + # rST file. nbsphinx_execute = 'never' autosummary_generate = True diff --git a/hail/python/hail/docs/datasets/schemas/1000_Genomes_HighCov_autosomes.rst b/hail/python/hail/docs/datasets/schemas/1000_Genomes_HighCov_autosomes.rst new file mode 100644 index 00000000000..60b039de6de --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/1000_Genomes_HighCov_autosomes.rst @@ -0,0 +1,214 @@ +.. _1000_Genomes_HighCov_autosomes: + +1000_Genomes_HighCov_autosomes +============================== + +* **Versions:** NYGC_30x_phased, NYGC_30x_unphased +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.MatrixTable` + +Schema (NYGC_30x_unphased, GRCh38) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + reference_genome: str, + n_rows: int32, + n_cols: int32, + n_partitions: int32 + } + ---------------------------------------- + Column fields: + 's': str + 'FamilyID': str + 'FatherID': str + 'MotherID': str + 'Sex': str + 'Population': str + 'Superpopulation': str + 'sample_qc': struct { + dp_stats: struct { + mean: float64, + stdev: float64, + min: float64, + max: float64 + }, + gq_stats: struct { + mean: float64, + stdev: float64, + min: float64, + max: float64 + }, + call_rate: float64, + n_called: int64, + n_not_called: int64, + n_filtered: int64, + n_hom_ref: int64, + n_het: int64, + n_hom_var: int64, + n_non_ref: int64, + n_singleton: int64, + n_snp: int64, + n_insertion: int64, + n_deletion: int64, + n_transition: int64, + n_transversion: int64, + n_star: int64, + r_ti_tv: float64, + r_het_hom_var: float64, + r_insertion_deletion: float64 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'rsid': str + 'qual': float64 + 'filters': set + 'info': struct { + AC: int32, + AF: float64, + AN: int32, + BaseQRankSum: float64, + ClippingRankSum: float64, + DP: int32, + DS: bool, + END: int32, + FS: float64, + HaplotypeScore: float64, + InbreedingCoeff: float64, + MLEAC: int32, + MLEAF: float64, + MQ: float64, + MQ0: int32, + MQRankSum: float64, + NEGATIVE_TRAIN_SITE: bool, + POSITIVE_TRAIN_SITE: bool, + QD: float64, + RAW_MQ: float64, + ReadPosRankSum: float64, + SOR: float64, + VQSLOD: float64, + VariantType: str, + culprit: str, + AN_EUR: int32, + AN_EAS: int32, + AN_AMR: int32, + AN_SAS: int32, + AN_AFR: int32, + AC_EUR: int32, + AC_EAS: int32, + AC_AMR: int32, + AC_SAS: int32, + AC_AFR: int32, + AC_Hom_EUR: int32, + AC_Hom_EAS: int32, + AC_Hom_AMR: int32, + AC_Hom_SAS: int32, + AC_Hom_AFR: int32, + AC_Hom: int32, + AC_Het_EUR: int32, + AC_Het_EAS: int32, + AC_Het_AMR: int32, + AC_Het_SAS: int32, + AC_Het_AFR: int32, + AC_Het: int32, + AF_EUR: float64, + AF_EAS: float64, + AF_AMR: float64, + AF_SAS: float64, + AF_AFR: float64, + HWE_EUR: float64, + HWE_EAS: float64, + HWE_AMR: float64, + HWE_SAS: float64, + HWE_AFR: float64, + HWE: float64, + ExcHet_EUR: float64, + ExcHet_EAS: float64, + ExcHet_AMR: float64, + ExcHet_SAS: float64, + ExcHet_AFR: float64, + ExcHet: float64, + ME: float64, + AN_EUR_unrel: int32, + AN_EAS_unrel: int32, + AN_AMR_unrel: int32, + AN_SAS_unrel: int32, + AN_AFR_unrel: int32, + AC_EUR_unrel: int32, + AC_EAS_unrel: int32, + AC_AMR_unrel: int32, + AC_SAS_unrel: int32, + AC_AFR_unrel: int32, + AC_Hom_EUR_unrel: int32, + AC_Hom_EAS_unrel: int32, + AC_Hom_AMR_unrel: int32, + AC_Hom_SAS_unrel: int32, + AC_Hom_AFR_unrel: int32, + AC_Het_EUR_unrel: int32, + AC_Het_EAS_unrel: int32, + AC_Het_AMR_unrel: int32, + AC_Het_SAS_unrel: int32, + AC_Het_AFR_unrel: int32, + AF_EUR_unrel: float64, + AF_EAS_unrel: float64, + AF_AMR_unrel: float64, + AF_SAS_unrel: float64, + AF_AFR_unrel: float64, + HWE_EUR_unrel: float64, + HWE_EAS_unrel: float64, + HWE_AMR_unrel: float64, + HWE_SAS_unrel: float64, + HWE_AFR_unrel: float64 + } + 'a_index': int32 + 'was_split': bool + 'variant_qc': struct { + dp_stats: struct { + mean: float64, + stdev: float64, + min: float64, + max: float64 + }, + gq_stats: struct { + mean: float64, + stdev: float64, + min: float64, + max: float64 + }, + AC: array, + AF: array, + AN: int32, + homozygote_count: array, + call_rate: float64, + n_called: int64, + n_not_called: int64, + n_filtered: int64, + 
n_het: int64, + n_non_ref: int64, + het_freq_hwe: float64, + p_value_hwe: float64 + } + ---------------------------------------- + Entry fields: + 'AB': float64 + 'AD': array + 'DP': int32 + 'GQ': int32 + 'GT': call + 'MIN_DP': int32 + 'MQ0': int32 + 'PGT': call + 'PID': str + 'PL': array + 'RGQ': int32 + 'SB': array + ---------------------------------------- + Column key: ['s'] + Row key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/1000_Genomes_HighCov_chrX.rst b/hail/python/hail/docs/datasets/schemas/1000_Genomes_HighCov_chrX.rst new file mode 100644 index 00000000000..7f1b80a2acb --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/1000_Genomes_HighCov_chrX.rst @@ -0,0 +1,214 @@ +.. _1000_Genomes_HighCov_chrX: + +1000_Genomes_HighCov_chrX +========================= + +* **Versions:** NYGC_30x_phased, NYGC_30x_unphased +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.MatrixTable` + +Schema (NYGC_30x_unphased, GRCh38) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + reference_genome: str, + n_rows: int32, + n_cols: int32, + n_partitions: int32 + } + ---------------------------------------- + Column fields: + 's': str + 'FamilyID': str + 'FatherID': str + 'MotherID': str + 'Sex': str + 'Population': str + 'Superpopulation': str + 'sample_qc': struct { + dp_stats: struct { + mean: float64, + stdev: float64, + min: float64, + max: float64 + }, + gq_stats: struct { + mean: float64, + stdev: float64, + min: float64, + max: float64 + }, + call_rate: float64, + n_called: int64, + n_not_called: int64, + n_filtered: int64, + n_hom_ref: int64, + n_het: int64, + n_hom_var: int64, + n_non_ref: int64, + n_singleton: int64, + n_snp: int64, + n_insertion: int64, + n_deletion: int64, + n_transition: int64, + n_transversion: int64, + n_star: int64, + r_ti_tv: float64, + r_het_hom_var: float64, + r_insertion_deletion: float64 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'rsid': str + 'qual': float64 + 'filters': set + 'info': struct { + AC: int32, + AF: float64, + AN: int32, + BaseQRankSum: float64, + ClippingRankSum: float64, + DP: int32, + DS: bool, + END: int32, + FS: float64, + HaplotypeScore: float64, + InbreedingCoeff: float64, + MLEAC: int32, + MLEAF: float64, + MQ: float64, + MQ0: int32, + MQRankSum: float64, + NEGATIVE_TRAIN_SITE: bool, + POSITIVE_TRAIN_SITE: bool, + QD: float64, + RAW_MQ: float64, + ReadPosRankSum: float64, + SOR: float64, + VQSLOD: float64, + VariantType: str, + culprit: str, + AN_EUR: int32, + AN_EAS: int32, + AN_AMR: int32, + AN_SAS: int32, + AN_AFR: int32, + AC_EUR: int32, + AC_EAS: int32, + AC_AMR: int32, + AC_SAS: int32, + AC_AFR: int32, + AC_Hom_EUR: int32, + AC_Hom_EAS: int32, + AC_Hom_AMR: int32, + AC_Hom_SAS: int32, + AC_Hom_AFR: int32, + AC_Hom: int32, + AC_Het_EUR: int32, + AC_Het_EAS: int32, + AC_Het_AMR: int32, + AC_Het_SAS: int32, + AC_Het_AFR: int32, + AC_Het: int32, + AF_EUR: float64, + AF_EAS: float64, + AF_AMR: float64, + AF_SAS: float64, + AF_AFR: float64, + HWE_EUR: float64, + HWE_EAS: float64, + HWE_AMR: float64, + HWE_SAS: float64, + HWE_AFR: float64, + HWE: float64, + ExcHet_EUR: float64, + ExcHet_EAS: float64, + ExcHet_AMR: float64, + ExcHet_SAS: float64, + ExcHet_AFR: float64, + ExcHet: float64, + ME: float64, + AN_EUR_unrel: int32, + AN_EAS_unrel: int32, + AN_AMR_unrel: int32, + AN_SAS_unrel: int32, 
+ AN_AFR_unrel: int32, + AC_EUR_unrel: int32, + AC_EAS_unrel: int32, + AC_AMR_unrel: int32, + AC_SAS_unrel: int32, + AC_AFR_unrel: int32, + AC_Hom_EUR_unrel: int32, + AC_Hom_EAS_unrel: int32, + AC_Hom_AMR_unrel: int32, + AC_Hom_SAS_unrel: int32, + AC_Hom_AFR_unrel: int32, + AC_Het_EUR_unrel: int32, + AC_Het_EAS_unrel: int32, + AC_Het_AMR_unrel: int32, + AC_Het_SAS_unrel: int32, + AC_Het_AFR_unrel: int32, + AF_EUR_unrel: float64, + AF_EAS_unrel: float64, + AF_AMR_unrel: float64, + AF_SAS_unrel: float64, + AF_AFR_unrel: float64, + HWE_EUR_unrel: float64, + HWE_EAS_unrel: float64, + HWE_AMR_unrel: float64, + HWE_SAS_unrel: float64, + HWE_AFR_unrel: float64 + } + 'a_index': int32 + 'was_split': bool + 'variant_qc': struct { + dp_stats: struct { + mean: float64, + stdev: float64, + min: float64, + max: float64 + }, + gq_stats: struct { + mean: float64, + stdev: float64, + min: float64, + max: float64 + }, + AC: array, + AF: array, + AN: int32, + homozygote_count: array, + call_rate: float64, + n_called: int64, + n_not_called: int64, + n_filtered: int64, + n_het: int64, + n_non_ref: int64, + het_freq_hwe: float64, + p_value_hwe: float64 + } + ---------------------------------------- + Entry fields: + 'AB': float64 + 'AD': array + 'DP': int32 + 'GQ': int32 + 'GT': call + 'MIN_DP': int32 + 'MQ0': int32 + 'PGT': call + 'PID': str + 'PL': array + 'RGQ': int32 + 'SB': array + ---------------------------------------- + Column key: ['s'] + Row key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/1000_Genomes_HighCov_chrY.rst b/hail/python/hail/docs/datasets/schemas/1000_Genomes_HighCov_chrY.rst new file mode 100644 index 00000000000..27c4e92d74f --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/1000_Genomes_HighCov_chrY.rst @@ -0,0 +1,175 @@ +.. _1000_Genomes_HighCov_chrY: + +1000_Genomes_HighCov_chrY +========================= + +* **Versions:** NYGC_30x_unphased +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.MatrixTable` + +Schema (NYGC_30x_unphased, GRCh38) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + reference_genome: str, + n_rows: int32, + n_cols: int32, + n_partitions: int32 + } + ---------------------------------------- + Column fields: + 's': str + 'FamilyID': str + 'FatherID': str + 'MotherID': str + 'Sex': str + 'Population': str + 'Superpopulation': str + 'sample_qc': struct { + dp_stats: struct { + mean: float64, + stdev: float64, + min: float64, + max: float64 + }, + gq_stats: struct { + mean: float64, + stdev: float64, + min: float64, + max: float64 + }, + call_rate: float64, + n_called: int64, + n_not_called: int64, + n_filtered: int64, + n_hom_ref: int64, + n_het: int64, + n_hom_var: int64, + n_non_ref: int64, + n_singleton: int64, + n_snp: int64, + n_insertion: int64, + n_deletion: int64, + n_transition: int64, + n_transversion: int64, + n_star: int64, + r_ti_tv: float64, + r_het_hom_var: float64, + r_insertion_deletion: float64 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'rsid': str + 'qual': float64 + 'filters': set + 'info': struct { + AC: int32, + AF: float64, + AN: int32, + BaseQRankSum: float64, + ClippingRankSum: float64, + DP: int32, + DS: bool, + END: int32, + ExcessHet: float64, + FS: float64, + HaplotypeScore: float64, + InbreedingCoeff: float64, + MLEAC: int32, + MLEAF: float64, + MQ: float64, + MQ0: int32, + MQRankSum: float64, + NEGATIVE_TRAIN_SITE: bool, + POSITIVE_TRAIN_SITE: bool, + QD: float64, + RAW_MQ: float64, + ReadPosRankSum: float64, + SOR: float64, + VQSLOD: float64, + VariantType: str, + culprit: str, + AN_EAS: int32, + AN_AMR: int32, + AN_EUR: int32, + AN_AFR: int32, + AN_SAS: int32, + AN_EUR_unrel: int32, + AN_EAS_unrel: int32, + AN_AMR_unrel: int32, + AN_SAS_unrel: int32, + AN_AFR_unrel: int32, + AC_EAS: int32, + AC_AMR: int32, + AC_EUR: int32, + AC_AFR: int32, + AC_SAS: int32, + AC_EUR_unrel: int32, + AC_EAS_unrel: int32, + AC_AMR_unrel: int32, + AC_SAS_unrel: int32, + AC_AFR_unrel: int32, + AF_EAS: float64, + AF_AMR: float64, + AF_EUR: float64, + AF_AFR: float64, + AF_SAS: float64, + AF_EUR_unrel: float64, + AF_EAS_unrel: float64, + AF_AMR_unrel: float64, + AF_SAS_unrel: float64, + AF_AFR_unrel: float64 + } + 'a_index': int32 + 'was_split': bool + 'variant_qc': struct { + dp_stats: struct { + mean: float64, + stdev: float64, + min: float64, + max: float64 + }, + gq_stats: struct { + mean: float64, + stdev: float64, + min: float64, + max: float64 + }, + AC: array, + AF: array, + AN: int32, + homozygote_count: array, + call_rate: float64, + n_called: int64, + n_not_called: int64, + n_filtered: int64, + n_het: int64, + n_non_ref: int64, + het_freq_hwe: float64, + p_value_hwe: float64 + } + ---------------------------------------- + Entry fields: + 'AB': float64 + 'AD': array + 'DP': int32 + 'GQ': int32 + 'GT': call + 'MIN_DP': int32 + 'MQ0': int32 + 'PGT': call + 'PID': str + 'PL': array + 'RGQ': int32 + 'SB': array + ---------------------------------------- + Column key: ['s'] + Row key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/1000_Genomes_Retracted_autosomes.rst b/hail/python/hail/docs/datasets/schemas/1000_Genomes_Retracted_autosomes.rst new file mode 100644 index 00000000000..37f2a7384b8 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/1000_Genomes_Retracted_autosomes.rst @@ -0,0 +1,128 @@ +.. 
_1000_Genomes_Retracted_autosomes: + +1000_Genomes_Retracted_autosomes +================================ + +* **Versions:** phase_3 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.MatrixTable` + +Schema (phase_3, GRCh38) +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_cols: int32, + n_partitions: int32 + } + ---------------------------------------- + Column fields: + 's': str + 'population': str + 'super_population': str + 'is_female': bool + 'family_id': str + 'relationship_role': str + 'maternal_id': str + 'paternal_id': str + 'children_ids': array + 'sibling_ids': array + 'second_order_relationship_ids': array + 'third_order_relationship_ids': array + 'sample_qc': struct { + call_rate: float64, + n_called: int64, + n_not_called: int64, + n_hom_ref: int64, + n_het: int64, + n_hom_var: int64, + n_non_ref: int64, + n_singleton: int64, + n_snp: int64, + n_insertion: int64, + n_deletion: int64, + n_transition: int64, + n_transversion: int64, + n_star: int64, + r_ti_tv: float64, + r_het_hom_var: float64, + r_insertion_deletion: float64 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'rsid': str + 'qual': float64 + 'filters': set + 'info': struct { + CIEND: int32, + CIPOS: int32, + CS: str, + END: int32, + IMPRECISE: bool, + MC: array, + MEINFO: array, + MEND: int32, + MLEN: int32, + MSTART: int32, + SVLEN: array, + SVTYPE: str, + TSD: str, + AC: int32, + AF: float64, + NS: int32, + AN: int32, + EAS_AF: float64, + EUR_AF: float64, + AFR_AF: float64, + AMR_AF: float64, + SAS_AF: float64, + DP: int32, + AA: str, + VT: str, + EX_TARGET: bool, + MULTI_ALLELIC: bool, + STRAND_FLIP: bool, + REF_SWITCH: bool, + DEPRECATED_RSID: array, + RSID_REMOVED: array, + GRCH37_38_REF_STRING_MATCH: bool, + NOT_ALL_RSIDS_STRAND_CHANGE_OR_REF_SWITCH: bool, + GRCH37_POS: int32, + GRCH37_REF: str, + ALLELE_TRANSFORM: bool, + REF_NEW_ALLELE: bool, + CHROM_CHANGE_BETWEEN_ASSEMBLIES: str + } + 'a_index': int32 + 'was_split': bool + 'old_locus': locus + 'old_alleles': array + 'variant_qc': struct { + AC: array, + AF: array, + AN: int32, + homozygote_count: array, + n_called: int64, + n_not_called: int64, + call_rate: float32, + n_het: int64, + n_non_ref: int64, + het_freq_hwe: float64, + p_value_hwe: float64 + } + ---------------------------------------- + Entry fields: + 'GT': call + ---------------------------------------- + Column key: ['s'] + Row key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/1000_Genomes_Retracted_chrX.rst b/hail/python/hail/docs/datasets/schemas/1000_Genomes_Retracted_chrX.rst new file mode 100644 index 00000000000..c2f90a8592b --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/1000_Genomes_Retracted_chrX.rst @@ -0,0 +1,128 @@ +.. _1000_Genomes_Retracted_chrX: + +1000_Genomes_Retracted_chrX +=========================== + +* **Versions:** phase_3 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.MatrixTable` + +Schema (phase_3, GRCh38) +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_cols: int32, + n_partitions: int32 + } + ---------------------------------------- + Column fields: + 's': str + 'population': str + 'super_population': str + 'is_female': bool + 'family_id': str + 'relationship_role': str + 'maternal_id': str + 'paternal_id': str + 'children_ids': array + 'sibling_ids': array + 'second_order_relationship_ids': array + 'third_order_relationship_ids': array + 'sample_qc': struct { + call_rate: float64, + n_called: int64, + n_not_called: int64, + n_hom_ref: int64, + n_het: int64, + n_hom_var: int64, + n_non_ref: int64, + n_singleton: int64, + n_snp: int64, + n_insertion: int64, + n_deletion: int64, + n_transition: int64, + n_transversion: int64, + n_star: int64, + r_ti_tv: float64, + r_het_hom_var: float64, + r_insertion_deletion: float64 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'rsid': str + 'qual': float64 + 'filters': set + 'info': struct { + CIEND: int32, + CIPOS: int32, + CS: str, + END: int32, + IMPRECISE: bool, + MC: array, + MEINFO: array, + MEND: int32, + MLEN: int32, + MSTART: int32, + SVLEN: array, + SVTYPE: str, + TSD: str, + AC: int32, + AF: float64, + NS: int32, + AN: int32, + EAS_AF: float64, + EUR_AF: float64, + AFR_AF: float64, + AMR_AF: float64, + SAS_AF: float64, + DP: int32, + AA: str, + VT: str, + EX_TARGET: bool, + MULTI_ALLELIC: bool, + STRAND_FLIP: bool, + REF_SWITCH: bool, + DEPRECATED_RSID: array, + RSID_REMOVED: array, + GRCH37_38_REF_STRING_MATCH: bool, + NOT_ALL_RSIDS_STRAND_CHANGE_OR_REF_SWITCH: bool, + GRCH37_POS: int32, + GRCH37_REF: str, + ALLELE_TRANSFORM: bool, + REF_NEW_ALLELE: bool, + CHROM_CHANGE_BETWEEN_ASSEMBLIES: str + } + 'a_index': int32 + 'was_split': bool + 'old_locus': locus + 'old_alleles': array + 'variant_qc': struct { + AC: array, + AF: array, + AN: int32, + homozygote_count: array, + n_called: int64, + n_not_called: int64, + call_rate: float32, + n_het: int64, + n_non_ref: int64, + het_freq_hwe: float64, + p_value_hwe: float64 + } + ---------------------------------------- + Entry fields: + 'GT': call + ---------------------------------------- + Column key: ['s'] + Row key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/1000_Genomes_Retracted_chrY.rst b/hail/python/hail/docs/datasets/schemas/1000_Genomes_Retracted_chrY.rst new file mode 100644 index 00000000000..03e258facd1 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/1000_Genomes_Retracted_chrY.rst @@ -0,0 +1,117 @@ +.. _1000_Genomes_Retracted_chrY: + +1000_Genomes_Retracted_chrY +=========================== + +* **Versions:** phase_3 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.MatrixTable` + +Schema (phase_3, GRCh38) +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_cols: int32, + n_partitions: int32 + } + ---------------------------------------- + Column fields: + 's': str + 'population': str + 'super_population': str + 'is_female': bool + 'family_id': str + 'relationship_role': str + 'maternal_id': str + 'paternal_id': str + 'children_ids': array + 'sibling_ids': array + 'second_order_relationship_ids': array + 'third_order_relationship_ids': array + 'sample_qc': struct { + call_rate: float64, + n_called: int64, + n_not_called: int64, + n_hom_ref: int64, + n_het: int64, + n_hom_var: int64, + n_non_ref: int64, + n_singleton: int64, + n_snp: int64, + n_insertion: int64, + n_deletion: int64, + n_transition: int64, + n_transversion: int64, + n_star: int64, + r_ti_tv: float64, + r_het_hom_var: float64, + r_insertion_deletion: float64 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'rsid': str + 'qual': float64 + 'filters': set + 'info': struct { + DP: int32, + END: int32, + SVTYPE: str, + AA: str, + AC: int32, + AF: float64, + NS: int32, + AN: int32, + EAS_AF: float64, + EUR_AF: float64, + AFR_AF: float64, + AMR_AF: float64, + SAS_AF: float64, + VT: str, + EX_TARGET: bool, + MULTI_ALLELIC: bool, + STRAND_FLIP: bool, + REF_SWITCH: bool, + DEPRECATED_RSID: str, + RSID_REMOVED: str, + GRCH37_38_REF_STRING_MATCH: bool, + NOT_ALL_RSIDS_STRAND_CHANGE_OR_REF_SWITCH: bool, + GRCH37_POS: int32, + GRCH37_REF: str, + ALLELE_TRANSFORM: bool, + REF_NEW_ALLELE: bool, + CHROM_CHANGE_BETWEEN_ASSEMBLIES: str + } + 'a_index': int32 + 'was_split': bool + 'old_locus': locus + 'old_alleles': array + 'variant_qc': struct { + AC: array, + AF: array, + AN: int32, + homozygote_count: array, + n_called: int64, + n_not_called: int64, + call_rate: float32, + n_het: int64, + n_non_ref: int64, + het_freq_hwe: float64, + p_value_hwe: float64 + } + ---------------------------------------- + Entry fields: + 'GT': call + ---------------------------------------- + Column key: ['s'] + Row key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/1000_Genomes_autosomes.rst b/hail/python/hail/docs/datasets/schemas/1000_Genomes_autosomes.rst index d4676aaea39..aa891dd30cb 100644 --- a/hail/python/hail/docs/datasets/schemas/1000_Genomes_autosomes.rst +++ b/hail/python/hail/docs/datasets/schemas/1000_Genomes_autosomes.rst @@ -4,7 +4,7 @@ ====================== * **Versions:** phase_3 -* **Reference genome builds:** GRCh37, GRCh38 +* **Reference genome builds:** GRCh37 * **Type:** :class:`hail.MatrixTable` Schema (phase_3, GRCh37) @@ -115,4 +115,3 @@ Schema (phase_3, GRCh37) Column key: ['s'] Row key: ['locus', 'alleles'] ---------------------------------------- - diff --git a/hail/python/hail/docs/datasets/schemas/1000_Genomes_chrX.rst b/hail/python/hail/docs/datasets/schemas/1000_Genomes_chrX.rst index 8a058234af1..5ebcbb7647e 100644 --- a/hail/python/hail/docs/datasets/schemas/1000_Genomes_chrX.rst +++ b/hail/python/hail/docs/datasets/schemas/1000_Genomes_chrX.rst @@ -4,7 +4,7 @@ ================= * **Versions:** phase_3 -* **Reference genome builds:** GRCh37, GRCh38 +* **Reference genome builds:** GRCh37 * **Type:** :class:`hail.MatrixTable` Schema (phase_3, GRCh37) @@ -115,4 +115,3 @@ Schema (phase_3, GRCh37) Column key: ['s'] Row key: ['locus', 'alleles'] ---------------------------------------- - diff 
--git a/hail/python/hail/docs/datasets/schemas/1000_Genomes_chrY.rst b/hail/python/hail/docs/datasets/schemas/1000_Genomes_chrY.rst index ee27a256f0d..663e4910596 100644 --- a/hail/python/hail/docs/datasets/schemas/1000_Genomes_chrY.rst +++ b/hail/python/hail/docs/datasets/schemas/1000_Genomes_chrY.rst @@ -4,7 +4,7 @@ ================= * **Versions:** phase_3 -* **Reference genome builds:** GRCh37, GRCh38 +* **Reference genome builds:** GRCh37 * **Type:** :class:`hail.MatrixTable` Schema (phase_3, GRCh37) @@ -104,4 +104,3 @@ Schema (phase_3, GRCh37) Column key: ['s'] Row key: ['locus', 'alleles'] ---------------------------------------- - diff --git a/hail/python/hail/docs/datasets/schemas/CADD.rst b/hail/python/hail/docs/datasets/schemas/CADD.rst index f9f1b075a28..90f72cac06b 100644 --- a/hail/python/hail/docs/datasets/schemas/CADD.rst +++ b/hail/python/hail/docs/datasets/schemas/CADD.rst @@ -3,7 +3,7 @@ CADD ==== -* **Versions:** 1.4 +* **Versions:** 1.4, 1.6 * **Reference genome builds:** GRCh37, GRCh38 * **Type:** :class:`hail.Table` @@ -30,4 +30,3 @@ Schema (1.4, GRCh37) ---------------------------------------- Key: ['locus', 'alleles'] ---------------------------------------- - diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Adipose_Subcutaneous_all_snp_gene_associations.rst similarity index 88% rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Adipose_Subcutaneous_all_snp_gene_associations.rst index 21cc80548bb..1ecce309f93 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Adipose_Subcutaneous_all_snp_gene_associations.rst @@ -1,6 +1,6 @@ -.. _GTEx_eQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia: +.. _GTEx_eQTL_Adipose_Subcutaneous_all_snp_gene_associations: -GTEx_eQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia +GTEx_eQTL_Adipose_Subcutaneous_all_snp_gene_associations ======================================================== * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Adipose_Visceral_Omentum_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Adipose_Visceral_Omentum_all_snp_gene_associations.rst new file mode 100644 index 00000000000..1fae6e47cc0 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Adipose_Visceral_Omentum_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_Adipose_Visceral_Omentum_all_snp_gene_associations: + +GTEx_eQTL_Adipose_Visceral_Omentum_all_snp_gene_associations +============================================================ + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Spinal_cord_cervical_c-1.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Adrenal_Gland_all_snp_gene_associations.rst similarity index 89% rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Spinal_cord_cervical_c-1.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Adrenal_Gland_all_snp_gene_associations.rst index 94fba0afb96..33f7c4a9e8e 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Spinal_cord_cervical_c-1.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Adrenal_Gland_all_snp_gene_associations.rst @@ -1,6 +1,6 @@ -.. _GTEx_eQTL_allpairs_Brain_Spinal_cord_cervical_c-1: +.. _GTEx_eQTL_Adrenal_Gland_all_snp_gene_associations: -GTEx_eQTL_allpairs_Brain_Spinal_cord_cervical_c-1 +GTEx_eQTL_Adrenal_Gland_all_snp_gene_associations ================================================= * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Artery_Aorta_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Artery_Aorta_all_snp_gene_associations.rst new file mode 100644 index 00000000000..bf52a3f05ab --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Artery_Aorta_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_Artery_Aorta_all_snp_gene_associations: + +GTEx_eQTL_Artery_Aorta_all_snp_gene_associations +================================================ + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Artery_Coronary_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Artery_Coronary_all_snp_gene_associations.rst new file mode 100644 index 00000000000..2da0160aec6 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Artery_Coronary_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_Artery_Coronary_all_snp_gene_associations: + +GTEx_eQTL_Artery_Coronary_all_snp_gene_associations +=================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Small_Intestine_Terminal_Ileum.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Artery_Tibial_all_snp_gene_associations.rst similarity index 89% rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Small_Intestine_Terminal_Ileum.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Artery_Tibial_all_snp_gene_associations.rst index 7bee21efdf7..28597a4b4b7 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Small_Intestine_Terminal_Ileum.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Artery_Tibial_all_snp_gene_associations.rst @@ -1,6 +1,6 @@ -.. _GTEx_eQTL_allpairs_Small_Intestine_Terminal_Ileum: +.. _GTEx_eQTL_Artery_Tibial_all_snp_gene_associations: -GTEx_eQTL_allpairs_Small_Intestine_Terminal_Ileum +GTEx_eQTL_Artery_Tibial_all_snp_gene_associations ================================================= * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Amygdala_all_snp_gene_associations.rst similarity index 89% rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Amygdala_all_snp_gene_associations.rst index fe93b201cdc..fe0f91b29b4 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Amygdala_all_snp_gene_associations.rst @@ -1,6 +1,6 @@ -.. _GTEx_eQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic: +.. _GTEx_eQTL_Brain_Amygdala_all_snp_gene_associations: -GTEx_eQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic +GTEx_eQTL_Brain_Amygdala_all_snp_gene_associations ================================================== * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.rst new file mode 100644 index 00000000000..3dd286b6fa8 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations: + +GTEx_eQTL_Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations +======================================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Caudate_basal_ganglia_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Caudate_basal_ganglia_all_snp_gene_associations.rst new file mode 100644 index 00000000000..2727bc4a840 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Caudate_basal_ganglia_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_Brain_Caudate_basal_ganglia_all_snp_gene_associations: + +GTEx_eQTL_Brain_Caudate_basal_ganglia_all_snp_gene_associations +=============================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Cerebellar_Hemisphere_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Cerebellar_Hemisphere_all_snp_gene_associations.rst new file mode 100644 index 00000000000..8a86950ca5f --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Cerebellar_Hemisphere_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_Brain_Cerebellar_Hemisphere_all_snp_gene_associations: + +GTEx_eQTL_Brain_Cerebellar_Hemisphere_all_snp_gene_associations +=============================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Cells_EBV-transformed_lymphocytes.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Cerebellum_all_snp_gene_associations.rst similarity index 89% rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Cells_EBV-transformed_lymphocytes.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Cerebellum_all_snp_gene_associations.rst index 22d91eca4c5..d3f1c83a6c8 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Cells_EBV-transformed_lymphocytes.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Cerebellum_all_snp_gene_associations.rst @@ -1,6 +1,6 @@ -.. _GTEx_eQTL_allpairs_Cells_EBV-transformed_lymphocytes: +.. _GTEx_eQTL_Brain_Cerebellum_all_snp_gene_associations: -GTEx_eQTL_allpairs_Cells_EBV-transformed_lymphocytes +GTEx_eQTL_Brain_Cerebellum_all_snp_gene_associations ==================================================== * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Cortex_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Cortex_all_snp_gene_associations.rst new file mode 100644 index 00000000000..058174375af --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Cortex_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_Brain_Cortex_all_snp_gene_associations: + +GTEx_eQTL_Brain_Cortex_all_snp_gene_associations +================================================ + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Frontal_Cortex_BA9_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Frontal_Cortex_BA9_all_snp_gene_associations.rst new file mode 100644 index 00000000000..53f4b85dd00 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Frontal_Cortex_BA9_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. 
_GTEx_eQTL_Brain_Frontal_Cortex_BA9_all_snp_gene_associations: + +GTEx_eQTL_Brain_Frontal_Cortex_BA9_all_snp_gene_associations +============================================================ + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Hippocampus_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Hippocampus_all_snp_gene_associations.rst new file mode 100644 index 00000000000..df9fe4cf3df --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Hippocampus_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_Brain_Hippocampus_all_snp_gene_associations: + +GTEx_eQTL_Brain_Hippocampus_all_snp_gene_associations +===================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Esophagus_Gastroesophageal_Junction.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Hypothalamus_all_snp_gene_associations.rst similarity index 89% rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Esophagus_Gastroesophageal_Junction.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Hypothalamus_all_snp_gene_associations.rst index 74f28cce57f..98d59c12132 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Esophagus_Gastroesophageal_Junction.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Hypothalamus_all_snp_gene_associations.rst @@ -1,6 +1,6 @@ -.. _GTEx_eQTL_allpairs_Esophagus_Gastroesophageal_Junction: +.. 
_GTEx_eQTL_Brain_Hypothalamus_all_snp_gene_associations: -GTEx_eQTL_allpairs_Esophagus_Gastroesophageal_Junction +GTEx_eQTL_Brain_Hypothalamus_all_snp_gene_associations ====================================================== * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.rst new file mode 100644 index 00000000000..92cf844108d --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations: + +GTEx_eQTL_Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations +========================================================================= + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Putamen_basal_ganglia_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Putamen_basal_ganglia_all_snp_gene_associations.rst new file mode 100644 index 00000000000..0b2a7b33d69 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Putamen_basal_ganglia_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_Brain_Putamen_basal_ganglia_all_snp_gene_associations: + +GTEx_eQTL_Brain_Putamen_basal_ganglia_all_snp_gene_associations +=============================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.rst new file mode 100644 index 00000000000..98a3c320511 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. 
_GTEx_eQTL_Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations: + +GTEx_eQTL_Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations +================================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Substantia_nigra_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Substantia_nigra_all_snp_gene_associations.rst new file mode 100644 index 00000000000..e328efdf574 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Brain_Substantia_nigra_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_Brain_Substantia_nigra_all_snp_gene_associations: + +GTEx_eQTL_Brain_Substantia_nigra_all_snp_gene_associations +========================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Breast_Mammary_Tissue_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Breast_Mammary_Tissue_all_snp_gene_associations.rst new file mode 100644 index 00000000000..d6dc9d7be81 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Breast_Mammary_Tissue_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_Breast_Mammary_Tissue_all_snp_gene_associations: + +GTEx_eQTL_Breast_Mammary_Tissue_all_snp_gene_associations +========================================================= + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Cells_Cultured_fibroblasts_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Cells_Cultured_fibroblasts_all_snp_gene_associations.rst new file mode 100644 index 00000000000..401490b52cb --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Cells_Cultured_fibroblasts_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_Cells_Cultured_fibroblasts_all_snp_gene_associations: + +GTEx_eQTL_Cells_Cultured_fibroblasts_all_snp_gene_associations +============================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.rst new file mode 100644 index 00000000000..9092d0265d1 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_Cells_EBV-transformed_lymphocytes_all_snp_gene_associations: + +GTEx_eQTL_Cells_EBV-transformed_lymphocytes_all_snp_gene_associations +===================================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Colon_Sigmoid_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Colon_Sigmoid_all_snp_gene_associations.rst new file mode 100644 index 00000000000..f74f4e4bba4 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Colon_Sigmoid_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_Colon_Sigmoid_all_snp_gene_associations: + +GTEx_eQTL_Colon_Sigmoid_all_snp_gene_associations +================================================= + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Colon_Transverse_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Colon_Transverse_all_snp_gene_associations.rst new file mode 100644 index 00000000000..5d978076af4 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Colon_Transverse_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_Colon_Transverse_all_snp_gene_associations: + +GTEx_eQTL_Colon_Transverse_all_snp_gene_associations +==================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.rst new file mode 100644 index 00000000000..bd4621ed006 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_Esophagus_Gastroesophageal_Junction_all_snp_gene_associations: + +GTEx_eQTL_Esophagus_Gastroesophageal_Junction_all_snp_gene_associations +======================================================================= + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Esophagus_Mucosa_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Esophagus_Mucosa_all_snp_gene_associations.rst new file mode 100644 index 00000000000..a7bdadd47a4 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Esophagus_Mucosa_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_Esophagus_Mucosa_all_snp_gene_associations: + +GTEx_eQTL_Esophagus_Mucosa_all_snp_gene_associations +==================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Esophagus_Muscularis_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Esophagus_Muscularis_all_snp_gene_associations.rst new file mode 100644 index 00000000000..d7cae1acb66 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Esophagus_Muscularis_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_Esophagus_Muscularis_all_snp_gene_associations: + +GTEx_eQTL_Esophagus_Muscularis_all_snp_gene_associations +======================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Heart_Atrial_Appendage_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Heart_Atrial_Appendage_all_snp_gene_associations.rst new file mode 100644 index 00000000000..7f2e6694b9a --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Heart_Atrial_Appendage_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_Heart_Atrial_Appendage_all_snp_gene_associations: + +GTEx_eQTL_Heart_Atrial_Appendage_all_snp_gene_associations +========================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Heart_Left_Ventricle_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Heart_Left_Ventricle_all_snp_gene_associations.rst new file mode 100644 index 00000000000..2c80cd1b735 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Heart_Left_Ventricle_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_Heart_Left_Ventricle_all_snp_gene_associations: + +GTEx_eQTL_Heart_Left_Ventricle_all_snp_gene_associations +======================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Kidney_Cortex_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Kidney_Cortex_all_snp_gene_associations.rst new file mode 100644 index 00000000000..3fdd01e06bd --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Kidney_Cortex_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_Kidney_Cortex_all_snp_gene_associations: + +GTEx_eQTL_Kidney_Cortex_all_snp_gene_associations +================================================= + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Substantia_nigra.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Liver_all_snp_gene_associations.rst similarity index 91% rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Substantia_nigra.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Liver_all_snp_gene_associations.rst index 69fbe08ecf6..99cb6080806 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Substantia_nigra.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Liver_all_snp_gene_associations.rst @@ -1,6 +1,6 @@ -.. _GTEx_eQTL_allpairs_Brain_Substantia_nigra: +.. _GTEx_eQTL_Liver_all_snp_gene_associations: -GTEx_eQTL_allpairs_Brain_Substantia_nigra +GTEx_eQTL_Liver_all_snp_gene_associations ========================================= * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Breast_Mammary_Tissue.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Lung_all_snp_gene_associations.rst similarity index 91% rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Breast_Mammary_Tissue.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Lung_all_snp_gene_associations.rst index f0af835ef21..b9937d7d3ae 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Breast_Mammary_Tissue.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Lung_all_snp_gene_associations.rst @@ -1,6 +1,6 @@ -.. _GTEx_eQTL_allpairs_Breast_Mammary_Tissue: +.. _GTEx_eQTL_Lung_all_snp_gene_associations: -GTEx_eQTL_allpairs_Breast_Mammary_Tissue +GTEx_eQTL_Lung_all_snp_gene_associations ======================================== * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Minor_Salivary_Gland_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Minor_Salivary_Gland_all_snp_gene_associations.rst new file mode 100644 index 00000000000..ef26366b9e4 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Minor_Salivary_Gland_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_Minor_Salivary_Gland_all_snp_gene_associations: + +GTEx_eQTL_Minor_Salivary_Gland_all_snp_gene_associations +======================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Muscle_Skeletal_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Muscle_Skeletal_all_snp_gene_associations.rst new file mode 100644 index 00000000000..e7244d44cb1 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Muscle_Skeletal_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_Muscle_Skeletal_all_snp_gene_associations: + +GTEx_eQTL_Muscle_Skeletal_all_snp_gene_associations +=================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Nerve_Tibial_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Nerve_Tibial_all_snp_gene_associations.rst new file mode 100644 index 00000000000..3798c114bf2 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Nerve_Tibial_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_Nerve_Tibial_all_snp_gene_associations: + +GTEx_eQTL_Nerve_Tibial_all_snp_gene_associations +================================================ + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Heart_Atrial_Appendage.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Ovary_all_snp_gene_associations.rst similarity index 91% rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Heart_Atrial_Appendage.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Ovary_all_snp_gene_associations.rst index d9a18af9156..7208baa2c76 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Heart_Atrial_Appendage.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Ovary_all_snp_gene_associations.rst @@ -1,6 +1,6 @@ -.. _GTEx_eQTL_allpairs_Heart_Atrial_Appendage: +.. _GTEx_eQTL_Ovary_all_snp_gene_associations: -GTEx_eQTL_allpairs_Heart_Atrial_Appendage +GTEx_eQTL_Ovary_all_snp_gene_associations ========================================= * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Pancreas_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Pancreas_all_snp_gene_associations.rst new file mode 100644 index 00000000000..3f38c933b46 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Pancreas_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_Pancreas_all_snp_gene_associations: + +GTEx_eQTL_Pancreas_all_snp_gene_associations +============================================ + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Cells_Cultured_fibroblasts.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Pituitary_all_snp_gene_associations.rst similarity index 90% rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Cells_Cultured_fibroblasts.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Pituitary_all_snp_gene_associations.rst index 7a1ea5953a4..0bbdbfcb526 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Cells_Cultured_fibroblasts.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Pituitary_all_snp_gene_associations.rst @@ -1,6 +1,6 @@ -.. _GTEx_eQTL_allpairs_Cells_Cultured_fibroblasts: +.. 
_GTEx_eQTL_Pituitary_all_snp_gene_associations: -GTEx_eQTL_allpairs_Cells_Cultured_fibroblasts +GTEx_eQTL_Pituitary_all_snp_gene_associations ============================================= * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Prostate_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Prostate_all_snp_gene_associations.rst new file mode 100644 index 00000000000..490d25cb499 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Prostate_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_Prostate_all_snp_gene_associations: + +GTEx_eQTL_Prostate_all_snp_gene_associations +============================================ + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.rst new file mode 100644 index 00000000000..fe06b6395a7 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations: + +GTEx_eQTL_Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations +=================================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.rst new file mode 100644 index 00000000000..9fa7ddc5ae1 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. 
_GTEx_eQTL_Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations: + +GTEx_eQTL_Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations +============================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Small_Intestine_Terminal_Ileum_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Small_Intestine_Terminal_Ileum_all_snp_gene_associations.rst new file mode 100644 index 00000000000..cf0cb983784 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Small_Intestine_Terminal_Ileum_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_Small_Intestine_Terminal_Ileum_all_snp_gene_associations: + +GTEx_eQTL_Small_Intestine_Terminal_Ileum_all_snp_gene_associations +================================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Artery_Aorta.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Spleen_all_snp_gene_associations.rst similarity index 86% rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Artery_Aorta.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Spleen_all_snp_gene_associations.rst index e11155d1e5d..61149cb9651 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Artery_Aorta.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Spleen_all_snp_gene_associations.rst @@ -1,7 +1,7 @@ -.. _GTEx_eQTL_allpairs_Artery_Aorta: +.. 
_GTEx_eQTL_Spleen_all_snp_gene_associations: -GTEx_eQTL_allpairs_Artery_Aorta -=============================== +GTEx_eQTL_Spleen_all_snp_gene_associations +========================================== * **Versions:** v8 * **Reference genome builds:** GRCh38 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Frontal_Cortex_BA9.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Stomach_all_snp_gene_associations.rst similarity index 90% rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Frontal_Cortex_BA9.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Stomach_all_snp_gene_associations.rst index e4cd83a2f62..53afd786e96 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Frontal_Cortex_BA9.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Stomach_all_snp_gene_associations.rst @@ -1,6 +1,6 @@ -.. _GTEx_eQTL_allpairs_Brain_Frontal_Cortex_BA9: +.. _GTEx_eQTL_Stomach_all_snp_gene_associations: -GTEx_eQTL_allpairs_Brain_Frontal_Cortex_BA9 +GTEx_eQTL_Stomach_all_snp_gene_associations =========================================== * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Adrenal_Gland.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Testis_all_snp_gene_associations.rst similarity index 86% rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Adrenal_Gland.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Testis_all_snp_gene_associations.rst index 22c8c78368f..8b9e89728ad 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Adrenal_Gland.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Testis_all_snp_gene_associations.rst @@ -1,7 +1,7 @@ -.. _GTEx_eQTL_allpairs_Adrenal_Gland: +.. _GTEx_eQTL_Testis_all_snp_gene_associations: -GTEx_eQTL_allpairs_Adrenal_Gland -================================ +GTEx_eQTL_Testis_all_snp_gene_associations +========================================== * **Versions:** v8 * **Reference genome builds:** GRCh38 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Adipose_Visceral_Omentum.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Thyroid_all_snp_gene_associations.rst similarity index 90% rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Adipose_Visceral_Omentum.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Thyroid_all_snp_gene_associations.rst index c75671298bc..34d6c4701da 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Adipose_Visceral_Omentum.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Thyroid_all_snp_gene_associations.rst @@ -1,6 +1,6 @@ -.. _GTEx_eQTL_allpairs_Adipose_Visceral_Omentum: +.. 
_GTEx_eQTL_Thyroid_all_snp_gene_associations: -GTEx_eQTL_allpairs_Adipose_Visceral_Omentum +GTEx_eQTL_Thyroid_all_snp_gene_associations =========================================== * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Artery_Coronary.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Uterus_all_snp_gene_associations.rst similarity index 86% rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Artery_Coronary.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Uterus_all_snp_gene_associations.rst index bcc6c605321..64dfdc1c721 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Artery_Coronary.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Uterus_all_snp_gene_associations.rst @@ -1,7 +1,7 @@ -.. _GTEx_eQTL_allpairs_Artery_Coronary: +.. _GTEx_eQTL_Uterus_all_snp_gene_associations: -GTEx_eQTL_allpairs_Artery_Coronary -================================== +GTEx_eQTL_Uterus_all_snp_gene_associations +========================================== * **Versions:** v8 * **Reference genome builds:** GRCh38 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Adipose_Subcutaneous.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Vagina_all_snp_gene_associations.rst similarity index 86% rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Adipose_Subcutaneous.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Vagina_all_snp_gene_associations.rst index a051200fc19..35abd532099 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Adipose_Subcutaneous.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Vagina_all_snp_gene_associations.rst @@ -1,7 +1,7 @@ -.. _GTEx_eQTL_allpairs_Adipose_Subcutaneous: +.. _GTEx_eQTL_Vagina_all_snp_gene_associations: -GTEx_eQTL_allpairs_Adipose_Subcutaneous -======================================= +GTEx_eQTL_Vagina_all_snp_gene_associations +========================================== * **Versions:** v8 * **Reference genome builds:** GRCh38 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Whole_Blood_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Whole_Blood_all_snp_gene_associations.rst new file mode 100644 index 00000000000..862600b473a --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_Whole_Blood_all_snp_gene_associations.rst @@ -0,0 +1,39 @@ +.. _GTEx_eQTL_Whole_Blood_all_snp_gene_associations: + +GTEx_eQTL_Whole_Blood_all_snp_gene_associations +=============================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'gene_id': str + 'variant_id': str + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Artery_Tibial.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Artery_Tibial.rst deleted file mode 100644 index 16d156b7085..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Artery_Tibial.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_allpairs_Artery_Tibial: - -GTEx_eQTL_allpairs_Artery_Tibial -================================ - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Amygdala.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Amygdala.rst deleted file mode 100644 index 82d5617f4c8..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Amygdala.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_allpairs_Brain_Amygdala: - -GTEx_eQTL_allpairs_Brain_Amygdala -================================= - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24.rst deleted file mode 100644 index e0b8994e344..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. 
_GTEx_eQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24: - -GTEx_eQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24 -======================================================= - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Caudate_basal_ganglia.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Caudate_basal_ganglia.rst deleted file mode 100644 index 63902160116..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Caudate_basal_ganglia.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_allpairs_Brain_Caudate_basal_ganglia: - -GTEx_eQTL_allpairs_Brain_Caudate_basal_ganglia -============================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Cerebellar_Hemisphere.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Cerebellar_Hemisphere.rst deleted file mode 100644 index 5d2b2af3f1c..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Cerebellar_Hemisphere.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_allpairs_Brain_Cerebellar_Hemisphere: - -GTEx_eQTL_allpairs_Brain_Cerebellar_Hemisphere -============================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Cerebellum.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Cerebellum.rst deleted file mode 100644 index be35ca19066..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Cerebellum.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_allpairs_Brain_Cerebellum: - -GTEx_eQTL_allpairs_Brain_Cerebellum -=================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Cortex.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Cortex.rst deleted file mode 100644 index 268c1976bab..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Cortex.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_allpairs_Brain_Cortex: - -GTEx_eQTL_allpairs_Brain_Cortex -=============================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Hippocampus.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Hippocampus.rst deleted file mode 100644 index 665692560d1..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Hippocampus.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. 
_GTEx_eQTL_allpairs_Brain_Hippocampus: - -GTEx_eQTL_allpairs_Brain_Hippocampus -==================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Hypothalamus.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Hypothalamus.rst deleted file mode 100644 index d312030fcb6..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Hypothalamus.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_allpairs_Brain_Hypothalamus: - -GTEx_eQTL_allpairs_Brain_Hypothalamus -===================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Putamen_basal_ganglia.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Putamen_basal_ganglia.rst deleted file mode 100644 index 38c4c2820ed..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Brain_Putamen_basal_ganglia.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_allpairs_Brain_Putamen_basal_ganglia: - -GTEx_eQTL_allpairs_Brain_Putamen_basal_ganglia -============================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Colon_Sigmoid.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Colon_Sigmoid.rst deleted file mode 100644 index d64cfda1dd5..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Colon_Sigmoid.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_allpairs_Colon_Sigmoid: - -GTEx_eQTL_allpairs_Colon_Sigmoid -================================ - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Colon_Transverse.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Colon_Transverse.rst deleted file mode 100644 index ee809ed97a1..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Colon_Transverse.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_allpairs_Colon_Transverse: - -GTEx_eQTL_allpairs_Colon_Transverse -=================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Esophagus_Mucosa.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Esophagus_Mucosa.rst deleted file mode 100644 index 8593aa7ad1d..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Esophagus_Mucosa.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. 
_GTEx_eQTL_allpairs_Esophagus_Mucosa: - -GTEx_eQTL_allpairs_Esophagus_Mucosa -=================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Esophagus_Muscularis.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Esophagus_Muscularis.rst deleted file mode 100644 index 0ed95e808cd..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Esophagus_Muscularis.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_allpairs_Esophagus_Muscularis: - -GTEx_eQTL_allpairs_Esophagus_Muscularis -======================================= - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Heart_Left_Ventricle.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Heart_Left_Ventricle.rst deleted file mode 100644 index a7fb139ff02..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Heart_Left_Ventricle.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_allpairs_Heart_Left_Ventricle: - -GTEx_eQTL_allpairs_Heart_Left_Ventricle -======================================= - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Kidney_Cortex.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Kidney_Cortex.rst deleted file mode 100644 index f592163b795..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Kidney_Cortex.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_allpairs_Kidney_Cortex: - -GTEx_eQTL_allpairs_Kidney_Cortex -================================ - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Liver.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Liver.rst deleted file mode 100644 index 35ed2ea186c..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Liver.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_allpairs_Liver: - -GTEx_eQTL_allpairs_Liver -======================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Minor_Salivary_Gland.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Minor_Salivary_Gland.rst deleted file mode 100644 index d1f7d7f4e91..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Minor_Salivary_Gland.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_allpairs_Minor_Salivary_Gland: - -GTEx_eQTL_allpairs_Minor_Salivary_Gland -======================================= - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Muscle_Skeletal.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Muscle_Skeletal.rst deleted file mode 100644 index 43066dfce22..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Muscle_Skeletal.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_allpairs_Muscle_Skeletal: - -GTEx_eQTL_allpairs_Muscle_Skeletal -================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Nerve_Tibial.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Nerve_Tibial.rst deleted file mode 100644 index 685788c3bd1..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Nerve_Tibial.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_allpairs_Nerve_Tibial: - -GTEx_eQTL_allpairs_Nerve_Tibial -=============================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Ovary.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Ovary.rst deleted file mode 100644 index 68847a52817..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Ovary.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_allpairs_Ovary: - -GTEx_eQTL_allpairs_Ovary -======================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Pancreas.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Pancreas.rst deleted file mode 100644 index 936d3a97d10..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Pancreas.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_allpairs_Pancreas: - -GTEx_eQTL_allpairs_Pancreas -=========================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Pituitary.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Pituitary.rst deleted file mode 100644 index 95afebfd9d5..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Pituitary.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_allpairs_Pituitary: - -GTEx_eQTL_allpairs_Pituitary -============================ - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Prostate.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Prostate.rst deleted file mode 100644 index e252471e957..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Prostate.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_allpairs_Prostate: - -GTEx_eQTL_allpairs_Prostate -=========================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Skin_Sun_Exposed_Lower_leg.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Skin_Sun_Exposed_Lower_leg.rst deleted file mode 100644 index 514a58a69e0..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Skin_Sun_Exposed_Lower_leg.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_allpairs_Skin_Sun_Exposed_Lower_leg: - -GTEx_eQTL_allpairs_Skin_Sun_Exposed_Lower_leg -============================================= - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Spleen.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Spleen.rst deleted file mode 100644 index 47134d85953..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Spleen.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_allpairs_Spleen: - -GTEx_eQTL_allpairs_Spleen -========================= - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Stomach.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Stomach.rst deleted file mode 100644 index 7b2e575b99d..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Stomach.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_allpairs_Stomach: - -GTEx_eQTL_allpairs_Stomach -========================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Testis.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Testis.rst deleted file mode 100644 index 88f97eb38cb..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Testis.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_allpairs_Testis: - -GTEx_eQTL_allpairs_Testis -========================= - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Thyroid.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Thyroid.rst deleted file mode 100644 index 743fdcb7d3d..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Thyroid.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_allpairs_Thyroid: - -GTEx_eQTL_allpairs_Thyroid -========================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Uterus.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Uterus.rst deleted file mode 100644 index ef2cf2654c1..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Uterus.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_allpairs_Uterus: - -GTEx_eQTL_allpairs_Uterus -========================= - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Vagina.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Vagina.rst deleted file mode 100644 index 82487f0024c..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Vagina.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_allpairs_Vagina: - -GTEx_eQTL_allpairs_Vagina -========================= - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Whole_Blood.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Whole_Blood.rst deleted file mode 100644 index 31c04731633..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Whole_Blood.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _GTEx_eQTL_allpairs_Whole_Blood: - -GTEx_eQTL_allpairs_Whole_Blood -============================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Adipose_Subcutaneous_all_snp_gene_associations.rst similarity index 89% rename from hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Adipose_Subcutaneous_all_snp_gene_associations.rst index 66f21944ef2..8ada5364461 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Adipose_Subcutaneous_all_snp_gene_associations.rst @@ -1,6 +1,6 @@ -.. _GTEx_sQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia: +.. _GTEx_sQTL_Adipose_Subcutaneous_all_snp_gene_associations: -GTEx_sQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia +GTEx_sQTL_Adipose_Subcutaneous_all_snp_gene_associations ======================================================== * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Adipose_Visceral_Omentum_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Adipose_Visceral_Omentum_all_snp_gene_associations.rst new file mode 100644 index 00000000000..093fe6bf0e3 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Adipose_Visceral_Omentum_all_snp_gene_associations.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_Adipose_Visceral_Omentum_all_snp_gene_associations: + +GTEx_sQTL_Adipose_Visceral_Omentum_all_snp_gene_associations +============================================================ + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text
+
+    ----------------------------------------
+    Global fields:
+        'metadata': struct {
+            name: str,
+            version: str,
+            reference_genome: str,
+            n_rows: int32,
+            n_partitions: int32
+        }
+    ----------------------------------------
+    Row fields:
+        'locus': locus
+        'alleles': array
+        'phenotype_id': struct {
+            intron: interval<locus<GRCh38>>,
+            cluster: str,
+            gene_id: str
+        }
+        'tss_distance': int32
+        'ma_samples': int32
+        'ma_count': int32
+        'maf': float64
+        'pval_nominal': float64
+        'slope': float64
+        'slope_se': float64
+    ----------------------------------------
+    Key: ['locus', 'alleles']
+    ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Spinal_cord_cervical_c-1.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Adrenal_Gland_all_snp_gene_associations.rst
similarity index 90%
rename from hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Spinal_cord_cervical_c-1.rst
rename to hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Adrenal_Gland_all_snp_gene_associations.rst
index 5ed0b6b3ed7..187c8e160d3 100644
--- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Spinal_cord_cervical_c-1.rst
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Adrenal_Gland_all_snp_gene_associations.rst
@@ -1,6 +1,6 @@
-.. _GTEx_sQTL_allpairs_Brain_Spinal_cord_cervical_c-1:
+.. _GTEx_sQTL_Adrenal_Gland_all_snp_gene_associations:
 
-GTEx_sQTL_allpairs_Brain_Spinal_cord_cervical_c-1
+GTEx_sQTL_Adrenal_Gland_all_snp_gene_associations
 =================================================
 
 * **Versions:** v8
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Artery_Aorta_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Artery_Aorta_all_snp_gene_associations.rst
new file mode 100644
index 00000000000..0c8d81edeed
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Artery_Aorta_all_snp_gene_associations.rst
@@ -0,0 +1,42 @@
+.. _GTEx_sQTL_Artery_Aorta_all_snp_gene_associations:
+
+GTEx_sQTL_Artery_Aorta_all_snp_gene_associations
+================================================
+
+* **Versions:** v8
+* **Reference genome builds:** GRCh38
+* **Type:** :class:`hail.Table`
+
+Schema (v8, GRCh38)
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+    ----------------------------------------
+    Global fields:
+        'metadata': struct {
+            name: str,
+            version: str,
+            reference_genome: str,
+            n_rows: int32,
+            n_partitions: int32
+        }
+    ----------------------------------------
+    Row fields:
+        'locus': locus
+        'alleles': array
+        'phenotype_id': struct {
+            intron: interval<locus<GRCh38>>,
+            cluster: str,
+            gene_id: str
+        }
+        'tss_distance': int32
+        'ma_samples': int32
+        'ma_count': int32
+        'maf': float64
+        'pval_nominal': float64
+        'slope': float64
+        'slope_se': float64
+    ----------------------------------------
+    Key: ['locus', 'alleles']
+    ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Artery_Coronary_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Artery_Coronary_all_snp_gene_associations.rst
new file mode 100644
index 00000000000..1d4fb614395
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Artery_Coronary_all_snp_gene_associations.rst
@@ -0,0 +1,42 @@
+.. _GTEx_sQTL_Artery_Coronary_all_snp_gene_associations:
+
+GTEx_sQTL_Artery_Coronary_all_snp_gene_associations
+===================================================
+
+* **Versions:** v8
+* **Reference genome builds:** GRCh38
+* **Type:** :class:`hail.Table`
+
+Schema (v8, GRCh38)
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+    ----------------------------------------
+    Global fields:
+        'metadata': struct {
+            name: str,
+            version: str,
+            reference_genome: str,
+            n_rows: int32,
+            n_partitions: int32
+        }
+    ----------------------------------------
+    Row fields:
+        'locus': locus
+        'alleles': array
+        'phenotype_id': struct {
+            intron: interval<locus<GRCh38>>,
+            cluster: str,
+            gene_id: str
+        }
+        'tss_distance': int32
+        'ma_samples': int32
+        'ma_count': int32
+        'maf': float64
+        'pval_nominal': float64
+        'slope': float64
+        'slope_se': float64
+    ----------------------------------------
+    Key: ['locus', 'alleles']
+    ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Small_Intestine_Terminal_Ileum.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Artery_Tibial_all_snp_gene_associations.rst
similarity index 90%
rename from hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Small_Intestine_Terminal_Ileum.rst
rename to hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Artery_Tibial_all_snp_gene_associations.rst
index a0c0e060f84..70420933e8d 100644
--- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Small_Intestine_Terminal_Ileum.rst
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Artery_Tibial_all_snp_gene_associations.rst
@@ -1,6 +1,6 @@
-.. _GTEx_sQTL_allpairs_Small_Intestine_Terminal_Ileum:
+.. _GTEx_sQTL_Artery_Tibial_all_snp_gene_associations:
 
-GTEx_sQTL_allpairs_Small_Intestine_Terminal_Ileum
+GTEx_sQTL_Artery_Tibial_all_snp_gene_associations
 =================================================
 
 * **Versions:** v8
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Amygdala_all_snp_gene_associations.rst
similarity index 90%
rename from hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic.rst
rename to hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Amygdala_all_snp_gene_associations.rst
index 4bcb41d6c74..176d54c9890 100644
--- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic.rst
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Amygdala_all_snp_gene_associations.rst
@@ -1,6 +1,6 @@
-.. _GTEx_sQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic:
+.. _GTEx_sQTL_Brain_Amygdala_all_snp_gene_associations:
 
-GTEx_sQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic
+GTEx_sQTL_Brain_Amygdala_all_snp_gene_associations
 ==================================================
 
 * **Versions:** v8
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.rst
new file mode 100644
index 00000000000..c6dd450788c
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.rst
@@ -0,0 +1,42 @@
+.. _GTEx_sQTL_Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations:
+
+GTEx_sQTL_Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations
+========================================================================
+
+* **Versions:** v8
+* **Reference genome builds:** GRCh38
+* **Type:** :class:`hail.Table`
+
+Schema (v8, GRCh38)
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+    ----------------------------------------
+    Global fields:
+        'metadata': struct {
+            name: str,
+            version: str,
+            reference_genome: str,
+            n_rows: int32,
+            n_partitions: int32
+        }
+    ----------------------------------------
+    Row fields:
+        'locus': locus
+        'alleles': array
+        'phenotype_id': struct {
+            intron: interval<locus<GRCh38>>,
+            cluster: str,
+            gene_id: str
+        }
+        'tss_distance': int32
+        'ma_samples': int32
+        'ma_count': int32
+        'maf': float64
+        'pval_nominal': float64
+        'slope': float64
+        'slope_se': float64
+    ----------------------------------------
+    Key: ['locus', 'alleles']
+    ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Caudate_basal_ganglia_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Caudate_basal_ganglia_all_snp_gene_associations.rst
new file mode 100644
index 00000000000..4ed7ba6fb53
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Caudate_basal_ganglia_all_snp_gene_associations.rst
@@ -0,0 +1,42 @@
+.. _GTEx_sQTL_Brain_Caudate_basal_ganglia_all_snp_gene_associations:
+
+GTEx_sQTL_Brain_Caudate_basal_ganglia_all_snp_gene_associations
+===============================================================
+
+* **Versions:** v8
+* **Reference genome builds:** GRCh38
+* **Type:** :class:`hail.Table`
+
+Schema (v8, GRCh38)
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+    ----------------------------------------
+    Global fields:
+        'metadata': struct {
+            name: str,
+            version: str,
+            reference_genome: str,
+            n_rows: int32,
+            n_partitions: int32
+        }
+    ----------------------------------------
+    Row fields:
+        'locus': locus
+        'alleles': array
+        'phenotype_id': struct {
+            intron: interval<locus<GRCh38>>,
+            cluster: str,
+            gene_id: str
+        }
+        'tss_distance': int32
+        'ma_samples': int32
+        'ma_count': int32
+        'maf': float64
+        'pval_nominal': float64
+        'slope': float64
+        'slope_se': float64
+    ----------------------------------------
+    Key: ['locus', 'alleles']
+    ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Cerebellar_Hemisphere_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Cerebellar_Hemisphere_all_snp_gene_associations.rst
new file mode 100644
index 00000000000..b7b1adbb0e5
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Cerebellar_Hemisphere_all_snp_gene_associations.rst
@@ -0,0 +1,42 @@
+.. _GTEx_sQTL_Brain_Cerebellar_Hemisphere_all_snp_gene_associations:
+
+GTEx_sQTL_Brain_Cerebellar_Hemisphere_all_snp_gene_associations
+===============================================================
+
+* **Versions:** v8
+* **Reference genome builds:** GRCh38
+* **Type:** :class:`hail.Table`
+
+Schema (v8, GRCh38)
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+    ----------------------------------------
+    Global fields:
+        'metadata': struct {
+            name: str,
+            version: str,
+            reference_genome: str,
+            n_rows: int32,
+            n_partitions: int32
+        }
+    ----------------------------------------
+    Row fields:
+        'locus': locus
+        'alleles': array
+        'phenotype_id': struct {
+            intron: interval<locus<GRCh38>>,
+            cluster: str,
+            gene_id: str
+        }
+        'tss_distance': int32
+        'ma_samples': int32
+        'ma_count': int32
+        'maf': float64
+        'pval_nominal': float64
+        'slope': float64
+        'slope_se': float64
+    ----------------------------------------
+    Key: ['locus', 'alleles']
+    ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Cells_EBV-transformed_lymphocytes.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Cerebellum_all_snp_gene_associations.rst
similarity index 90%
rename from hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Cells_EBV-transformed_lymphocytes.rst
rename to hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Cerebellum_all_snp_gene_associations.rst
index eef0af0b28c..4cd86a145fa 100644
--- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Cells_EBV-transformed_lymphocytes.rst
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Cerebellum_all_snp_gene_associations.rst
@@ -1,6 +1,6 @@
-.. _GTEx_sQTL_allpairs_Cells_EBV-transformed_lymphocytes:
+.. _GTEx_sQTL_Brain_Cerebellum_all_snp_gene_associations:
 
-GTEx_sQTL_allpairs_Cells_EBV-transformed_lymphocytes
+GTEx_sQTL_Brain_Cerebellum_all_snp_gene_associations
 ====================================================
 
 * **Versions:** v8
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Cortex_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Cortex_all_snp_gene_associations.rst
new file mode 100644
index 00000000000..8e62af681a6
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Cortex_all_snp_gene_associations.rst
@@ -0,0 +1,42 @@
+.. _GTEx_sQTL_Brain_Cortex_all_snp_gene_associations:
+
+GTEx_sQTL_Brain_Cortex_all_snp_gene_associations
+================================================
+
+* **Versions:** v8
+* **Reference genome builds:** GRCh38
+* **Type:** :class:`hail.Table`
+
+Schema (v8, GRCh38)
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+    ----------------------------------------
+    Global fields:
+        'metadata': struct {
+            name: str,
+            version: str,
+            reference_genome: str,
+            n_rows: int32,
+            n_partitions: int32
+        }
+    ----------------------------------------
+    Row fields:
+        'locus': locus
+        'alleles': array
+        'phenotype_id': struct {
+            intron: interval<locus<GRCh38>>,
+            cluster: str,
+            gene_id: str
+        }
+        'tss_distance': int32
+        'ma_samples': int32
+        'ma_count': int32
+        'maf': float64
+        'pval_nominal': float64
+        'slope': float64
+        'slope_se': float64
+    ----------------------------------------
+    Key: ['locus', 'alleles']
+    ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Frontal_Cortex_BA9_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Frontal_Cortex_BA9_all_snp_gene_associations.rst
new file mode 100644
index 00000000000..1ba6f881e77
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Frontal_Cortex_BA9_all_snp_gene_associations.rst
@@ -0,0 +1,42 @@
+.. _GTEx_sQTL_Brain_Frontal_Cortex_BA9_all_snp_gene_associations:
+
+GTEx_sQTL_Brain_Frontal_Cortex_BA9_all_snp_gene_associations
+============================================================
+
+* **Versions:** v8
+* **Reference genome builds:** GRCh38
+* **Type:** :class:`hail.Table`
+
+Schema (v8, GRCh38)
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+    ----------------------------------------
+    Global fields:
+        'metadata': struct {
+            name: str,
+            version: str,
+            reference_genome: str,
+            n_rows: int32,
+            n_partitions: int32
+        }
+    ----------------------------------------
+    Row fields:
+        'locus': locus
+        'alleles': array
+        'phenotype_id': struct {
+            intron: interval<locus<GRCh38>>,
+            cluster: str,
+            gene_id: str
+        }
+        'tss_distance': int32
+        'ma_samples': int32
+        'ma_count': int32
+        'maf': float64
+        'pval_nominal': float64
+        'slope': float64
+        'slope_se': float64
+    ----------------------------------------
+    Key: ['locus', 'alleles']
+    ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Hippocampus_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Hippocampus_all_snp_gene_associations.rst
new file mode 100644
index 00000000000..8ab36bd1461
--- /dev/null
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Hippocampus_all_snp_gene_associations.rst
@@ -0,0 +1,42 @@
+.. _GTEx_sQTL_Brain_Hippocampus_all_snp_gene_associations:
+
+GTEx_sQTL_Brain_Hippocampus_all_snp_gene_associations
+=====================================================
+
+* **Versions:** v8
+* **Reference genome builds:** GRCh38
+* **Type:** :class:`hail.Table`
+
+Schema (v8, GRCh38)
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: text
+
+    ----------------------------------------
+    Global fields:
+        'metadata': struct {
+            name: str,
+            version: str,
+            reference_genome: str,
+            n_rows: int32,
+            n_partitions: int32
+        }
+    ----------------------------------------
+    Row fields:
+        'locus': locus
+        'alleles': array
+        'phenotype_id': struct {
+            intron: interval<locus<GRCh38>>,
+            cluster: str,
+            gene_id: str
+        }
+        'tss_distance': int32
+        'ma_samples': int32
+        'ma_count': int32
+        'maf': float64
+        'pval_nominal': float64
+        'slope': float64
+        'slope_se': float64
+    ----------------------------------------
+    Key: ['locus', 'alleles']
+    ----------------------------------------
diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Esophagus_Gastroesophageal_Junction.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Hypothalamus_all_snp_gene_associations.rst
similarity index 89%
rename from hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Esophagus_Gastroesophageal_Junction.rst
rename to hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Hypothalamus_all_snp_gene_associations.rst
index b1292d43d20..2865572a1a9 100644
--- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Esophagus_Gastroesophageal_Junction.rst
+++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Hypothalamus_all_snp_gene_associations.rst
@@ -1,6 +1,6 @@
-.. _GTEx_sQTL_allpairs_Esophagus_Gastroesophageal_Junction:
+..
_GTEx_sQTL_Brain_Hypothalamus_all_snp_gene_associations: -GTEx_sQTL_allpairs_Esophagus_Gastroesophageal_Junction +GTEx_sQTL_Brain_Hypothalamus_all_snp_gene_associations ====================================================== * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.rst new file mode 100644 index 00000000000..b31b6fbac15 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations: + +GTEx_sQTL_Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations +========================================================================= + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Putamen_basal_ganglia_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Putamen_basal_ganglia_all_snp_gene_associations.rst new file mode 100644 index 00000000000..6e3e63fc703 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Putamen_basal_ganglia_all_snp_gene_associations.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_Brain_Putamen_basal_ganglia_all_snp_gene_associations: + +GTEx_sQTL_Brain_Putamen_basal_ganglia_all_snp_gene_associations +=============================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.rst new file mode 100644 index 00000000000..acda830ef64 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations: + +GTEx_sQTL_Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations +================================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Substantia_nigra_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Substantia_nigra_all_snp_gene_associations.rst new file mode 100644 index 00000000000..e51d8f62902 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Brain_Substantia_nigra_all_snp_gene_associations.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_Brain_Substantia_nigra_all_snp_gene_associations: + +GTEx_sQTL_Brain_Substantia_nigra_all_snp_gene_associations +========================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Breast_Mammary_Tissue_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Breast_Mammary_Tissue_all_snp_gene_associations.rst new file mode 100644 index 00000000000..5efb8007818 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Breast_Mammary_Tissue_all_snp_gene_associations.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_Breast_Mammary_Tissue_all_snp_gene_associations: + +GTEx_sQTL_Breast_Mammary_Tissue_all_snp_gene_associations +========================================================= + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Cells_Cultured_fibroblasts_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Cells_Cultured_fibroblasts_all_snp_gene_associations.rst new file mode 100644 index 00000000000..3172e2c974d --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Cells_Cultured_fibroblasts_all_snp_gene_associations.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_Cells_Cultured_fibroblasts_all_snp_gene_associations: + +GTEx_sQTL_Cells_Cultured_fibroblasts_all_snp_gene_associations +============================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.rst new file mode 100644 index 00000000000..9517b8766a3 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_Cells_EBV-transformed_lymphocytes_all_snp_gene_associations: + +GTEx_sQTL_Cells_EBV-transformed_lymphocytes_all_snp_gene_associations +===================================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Colon_Sigmoid_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Colon_Sigmoid_all_snp_gene_associations.rst new file mode 100644 index 00000000000..ce243ff04f6 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Colon_Sigmoid_all_snp_gene_associations.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_Colon_Sigmoid_all_snp_gene_associations: + +GTEx_sQTL_Colon_Sigmoid_all_snp_gene_associations +================================================= + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Colon_Transverse_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Colon_Transverse_all_snp_gene_associations.rst new file mode 100644 index 00000000000..f129ce9bcce --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Colon_Transverse_all_snp_gene_associations.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_Colon_Transverse_all_snp_gene_associations: + +GTEx_sQTL_Colon_Transverse_all_snp_gene_associations +==================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.rst new file mode 100644 index 00000000000..5fb54441f2e --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_Esophagus_Gastroesophageal_Junction_all_snp_gene_associations: + +GTEx_sQTL_Esophagus_Gastroesophageal_Junction_all_snp_gene_associations +======================================================================= + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Esophagus_Mucosa_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Esophagus_Mucosa_all_snp_gene_associations.rst new file mode 100644 index 00000000000..f92d2ef1265 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Esophagus_Mucosa_all_snp_gene_associations.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_Esophagus_Mucosa_all_snp_gene_associations: + +GTEx_sQTL_Esophagus_Mucosa_all_snp_gene_associations +==================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Esophagus_Muscularis_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Esophagus_Muscularis_all_snp_gene_associations.rst new file mode 100644 index 00000000000..d60e6b4a246 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Esophagus_Muscularis_all_snp_gene_associations.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_Esophagus_Muscularis_all_snp_gene_associations: + +GTEx_sQTL_Esophagus_Muscularis_all_snp_gene_associations +======================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Heart_Atrial_Appendage_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Heart_Atrial_Appendage_all_snp_gene_associations.rst new file mode 100644 index 00000000000..f7c8ceba38a --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Heart_Atrial_Appendage_all_snp_gene_associations.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_Heart_Atrial_Appendage_all_snp_gene_associations: + +GTEx_sQTL_Heart_Atrial_Appendage_all_snp_gene_associations +========================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Heart_Left_Ventricle_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Heart_Left_Ventricle_all_snp_gene_associations.rst new file mode 100644 index 00000000000..17c994fb956 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Heart_Left_Ventricle_all_snp_gene_associations.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_Heart_Left_Ventricle_all_snp_gene_associations: + +GTEx_sQTL_Heart_Left_Ventricle_all_snp_gene_associations +======================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Kidney_Cortex_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Kidney_Cortex_all_snp_gene_associations.rst new file mode 100644 index 00000000000..3cff963512b --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Kidney_Cortex_all_snp_gene_associations.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_Kidney_Cortex_all_snp_gene_associations: + +GTEx_sQTL_Kidney_Cortex_all_snp_gene_associations +================================================= + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Substantia_nigra.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Liver_all_snp_gene_associations.rst similarity index 91% rename from hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Substantia_nigra.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Liver_all_snp_gene_associations.rst index cd4511b5d25..b31fb33dd72 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Substantia_nigra.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Liver_all_snp_gene_associations.rst @@ -1,6 +1,6 @@ -.. _GTEx_sQTL_allpairs_Brain_Substantia_nigra: +.. _GTEx_sQTL_Liver_all_snp_gene_associations: -GTEx_sQTL_allpairs_Brain_Substantia_nigra +GTEx_sQTL_Liver_all_snp_gene_associations ========================================= * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Breast_Mammary_Tissue.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Lung_all_snp_gene_associations.rst similarity index 92% rename from hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Breast_Mammary_Tissue.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Lung_all_snp_gene_associations.rst index 93b2e173ad9..8cff1e193ae 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Breast_Mammary_Tissue.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Lung_all_snp_gene_associations.rst @@ -1,6 +1,6 @@ -.. _GTEx_sQTL_allpairs_Breast_Mammary_Tissue: +.. 
_GTEx_sQTL_Lung_all_snp_gene_associations: -GTEx_sQTL_allpairs_Breast_Mammary_Tissue +GTEx_sQTL_Lung_all_snp_gene_associations ======================================== * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Minor_Salivary_Gland_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Minor_Salivary_Gland_all_snp_gene_associations.rst new file mode 100644 index 00000000000..12ae74f2ebb --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Minor_Salivary_Gland_all_snp_gene_associations.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_Minor_Salivary_Gland_all_snp_gene_associations: + +GTEx_sQTL_Minor_Salivary_Gland_all_snp_gene_associations +======================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Muscle_Skeletal_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Muscle_Skeletal_all_snp_gene_associations.rst new file mode 100644 index 00000000000..6d9c90164b2 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Muscle_Skeletal_all_snp_gene_associations.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_Muscle_Skeletal_all_snp_gene_associations: + +GTEx_sQTL_Muscle_Skeletal_all_snp_gene_associations +=================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Nerve_Tibial_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Nerve_Tibial_all_snp_gene_associations.rst new file mode 100644 index 00000000000..a8ac97dd802 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Nerve_Tibial_all_snp_gene_associations.rst @@ -0,0 +1,42 @@ +.. 
_GTEx_sQTL_Nerve_Tibial_all_snp_gene_associations: + +GTEx_sQTL_Nerve_Tibial_all_snp_gene_associations +================================================ + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Heart_Atrial_Appendage.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Ovary_all_snp_gene_associations.rst similarity index 91% rename from hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Heart_Atrial_Appendage.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Ovary_all_snp_gene_associations.rst index 78708f62780..b2a7403e39a 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Heart_Atrial_Appendage.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Ovary_all_snp_gene_associations.rst @@ -1,6 +1,6 @@ -.. _GTEx_sQTL_allpairs_Heart_Atrial_Appendage: +.. _GTEx_sQTL_Ovary_all_snp_gene_associations: -GTEx_sQTL_allpairs_Heart_Atrial_Appendage +GTEx_sQTL_Ovary_all_snp_gene_associations ========================================= * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Pancreas_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Pancreas_all_snp_gene_associations.rst new file mode 100644 index 00000000000..ac85e8c57ea --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Pancreas_all_snp_gene_associations.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_Pancreas_all_snp_gene_associations: + +GTEx_sQTL_Pancreas_all_snp_gene_associations +============================================ + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Cells_Cultured_fibroblasts.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Pituitary_all_snp_gene_associations.rst similarity index 91% rename from hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Cells_Cultured_fibroblasts.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Pituitary_all_snp_gene_associations.rst index 32475df3201..c6511b9404c 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Cells_Cultured_fibroblasts.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Pituitary_all_snp_gene_associations.rst @@ -1,6 +1,6 @@ -.. _GTEx_sQTL_allpairs_Cells_Cultured_fibroblasts: +.. _GTEx_sQTL_Pituitary_all_snp_gene_associations: -GTEx_sQTL_allpairs_Cells_Cultured_fibroblasts +GTEx_sQTL_Pituitary_all_snp_gene_associations ============================================= * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Prostate_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Prostate_all_snp_gene_associations.rst new file mode 100644 index 00000000000..5fd8ee897ae --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Prostate_all_snp_gene_associations.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_Prostate_all_snp_gene_associations: + +GTEx_sQTL_Prostate_all_snp_gene_associations +============================================ + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.rst new file mode 100644 index 00000000000..80f95bfc44e --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.rst @@ -0,0 +1,42 @@ +.. 
_GTEx_sQTL_Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations: + +GTEx_sQTL_Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations +=================================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.rst new file mode 100644 index 00000000000..184172d66a8 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations: + +GTEx_sQTL_Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations +============================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Small_Intestine_Terminal_Ileum_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Small_Intestine_Terminal_Ileum_all_snp_gene_associations.rst new file mode 100644 index 00000000000..b6e96ee8016 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Small_Intestine_Terminal_Ileum_all_snp_gene_associations.rst @@ -0,0 +1,42 @@ +.. _GTEx_sQTL_Small_Intestine_Terminal_Ileum_all_snp_gene_associations: + +GTEx_sQTL_Small_Intestine_Terminal_Ileum_all_snp_gene_associations +================================================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Adipose_Subcutaneous.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Spleen_all_snp_gene_associations.rst similarity index 87% rename from hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Adipose_Subcutaneous.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Spleen_all_snp_gene_associations.rst index 4403302cb9f..c258702aa3a 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Adipose_Subcutaneous.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Spleen_all_snp_gene_associations.rst @@ -1,7 +1,7 @@ -.. _GTEx_sQTL_allpairs_Adipose_Subcutaneous: +.. _GTEx_sQTL_Spleen_all_snp_gene_associations: -GTEx_sQTL_allpairs_Adipose_Subcutaneous -======================================= +GTEx_sQTL_Spleen_all_snp_gene_associations +========================================== * **Versions:** v8 * **Reference genome builds:** GRCh38 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Frontal_Cortex_BA9.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Stomach_all_snp_gene_associations.rst similarity index 91% rename from hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Frontal_Cortex_BA9.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Stomach_all_snp_gene_associations.rst index 38589f54a13..5b777f812f0 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Frontal_Cortex_BA9.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Stomach_all_snp_gene_associations.rst @@ -1,6 +1,6 @@ -.. _GTEx_sQTL_allpairs_Brain_Frontal_Cortex_BA9: +.. _GTEx_sQTL_Stomach_all_snp_gene_associations: -GTEx_sQTL_allpairs_Brain_Frontal_Cortex_BA9 +GTEx_sQTL_Stomach_all_snp_gene_associations =========================================== * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Artery_Coronary.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Testis_all_snp_gene_associations.rst similarity index 87% rename from hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Artery_Coronary.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Testis_all_snp_gene_associations.rst index 82957434abb..a854831eb0f 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Artery_Coronary.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Testis_all_snp_gene_associations.rst @@ -1,7 +1,7 @@ -.. _GTEx_sQTL_allpairs_Artery_Coronary: +.. 
_GTEx_sQTL_Testis_all_snp_gene_associations: -GTEx_sQTL_allpairs_Artery_Coronary -================================== +GTEx_sQTL_Testis_all_snp_gene_associations +========================================== * **Versions:** v8 * **Reference genome builds:** GRCh38 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Adipose_Visceral_Omentum.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Thyroid_all_snp_gene_associations.rst similarity index 91% rename from hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Adipose_Visceral_Omentum.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Thyroid_all_snp_gene_associations.rst index b0eff871a7e..f56a112099f 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Adipose_Visceral_Omentum.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Thyroid_all_snp_gene_associations.rst @@ -1,6 +1,6 @@ -.. _GTEx_sQTL_allpairs_Adipose_Visceral_Omentum: +.. _GTEx_sQTL_Thyroid_all_snp_gene_associations: -GTEx_sQTL_allpairs_Adipose_Visceral_Omentum +GTEx_sQTL_Thyroid_all_snp_gene_associations =========================================== * **Versions:** v8 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Adrenal_Gland.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Uterus_all_snp_gene_associations.rst similarity index 87% rename from hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Adrenal_Gland.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Uterus_all_snp_gene_associations.rst index 436f78f73fc..69e6c711f78 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Adrenal_Gland.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Uterus_all_snp_gene_associations.rst @@ -1,7 +1,7 @@ -.. _GTEx_sQTL_allpairs_Adrenal_Gland: +.. _GTEx_sQTL_Uterus_all_snp_gene_associations: -GTEx_sQTL_allpairs_Adrenal_Gland -================================ +GTEx_sQTL_Uterus_all_snp_gene_associations +========================================== * **Versions:** v8 * **Reference genome builds:** GRCh38 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Artery_Aorta.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Vagina_all_snp_gene_associations.rst similarity index 87% rename from hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Artery_Aorta.rst rename to hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Vagina_all_snp_gene_associations.rst index a417c60633a..db3f8ba27da 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Artery_Aorta.rst +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Vagina_all_snp_gene_associations.rst @@ -1,7 +1,7 @@ -.. _GTEx_sQTL_allpairs_Artery_Aorta: +.. _GTEx_sQTL_Vagina_all_snp_gene_associations: -GTEx_sQTL_allpairs_Artery_Aorta -=============================== +GTEx_sQTL_Vagina_all_snp_gene_associations +========================================== * **Versions:** v8 * **Reference genome builds:** GRCh38 diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Whole_Blood_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Whole_Blood_all_snp_gene_associations.rst new file mode 100644 index 00000000000..367a833e7e6 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_Whole_Blood_all_snp_gene_associations.rst @@ -0,0 +1,42 @@ +.. 
_GTEx_sQTL_Whole_Blood_all_snp_gene_associations: + +GTEx_sQTL_Whole_Blood_all_snp_gene_associations +=============================================== + +* **Versions:** v8 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (v8, GRCh38) +~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'phenotype_id': struct { + intron: interval>, + cluster: str, + gene_id: str + } + 'tss_distance': int32 + 'ma_samples': int32 + 'ma_count': int32 + 'maf': float64 + 'pval_nominal': float64 + 'slope': float64 + 'slope_se': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Artery_Tibial.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Artery_Tibial.rst deleted file mode 100644 index 4818985ebe0..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Artery_Tibial.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_allpairs_Artery_Tibial: - -GTEx_sQTL_allpairs_Artery_Tibial -================================ - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Amygdala.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Amygdala.rst deleted file mode 100644 index fe07981a273..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Amygdala.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_allpairs_Brain_Amygdala: - -GTEx_sQTL_allpairs_Brain_Amygdala -================================= - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24.rst deleted file mode 100644 index ad57111f8c6..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24: - -GTEx_sQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24 -======================================================= - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Caudate_basal_ganglia.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Caudate_basal_ganglia.rst deleted file mode 100644 index fbe7b6d8334..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Caudate_basal_ganglia.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_allpairs_Brain_Caudate_basal_ganglia: - -GTEx_sQTL_allpairs_Brain_Caudate_basal_ganglia -============================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Cerebellar_Hemisphere.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Cerebellar_Hemisphere.rst deleted file mode 100644 index f9e382c3b05..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Cerebellar_Hemisphere.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_allpairs_Brain_Cerebellar_Hemisphere: - -GTEx_sQTL_allpairs_Brain_Cerebellar_Hemisphere -============================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Cerebellum.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Cerebellum.rst deleted file mode 100644 index e3637045be4..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Cerebellum.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_allpairs_Brain_Cerebellum: - -GTEx_sQTL_allpairs_Brain_Cerebellum -=================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Cortex.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Cortex.rst deleted file mode 100644 index e71b58449be..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Cortex.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. 
_GTEx_sQTL_allpairs_Brain_Cortex: - -GTEx_sQTL_allpairs_Brain_Cortex -=============================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Hippocampus.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Hippocampus.rst deleted file mode 100644 index aa9c72c87b9..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Hippocampus.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_allpairs_Brain_Hippocampus: - -GTEx_sQTL_allpairs_Brain_Hippocampus -==================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Hypothalamus.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Hypothalamus.rst deleted file mode 100644 index f2f59f5945f..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Hypothalamus.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_allpairs_Brain_Hypothalamus: - -GTEx_sQTL_allpairs_Brain_Hypothalamus -===================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Putamen_basal_ganglia.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Putamen_basal_ganglia.rst deleted file mode 100644 index 6ba79533e93..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Brain_Putamen_basal_ganglia.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_allpairs_Brain_Putamen_basal_ganglia: - -GTEx_sQTL_allpairs_Brain_Putamen_basal_ganglia -============================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Colon_Sigmoid.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Colon_Sigmoid.rst deleted file mode 100644 index 52255749085..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Colon_Sigmoid.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_allpairs_Colon_Sigmoid: - -GTEx_sQTL_allpairs_Colon_Sigmoid -================================ - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Colon_Transverse.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Colon_Transverse.rst deleted file mode 100644 index 4b9ac30ec31..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Colon_Transverse.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. 
_GTEx_sQTL_allpairs_Colon_Transverse: - -GTEx_sQTL_allpairs_Colon_Transverse -=================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Esophagus_Mucosa.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Esophagus_Mucosa.rst deleted file mode 100644 index 5bc94f4278f..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Esophagus_Mucosa.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_allpairs_Esophagus_Mucosa: - -GTEx_sQTL_allpairs_Esophagus_Mucosa -=================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Esophagus_Muscularis.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Esophagus_Muscularis.rst deleted file mode 100644 index b51e24620f9..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Esophagus_Muscularis.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_allpairs_Esophagus_Muscularis: - -GTEx_sQTL_allpairs_Esophagus_Muscularis -======================================= - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Heart_Left_Ventricle.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Heart_Left_Ventricle.rst deleted file mode 100644 index cb586153e85..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Heart_Left_Ventricle.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_allpairs_Heart_Left_Ventricle: - -GTEx_sQTL_allpairs_Heart_Left_Ventricle -======================================= - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Kidney_Cortex.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Kidney_Cortex.rst deleted file mode 100644 index cbc5df23483..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Kidney_Cortex.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_allpairs_Kidney_Cortex: - -GTEx_sQTL_allpairs_Kidney_Cortex -================================ - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Liver.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Liver.rst deleted file mode 100644 index 5de66bab179..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Liver.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. 
_GTEx_sQTL_allpairs_Liver: - -GTEx_sQTL_allpairs_Liver -======================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Lung.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Lung.rst deleted file mode 100644 index 8bd1658b4de..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Lung.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_allpairs_Lung: - -GTEx_sQTL_allpairs_Lung -======================= - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Minor_Salivary_Gland.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Minor_Salivary_Gland.rst deleted file mode 100644 index ad8bb02e277..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Minor_Salivary_Gland.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_allpairs_Minor_Salivary_Gland: - -GTEx_sQTL_allpairs_Minor_Salivary_Gland -======================================= - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Muscle_Skeletal.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Muscle_Skeletal.rst deleted file mode 100644 index 00ffa3f6066..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Muscle_Skeletal.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_allpairs_Muscle_Skeletal: - -GTEx_sQTL_allpairs_Muscle_Skeletal -================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Nerve_Tibial.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Nerve_Tibial.rst deleted file mode 100644 index 477b8d5c4c1..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Nerve_Tibial.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_allpairs_Nerve_Tibial: - -GTEx_sQTL_allpairs_Nerve_Tibial -=============================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Ovary.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Ovary.rst deleted file mode 100644 index 67b13839c84..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Ovary.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. 
_GTEx_sQTL_allpairs_Ovary: - -GTEx_sQTL_allpairs_Ovary -======================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Pancreas.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Pancreas.rst deleted file mode 100644 index dcc44f3a5b4..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Pancreas.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_allpairs_Pancreas: - -GTEx_sQTL_allpairs_Pancreas -=========================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Pituitary.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Pituitary.rst deleted file mode 100644 index 71ea6b4db6e..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Pituitary.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_allpairs_Pituitary: - -GTEx_sQTL_allpairs_Pituitary -============================ - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Prostate.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Prostate.rst deleted file mode 100644 index baeeff2e1bf..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Prostate.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_allpairs_Prostate: - -GTEx_sQTL_allpairs_Prostate -=========================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Skin_Sun_Exposed_Lower_leg.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Skin_Sun_Exposed_Lower_leg.rst deleted file mode 100644 index f47fd638828..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Skin_Sun_Exposed_Lower_leg.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_allpairs_Skin_Sun_Exposed_Lower_leg: - -GTEx_sQTL_allpairs_Skin_Sun_Exposed_Lower_leg -============================================= - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Spleen.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Spleen.rst deleted file mode 100644 index 66a842b49cf..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Spleen.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. 
_GTEx_sQTL_allpairs_Spleen: - -GTEx_sQTL_allpairs_Spleen -========================= - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Stomach.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Stomach.rst deleted file mode 100644 index a16502ee211..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Stomach.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_allpairs_Stomach: - -GTEx_sQTL_allpairs_Stomach -========================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Testis.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Testis.rst deleted file mode 100644 index 2120ff1f433..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Testis.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_allpairs_Testis: - -GTEx_sQTL_allpairs_Testis -========================= - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Thyroid.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Thyroid.rst deleted file mode 100644 index 5c5d1bec7b1..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Thyroid.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_allpairs_Thyroid: - -GTEx_sQTL_allpairs_Thyroid -========================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Uterus.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Uterus.rst deleted file mode 100644 index 8375d148fca..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Uterus.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_allpairs_Uterus: - -GTEx_sQTL_allpairs_Uterus -========================= - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Vagina.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Vagina.rst deleted file mode 100644 index 7c583ad799e..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Vagina.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. 
_GTEx_sQTL_allpairs_Vagina: - -GTEx_sQTL_allpairs_Vagina -========================= - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Whole_Blood.rst b/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Whole_Blood.rst deleted file mode 100644 index 245107ebe49..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_sQTL_allpairs_Whole_Blood.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. _GTEx_sQTL_allpairs_Whole_Blood: - -GTEx_sQTL_allpairs_Whole_Blood -============================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - version: str, - reference_genome: str, - n_rows: int32, - n_partitions: int32 - } - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'phenotype_id': struct { - intron: interval>, - cluster: str, - gene_id: str - } - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/dbSNP.rst b/hail/python/hail/docs/datasets/schemas/dbSNP.rst new file mode 100644 index 00000000000..9aa89d21ce5 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/dbSNP.rst @@ -0,0 +1,99 @@ +.. _dbSNP: + +dbSNP +===== + +* **Versions:** 154 +* **Reference genome builds:** GRCh37, GRCh38 +* **Type:** :class:`hail.Table` + +Schema (154, GRCh37) +~~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'metadata': struct { + name: str, + version: str, + reference_genome: str, + n_rows: int32, + n_partitions: int32 + } + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'rsid': str + 'qual': float64 + 'filters': set + 'info': struct { + RS: int32, + GENEINFO: str, + PSEUDOGENEINFO: str, + dbSNPBuildID: int32, + SAO: int32, + SSR: int32, + VC: str, + PM: bool, + NSF: bool, + NSM: bool, + NSN: bool, + SYN: bool, + U3: bool, + U5: bool, + ASS: bool, + DSS: bool, + INT: bool, + R3: bool, + R5: bool, + GNO: bool, + PUB: bool, + FREQ: struct { + _GENOME_DK: float64, + _TWINSUK: float64, + _dbGaP_PopFreq: float64, + _Siberian: float64, + _Chileans: float64, + _FINRISK: float64, + _HapMap: float64, + _Estonian: float64, + _ALSPAC: float64, + _GoESP: float64, + _TOPMED: float64, + _PAGE_STUDY: float64, + _1000Genomes: float64, + _Korea1K: float64, + _ChromosomeY: float64, + _ExAC: float64, + _Qatari: float64, + _GoNL: float64, + _MGP: float64, + _GnomAD: float64, + _Vietnamese: float64, + _GnomAD_exomes: float64, + _PharmGKB: float64, + _KOREAN: float64, + _Daghestan: float64, + _HGDP_Stanford: float64, + _NorthernSweden: float64, + _SGDP_PRJ: float64 + }, + COMMON: bool, + CLNHGVS: array, + CLNVI: array, + CLNORIGIN: array, + CLNSIG: array, + CLNDISDB: array, + CLNDN: array, + CLNREVSTAT: array, + CLNACC: array + } + 'a_index': int32 + 'was_split': bool + 'old_locus': locus + 'old_alleles': array + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Lung.rst b/hail/python/hail/docs/datasets/schemas/dbSNP_rsid.rst similarity index 53% rename from hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Lung.rst rename to hail/python/hail/docs/datasets/schemas/dbSNP_rsid.rst index 7edbd47598b..42556396bec 100644 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_allpairs_Lung.rst +++ b/hail/python/hail/docs/datasets/schemas/dbSNP_rsid.rst @@ -1,14 +1,14 @@ -.. _GTEx_eQTL_allpairs_Lung: +.. _dbSNP_rsid: -GTEx_eQTL_allpairs_Lung -======================= +dbSNP_rsid +========== -* **Versions:** v8 -* **Reference genome builds:** GRCh38 +* **Versions:** 154 +* **Reference genome builds:** GRCh37, GRCh38 * **Type:** :class:`hail.Table` -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ +Schema (154, GRCh37) +~~~~~~~~~~~~~~~~~~~~ .. code-block:: text @@ -23,17 +23,9 @@ Schema (v8, GRCh38) } ---------------------------------------- Row fields: - 'locus': locus + 'locus': locus 'alleles': array - 'gene_id': str - 'variant_id': str - 'tss_distance': int32 - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 + 'rsid': str ---------------------------------------- Key: ['locus', 'alleles'] ---------------------------------------- diff --git a/hail/python/hail/docs/functions/collections.rst b/hail/python/hail/docs/functions/collections.rst index 978db0183a1..7b2e1f322bb 100644 --- a/hail/python/hail/docs/functions/collections.rst +++ b/hail/python/hail/docs/functions/collections.rst @@ -22,6 +22,7 @@ Collection functions len map flatmap + starmap zip enumerate zip_with_index @@ -39,6 +40,7 @@ Collection functions .. autofunction:: len .. autofunction:: map .. autofunction:: flatmap +.. autofunction:: starmap .. autofunction:: zip .. autofunction:: enumerate .. 
autofunction:: zip_with_index diff --git a/hail/python/hail/docs/utils/index.rst b/hail/python/hail/docs/utils/index.rst index 373eec9d97d..04f54fad047 100644 --- a/hail/python/hail/docs/utils/index.rst +++ b/hail/python/hail/docs/utils/index.rst @@ -15,10 +15,12 @@ utils hadoop_is_dir hadoop_stat hadoop_ls + hadoop_scheme_supported copy_log range_table range_matrix_table get_1kg + get_hgdp get_movie_lens .. autoclass:: Interval @@ -31,8 +33,10 @@ utils .. autofunction:: hadoop_is_dir .. autofunction:: hadoop_stat .. autofunction:: hadoop_ls +.. autofunction:: hadoop_scheme_supported .. autofunction:: copy_log .. autofunction:: range_table .. autofunction:: range_matrix_table .. autofunction:: get_1kg +.. autofunction:: get_hgdp .. autofunction:: get_movie_lens diff --git a/hail/python/hail/experimental/datasets.json b/hail/python/hail/experimental/datasets.json index 61bfad05512..89477f742d1 100644 --- a/hail/python/hail/experimental/datasets.json +++ b/hail/python/hail/experimental/datasets.json @@ -1,113 +1,214 @@ { - "1000_Genomes_autosomes": { - "description": "1000 Genomes Project: the largest public catalogue of human variation and genotype data.", - "url": "https://www.internationalgenome.org/home", + "1000_Genomes_HighCov_autosomes": { + "description": "1000 Genomes Project: The New York Genome Center (NYGC), funded by NHGRI, has sequenced 3202 samples from the 1000 Genomes Project sample collection to 30x coverage. Initially, the 2504 unrelated samples from the phase three panel from the 1000 Genomes Project were sequenced. Thereafter, an additional 698 samples, related to samples in the 2504 panel, were also sequenced.", + "url": "https://www.internationalgenome.org/data-portal/data-collection/30x-grch38", "versions": [ { - "reference_genome": "GRCh37", + "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/1000_Genomes_autosomes.phase_3.GRCh37.mt" + "us": "s3://hail-datasets-us-east-1/1000_Genomes/NYGC_30x/GRCh38/autosomes_phased.mt" }, "gcp": { - "eu": "gs://hail-datasets-eu/1000_Genomes_autosomes.phase_3.GRCh37.mt", - "us": "gs://hail-datasets-us/1000_Genomes_autosomes.phase_3.GRCh37.mt" + "eu": "gs://hail-datasets-eu/1000_Genomes/NYGC_30x/GRCh38/autosomes_phased.mt", + "us": "gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/autosomes_phased.mt" } }, - "version": "phase_3" + "version": "NYGC_30x_phased" }, { "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/1000_Genomes_autosomes.phase_3.GRCh38.mt" + "us": "s3://hail-datasets-us-east-1/1000_Genomes/NYGC_30x/GRCh38/autosomes_unphased.mt" + }, + "gcp": { + "eu": "gs://hail-datasets-eu/1000_Genomes/NYGC_30x/GRCh38/autosomes_unphased.mt", + "us": "gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/autosomes_unphased.mt" + } + }, + "version": "NYGC_30x_unphased" + } + ] + }, + "1000_Genomes_HighCov_chrX": { + "description": "1000 Genomes Project: The New York Genome Center (NYGC), funded by NHGRI, has sequenced 3202 samples from the 1000 Genomes Project sample collection to 30x coverage. Initially, the 2504 unrelated samples from the phase three panel from the 1000 Genomes Project were sequenced. 
Thereafter, an additional 698 samples, related to samples in the 2504 panel, were also sequenced.", + "url": "https://www.internationalgenome.org/data-portal/data-collection/30x-grch38", + "versions": [ + { + "reference_genome": "GRCh38", + "url": { + "aws": { + "us": "s3://hail-datasets-us-east-1/1000_Genomes/NYGC_30x/GRCh38/chrX_phased.mt" + }, + "gcp": { + "eu": "gs://hail-datasets-eu/1000_Genomes/NYGC_30x/GRCh38/chrX_phased.mt", + "us": "gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/chrX_phased.mt" + } + }, + "version": "NYGC_30x_phased" + }, + { + "reference_genome": "GRCh38", + "url": { + "aws": { + "us": "s3://hail-datasets-us-east-1/1000_Genomes/NYGC_30x/GRCh38/chrX_unphased.mt" + }, + "gcp": { + "eu": "gs://hail-datasets-eu/1000_Genomes/NYGC_30x/GRCh38/chrX_unphased.mt", + "us": "gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/chrX_unphased.mt" + } + }, + "version": "NYGC_30x_unphased" + } + ] + }, + "1000_Genomes_HighCov_chrY": { + "description": "1000 Genomes Project: The New York Genome Center (NYGC), funded by NHGRI, has sequenced 3202 samples from the 1000 Genomes Project sample collection to 30x coverage. Initially, the 2504 unrelated samples from the phase three panel from the 1000 Genomes Project were sequenced. Thereafter, an additional 698 samples, related to samples in the 2504 panel, were also sequenced.", + "url": "https://www.internationalgenome.org/data-portal/data-collection/30x-grch38", + "versions": [ + { + "reference_genome": "GRCh38", + "url": { + "aws": { + "us": "s3://hail-datasets-us-east-1/1000_Genomes/NYGC_30x/GRCh38/chrY_unphased.mt" + }, + "gcp": { + "eu": "gs://hail-datasets-eu/1000_Genomes/NYGC_30x/GRCh38/chrY_unphased.mt", + "us": "gs://hail-datasets-us/1000_Genomes/NYGC_30x/GRCh38/chrY_unphased.mt" + } + }, + "version": "NYGC_30x_unphased" + } + ] + }, + "1000_Genomes_Retracted_autosomes": { + "description": "1000 Genomes Project: These datasets have been retracted due to a number of known issues on GRCh38, see link for more details.", + "url": "http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/supporting/GRCh38_positions/README_GRCh38_liftover_20170504.txt", + "versions": [ + { + "reference_genome": "GRCh38", + "url": { + "aws": { + "us": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/GRCh38/autosomes.mt" }, "gcp": { - "eu": "gs://hail-datasets-eu/1000_Genomes_autosomes.phase_3.GRCh38.mt", - "us": "gs://hail-datasets-us/1000_Genomes_autosomes.phase_3.GRCh38.mt" + "eu": "gs://hail-datasets-eu/1000_Genomes/phase_3/GRCh38/autosomes.mt", + "us": "gs://hail-datasets-us/1000_Genomes/phase_3/GRCh38/autosomes.mt" } }, "version": "phase_3" } ] }, - "1000_Genomes_chrMT": { - "description": "1000 Genomes Project: the largest public catalogue of human variation and genotype data.", - "url": "https://www.internationalgenome.org/home", + "1000_Genomes_Retracted_chrX": { + "description": "1000 Genomes Project: These datasets have been retracted due to a number of known issues on GRCh38, see link for more details.", + "url": "http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/supporting/GRCh38_positions/README_GRCh38_liftover_20170504.txt", "versions": [ { - "reference_genome": "GRCh37", + "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/1000_Genomes_chrMT.phase_3.GRCh37.mt" + "us": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/GRCh38/chrX.mt" }, "gcp": { - "eu": "gs://hail-datasets-eu/1000_Genomes_chrMT.phase_3.GRCh37.mt", - "us": "gs://hail-datasets-us/1000_Genomes_chrMT.phase_3.GRCh37.mt" + 
"eu": "gs://hail-datasets-eu/1000_Genomes/phase_3/GRCh38/chrX.mt", + "us": "gs://hail-datasets-us/1000_Genomes/phase_3/GRCh38/chrX.mt" } }, "version": "phase_3" } ] }, - "1000_Genomes_chrX": { - "description": "1000 Genomes Project: the largest public catalogue of human variation and genotype data.", + "1000_Genomes_Retracted_chrY": { + "description": "1000 Genomes Project: These datasets have been retracted due to a number of known issues on GRCh38, see link for more details.", + "url": "http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/supporting/GRCh38_positions/README_GRCh38_liftover_20170504.txt", + "versions": [ + { + "reference_genome": "GRCh38", + "url": { + "aws": { + "us": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/GRCh38/chrY.mt" + }, + "gcp": { + "eu": "gs://hail-datasets-eu/1000_Genomes/phase_3/GRCh38/chrY.mt", + "us": "gs://hail-datasets-us/1000_Genomes/phase_3/GRCh38/chrY.mt" + } + }, + "version": "phase_3" + } + ] + }, + "1000_Genomes_autosomes": { + "description": "1000 Genomes Project: The GRCh38 phase_3 version has been retracted, but is still available (see the 1000_Genomes_Retracted_autosomes dataset). For GRCh38, the NYGC 30x coverage autosomes phased dataset is available as 1000_Genomes_HighCov_autosomes.", "url": "https://www.internationalgenome.org/home", "versions": [ { "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/1000_Genomes_chrX.phase_3.GRCh37.mt" + "us": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/GRCh37/autosomes.mt" }, "gcp": { - "eu": "gs://hail-datasets-eu/1000_Genomes_chrX.phase_3.GRCh37.mt", - "us": "gs://hail-datasets-us/1000_Genomes_chrX.phase_3.GRCh37.mt" + "eu": "gs://hail-datasets-eu/1000_Genomes/phase_3/GRCh37/autosomes.mt", + "us": "gs://hail-datasets-us/1000_Genomes/phase_3/GRCh37/autosomes.mt" } }, "version": "phase_3" - }, + } + ] + }, + "1000_Genomes_chrMT": { + "description": "1000 Genomes Project: Mitochondrial chromosome variants.", + "url": "https://www.internationalgenome.org/home", + "versions": [ { - "reference_genome": "GRCh38", + "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/1000_Genomes_chrX.phase_3.GRCh38.mt" + "us": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/GRCh37/chrMT.mt" }, "gcp": { - "eu": "gs://hail-datasets-eu/1000_Genomes_chrX.phase_3.GRCh38.mt", - "us": "gs://hail-datasets-us/1000_Genomes_chrX.phase_3.GRCh38.mt" + "eu": "gs://hail-datasets-eu/1000_Genomes/phase_3/GRCh37/chrMT.mt", + "us": "gs://hail-datasets-us/1000_Genomes/phase_3/GRCh37/chrMT.mt" } }, "version": "phase_3" } ] }, - "1000_Genomes_chrY": { - "description": "1000 Genomes Project: the largest public catalogue of human variation and genotype data.", + "1000_Genomes_chrX": { + "description": "1000 Genomes Project: The GRCh38 phase_3 version has been retracted, but is still available (see the 1000_Genomes_Retracted_chrX dataset). 
For GRCh38, the NYGC 30x coverage chrX phased dataset is available as 1000_Genomes_HighCov_chrX.", "url": "https://www.internationalgenome.org/home", "versions": [ { "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/1000_Genomes_chrY.phase_3.GRCh37.mt" + "us": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/GRCh37/chrX.mt" }, "gcp": { - "eu": "gs://hail-datasets-eu/1000_Genomes_chrY.phase_3.GRCh37.mt", - "us": "gs://hail-datasets-us/1000_Genomes_chrY.phase_3.GRCh37.mt" + "eu": "gs://hail-datasets-eu/1000_Genomes/phase_3/GRCh37/chrX.mt", + "us": "gs://hail-datasets-us/1000_Genomes/phase_3/GRCh37/chrX.mt" } }, "version": "phase_3" - }, + } + ] + }, + "1000_Genomes_chrY": { + "description": "1000 Genomes Project: The GRCh38 phase_3 version has been retracted, but is still available (see the 1000_Genomes_Retracted_chrY dataset). For GRCh38, the NYGC 30x coverage chrY dataset is available as 1000_Genomes_HighCov_chrY.", + "url": "https://www.internationalgenome.org/home", + "versions": [ { - "reference_genome": "GRCh38", + "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/1000_Genomes_chrY.phase_3.GRCh38.mt" + "us": "s3://hail-datasets-us-east-1/1000_Genomes/phase_3/GRCh37/chrY.mt" }, "gcp": { - "eu": "gs://hail-datasets-eu/1000_Genomes_chrY.phase_3.GRCh38.mt", - "us": "gs://hail-datasets-us/1000_Genomes_chrY.phase_3.GRCh38.mt" + "eu": "gs://hail-datasets-eu/1000_Genomes/phase_3/GRCh37/chrY.mt", + "us": "gs://hail-datasets-us/1000_Genomes/phase_3/GRCh37/chrY.mt" } }, "version": "phase_3" @@ -127,11 +228,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/annotations/CADD.v1.4.GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/CADD/v1.4/GRCh37/table.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/annotations/CADD.v1.4.GRCh37.ht", - "us": "gs://hail-datasets-us/annotations/CADD.v1.4.GRCh37.ht" + "eu": "gs://hail-datasets-eu/CADD/v1.4/GRCh37/table.ht", + "us": "gs://hail-datasets-us/CADD/v1.4/GRCh37/table.ht" } }, "version": "1.4" @@ -140,14 +241,40 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/annotations/CADD.v1.4.GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/CADD/v1.4/GRCh38/table.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/annotations/CADD.v1.4.GRCh38.ht", - "us": "gs://hail-datasets-us/annotations/CADD.v1.4.GRCh38.ht" + "eu": "gs://hail-datasets-eu/CADD/v1.4/GRCh38/table.ht", + "us": "gs://hail-datasets-us/CADD/v1.4/GRCh38/table.ht" } }, "version": "1.4" + }, + { + "reference_genome": "GRCh37", + "url": { + "aws": { + "us": "s3://hail-datasets-us-east-1/CADD/v1.6/GRCh37/table.ht" + }, + "gcp": { + "eu": "gs://hail-datasets-eu/CADD/v1.6/GRCh37/table.ht", + "us": "gs://hail-datasets-us/CADD/v1.6/GRCh37/table.ht" + } + }, + "version": "1.6" + }, + { + "reference_genome": "GRCh38", + "url": { + "aws": { + "us": "s3://hail-datasets-us-east-1/CADD/v1.6/GRCh38/table.ht" + }, + "gcp": { + "eu": "gs://hail-datasets-eu/CADD/v1.6/GRCh38/table.ht", + "us": "gs://hail-datasets-us/CADD/v1.6/GRCh38/table.ht" + } + }, + "version": "1.6" } ] }, @@ -164,11 +291,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/annotations/DANN.GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/DANN/GRCh37/table.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/annotations/DANN.GRCh37.ht", - "us": "gs://hail-datasets-us/annotations/DANN.GRCh37.ht" + "eu": "gs://hail-datasets-eu/DANN/GRCh37/table.ht", + "us": 
"gs://hail-datasets-us/DANN/GRCh37/table.ht" } }, "version": null @@ -177,11 +304,11 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/annotations/DANN.GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/DANN/GRCh38/table.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/annotations/DANN.GRCh38.ht", - "us": "gs://hail-datasets-us/annotations/DANN.GRCh38.ht" + "eu": "gs://hail-datasets-eu/DANN/GRCh38/table.ht", + "us": "gs://hail-datasets-us/DANN/GRCh38/table.ht" } }, "version": null @@ -201,11 +328,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/annotations/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/Ensembl/release_95/GRCh37/homo_sapiens_low_complexity_regions.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/annotations/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh37.ht", - "us": "gs://hail-datasets-us/annotations/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh37.ht" + "eu": "gs://hail-datasets-eu/Ensembl/release_95/GRCh37/homo_sapiens_low_complexity_regions.ht", + "us": "gs://hail-datasets-us/Ensembl/release_95/GRCh37/homo_sapiens_low_complexity_regions.ht" } }, "version": "release_95" @@ -214,11 +341,11 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/annotations/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/Ensembl/release_95/GRCh38/homo_sapiens_low_complexity_regions.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/annotations/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh38.ht", - "us": "gs://hail-datasets-us/annotations/Ensembl_homo_sapiens_low_complexity_regions.release_95.GRCh38.ht" + "eu": "gs://hail-datasets-eu/Ensembl/release_95/GRCh38/homo_sapiens_low_complexity_regions.ht", + "us": "gs://hail-datasets-us/Ensembl/release_95/GRCh38/homo_sapiens_low_complexity_regions.ht" } }, "version": "release_95" @@ -238,11 +365,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/annotations/Ensembl_homo_sapiens_reference_genome.release_95.GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/Ensembl/release_95/GRCh37/homo_sapiens_reference_genome.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/annotations/Ensembl_homo_sapiens_reference_genome.release_95.GRCh37.ht", - "us": "gs://hail-datasets-us/annotations/Ensembl_homo_sapiens_reference_genome.release_95.GRCh37.ht" + "eu": "gs://hail-datasets-eu/Ensembl/release_95/GRCh37/homo_sapiens_reference_genome.ht", + "us": "gs://hail-datasets-us/Ensembl/release_95/GRCh37/homo_sapiens_reference_genome.ht" } }, "version": "release_95" @@ -251,11 +378,11 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/annotations/Ensembl_homo_sapiens_reference_genome.release_95.GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/Ensembl/release_95/GRCh38/homo_sapiens_reference_genome.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/annotations/Ensembl_homo_sapiens_reference_genome.release_95.GRCh38.ht", - "us": "gs://hail-datasets-us/annotations/Ensembl_homo_sapiens_reference_genome.release_95.GRCh38.ht" + "eu": "gs://hail-datasets-eu/Ensembl/release_95/GRCh38/homo_sapiens_reference_genome.ht", + "us": "gs://hail-datasets-us/Ensembl/release_95/GRCh38/homo_sapiens_reference_genome.ht" } }, "version": "release_95" @@ -270,11 +397,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": 
"s3://hail-datasets-us-east-1/GTEx_RNA_seq_gene_TPMs.v7.GRCh37.mt" + "us": "s3://hail-datasets-us-east-1/GTEx/v7/GRCh37/RNA_seq_gene_TPMs.mt" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_RNA_seq_gene_TPMs.v7.GRCh37.mt", - "us": "gs://hail-datasets-us/GTEx_RNA_seq_gene_TPMs.v7.GRCh37.mt" + "eu": "gs://hail-datasets-eu/GTEx/v7/GRCh37/RNA_seq_gene_TPMs.mt", + "us": "gs://hail-datasets-us/GTEx/v7/GRCh37/RNA_seq_gene_TPMs.mt" } }, "version": "v7" @@ -289,11 +416,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_RNA_seq_gene_read_counts.v7.GRCh37.mt" + "us": "s3://hail-datasets-us-east-1/GTEx/v7/GRCh37/RNA_seq_gene_read_counts.mt" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_RNA_seq_gene_read_counts.v7.GRCh37.mt", - "us": "gs://hail-datasets-us/GTEx_RNA_seq_gene_read_counts.v7.GRCh37.mt" + "eu": "gs://hail-datasets-eu/GTEx/v7/GRCh37/RNA_seq_gene_read_counts.mt", + "us": "gs://hail-datasets-us/GTEx/v7/GRCh37/RNA_seq_gene_read_counts.mt" } }, "version": "v7" @@ -308,18 +435,18 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_RNA_seq_junction_read_counts.v7.GRCh37.mt" + "us": "s3://hail-datasets-us-east-1/GTEx/v7/GRCh37/RNA_seq_junction_read_counts.mt" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_RNA_seq_junction_read_counts.v7.GRCh37.mt", - "us": "gs://hail-datasets-us/GTEx_RNA_seq_junction_read_counts.v7.GRCh37.mt" + "eu": "gs://hail-datasets-eu/GTEx/v7/GRCh37/RNA_seq_junction_read_counts.mt", + "us": "gs://hail-datasets-us/GTEx/v7/GRCh37/RNA_seq_junction_read_counts.mt" } }, "version": "v7" } ] }, - "GTEx_eQTL_allpairs_Adipose_Subcutaneous": { + "GTEx_eQTL_Adipose_Subcutaneous_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -330,18 +457,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Adipose_Subcutaneous_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Adipose_Subcutaneous_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Adipose_Subcutaneous_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Adipose_Subcutaneous_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Adipose_Subcutaneous_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Adipose_Subcutaneous_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Adipose_Visceral_Omentum": { + "GTEx_eQTL_Adipose_Visceral_Omentum_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -352,18 +479,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Adipose_Visceral_Omentum_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Adipose_Visceral_Omentum_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Adipose_Visceral_Omentum_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Adipose_Visceral_Omentum_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Adipose_Visceral_Omentum_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Adipose_Visceral_Omentum_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Adrenal_Gland": { + "GTEx_eQTL_Adrenal_Gland_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -374,18 +501,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": 
"s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Adrenal_Gland_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Adrenal_Gland_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Adrenal_Gland_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Adrenal_Gland_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Adrenal_Gland_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Adrenal_Gland_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Artery_Aorta": { + "GTEx_eQTL_Artery_Aorta_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -396,18 +523,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Artery_Aorta_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Artery_Aorta_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Artery_Aorta_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Artery_Aorta_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Artery_Aorta_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Artery_Aorta_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Artery_Coronary": { + "GTEx_eQTL_Artery_Coronary_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -418,18 +545,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Artery_Coronary_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Artery_Coronary_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Artery_Coronary_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Artery_Coronary_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Artery_Coronary_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Artery_Coronary_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Artery_Tibial": { + "GTEx_eQTL_Artery_Tibial_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -440,18 +567,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Artery_Tibial_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Artery_Tibial_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Artery_Tibial_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Artery_Tibial_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Artery_Tibial_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Artery_Tibial_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Brain_Amygdala": { + "GTEx_eQTL_Brain_Amygdala_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -462,18 +589,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Amygdala_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Amygdala_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Amygdala_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Amygdala_v8_GRCh38.ht" + "eu": 
"gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Amygdala_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Amygdala_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24": { + "GTEx_eQTL_Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -484,18 +611,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Brain_Caudate_basal_ganglia": { + "GTEx_eQTL_Brain_Caudate_basal_ganglia_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -506,18 +633,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Caudate_basal_ganglia_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Caudate_basal_ganglia_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Caudate_basal_ganglia_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Caudate_basal_ganglia_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Caudate_basal_ganglia_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Caudate_basal_ganglia_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Brain_Cerebellar_Hemisphere": { + "GTEx_eQTL_Brain_Cerebellar_Hemisphere_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -528,18 +655,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Cerebellar_Hemisphere_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Cerebellar_Hemisphere_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Cerebellar_Hemisphere_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Cerebellar_Hemisphere_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Cerebellar_Hemisphere_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Cerebellar_Hemisphere_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Brain_Cerebellum": { + "GTEx_eQTL_Brain_Cerebellum_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -550,18 +677,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Cerebellum_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Cerebellum_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Cerebellum_v8_GRCh38.ht", - "us": 
"gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Cerebellum_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Cerebellum_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Cerebellum_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Brain_Cortex": { + "GTEx_eQTL_Brain_Cortex_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -572,18 +699,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Cortex_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Cortex_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Cortex_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Cortex_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Cortex_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Cortex_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Brain_Frontal_Cortex_BA9": { + "GTEx_eQTL_Brain_Frontal_Cortex_BA9_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -594,18 +721,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Frontal_Cortex_BA9_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Frontal_Cortex_BA9_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Frontal_Cortex_BA9_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Frontal_Cortex_BA9_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Frontal_Cortex_BA9_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Frontal_Cortex_BA9_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Brain_Hippocampus": { + "GTEx_eQTL_Brain_Hippocampus_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -616,18 +743,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Hippocampus_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Hippocampus_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Hippocampus_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Hippocampus_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Hippocampus_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Hippocampus_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Brain_Hypothalamus": { + "GTEx_eQTL_Brain_Hypothalamus_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -638,18 +765,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Hypothalamus_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Hypothalamus_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Hypothalamus_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Hypothalamus_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Hypothalamus_all_snp_gene_associations.ht", + "us": 
"gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Hypothalamus_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia": { + "GTEx_eQTL_Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -660,18 +787,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Brain_Putamen_basal_ganglia": { + "GTEx_eQTL_Brain_Putamen_basal_ganglia_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -682,18 +809,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Putamen_basal_ganglia_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Putamen_basal_ganglia_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Putamen_basal_ganglia_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Putamen_basal_ganglia_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Putamen_basal_ganglia_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Putamen_basal_ganglia_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Brain_Spinal_cord_cervical_c-1": { + "GTEx_eQTL_Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -704,18 +831,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Spinal_cord_cervical_c-1_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Spinal_cord_cervical_c-1_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Spinal_cord_cervical_c-1_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Brain_Substantia_nigra": { + "GTEx_eQTL_Brain_Substantia_nigra_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -726,18 +853,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Brain_Substantia_nigra_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Brain_Substantia_nigra_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Brain_Substantia_nigra_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Brain_Substantia_nigra_v8_GRCh38.ht" + "eu": 
"gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Brain_Substantia_nigra_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Brain_Substantia_nigra_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Breast_Mammary_Tissue": { + "GTEx_eQTL_Breast_Mammary_Tissue_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -748,18 +875,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Breast_Mammary_Tissue_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Breast_Mammary_Tissue_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Breast_Mammary_Tissue_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Breast_Mammary_Tissue_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Breast_Mammary_Tissue_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Breast_Mammary_Tissue_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Cells_Cultured_fibroblasts": { + "GTEx_eQTL_Cells_Cultured_fibroblasts_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -770,18 +897,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Cells_Cultured_fibroblasts_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Cells_Cultured_fibroblasts_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Cells_Cultured_fibroblasts_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Cells_Cultured_fibroblasts_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Cells_Cultured_fibroblasts_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Cells_Cultured_fibroblasts_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Cells_EBV-transformed_lymphocytes": { + "GTEx_eQTL_Cells_EBV-transformed_lymphocytes_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -792,18 +919,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Cells_EBV-transformed_lymphocytes_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Cells_EBV-transformed_lymphocytes_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Cells_EBV-transformed_lymphocytes_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Colon_Sigmoid": { + "GTEx_eQTL_Colon_Sigmoid_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -814,18 +941,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Colon_Sigmoid_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Colon_Sigmoid_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Colon_Sigmoid_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Colon_Sigmoid_v8_GRCh38.ht" + "eu": 
"gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Colon_Sigmoid_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Colon_Sigmoid_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Colon_Transverse": { + "GTEx_eQTL_Colon_Transverse_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -836,18 +963,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Colon_Transverse_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Colon_Transverse_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Colon_Transverse_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Colon_Transverse_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Colon_Transverse_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Colon_Transverse_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Esophagus_Gastroesophageal_Junction": { + "GTEx_eQTL_Esophagus_Gastroesophageal_Junction_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -858,18 +985,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Esophagus_Gastroesophageal_Junction_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Esophagus_Gastroesophageal_Junction_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Esophagus_Gastroesophageal_Junction_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Esophagus_Mucosa": { + "GTEx_eQTL_Esophagus_Mucosa_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -880,18 +1007,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Esophagus_Mucosa_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Esophagus_Mucosa_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Esophagus_Mucosa_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Esophagus_Mucosa_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Esophagus_Mucosa_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Esophagus_Mucosa_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Esophagus_Muscularis": { + "GTEx_eQTL_Esophagus_Muscularis_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -902,18 +1029,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Esophagus_Muscularis_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Esophagus_Muscularis_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Esophagus_Muscularis_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Esophagus_Muscularis_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Esophagus_Muscularis_all_snp_gene_associations.ht", + "us": 
"gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Esophagus_Muscularis_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Heart_Atrial_Appendage": { + "GTEx_eQTL_Heart_Atrial_Appendage_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -924,18 +1051,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Heart_Atrial_Appendage_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Heart_Atrial_Appendage_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Heart_Atrial_Appendage_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Heart_Atrial_Appendage_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Heart_Atrial_Appendage_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Heart_Atrial_Appendage_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Heart_Left_Ventricle": { + "GTEx_eQTL_Heart_Left_Ventricle_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -946,18 +1073,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Heart_Left_Ventricle_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Heart_Left_Ventricle_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Heart_Left_Ventricle_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Heart_Left_Ventricle_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Heart_Left_Ventricle_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Heart_Left_Ventricle_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Kidney_Cortex": { + "GTEx_eQTL_Kidney_Cortex_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -968,18 +1095,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Kidney_Cortex_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Kidney_Cortex_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Kidney_Cortex_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Kidney_Cortex_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Kidney_Cortex_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Kidney_Cortex_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Liver": { + "GTEx_eQTL_Liver_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -990,18 +1117,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Liver_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Liver_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Liver_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Liver_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Liver_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Liver_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Lung": { + "GTEx_eQTL_Lung_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1012,18 +1139,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": 
"s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Lung_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Lung_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Lung_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Lung_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Lung_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Lung_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Minor_Salivary_Gland": { + "GTEx_eQTL_Minor_Salivary_Gland_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1034,18 +1161,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Minor_Salivary_Gland_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Minor_Salivary_Gland_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Minor_Salivary_Gland_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Minor_Salivary_Gland_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Minor_Salivary_Gland_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Minor_Salivary_Gland_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Muscle_Skeletal": { + "GTEx_eQTL_Muscle_Skeletal_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1056,18 +1183,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Muscle_Skeletal_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Muscle_Skeletal_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Muscle_Skeletal_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Muscle_Skeletal_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Muscle_Skeletal_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Muscle_Skeletal_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Nerve_Tibial": { + "GTEx_eQTL_Nerve_Tibial_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1078,18 +1205,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Nerve_Tibial_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Nerve_Tibial_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Nerve_Tibial_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Nerve_Tibial_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Nerve_Tibial_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Nerve_Tibial_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Ovary": { + "GTEx_eQTL_Ovary_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1100,18 +1227,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Ovary_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Ovary_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Ovary_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Ovary_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Ovary_all_snp_gene_associations.ht", + "us": 
"gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Ovary_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Pancreas": { + "GTEx_eQTL_Pancreas_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1122,18 +1249,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Pancreas_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Pancreas_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Pancreas_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Pancreas_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Pancreas_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Pancreas_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Pituitary": { + "GTEx_eQTL_Pituitary_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1144,18 +1271,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Pituitary_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Pituitary_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Pituitary_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Pituitary_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Pituitary_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Pituitary_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Prostate": { + "GTEx_eQTL_Prostate_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1166,18 +1293,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Prostate_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Prostate_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Prostate_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Prostate_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Prostate_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Prostate_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic": { + "GTEx_eQTL_Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1188,18 +1315,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Skin_Sun_Exposed_Lower_leg": { + "GTEx_eQTL_Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1210,18 +1337,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": 
"s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Skin_Sun_Exposed_Lower_leg_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Skin_Sun_Exposed_Lower_leg_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Skin_Sun_Exposed_Lower_leg_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Small_Intestine_Terminal_Ileum": { + "GTEx_eQTL_Small_Intestine_Terminal_Ileum_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1232,18 +1359,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Small_Intestine_Terminal_Ileum_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Small_Intestine_Terminal_Ileum_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Small_Intestine_Terminal_Ileum_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Small_Intestine_Terminal_Ileum_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Small_Intestine_Terminal_Ileum_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Small_Intestine_Terminal_Ileum_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Spleen": { + "GTEx_eQTL_Spleen_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1254,18 +1381,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Spleen_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Spleen_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Spleen_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Spleen_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Spleen_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Spleen_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Stomach": { + "GTEx_eQTL_Stomach_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1276,18 +1403,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Stomach_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Stomach_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Stomach_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Stomach_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Stomach_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Stomach_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Testis": { + "GTEx_eQTL_Testis_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1298,18 +1425,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Testis_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Testis_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Testis_v8_GRCh38.ht", - "us": 
"gs://hail-datasets-us/GTEx_eQTL_allpairs_Testis_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Testis_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Testis_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Thyroid": { + "GTEx_eQTL_Thyroid_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1320,18 +1447,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Thyroid_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Thyroid_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Thyroid_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Thyroid_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Thyroid_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Thyroid_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Uterus": { + "GTEx_eQTL_Uterus_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1342,18 +1469,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Uterus_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Uterus_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Uterus_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Uterus_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Uterus_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Uterus_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Vagina": { + "GTEx_eQTL_Vagina_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1364,18 +1491,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Vagina_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Vagina_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Vagina_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Vagina_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Vagina_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Vagina_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_eQTL_allpairs_Whole_Blood": { + "GTEx_eQTL_Whole_Blood_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1386,18 +1513,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_eQTL_allpairs_Whole_Blood_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/Whole_Blood_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_eQTL_allpairs_Whole_Blood_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_eQTL_allpairs_Whole_Blood_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/Whole_Blood_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/Whole_Blood_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Adipose_Subcutaneous": { + "GTEx_sQTL_Adipose_Subcutaneous_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1408,18 +1535,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": 
"s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Adipose_Subcutaneous_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Adipose_Subcutaneous_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Adipose_Subcutaneous_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Adipose_Subcutaneous_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Adipose_Subcutaneous_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Adipose_Subcutaneous_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Adipose_Visceral_Omentum": { + "GTEx_sQTL_Adipose_Visceral_Omentum_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1430,18 +1557,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Adipose_Visceral_Omentum_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Adipose_Visceral_Omentum_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Adipose_Visceral_Omentum_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Adipose_Visceral_Omentum_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Adipose_Visceral_Omentum_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Adipose_Visceral_Omentum_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Adrenal_Gland": { + "GTEx_sQTL_Adrenal_Gland_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1452,18 +1579,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Adrenal_Gland_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Adrenal_Gland_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Adrenal_Gland_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Adrenal_Gland_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Adrenal_Gland_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Adrenal_Gland_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Artery_Aorta": { + "GTEx_sQTL_Artery_Aorta_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1474,18 +1601,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Artery_Aorta_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Artery_Aorta_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Artery_Aorta_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Artery_Aorta_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Artery_Aorta_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Artery_Aorta_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Artery_Coronary": { + "GTEx_sQTL_Artery_Coronary_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1496,18 +1623,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Artery_Coronary_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Artery_Coronary_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Artery_Coronary_v8_GRCh38.ht", - 
"us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Artery_Coronary_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Artery_Coronary_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Artery_Coronary_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Artery_Tibial": { + "GTEx_sQTL_Artery_Tibial_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1518,18 +1645,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Artery_Tibial_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Artery_Tibial_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Artery_Tibial_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Artery_Tibial_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Artery_Tibial_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Artery_Tibial_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Brain_Amygdala": { + "GTEx_sQTL_Brain_Amygdala_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1540,18 +1667,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Amygdala_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Amygdala_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Amygdala_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Amygdala_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Amygdala_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Amygdala_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24": { + "GTEx_sQTL_Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1562,18 +1689,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Anterior_cingulate_cortex_BA24_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Anterior_cingulate_cortex_BA24_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Brain_Caudate_basal_ganglia": { + "GTEx_sQTL_Brain_Caudate_basal_ganglia_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1584,18 +1711,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Caudate_basal_ganglia_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Caudate_basal_ganglia_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Caudate_basal_ganglia_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Caudate_basal_ganglia_v8_GRCh38.ht" + "eu": 
"gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Caudate_basal_ganglia_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Caudate_basal_ganglia_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Brain_Cerebellar_Hemisphere": { + "GTEx_sQTL_Brain_Cerebellar_Hemisphere_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1606,18 +1733,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Cerebellar_Hemisphere_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Cerebellar_Hemisphere_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Cerebellar_Hemisphere_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Cerebellar_Hemisphere_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Cerebellar_Hemisphere_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Cerebellar_Hemisphere_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Brain_Cerebellum": { + "GTEx_sQTL_Brain_Cerebellum_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1628,18 +1755,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Cerebellum_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Cerebellum_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Cerebellum_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Cerebellum_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Cerebellum_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Cerebellum_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Brain_Cortex": { + "GTEx_sQTL_Brain_Cortex_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1650,18 +1777,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Cortex_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Cortex_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Cortex_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Cortex_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Cortex_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Cortex_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Brain_Frontal_Cortex_BA9": { + "GTEx_sQTL_Brain_Frontal_Cortex_BA9_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1672,18 +1799,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Frontal_Cortex_BA9_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Frontal_Cortex_BA9_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Frontal_Cortex_BA9_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Frontal_Cortex_BA9_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Frontal_Cortex_BA9_all_snp_gene_associations.ht", + "us": 
"gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Frontal_Cortex_BA9_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Brain_Hippocampus": { + "GTEx_sQTL_Brain_Hippocampus_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1694,18 +1821,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Hippocampus_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Hippocampus_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Hippocampus_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Hippocampus_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Hippocampus_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Hippocampus_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Brain_Hypothalamus": { + "GTEx_sQTL_Brain_Hypothalamus_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1716,18 +1843,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Hypothalamus_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Hypothalamus_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Hypothalamus_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Hypothalamus_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Hypothalamus_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Hypothalamus_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia": { + "GTEx_sQTL_Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1738,18 +1865,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Nucleus_accumbens_basal_ganglia_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Nucleus_accumbens_basal_ganglia_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Brain_Putamen_basal_ganglia": { + "GTEx_sQTL_Brain_Putamen_basal_ganglia_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1760,18 +1887,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Putamen_basal_ganglia_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Putamen_basal_ganglia_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Putamen_basal_ganglia_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Putamen_basal_ganglia_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Putamen_basal_ganglia_all_snp_gene_associations.ht", + "us": 
"gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Putamen_basal_ganglia_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Brain_Spinal_cord_cervical_c-1": { + "GTEx_sQTL_Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1782,18 +1909,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Spinal_cord_cervical_c-1_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Spinal_cord_cervical_c-1_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Spinal_cord_cervical_c-1_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Spinal_cord_cervical_c-1_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Brain_Substantia_nigra": { + "GTEx_sQTL_Brain_Substantia_nigra_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1804,18 +1931,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Brain_Substantia_nigra_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Brain_Substantia_nigra_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Brain_Substantia_nigra_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Brain_Substantia_nigra_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Brain_Substantia_nigra_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Brain_Substantia_nigra_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Breast_Mammary_Tissue": { + "GTEx_sQTL_Breast_Mammary_Tissue_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1826,18 +1953,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Breast_Mammary_Tissue_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Breast_Mammary_Tissue_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Breast_Mammary_Tissue_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Breast_Mammary_Tissue_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Breast_Mammary_Tissue_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Breast_Mammary_Tissue_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Cells_Cultured_fibroblasts": { + "GTEx_sQTL_Cells_Cultured_fibroblasts_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1848,18 +1975,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Cells_Cultured_fibroblasts_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Cells_Cultured_fibroblasts_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Cells_Cultured_fibroblasts_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Cells_Cultured_fibroblasts_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Cells_Cultured_fibroblasts_all_snp_gene_associations.ht", + "us": 
"gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Cells_Cultured_fibroblasts_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Cells_EBV-transformed_lymphocytes": { + "GTEx_sQTL_Cells_EBV-transformed_lymphocytes_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1870,18 +1997,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Cells_EBV-transformed_lymphocytes_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Cells_EBV-transformed_lymphocytes_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Cells_EBV-transformed_lymphocytes_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Cells_EBV-transformed_lymphocytes_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Colon_Sigmoid": { + "GTEx_sQTL_Colon_Sigmoid_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1892,18 +2019,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Colon_Sigmoid_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Colon_Sigmoid_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Colon_Sigmoid_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Colon_Sigmoid_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Colon_Sigmoid_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Colon_Sigmoid_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Colon_Transverse": { + "GTEx_sQTL_Colon_Transverse_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1914,18 +2041,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Colon_Transverse_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Colon_Transverse_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Colon_Transverse_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Colon_Transverse_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Colon_Transverse_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Colon_Transverse_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Esophagus_Gastroesophageal_Junction": { + "GTEx_sQTL_Esophagus_Gastroesophageal_Junction_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1936,18 +2063,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Esophagus_Gastroesophageal_Junction_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Esophagus_Gastroesophageal_Junction_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Esophagus_Gastroesophageal_Junction_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.ht", + "us": 
"gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Esophagus_Gastroesophageal_Junction_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Esophagus_Mucosa": { + "GTEx_sQTL_Esophagus_Mucosa_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1958,18 +2085,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Esophagus_Mucosa_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Esophagus_Mucosa_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Esophagus_Mucosa_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Esophagus_Mucosa_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Esophagus_Mucosa_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Esophagus_Mucosa_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Esophagus_Muscularis": { + "GTEx_sQTL_Esophagus_Muscularis_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -1980,18 +2107,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Esophagus_Muscularis_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Esophagus_Muscularis_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Esophagus_Muscularis_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Esophagus_Muscularis_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Esophagus_Muscularis_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Esophagus_Muscularis_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Heart_Atrial_Appendage": { + "GTEx_sQTL_Heart_Atrial_Appendage_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -2002,18 +2129,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Heart_Atrial_Appendage_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Heart_Atrial_Appendage_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Heart_Atrial_Appendage_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Heart_Atrial_Appendage_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Heart_Atrial_Appendage_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Heart_Atrial_Appendage_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Heart_Left_Ventricle": { + "GTEx_sQTL_Heart_Left_Ventricle_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -2024,18 +2151,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Heart_Left_Ventricle_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Heart_Left_Ventricle_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Heart_Left_Ventricle_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Heart_Left_Ventricle_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Heart_Left_Ventricle_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Heart_Left_Ventricle_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Kidney_Cortex": { + 
"GTEx_sQTL_Kidney_Cortex_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -2046,18 +2173,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Kidney_Cortex_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Kidney_Cortex_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Kidney_Cortex_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Kidney_Cortex_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Kidney_Cortex_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Kidney_Cortex_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Liver": { + "GTEx_sQTL_Liver_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -2068,18 +2195,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Liver_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Liver_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Liver_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Liver_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Liver_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Liver_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Lung": { + "GTEx_sQTL_Lung_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -2090,18 +2217,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Lung_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Lung_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Lung_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Lung_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Lung_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Lung_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Minor_Salivary_Gland": { + "GTEx_sQTL_Minor_Salivary_Gland_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -2112,18 +2239,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Minor_Salivary_Gland_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Minor_Salivary_Gland_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Minor_Salivary_Gland_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Minor_Salivary_Gland_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Minor_Salivary_Gland_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Minor_Salivary_Gland_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Muscle_Skeletal": { + "GTEx_sQTL_Muscle_Skeletal_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -2134,18 +2261,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Muscle_Skeletal_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Muscle_Skeletal_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Muscle_Skeletal_v8_GRCh38.ht", - "us": 
"gs://hail-datasets-us/GTEx_sQTL_allpairs_Muscle_Skeletal_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Muscle_Skeletal_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Muscle_Skeletal_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Nerve_Tibial": { + "GTEx_sQTL_Nerve_Tibial_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -2156,18 +2283,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Nerve_Tibial_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Nerve_Tibial_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Nerve_Tibial_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Nerve_Tibial_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Nerve_Tibial_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Nerve_Tibial_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Ovary": { + "GTEx_sQTL_Ovary_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -2178,18 +2305,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Ovary_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Ovary_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Ovary_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Ovary_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Ovary_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Ovary_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Pancreas": { + "GTEx_sQTL_Pancreas_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -2200,18 +2327,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Pancreas_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Pancreas_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Pancreas_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Pancreas_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Pancreas_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Pancreas_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Pituitary": { + "GTEx_sQTL_Pituitary_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -2222,18 +2349,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Pituitary_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Pituitary_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Pituitary_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Pituitary_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Pituitary_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Pituitary_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Prostate": { + "GTEx_sQTL_Prostate_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -2244,18 +2371,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": 
"s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Prostate_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Prostate_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Prostate_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Prostate_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Prostate_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Prostate_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic": { + "GTEx_sQTL_Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -2266,18 +2393,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Skin_Not_Sun_Exposed_Suprapubic_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Skin_Not_Sun_Exposed_Suprapubic_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Skin_Sun_Exposed_Lower_leg": { + "GTEx_sQTL_Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -2288,18 +2415,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Skin_Sun_Exposed_Lower_leg_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Skin_Sun_Exposed_Lower_leg_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Skin_Sun_Exposed_Lower_leg_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Skin_Sun_Exposed_Lower_leg_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Small_Intestine_Terminal_Ileum": { + "GTEx_sQTL_Small_Intestine_Terminal_Ileum_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -2310,18 +2437,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Small_Intestine_Terminal_Ileum_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Small_Intestine_Terminal_Ileum_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Small_Intestine_Terminal_Ileum_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Small_Intestine_Terminal_Ileum_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Small_Intestine_Terminal_Ileum_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Small_Intestine_Terminal_Ileum_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Spleen": { + "GTEx_sQTL_Spleen_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -2332,18 +2459,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": 
"s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Spleen_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Spleen_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Spleen_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Spleen_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Spleen_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Spleen_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Stomach": { + "GTEx_sQTL_Stomach_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -2354,18 +2481,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Stomach_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Stomach_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Stomach_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Stomach_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Stomach_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Stomach_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Testis": { + "GTEx_sQTL_Testis_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -2376,18 +2503,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Testis_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Testis_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Testis_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Testis_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Testis_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Testis_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Thyroid": { + "GTEx_sQTL_Thyroid_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -2398,18 +2525,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Thyroid_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Thyroid_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Thyroid_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Thyroid_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Thyroid_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Thyroid_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Uterus": { + "GTEx_sQTL_Uterus_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -2420,18 +2547,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Uterus_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Uterus_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Uterus_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Uterus_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Uterus_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Uterus_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Vagina": { + "GTEx_sQTL_Vagina_all_snp_gene_associations": { 
"annotation_db": { "key_properties": [] }, @@ -2442,18 +2569,18 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Vagina_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Vagina_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Vagina_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Vagina_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Vagina_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Vagina_all_snp_gene_associations.ht" } }, "version": "v8" } ] }, - "GTEx_sQTL_allpairs_Whole_Blood": { + "GTEx_sQTL_Whole_Blood_all_snp_gene_associations": { "annotation_db": { "key_properties": [] }, @@ -2464,11 +2591,11 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx_sQTL_allpairs_Whole_Blood_v8_GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GTEx/v8/sQTL/GRCh38/Whole_Blood_all_snp_gene_associations.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/GTEx_sQTL_allpairs_Whole_Blood_v8_GRCh38.ht", - "us": "gs://hail-datasets-us/GTEx_sQTL_allpairs_Whole_Blood_v8_GRCh38.ht" + "eu": "gs://hail-datasets-eu/GTEx/v8/sQTL/GRCh38/Whole_Blood_all_snp_gene_associations.ht", + "us": "gs://hail-datasets-us/GTEx/v8/sQTL/GRCh38/Whole_Blood_all_snp_gene_associations.ht" } }, "version": "v8" @@ -2483,11 +2610,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/UK_Biobank_Rapid_GWAS_both_sexes.v2.GRCh37.mt" + "us": "s3://hail-datasets-us-east-1/UK_Biobank/Rapid_GWAS/v2/GRCh37/both_sexes.mt" }, "gcp": { - "eu": "gs://hail-datasets-eu/UK_Biobank_Rapid_GWAS_both_sexes.v2.GRCh37.mt", - "us": "gs://hail-datasets-us/UK_Biobank_Rapid_GWAS_both_sexes.v2.GRCh37.mt" + "eu": "gs://hail-datasets-eu/UK_Biobank/Rapid_GWAS/v2/GRCh37/both_sexes.mt", + "us": "gs://hail-datasets-us/UK_Biobank/Rapid_GWAS/v2/GRCh37/both_sexes.mt" } }, "version": "v2" @@ -2502,11 +2629,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/UK_Biobank_Rapid_GWAS_female.v2.GRCh37.mt" + "us": "s3://hail-datasets-us-east-1/UK_Biobank/Rapid_GWAS/v2/GRCh37/female.mt" }, "gcp": { - "eu": "gs://hail-datasets-eu/UK_Biobank_Rapid_GWAS_female.v2.GRCh37.mt", - "us": "gs://hail-datasets-us/UK_Biobank_Rapid_GWAS_female.v2.GRCh37.mt" + "eu": "gs://hail-datasets-eu/UK_Biobank/Rapid_GWAS/v2/GRCh37/female.mt", + "us": "gs://hail-datasets-us/UK_Biobank/Rapid_GWAS/v2/GRCh37/female.mt" } }, "version": "v2" @@ -2521,11 +2648,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/UK_Biobank_Rapid_GWAS_male.v2.GRCh37.mt" + "us": "s3://hail-datasets-us-east-1/UK_Biobank/Rapid_GWAS/v2/GRCh37/male.mt" }, "gcp": { - "eu": "gs://hail-datasets-eu/UK_Biobank_Rapid_GWAS_male.v2.GRCh37.mt", - "us": "gs://hail-datasets-us/UK_Biobank_Rapid_GWAS_male.v2.GRCh37.mt" + "eu": "gs://hail-datasets-eu/UK_Biobank/Rapid_GWAS/v2/GRCh37/male.mt", + "us": "gs://hail-datasets-us/UK_Biobank/Rapid_GWAS/v2/GRCh37/male.mt" } }, "version": "v2" @@ -2546,11 +2673,11 @@ "reference_genome": null, "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/annotations/gene_specific_summary_2019-07.txt.gz.ht" + "us": "s3://hail-datasets-us-east-1/ClinVar/2019-07/gene_specific_summary.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/annotations/gene_specific_summary_2019-07.txt.gz.ht", - "us": "gs://hail-datasets-us/annotations/gene_specific_summary_2019-07.txt.gz.ht" + "eu": 
"gs://hail-datasets-eu/ClinVar/2019-07/gene_specific_summary.ht", + "us": "gs://hail-datasets-us/ClinVar/2019-07/gene_specific_summary.ht" } }, "version": "2019-07" @@ -2568,11 +2695,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/annotations/variant_summary_2019-07.GRCh37.txt.gz.ht" + "us": "s3://hail-datasets-us-east-1/ClinVar/2019-07/GRCh37/variant_summary.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/annotations/variant_summary_2019-07.GRCh37.txt.gz.ht", - "us": "gs://hail-datasets-us/annotations/variant_summary_2019-07.GRCh37.txt.gz.ht" + "eu": "gs://hail-datasets-eu/ClinVar/2019-07/GRCh37/variant_summary.ht", + "us": "gs://hail-datasets-us/ClinVar/2019-07/GRCh37/variant_summary.ht" } }, "version": "2019-07" @@ -2581,11 +2708,11 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/annotations/variant_summary_2019-07.GRCh38.txt.gz.ht" + "us": "s3://hail-datasets-us-east-1/ClinVar/2019-07/GRCh38/variant_summary.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/annotations/variant_summary_2019-07.GRCh38.txt.gz.ht", - "us": "gs://hail-datasets-us/annotations/variant_summary_2019-07.GRCh38.txt.gz.ht" + "eu": "gs://hail-datasets-eu/ClinVar/2019-07/GRCh38/variant_summary.ht", + "us": "gs://hail-datasets-us/ClinVar/2019-07/GRCh38/variant_summary.ht" } }, "version": "2019-07" @@ -2606,11 +2733,11 @@ "reference_genome": null, "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/annotations/dbnsfp/dbNSFP4.0_gene.complete.bgz.ht" + "us": "s3://hail-datasets-us-east-1/dbNSFP/v4.0a/gene_complete.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/annotations/dbnsfp/dbNSFP4.0_gene.complete.bgz.ht", - "us": "gs://hail-datasets-us/annotations/dbnsfp/dbNSFP4.0_gene.complete.bgz.ht" + "eu": "gs://hail-datasets-eu/dbNSFP/v4.0a/gene_complete.ht", + "us": "gs://hail-datasets-us/dbNSFP/v4.0a/gene_complete.ht" } }, "version": "4.0" @@ -2628,11 +2755,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/annotations/dbnsfp4.0a.GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/dbNSFP/v4.0a/GRCh37/variant.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/annotations/dbnsfp4.0a.GRCh37.ht", - "us": "gs://hail-datasets-us/annotations/dbnsfp4.0a.GRCh37.ht" + "eu": "gs://hail-datasets-eu/dbNSFP/v4.0a/GRCh37/variant.ht", + "us": "gs://hail-datasets-us/dbNSFP/v4.0a/GRCh37/variant.ht" } }, "version": "4.0" @@ -2641,17 +2768,87 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/annotations/dbnsfp4.0a.GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/dbNSFP/v4.0a/GRCh38/variant.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/annotations/dbnsfp4.0a.GRCh38.ht", - "us": "gs://hail-datasets-us/annotations/dbnsfp4.0a.GRCh38.ht" + "eu": "gs://hail-datasets-eu/dbNSFP/v4.0a/GRCh38/variant.ht", + "us": "gs://hail-datasets-us/dbNSFP/v4.0a/GRCh38/variant.ht" } }, "version": "4.0" } ] }, + "dbSNP": { + "annotation_db": { + "key_properties": [] + }, + "description": "dbSNP: Reference SNP (rs or RefSNP) Hail Table. 
The database includes both common and rare single-base nucleotide variation (SNV), short (=< 50bp) deletion/insertion polymorphisms, and other classes of small genetic variations.", + "url": "https://www.ncbi.nlm.nih.gov/snp/docs/RefSNP_about/", + "versions": [ + { + "reference_genome": "GRCh37", + "url": { + "aws": { + "us": "s3://hail-datasets-us-east-1/dbSNP/build_154/GRCh37/full_table.ht" + }, + "gcp": { + "eu": "gs://hail-datasets-eu/dbSNP/build_154/GRCh37/full_table.ht", + "us": "gs://hail-datasets-us/dbSNP/build_154/GRCh37/full_table.ht" + } + }, + "version": "154" + }, + { + "reference_genome": "GRCh38", + "url": { + "aws": { + "us": "s3://hail-datasets-us-east-1/dbSNP/build_154/GRCh38/full_table.ht" + }, + "gcp": { + "eu": "gs://hail-datasets-eu/dbSNP/build_154/GRCh38/full_table.ht", + "us": "gs://hail-datasets-us/dbSNP/build_154/GRCh38/full_table.ht" + } + }, + "version": "154" + } + ] + }, + "dbSNP_rsid": { + "annotation_db": { + "key_properties": [] + }, + "description": "dbSNP: This Hail Table contains a mapping from locus/allele pairs to Reference SNP IDs (rsID). For the full dataset, see dbSNP.", + "url": "https://www.ncbi.nlm.nih.gov/snp/docs/RefSNP_about/", + "versions": [ + { + "reference_genome": "GRCh37", + "url": { + "aws": { + "us": "s3://hail-datasets-us-east-1/dbSNP/build_154/GRCh37/rsid_only_table.ht" + }, + "gcp": { + "eu": "gs://hail-datasets-eu/dbSNP/build_154/GRCh37/rsid_only_table.ht", + "us": "gs://hail-datasets-us/dbSNP/build_154/GRCh37/rsid_only_table.ht" + } + }, + "version": "154" + }, + { + "reference_genome": "GRCh38", + "url": { + "aws": { + "us": "s3://hail-datasets-us-east-1/dbSNP/build_154/GRCh38/rsid_only_table.ht" + }, + "gcp": { + "eu": "gs://hail-datasets-eu/dbSNP/build_154/GRCh38/rsid_only_table.ht", + "us": "gs://hail-datasets-us/dbSNP/build_154/GRCh38/rsid_only_table.ht" + } + }, + "version": "154" + } + ] + }, "gencode": { "annotation_db": { "key_properties": [] @@ -2663,11 +2860,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/annotations/gencode.v19.annotation.GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/GENCODE/v19/GRCh37/annotation.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/annotations/gencode.v19.annotation.GRCh37.ht", - "us": "gs://hail-datasets-us/annotations/gencode.v19.annotation.GRCh37.ht" + "eu": "gs://hail-datasets-eu/GENCODE/v19/GRCh37/annotation.ht", + "us": "gs://hail-datasets-us/GENCODE/v19/GRCh37/annotation.ht" } }, "version": "v19" @@ -2676,11 +2873,11 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/annotations/gencode.v31.annotation.GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GENCODE/v31/GRCh38/annotation.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/annotations/gencode.v31.annotation.GRCh38.ht", - "us": "gs://hail-datasets-us/annotations/gencode.v31.annotation.GRCh38.ht" + "eu": "gs://hail-datasets-eu/GENCODE/v31/GRCh38/annotation.ht", + "us": "gs://hail-datasets-us/GENCODE/v31/GRCh38/annotation.ht" } }, "version": "v31" @@ -2700,11 +2897,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/annotations/GERP_elements.GERP++.GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/GERP/GERP++/GRCh37/elements.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/annotations/GERP_elements.GERP++.GRCh37.ht", - "us": "gs://hail-datasets-us/annotations/GERP_elements.GERP++.GRCh37.ht" + "eu": "gs://hail-datasets-eu/GERP/GERP++/GRCh37/elements.ht", + "us": "gs://hail-datasets-us/GERP/GERP++/GRCh37/elements.ht" 
} }, "version": "hg19" @@ -2713,11 +2910,11 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/annotations/GERP_elements.GERP++.GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GERP/GERP++/GRCh38/elements.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/annotations/GERP_elements.GERP++.GRCh38.ht", - "us": "gs://hail-datasets-us/annotations/GERP_elements.GERP++.GRCh38.ht" + "eu": "gs://hail-datasets-eu/GERP/GERP++/GRCh38/elements.ht", + "us": "gs://hail-datasets-us/GERP/GERP++/GRCh38/elements.ht" } }, "version": "hg19" @@ -2737,11 +2934,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/annotations/GERP_scores.GERP++.GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/GERP/GERP++/GRCh37/scores.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/annotations/GERP_scores.GERP++.GRCh37.ht", - "us": "gs://hail-datasets-us/annotations/GERP_scores.GERP++.GRCh37.ht" + "eu": "gs://hail-datasets-eu/GERP/GERP++/GRCh37/scores.ht", + "us": "gs://hail-datasets-us/GERP/GERP++/GRCh37/scores.ht" } }, "version": "hg19" @@ -2750,11 +2947,11 @@ "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/annotations/GERP_scores.GERP++.GRCh38.ht" + "us": "s3://hail-datasets-us-east-1/GERP/GERP++/GRCh38/scores.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/annotations/GERP_scores.GERP++.GRCh38.ht", - "us": "gs://hail-datasets-us/annotations/GERP_scores.GERP++.GRCh38.ht" + "eu": "gs://hail-datasets-eu/GERP/GERP++/GRCh38/scores.ht", + "us": "gs://hail-datasets-us/GERP/GERP++/GRCh38/scores.ht" } }, "version": "hg19" @@ -2774,11 +2971,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/giant_bmi_exome_AFR_2018_GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/bmi_AFR.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/giant_bmi_exome_AFR_2018_GRCh37.ht", - "us": "gs://hail-datasets-us/giant_bmi_exome_AFR_2018_GRCh37.ht" + "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/bmi_AFR.ht", + "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_AFR.ht" } }, "version": "2018" @@ -2798,11 +2995,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/giant_bmi_exome_ALL_2018_GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/bmi_ALL.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/giant_bmi_exome_ALL_2018_GRCh37.ht", - "us": "gs://hail-datasets-us/giant_bmi_exome_ALL_2018_GRCh37.ht" + "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/bmi_ALL.ht", + "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_ALL.ht" } }, "version": "2018" @@ -2822,11 +3019,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/giant_bmi_exome_AMR_2018_GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/bmi_AMR.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/giant_bmi_exome_AMR_2018_GRCh37.ht", - "us": "gs://hail-datasets-us/giant_bmi_exome_AMR_2018_GRCh37.ht" + "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/bmi_AMR.ht", + "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_AMR.ht" } }, "version": "2018" @@ -2846,11 +3043,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/giant_bmi_exome_EAS_2018_GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/bmi_EAS.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/giant_bmi_exome_EAS_2018_GRCh37.ht", - "us": 
"gs://hail-datasets-us/giant_bmi_exome_EAS_2018_GRCh37.ht" + "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/bmi_EAS.ht", + "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_EAS.ht" } }, "version": "2018" @@ -2870,11 +3067,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/giant_bmi_exome_EUR_2018_GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/bmi_EUR.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/giant_bmi_exome_EUR_2018_GRCh37.ht", - "us": "gs://hail-datasets-us/giant_bmi_exome_EUR_2018_GRCh37.ht" + "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/bmi_EUR.ht", + "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_EUR.ht" } }, "version": "2018" @@ -2894,11 +3091,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/giant_bmi_exome_SAS_2018_GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/bmi_SAS.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/giant_bmi_exome_SAS_2018_GRCh37.ht", - "us": "gs://hail-datasets-us/giant_bmi_exome_SAS_2018_GRCh37.ht" + "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/bmi_SAS.ht", + "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/bmi_SAS.ht" } }, "version": "2018" @@ -2918,11 +3115,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/giant_height_exome_AFR_2018_GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/height_AFR.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/giant_height_exome_AFR_2018_GRCh37.ht", - "us": "gs://hail-datasets-us/giant_height_exome_AFR_2018_GRCh37.ht" + "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/height_AFR.ht", + "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_AFR.ht" } }, "version": "2018" @@ -2942,11 +3139,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/giant_height_exome_ALL_2018_GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/height_ALL.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/giant_height_exome_ALL_2018_GRCh37.ht", - "us": "gs://hail-datasets-us/giant_height_exome_ALL_2018_GRCh37.ht" + "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/height_ALL.ht", + "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_ALL.ht" } }, "version": "2018" @@ -2966,11 +3163,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/giant_height_exome_AMR_2018_GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/height_AMR.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/giant_height_exome_AMR_2018_GRCh37.ht", - "us": "gs://hail-datasets-us/giant_height_exome_AMR_2018_GRCh37.ht" + "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/height_AMR.ht", + "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_AMR.ht" } }, "version": "2018" @@ -2990,11 +3187,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/giant_height_exome_EAS_2018_GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/height_EAS.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/giant_height_exome_EAS_2018_GRCh37.ht", - "us": "gs://hail-datasets-us/giant_height_exome_EAS_2018_GRCh37.ht" + "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/height_EAS.ht", + "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_EAS.ht" } }, "version": 
"2018" @@ -3014,11 +3211,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/giant_height_exome_EUR_2018_GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/height_EUR.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/giant_height_exome_EUR_2018_GRCh37.ht", - "us": "gs://hail-datasets-us/giant_height_exome_EUR_2018_GRCh37.ht" + "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/height_EUR.ht", + "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_EUR.ht" } }, "version": "2018" @@ -3038,11 +3235,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/giant_height_exome_SAS_2018_GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/height_SAS.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/giant_height_exome_SAS_2018_GRCh37.ht", - "us": "gs://hail-datasets-us/giant_height_exome_SAS_2018_GRCh37.ht" + "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/height_SAS.ht", + "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/height_SAS.ht" } }, "version": "2018" @@ -3062,11 +3259,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/giant_whr_exome_C_ALL_Add_2018_GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_C_ALL_Add.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/giant_whr_exome_C_ALL_Add_2018_GRCh37.ht", - "us": "gs://hail-datasets-us/giant_whr_exome_C_ALL_Add_2018_GRCh37.ht" + "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_C_ALL_Add.ht", + "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_C_ALL_Add.ht" } }, "version": "2018" @@ -3086,11 +3283,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/giant_whr_exome_C_ALL_Rec_2018_GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_C_ALL_Rec.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/giant_whr_exome_C_ALL_Rec_2018_GRCh37.ht", - "us": "gs://hail-datasets-us/giant_whr_exome_C_ALL_Rec_2018_GRCh37.ht" + "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_C_ALL_Rec.ht", + "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_C_ALL_Rec.ht" } }, "version": "2018" @@ -3110,11 +3307,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/giant_whr_exome_C_EUR_Add_2018_GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_C_EUR_Add.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/giant_whr_exome_C_EUR_Add_2018_GRCh37.ht", - "us": "gs://hail-datasets-us/giant_whr_exome_C_EUR_Add_2018_GRCh37.ht" + "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_C_EUR_Add.ht", + "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_C_EUR_Add.ht" } }, "version": "2018" @@ -3134,11 +3331,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/giant_whr_exome_C_EUR_Rec_2018_GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_C_EUR_Rec.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/giant_whr_exome_C_EUR_Rec_2018_GRCh37.ht", - "us": "gs://hail-datasets-us/giant_whr_exome_C_EUR_Rec_2018_GRCh37.ht" + "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_C_EUR_Rec.ht", + "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_C_EUR_Rec.ht" } }, "version": "2018" @@ -3158,11 +3355,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": 
"s3://hail-datasets-us-east-1/giant_whr_exome_M_ALL_Add_2018_GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_M_ALL_Add.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/giant_whr_exome_M_ALL_Add_2018_GRCh37.ht", - "us": "gs://hail-datasets-us/giant_whr_exome_M_ALL_Add_2018_GRCh37.ht" + "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_M_ALL_Add.ht", + "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_M_ALL_Add.ht" } }, "version": "2018" @@ -3182,11 +3379,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/giant_whr_exome_M_ALL_Rec_2018_GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_M_ALL_Rec.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/giant_whr_exome_M_ALL_Rec_2018_GRCh37.ht", - "us": "gs://hail-datasets-us/giant_whr_exome_M_ALL_Rec_2018_GRCh37.ht" + "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_M_ALL_Rec.ht", + "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_M_ALL_Rec.ht" } }, "version": "2018" @@ -3206,11 +3403,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/giant_whr_exome_M_EUR_Add_2018_GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_M_EUR_Add.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/giant_whr_exome_M_EUR_Add_2018_GRCh37.ht", - "us": "gs://hail-datasets-us/giant_whr_exome_M_EUR_Add_2018_GRCh37.ht" + "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_M_EUR_Add.ht", + "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_M_EUR_Add.ht" } }, "version": "2018" @@ -3230,11 +3427,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/giant_whr_exome_M_EUR_Rec_2018_GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_M_EUR_Rec.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/giant_whr_exome_M_EUR_Rec_2018_GRCh37.ht", - "us": "gs://hail-datasets-us/giant_whr_exome_M_EUR_Rec_2018_GRCh37.ht" + "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_M_EUR_Rec.ht", + "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_M_EUR_Rec.ht" } }, "version": "2018" @@ -3254,11 +3451,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/giant_whr_exome_W_ALL_Add_2018_GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_W_ALL_Add.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/giant_whr_exome_W_ALL_Add_2018_GRCh37.ht", - "us": "gs://hail-datasets-us/giant_whr_exome_W_ALL_Add_2018_GRCh37.ht" + "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_W_ALL_Add.ht", + "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_W_ALL_Add.ht" } }, "version": "2018" @@ -3278,11 +3475,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/giant_whr_exome_W_ALL_Rec_2018_GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_W_ALL_Rec.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/giant_whr_exome_W_ALL_Rec_2018_GRCh37.ht", - "us": "gs://hail-datasets-us/giant_whr_exome_W_ALL_Rec_2018_GRCh37.ht" + "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_W_ALL_Rec.ht", + "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_W_ALL_Rec.ht" } }, "version": "2018" @@ -3302,11 +3499,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/giant_whr_exome_W_EUR_Add_2018_GRCh37.ht" + "us": 
"s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_W_EUR_Add.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/giant_whr_exome_W_EUR_Add_2018_GRCh37.ht", - "us": "gs://hail-datasets-us/giant_whr_exome_W_EUR_Add_2018_GRCh37.ht" + "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_W_EUR_Add.ht", + "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_W_EUR_Add.ht" } }, "version": "2018" @@ -3326,11 +3523,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/giant_whr_exome_W_EUR_Rec_2018_GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/GIANT/2018_exome_array/GRCh37/whr_W_EUR_Rec.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/giant_whr_exome_W_EUR_Rec_2018_GRCh37.ht", - "us": "gs://hail-datasets-us/giant_whr_exome_W_EUR_Rec_2018_GRCh37.ht" + "eu": "gs://hail-datasets-eu/GIANT/2018_exome_array/GRCh37/whr_W_EUR_Rec.ht", + "us": "gs://hail-datasets-us/GIANT/2018_exome_array/GRCh37/whr_W_EUR_Rec.ht" } }, "version": "2018" @@ -4404,11 +4601,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/annotations/gnomad_v2.1.1_lof_metrics_by_gene.ht" + "us": "s3://hail-datasets-us-east-1/gnomAD/v2.1.1/lof_metrics_by_gene.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/annotations/gnomad_v2.1.1_lof_metrics_by_gene.ht", - "us": "gs://hail-datasets-us/annotations/gnomad_v2.1.1_lof_metrics_by_gene.ht" + "eu": "gs://hail-datasets-eu/gnomAD/v2.1.1/lof_metrics_by_gene.ht", + "us": "gs://hail-datasets-us/gnomAD/v2.1.1/lof_metrics_by_gene.ht" } }, "version": "2.1.1" @@ -4451,11 +4648,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/LDSC_baselineLD_v2.2_ld_scores.GRCh37.ht" + "us": "s3://hail-datasets-us-east-1/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.ht" }, "gcp": { - "eu": "gs://hail-datasets-eu/LDSC_baselineLD_v2.2_ld_scores.GRCh37.ht", - "us": "gs://hail-datasets-us/LDSC_baselineLD_v2.2_ld_scores.GRCh37.ht" + "eu": "gs://hail-datasets-eu/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.ht", + "us": "gs://hail-datasets-us/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.ht" } }, "version": "2.2" @@ -4470,11 +4667,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/LDSC_baselineLD_v2.2_ld_scores.GRCh37.mt" + "us": "s3://hail-datasets-us-east-1/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.mt" }, "gcp": { - "eu": "gs://hail-datasets-eu/LDSC_baselineLD_v2.2_ld_scores.GRCh37.mt", - "us": "gs://hail-datasets-us/LDSC_baselineLD_v2.2_ld_scores.GRCh37.mt" + "eu": "gs://hail-datasets-eu/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.mt", + "us": "gs://hail-datasets-us/LDSC/baseline-LD_v2.2/GRCh37/ld_scores.mt" } }, "version": "2.2" @@ -4483,11 +4680,11 @@ "reference_genome": "GRCh37", "url": { "aws": { - "us": "s3://hail-datasets-us-east-1/LDSC_baseline_v1.1_ld_scores.GRCh37.mt" + "us": "s3://hail-datasets-us-east-1/LDSC/baseline_v1.1/GRCh37/ld_scores.mt" }, "gcp": { - "eu": "gs://hail-datasets-eu/LDSC_baseline_v1.1_ld_scores.GRCh37.mt", - "us": "gs://hail-datasets-us/LDSC_baseline_v1.1_ld_scores.GRCh37.mt" + "eu": "gs://hail-datasets-eu/LDSC/baseline_v1.1/GRCh37/ld_scores.mt", + "us": "gs://hail-datasets-us/LDSC/baseline_v1.1/GRCh37/ld_scores.mt" } }, "version": "1.1" diff --git a/hail/python/hail/experimental/datasets.py b/hail/python/hail/experimental/datasets.py index 5ac483b9791..e7182bef66a 100644 --- a/hail/python/hail/experimental/datasets.py +++ b/hail/python/hail/experimental/datasets.py @@ -6,11 +6,21 @@ import pkg_resources +def _read_dataset(path: str) -> 
Union[hl.Table, hl.MatrixTable, hl.linalg.BlockMatrix]: + if path.endswith('.ht'): + return hl.read_table(path) + elif path.endswith('.mt'): + return hl.read_matrix_table(path) + elif path.endswith('.bm'): + return hl.linalg.BlockMatrix.read(path) + raise ValueError(f'Invalid path: {path}. Can only load datasets with .ht, .mt, or .bm extensions.') + + def load_dataset(name: str, version: Optional[str], reference_genome: Optional[str], region: str = 'us', - cloud: str = 'gcp') -> Union[hl.Table, hl.MatrixTable]: + cloud: str = 'gcp') -> Union[hl.Table, hl.MatrixTable, hl.linalg.BlockMatrix]: """Load a genetic dataset from Hail's repository. Example @@ -104,12 +114,11 @@ def load_dataset(name: str, dataset['reference_genome'] == reference_genome])] assert len(path) == 1 path = path[0] - - if path.endswith('.ht'): - return hl.read_table(path) - elif path.endswith('.mt'): - return hl.read_matrix_table(path) - elif path.endswith('.bm'): - return hl.linalg.BlockMatrix.read(path) - raise ValueError(f'Invalid path {repr(path)}: can only load' - f' datasets with .ht, .mt, or .bm extensions.') + if path.startswith('s3://'): + try: + dataset = _read_dataset(path) + except hl.utils.java.FatalError: + dataset = _read_dataset(path.replace('s3://', 's3a://')) + else: + dataset = _read_dataset(path) + return dataset diff --git a/hail/python/hail/experimental/db.py b/hail/python/hail/experimental/db.py index 344c11a5248..682d7094deb 100644 --- a/hail/python/hail/experimental/db.py +++ b/hail/python/hail/experimental/db.py @@ -5,8 +5,7 @@ import hail as hl import pkg_resources -from hailtop.utils import (retry_response_returning_functions, - external_requests_client_session) +from hailtop.utils import (external_requests_client_session, retry_response_returning_functions) from .lens import MatrixRows, TableRows from ..expr import StructExpression @@ -270,9 +269,13 @@ def index_compatible_version(self, for version in self.versions) if index is not None] if len(compatible_indexed_values) == 0: - raise ValueError(f'Could not find compatible version of' - f' {self.name} for user dataset with' - f' key {key_expr.dtype}.') + versions = [f'{(v.version, v.reference_genome)}' for v in self.versions] + raise ValueError( + f'Could not find compatible version of {self.name} for user' + f' dataset with key {key_expr.dtype}.\n' + f'This annotation dataset is available for the following' + f' versions and reference genome builds: {", ".join(versions)}.' + ) assert len(compatible_indexed_values) == 1, \ f'{key_expr.dtype}, {self.name}, {compatible_indexed_values}' return compatible_indexed_values[0] diff --git a/hail/python/hail/experimental/haplotype_freq_em.py b/hail/python/hail/experimental/haplotype_freq_em.py index 4dd8bd205a5..363d84811e3 100644 --- a/hail/python/hail/experimental/haplotype_freq_em.py +++ b/hail/python/hail/experimental/haplotype_freq_em.py @@ -14,7 +14,7 @@ def haplotype_freq_em(gt_counts) -> ArrayExpression: [AABB, AABb, AAbb, AaBB, AaBb, Aabb, aaBB, aaBb, aabb] The estimated haplotype counts are returned in an array in the following order: - [AB, Ab, aB, ab] + [AB, aB, Ab, ab] Where _A_ and _a_ are the reference and non-reference alleles for the first variant, resp. And _B_ and _b_ are the reference and non-reference alleles for the second variant, resp. 
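The hunk above corrects the documented output order of haplotype_freq_em to [AB, aB, Ab, ab]. A minimal usage sketch follows (illustrative only, not part of the patch); it assumes the function is reachable as hl.experimental.haplotype_freq_em, and the genotype counts are made up:

    import hail as hl

    # Genotype counts in the documented input order:
    # [AABB, AABb, AAbb, AaBB, AaBb, Aabb, aaBB, aaBb, aabb]
    gt_counts = hl.literal([10, 5, 1, 4, 8, 2, 0, 3, 7])

    # EM-estimated haplotype counts, returned in the corrected order [AB, aB, Ab, ab]
    est = hl.experimental.haplotype_freq_em(gt_counts)
    print(hl.eval(est))
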
diff --git a/hail/python/hail/experimental/plots.py b/hail/python/hail/experimental/plots.py index b23c8bb0422..8f0a8537d10 100644 --- a/hail/python/hail/experimental/plots.py +++ b/hail/python/hail/experimental/plots.py @@ -63,7 +63,7 @@ def plot_roc_curve(ht, scores, tp_label='tp', fp_label='fp', colors=None, title= tpr=hl.scan.count_where(ordered_ht[tp_label]) / total_tp, fpr=hl.scan.count_where(ordered_ht[fp_label]) / total_fp, ).key_by().drop('_score') - last_row = hl.utils.range_table(1).key_by().select(score_name=score, score=hl.float64(float('-inf')), tpr=hl.float32(1.0), fpr=hl.float32(1.0)) + last_row = hl.utils.range_table(1).key_by().select(score_name=score, score=hl.float64(float('-inf')), tpr=hl.float64(1.0), fpr=hl.float64(1.0)) ordered_ht = ordered_ht.union(last_row) ordered_ht = ordered_ht.annotate( auc_contrib=hl.or_else((ordered_ht.fpr - hl.scan.max(ordered_ht.fpr)) * ordered_ht.tpr, 0.0) diff --git a/hail/python/hail/experimental/vcf_combiner/__main__.py b/hail/python/hail/experimental/vcf_combiner/__main__.py index 7137dd0c197..150ca7fb9dc 100644 --- a/hail/python/hail/experimental/vcf_combiner/__main__.py +++ b/hail/python/hail/experimental/vcf_combiner/__main__.py @@ -30,7 +30,7 @@ def main(): 'GVCFs will be overridden by the names in sample map.', required=False) parser.add_argument('--branch-factor', type=int, default=CombinerConfig.default_branch_factor, help='Branch factor.') - parser.add_argument('--batch-size', type=int, default=CombinerConfig.default_batch_size, help='Batch size.') + parser.add_argument('--batch-size', type=int, default=CombinerConfig.default_phase1_batch_size, help='Batch size.') parser.add_argument('--target-records', type=int, default=CombinerConfig.default_target_records, help='Target records per partition.') parser.add_argument('--overwrite', help='overwrite the output path', action='store_true') parser.add_argument('--key-by-locus-and-alleles', help='Key by both locus and alleles in the final output.', action='store_true') diff --git a/hail/python/hail/experimental/vcf_combiner/vcf_combiner.py b/hail/python/hail/experimental/vcf_combiner/vcf_combiner.py index c2bc0c9a8fa..5d8310a20cd 100644 --- a/hail/python/hail/experimental/vcf_combiner/vcf_combiner.py +++ b/hail/python/hail/experimental/vcf_combiner/vcf_combiner.py @@ -1,6 +1,7 @@ """An experimental library for combining (g)VCFS into sparse matrix tables""" # these are necessary for the diver script included at the end of this file import math +import os import uuid from typing import Optional, List, Tuple, Dict @@ -11,7 +12,7 @@ from hail.genetics.reference_genome import reference_genome_type from hail.ir import Apply, TableMapRows, MatrixKeyRowsBy, TopLevelReference from hail.typecheck import oneof, sequenceof, typecheck -from hail.utils.java import info, warning +from hail.utils.java import info, warning, Env _transform_rows_function_map = {} _merge_function_map = {} @@ -291,55 +292,70 @@ def combine_gvcfs(mts): return unlocalize(combined) -@typecheck(ht=hl.Table, n=int, reference_genome=reference_genome_type) -def calculate_new_intervals(ht, n, reference_genome): +@typecheck(mt=hl.MatrixTable, desired_average_partition_size=int, tmp_path=str) +def calculate_new_intervals(mt, desired_average_partition_size: int, tmp_path: str): """takes a table, keyed by ['locus', ...] and produces a list of intervals suitable - for repartitioning a combiner matrix table + for repartitioning a combiner matrix table. 
Parameters ---------- - ht : :class:`.Table` - Table / Rows Table to compute new intervals for - n : :obj:`int` - Number of rows each partition should have, (last partition may be smaller) - reference_genome: :class:`str` or :class:`.ReferenceGenome`, optional - Reference genome to use. + mt : :class:`.MatrixTable` + Sparse MT intermediate. + desired_average_partition_size : :obj:`int` + Average target number of rows for each partition. + tmp_path : :obj:`str` + Temporary path for scan checkpointing. Returns ------- - :obj:`List[Interval]` + (:obj:`List[Interval]`, :obj:`.Type`) """ - assert list(ht.key) == ['locus'] - assert ht.locus.dtype == hl.tlocus(reference_genome=reference_genome) + assert list(mt.row_key) == ['locus'] + assert isinstance(mt.locus.dtype, hl.tlocus) + reference_genome = mt.locus.dtype.reference_genome end = hl.Locus(reference_genome.contigs[-1], reference_genome.lengths[reference_genome.contigs[-1]], reference_genome=reference_genome) - n_rows = ht.count() + (n_rows, n_cols) = mt.count() if n_rows == 0: raise ValueError('empty table!') - ht = ht.select() - ht = ht.annotate(x=hl.scan.count()) - ht = ht.annotate(y=ht.x + 1) - ht = ht.filter((ht.x // n != ht.y // n) | (ht.x == (n_rows - 1))) - ht = ht.select() + # split by a weight function that takes into account the number of + # dense entries per row. However, give each row some base weight + # to prevent densify computations from becoming unbalanced (these + # scale roughly linearly with N_ROW * N_COL) + ht = mt.select_rows(weight=hl.agg.count() + (n_cols // 25) + 1).rows().checkpoint(tmp_path) + + total_weight = ht.aggregate(hl.agg.sum(ht.weight)) + partition_weight = int(total_weight / (n_rows / desired_average_partition_size)) + + ht = ht.annotate(cumulative_weight=hl.scan.sum(ht.weight), + last_weight=hl.scan._prev_nonnull(ht.weight), + row_idx=hl.scan.count()) + + def partition_bound(x): + return x - (x % hl.int64(partition_weight)) + + at_partition_bound = partition_bound(ht.cumulative_weight) != partition_bound(ht.cumulative_weight - ht.last_weight) + + ht = ht.filter(at_partition_bound | (ht.row_idx == n_rows - 1)) ht = ht.annotate(start=hl.or_else( hl.scan._prev_nonnull(hl.locus_from_global_position(ht.locus.global_position() + 1, reference_genome=reference_genome)), hl.locus_from_global_position(0, reference_genome=reference_genome))) - ht = ht.key_by() - ht = ht.select(interval=hl.interval(start=ht.start, end=ht.locus, includes_end=True)) + ht = ht.select( + interval=hl.interval(start=hl.struct(locus=ht.start), end=hl.struct(locus=ht.locus), includes_end=True)) + intervals_dtype = hl.tarray(ht.interval.dtype) intervals = ht.aggregate(hl.agg.collect(ht.interval)) - last_st = hl.eval( - hl.locus_from_global_position(hl.literal(intervals[-1].end).global_position() + 1, + hl.locus_from_global_position(hl.literal(intervals[-1].end.locus).global_position() + 1, reference_genome=reference_genome)) - interval = hl.Interval(start=last_st, end=end, includes_end=True) + interval = hl.Interval(start=hl.Struct(locus=last_st), end=hl.Struct(locus=end), includes_end=True) intervals.append(interval) - return intervals + return intervals, intervals_dtype @typecheck(reference_genome=reference_genome_type, interval_size=int) @@ -426,8 +442,9 @@ def __init__(self, class CombinerConfig(object): + default_max_partitions_per_job = 75_000 default_branch_factor = 100 - default_batch_size = 100 + default_phase1_batch_size = 100 default_target_records = 30_000 # These are used to calculate intervals for reading GVCFs in the 
combiner @@ -439,7 +456,7 @@ class CombinerConfig(object): def __init__(self, branch_factor: int = default_branch_factor, - batch_size: int = default_batch_size, + batch_size: int = default_phase1_batch_size, target_records: int = default_target_records): self.branch_factor: int = branch_factor self.batch_size: int = batch_size @@ -461,6 +478,7 @@ def int_ceil(x): file_size.append([1 for _ in range(n_inputs)]) while len(file_size[-1]) > 1: + batch_size_this_phase = self.batch_size if len(file_size) == 1 else 1 last_stage_files = file_size[-1] n = len(last_stage_files) i = 0 @@ -468,7 +486,7 @@ def int_ceil(x): while (i < n): job = [] job_i = 0 - while job_i < self.batch_size and i < n: + while job_i < batch_size_this_phase and i < n: merge = [] merge_i = 0 merge_size = 0 @@ -501,7 +519,7 @@ def int_ceil(x): info(f"GVCF combiner plan:\n" f" Branch factor: {self.branch_factor}\n" - f" Batch size: {self.batch_size}\n" + f" Phase 1 batch size: {self.batch_size}\n" f" Combining {n_inputs} input files in {tree_height} phases with {total_jobs} total jobs.{''.join(phase_strs)}\n") return CombinerPlan(file_size, phases) @@ -517,7 +535,7 @@ def run_combiner(sample_paths: List[str], header: Optional[str] = None, sample_names: Optional[List[str]] = None, branch_factor: int = CombinerConfig.default_branch_factor, - batch_size: int = CombinerConfig.default_batch_size, + batch_size: int = CombinerConfig.default_phase1_batch_size, target_records: int = CombinerConfig.default_target_records, overwrite: bool = False, reference_genome: str = 'default', @@ -644,9 +662,10 @@ def run_combiner(sample_paths: List[str], info(f"Starting phase {phase_i}/{n_phases}, merging {len(files_to_merge)} {merge_str} in {n_jobs} {job_str}.") if phase_i > 1: - intervals = calculate_new_intervals(hl.read_matrix_table(files_to_merge[0]).rows(), - config.target_records, - reference_genome=reference_genome) + intervals, intervals_dtype = calculate_new_intervals(hl.read_matrix_table(files_to_merge[0]), + config.target_records, + os.path.join(tmp_path, + f'phase{phase_i}_interval_checkpoint.ht')) new_files_to_merge = [] @@ -671,7 +690,8 @@ def run_combiner(sample_paths: List[str], reference_genome=reference_genome, contig_recoding=contig_recoding)] else: - mts = [hl.read_matrix_table(path, _intervals=intervals) for path in inputs] + mts = Env.spark_backend("vcf_combiner").read_multiple_matrix_tables(inputs, intervals, + intervals_dtype) merge_mts.append(combine_gvcfs(mts)) diff --git a/hail/python/hail/expr/__init__.py b/hail/python/hail/expr/__init__.py index 5cd92f149f3..9f168591b0b 100644 --- a/hail/python/hail/expr/__init__.py +++ b/hail/python/hail/expr/__init__.py @@ -36,7 +36,7 @@ is_star, is_complex, is_strand_ambiguous, allele_type, hamming, \ mendel_error_code, triangle, downcode, gq_from_pl, parse_call, \ unphased_diploid_gt_index_call, argmax, argmin, zip, _zip_func, enumerate, zip_with_index, map, \ - flatmap, flatten, any, all, filter, sorted, find, group_by, fold, \ + flatmap, starmap, flatten, any, all, filter, sorted, find, group_by, fold, \ array_scan, len, min, nanmin, max, nanmax, mean, median, product, sum, \ cumulative_sum, struct, tuple, set, empty_set, array, empty_array, \ empty_dict, delimit, abs, sign, floor, ceil, float, float32, float64, \ @@ -182,6 +182,7 @@ 'zip_with_index', 'map', 'flatmap', + 'starmap', 'flatten', 'any', 'all', diff --git a/hail/python/hail/expr/expressions/base_expression.py b/hail/python/hail/expr/expressions/base_expression.py index 7c342a3ea9a..7e9f6c13c38 100644 --- 
a/hail/python/hail/expr/expressions/base_expression.py +++ b/hail/python/hail/expr/expressions/base_expression.py @@ -4,7 +4,7 @@ import hail import hail as hl from hail.expr import expressions -from hail.expr.types import HailType, is_numeric, is_compound, tint32, \ +from hail.expr.types import HailType, is_numeric, is_compound, is_setlike, tint32, \ tint64, tfloat32, tfloat64, tstr, tbool, tarray, \ tndarray, tset, tdict, tstruct, ttuple, tinterval, \ tlocus, tcall, from_numpy @@ -162,7 +162,7 @@ def impute_type(x): raise ExpressionException("Hail does not support heterogeneous arrays: " "found list with elements of types {} ".format(list(ts))) return tarray(unified_type) - elif isinstance(x, set): + elif is_setlike(x): if len(x) == 0: raise ExpressionException("Cannot impute type of empty set. Use 'hl.empty_set' to create an empty set.") ts = {impute_type(element) for element in x} @@ -517,7 +517,7 @@ def _promote_numeric(self, typ): def _div_ret_type_f(t): assert is_numeric(t) if t == tint32 or t == tint64: - return tfloat32 + return tfloat64 else: # Float64 or Float32 return t diff --git a/hail/python/hail/expr/expressions/typed_expressions.py b/hail/python/hail/expr/expressions/typed_expressions.py index 56b5e1537ef..47d8a071920 100644 --- a/hail/python/hail/expr/expressions/typed_expressions.py +++ b/hail/python/hail/expr/expressions/typed_expressions.py @@ -14,7 +14,7 @@ tndarray, tlocus, tinterval, is_numeric import hail.ir as ir from hail.typecheck import typecheck, typecheck_method, func_spec, oneof, \ - identity, nullable, tupleof, sliceof, dictof + identity, nullable, tupleof, sliceof, dictof, anyfunc from hail.utils.java import Env, warning from hail.utils.linkedlist import LinkedList from hail.utils.misc import wrap_to_list, wrap_to_tuple, get_nice_field_error, get_nice_attr_error @@ -345,6 +345,29 @@ def transform_ir(array, name, body): assert isinstance(self._type, tarray) return array_map + @typecheck_method(f=anyfunc) + def starmap(self, f): + r"""Transform each element of a collection of tuples. + + Examples + -------- + + >>> hl.eval(hl.array([(1, 2), (2, 3)]).starmap(lambda x, y: x+y)) + [3, 5] + + Parameters + ---------- + f : function ( (\*args) -> :class:`.Expression`) + Function to transform each element of the collection. + + Returns + ------- + :class:`.CollectionExpression`. + Collection where each element has been transformed according to `f`. + """ + + return self.map(lambda e: f(*e)) + def length(self): """Returns the size of a collection. @@ -2179,27 +2202,10 @@ def __truediv__(self, other): :class:`.NumericExpression` The left number divided by the left. """ - - def ret_type_f(t): - assert is_numeric(t) - if t == tint32 or t == tint64: - return tfloat32 - else: - # Float64 or Float32 - return t - - return self._bin_op_numeric("/", other, ret_type_f) + return self._bin_op_numeric("/", other, self._div_ret_type_f) def __rtruediv__(self, other): - def ret_type_f(t): - assert is_numeric(t) - if t == tint32 or t == tint64: - return tfloat32 - else: - # float64 or float32 - return t - - return self._bin_op_numeric_reverse("/", other, ret_type_f) + return self._bin_op_numeric_reverse("/", other, self._div_ret_type_f) def __floordiv__(self, other): """Divide two numbers with floor division. 
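With _div_ret_type_f now returning tfloat64 and __truediv__/__rtruediv__ delegating to it, integer-typed true division promotes to float64 rather than float32. A minimal sketch of the resulting behaviour (illustrative only, not part of the patch):

    import hail as hl

    x = hl.int32(7) / hl.int32(2)
    print(x.dtype)     # float64 after this change (previously float32)
    print(hl.eval(x))  # 3.5
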
@@ -3769,20 +3775,41 @@ def shape(self): _opt_long_slice = sliceof(nullable(expr_int64), nullable(expr_int64), nullable(expr_int64)) - @typecheck_method(item=oneof(expr_int64, _opt_long_slice, tupleof(oneof(expr_int64, _opt_long_slice)))) + @typecheck_method(item=nullable(oneof(expr_int64, type(...), _opt_long_slice, tupleof(nullable(oneof(expr_int64, type(...), _opt_long_slice)))))) def __getitem__(self, item): if not isinstance(item, tuple): item = (item,) - if len(item) != self.ndim: - raise ValueError(f'Must specify one index per dimension. ' - f'Expected {self.ndim} dimensions but got {len(item)}') + num_ellipses = len([e for e in item if isinstance(e, type(...))]) + if num_ellipses > 1: + raise IndexError("an index can only have a single ellipsis (\'...\')") + + num_nones = len([x for x in item if x is None]) + list_item = list(item) + + if num_ellipses == 1: + list_types = [type(e) for e in list_item] + ellipsis_location = list_types.index(type(...)) + num_slices_to_add = self.ndim - (len(item) - num_nones) + 1 + no_ellipses = list_item[:ellipsis_location] + [slice(None)] * num_slices_to_add + list_item[ellipsis_location + 1:] + else: + no_ellipses = list_item + + no_nums = [x for x in no_ellipses if ((x is None) or (isinstance(x, slice)))] + indices_nones = [i for i, x in enumerate(no_nums) if x is None] + formatted_item = [x for x in no_ellipses if x is not None] - n_sliced_dims = len([s for s in item if isinstance(s, slice)]) + if len(formatted_item) > self.ndim: + raise IndexError(f'too many indices for array: array is ' + f'{self.ndim}-dimensional, but {len(item)} were indexed') + if len(formatted_item) < self.ndim: + formatted_item += [slice(None, None, None)] * (self.ndim - len(formatted_item)) + + n_sliced_dims = len([s for s in formatted_item if isinstance(s, slice)]) if n_sliced_dims > 0: slices = [] - for i, s in enumerate(item): + for i, s in enumerate(formatted_item): dlen = self.shape[i] if isinstance(s, slice): @@ -3794,6 +3821,7 @@ def __getitem__(self, item): max_bound = hl.if_else(step > 0, dlen, dlen - 1) min_bound = hl.if_else(step > 0, to_expr(0, tint64), to_expr(-1, tint64)) + if s.start is not None: # python treats start < -dlen as None when step < 0: [0,1][-3:0:-1] # and 0 otherwise: [0,1][-3::1] == [0,1][0::1] @@ -3823,15 +3851,35 @@ def __getitem__(self, item): hl.str("Index ") + hl.str(s) + hl.str(f" is out of bounds for axis {i} with size ") + hl.str(dlen) ) slices.append(checked_int) - return construct_expr(ir.NDArraySlice(self._ir, hl.tuple(slices)._ir), - tndarray(self._type.element_type, n_sliced_dims), - self._indices, - self._aggregations) + product = construct_expr(ir.NDArraySlice(self._ir, hl.tuple(slices)._ir), + tndarray(self._type.element_type, n_sliced_dims), + self._indices, + self._aggregations) + + if len(indices_nones) > 0: + reshape_arg = [] + index_non_nones = 0 + for i in range(n_sliced_dims + num_nones): + if i in indices_nones: + reshape_arg.append(1) + else: + reshape_arg.append(product.shape[index_non_nones]) + index_non_nones += 1 + product = product.reshape(tuple(reshape_arg)) - return construct_expr(ir.NDArrayRef(self._ir, [idx._ir for idx in item]), - self._type.element_type, - self._indices, - self._aggregations) + else: + product = construct_expr(ir.NDArrayRef(self._ir, [idx._ir for idx in formatted_item]), + self._type.element_type, + self._indices, + self._aggregations) + + if len(indices_nones) > 0: + reshape_arg = [] + for i in indices_nones: + reshape_arg.append(1) + product = 
hl.nd.array(product).reshape(tuple(reshape_arg)) + + return product @typecheck_method(shape=oneof(expr_int64, tupleof(expr_int64), expr_tuple())) def reshape(self, *shape): diff --git a/hail/python/hail/expr/functions.py b/hail/python/hail/expr/functions.py index 96048e98ff1..e44db3b3631 100644 --- a/hail/python/hail/expr/functions.py +++ b/hail/python/hail/expr/functions.py @@ -25,7 +25,7 @@ from hail.genetics.reference_genome import reference_genome_type, ReferenceGenome import hail.ir as ir from hail.typecheck import (typecheck, nullable, anytype, enumeration, tupleof, - func_spec, oneof, arg_check, args_check) + func_spec, oneof, arg_check, args_check, anyfunc) from hail.utils.java import Env, warning from hail.utils.misc import plural @@ -3614,6 +3614,34 @@ def map(f: Callable, collection): return collection.map(f) +@typecheck(f=anyfunc, + collection=expr_oneof(expr_set(), expr_array(), expr_ndarray())) +def starmap(f: Callable, collection): + r"""Transform each element of a collection of tuples. + + Examples + -------- + + >>> a = [(1, 5), (3, 2), (7, 8)] + + >>> hl.eval(hl.starmap(lambda x, y: hl.if_else(x < y, x, y), a)) + [1, 2, 7] + + Parameters + ---------- + f : function ( (\*args) -> :class:`.Expression`) + Function to transform each element of the collection. + collection : :class:`.ArrayExpression` or :class:`.SetExpression` + Collection expression. + + Returns + ------- + :class:`.ArrayExpression` or :class:`.SetExpression`. + Collection where each element has been transformed by `f`. + """ + return collection.starmap(f) + + @typecheck(x=expr_oneof(expr_set(), expr_array(), expr_dict(), expr_str, expr_tuple(), expr_struct())) def len(x) -> Int32Expression: """Returns the size of a collection or string. @@ -4482,7 +4510,7 @@ def _sort_by(collection, less_than): collection._aggregations) -@typecheck(collection=expr_array(), +@typecheck(collection=expr_oneof(expr_array(), expr_dict(), expr_set()), key=nullable(func_spec(1, expr_any)), reverse=expr_bool) def sorted(collection, @@ -4510,8 +4538,8 @@ def sorted(collection, Parameters ---------- - collection : :class:`.ArrayExpression` - Array to sort. + collection : :class:`.ArrayExpression` or :class:`.SetExpression` or :class:`.DictExpression` + Collection to sort. key: function ( (arg) -> :class:`.Expression`), optional Function to evaluate for each element to compute sort key. reverse : :class:`.BooleanExpression` @@ -4523,6 +4551,9 @@ def sorted(collection, Sorted array. 
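Taken together, the new indexing and collection helpers admit usage like the following sketch (example values are arbitrary; ndarray results are the NumPy values returned by `hl.eval`):

    import hail as hl

    nd = hl.nd.arange(24).reshape((2, 3, 4))
    hl.eval(nd[..., 0]).shape    # (2, 3): '...' expands to full slices over the leading axes
    hl.eval(nd[:, None]).shape   # (2, 1, 3, 4): None (hl.nd.newaxis) inserts a length-1 axis

    # starmap unpacks each tuple into the function's arguments
    hl.eval(hl.starmap(lambda x, y: x * y, [(1, 5), (3, 2)]))   # [5, 6]

    # sorted now also accepts sets and dicts by converting them to arrays first
    hl.eval(hl.sorted(hl.set({3, 1, 2})))                       # [1, 2, 3]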
""" + if not isinstance(collection, ArrayExpression): + collection = hl.array(collection) + def comp(left, right): return (hl.case() .when(hl.is_missing(left), False) diff --git a/hail/python/hail/fs/fs.py b/hail/python/hail/fs/fs.py index 4c4efd7abc2..63c07c316dc 100644 --- a/hail/python/hail/fs/fs.py +++ b/hail/python/hail/fs/fs.py @@ -57,6 +57,10 @@ def remove(self, path: str): def rmtree(self, path: str): pass + @abc.abstractmethod + def supports_scheme(self, scheme: str) -> bool: + pass + def copy_log(self, path: str) -> None: log = Env.hc()._log try: diff --git a/hail/python/hail/fs/google_fs.py b/hail/python/hail/fs/google_fs.py index 454bad8cf82..0206ec0e065 100644 --- a/hail/python/hail/fs/google_fs.py +++ b/hail/python/hail/fs/google_fs.py @@ -139,3 +139,6 @@ def rmtree(self, path: str): if self._is_local(path): rmtree(path) self.client.rm(path, recursive=True) + + def supports_scheme(self, scheme: str) -> bool: + return scheme in ("gs", "") diff --git a/hail/python/hail/fs/hadoop_fs.py b/hail/python/hail/fs/hadoop_fs.py index 114c0a3fe1d..4a77aec7681 100644 --- a/hail/python/hail/fs/hadoop_fs.py +++ b/hail/python/hail/fs/hadoop_fs.py @@ -50,6 +50,9 @@ def remove(self, path: str): def rmtree(self, path: str): return self._jfs.rmtree(path) + def supports_scheme(self, scheme: str) -> bool: + return self._jfs.supportsScheme(scheme) + class HadoopReader(io.RawIOBase): def __init__(self, hfs, path, buffer_size): diff --git a/hail/python/hail/fs/local_fs.py b/hail/python/hail/fs/local_fs.py index dfa03cb5ee9..ad7c5270be6 100644 --- a/hail/python/hail/fs/local_fs.py +++ b/hail/python/hail/fs/local_fs.py @@ -68,3 +68,6 @@ def remove(self, path: str): def rmtree(self, path: str): rmtree(path) + + def supports_scheme(self, scheme: str) -> bool: + return scheme == "" diff --git a/hail/python/hail/ir/ir.py b/hail/python/hail/ir/ir.py index 00ffe7c7b37..22224c2c2d0 100644 --- a/hail/python/hail/ir/ir.py +++ b/hail/python/hail/ir/ir.py @@ -416,6 +416,8 @@ def bound_variables(self): return {n for n, _ in self.params} | {self.name} | super().bound_variables def _compute_type(self, env, agg_env): + for _, b in self.params: + b._compute_type(env, agg_env) self.body._compute_type(_env_bind(env, self.bindings(len(self.params))), agg_env) self._type = self.body.typ @@ -475,7 +477,10 @@ def _compute_type(self, env, agg_env): self.left._compute_type(env, agg_env) self.right._compute_type(env, agg_env) if self.op == '/': - if self.left.typ == tfloat64: + int_types = [tint32, tint64] + if self.left.typ in int_types and self.right.typ in int_types: + self._type = tfloat64 + elif self.left.typ == tfloat64: self._type = tfloat64 else: self._type = tfloat32 diff --git a/hail/python/hail/methods/__init__.py b/hail/python/hail/methods/__init__.py index 79557145dfb..efa3b770974 100644 --- a/hail/python/hail/methods/__init__.py +++ b/hail/python/hail/methods/__init__.py @@ -10,7 +10,7 @@ filter_alleles, filter_alleles_hts, split_multi_hts, balding_nichols_model, ld_prune, row_correlation, ld_matrix, linear_mixed_model, linear_regression_rows, _linear_regression_rows_nd, - logistic_regression_rows, poisson_regression_rows, + logistic_regression_rows, _logistic_regression_rows_nd, poisson_regression_rows, linear_mixed_regression_rows, lambda_gc) from .qc import sample_qc, variant_qc, vep, concordance, nirvana, summarize_variants from .misc import rename_duplicates, maximal_independent_set, filter_intervals @@ -24,6 +24,7 @@ 'linear_regression_rows', '_linear_regression_rows_nd', 'logistic_regression_rows', + 
'_logistic_regression_rows_nd', 'poisson_regression_rows', 'linear_mixed_regression_rows', 'lambda_gc', diff --git a/hail/python/hail/methods/impex.py b/hail/python/hail/methods/impex.py index acc1e89911f..37e26ebd336 100644 --- a/hail/python/hail/methods/impex.py +++ b/hail/python/hail/methods/impex.py @@ -1,24 +1,23 @@ import json import re +from typing import List -from hail.typecheck import typecheck, nullable, oneof, dictof, anytype, \ - sequenceof, enumeration, sized_tupleof, numeric, table_key_type, char -from hail.utils.java import Env, FatalError, jindexed_seq_args, warning -from hail.utils import wrap_to_list -from hail.matrixtable import MatrixTable -from hail.table import Table -from hail.expr.types import hail_type, tarray, tfloat64, tstr, tint32, tstruct, \ - tcall, tbool, tint64, tfloat32 +import hail as hl +from hail import ir from hail.expr import StructExpression, LocusExpression, \ expr_array, expr_float64, expr_str, expr_numeric, expr_call, expr_bool, \ expr_any, \ to_expr, analyze -from hail import ir +from hail.expr.types import hail_type, tarray, tfloat64, tstr, tint32, tstruct, \ + tcall, tbool, tint64, tfloat32 from hail.genetics.reference_genome import reference_genome_type -from hail.methods.misc import require_biallelic, require_row_key_variant, require_row_key_variant_w_struct_locus, require_col_key_str -import hail as hl - -from typing import List +from hail.matrixtable import MatrixTable +from hail.methods.misc import require_biallelic, require_row_key_variant, require_col_key_str +from hail.table import Table +from hail.typecheck import typecheck, nullable, oneof, dictof, anytype, \ + sequenceof, enumeration, sized_tupleof, numeric, table_key_type, char +from hail.utils import wrap_to_list +from hail.utils.java import Env, FatalError, jindexed_seq_args, warning def locus_interval_expr(contig, start, end, includes_start, includes_end, @@ -323,8 +322,7 @@ def export_plink(dataset, output, call=None, fam_id=None, ind_id=None, pat_id=No The default value is ``0.0``. The missing value is ``0.0``. 
""" - require_biallelic(dataset, 'export_plink') - require_row_key_variant_w_struct_locus(dataset, 'export_plink') + require_biallelic(dataset, 'export_plink', tolerate_generic_locus=True) if ind_id is None: require_col_key_str(dataset, "export_plink") diff --git a/hail/python/hail/methods/misc.py b/hail/python/hail/methods/misc.py index 31f945dfad1..82c7e938aaa 100644 --- a/hail/python/hail/methods/misc.py +++ b/hail/python/hail/methods/misc.py @@ -230,9 +230,12 @@ def require_key(table, method): raise ValueError("Method '{}' requires a non-empty key".format(method)) -@typecheck(dataset=MatrixTable, method=str) -def require_biallelic(dataset, method) -> MatrixTable: - require_row_key_variant(dataset, method) +@typecheck(dataset=MatrixTable, method=str, tolerate_generic_locus=bool) +def require_biallelic(dataset, method, tolerate_generic_locus: bool = False) -> MatrixTable: + if tolerate_generic_locus: + require_row_key_variant_w_struct_locus(dataset, method) + else: + require_row_key_variant(dataset, method) return dataset._select_rows(method, hl.case() .when(dataset.alleles.length() == 2, dataset._rvrow) diff --git a/hail/python/hail/methods/statgen.py b/hail/python/hail/methods/statgen.py index 19a56094dc3..1fda56a868a 100644 --- a/hail/python/hail/methods/statgen.py +++ b/hail/python/hail/methods/statgen.py @@ -547,7 +547,7 @@ def process_partition(part): @typecheck(test=enumeration('wald', 'lrt', 'score', 'firth'), - y=oneof(expr_float64, sequenceof(expr_float64), sequenceof(sequenceof(expr_float64))), + y=oneof(expr_float64, sequenceof(expr_float64)), x=expr_float64, covariates=sequenceof(expr_float64), pass_through=sequenceof(oneof(str, Expression))) @@ -815,6 +815,417 @@ def logistic_regression_rows(test, y, x, covariates, pass_through=()) -> hail.Ta return result.persist() +# Helpers for logreg: +def mean_impute(hl_array): + non_missing_mean = hl.mean(hl_array, filter_missing=True) + return hl_array.map(lambda entry: hl.if_else(hl.is_defined(entry), entry, non_missing_mean)) + + +def sigmoid(hl_nd): + return hl_nd.map(lambda x: hl.if_else(x > 0, hl.rbind(hl.exp(x), lambda exped: exped / (exped + 1)), 1 / (1 + hl.exp(-x)))) + + +def nd_max(hl_nd): + return hl.max(hl_nd.reshape(-1)._data_array()) + + +def logreg_fit(X, y, null_fit=None, max_iter=25, tol=1E-6): + assert(X.ndim == 2) + assert(y.ndim == 1) + # X is samples by covs. + # y is length num samples, for one cov. + n = X.shape[0] + m = X.shape[1] + + if null_fit is None: + avg = y.sum() / n + logit_avg = hl.log(avg / (1 - avg)) + b = hl.nd.hstack([hl.nd.array([logit_avg]), hl.nd.zeros((hl.int32(m - 1)))]) + mu = sigmoid(X @ b) + score = X.T @ (y - mu) + # Reshape so we do a rowwise multiply + fisher = X.T @ (X * (mu * (1 - mu)).reshape(-1, 1)) + else: + # num covs used to fit null model. 
+ m0 = null_fit.b.shape[0] + m_diff = m - m0 + + X0 = X[:, 0:m0] + X1 = X[:, m0:] + + b = hl.nd.hstack([null_fit.b, hl.nd.zeros((m_diff,))]) + mu = sigmoid(X @ b) + score = hl.nd.hstack([null_fit.score, X1.T @ (y - mu)]) + + fisher00 = null_fit.fisher + fisher01 = X0.T @ (X1 * (mu * (1 - mu)).reshape(-1, 1)) + fisher10 = fisher01.T + fisher11 = X1.T @ (X1 * (mu * (1 - mu)).reshape(-1, 1)) + + fisher = hl.nd.vstack([ + hl.nd.hstack([fisher00, fisher01]), + hl.nd.hstack([fisher10, fisher11]) + ]) + + # Useful type abbreviations + tvector64 = hl.tndarray(hl.tfloat64, 1) + tmatrix64 = hl.tndarray(hl.tfloat64, 2) + search_return_type = hl.tstruct(b=tvector64, score=tvector64, fisher=tmatrix64, num_iter=hl.tint32, log_lkhd=hl.tfloat64, converged=hl.tbool, exploded=hl.tbool) + + def na(field_name): + return hl.missing(search_return_type[field_name]) + + # Need to do looping now. + def search(recur, cur_iter, b, mu, score, fisher): + delta_b_struct = hl.nd.solve(fisher, score, no_crash=True) + + exploded = delta_b_struct.failed + delta_b = delta_b_struct.solution + max_delta_b = nd_max(delta_b.map(lambda e: hl.abs(e))) + log_lkhd = ((y * mu) + (1 - y) * (1 - mu)).map(lambda e: hl.log(e)).sum() + + def compute_next_iter(cur_iter, b, mu, score, fisher): + cur_iter = cur_iter + 1 + b = b + delta_b + mu = sigmoid(X @ b) + score = X.T @ (y - mu) + fisher = X.T @ (X * (mu * (1 - mu)).reshape(-1, 1)) + return recur(cur_iter, b, mu, score, fisher) + + return (hl.case() + .when(exploded | hl.is_nan(delta_b[0]), hl.struct(b=na('b'), score=na('score'), fisher=na('fisher'), num_iter=cur_iter, log_lkhd=log_lkhd, converged=False, exploded=True)) + .when(cur_iter > max_iter, hl.struct(b=na('b'), score=na('score'), fisher=na('fisher'), num_iter=cur_iter, log_lkhd=log_lkhd, converged=False, exploded=False)) + .when(max_delta_b < tol, hl.struct(b=b, score=score, fisher=fisher, num_iter=cur_iter, log_lkhd=log_lkhd, converged=True, exploded=False)) + .default(compute_next_iter(cur_iter, b, mu, score, fisher))) + + res_struct = hl.experimental.loop(search, search_return_type, 1, b, mu, score, fisher) + + return res_struct + + +def wald_test(X, y, null_fit, link): + assert (link == "logistic") + fit = logreg_fit(X, y, null_fit) + + se = hl.nd.diagonal(hl.nd.inv(fit.fisher)).map(lambda e: hl.sqrt(e)) + z = fit.b / se + p = z.map(lambda e: 2 * hl.pnorm(-hl.abs(e))) + return hl.struct( + beta=fit.b[X.shape[1] - 1], + standard_error=se[X.shape[1] - 1], + z_stat=z[X.shape[1] - 1], + p_value=p[X.shape[1] - 1], + fit=hl.struct(n_iterations=fit.num_iter, converged=fit.converged, exploded=fit.exploded)) + + +def lrt_test(X, y, null_fit, link): + assert (link == "logistic") + fit = logreg_fit(X, y, null_fit) + + chi_sq = hl.if_else(~fit.converged, hl.missing(hl.tfloat64), 2 * (fit.log_lkhd - null_fit.log_lkhd)) + p = hl.pchisqtail(chi_sq, X.shape[1] - null_fit.b.shape[0]) + + return hl.struct( + beta=fit.b[X.shape[1] - 1], + chi_sq_stat=chi_sq, + p_value=p, + fit=hl.struct(n_iterations=fit.num_iter, converged=fit.converged, exploded=fit.exploded)) + + +@typecheck(test=enumeration('wald', 'lrt', 'score', 'firth'), + y=oneof(expr_float64, sequenceof(expr_float64)), + x=expr_float64, + covariates=sequenceof(expr_float64), + pass_through=sequenceof(oneof(str, Expression))) +def _logistic_regression_rows_nd(test, y, x, covariates, pass_through=()) -> hail.Table: + r"""For each row, test an input variable for association with a + binary response variable using logistic regression. 
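For reference, each pass through the `search` loop above performs one Fisher-scoring (Newton) step: with :math:`\mu = \mathrm{sigmoid}(X\beta)`, the code computes

.. math::

    U(\beta) = X^\top (y - \mu), \qquad
    \mathcal{I}(\beta) = X^\top \mathrm{diag}\big(\mu(1 - \mu)\big)\, X, \qquad
    \beta_{t+1} = \beta_t + \mathcal{I}(\beta_t)^{-1} U(\beta_t),

and stops when every coordinate of the step is below `tol`, when `max_iter` is exceeded, or when the linear solve fails (reported as `exploded`).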
+ + Examples + -------- + Run the logistic regression Wald test per variant using a Boolean + phenotype, intercept and two covariates stored in column-indexed + fields: + + >>> result_ht = hl.logistic_regression_rows( + ... test='wald', + ... y=dataset.pheno.is_case, + ... x=dataset.GT.n_alt_alleles(), + ... covariates=[1, dataset.pheno.age, dataset.pheno.is_female]) + + Run the logistic regression Wald test per variant using a list of binary (0/1) + phenotypes, intercept and two covariates stored in column-indexed + fields: + + >>> result_ht = hl.logistic_regression_rows( + ... test='wald', + ... y=[dataset.pheno.is_case, dataset.pheno.is_case], # where pheno values are 0, 1, or missing + ... x=dataset.GT.n_alt_alleles(), + ... covariates=[1, dataset.pheno.age, dataset.pheno.is_female]) + + Warning + ------- + :func:`.logistic_regression_rows` considers the same set of + columns (i.e., samples, points) for every row, namely those columns for + which **all** response variables and covariates are defined. For each row, missing values of + `x` are mean-imputed over these columns. As in the example, the + intercept covariate ``1`` must be included **explicitly** if desired. + + Notes + ----- + This method performs, for each row, a significance test of the input + variable in predicting a binary (case-control) response variable based + on the logistic regression model. The response variable type must either + be numeric (with all present values 0 or 1) or Boolean, in which case + true and false are coded as 1 and 0, respectively. + + Hail supports the Wald test ('wald'), likelihood ratio test ('lrt'), + Rao score test ('score'), and Firth test ('firth'). Hail only includes + columns for which the response variable and all covariates are defined. + For each row, Hail imputes missing input values as the mean of the + non-missing values. + + The example above considers a model of the form + + .. math:: + + \mathrm{Prob}(\mathrm{is\_case}) = + \mathrm{sigmoid}(\beta_0 + \beta_1 \, \mathrm{gt} + + \beta_2 \, \mathrm{age} + + \beta_3 \, \mathrm{is\_female} + \varepsilon), + \quad + \varepsilon \sim \mathrm{N}(0, \sigma^2) + + where :math:`\mathrm{sigmoid}` is the `sigmoid function`_, the genotype + :math:`\mathrm{gt}` is coded as 0 for HomRef, 1 for Het, and 2 for + HomVar, and the Boolean covariate :math:`\mathrm{is\_female}` is coded as + for ``True`` (female) and 0 for ``False`` (male). The null model sets + :math:`\beta_1 = 0`. + + .. _sigmoid function: https://en.wikipedia.org/wiki/Sigmoid_function + + The structure of the emitted row field depends on the test statistic as + shown in the tables below. 
+ + ========== ================== ======= ============================================ + Test Field Type Value + ========== ================== ======= ============================================ + Wald `beta` float64 fit effect coefficient, + :math:`\hat\beta_1` + Wald `standard_error` float64 estimated standard error, + :math:`\widehat{\mathrm{se}}` + Wald `z_stat` float64 Wald :math:`z`-statistic, equal to + :math:`\hat\beta_1 / \widehat{\mathrm{se}}` + Wald `p_value` float64 Wald p-value testing :math:`\beta_1 = 0` + LRT, Firth `beta` float64 fit effect coefficient, + :math:`\hat\beta_1` + LRT, Firth `chi_sq_stat` float64 deviance statistic + LRT, Firth `p_value` float64 LRT / Firth p-value testing + :math:`\beta_1 = 0` + Score `chi_sq_stat` float64 score statistic + Score `p_value` float64 score p-value testing :math:`\beta_1 = 0` + ========== ================== ======= ============================================ + + For the Wald and likelihood ratio tests, Hail fits the logistic model for + each row using Newton iteration and only emits the above fields + when the maximum likelihood estimate of the coefficients converges. The + Firth test uses a modified form of Newton iteration. To help diagnose + convergence issues, Hail also emits three fields which summarize the + iterative fitting process: + + ================ =================== ======= =============================== + Test Field Type Value + ================ =================== ======= =============================== + Wald, LRT, Firth `fit.n_iterations` int32 number of iterations until + convergence, explosion, or + reaching the max (25 for + Wald, LRT; 100 for Firth) + Wald, LRT, Firth `fit.converged` bool ``True`` if iteration converged + Wald, LRT, Firth `fit.exploded` bool ``True`` if iteration exploded + ================ =================== ======= =============================== + + We consider iteration to have converged when every coordinate of + :math:`\beta` changes by less than :math:`10^{-6}`. For Wald and LRT, + up to 25 iterations are attempted; in testing we find 4 or 5 iterations + nearly always suffice. Convergence may also fail due to explosion, + which refers to low-level numerical linear algebra exceptions caused by + manipulating ill-conditioned matrices. Explosion may result from (nearly) + linearly dependent covariates or complete separation_. + + .. _separation: https://en.wikipedia.org/wiki/Separation_(statistics) + + A more common situation in genetics is quasi-complete seperation, e.g. + variants that are observed only in cases (or controls). Such variants + inevitably arise when testing millions of variants with very low minor + allele count. The maximum likelihood estimate of :math:`\beta` under + logistic regression is then undefined but convergence may still occur + after a large number of iterations due to a very flat likelihood + surface. In testing, we find that such variants produce a secondary bump + from 10 to 15 iterations in the histogram of number of iterations per + variant. We also find that this faux convergence produces large standard + errors and large (insignificant) p-values. To not miss such variants, + consider using Firth logistic regression, linear regression, or + group-based tests. + + Here's a concrete illustration of quasi-complete seperation in R. 
Suppose + we have 2010 samples distributed as follows for a particular variant: + + ======= ====== === ====== + Status HomRef Het HomVar + ======= ====== === ====== + Case 1000 10 0 + Control 1000 0 0 + ======= ====== === ====== + + The following R code fits the (standard) logistic, Firth logistic, + and linear regression models to this data, where ``x`` is genotype, + ``y`` is phenotype, and ``logistf`` is from the logistf package: + + .. code-block:: R + + x <- c(rep(0,1000), rep(1,1000), rep(1,10) + y <- c(rep(0,1000), rep(0,1000), rep(1,10)) + logfit <- glm(y ~ x, family=binomial()) + firthfit <- logistf(y ~ x) + linfit <- lm(y ~ x) + + The resulting p-values for the genotype coefficient are 0.991, 0.00085, + and 0.0016, respectively. The erroneous value 0.991 is due to + quasi-complete separation. Moving one of the 10 hets from case to control + eliminates this quasi-complete separation; the p-values from R are then + 0.0373, 0.0111, and 0.0116, respectively, as expected for a less + significant association. + + The Firth test reduces bias from small counts and resolves the issue of + separation by penalizing maximum likelihood estimation by the `Jeffrey's + invariant prior `__. This + test is slower, as both the null and full model must be fit per variant, + and convergence of the modified Newton method is linear rather than + quadratic. For Firth, 100 iterations are attempted for the null model + and, if that is successful, for the full model as well. In testing we + find 20 iterations nearly always suffices. If the null model fails to + converge, then the `logreg.fit` fields reflect the null model; + otherwise, they reflect the full model. + + See + `Recommended joint and meta-analysis strategies for case-control association testing of single low-count variants `__ + for an empirical comparison of the logistic Wald, LRT, score, and Firth + tests. The theoretical foundations of the Wald, likelihood ratio, and score + tests may be found in Chapter 3 of Gesine Reinert's notes + `Statistical Theory `__. + Firth introduced his approach in + `Bias reduction of maximum likelihood estimates, 1993 `__. + Heinze and Schemper further analyze Firth's approach in + `A solution to the problem of separation in logistic regression, 2002 `__. + + Hail's logistic regression tests correspond to the ``b.wald``, + ``b.lrt``, and ``b.score`` tests in `EPACTS`_. For each variant, Hail + imputes missing input values as the mean of non-missing input values, + whereas EPACTS subsets to those samples with called genotypes. Hence, + Hail and EPACTS results will currently only agree for variants with no + missing genotypes. + + .. _EPACTS: http://genome.sph.umich.edu/wiki/EPACTS#Single_Variant_Tests + + Note + ---- + Use the `pass_through` parameter to include additional row fields from + matrix table underlying ``x``. For example, to include an "rsid" field, set + ``pass_through=['rsid']`` or ``pass_through=[mt.rsid]``. + + Parameters + ---------- + test : {'wald', 'lrt', 'score', 'firth'} + Statistical test. + y : :class:`.Float64Expression` or :obj:`list` of :class:`.Float64Expression` + One or more column-indexed response expressions. + All non-missing values must evaluate to 0 or 1. + Note that a :class:`.BooleanExpression` will be implicitly converted to + a :class:`.Float64Expression` with this property. + x : :class:`.Float64Expression` + Entry-indexed expression for input variable. + covariates : :obj:`list` of :class:`.Float64Expression` + Non-empty list of column-indexed covariate expressions. 
+ pass_through : :obj:`list` of :class:`str` or :class:`.Expression` + Additional row fields to include in the resulting table. + + Returns + ------- + :class:`.Table` + """ + if len(covariates) == 0: + raise ValueError('logistic regression requires at least one covariate expression') + + mt = matrix_table_source('logistic_regresion_rows/x', x) + check_entry_indexed('logistic_regresion_rows/x', x) + + y_is_list = isinstance(y, list) + if y_is_list and len(y) == 0: + raise ValueError("'logistic_regression_rows': found no values for 'y'") + y = wrap_to_list(y) + + for e in covariates: + analyze('logistic_regression_rows/covariates', e, mt._col_indices) + + # _warn_if_no_intercept('logistic_regression_rows', covariates) + + x_field_name = Env.get_uid() + y_field_names = [f'__y_{i}' for i in range(len(y))] + num_y_fields = len(y_field_names) + + y_dict = dict(zip(y_field_names, y)) + + cov_field_names = [f'__cov{i}' for i in range(len(covariates))] + row_fields = _get_regression_row_fields(mt, pass_through, 'logistic_regression_rows') + + # Handle filtering columns with missing values: + mt = mt.filter_cols(hl.array(y + covariates).all(hl.is_defined)) + + # FIXME: selecting an existing entry field should be emitted as a SelectFields + mt = mt._select_all(col_exprs=dict(**y_dict, + **dict(zip(cov_field_names, covariates))), + row_exprs=row_fields, + col_key=[], + entry_exprs={x_field_name: x}) + + sample_field_name = "samples" + ht = mt._localize_entries("entries", sample_field_name) + + # cov_nd rows are samples, columns are the different covariates + if covariates: + ht = ht.annotate_globals(cov_nd=hl.nd.array(ht[sample_field_name].map(lambda sample_struct: [sample_struct[cov_name] for cov_name in cov_field_names]))) + else: + ht = ht.annotate_globals(cov_nd=hl.nd.array(ht[sample_field_name].map(lambda sample_struct: hl.empty_array(hl.tfloat64)))) + + # y_nd rows are samples, columns are the various dependent variables. + ht = ht.annotate_globals(y_nd=hl.nd.array(ht[sample_field_name].map(lambda sample_struct: [sample_struct[y_name] for y_name in y_field_names]))) + + # Fit null models, which means doing a logreg fit with just the covariates for each phenotype. + null_models = hl.range(num_y_fields).map(lambda idx: logreg_fit(ht.cov_nd, ht.y_nd[:, idx])) + ht = ht.annotate_globals(nulls=null_models) + ht = ht.transmute(x=hl.nd.array(mean_impute(ht.entries[x_field_name]))) + + if test == "wald": + # For each y vector, need to do wald test. 
+ covs_and_x = hl.nd.hstack([ht.cov_nd, ht.x.reshape((-1, 1))]) + wald_structs = hl.range(num_y_fields).map(lambda idx: wald_test(covs_and_x, ht.y_nd[:, idx], ht.nulls[idx], "logistic")) + ht = ht.annotate(logistic_regression=wald_structs) + elif test == "lrt": + covs_and_x = hl.nd.hstack([ht.cov_nd, ht.x.reshape((-1, 1))]) + lrt_structs = hl.range(num_y_fields).map(lambda idx: lrt_test(covs_and_x, ht.y_nd[:, idx], ht.nulls[idx], "logistic")) + ht = ht.annotate(logistic_regression=lrt_structs) + + else: + raise ValueError("Only support wald and lrt so far") + + if not y_is_list: + ht = ht.transmute(**ht.logistic_regression[0]) + + ht = ht.drop("x") + + return ht + + @typecheck(test=enumeration('wald', 'lrt', 'score'), y=expr_float64, x=expr_float64, @@ -1457,9 +1868,9 @@ def lambda_gc(p_value, approximate=True): def _lambda_gc_agg(p_value, approximate=True): chisq = hl.qchisqtail(p_value, 1) if approximate: - med_chisq = hl.agg.approx_quantiles(chisq, 0.5) + med_chisq = hl.agg.filter(~hl.is_nan(p_value), hl.agg.approx_quantiles(chisq, 0.5)) else: - med_chisq = hl.median(hl.agg.collect(chisq)) + med_chisq = hl.agg.filter(~hl.is_nan(p_value), hl.median(hl.agg.collect(chisq))) return med_chisq / hl.qchisqtail(0.5, 1) diff --git a/hail/python/hail/nd/__init__.py b/hail/python/hail/nd/__init__.py index e68467071d8..711ce6da441 100644 --- a/hail/python/hail/nd/__init__.py +++ b/hail/python/hail/nd/__init__.py @@ -1,7 +1,9 @@ from .nd import array, from_column_major, arange, full, zeros, ones, svd, qr, solve, diagonal, inv, concatenate, \ eye, identity, vstack, hstack +newaxis = None + __all__ = [ 'array', 'from_column_major', 'arange', 'full', 'zeros', 'ones', 'qr', 'solve', 'svd', 'diagonal', 'inv', - 'concatenate', 'eye', 'identity', 'vstack', 'hstack' + 'concatenate', 'eye', 'identity', 'vstack', 'hstack', 'newaxis' ] diff --git a/hail/python/hail/nd/nd.py b/hail/python/hail/nd/nd.py index cca8da0002b..1d20486b505 100644 --- a/hail/python/hail/nd/nd.py +++ b/hail/python/hail/nd/nd.py @@ -221,8 +221,8 @@ def diagonal(nd): return hl.nd.array(hl.range(hl.int32(shape_min)).map(lambda i: nd[i, i])) -@typecheck(a=expr_ndarray(), b=expr_ndarray()) -def solve(a, b): +@typecheck(a=expr_ndarray(), b=expr_ndarray(), no_crash=bool) +def solve(a, b, no_crash=False): """Solve a linear system. 
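With `no_crash=True` (implemented in the body changes just below), a failed factorization is reported in-band as a struct field rather than raised; a usage sketch with an illustrative well-conditioned system:

    import hail as hl

    a_nd = hl.nd.array([[1.0, 2.0], [3.0, 5.0]])
    b_nd = hl.nd.array([1.0, 1.0])

    # Returns a struct with fields 'solution' and 'failed' instead of raising.
    res = hl.nd.solve(a_nd, b_nd, no_crash=True)
    hl.eval(res.failed)     # False for this nonsingular system
    hl.eval(res.solution)   # the solution vector, reshaped to 1-D to match b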
Parameters @@ -251,11 +251,21 @@ def solve(a, b): if b.dtype.element_type != hl.tfloat64: b = b.map(lambda e: hl.float64(e)) - ir = Apply("linear_solve", hl.tndarray(hl.tfloat64, 2), a._ir, b._ir) - result = construct_expr(ir, hl.tndarray(hl.tfloat64, 2), a._indices, a._aggregations) + if no_crash: + name = "linear_solve_no_crash" + return_type = hl.tstruct(solution=hl.tndarray(hl.tfloat64, 2), failed=hl.tbool) + else: + name = "linear_solve" + return_type = hl.tndarray(hl.tfloat64, 2) + + ir = Apply(name, return_type, a._ir, b._ir) + result = construct_expr(ir, return_type, a._indices, a._aggregations) if b_ndim_orig == 1: - result = result.reshape((-1)) + if no_crash: + result = hl.struct(solution=result.solution.reshape((-1)), failed=result.failed) + else: + result = result.reshape((-1)) return result diff --git a/hail/python/hail/table.py b/hail/python/hail/table.py index 9dfa42acc67..4085ba0939c 100644 --- a/hail/python/hail/table.py +++ b/hail/python/hail/table.py @@ -3281,7 +3281,7 @@ def _same(self, other, tolerance=1e-6, absolute=False): from hail.expr.functions import _values_similar if self._type != other._type: - print(f'Table._same: types differ: {self._type}, {other._type}') + print(f'Table._same: types differ:\n {self._type}\n {other._type}') return False left_global_value = Env.get_uid() @@ -3470,6 +3470,10 @@ def multi_way_zip_join(tables, data_field_name, global_field_name) -> 'Table': is exactly one entry in some `data_field_name` array for every row in the inputs. + The :meth:`multi_way_zip_join` method assumes that inputs have distinct + keys. If any input has duplicate keys, the row value that is included + in the result array for that key is undefined. + Parameters ---------- tables : :class:`list` of :class:`Table` diff --git a/hail/python/hail/utils/__init__.py b/hail/python/hail/utils/__init__.py index 0f6b102dcd9..3156b8c9924 100644 --- a/hail/python/hail/utils/__init__.py +++ b/hail/python/hail/utils/__init__.py @@ -1,11 +1,11 @@ from .misc import wrap_to_list, get_env_or_default, uri_path, local_path_uri, new_temp_file, new_local_temp_dir, new_local_temp_file, with_local_temp_file, storage_level, range_matrix_table, range_table, run_command, HailSeedGenerator, timestamp_path, _dumps_partitions, default_handler -from .hadoop_utils import hadoop_copy, hadoop_open, hadoop_exists, hadoop_is_dir, hadoop_is_file, hadoop_ls, hadoop_stat, copy_log +from .hadoop_utils import hadoop_copy, hadoop_open, hadoop_exists, hadoop_is_dir, hadoop_is_file, hadoop_ls, hadoop_scheme_supported, hadoop_stat, copy_log from .struct import Struct from .linkedlist import LinkedList from .interval import Interval from .frozendict import frozendict from .java import error, warning, info, FatalError, HailUserError -from .tutorial import get_1kg, get_movie_lens +from .tutorial import get_1kg, get_hgdp, get_movie_lens from .deduplicate import deduplicate __all__ = ['hadoop_open', @@ -15,6 +15,7 @@ 'hadoop_is_file', 'hadoop_stat', 'hadoop_ls', + 'hadoop_scheme_supported', 'copy_log', 'wrap_to_list', 'new_local_temp_dir', @@ -38,6 +39,7 @@ 'HailSeedGenerator', 'LinkedList', 'get_1kg', + 'get_hgdp', 'get_movie_lens', 'timestamp_path', '_dumps_partitions', diff --git a/hail/python/hail/utils/hadoop_utils.py b/hail/python/hail/utils/hadoop_utils.py index 1a7742377da..232b22ccca5 100644 --- a/hail/python/hail/utils/hadoop_utils.py +++ b/hail/python/hail/utils/hadoop_utils.py @@ -214,6 +214,26 @@ def hadoop_ls(path: str) -> List[Dict]: return Env.fs().ls(path) +def hadoop_scheme_supported(scheme: 
str) -> bool: + """Returns ``True`` if the Hadoop filesystem supports URLs with the given + scheme. + + Examples + -------- + + >>> hadoop_scheme_supported('gs') # doctest: +SKIP + + Parameters + ---------- + scheme : :class:`str` + + Returns + ------- + :obj:`.bool` + """ + return Env.fs().supports_scheme(scheme) + + def copy_log(path: str) -> None: """Attempt to copy the session log to a hadoop-API-compatible location. diff --git a/hail/python/hail/utils/misc.py b/hail/python/hail/utils/misc.py index e22c4b70c3f..21a9088e2e8 100644 --- a/hail/python/hail/utils/misc.py +++ b/hail/python/hail/utils/misc.py @@ -262,9 +262,9 @@ def fmt_field(field): elif isinstance(obj, StructExpression): return 'StructExpression', StructExpression, struct_error(obj), True elif isinstance(obj, ArrayStructExpression): - return 'ArrayStructExpression', StructExpression, struct_error(obj), True + return 'ArrayStructExpression', ArrayStructExpression, struct_error(obj), True elif isinstance(obj, SetStructExpression): - return 'SetStructExpression', StructExpression, struct_error(obj), True + return 'SetStructExpression', SetStructExpression, struct_error(obj), True else: raise NotImplementedError(obj) diff --git a/hail/python/hail/utils/tutorial.py b/hail/python/hail/utils/tutorial.py index 27bb819d523..41afab13e01 100644 --- a/hail/python/hail/utils/tutorial.py +++ b/hail/python/hail/utils/tutorial.py @@ -8,13 +8,17 @@ __all__ = [ 'get_1kg', + 'get_hgdp', 'get_movie_lens' ] resources = { '1kg_annotations': 'https://storage.googleapis.com/hail-tutorial/1kg_annotations.txt', '1kg_matrix_table': 'https://storage.googleapis.com/hail-tutorial/1kg.vcf.bgz', - 'ensembl_gene_annotations': 'https://storage.googleapis.com/hail-tutorial/ensembl_gene_annotations.txt', + '1kg_ensembl_gene_annotations': 'https://storage.googleapis.com/hail-tutorial/ensembl_gene_annotations.txt', + 'HGDP_annotations': 'https://storage.googleapis.com/hail-tutorial/hgdp/hgdp_pop_and_sex_annotations.tsv', + 'HGDP_matrix_table': 'https://storage.googleapis.com/hail-tutorial/hgdp/hgdp_subset.vcf.bgz', + 'HGDP_ensembl_gene_annotations': 'https://storage.googleapis.com/hail-tutorial/hgdp/hgdp_gene_annotations.tsv', 'movie_lens_100k': 'http://files.grouplens.org/datasets/movielens/ml-100k.zip', } @@ -88,7 +92,7 @@ def get_1kg(output_dir, overwrite: bool = False): sync_retry_transient_errors(urlretrieve, source, tmp_sample_annot) tmp_gene_annot = os.path.join(tmp_dir, 'ensembl_gene_annotations.txt') - source = resources['ensembl_gene_annotations'] + source = resources['1kg_ensembl_gene_annotations'] info(f'downloading Ensembl gene annotations ...\n' f' Source: {source}') sync_retry_transient_errors(urlretrieve, source, tmp_gene_annot) @@ -101,6 +105,67 @@ def get_1kg(output_dir, overwrite: bool = False): info('1KG files found') +def get_hgdp(output_dir, overwrite: bool = False): + """Download subset of the `Human Genome Diversity Panel + `__ + dataset and sample annotations. + + Notes + ----- + The download is about 30MB. + + Parameters + ---------- + output_dir + Directory in which to write data. + overwrite + If ``True``, overwrite any existing files/directories at `output_dir`. 
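A small usage sketch for the new predicate; the fallback path is purely illustrative:

    from hail.utils import hadoop_scheme_supported

    # Use a cloud URL only when the active backend's filesystem can handle the scheme.
    path = 'gs://my-bucket/data.tsv' if hadoop_scheme_supported('gs') else '/tmp/data.tsv'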
+ """ + fs = Env.fs() + + if not _dir_exists(fs, output_dir): + fs.mkdir(output_dir) + + matrix_table_path = os.path.join(output_dir, 'HGDP.mt') + vcf_path = os.path.join(output_dir, 'HGDP.vcf.bgz') + sample_annotations_path = os.path.join(output_dir, 'HGDP_annotations.txt') + gene_annotations_path = os.path.join(output_dir, 'ensembl_gene_annotations.txt') + + if (overwrite + or not _dir_exists(fs, matrix_table_path) + or not _file_exists(fs, sample_annotations_path) + or not _file_exists(fs, vcf_path) + or not _file_exists(fs, gene_annotations_path)): + init_temp_dir() + tmp_vcf = os.path.join(tmp_dir, 'HGDP.vcf.bgz') + source = resources['HGDP_matrix_table'] + info(f'downloading HGDP VCF ...\n' + f' Source: {source}') + sync_retry_transient_errors(urlretrieve, resources['HGDP_matrix_table'], tmp_vcf) + cluster_readable_vcf = _copy_to_tmp(fs, local_path_uri(tmp_vcf), extension='vcf.bgz') + info('importing VCF and writing to matrix table...') + hl.import_vcf(cluster_readable_vcf, min_partitions=16, reference_genome='GRCh38').write(matrix_table_path, overwrite=True) + + tmp_sample_annot = os.path.join(tmp_dir, 'HGDP_annotations.txt') + source = resources['HGDP_annotations'] + info(f'downloading HGDP annotations ...\n' + f' Source: {source}') + sync_retry_transient_errors(urlretrieve, source, tmp_sample_annot) + + tmp_gene_annot = os.path.join(tmp_dir, 'ensembl_gene_annotations.txt') + source = resources['HGDP_ensembl_gene_annotations'] + info(f'downloading Ensembl gene annotations ...\n' + f' Source: {source}') + sync_retry_transient_errors(urlretrieve, source, tmp_gene_annot) + + hl.hadoop_copy(local_path_uri(tmp_sample_annot), sample_annotations_path) + hl.hadoop_copy(local_path_uri(tmp_gene_annot), gene_annotations_path) + hl.hadoop_copy(local_path_uri(tmp_vcf), vcf_path) + info('Done!') + else: + info('HGDP files found') + + def get_movie_lens(output_dir, overwrite: bool = False): """Download public Movie Lens dataset. 
diff --git a/hail/python/hailtop/aiogoogle/auth/session.py b/hail/python/hailtop/aiogoogle/auth/session.py index e94b80fd8b0..8818bb591df 100644 --- a/hail/python/hailtop/aiogoogle/auth/session.py +++ b/hail/python/hailtop/aiogoogle/auth/session.py @@ -2,6 +2,7 @@ from typing import Optional, Type, TypeVar, Mapping import abc import aiohttp +import hailtop.httpx from hailtop.utils import request_retry_transient_errors, RateLimit, RateLimiter from .credentials import Credentials from .access_token import AccessToken @@ -66,11 +67,10 @@ class Session(BaseSession): def __init__(self, *, credentials: Credentials = None, params: Optional[Mapping[str, str]] = None, **kwargs): if credentials is None: credentials = Credentials.default_credentials() - if 'raise_for_status' not in kwargs: kwargs['raise_for_status'] = True self._params = params - self._session = aiohttp.ClientSession(**kwargs) + self._session = hailtop.httpx.ClientSession(**kwargs) self._access_token = AccessToken(credentials) async def request(self, method: str, url: str, **kwargs): @@ -97,7 +97,7 @@ async def request(self, method: str, url: str, **kwargs): return await self._session.request(method, url, **kwargs) async def close(self) -> None: - if hasattr(self._session, '_session'): + if hasattr(self, '_session'): await self._session.close() del self._session del self._access_token diff --git a/hail/python/hailtop/aiogoogle/client/compute_client.py b/hail/python/hailtop/aiogoogle/client/compute_client.py index ffe33daf818..d0b8b25cfa3 100644 --- a/hail/python/hailtop/aiogoogle/client/compute_client.py +++ b/hail/python/hailtop/aiogoogle/client/compute_client.py @@ -1,9 +1,12 @@ import uuid from typing import Mapping, Any, Optional, MutableMapping +import logging from .base_client import BaseClient from hailtop.utils import sleep_and_backoff +log = logging.getLogger('compute_client') + async def request_with_wait_for_done(request_f, path, params: MutableMapping[str, Any] = None, **kwargs): assert 'params' not in kwargs diff --git a/hail/python/hailtop/aiogoogle/client/storage_client.py b/hail/python/hailtop/aiogoogle/client/storage_client.py index de35f3d836a..6d9ad04f871 100644 --- a/hail/python/hailtop/aiogoogle/client/storage_client.py +++ b/hail/python/hailtop/aiogoogle/client/storage_client.py @@ -363,6 +363,7 @@ async def insert_object(self, bucket: str, name: str, **kwargs) -> WritableStrea # https://cloud.google.com/storage/docs/performing-resumable-uploads assert upload_type == 'resumable' chunk_size = kwargs.get('bufsize', 256 * 1024) + resp = await self._session.post( f'https://storage.googleapis.com/upload/storage/v1/b/{bucket}/o', **kwargs) @@ -424,7 +425,6 @@ async def __getitem__(self, key: str) -> str: class GoogleStorageFileListEntry(FileListEntry): def __init__(self, url: str, items: Optional[Dict[str, Any]]): - assert url.endswith('/') == (items is None), f'{url} {items}' self._url = url self._items = items self._status: Optional[GetObjectFileStatus] = None @@ -448,7 +448,7 @@ async def is_dir(self) -> bool: async def status(self) -> FileStatus: if self._status is None: if self._items is None: - raise ValueError("directory has no file status") + raise IsADirectoryError(self._url) self._status = GetObjectFileStatus(self._items) return self._status @@ -478,10 +478,10 @@ def _tmp_name(self, filename: str) -> str: def _part_name(self, number: int) -> str: return self._tmp_name(f'part-{number}') - async def create_part(self, number: int, start: int, *, retry_writes: bool = True) -> WritableStream: + async def 
create_part(self, number: int, start: int) -> WritableStream: part_name = self._part_name(number) params = { - 'uploadType': 'resumable' if retry_writes else 'media' + 'uploadType': 'media' } return await self._fs._storage_client.insert_object(self._bucket, part_name, params=params) @@ -546,8 +546,13 @@ async def tree_compose(names, dest_name): class GoogleStorageAsyncFS(AsyncFS): def __init__(self, *, storage_client: Optional[StorageClient] = None, + project: Optional[str] = None, **kwargs): if not storage_client: + if project is not None: + if 'params' not in kwargs: + kwargs['params'] = {} + kwargs['params']['userProject'] = project storage_client = StorageClient(**kwargs) self._storage_client = storage_client @@ -576,7 +581,7 @@ async def open_from(self, url: str, start: int) -> ReadableStream: return await self._storage_client.get_object( bucket, name, headers={'Range': f'bytes={start}-'}) - async def create(self, url: str, retry_writes: bool = True) -> WritableStream: + async def create(self, url: str, *, retry_writes: bool = True) -> WritableStream: bucket, name = self._get_bucket_name(url) params = { 'uploadType': 'resumable' if retry_writes else 'media' @@ -658,8 +663,7 @@ async def _listfiles_flat(self, bucket: str, name: str) -> AsyncIterator[FileLis if prefixes: for prefix in prefixes: assert prefix.endswith('/') - url = f'gs://{bucket}/{prefix}' - yield GoogleStorageFileListEntry(url, None) + yield GoogleStorageFileListEntry(f'gs://{bucket}/{prefix}', None) items = page.get('items') if items: @@ -682,11 +686,23 @@ async def listfiles(self, url: str, recursive: bool = False) -> AsyncIterator[Fi except StopAsyncIteration: raise FileNotFoundError(url) # pylint: disable=raise-missing-from + async def should_yield(entry): + url = await entry.url() + if url.endswith('/') and await entry.is_file(): + stat = await entry.status() + if await stat.size() != 0: + raise FileAndDirectoryError(url) + return False + return True + async def cons(first_entry, it): - yield first_entry + if await should_yield(first_entry): + yield first_entry try: while True: - yield await it.__anext__() + next_entry = await it.__anext__() + if await should_yield(next_entry): + yield next_entry except StopAsyncIteration: pass @@ -708,7 +724,7 @@ async def isfile(self, url: str) -> bool: async def isdir(self, url: str) -> bool: bucket, name = self._get_bucket_name(url) - assert not name or name.endswith('/') + assert not name or name.endswith('/'), name params = { 'prefix': name, 'delimiter': '/', @@ -723,27 +739,31 @@ async def isdir(self, url: str) -> bool: async def remove(self, url: str) -> None: bucket, name = self._get_bucket_name(url) - await self._storage_client.delete_object(bucket, name) - - async def _remove_doesnt_exist_ok(self, url: str) -> None: try: - bucket, name = self._get_bucket_name(url) await self._storage_client.delete_object(bucket, name) - except FileNotFoundError: - pass except aiohttp.ClientResponseError as e: - if e.status != 404: - raise + if e.status == 404: + raise FileNotFoundError(url) from e + raise - async def rmtree(self, sema: asyncio.Semaphore, url: str) -> None: + async def _rmtree(self, sema: asyncio.Semaphore, url: str) -> None: async with OnlineBoundedGather2(sema) as pool: - try: - it = await self.listfiles(url, recursive=True) - except FileNotFoundError: - return + bucket, name = self._get_bucket_name(url) + if name and not name.endswith('/'): + name = f'{name}/' + it = self._listfiles_recursive(bucket, name) async for entry in it: await 
pool.call(self._remove_doesnt_exist_ok, await entry.url()) + async def rmtree(self, sema: Optional[asyncio.Semaphore], url: str) -> None: + if sema is None: + sema = asyncio.Semaphore(50) + async with sema: + return await self._rmtree(sema, url) + + return await self._rmtree(sema, url) + async def close(self) -> None: - await self._storage_client.close() - del self._storage_client + if hasattr(self, '_storage_client'): + await self._storage_client.close() + del self._storage_client diff --git a/hail/python/hailtop/aiotools/fs.py b/hail/python/hailtop/aiotools/fs.py index eb9f0290218..5c0de3d5033 100644 --- a/hail/python/hailtop/aiotools/fs.py +++ b/hail/python/hailtop/aiotools/fs.py @@ -1,4 +1,4 @@ -from typing import Any, Optional, List, Type, BinaryIO, cast, Set, AsyncIterator, Union, Dict +from typing import Any, AsyncContextManager, Optional, List, Type, BinaryIO, cast, Set, AsyncIterator, Union, Dict from types import TracebackType import abc import os @@ -9,10 +9,11 @@ import asyncio from concurrent.futures import ThreadPoolExecutor import urllib.parse +import functools import humanize from hailtop.utils import ( retry_transient_errors, blocking_to_async, url_basename, url_join, bounded_gather2, - time_msecs, humanize_timedelta_msecs) + time_msecs, humanize_timedelta_msecs, OnlineBoundedGather2) from .stream import ReadableStream, WritableStream, blocking_readable_stream_to_async, blocking_writable_stream_to_async @@ -54,7 +55,7 @@ async def status(self) -> FileStatus: class MultiPartCreate(abc.ABC): @abc.abstractmethod - async def create_part(self, number: int, start: int, *, retry_writes: bool = True): + async def create_part(self, number: int, start: int) -> AsyncContextManager[WritableStream]: pass @abc.abstractmethod @@ -86,7 +87,7 @@ async def open_from(self, url: str, start: int) -> ReadableStream: pass @abc.abstractmethod - async def create(self, url: str, *, retry_writes: bool = True) -> WritableStream: + async def create(self, url: str, *, retry_writes: bool = True) -> AsyncContextManager[WritableStream]: pass @abc.abstractmethod @@ -117,6 +118,33 @@ async def listfiles(self, url: str, recursive: bool = False) -> AsyncIterator[Fi async def staturl(self, url: str) -> str: pass + async def _staturl_parallel_isfile_isdir(self, url: str) -> str: + assert not url.endswith('/') + + async def with_exception(f, *args, **kwargs): + try: + return (await f(*args, **kwargs)), None + except Exception as e: + return None, e + + [(is_file, isfile_exc), (is_dir, isdir_exc)] = await asyncio.gather( + with_exception(self.isfile, url), with_exception(self.isdir, url + '/')) + # raise exception deterministically + if isfile_exc: + raise isfile_exc + if isdir_exc: + raise isdir_exc + + if is_file: + if is_dir: + raise FileAndDirectoryError(url) + return AsyncFS.FILE + + if is_dir: + return AsyncFS.DIR + + raise FileNotFoundError(url) + @abc.abstractmethod async def isfile(self, url: str) -> bool: pass @@ -129,14 +157,48 @@ async def isdir(self, url: str) -> bool: async def remove(self, url: str) -> None: pass + async def _remove_doesnt_exist_ok(self, url): + try: + await self.remove(url) + except FileNotFoundError: + pass + @abc.abstractmethod - async def rmtree(self, sema: asyncio.Semaphore, url: str) -> None: + async def rmtree(self, sema: Optional[asyncio.Semaphore], url: str) -> None: pass + async def _rmtree_with_recursive_listfiles(self, sema: asyncio.Semaphore, url: str) -> None: + async with OnlineBoundedGather2(sema) as pool: + try: + it = await self.listfiles(url, recursive=True) + 
except FileNotFoundError: + return + async for entry in it: + await pool.call(self._remove_doesnt_exist_ok, await entry.url()) + async def touch(self, url: str) -> None: async with await self.create(url): pass + async def read(self, url: str) -> bytes: + async with await self.open(url) as f: + return await f.read() + + async def read_from(self, url: str, start: int) -> bytes: + async with await self.open_from(url, start) as f: + return await f.read() + + async def read_range(self, url: str, start: int, end: int) -> bytes: + n = (end - start) + 1 + async with await self.open_from(url, start) as f: + return await f.read(n) + + async def write(self, url: str, data: bytes) -> None: + async def _write() -> None: + async with await self.create(url, retry_writes=False) as f: + await f.write(data) + await retry_transient_errors(_write) + async def close(self) -> None: pass @@ -191,7 +253,7 @@ async def is_dir(self) -> bool: async def status(self) -> LocalStatFileStatus: if self._status is None: if await self.is_dir(): - raise ValueError("directory has no file status") + raise IsADirectoryError() self._status = LocalStatFileStatus(await blocking_to_async(self._thread_pool, self._entry.stat)) return self._status @@ -202,7 +264,7 @@ def __init__(self, fs: 'LocalAsyncFS', path: str, num_parts: int): self._path = path self._num_parts = num_parts - async def create_part(self, number: int, start: int, *, retry_writes: bool = True): # pylint: disable=unused-argument + async def create_part(self, number: int, start: int): # pylint: disable=unused-argument assert 0 <= number < self._num_parts f = await blocking_to_async(self._fs._thread_pool, open, self._path, 'r+b') f.seek(start) @@ -336,9 +398,9 @@ async def isdir(self, url: str) -> bool: async def remove(self, url: str) -> None: path = self._get_path(url) - return os.remove(path) + return await blocking_to_async(self._thread_pool, os.remove, path) - async def rmtree(self, sema: asyncio.Semaphore, url: str) -> None: + async def rmtree(self, sema: Optional[asyncio.Semaphore], url: str) -> None: path = self._get_path(url) await blocking_to_async(self._thread_pool, shutil.rmtree, path) @@ -495,12 +557,12 @@ async def _copy_file(self, srcfile: str, destfile: str) -> None: async with await self.router_fs.open(srcfile) as srcf: try: - destf = await self.router_fs.create(destfile, retry_writes=False) + dest_cm = await self.router_fs.create(destfile, retry_writes=False) except FileNotFoundError: await self.router_fs.makedirs(os.path.dirname(destfile), exist_ok=True) - destf = await self.router_fs.create(destfile) + dest_cm = await self.router_fs.create(destfile) - async with destf: + async with dest_cm as destf: while True: b = await srcf.read(Copier.BUFFER_SIZE) if not b: @@ -511,7 +573,7 @@ async def _copy_file(self, srcfile: str, destfile: str) -> None: async def _copy_part(self, source_report, srcfile, part_number, part_creator, return_exceptions): try: async with await self.router_fs.open_from(srcfile, part_number * self.PART_SIZE) as srcf: - async with await part_creator.create_part(part_number, part_number * self.PART_SIZE, retry_writes=False) as destf: + async with await part_creator.create_part(part_number, part_number * self.PART_SIZE) as destf: n = self.PART_SIZE while n > 0: b = await srcf.read(min(Copier.BUFFER_SIZE, n)) @@ -550,7 +612,7 @@ async def _copy_file_multi_part_main( async with part_creator: await bounded_gather2(sema, *[ - retry_transient_errors(self._copy_part, source_report, srcfile, i, part_creator, return_exceptions) + 
functools.partial(retry_transient_errors, self._copy_part, source_report, srcfile, i, part_creator, return_exceptions) for i in range(n_parts) ], cancel_on_error=True) @@ -632,6 +694,7 @@ async def copy_as_dir(self, sema: asyncio.Semaphore, source_report: SourceReport src = self.src if not src.endswith('/'): src = src + '/' + try: srcentries = await self.router_fs.listfiles(src, recursive=True) except (NotADirectoryError, FileNotFoundError): @@ -666,7 +729,7 @@ async def copy_source(srcentry): await self._copy_file_multi_part(sema, source_report, srcfile, await srcentry.status(), url_join(full_dest, relsrcfile), return_exceptions) await bounded_gather2(sema, *[ - copy_source(srcentry) + functools.partial(copy_source, srcentry) async for srcentry in srcentries], cancel_on_error=True) async def copy(self, sema: asyncio.Semaphore, source_report: SourceReport, return_exceptions: bool): @@ -750,7 +813,7 @@ async def _copy_one_transfer(self, sema: asyncio.Semaphore, transfer_report: Tra raise NotADirectoryError(transfer.dest) await bounded_gather2(sema, *[ - self.copy_source(sema, transfer, r, s, dest_type_task, return_exceptions) + functools.partial(self.copy_source, sema, transfer, r, s, dest_type_task, return_exceptions) for r, s in zip(src_report, src) ], cancel_on_error=True) @@ -776,7 +839,7 @@ async def copy(self, sema: asyncio.Semaphore, copy_report: CopyReport, transfer: assert isinstance(transfer_report, list) await bounded_gather2(sema, *[ - self._copy_one_transfer(sema, r, t, return_exceptions) + functools.partial(self._copy_one_transfer, sema, r, t, return_exceptions) for r, t in zip(transfer_report, transfer) ], return_exceptions=return_exceptions, cancel_on_error=True) except Exception as e: @@ -830,7 +893,7 @@ async def open_from(self, url: str, start: int) -> ReadableStream: fs = self._get_fs(url) return await fs.open_from(url, start) - async def create(self, url: str, *, retry_writes: bool = True) -> WritableStream: + async def create(self, url: str, retry_writes: bool = True) -> WritableStream: fs = self._get_fs(url) return await fs.create(url, retry_writes=retry_writes) @@ -874,7 +937,7 @@ async def remove(self, url: str) -> None: fs = self._get_fs(url) return await fs.remove(url) - async def rmtree(self, sema: asyncio.Semaphore, url: str) -> None: + async def rmtree(self, sema: Optional[asyncio.Semaphore], url: str) -> None: fs = self._get_fs(url) return await fs.rmtree(sema, url) diff --git a/hail/python/hailtop/aiotools/s3asyncfs.py b/hail/python/hailtop/aiotools/s3asyncfs.py new file mode 100644 index 00000000000..9916afd1926 --- /dev/null +++ b/hail/python/hailtop/aiotools/s3asyncfs.py @@ -0,0 +1,423 @@ +from typing import Any, AsyncIterator, BinaryIO, cast, AsyncContextManager, Dict, List, Optional, Set, Tuple, Type +from types import TracebackType +from concurrent.futures import ThreadPoolExecutor +import os.path +import urllib +import asyncio +import botocore.exceptions +import boto3 +from hailtop.utils import blocking_to_async +from hailtop.aiotools import ( + FileStatus, FileListEntry, ReadableStream, WritableStream, AsyncFS, + MultiPartCreate) +from .stream import ( + AsyncQueueWritableStream, + async_writable_blocking_readable_stream_pair, + blocking_readable_stream_to_async) + + +class PageIterator: + def __init__(self, fs: 'S3AsyncFS', bucket: str, prefix: str, delimiter: Optional[str] = None): + self._fs = fs + self._bucket = bucket + self._prefix = prefix + self._kwargs = {} + if delimiter is not None: + self._kwargs['Delimiter'] = delimiter + self._page = 
None + + def __aiter__(self) -> 'PageIterator': + return self + + async def __anext__(self): + if self._page is None: + self._page = await blocking_to_async(self._fs._thread_pool, self._fs._s3.list_objects_v2, + Bucket=self._bucket, + Prefix=self._prefix, + **self._kwargs) + return self._page + + next_continuation_token = self._page.get('NextContinuationToken') + if next_continuation_token is not None: + self._page = await blocking_to_async(self._fs._thread_pool, self._fs._s3.list_objects_v2, + Bucket=self._bucket, + Prefix=self._prefix, + ContinuationToken=next_continuation_token, + **self._kwargs) + return self._page + + raise StopAsyncIteration + + +class S3HeadObjectFileStatus(FileStatus): + def __init__(self, head_object_resp): + self.head_object_resp = head_object_resp + + async def size(self) -> int: + return self.head_object_resp['ContentLength'] + + async def __getitem__(self, key: str) -> Any: + return self.head_object_resp[key] + + +class S3ListFilesFileStatus(FileStatus): + def __init__(self, item: Dict[str, Any]): + self._item = item + + async def size(self) -> int: + return self._item['Size'] + + async def __getitem__(self, key: str) -> Any: + return self._item[key] + + +class S3CreateManager(AsyncContextManager[WritableStream]): + def __init__(self, fs: 'S3AsyncFS', bucket: str, name: str): + self.fs: S3AsyncFS = fs + self.bucket: str = bucket + self.name: str = name + self.async_writable: Optional[AsyncQueueWritableStream] = None + self.put_task: Optional[asyncio.Task] = None + self._value: Any = None + + async def __aenter__(self) -> WritableStream: + async_writable, blocking_readable = async_writable_blocking_readable_stream_pair() + self.async_writable = async_writable + self.put_task = asyncio.create_task( + blocking_to_async(self.fs._thread_pool, self.fs._s3.upload_fileobj, + blocking_readable, + Bucket=self.bucket, + Key=self.name)) + return async_writable + + async def __aexit__( + self, exc_type: Optional[Type[BaseException]] = None, + exc_value: Optional[BaseException] = None, + exc_traceback: Optional[TracebackType] = None) -> None: + assert self.async_writable + await self.async_writable.wait_closed() + assert self.put_task + self._value = await self.put_task + + +class S3FileListEntry(FileListEntry): + def __init__(self, bucket: str, key: str, item: Optional[Dict[str, Any]]): + assert key.endswith('/') == (item is None) + self._bucket = bucket + self._key = key + self._item = item + self._status: Optional[S3ListFilesFileStatus] = None + + def name(self) -> str: + return os.path.basename(self._key) + + async def url(self) -> str: + return f's3://{self._bucket}/{self._key}' + + def url_maybe_trailing_slash(self) -> str: + return f's3://{self._bucket}/{self._key}' + + async def is_file(self) -> bool: + return self._item is not None + + async def is_dir(self) -> bool: + return self._item is None + + async def status(self) -> FileStatus: + if self._status is None: + if self._item is None: + raise IsADirectoryError(f's3://{self._bucket}/{self._key}') + self._status = S3ListFilesFileStatus(self._item) + return self._status + + +def _upload_part(s3, bucket, key, number, f, upload_id): + b = f.read() + resp = s3.upload_part( + Bucket=bucket, + Key=key, + PartNumber=number + 1, + UploadId=upload_id, + Body=b) + return resp['ETag'] + + +class S3CreatePartManager(AsyncContextManager[WritableStream]): + def __init__(self, mpc, number: int): + self._mpc = mpc + self._number = number + self._async_writable: Optional[AsyncQueueWritableStream] = None + self._put_task: 
Optional[asyncio.Task] = None + + async def __aenter__(self) -> WritableStream: + async_writable, blocking_readable = async_writable_blocking_readable_stream_pair() + self._async_writable = async_writable + self._put_task = asyncio.create_task( + blocking_to_async(self._mpc._fs._thread_pool, _upload_part, + self._mpc._fs._s3, + self._mpc._bucket, + self._mpc._name, + self._number, + blocking_readable, + self._mpc._upload_id)) + return async_writable + + async def __aexit__( + self, exc_type: Optional[Type[BaseException]] = None, + exc_value: Optional[BaseException] = None, + exc_traceback: Optional[TracebackType] = None) -> None: + assert self._async_writable is not None + assert self._put_task is not None + try: + await self._async_writable.wait_closed() + finally: + self._mpc._etags[self._number] = await self._put_task + + +class S3MultiPartCreate(MultiPartCreate): + def __init__(self, sema: asyncio.Semaphore, fs: 'S3AsyncFS', bucket: str, name: str, num_parts: int): + self._sema = sema + self._fs = fs + self._bucket = bucket + self._name = name + self._num_parts = num_parts + self._upload_id = None + self._etags: List[Optional[str]] = [None] * num_parts + + async def __aenter__(self) -> 'S3MultiPartCreate': + resp = await blocking_to_async(self._fs._thread_pool, self._fs._s3.create_multipart_upload, + Bucket=self._bucket, + Key=self._name) + self._upload_id = resp['UploadId'] + return self + + async def __aexit__( + self, exc_type: Optional[Type[BaseException]] = None, + exc_value: Optional[BaseException] = None, + exc_traceback: Optional[TracebackType] = None) -> None: + if exc_value is not None: + await blocking_to_async(self._fs._thread_pool, self._fs._s3.abort_multipart_upload, + Bucket=self._bucket, + Key=self._name, + UploadId=self._upload_id) + return + + parts = [] + part_number = 1 + for etag in self._etags: + assert etag is not None + parts.append({ + 'ETag': etag, + 'PartNumber': part_number + }) + part_number += 1 + + await blocking_to_async(self._fs._thread_pool, self._fs._s3.complete_multipart_upload, + Bucket=self._bucket, + Key=self._name, + MultipartUpload={'Parts': parts}, + UploadId=self._upload_id) + + async def create_part(self, number: int, start: int) -> S3CreatePartManager: # pylint: disable=unused-argument + return S3CreatePartManager(self, number) + + +class S3AsyncFS(AsyncFS): + def __init__(self, thread_pool: ThreadPoolExecutor, max_workers=None): + if not thread_pool: + thread_pool = ThreadPoolExecutor(max_workers=max_workers) + self._thread_pool = thread_pool + self._s3 = boto3.client('s3') + + def schemes(self) -> Set[str]: + return {'s3'} + + @staticmethod + def _get_bucket_name(url: str) -> Tuple[str, str]: + parsed = urllib.parse.urlparse(url) + if parsed.scheme != 's3': + raise ValueError(f"invalid scheme, expected s3: {parsed.scheme}") + + name = parsed.path + if name: + assert name[0] == '/' + name = name[1:] + + return (parsed.netloc, name) + + async def open(self, url: str) -> ReadableStream: + bucket, name = self._get_bucket_name(url) + resp = await blocking_to_async(self._thread_pool, self._s3.get_object, + Bucket=bucket, + Key=name) + return blocking_readable_stream_to_async(self._thread_pool, cast(BinaryIO, resp['Body'])) + + async def open_from(self, url: str, start: int) -> ReadableStream: + bucket, name = self._get_bucket_name(url) + resp = await blocking_to_async(self._thread_pool, self._s3.get_object, + Bucket=bucket, + Key=name, + Range=f'bytes={start}-') + return blocking_readable_stream_to_async(self._thread_pool, cast(BinaryIO, 
resp['Body']))
+
+    async def create(self, url: str, *, retry_writes: bool = True) -> S3CreateManager:  # pylint: disable=unused-argument
+        # It may be possible to write a more efficient version of this
+        # that takes advantage of retry_writes=False.  Here's the
+        # background information:
+        #
+        # There are essentially three options for implementing writes.
+        # The first two handle retries:
+        #
+        # 1. Use some form of multipart uploads (which, in the case
+        # of GCS, we implement by writing temporary objects and
+        # then calling compose).
+        #
+        # 2. Use resumable uploads.  This is what the GCS backend
+        # does, although the performance is much worse than
+        # non-resumable uploads, so in fact it may always be better
+        # to use multipart uploads (1).
+        #
+        # The third does not handle failures:
+        #
+        # 3. Don't be failure/retry safe.  Just write the object, and
+        # if the API call fails, fail.  This is useful when you can
+        # retry at a higher level (this is what the copy code does).
+        #
+        # Unfortunately, I don't see how to do (3) with boto3, since
+        # AWS APIs require a header that includes a hash of the
+        # request body, and that needs to be computed up front.  In
+        # terms of the boto3 interface, this constraint means that
+        # calls like `put_object` require bytes or a seekable stream
+        # (so it can make two passes over the data, one to compute the
+        # checksum, and the other to send the data).
+        #
+        # Here, we use S3CreateManager, which in turn uses boto3
+        # `upload_fileobj`, which is implemented in terms of multipart
+        # uploads.
+        #
+        # Another possibility is to make an alternate `create` call
+        # that takes bytes instead of returning a file-like object,
+        # and then use `put_object`, and make copy use that
+        # interface.  This has the disadvantage that the read must
+        # complete before the write can begin (unlike the current
+        # code, which copies 128MB parts in 256KB chunks).
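As a rough illustration of the bytes-based alternative described in the comment above, a hypothetical helper (the name write_bytes and its placement on S3AsyncFS are assumptions for illustration, not part of this patch) could hand the whole payload to put_object, which can then hash the body in a single pass:

    # Hypothetical sketch only: buffer the entire object, then upload it in
    # one call so boto3 can compute the required content hash up front.
    async def write_bytes(self, url: str, data: bytes) -> None:
        bucket, name = self._get_bucket_name(url)
        await blocking_to_async(self._thread_pool, self._s3.put_object,
                                Bucket=bucket,
                                Key=name,
                                Body=data)

The trade-off noted in the comment still applies: the full read has to finish before any bytes are sent.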
+ bucket, name = self._get_bucket_name(url) + return S3CreateManager(self, bucket, name) + + async def multi_part_create( + self, + sema: asyncio.Semaphore, + url: str, + num_parts: int) -> MultiPartCreate: + bucket, name = self._get_bucket_name(url) + return S3MultiPartCreate(sema, self, bucket, name, num_parts) + + async def mkdir(self, url: str) -> None: + pass + + async def makedirs(self, url: str, exist_ok: bool = False) -> None: + pass + + async def statfile(self, url: str) -> FileStatus: + bucket, name = self._get_bucket_name(url) + try: + resp = await blocking_to_async(self._thread_pool, self._s3.head_object, + Bucket=bucket, + Key=name) + return S3HeadObjectFileStatus(resp) + except botocore.exceptions.ClientError as e: + if e.response['ResponseMetadata']['HTTPStatusCode'] == 404: + raise FileNotFoundError(url) from e + raise e + + async def _listfiles_recursive(self, bucket: str, name: str) -> AsyncIterator[FileListEntry]: + assert not name or name.endswith('/') + async for page in PageIterator(self, bucket, name): + assert 'CommonPrefixes' not in page + contents = page.get('Contents') + if contents: + for item in contents: + yield S3FileListEntry(bucket, item['Key'], item) + + async def _listfiles_flat(self, bucket: str, name: str) -> AsyncIterator[FileListEntry]: + assert not name or name.endswith('/') + async for page in PageIterator(self, bucket, name, delimiter='/'): + prefixes = page.get('CommonPrefixes') + if prefixes is not None: + for prefix in prefixes: + yield S3FileListEntry(bucket, prefix['Prefix'], None) + contents = page.get('Contents') + if contents: + for item in contents: + yield S3FileListEntry(bucket, item['Key'], item) + + async def listfiles(self, url: str, recursive: bool = False) -> AsyncIterator[FileListEntry]: + bucket, name = self._get_bucket_name(url) + if name and not name.endswith('/'): + name += '/' + if recursive: + it = self._listfiles_recursive(bucket, name) + else: + it = self._listfiles_flat(bucket, name) + + it = it.__aiter__() + try: + first_entry = await it.__anext__() + except StopAsyncIteration: + raise FileNotFoundError(url) # pylint: disable=raise-missing-from + + async def cons(first_entry, it): + yield first_entry + try: + while True: + yield await it.__anext__() + except StopAsyncIteration: + pass + + return cons(first_entry, it) + + async def staturl(self, url: str) -> str: + return await self._staturl_parallel_isfile_isdir(url) + + async def isfile(self, url: str) -> bool: + try: + bucket, name = self._get_bucket_name(url) + await blocking_to_async(self._thread_pool, self._s3.head_object, + Bucket=bucket, + Key=name) + return True + except botocore.exceptions.ClientError as e: + if e.response['ResponseMetadata']['HTTPStatusCode'] == 404: + return False + raise e + + async def isdir(self, url: str) -> bool: + try: + async for _ in await self.listfiles(url, recursive=True): + return True + assert False # unreachable + except FileNotFoundError: + return False + + async def remove(self, url: str) -> None: + try: + bucket, name = self._get_bucket_name(url) + await blocking_to_async(self._thread_pool, self._s3.delete_object, + Bucket=bucket, + Key=name) + except self._s3.exceptions.NoSuchKey as e: + raise FileNotFoundError(url) from e + + async def _rmtree(self, sema: asyncio.Semaphore, url: str) -> None: + await self._rmtree_with_recursive_listfiles(sema, url) + + async def rmtree(self, sema: Optional[asyncio.Semaphore], url: str) -> None: + if sema is None: + sema = asyncio.Semaphore(50) + async with sema: + return await 
self._rmtree(sema, url) + + return await self._rmtree(sema, url) + + async def close(self) -> None: + pass diff --git a/hail/python/hailtop/aiotools/stream.py b/hail/python/hailtop/aiotools/stream.py index b1cf2e56f55..7e222444e35 100644 --- a/hail/python/hailtop/aiotools/stream.py +++ b/hail/python/hailtop/aiotools/stream.py @@ -1,7 +1,10 @@ -from typing import Optional, Type, BinaryIO +from typing import BinaryIO, Optional, Tuple, Type from types import TracebackType import abc +import io +import os from concurrent.futures import ThreadPoolExecutor +import janus from hailtop.utils import blocking_to_async @@ -29,7 +32,7 @@ async def wait_closed(self) -> None: self._waited_closed = True @property - def closed(self) -> None: + def closed(self) -> bool: return self._closed async def __aenter__(self) -> 'ReadableStream': @@ -69,7 +72,7 @@ async def wait_closed(self) -> None: self._waited_closed = True @property - def closed(self) -> None: + def closed(self) -> bool: return self._closed async def __aenter__(self) -> 'WritableStream': @@ -92,6 +95,8 @@ def __init__(self, thread_pool: ThreadPoolExecutor, f: BinaryIO): self._f = f async def read(self, n: int = -1) -> bytes: + if n == -1: + return await blocking_to_async(self._thread_pool, self._f.read) return await blocking_to_async(self._thread_pool, self._f.read, n) async def _wait_closed(self) -> None: @@ -115,6 +120,8 @@ async def write(self, b: bytes) -> int: return await blocking_to_async(self._thread_pool, self._f.write, b) async def _wait_closed(self) -> None: + await blocking_to_async(self._thread_pool, self._f.flush) + await blocking_to_async(self._thread_pool, os.fsync, self._f.fileno()) await blocking_to_async(self._thread_pool, self._f.close) del self._f @@ -125,3 +132,74 @@ def blocking_readable_stream_to_async(thread_pool: ThreadPoolExecutor, f: Binary def blocking_writable_stream_to_async(thread_pool: ThreadPoolExecutor, f: BinaryIO) -> _WritableStreamFromBlocking: return _WritableStreamFromBlocking(thread_pool, f) + + +class BlockingQueueReadableStream(io.RawIOBase): + # self.closed and self.close() must be multithread safe, because + # they can be accessed by both the stream reader and writer which + # are in different threads. 
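For orientation, the writable/readable pair introduced in this file is used roughly as in the following hedged sketch: an async producer writes chunks while a blocking consumer (for example, a boto3 upload) reads from the other half in a worker thread. The consume callable and the chunk source here are illustrative assumptions.

    import asyncio
    from concurrent.futures import ThreadPoolExecutor
    from hailtop.utils import blocking_to_async

    async def feed_blocking_consumer(pool: ThreadPoolExecutor, consume, chunks):
        async_writable, blocking_readable = async_writable_blocking_readable_stream_pair()
        # the blocking consumer reads from its half on a worker thread until end-of-stream
        consumer = asyncio.create_task(blocking_to_async(pool, consume, blocking_readable))
        for chunk in chunks:
            await async_writable.write(chunk)
        await async_writable.wait_closed()  # signals end-of-stream to the reader
        return await consumer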
+ def __init__(self, q: janus.Queue): + super().__init__() + self._q = q + self._saw_eos = False + self._closed = False + self._unread = b'' + + def readable(self) -> bool: + return True + + def readinto(self, b: bytearray) -> int: + if self._closed: + raise ValueError('read on closed stream') + if self._saw_eos: + return 0 + + if not self._unread: + self._unread = self._q.sync_q.get() + if self._unread is None: + self._saw_eos = True + return 0 + assert self._unread + + n = min(len(self._unread), len(b)) + b[:n] = self._unread[:n] + self._unread = self._unread[n:] + return n + + def close(self): + self._closed = True + # drain the q so the writer doesn't deadlock + while not self._saw_eos: + c = self._q.sync_q.get() + if c is None: + self._saw_eos = True + + +class AsyncQueueWritableStream(WritableStream): + def __init__(self, q: janus.Queue, blocking_readable: BlockingQueueReadableStream): + super().__init__() + self._sent_eos = False + self._q = q + self._blocking_readable = blocking_readable + + async def write(self, b: bytes) -> int: + if self._blocking_readable._closed: + if not self._sent_eos: + await self._q.async_q.put(None) + self._sent_eos = True + raise ValueError('reader closed') + if b: + await self._q.async_q.put(b) + return len(b) + + async def _wait_closed(self) -> None: + if not self._sent_eos: + await self._q.async_q.put(None) + self._sent_eos = True + + +def async_writable_blocking_readable_stream_pair() -> Tuple[AsyncQueueWritableStream, BlockingQueueReadableStream]: + q: janus.Queue = janus.Queue(maxsize=1) + blocking_readable = BlockingQueueReadableStream(q) + async_writable = AsyncQueueWritableStream(q, blocking_readable) + return async_writable, blocking_readable diff --git a/hail/python/hailtop/batch/backend.py b/hail/python/hailtop/batch/backend.py index 6155c6f0dbb..5d7bbffd709 100644 --- a/hail/python/hailtop/batch/backend.py +++ b/hail/python/hailtop/batch/backend.py @@ -1,34 +1,47 @@ -from typing import Optional, Dict, Any +from typing import Optional, Dict, Any, TypeVar, Generic import sys import abc import os import subprocess as sp import uuid import time +import functools import copy from shlex import quote as shq import webbrowser import warnings +from concurrent.futures import ThreadPoolExecutor from hailtop.config import get_deploy_config, get_user_config -from hailtop.utils import is_google_registry_domain, parse_docker_image_reference +from hailtop.utils import is_google_registry_domain, parse_docker_image_reference, async_to_blocking, bounded_gather, tqdm from hailtop.batch.hail_genetics_images import HAIL_GENETICS_IMAGES from hailtop.batch_client.parse import parse_cpu_in_mcpu import hailtop.batch_client.client as bc from hailtop.batch_client.client import BatchClient +from hailtop.aiotools import RouterAsyncFS, LocalAsyncFS, AsyncFS +from hailtop.aiogoogle import GoogleStorageAsyncFS from . import resource, batch, job as _job # pylint: disable=unused-import from .exceptions import BatchException -class Backend(abc.ABC): +RunningBatchType = TypeVar('RunningBatchType') +""" +The type of value returned by :py:meth:`.Backend._run`. The value returned by some backends +enables the user to monitor the asynchronous execution of a Batch. +""" + +SelfType = TypeVar('SelfType') + + +class Backend(abc.ABC, Generic[RunningBatchType]): """ Abstract class for backends. 
""" _DEFAULT_SHELL = '/bin/bash' @abc.abstractmethod - def _run(self, batch, dry_run, verbose, delete_scratch_on_exit, **backend_kwargs): + def _run(self, batch, dry_run, verbose, delete_scratch_on_exit, **backend_kwargs) -> RunningBatchType: """ Execute a batch. @@ -36,7 +49,12 @@ def _run(self, batch, dry_run, verbose, delete_scratch_on_exit, **backend_kwargs ------- This method should not be called directly. Instead, use :meth:`.batch.Batch.run`. """ - return + raise NotImplementedError() + + @property + @abc.abstractmethod + def _fs(self) -> AsyncFS: + raise NotImplementedError() # pylint: disable=R0201 def close(self): @@ -45,14 +63,14 @@ def close(self): """ return - def __enter__(self): + def __enter__(self: SelfType) -> SelfType: return self def __exit__(self, exc_type, exc_val, exc_tb): self.close() -class LocalBackend(Backend): +class LocalBackend(Backend[None]): """ Backend that executes batches on a local computer. @@ -95,13 +113,18 @@ def __init__(self, flags += f' -v {gsa_key_file}:/gsa-key/key.json' self._extra_docker_run_flags = flags + self.__fs: AsyncFS = LocalAsyncFS(ThreadPoolExecutor()) + + @property + def _fs(self): + return self.__fs def _run(self, batch: 'batch.Batch', dry_run: bool, verbose: bool, delete_scratch_on_exit: bool, - **backend_kwargs): # pylint: disable=R0915 + **backend_kwargs) -> None: # pylint: disable=R0915 """ Execute a batch. @@ -126,11 +149,24 @@ def _run(self, tmpdir = self._get_scratch_dir() - lines = ['set -e' + ('x' if verbose else ''), - '\n', - '# change cd to tmp directory', - f"cd {tmpdir}", - '\n'] + def new_code_block(): + return ['set -e' + ('x' if verbose else ''), + '\n', + '# change cd to tmp directory', + f"cd {tmpdir}", + '\n'] + + def run_code(code): + code = '\n'.join(code) + if dry_run: + print(code) + else: + try: + sp.check_call(code, shell=True) + except sp.CalledProcessError as e: + print(e) + print(e.output) + raise copied_input_resource_files = set() os.makedirs(tmpdir + '/inputs/', exist_ok=True) @@ -146,7 +182,7 @@ def copy_input(job, r): copied_input_resource_files.add(r) if r._input_path.startswith('gs://'): - return [f'gsutil {requester_pays_project} cp {shq(r._input_path)} {shq(r._get_path(tmpdir))}'] + return [f'gsutil {requester_pays_project} cp -r {shq(r._input_path)} {shq(r._get_path(tmpdir))}'] absolute_input_path = os.path.realpath(r._input_path) @@ -171,7 +207,7 @@ def _cp(dest): directory = os.path.dirname(dest) os.makedirs(directory, exist_ok=True) return 'cp' - return f'gsutil {requester_pays_project} cp' + return f'gsutil {requester_pays_project} cp -r' if isinstance(r, resource.InputResourceFile): return [f'{_cp(dest)} {shq(r._input_path)} {shq(dest)}' @@ -190,81 +226,78 @@ def symlink_input_resource_group(r): symlinks.append(f'ln -sf {shq(src)} {shq(dest)}') return symlinks - write_inputs = [x for r in batch._input_resources for x in copy_external_output(r)] - if write_inputs: - lines += ["# Write input resources to output destinations"] - lines += write_inputs - lines += ['\n'] + try: + write_inputs = [x for r in batch._input_resources for x in copy_external_output(r)] + if write_inputs: + code = new_code_block() + code += ["# Write input resources to output destinations"] + code += write_inputs + code += ['\n'] + run_code(code) - for job in batch._jobs: - if isinstance(job, _job.PythonJob): - job._compile(tmpdir, tmpdir) + for job in batch._jobs: + if isinstance(job, _job.PythonJob): + async_to_blocking(job._compile(tmpdir, tmpdir)) - os.makedirs(f'{tmpdir}/{job._job_id}/', exist_ok=True) + 
os.makedirs(f'{tmpdir}/{job._job_id}/', exist_ok=True) - lines.append(f"# {job._job_id}: {job.name if job.name else ''}") + code = new_code_block() - lines += [x for r in job._inputs for x in copy_input(job, r)] - lines += [x for r in job._mentioned for x in symlink_input_resource_group(r)] + code.append(f"# {job._job_id}: {job.name if job.name else ''}") - resource_defs = [r._declare(tmpdir) for r in job._mentioned] - env = [f'export {k}={v}' for k, v in job._env.items()] + code += [x for r in job._inputs for x in copy_input(job, r)] + code += [x for r in job._mentioned for x in symlink_input_resource_group(r)] - job_shell = job._shell if job._shell else self._DEFAULT_SHELL + resource_defs = [r._declare(tmpdir) for r in job._mentioned] + env = [f'export {k}={v}' for k, v in job._env.items()] - defs = '; '.join(resource_defs) + '; ' if resource_defs else '' - joined_env = '; '.join(env) + '; ' if env else '' + job_shell = job._shell if job._shell else self._DEFAULT_SHELL - cmd = " && ".join(f'{{\n{x}\n}}' for x in job._command) + defs = '; '.join(resource_defs) + '; ' if resource_defs else '' + joined_env = '; '.join(env) + '; ' if env else '' - quoted_job_script = shq(joined_env + defs + cmd) + cmd = " && ".join(f'{{\n{x}\n}}' for x in job._command) - if job._image: - cpu = f'--cpus={job._cpu}' if job._cpu else '' + quoted_job_script = shq(joined_env + defs + cmd) - memory = job._memory - if memory is not None: - memory_ratios = {'lowmem': 1024**3, 'standard': 4 * 1024**3, 'highmem': 7 * 1024**3} - if memory in memory_ratios: - if job._cpu is not None: - mcpu = parse_cpu_in_mcpu(job._cpu) - if mcpu is not None: - memory = str(int(memory_ratios[memory] * (mcpu / 1000))) - else: - raise BatchException(f'invalid value for cpu: {job._cpu}') - else: - raise BatchException(f'must specify cpu when using {memory} to specify the memory') - memory = f'-m {memory}' if memory else '' - - lines.append(f"docker run " - "--entrypoint=''" - f"{self._extra_docker_run_flags} " - f"-v {tmpdir}:{tmpdir} " - f"-w {tmpdir} " - f"{memory} " - f"{cpu} " - f"{job._image} " - f"{job_shell} -c {quoted_job_script}") - else: - lines.append(f"{job_shell} -c {quoted_job_script}") + if job._image: + cpu = f'--cpus={job._cpu}' if job._cpu else '' - lines += [x for r in job._external_outputs for x in copy_external_output(r)] - lines += ['\n'] - - script = "\n".join(lines) - - if dry_run: - print(lines) - else: - try: - sp.check_call(script, shell=True) - except sp.CalledProcessError as e: - print(e) - print(e.output) - raise - finally: - if delete_scratch_on_exit: - sp.run(f'rm -rf {tmpdir}', shell=True, check=False) + memory = job._memory + if memory is not None: + memory_ratios = {'lowmem': 1024**3, 'standard': 4 * 1024**3, 'highmem': 7 * 1024**3} + if memory in memory_ratios: + if job._cpu is not None: + mcpu = parse_cpu_in_mcpu(job._cpu) + if mcpu is not None: + memory = str(int(memory_ratios[memory] * (mcpu / 1000))) + else: + raise BatchException(f'invalid value for cpu: {job._cpu}') + else: + raise BatchException(f'must specify cpu when using {memory} to specify the memory') + memory = f'-m {memory}' if memory else '' + else: + memory = '' + + code.append(f"docker run " + "--entrypoint=''" + f"{self._extra_docker_run_flags} " + f"-v {tmpdir}:{tmpdir} " + f"-w {tmpdir} " + f"{memory} " + f"{cpu} " + f"{job._image} " + f"{job_shell} -c {quoted_job_script}") + else: + code.append(f"{job_shell} -c {quoted_job_script}") + + code += [x for r in job._external_outputs for x in copy_external_output(r)] + code += ['\n'] + + 
run_code(code) + finally: + if delete_scratch_on_exit: + sp.run(f'rm -rf {tmpdir}', shell=True, check=False) print('Batch completed successfully!') @@ -278,14 +311,17 @@ def _get_random_name(): return _get_random_name() + def close(self): + async_to_blocking(self._fs.close()) + -class ServiceBackend(Backend): +class ServiceBackend(Backend[bc.Batch]): """Backend that executes batches on Hail's Batch Service on Google Cloud. Examples -------- - >>> service_backend = ServiceBackend('my-billing-account', 'my-bucket') # doctest: +SKIP + >>> service_backend = ServiceBackend('my-billing-account', bucket='my-bucket') # doctest: +SKIP >>> b = Batch(backend=service_backend) # doctest: +SKIP >>> b.run() # doctest: +SKIP >>> service_backend.close() # doctest: +SKIP @@ -299,22 +335,58 @@ class ServiceBackend(Backend): >>> b.run() # doctest: +SKIP >>> service_backend.close() + Instead of a bucket, a full path may be specified for the remote temporary directory: + + >>> service_backend = ServiceBackend('my-billing-account', + ... remote_tmpdir='gs://my-bucket/temporary-files/') + >>> b = Batch(backend=service_backend) + >>> b.run() # doctest: +SKIP + >>> service_backend.close() + Parameters ---------- billing_project: Name of billing project to use. bucket: - Name of bucket to use. Should not include the ``gs://`` - prefix. + Name of bucket to use. Should not include the ``gs://`` prefix. Cannot be used with + remote_tmpdir. Temporary data will be stored in the "/batch" folder of this + bucket. Using this parameter as a positional argument is deprecated. + remote_tmpdir: + Temporary data will be stored in this google cloud storage folder. Cannot be used with + bucket. + google_project: + If specified, the project to use when authenticating with Google + Storage. Google Storage is used to transfer serialized values between + this computer and the cloud machines that execute Python jobs. token: The authorization token to pass to the batch client. Should only be set for user delegation purposes. """ def __init__(self, - billing_project: str = None, - bucket: str = None, - token: str = None): + *args, + billing_project: Optional[str] = None, + bucket: Optional[str] = None, + remote_tmpdir: Optional[str] = None, + google_project: Optional[str] = None, + token: str = None + ): + if len(args) > 2: + raise TypeError(f'ServiceBackend() takes 2 positional arguments but {len(args)} were given') + if len(args) >= 1: + if billing_project is not None: + raise TypeError('ServiceBackend() got multiple values for argument \'billing_project\'') + warnings.warn('Use of deprecated positional argument \'billing_project\' in ServiceBackend(). Specify \'billing_project\' as a keyword argument instead.') + billing_project = args[0] + if len(args) >= 2: + if bucket is not None: + raise TypeError('ServiceBackend() got multiple values for argument \'bucket\'') + warnings.warn('Use of deprecated positional argument \'bucket\' in ServiceBackend(). 
Specify \'bucket\' as a keyword argument instead.') + bucket = args[1] + + if remote_tmpdir is not None and bucket is not None: + raise ValueError('Cannot specify both remote_tmpdir and bucket in ServiceBackend()') + if billing_project is None: billing_project = get_user_config().get('batch', 'billing_project', fallback=None) if billing_project is None: @@ -323,15 +395,31 @@ def __init__(self, 'or run `hailctl config set batch/billing_project ' 'MY_BILLING_PROJECT`') self._batch_client = BatchClient(billing_project, _token=token) + self.__fs: AsyncFS = RouterAsyncFS('file', [LocalAsyncFS(ThreadPoolExecutor()), + GoogleStorageAsyncFS(project=google_project)]) + if remote_tmpdir is None: + if bucket is None: + bucket = get_user_config().get('batch', 'bucket', fallback=None) + if bucket is None: + raise ValueError( + 'either the bucket or remote_tmpdir parameter of ServiceBackend ' + 'must be set or run `hailctl config set batch/bucket MY_BUCKET`') + if 'gs://' in bucket: + raise ValueError( + 'The bucket parameter to ServiceBackend() should be a bucket name, not a path. ' + 'Use the remote_tmpdir parameter to specify a path.') + remote_tmpdir = f'gs://{bucket}/batch' + else: + if not remote_tmpdir.startswith('gs://'): + raise ValueError( + 'remote_tmpdir must be a google storage path like gs://bucket/folder') + if remote_tmpdir[-1] != '/': + remote_tmpdir += '/' + self.remote_tmpdir = remote_tmpdir - if bucket is None: - bucket = get_user_config().get('batch', 'bucket', fallback=None) - if bucket is None: - raise ValueError( - 'the bucket parameter of ServiceBackend must be set ' - 'or run `hailctl config set batch/bucket ' - 'MY_BUCKET`') - self._bucket_name = bucket + @property + def _fs(self): + return self.__fs def close(self): """ @@ -343,6 +431,7 @@ def close(self): end of your script. """ self._batch_client.close() + async_to_blocking(self._fs.close()) def _run(self, batch: 'batch.Batch', @@ -354,7 +443,7 @@ def _run(self, disable_progress_bar: bool = False, callback: Optional[str] = None, token: Optional[str] = None, - **backend_kwargs): # pylint: disable-msg=too-many-statements + **backend_kwargs) -> bc.Batch: # pylint: disable-msg=too-many-statements """Execute a batch. Warning @@ -384,14 +473,27 @@ def _run(self, token: If not `None`, a string used for idempotency of batch submission. 
""" - + return async_to_blocking( + self._async_run(batch, dry_run, verbose, delete_scratch_on_exit, wait, open, disable_progress_bar, callback, token, **backend_kwargs)) + + async def _async_run(self, + batch: 'batch.Batch', + dry_run: bool, + verbose: bool, + delete_scratch_on_exit: bool, + wait: bool = True, + open: bool = False, + disable_progress_bar: bool = False, + callback: Optional[str] = None, + token: Optional[str] = None, + **backend_kwargs): # pylint: disable-msg=too-many-statements if backend_kwargs: raise ValueError(f'ServiceBackend does not support any of these keywords: {backend_kwargs}') build_dag_start = time.time() uid = uuid.uuid4().hex[:6] - remote_tmpdir = f'gs://{self._bucket_name}/batch/{uid}' + batch_remote_tmpdir = f'{self.remote_tmpdir}{uid}' local_tmpdir = f'/io/batch/{uid}' default_image = 'ubuntu:18.04' @@ -419,11 +521,11 @@ def copy_input(r): if isinstance(r, resource.InputResourceFile): return [(r._input_path, r._get_path(local_tmpdir))] assert isinstance(r, (resource.JobResourceFile, resource.PythonResult)) - return [(r._get_path(remote_tmpdir), r._get_path(local_tmpdir))] + return [(r._get_path(batch_remote_tmpdir), r._get_path(local_tmpdir))] def copy_internal_output(r): assert isinstance(r, (resource.JobResourceFile, resource.PythonResult)) - return [(r._get_path(local_tmpdir), r._get_path(remote_tmpdir))] + return [(r._get_path(local_tmpdir), r._get_path(batch_remote_tmpdir))] def copy_external_output(r): if isinstance(r, resource.InputResourceFile): @@ -460,16 +562,22 @@ def _cp(src, dst): jobs_to_command[j] = write_cmd n_jobs_submitted += 1 - for job in batch._jobs: - if isinstance(job, _job.PythonJob): - if job._image is None: - version = sys.version_info - if version.major != 3 or version.minor not in (6, 7, 8): - raise BatchException( - f"You must specify 'image' for Python jobs if you are using a Python version other than 3.6, 3.7, or 3.8 (you are using {version})") - job._image = f'hailgenetics/python-dill:{version.major}.{version.minor}-slim' - job._compile(local_tmpdir, remote_tmpdir) - + pyjobs = [j for j in batch._jobs if isinstance(j, _job.PythonJob)] + for job in pyjobs: + if job._image is None: + version = sys.version_info + if version.major != 3 or version.minor not in (6, 7, 8): + raise BatchException( + f"You must specify 'image' for Python jobs if you are using a Python version other than 3.6, 3.7, or 3.8 (you are using {version})") + job._image = f'hailgenetics/python-dill:{version.major}.{version.minor}-slim' + + with tqdm(total=len(pyjobs), desc='upload python functions', disable=disable_progress_bar) as pbar: + async def compile_job(job): + await job._compile(local_tmpdir, batch_remote_tmpdir) + pbar.update(1) + await bounded_gather(*[functools.partial(compile_job, j) for j in pyjobs], parallelism=150) + + for job in tqdm(batch._jobs, desc='create job objects', disable=disable_progress_bar): inputs = [x for r in job._inputs for x in copy_input(r)] outputs = [x for r in job._internal_outputs for x in copy_internal_output(r)] @@ -552,7 +660,7 @@ def _cp(src, dst): if delete_scratch_on_exit and used_remote_tmpdir: parents = list(jobs_to_command.keys()) - rm_cmd = f'gsutil -m rm -r {remote_tmpdir}' + rm_cmd = f'gsutil -m rm -r {batch_remote_tmpdir}' cmd = f''' {bash_flags} {activate_service_account} diff --git a/hail/python/hailtop/batch/batch.py b/hail/python/hailtop/batch/batch.py index e6945026e54..d4edf3f7688 100644 --- a/hail/python/hailtop/batch/batch.py +++ b/hail/python/hailtop/batch/batch.py @@ -1,15 +1,16 @@ import os 
+import warnings import re -import concurrent +from concurrent.futures import ThreadPoolExecutor from typing import Optional, Dict, Union, List, Any, Set from hailtop.utils import secret_alnum_string +from hailtop.aiotools import AsyncFS, RouterAsyncFS, LocalAsyncFS +from hailtop.aiogoogle import GoogleStorageAsyncFS from . import backend as _backend, job, resource as _resource # pylint: disable=cyclic-import from .exceptions import BatchException -from ..google_storage import GCS - class Batch: """ @@ -78,13 +79,15 @@ class Batch: `dill` pre-installed will automatically be used if the current Python version is 3.6, 3.7, or 3.8. project: - If specified, the project to use when authenticating with Google - Storage. Google Storage is used to transfer serialized values between - this computer and the cloud machines that execute Python jobs. + DEPRECATED: please specify `google_project` on the ServiceBackend instead. If specified, + the project to use when authenticating with Google Storage. Google Storage is used to + transfer serialized values between this computer and the cloud machines that execute Python + jobs. cancel_after_n_failures: Automatically cancel the batch after N failures have occurred. The default behavior is there is no limit on the number of failures. Only applicable for the :class:`.ServiceBackend`. Must be greater than 0. + """ _counter = 0 @@ -137,17 +140,23 @@ def __init__(self, self._default_shell = default_shell self._default_python_image = default_python_image - self._project = project - self.__gcs: Optional[GCS] = None + if project is not None: + warnings.warn( + 'The project argument to Batch is deprecated, please instead use the google_project argument to ' + 'ServiceBackend. Use of this argument may trigger warnings from aiohttp about unclosed objects.') + self._DEPRECATED_project = project + self._DEPRECATED_fs: Optional[RouterAsyncFS] = None self._cancel_after_n_failures = cancel_after_n_failures @property - def _gcs(self): - if self.__gcs is None: - self.__gcs = GCS(blocking_pool=concurrent.futures.ThreadPoolExecutor(), - project=self._project) - return self.__gcs + def _fs(self) -> AsyncFS: + if self._DEPRECATED_project is not None: + if self._DEPRECATED_fs is None: + self._DEPRECATED_fs = RouterAsyncFS('file', [LocalAsyncFS(ThreadPoolExecutor()), + GoogleStorageAsyncFS(project=self._DEPRECATED_project)]) + return self._DEPRECATED_fs + return self._backend._fs def new_job(self, name: Optional[str] = None, @@ -553,7 +562,12 @@ def schedule_job(j): raise BatchException("cycle detected in dependency graph") self._jobs = ordered_jobs - return self._backend._run(self, dry_run, verbose, delete_scratch_on_exit, **backend_kwargs) + run_result = self._backend._run(self, dry_run, verbose, delete_scratch_on_exit, **backend_kwargs) # pylint: disable=assignment-from-no-return + if self._DEPRECATED_fs is not None: + # best effort only because this is deprecated + self._DEPRECATED_fs.close() + self._DEPRECATED_fs = None + return run_result def __str__(self): return self._uid diff --git a/hail/python/hailtop/batch/batch_pool_executor.py b/hail/python/hailtop/batch/batch_pool_executor.py index 9aa4b5ea5b5..44855035d87 100644 --- a/hail/python/hailtop/batch/batch_pool_executor.py +++ b/hail/python/hailtop/batch/batch_pool_executor.py @@ -128,8 +128,7 @@ def __init__(self, *, if not isinstance(self.backend, ServiceBackend): raise ValueError(f'BatchPoolExecutor is not compatible with {type(backend)}') self.batches: List[Batch] = [] - bucket: str = self.backend._bucket_name - 
self.directory = f'gs://{bucket}/batch-pool-executor/{self.name}/' + self.directory = self.backend.remote_tmpdir + f'batch-pool-executor/{self.name}/' self.inputs = self.directory + 'inputs/' self.outputs = self.directory + 'outputs/' self.gcs = GCS(blocking_pool=concurrent.futures.ThreadPoolExecutor(), diff --git a/hail/python/hailtop/batch/docs/api.rst b/hail/python/hailtop/batch/docs/api.rst index a6a752c27b6..5a773be52fd 100644 --- a/hail/python/hailtop/batch/docs/api.rst +++ b/hail/python/hailtop/batch/docs/api.rst @@ -96,6 +96,7 @@ at ``__. :nosignatures: :template: class.rst + backend.RunningBatchType backend.Backend backend.LocalBackend backend.ServiceBackend diff --git a/hail/python/hailtop/batch/docs/change_log.rst b/hail/python/hailtop/batch/docs/change_log.rst index 2c5e6222c0b..1b8e258b35e 100644 --- a/hail/python/hailtop/batch/docs/change_log.rst +++ b/hail/python/hailtop/batch/docs/change_log.rst @@ -3,6 +3,21 @@ Change Log ========== +**Version 0.2.70** + +- Made submitting ``PythonJob`` faster when using the ``ServiceBackend`` + +**Version 0.2.69** + +- Added the option to specify either `remote_tmpdir` or `bucket` when using the ``ServiceBackend`` + +**Version 0.2.68** + +- Fixed copying a directory from GCS when using the ``LocalBackend`` +- Fixed writing files to GCS when the bucket name starts with a "g" or an "s" +- Fixed the error "Argument list too long" when using the ``LocalBackend`` +- Fixed an error where memory is set to None when using the ``LocalBackend`` + **Version 0.2.66** - Removed the need for the ``project`` argument in ``Batch()`` unless you are creating a PythonJob diff --git a/hail/python/hailtop/batch/docs/conf.py b/hail/python/hailtop/batch/docs/conf.py index 3e51ed4aaf4..a876394f22a 100644 --- a/hail/python/hailtop/batch/docs/conf.py +++ b/hail/python/hailtop/batch/docs/conf.py @@ -29,6 +29,7 @@ # The full version, including alpha/beta/rc tags release = '' nitpicky = True +nitpick_ignore = [('py:class', 'hailtop.batch_client.client.Batch')] # -- General configuration --------------------------------------------------- diff --git a/hail/python/hailtop/batch/job.py b/hail/python/hailtop/batch/job.py index d574cb82ac8..f2d6ce87767 100644 --- a/hail/python/hailtop/batch/job.py +++ b/hail/python/hailtop/batch/job.py @@ -379,6 +379,11 @@ def gcsfuse(self, bucket, mount_point, read_only=True): if not isinstance(self._batch._backend, backend.ServiceBackend): raise NotImplementedError("A ServiceBackend is required to use the 'gcsfuse' option") + if bucket == '': + raise BatchException('bucket cannot be the empty string') + if mount_point == '': + raise BatchException('mount_point cannot be the empty string') + self._gcsfuse.append((bucket, mount_point, read_only)) return self @@ -871,7 +876,7 @@ def handle_arg(r): return result - def _compile(self, local_tmpdir, remote_tmpdir): + async def _compile(self, local_tmpdir, remote_tmpdir): for i, (result, unapplied, args, kwargs) in enumerate(self._functions): def prepare_argument_for_serialization(arg): if isinstance(arg, _resource.PythonResult): @@ -910,13 +915,8 @@ def wrapped(*args, **kwargs): job_path = os.path.dirname(result._get_path(remote_tmpdir)) code_path = f'{job_path}/code{i}.p' - if isinstance(self._batch._backend, backend.LocalBackend): - os.makedirs(os.path.dirname(code_path), exist_ok=True) - with open(code_path, 'wb') as f: - f.write(pipe.getvalue()) - else: - assert isinstance(self._batch._backend, backend.ServiceBackend) - self._batch._gcs._write_gs_file_from_file_like_object(code_path, pipe) 
+ await self._batch._fs.makedirs(os.path.dirname(code_path), exist_ok=True) + await self._batch._fs.write(code_path, pipe.getvalue()) code = self._batch.read_input(code_path) self._add_inputs(code) diff --git a/hail/python/hailtop/batch_client/aioclient.py b/hail/python/hailtop/batch_client/aioclient.py index 72c11bfe6aa..54274fe43e1 100644 --- a/hail/python/hailtop/batch_client/aioclient.py +++ b/hail/python/hailtop/batch_client/aioclient.py @@ -425,7 +425,8 @@ def create_job(self, image, command, env=None, mount_docker_socket=False, service_account=None, attributes=None, parents=None, input_files=None, output_files=None, always_run=False, timeout=None, gcsfuse=None, requester_pays_project=None, - mount_tokens=False, network: Optional[str] = None): + mount_tokens=False, network: Optional[str] = None, + unconfined: bool = False): if self._submitted: raise ValueError("cannot create a job in an already submitted batch") @@ -499,6 +500,8 @@ def create_job(self, image, command, env=None, mount_docker_socket=False, job_spec['mount_tokens'] = mount_tokens if network: job_spec['network'] = network + if unconfined: + job_spec['unconfined'] = unconfined self._job_specs.append(job_spec) diff --git a/hail/python/hailtop/batch_client/client.py b/hail/python/hailtop/batch_client/client.py index ddeab7438ca..19c6ebd2e2b 100644 --- a/hail/python/hailtop/batch_client/client.py +++ b/hail/python/hailtop/batch_client/client.py @@ -202,7 +202,8 @@ def create_job(self, image, command, env=None, mount_docker_socket=False, service_account=None, attributes=None, parents=None, input_files=None, output_files=None, always_run=False, timeout=None, gcsfuse=None, requester_pays_project=None, - mount_tokens=False, network: Optional[str] = None) -> Job: + mount_tokens=False, network: Optional[str] = None, + unconfined: bool = False) -> Job: if parents: parents = [parent._async_job for parent in parents] @@ -214,7 +215,7 @@ def create_job(self, image, command, env=None, mount_docker_socket=False, input_files=input_files, output_files=output_files, always_run=always_run, timeout=timeout, gcsfuse=gcsfuse, requester_pays_project=requester_pays_project, mount_tokens=mount_tokens, - network=network) + network=network, unconfined=unconfined) return Job.from_async_job(async_job) diff --git a/hail/python/hailtop/google_storage.py b/hail/python/hailtop/google_storage.py index 00d7581efb6..645a21a9f5e 100644 --- a/hail/python/hailtop/google_storage.py +++ b/hail/python/hailtop/google_storage.py @@ -18,7 +18,7 @@ class GCS: @staticmethod def _parse_uri(uri: str): assert uri.startswith('gs://'), uri - uri_parts = uri.lstrip('gs://').split('/') + uri_parts = uri[5:].split('/') bucket = uri_parts[0] path = '/'.join(uri_parts[1:]) return bucket, path diff --git a/hail/python/hailtop/hailctl/dataproc/resources/init_notebook.py b/hail/python/hailtop/hailctl/dataproc/resources/init_notebook.py index 072b20c1371..871ce513546 100644 --- a/hail/python/hailtop/hailctl/dataproc/resources/init_notebook.py +++ b/hail/python/hailtop/hailctl/dataproc/resources/init_notebook.py @@ -82,6 +82,7 @@ def mkdir_if_not_exists(path): 'SPARK_HOME': '/usr/lib/spark/', 'PYSPARK_PYTHON': '/opt/conda/default/bin/python', 'PYSPARK_DRIVER_PYTHON': '/opt/conda/default/bin/python', + 'HAIL_LOG_DIR': '/home/hail', } # VEP ENV diff --git a/hail/python/hailtop/httpx.py b/hail/python/hailtop/httpx.py index cb9f051a08f..bb28ad0906b 100644 --- a/hail/python/hailtop/httpx.py +++ b/hail/python/hailtop/httpx.py @@ -7,6 +7,62 @@ from .config.deploy_config import 
get_deploy_config +class ClientResponseError(aiohttp.ClientResponseError): + def __init__(self, + request_info: aiohttp.RequestInfo, + history: Tuple[aiohttp.ClientResponse, ...], + body: str = "", + **kwargs): + super().__init__(request_info, history, **kwargs) + self.body = body + + def __str__(self) -> str: + return "{}, message={!r}, url={!r} body={!r}".format( + self.status, + self.message, + self.request_info.real_url, + self.body + ) + + def __repr__(self) -> str: + args = f"{self.request_info!r}, {self.history!r}" + if self.status != 0: + args += f", status={self.status!r}" + if self.message != "": + args += f", message={self.message!r}" + if self.headers is not None: + args += f", headers={self.headers!r}" + if self.body is not None: + args += f", body={self.body!r}" + return "{}({})".format(type(self).__name__, args) + + +class ClientSession(aiohttp.ClientSession): + async def _request( + self, + method: str, + str_or_url: aiohttp.client.StrOrURL, + **kwargs + ): + raise_for_status = kwargs.pop('raise_for_status', self._raise_for_status) + resp = await super()._request(method, str_or_url, raise_for_status=False, **kwargs) + if raise_for_status: + if resp.status >= 400: + # reason should always be not None for a started response + assert resp.reason is not None + body = (await resp.read()).decode() + resp.release() + raise ClientResponseError( + resp.request_info, + resp.history, + status=resp.status, + message=resp.reason, + headers=resp.headers, + body=body + ) + return resp + + def client_session(*args, raise_for_status: bool = True, timeout: Union[aiohttp.ClientTimeout, float] = None, @@ -30,7 +86,7 @@ def client_session(*args, timeout = aiohttp.ClientTimeout(total=5) kwargs['timeout'] = timeout - return aiohttp.ClientSession(*args, **kwargs) + return ClientSession(*args, **kwargs) def blocking_client_session(*args, **kwargs) -> 'BlockingClientSession': diff --git a/hail/python/hailtop/utils/__init__.py b/hail/python/hailtop/utils/__init__.py index 11e146fdbe4..dd94f1f81bd 100644 --- a/hail/python/hailtop/utils/__init__.py +++ b/hail/python/hailtop/utils/__init__.py @@ -10,7 +10,7 @@ flatten, partition, cost_str, external_requests_client_session, url_basename, url_join, is_google_registry_domain, parse_docker_image_reference, url_scheme, Notice, periodically_call, dump_all_stacktraces, find_spark_home, TransientError, - bounded_gather2, OnlineBoundedGather2, unpack_comma_delimited_inputs) + bounded_gather2, OnlineBoundedGather2, unpack_comma_delimited_inputs, retry_all_errors_n_times) from .process import ( CalledProcessError, check_shell, check_shell_output, sync_check_shell, sync_check_shell_output) @@ -81,5 +81,6 @@ 'OnlineBoundedGather2', 'unpack_comma_delimited_inputs', 'is_google_registry_domain', - 'parse_docker_image_reference' + 'parse_docker_image_reference', + 'retry_all_errors_n_times' ] diff --git a/hail/python/hailtop/utils/utils.py b/hail/python/hailtop/utils/utils.py index 0ad2d1f2adf..16ecf752e9f 100644 --- a/hail/python/hailtop/utils/utils.py +++ b/hail/python/hailtop/utils/utils.py @@ -23,8 +23,11 @@ from requests.adapters import HTTPAdapter from urllib3.poolmanager import PoolManager +import hailtop + from .time import time_msecs + log = logging.getLogger('hailtop.utils') @@ -268,62 +271,19 @@ async def __aexit__(self, await self.wait() -class Subsemaphore: - def __init__(self, sema: asyncio.Semaphore): +class WithoutSemaphore: + def __init__(self, sema): self._sema = sema - self._borrowed = 0 - self._lent = False - self._pending: List[Callable[[], None]] = 
[] - - async def acquire(self): - if not self._lent: - self._lent = True - return self - - acquired = asyncio.Event() - - async def borrow(): - await self._sema.acquire() - if acquired.is_set(): - self._sema.release() - return - self._borrowed += 1 - acquired.set() - - def on_return(): - assert not self._lent - if acquired.is_set(): - return - self._lent = True - acquired.set() - - asyncio.create_task(borrow()) - self._pending.append(on_return) - - await acquired.wait() - - return self - def release(self): - if self._borrowed > 0: - self._sema.release() - self._borrowed -= 1 - else: - assert self._lent - self._lent = False - while self._pending and not self._lent: - f = self._pending.pop() - f() - - async def __aenter__(self) -> 'Subsemaphore': - await self.acquire() + async def __aenter__(self) -> 'WithoutSemaphore': + self._sema.release() return self async def __aexit__(self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]) -> None: - self.release() + await self._sema.acquire() class PoolShutdownError(Exception): @@ -354,7 +314,7 @@ class OnlineBoundedGather2: def __init__(self, sema: asyncio.Semaphore): self._counter = 0 - self._subsema = Subsemaphore(sema) + self._sema = sema self._pending: Optional[Dict[int, asyncio.Task]] = {} # done if there are no pending tasks (the tasks are all # complete), or if we've shutdown and the cancelled tasks are @@ -404,7 +364,7 @@ async def call(self, f, *args, **kwargs) -> asyncio.Task: async def run_and_cleanup(): try: - async with self._subsema: + async with self._sema: await f(*args, **kwargs) except asyncio.CancelledError: pass @@ -412,7 +372,7 @@ async def run_and_cleanup(): if self._exception is None: _, exc, _ = sys.exc_info() self._exception = exc - await self._shutdown() + await asyncio.shield(self._shutdown()) else: log.info('discarding exception', exc_info=True) @@ -437,22 +397,16 @@ async def wait(self, tasks: List[asyncio.Task]) -> None: pool after waiting. ''' - self._subsema.release() - try: + async with WithoutSemaphore(self._sema): await asyncio.wait(tasks) - finally: - await self._subsema.acquire() async def __aenter__(self) -> 'OnlineBoundedGather2': - await self._subsema.acquire() return self async def __aexit__(self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]) -> None: - self._subsema.release() - if exc_val: if self._exception is None: self._exception = exc_val @@ -463,64 +417,68 @@ async def __aexit__(self, # wait for done and not pending _done_event.wait can return # when when there are pending jobs if the last job completed # (setting _done_event) and then more tasks were submitted - await self._done_event.wait() + async with WithoutSemaphore(self._sema): + await self._done_event.wait() while self._pending: assert not self._done_event.is_set() - await self._done_event.wait() + async with WithoutSemaphore(self._sema): + await self._done_event.wait() if self._exception: raise self._exception -async def bounded_gather2_return_exceptions(sema: asyncio.Semaphore, *aws): - '''Run the awaitables aws as tasks with parallelism bounded by sema, - which should be asyncio.Semaphore whose initial value is the level - of parallelism. +async def bounded_gather2_return_exceptions(sema: asyncio.Semaphore, *pfs): + '''Run the partial functions `pfs` as tasks with parallelism bounded + by `sema`, which should be `asyncio.Semaphore` whose initial value + is the desired level of parallelism. 
- The return value is the list of awaitable results as pairs: the - pair (value, None) if the awaitable returned value or (None, exc) - if the awaitable raised the exception exc. - ''' - subsema = Subsemaphore(sema) + The return value is the list of partial function results as pairs: + the pair `(value, None)` if the partial function returned value or + `(None, exc)` if the partial function raised the exception `exc`. - async def run_with_sema_return_exceptions(aw): + ''' + async def run_with_sema_return_exceptions(pf): try: - async with subsema: - return (await aw, None) + async with sema: + return (await pf(), None) except: _, exc, _ = sys.exc_info() return (None, exc) - return await asyncio.gather(*[asyncio.create_task(run_with_sema_return_exceptions(aw)) for aw in aws]) + tasks = [asyncio.create_task(run_with_sema_return_exceptions(pf)) for pf in pfs] + async with WithoutSemaphore(sema): + return await asyncio.gather(*tasks) -async def bounded_gather2_raise_exceptions(sema: asyncio.Semaphore, *aws, cancel_on_error: bool = False): - '''Run the awaitables aws as tasks with parallelism bounded by sema, - which should be asyncio.Semaphore whose initial value is the level - of parallelism. +async def bounded_gather2_raise_exceptions(sema: asyncio.Semaphore, *pfs, cancel_on_error: bool = False): + '''Run the partial functions `pfs` as tasks with parallelism bounded + by `sema`, which should be `asyncio.Semaphore` whose initial value + is the level of parallelism. - The return value is the list of awaitable results. + The return value is the list of partial function results. - The first exception raised by an awaitable is raised by + The first exception raised by a partial function is raised by bounded_gather2_raise_exceptions. - If cancel_on_error is False (the default), the remaining - awaitables continue to run with bounded parallelism. If + If cancel_on_error is False (the default), the remaining partial + functions continue to run with bounded parallelism. If cancel_on_error is True, the unfinished tasks are all cancelled. 
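To make the new calling convention concrete: bounded_gather2 now receives zero-argument callables (typically functools.partial objects) rather than bare coroutines, so each underlying coroutine is only created once a slot under the semaphore is available. A hedged usage sketch (the fetch coroutine and URL list are illustrative, not from this patch):

    import asyncio
    import functools
    from hailtop.utils import bounded_gather2

    async def fetch(url: str) -> bytes:
        ...  # illustrative coroutine

    async def fetch_all(urls):
        sema = asyncio.Semaphore(10)  # at most 10 fetches in flight
        return await bounded_gather2(
            sema,
            *[functools.partial(fetch, url) for url in urls],
            cancel_on_error=True)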
- ''' - subsema = Subsemaphore(sema) - async def run_with_subsema(aw): - async with subsema: - return await aw + ''' + async def run_with_sema(pf): + async with sema: + return await pf() - tasks = [asyncio.create_task(run_with_subsema(aw)) for aw in aws] + tasks = [asyncio.create_task(run_with_sema(pf)) for pf in pfs] if not cancel_on_error: - return await asyncio.gather(*tasks) + async with WithoutSemaphore(sema): + return await asyncio.gather(*tasks) try: - return await asyncio.gather(*tasks) + async with WithoutSemaphore(sema): + return await asyncio.gather(*tasks) finally: _, exc, _ = sys.exc_info() if exc is not None: @@ -528,13 +486,14 @@ async def run_with_subsema(aw): if not task.done(): task.cancel() if tasks: - await asyncio.wait(tasks) + async with WithoutSemaphore(sema): + await asyncio.wait(tasks) -async def bounded_gather2(sema: asyncio.Semaphore, *aws, return_exceptions: bool = False, cancel_on_error: bool = False): +async def bounded_gather2(sema: asyncio.Semaphore, *pfs, return_exceptions: bool = False, cancel_on_error: bool = False): if return_exceptions: - return await bounded_gather2_return_exceptions(sema, *aws) - return await bounded_gather2_raise_exceptions(sema, *aws, cancel_on_error=cancel_on_error) + return await bounded_gather2_return_exceptions(sema, *pfs) + return await bounded_gather2_raise_exceptions(sema, *pfs, cancel_on_error=cancel_on_error) RETRYABLE_HTTP_STATUS_CODES = {408, 500, 502, 503, 504} @@ -599,6 +558,9 @@ def is_transient_error(e): # 408 request timeout, 500 internal server error, 502 bad gateway # 503 service unavailable, 504 gateway timeout return True + if isinstance(e, hailtop.httpx.ClientResponseError) and ( + e.status == 403 and 'rateLimitExceeded' in e.body): + return True if isinstance(e, aiohttp.ServerTimeoutError): return True if isinstance(e, aiohttp.ServerDisconnectedError): @@ -607,9 +569,6 @@ def is_transient_error(e): return True if isinstance(e, aiohttp.client_exceptions.ClientConnectorError): return hasattr(e, 'os_error') and is_transient_error(e.os_error) - if isinstance(e, aiohttp.ClientOSError): - # aiohttp/client_reqrep.py wraps all OSError instances with a ClientOSError - return is_transient_error(e.__cause__) # appears to happen when the connection is lost prematurely, see: # https://github.com/aio-libs/aiohttp/issues/4581 # https://github.com/aio-libs/aiohttp/blob/v3.7.4/aiohttp/client_proto.py#L85 @@ -625,6 +584,9 @@ def is_transient_error(e): errno.EPIPE )): return True + if isinstance(e, aiohttp.ClientOSError): + # aiohttp/client_reqrep.py wraps all OSError instances with a ClientOSError + return is_transient_error(e.__cause__) if isinstance(e, urllib3.exceptions.ReadTimeoutError): return True if isinstance(e, requests.exceptions.ReadTimeout): @@ -680,6 +642,25 @@ async def _wrapper(f, *args, **kwargs): return _wrapper +def retry_all_errors_n_times(max_errors=10, msg=None, error_logging_interval=10): + async def _wrapper(f, *args, **kwargs): + delay = 0.1 + errors = 0 + while True: + try: + return await f(*args, **kwargs) + except asyncio.CancelledError: # pylint: disable=try-except-raise + raise + except Exception: + errors += 1 + if msg and errors % error_logging_interval == 0: + log.exception(msg, stack_info=True) + if errors >= max_errors: + raise + delay = await sleep_and_backoff(delay) + return _wrapper + + T = TypeVar('T') # pylint: disable=invalid-name diff --git a/hail/python/hailtop/utils/validate/__init__.py b/hail/python/hailtop/utils/validate/__init__.py index f77c23691fd..0af209db9ef 100644 --- 
a/hail/python/hailtop/utils/validate/__init__.py +++ b/hail/python/hailtop/utils/validate/__init__.py @@ -1,5 +1,5 @@ from .validate import anyof, bool_type, dictof, keyed, listof, int_type, nullable, \ - numeric, oneof, regex, required, str_type, switch, ValidationError + numeric, oneof, regex, required, str_type, non_empty_str_type, switch, ValidationError __all__ = [ 'anyof', @@ -14,6 +14,7 @@ 'regex', 'required', 'str_type', + 'non_empty_str_type', 'switch', 'ValidationError' ] diff --git a/hail/python/hailtop/utils/validate/validate.py b/hail/python/hailtop/utils/validate/validate.py index fcfc2df5b19..acf4f8ad62b 100644 --- a/hail/python/hailtop/utils/validate/validate.py +++ b/hail/python/hailtop/utils/validate/validate.py @@ -140,6 +140,12 @@ def validate(self, name: str, obj): self.checker.validate(name, obj) +class TruthyValidator: + def validate(self, name: str, obj): # pylint: disable=no-self-use + if not obj: + raise ValidationError(f'{name} cannot be {obj}') + + class MultipleValidator: def __init__(self, checkers: List['Validator']): self.checkers = checkers @@ -162,10 +168,11 @@ def required(key: str): str_type = TypedValidator(str) +non_empty_str_type = MultipleValidator([str_type, TruthyValidator()]) bool_type = TypedValidator(bool) int_type = TypedValidator(int) -Validator = Union[TypedValidator, NumericValidator, NullableValidator, SetValidator] +Validator = Union[TypedValidator, NumericValidator, NullableValidator, TruthyValidator, SetValidator] def dictof(vchecker: Validator): diff --git a/hail/python/requirements.txt b/hail/python/requirements.txt index 34aa6f95fa8..5407945cc8e 100644 --- a/hail/python/requirements.txt +++ b/hail/python/requirements.txt @@ -2,6 +2,8 @@ aiohttp==3.7.4 aiohttp_session>=2.7,<2.8 asyncinit>=0.2.4,<0.3 bokeh>1.3,<2.0 +boto3>=1.17,<2.0 +botocore>=1.20,<2.0 decorator<5 Deprecated>=1.2.10,<1.3 dill>=0.3.1.1,<0.4 @@ -11,6 +13,7 @@ gcsfs==0.8.0 fsspec==0.9.0 humanize==1.0.0 hurry.filesize==0.9 +janus>=0.6,<0.7 nest_asyncio numpy<2 pandas>=1.1.0,<1.1.5 @@ -18,7 +21,7 @@ parsimonious<0.9 PyJWT pyspark>=3.1.1,<3.2.0 python-json-logger==0.1.11 -requests==2.22.0 +requests==2.25.1 scipy>1.2,<1.7 tabulate==0.8.3 tqdm==4.42.1 diff --git a/hail/python/setup.py b/hail/python/setup.py index c7141145641..64bcd1789b4 100755 --- a/hail/python/setup.py +++ b/hail/python/setup.py @@ -38,6 +38,7 @@ project_urls={ 'Documentation': 'https://hail.is/docs/0.2/', 'Repository': 'https://github.com/hail-is/hail', + 'Change Log': 'https://hail.is/docs/0.2/change_log.html', }, packages=find_packages('.'), package_dir={ diff --git a/hail/python/test/hail/expr/test_expr.py b/hail/python/test/hail/expr/test_expr.py index 660dc2765ed..11b7c364f69 100644 --- a/hail/python/test/hail/expr/test_expr.py +++ b/hail/python/test/hail/expr/test_expr.py @@ -1466,40 +1466,35 @@ def test_str_ops(self): self.assertFalse(hl.eval(s_whitespace.endswith('a'))) def test_str_parsing(self): - for x in ('true', 'True', 'TRUE'): - self.assertTrue(hl.eval(hl.bool(x))) - - for x in ('false', 'False', 'FALSE'): - self.assertFalse(hl.eval(hl.bool(x))) + assert_all_eval_to(*[(hl.bool(x), True) for x in ('true', 'True', 'TRUE')]) + assert_all_eval_to(*[(hl.bool(x), False) for x in ('false', 'False', 'FALSE')]) for x in ('nan', 'Nan', 'naN', 'NaN'): for f in (hl.float, hl.float32, hl.float64, hl.parse_float32, hl.parse_float64): - self.assertTrue(hl.eval(hl.is_nan(f(x)))) - self.assertTrue(hl.eval(hl.is_nan(f('+' + x)))) - self.assertTrue(hl.eval(hl.is_nan(f('-' + x)))) - + assert_all_eval_to( + 
(hl.is_nan(f(x)), True), + (hl.is_nan(f('+' + x)), True), + (hl.is_nan(f('-' + x)), True) + ) for x in ('inf', 'Inf', 'iNf', 'InF', 'infinity', 'InfiNitY', 'INFINITY'): for f in (hl.float, hl.float32, hl.float64, hl.parse_float32, hl.parse_float64): - self.assertTrue(hl.eval(hl.is_infinite(f(x)))) - self.assertTrue(hl.eval(hl.is_infinite(f('+' + x)))) - self.assertTrue(hl.eval(hl.is_infinite(f('-' + x)))) - self.assertTrue(hl.eval(f('-' + x) < 0.0)) + assert_all_eval_to( + (hl.is_infinite(f(x)), True), + (hl.is_infinite(f('+' + x)), True), + (hl.is_infinite(f('-' + x)), True), + (f('-' + x) < 0.0, True) + ) for x in ('0', '1', '-5', '12382421'): - for f in (hl.int32, hl.int64, hl.parse_int32, hl.parse_int64): - self.assertEqual(hl.eval(f(hl.literal(x))), int(x)) - for f in (hl.float32, hl.float64, hl.parse_float32, hl.parse_float64): - self.assertEqual(hl.eval(f(hl.literal(x))), float(x)) + assert_all_eval_to(*[(f(hl.literal(x)), int(x)) for f in (hl.int32, hl.int64, hl.parse_int32, hl.parse_int64)]) + assert_all_eval_to(*[(f(hl.literal(x)), float(x)) for f in (hl.float32, hl.float64, hl.parse_float32, hl.parse_float64)]) for x in ('-1.5', '0.0', '2.5'): - for f in (hl.float32, hl.float64, hl.parse_float32, hl.parse_float64): - self.assertEqual(hl.eval(f(hl.literal(x))), float(x)) - for f in (hl.parse_int32, hl.parse_int64): - self.assertEqual(hl.eval(f(hl.literal(x))), None) + assert_all_eval_to(*[(f(hl.literal(x)), float(x)) for f in (hl.float32, hl.float64, hl.parse_float32, hl.parse_float64)]) + assert_all_eval_to(*[(f(hl.literal(x)), None) for f in (hl.parse_int32, hl.parse_int64)]) for x in ('abc', '1abc', ''): - for f in (hl.parse_float32, hl.parse_float64, hl.parse_int32, hl.parse_int64): - self.assertEqual(hl.eval(f(hl.literal(x))), None) + assert_all_eval_to(*[(f(hl.literal(x)), None) for f in (hl.parse_float32, hl.parse_float64, hl.parse_int32, hl.parse_int64)]) def test_str_missingness(self): self.assertEqual(hl.eval(hl.str(1)), '1') @@ -1526,33 +1521,33 @@ def test_division(self): expected = [0.5, 1.0, 2.0, 4.0, None] expected_inv = [2.0, 1.0, 0.5, 0.25, None] - self.check_expr(a_int32 / 4, expected, tarray(tfloat32)) - self.check_expr(a_int64 / 4, expected, tarray(tfloat32)) + self.check_expr(a_int32 / 4, expected, tarray(tfloat64)) + self.check_expr(a_int64 / 4, expected, tarray(tfloat64)) self.check_expr(a_float32 / 4, expected, tarray(tfloat32)) self.check_expr(a_float64 / 4, expected, tarray(tfloat64)) - self.check_expr(int32_4s / a_int32, expected_inv, tarray(tfloat32)) - self.check_expr(int32_4s / a_int64, expected_inv, tarray(tfloat32)) + self.check_expr(int32_4s / a_int32, expected_inv, tarray(tfloat64)) + self.check_expr(int32_4s / a_int64, expected_inv, tarray(tfloat64)) self.check_expr(int32_4s / a_float32, expected_inv, tarray(tfloat32)) self.check_expr(int32_4s / a_float64, expected_inv, tarray(tfloat64)) - self.check_expr(a_int32 / int32_4s, expected, tarray(tfloat32)) - self.check_expr(a_int64 / int32_4s, expected, tarray(tfloat32)) + self.check_expr(a_int32 / int32_4s, expected, tarray(tfloat64)) + self.check_expr(a_int64 / int32_4s, expected, tarray(tfloat64)) self.check_expr(a_float32 / int32_4s, expected, tarray(tfloat32)) self.check_expr(a_float64 / int32_4s, expected, tarray(tfloat64)) - self.check_expr(a_int32 / int64_4, expected, tarray(tfloat32)) - self.check_expr(a_int64 / int64_4, expected, tarray(tfloat32)) + self.check_expr(a_int32 / int64_4, expected, tarray(tfloat64)) + self.check_expr(a_int64 / int64_4, expected, tarray(tfloat64)) 
self.check_expr(a_float32 / int64_4, expected, tarray(tfloat32)) self.check_expr(a_float64 / int64_4, expected, tarray(tfloat64)) - self.check_expr(int64_4 / a_int32, expected_inv, tarray(tfloat32)) - self.check_expr(int64_4 / a_int64, expected_inv, tarray(tfloat32)) + self.check_expr(int64_4 / a_int32, expected_inv, tarray(tfloat64)) + self.check_expr(int64_4 / a_int64, expected_inv, tarray(tfloat64)) self.check_expr(int64_4 / a_float32, expected_inv, tarray(tfloat32)) self.check_expr(int64_4 / a_float64, expected_inv, tarray(tfloat64)) - self.check_expr(a_int32 / int64_4s, expected, tarray(tfloat32)) - self.check_expr(a_int64 / int64_4s, expected, tarray(tfloat32)) + self.check_expr(a_int32 / int64_4s, expected, tarray(tfloat64)) + self.check_expr(a_int64 / int64_4s, expected, tarray(tfloat64)) self.check_expr(a_float32 / int64_4s, expected, tarray(tfloat32)) self.check_expr(a_float64 / int64_4s, expected, tarray(tfloat64)) @@ -2434,6 +2429,9 @@ def test_array_methods(self): fm = hl.flatmap(lambda x: hl.set(hl.range(0, x.length()).map(lambda i: x[i])), {"ABC", "AAa", "BD"}) self.assertEqual(hl.eval(fm), {'A', 'a', 'B', 'C', 'D'}) + def test_starmap(self): + self.assertEqual(hl.eval(hl.array([(1, 2), (2, 3)]).starmap(lambda x,y: x+y)), [3, 5]) + def test_array_corr(self): x1 = [random.uniform(-10, 10) for x in range(10)] x2 = [random.uniform(-10, 10) for x in range(10)] @@ -2472,6 +2470,10 @@ def test_sorted(self): self.assertEqual(hl.sorted([0, 1, 4, hl.missing(tint), 3, 2], lambda x: x, reverse=True).collect()[0], [4, 3, 2, 1, 0, None]) self.assertEqual(hl.eval(hl.sorted([0, 1, 4, hl.missing(tint), 3, 2], lambda x: x, reverse=True)), [4, 3, 2, 1, 0, None]) + self.assertEqual(hl.eval(hl.sorted({0, 1, 4, 3, 2})), [0, 1, 2, 3, 4]) + + self.assertEqual(hl.eval(hl.sorted({"foo": 1, "bar": 2})), [("bar", 2), ("foo", 1)]) + def test_sort_by(self): self.assertEqual(hl.eval(hl._sort_by(["c", "aaa", "bb", hl.missing(hl.tstr)], lambda l, r: hl.len(l) < hl.len(r))), ["c", "bb", "aaa", None]) self.assertEqual(hl.eval(hl._sort_by([hl.Struct(x=i, y="foo", z=5.5) for i in [5, 3, 8, 2, 5]], lambda l, r: l.x < r.x)), @@ -3058,6 +3060,7 @@ def test_set_functions(self): t = hl.set([3, 8]) self.assert_evals_to(s, set([1, 3, 7])) + self.assert_evals_to(hl.set(frozenset([1, 2, 3])), set([1, 2, 3])) self.assert_evals_to(s.add(3), set([1, 3, 7])) self.assert_evals_to(s.add(4), set([1, 3, 4, 7])) diff --git a/hail/python/test/hail/expr/test_ndarrays.py b/hail/python/test/hail/expr/test_ndarrays.py index da727c40b50..3192a9f4c0e 100644 --- a/hail/python/test/hail/expr/test_ndarrays.py +++ b/hail/python/test/hail/expr/test_ndarrays.py @@ -73,7 +73,8 @@ def test_ndarray_slice(): a = [0, 1] an = np.array(a) ah = hl.nd.array(a) - + ae_np = np.arange(4*4*5*6*5*4).reshape((4, 4, 5, 6, 5, 4)) + ae = hl.nd.array(ae_np) assert_ndarrays_eq( (rect_prism[:, :, :], np_rect_prism[:, :, :]), (rect_prism[:, :, 1], np_rect_prism[:, :, 1]), @@ -87,7 +88,26 @@ def test_ndarray_slice(): np_rect_prism[0:, :, 1:4:2] + np_rect_prism[:, :1, 1:4:2]), (rect_prism[0, 0, -3:-1], np_rect_prism[0, 0, -3:-1]), (rect_prism[-1, 0:1, 3:0:-1], np_rect_prism[-1, 0:1, 3:0:-1]), - + # partial indexing + (rect_prism[1], np_rect_prism[1]), + (rect_prism[1:2], np_rect_prism[1:2]), + (rect_prism[1:2:2], np_rect_prism[1:2:2]), + (rect_prism[1, 2], np_rect_prism[1, 2]), + (rect_prism[-1, 1:2:2], np_rect_prism[-1, 1:2:2]), + # ellipses inclusion + (rect_prism[...], np_rect_prism[...]), + (rect_prism[1, ...], np_rect_prism[1, ...]), + (rect_prism[..., 1], 
np_rect_prism[..., 1]), + # np.newaxis inclusion + (rect_prism[hl.nd.newaxis, :, :], np_rect_prism[np.newaxis, :, :]), + (rect_prism[hl.nd.newaxis], np_rect_prism[np.newaxis]), + (rect_prism[hl.nd.newaxis, np.newaxis, np.newaxis], np_rect_prism[np.newaxis, np.newaxis, np.newaxis]), + (rect_prism[hl.nd.newaxis, np.newaxis, 1:4:2], np_rect_prism[np.newaxis, np.newaxis, 1:4:2]), + (rect_prism[1, :, hl.nd.newaxis], np_rect_prism[1, :, np.newaxis]), + (rect_prism[1, hl.nd.newaxis, 1], np_rect_prism[1, np.newaxis, 1]), + (rect_prism[..., hl.nd.newaxis, 1], np_rect_prism[..., np.newaxis, 1]), + ) + assert_ndarrays_eq( (flat[15:5:-1], np_flat[15:5:-1]), (flat[::-1], np_flat[::-1]), (flat[::22], np_flat[::22]), @@ -98,6 +118,9 @@ def test_ndarray_slice(): (flat[4:1:-2], np_flat[4:1:-2]), (flat[0:0:1], np_flat[0:0:1]), (flat[-4:-1:2], np_flat[-4:-1:2]), + # ellipses inclusion + (flat[...], np_flat[...]), + (mat[::-1, :], np_mat[::-1, :]), (mat[0, 1:4:2] + mat[:, 1:4:2], np_mat[0, 1:4:2] + np_mat[:, 1:4:2]), @@ -128,11 +151,24 @@ def test_ndarray_slice(): (mat[:-5:-1, 0], np_mat[:-5:-1, 0]), (mat[0:-5, 0], np_mat[0:-5, 0]), (mat[0:-5:-1, 0], np_mat[0:-5:-1, 0]), + # partial indexing + (mat[1], np_mat[1]), + (mat[0:1], np_mat[0:1]), + # ellipses inclusion + (mat[...], np_mat[...]), (ah[:-3:1], an[:-3:1]), (ah[:-3:-1], an[:-3:-1]), (ah[-3::-1], an[-3::-1]), - (ah[-3::1], an[-3::1]) + (ah[-3::1], an[-3::1]), + + # ellipses inclusion + (ae[..., 3], ae_np[..., 3]), + (ae[3, ...], ae_np[3, ...]), + (ae[2, 3, 1:2:2, ...], ae_np[2, 3, 1:2:2, ...]), + (ae[3, 2, 3, ..., 2], ae_np[3, 2, 3, ..., 2]), + (ae[3, 2, 2, ..., 2, 1:2:2], ae_np[3, 2, 2, ..., 2, 1:2:2]), + (ae[3, :, hl.nd.newaxis, ..., :, hl.nd.newaxis, 2], ae_np[3, :, np.newaxis, ..., :, np.newaxis, 2]) ) assert hl.eval(flat[hl.missing(hl.tint32):4:1]) is None @@ -150,6 +186,12 @@ def test_ndarray_slice(): with pytest.raises(HailUserError, match="Index -4 is out of bounds for axis 0 with size 2"): hl.eval(mat[-4, 0:3]) + with pytest.raises(IndexError, match="an index can only have a single ellipsis"): + hl.eval(rect_prism[..., ...]) + + with pytest.raises(IndexError, match="too many indices for array: array is 3-dimensional, but 4 were indexed"): + hl.eval(rect_prism[1, 1, 1, 1]) + def test_ndarray_transposed_slice(): a = hl.nd.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]) @@ -325,7 +367,7 @@ def test_ndarray_reshape(): a.reshape(hl.tuple(['4', '5'])) -def test_ndarray_map(): +def test_ndarray_map1(): a = hl.nd.array([[2, 3, 4], [5, 6, 7]]) b = hl.map(lambda x: -x, a) b2 = b.map(lambda x: x * x) @@ -339,9 +381,10 @@ def test_ndarray_map(): assert hl.eval(hl.missing(hl.tndarray(hl.tfloat, 1)).map(lambda x: x * 2)) is None - s = hl.nd.array(["hail", "is", "great"]) - s_lens = s.map(lambda e: hl.len(e)) - assert np.array_equal(hl.eval(s_lens), np.array([4, 2, 5])) + # NDArrays don't correctly support elements that contain pointers at the moment. 
+ # s = hl.nd.array(["hail", "is", "great"]) + # s_lens = s.map(lambda e: hl.len(e)) + # assert np.array_equal(hl.eval(s_lens), np.array([4, 2, 5])) structs = hl.nd.array([hl.struct(x=5, y=True), hl.struct(x=9, y=False)]) assert np.array_equal(hl.eval(structs.map(lambda e: e.y)), np.array([True, False])) @@ -427,7 +470,8 @@ def test_ndarray_map2(): (ncube1 // ny, cube1 // y), (ny // ncube1, y // cube1), (ncube1 // nrow_vec, cube1 // row_vec), - (nrow_vec // ncube1, row_vec // cube1)) + (nrow_vec // ncube1, row_vec // cube1) + ) # Division assert_ndarrays_almost_eq( @@ -1046,13 +1090,11 @@ def assert_table(a, b): def test_eye(): for i in range(13): - for y in range(13): - assert(np.array_equal(hl.eval(hl.nd.eye(i, y)), np.eye(i, y))) + assert_ndarrays_eq(*[(hl.nd.eye(i, y), np.eye(i, y)) for y in range(13)]) def test_identity(): - for i in range(13): - assert(np.array_equal(hl.eval(hl.nd.identity(i)), np.identity(i))) + assert_ndarrays_eq(*[(hl.nd.identity(i), np.identity(i)) for i in range(13)]) def test_agg_ndarray_sum(): diff --git a/hail/python/test/hail/helpers.py b/hail/python/test/hail/helpers.py index 62e5b21c9c4..1e43bb020dd 100644 --- a/hail/python/test/hail/helpers.py +++ b/hail/python/test/hail/helpers.py @@ -130,7 +130,6 @@ def wrapper(func, *args, **kwargs): return wrapper - def skip_when_service_backend(message='does not work on ServiceBackend'): from hail.backend.service_backend import ServiceBackend @decorator @@ -154,6 +153,15 @@ def wrapper(func, *args, **kwargs): reason="doesn't yet work on service backend", strict=True) +def check_spark(): + backend_name = os.environ.get('HAIL_QUERY_BACKEND', 'spark') + return backend_name == 'spark' + +fails_spark_backend = pytest.mark.xfail( + check_spark(), + reason="doesn't yet work on spark backend", + strict=True) + def run_with_cxx_compile(): @decorator diff --git a/hail/python/test/hail/linalg/test_linalg.py b/hail/python/test/hail/linalg/test_linalg.py index 4f7dbc12959..16afdff748a 100644 --- a/hail/python/test/hail/linalg/test_linalg.py +++ b/hail/python/test/hail/linalg/test_linalg.py @@ -71,26 +71,22 @@ def assert_sums_agree(self, bm, nd): self._assert_close(bm.sum(axis=0), np.sum(nd, axis=0, keepdims=True)) self._assert_close(bm.sum(axis=1), np.sum(nd, axis=1, keepdims=True)) - @fails_service_backend() - @fails_local_backend() - def test_from_entry_expr(self): + def test_from_entry_expr_simple(self): mt = get_dataset() mt = mt.annotate_entries(x=hl.or_else(mt.GT.n_alt_alleles(), 0)).cache() - a1 = BlockMatrix.from_entry_expr(hl.or_else(mt.GT.n_alt_alleles(), 0), block_size=32).to_numpy() - a2 = BlockMatrix.from_entry_expr(mt.x, block_size=32).to_numpy() - a3 = BlockMatrix.from_entry_expr(hl.float64(mt.x), block_size=32).to_numpy() + a1 = hl.eval(BlockMatrix.from_entry_expr(hl.or_else(mt.GT.n_alt_alleles(), 0), block_size=32).to_ndarray()) + a2 = hl.eval(BlockMatrix.from_entry_expr(mt.x, block_size=32).to_ndarray()) + a3 = hl.eval(BlockMatrix.from_entry_expr(hl.float64(mt.x), block_size=32).to_ndarray()) self._assert_eq(a1, a2) self._assert_eq(a1, a3) with hl.TemporaryDirectory(ensure_exists=False) as path: BlockMatrix.write_from_entry_expr(mt.x, path, block_size=32) - a4 = BlockMatrix.read(path).to_numpy() + a4 = hl.eval(BlockMatrix.read(path).to_ndarray()) self._assert_eq(a1, a4) - @fails_service_backend() - @fails_local_backend() def test_from_entry_expr_options(self): def build_mt(a): data = [{'v': 0, 's': 0, 'x': a[0]}, @@ -102,10 +98,10 @@ def build_mt(a): return mt.choose_cols([ids.index(0), ids.index(1), 
ids.index(2)]) def check(expr, mean_impute, center, normalize, expected): - actual = np.squeeze(BlockMatrix.from_entry_expr(expr, + actual = np.squeeze(hl.eval(BlockMatrix.from_entry_expr(expr, mean_impute=mean_impute, center=center, - normalize=normalize).to_numpy()) + normalize=normalize).to_ndarray())) assert np.allclose(actual, expected) a = np.array([0.0, 1.0, 2.0]) @@ -125,8 +121,6 @@ def check(expr, mean_impute, center, normalize, expected): with self.assertRaises(Exception): BlockMatrix.from_entry_expr(mt.x) - @fails_service_backend() - @fails_local_backend() def test_write_from_entry_expr_overwrite(self): mt = hl.balding_nichols_model(1, 1, 1) mt = mt.select_entries(x=mt.GT.n_alt_alleles()) @@ -256,19 +250,20 @@ def test_to_matrix_table(self): mt_round_trip = BlockMatrix.from_entry_expr(mt.element).to_matrix_table_row_major() assert mt._same(mt_round_trip) - @fails_service_backend() - @fails_local_backend() - def test_elementwise_ops(self): + def test_paired_elementwise_ops(self): nx = np.array([[2.0]]) nc = np.array([[1.0], [2.0]]) nr = np.array([[1.0, 2.0, 3.0]]) nm = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) e = 2.0 - x = BlockMatrix.from_numpy(nx, block_size=8) - c = BlockMatrix.from_numpy(nc, block_size=8) - r = BlockMatrix.from_numpy(nr, block_size=8) - m = BlockMatrix.from_numpy(nm, block_size=8) + # BlockMatrixMap requires very simple IRs on the SparkBackend. If I use + # `from_ndarray` here, it generates an `NDArrayRef` expression that it can't handle. + # Will be fixed by improving FoldConstants handling of ndarrays or fully lowering BlockMatrix. + x = BlockMatrix._create(1, 1, [2.0], block_size=8) + c = BlockMatrix.from_ndarray(hl.literal(nc), block_size=8) + r = BlockMatrix.from_ndarray(hl.literal(nr), block_size=8) + m = BlockMatrix.from_ndarray(hl.literal(nm), block_size=8) self.assertRaises(TypeError, lambda: x + np.array(['one'], dtype=str)) @@ -577,11 +572,9 @@ def test_sum_with_sparsify(self): self.assert_sums_agree(bm3, nd) self.assert_sums_agree(bm4, nd4) - @fails_service_backend() - @fails_local_backend() def test_slicing(self): nd = np.array(np.arange(0, 80, dtype=float)).reshape(8, 10) - bm = BlockMatrix.from_numpy(nd, block_size=3) + bm = BlockMatrix.from_ndarray(hl.literal(nd), block_size=3) for indices in [(0, 0), (5, 7), (-3, 9), (-8, -10)]: self._assert_eq(bm[indices], nd[indices]) @@ -602,14 +595,16 @@ def test_slicing(self): self._assert_eq(bm[indices] - bm, nd[indices] - nd) self._assert_eq(bm - bm[indices], nd - nd[indices]) - for indices in [(slice(0, 8), slice(0, 10)), - (slice(0, 8, 2), slice(0, 10, 2)), - (slice(2, 4), slice(5, 7)), - (slice(-8, -1), slice(-10, -1)), - (slice(-8, -1, 2), slice(-10, -1, 2)), - (slice(None, 4, 1), slice(None, 4, 1)), - (slice(4, None), slice(4, None)), - (slice(None, None), slice(None, None))]: + for indices in [ + (slice(0, 8), slice(0, 10)), + (slice(0, 8, 2), slice(0, 10, 2)), + (slice(2, 4), slice(5, 7)), + (slice(-8, -1), slice(-10, -1)), + (slice(-8, -1, 2), slice(-10, -1, 2)), + (slice(None, 4, 1), slice(None, 4, 1)), + (slice(4, None), slice(4, None)), + (slice(None, None), slice(None, None)) + ]: self._assert_eq(bm[indices], nd[indices]) self._assert_eq(bm[indices][:, :2], nd[indices][:, :2]) self._assert_eq(bm[indices][:2, :], nd[indices][:2, :]) @@ -881,8 +876,6 @@ def test_to_ndarray(self): sparsed = BlockMatrix.from_ndarray(hl.nd.array(sparsed_numpy), block_size=4)._sparsify_blocks(blocks_to_sparsify).to_ndarray() self.assertTrue(np.array_equal(sparsed_numpy, hl.eval(sparsed))) - 
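The rewritten linalg tests consistently materialize block matrices through Hail ndarrays and hl.eval rather than to_numpy. A minimal round-trip sketch of that pattern, assuming an initialized Hail session; the array contents and block size here are arbitrary:

    import numpy as np
    import hail as hl
    from hail.linalg import BlockMatrix

    nd = np.arange(6, dtype=np.float64).reshape(2, 3)
    bm = BlockMatrix.from_ndarray(hl.literal(nd), block_size=2)  # numpy -> BlockMatrix
    round_tripped = hl.eval(bm.to_ndarray())                     # BlockMatrix -> numpy via hl.eval
    assert np.array_equal(round_tripped, nd)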
@fails_service_backend() - @fails_local_backend() def test_block_matrix_entries(self): n_rows, n_cols = 5, 3 rows = [{'i': i, 'j': j, 'entry': float(i + j)} for i in range(n_rows) for j in range(n_cols)] @@ -894,14 +887,12 @@ def test_block_matrix_entries(self): ndarray = np.reshape(list(map(lambda row: row['entry'], rows)), (n_rows, n_cols)) for block_size in [1, 2, 1024]: - block_matrix = BlockMatrix.from_numpy(ndarray, block_size) + block_matrix = BlockMatrix.from_ndarray(hl.literal(ndarray), block_size) entries_table = block_matrix.entries() self.assertEqual(entries_table.count(), n_cols * n_rows) self.assertEqual(len(entries_table.row), 3) self.assertTrue(table._same(entries_table)) - @fails_service_backend() - @fails_local_backend() def test_from_entry_expr_filtered(self): mt = hl.utils.range_matrix_table(1, 1).filter_entries(False) bm = hl.linalg.BlockMatrix.from_entry_expr(mt.row_idx + mt.col_idx, mean_impute=True) # should run without error @@ -1201,3 +1192,17 @@ def test_row_blockmatrix_sum(self): # Summing horizontally along a column vector to make sure nothing changes f = col.sum(axis=1) assert f.to_numpy().shape == (10, 1) + + + @fails_spark_backend() + def test_map(self): + np_mat = np.arange(20, dtype=np.float64).reshape((4, 5)) + bm = BlockMatrix.from_ndarray(hl.nd.array(np_mat)) + bm_mapped_arith = bm._map_dense(lambda x: (x * x) + 5) + self._assert_eq(bm_mapped_arith, np_mat * np_mat + 5) + + bm_mapped_if = bm._map_dense(lambda x: hl.if_else(x >= 1, x, -8.0)) + np_if = np_mat.copy() + np_if[0, 0] = -8.0 + self._assert_eq(bm_mapped_if, np_if) + diff --git a/hail/python/test/hail/matrixtable/test_matrix_table.py b/hail/python/test/hail/matrixtable/test_matrix_table.py index 208c9476405..01cfa5536f9 100644 --- a/hail/python/test/hail/matrixtable/test_matrix_table.py +++ b/hail/python/test/hail/matrixtable/test_matrix_table.py @@ -165,7 +165,6 @@ def test_filter(self): mt.count_rows() @fails_service_backend() - @fails_local_backend() def test_aggregate(self): mt = self.get_mt() @@ -431,7 +430,6 @@ def test_semi_anti_join_cols(self): assert mt.anti_join_cols(ht).count() == (3, 7) @fails_service_backend() - @fails_local_backend() def test_joins(self): mt = self.get_mt().select_rows(x1=1, y1=1) mt2 = mt.select_rows(x2=1, y2=2) @@ -451,7 +449,6 @@ def test_joins(self): self.assertTrue(ct.all(ct.c2 == 2)) @fails_service_backend() - @fails_local_backend() def test_joins_with_key_structs(self): mt = self.get_mt() @@ -563,6 +560,7 @@ def test_union_cols_distinct(self): mt = mt.key_rows_by(x = mt.row_idx // 2) assert mt.union_cols(mt).count_rows() == 5 + @skip_when_service_backend('flaky https://hail.zulipchat.com/#narrow/stream/127527-team/topic/CI.20Deploy.20Failure/near/237593731') def test_union_cols_outer(self): r, c = 10, 10 mt = hl.utils.range_matrix_table(2*r, c) @@ -844,14 +842,13 @@ def test_sample_rows(self): self.assertTrue(ds_small.count_rows() < ds.count_rows()) @fails_service_backend() - @fails_local_backend() def test_read_stored_cols(self): ds = self.get_mt() ds = ds.annotate_globals(x='foo') f = new_temp_file(extension='mt') ds.write(f) t = hl.read_table(f + '/cols') - self.assertTrue(ds.cols()._same(t)) + self.assertTrue(ds.cols().key_by()._same(t)) @skip_when_service_backend('Shuffler encoding/decoding is broken.') def test_read_stored_rows(self): @@ -871,7 +868,6 @@ def test_read_stored_globals(self): self.assertTrue(ds.globals_table()._same(t)) @fails_service_backend() - @fails_local_backend() def test_indexed_read(self): mt = hl.utils.range_matrix_table(2000, 
100, 10) f = new_temp_file(extension='mt') @@ -891,7 +887,6 @@ def test_indexed_read(self): self.assertTrue(mt.filter_rows((mt.row_idx >= 150) & (mt.row_idx < 500))._same(mt2)) @fails_service_backend() - @fails_local_backend() def test_indexed_read_vcf(self): vcf = self.get_mt(10) f = new_temp_file(extension='mt') diff --git a/hail/python/test/hail/methods/test_impex.py b/hail/python/test/hail/methods/test_impex.py index a4fa0a49286..6a4fe7b4425 100644 --- a/hail/python/test/hail/methods/test_impex.py +++ b/hail/python/test/hail/methods/test_impex.py @@ -75,6 +75,10 @@ def test_undeclared_info(self): self.assertFalse('undeclared' in info_type) self.assertFalse('undeclaredFlag' in info_type) + @fails_service_backend() + def test_can_import_bad_number_flag(self): + hl.import_vcf(resource('bad_flag_number.vcf')).rows()._force_count() + @fails_service_backend() def test_malformed(self): with self.assertRaisesRegex(FatalError, "invalid character"): @@ -952,6 +956,24 @@ def test_contig_recoding_defaults(self): resource('sex_mt_contigs.fam'), reference_genome='random') + @fails_service_backend() + @fails_local_backend() + def test_export_plink_struct_locus(self): + mt = hl.utils.range_matrix_table(10, 10) + mt = mt.key_rows_by(locus=hl.struct(contig=hl.str(mt.row_idx), position=mt.row_idx), alleles=['A', 'T']).select_rows() + mt = mt.key_cols_by(s=hl.str(mt.col_idx)).select_cols() + mt = mt.annotate_entries(GT=hl.call(0, 0)) + + out = new_temp_file() + + hl.export_plink(mt, out) + mt2 = hl.import_plink( + bed=out + '.bed', + bim=out + '.bim', + fam=out + '.fam', + reference_genome=None).select_rows().select_cols() + assert mt._same(mt2) + # this routine was used to generate resources random.gen, random.sample # random.bgen was generated with qctool v2.0rc9: @@ -1858,7 +1880,7 @@ def test_key_by_after_empty_key_import(self): @fails_service_backend() @fails_local_backend() - def test_devlish_nine_separated_eight_missing_file(self): + def test_devilish_nine_separated_eight_missing_file(self): fields = {'chr': hl.tstr, '': hl.tint32, 'ref': hl.tstr, @@ -2047,7 +2069,6 @@ def test_grep_show_false(self): @fails_service_backend() -@fails_local_backend() def test_matrix_and_table_read_intervals_with_hidden_key(): f1 = new_temp_file() f2 = new_temp_file() diff --git a/hail/python/test/hail/methods/test_misc.py b/hail/python/test/hail/methods/test_misc.py index 54a8102763d..99b2f8577fc 100644 --- a/hail/python/test/hail/methods/test_misc.py +++ b/hail/python/test/hail/methods/test_misc.py @@ -212,3 +212,9 @@ def test_lambda_gc(self): lgc2 = hl.lambda_gc(ht.x2) self.assertAlmostEqual(lgc, 1, places=1) # approximate, 1 place is safe self.assertAlmostEqual(lgc2, 1.89, places=1) # approximate, 1 place is safe + + def test_lambda_gc_nans(self): + N = 5000000 + ht = hl.utils.range_table(N).annotate(x = hl.scan.count() / N, is_even=hl.scan.count() % 2 == 0) + lgc_nan = hl.lambda_gc(hl.case().when(ht.is_even, hl.float('nan')).default(ht.x)) + self.assertAlmostEqual(lgc_nan, 1, places=1) # approximate, 1 place is safe diff --git a/hail/python/test/hail/methods/test_pca.py b/hail/python/test/hail/methods/test_pca.py index 25fb27a6e8c..83216629c55 100644 --- a/hail/python/test/hail/methods/test_pca.py +++ b/hail/python/test/hail/methods/test_pca.py @@ -128,27 +128,52 @@ def bound(vs, us): # equation 12 from https://www.ncbi.nlm.nih.gov/pmc/articles np.testing.assert_allclose(eigens, np_eigenvalues, rtol=0.05) assert bound(np_loadings, loadings) > 0.9 +def make_spectral_matrix(index_func, k, m, n): + sigma_dim = 
min(m, n) + answer = np.zeros((m, n)) + for j in range(sigma_dim): + answer[j, j] = index_func(j + 1, k) + return answer + +def matrix_table_from_numpy(np_mat): + rows, cols = np_mat.shape + mt = hl.utils.range_matrix_table(rows, cols) + mt = mt.annotate_globals(entries_global = np_mat) + mt = mt.annotate_entries(ent = mt.entries_global[mt.row_idx, mt.col_idx]) + return mt + +# k, m, n +dim_triplets = [(20, 1000, 1000), (10, 100, 200)] + +def spectra_helper(spec_func): + + for triplet in dim_triplets: + k, m, n = triplet + sigma = make_spectral_matrix(spec_func, k, m, n) + seed = 1025 + np.random.seed(seed) + U = np.linalg.qr(np.random.normal(0, 1, (m, m)))[0] + V = np.linalg.qr(np.random.normal(0, 1, (n, n)))[0] + A = U @ sigma @ V + mt_A = matrix_table_from_numpy(A) + + eigenvalues, scores, loadings = hl._blanczos_pca(mt_A.ent, k=k, oversampling_param=k, compute_loadings=True, q_iterations=4) + singulars = np.sqrt(eigenvalues) + hail_V = (np.array(scores.scores.collect()) / singulars).T + hail_U = np.array(loadings.loadings.collect()) + approx_A = hail_U @ np.diag(singulars) @ hail_V + norm_of_diff = np.linalg.norm(A - approx_A, 2) + np.testing.assert_allclose(norm_of_diff, spec_func(k + 1, k), rtol=1e-02, err_msg=f"Norm test failed on triplet {triplet} ") + np.testing.assert_allclose(singulars, np.diag(sigma)[:k], rtol=1e-01, err_msg=f"Failed on triplet {triplet}") @fails_service_backend(reason='persist_ir') -def test_spectra(): - def make_spectral_matrix(index_func, k, m, n): - sigma_dim = min(m, n) - answer = np.zeros((m, n)) - for j in range(sigma_dim): - answer[j, j] = index_func(j + 1, k) - return answer - - def matrix_table_from_numpy(np_mat): - rows, cols = np_mat.shape - mt = hl.utils.range_matrix_table(rows, cols) - mt = mt.annotate_globals(entries_global = np_mat) - mt = mt.annotate_entries(ent = mt.entries_global[mt.row_idx, mt.col_idx]) - return mt - - # Defined for j >= 1 +def test_spectra_1(): def spec1(j, k): return 1/j + spectra_helper(spec1) +@fails_service_backend(reason='persist_ir') +def test_spectra_2(): def spec2(j, k): if j == 1: return 1 @@ -156,13 +181,19 @@ def spec2(j, k): return 2 * 10**-5 else: return (10**-5) * (k + 1)/j + spectra_helper(spec2) +@fails_service_backend(reason='persist_ir') +def test_spectra_3(): def spec3(j, k): if j <= k: return 10**(-5*(j-1)/(k-1)) else: return (10**-5)*(k+1)/j + spectra_helper(spec3) +@fails_service_backend(reason='persist_ir') +def test_spectra_4(): def spec4(j, k): if j <= k: return 10**(-5*(j-1)/(k-1)) @@ -170,35 +201,13 @@ def spec4(j, k): return 10**-5 else: return 0 + spectra_helper(spec4) +@fails_service_backend(reason='persist_ir') +def test_spectra_5(): def spec5(j, k): if j <= k: return 10**-5 + (1 - 10**-5)*(k - j)/(k - 1) else: return 10**-5 * math.sqrt((k + 1)/j) - - spectral_functions = [spec1, spec2, spec3, spec4, spec5] - - # k, m, n - dim_triplets = [(10, 1000, 1000), (20, 1000, 1000), (10, 100, 200)] - - for triplet in dim_triplets: - k, m, n = triplet - for idx, spec_func in enumerate(spectral_functions): - sigma = make_spectral_matrix(spec_func, k, m, n) - seed = 1025 - np.random.seed(seed) - U = np.linalg.qr(np.random.normal(0, 1, (m, m)))[0] - V = np.linalg.qr(np.random.normal(0, 1, (n, n)))[0] - A = U @ sigma @ V - mt_A = matrix_table_from_numpy(A) - - eigenvalues, scores, loadings = hl._blanczos_pca(mt_A.ent, k=k, oversampling_param=k, compute_loadings=True, q_iterations=4) - singulars = np.sqrt(eigenvalues) - hail_V = (np.array(scores.scores.collect()) / singulars).T - hail_U = 
np.array(loadings.loadings.collect()) - approx_A = hail_U @ np.diag(singulars) @ hail_V - norm_of_diff = np.linalg.norm(A - approx_A, 2) - np.testing.assert_allclose(norm_of_diff, spec_func(k + 1, k), rtol=1e-02, err_msg=f"Norm test failed on triplet {triplet} on spec{idx + 1}") - np.testing.assert_allclose(singulars, np.diag(sigma)[:k], rtol=1e-01, err_msg=f"Failed on triplet {triplet} on spec{idx + 1}") - + spectra_helper(spec5) diff --git a/hail/python/test/hail/methods/test_statgen.py b/hail/python/test/hail/methods/test_statgen.py index 5c146f1fad2..02525e206c8 100644 --- a/hail/python/test/hail/methods/test_statgen.py +++ b/hail/python/test/hail/methods/test_statgen.py @@ -459,6 +459,8 @@ def eq(x1, x2): eq(combined.p_value, combined.multi.p_value[0]) & eq(combined.multi.p_value[0], combined.multi.p_value[1])))) + logreg_functions = [hl.logistic_regression_rows, hl._logistic_regression_rows_nd] if backend_name == "spark" else [hl._logistic_regression_rows_nd] + # comparing to R: # x = c(0, 1, 0, 0, 0, 1, 0, 0, 0, 0) # y = c(0, 0, 1, 1, 1, 1, 0, 0, 1, 1) @@ -470,8 +472,6 @@ def eq(x1, x2): # se <- waldtest["x", "Std. Error"] # zstat <- waldtest["x", "z value"] # pval <- waldtest["x", "Pr(>|z|)"] - @fails_service_backend() - @fails_local_backend() def test_logistic_regression_wald_test(self): covariates = hl.import_table(resource('regressionLogistic.cov'), key='Sample', @@ -481,35 +481,35 @@ def test_logistic_regression_wald_test(self): missing='0', types={'isCase': hl.tbool}) mt = hl.import_vcf(resource('regressionLogistic.vcf')) - ht = hl.logistic_regression_rows('wald', - y=pheno[mt.s].isCase, - x=mt.GT.n_alt_alleles(), - covariates=[1.0, covariates[mt.s].Cov1, covariates[mt.s].Cov2]) - results = dict(hl.tuple([ht.locus.position, ht.row]).collect()) + for logistic_regression_function in self.logreg_functions: + ht = logistic_regression_function('wald', + y=pheno[mt.s].isCase, + x=mt.GT.n_alt_alleles(), + covariates=[1.0, covariates[mt.s].Cov1, covariates[mt.s].Cov2]) - self.assertAlmostEqual(results[1].beta, -0.81226793796, places=6) - self.assertAlmostEqual(results[1].standard_error, 2.1085483421, places=6) - self.assertAlmostEqual(results[1].z_stat, -0.3852261396, places=6) - self.assertAlmostEqual(results[1].p_value, 0.7000698784, places=6) + results = dict(hl.tuple([ht.locus.position, ht.row]).collect()) - self.assertAlmostEqual(results[2].beta, -0.43659460858, places=6) - self.assertAlmostEqual(results[2].standard_error, 1.0296902941, places=6) - self.assertAlmostEqual(results[2].z_stat, -0.4240057531, places=6) - self.assertAlmostEqual(results[2].p_value, 0.6715616176, places=6) + self.assertAlmostEqual(results[1].beta, -0.81226793796, places=6) + self.assertAlmostEqual(results[1].standard_error, 2.1085483421, places=6) + self.assertAlmostEqual(results[1].z_stat, -0.3852261396, places=6) + self.assertAlmostEqual(results[1].p_value, 0.7000698784, places=6) - def is_constant(r): - return (not r.fit.converged) or np.isnan(r.p_value) or abs(r.p_value - 1) < 1e-4 + self.assertAlmostEqual(results[2].beta, -0.43659460858, places=6) + self.assertAlmostEqual(results[2].standard_error, 1.0296902941, places=6) + self.assertAlmostEqual(results[2].z_stat, -0.4240057531, places=6) + self.assertAlmostEqual(results[2].p_value, 0.6715616176, places=6) - self.assertFalse(results[3].fit.converged) # separable - self.assertTrue(is_constant(results[6])) - self.assertTrue(is_constant(results[7])) - self.assertTrue(is_constant(results[8])) - self.assertTrue(is_constant(results[9])) - 
self.assertTrue(is_constant(results[10])) + def is_constant(r): + return (not r.fit.converged) or np.isnan(r.p_value) or abs(r.p_value - 1) < 1e-4 + + self.assertFalse(results[3].fit.converged) # separable + self.assertTrue(is_constant(results[6])) + self.assertTrue(is_constant(results[7])) + self.assertTrue(is_constant(results[8])) + self.assertTrue(is_constant(results[9])) + self.assertTrue(is_constant(results[10])) - @fails_service_backend() - @fails_local_backend() def test_logistic_regression_wald_test_apply_multi_pheno(self): covariates = hl.import_table(resource('regressionLogistic.cov'), key='Sample', @@ -519,35 +519,38 @@ def test_logistic_regression_wald_test_apply_multi_pheno(self): missing='0', types={'isCase': hl.tbool}) mt = hl.import_vcf(resource('regressionLogistic.vcf')) - ht = hl.logistic_regression_rows('wald', - y=[pheno[mt.s].isCase], - x=mt.GT.n_alt_alleles(), - covariates=[1.0, covariates[mt.s].Cov1, covariates[mt.s].Cov2]) - results = dict(hl.tuple([ht.locus.position, ht.row]).collect()) - - self.assertEqual(len(results[1].logistic_regression),1) - self.assertAlmostEqual(results[1].logistic_regression[0].beta, -0.81226793796, places=6) - self.assertAlmostEqual(results[1].logistic_regression[0].standard_error, 2.1085483421, places=6) - self.assertAlmostEqual(results[1].logistic_regression[0].z_stat, -0.3852261396, places=6) - self.assertAlmostEqual(results[1].logistic_regression[0].p_value, 0.7000698784, places=6) + for logistic_regression_function in self.logreg_functions: - self.assertEqual(len(results[2].logistic_regression),1) - self.assertAlmostEqual(results[2].logistic_regression[0].beta, -0.43659460858, places=6) - self.assertAlmostEqual(results[2].logistic_regression[0].standard_error, 1.0296902941, places=6) - self.assertAlmostEqual(results[2].logistic_regression[0].z_stat, -0.4240057531, places=6) - self.assertAlmostEqual(results[2].logistic_regression[0].p_value, 0.6715616176, places=6) + ht = logistic_regression_function('wald', + y=[pheno[mt.s].isCase], + x=mt.GT.n_alt_alleles(), + covariates=[1.0, covariates[mt.s].Cov1, covariates[mt.s].Cov2]) - def is_constant(r): - return (not r.logistic_regression[0].fit.converged) or np.isnan(r.logistic_regression[0].p_value) or abs(r.logistic_regression[0].p_value - 1) < 1e-4 + results = dict(hl.tuple([ht.locus.position, ht.row]).collect()) - self.assertEqual(len(results[3].logistic_regression),1) - self.assertFalse(results[3].logistic_regression[0].fit.converged) # separable - self.assertTrue(is_constant(results[6])) - self.assertTrue(is_constant(results[7])) - self.assertTrue(is_constant(results[8])) - self.assertTrue(is_constant(results[9])) - self.assertTrue(is_constant(results[10])) + self.assertEqual(len(results[1].logistic_regression),1) + self.assertAlmostEqual(results[1].logistic_regression[0].beta, -0.81226793796, places=6) + self.assertAlmostEqual(results[1].logistic_regression[0].standard_error, 2.1085483421, places=6) + self.assertAlmostEqual(results[1].logistic_regression[0].z_stat, -0.3852261396, places=6) + self.assertAlmostEqual(results[1].logistic_regression[0].p_value, 0.7000698784, places=6) + + self.assertEqual(len(results[2].logistic_regression),1) + self.assertAlmostEqual(results[2].logistic_regression[0].beta, -0.43659460858, places=6) + self.assertAlmostEqual(results[2].logistic_regression[0].standard_error, 1.0296902941, places=6) + self.assertAlmostEqual(results[2].logistic_regression[0].z_stat, -0.4240057531, places=6) + self.assertAlmostEqual(results[2].logistic_regression[0].p_value, 
0.6715616176, places=6) + + def is_constant(r): + return (not r.logistic_regression[0].fit.converged) or np.isnan(r.logistic_regression[0].p_value) or abs(r.logistic_regression[0].p_value - 1) < 1e-4 + + self.assertEqual(len(results[3].logistic_regression),1) + self.assertFalse(results[3].logistic_regression[0].fit.converged) # separable + self.assertTrue(is_constant(results[6])) + self.assertTrue(is_constant(results[7])) + self.assertTrue(is_constant(results[8])) + self.assertTrue(is_constant(results[9])) + self.assertTrue(is_constant(results[10])) @fails_service_backend() @fails_local_backend() @@ -562,28 +565,28 @@ def test_logistic_regression_wald_test_multi_pheno_bgen_dosage(self): mt = hl.import_bgen(resource('example.8bits.bgen'), entry_fields=['dosage']).cache() - ht_single_pheno = hl.logistic_regression_rows('wald', - y=pheno[mt.s].Pheno1, - x=mt.dosage, - covariates=[1.0, covariates[mt.s].Cov1, covariates[mt.s].Cov2]) + for logistic_regression_function in self.logreg_functions: - ht_multi_pheno = hl.logistic_regression_rows('wald', - y=[pheno[mt.s].Pheno1, pheno[mt.s].Pheno2], - x=mt.dosage, - covariates=[1.0, covariates[mt.s].Cov1, covariates[mt.s].Cov2]) + ht_single_pheno = logistic_regression_function('wald', + y=pheno[mt.s].Pheno1, + x=mt.dosage, + covariates=[1.0, covariates[mt.s].Cov1, covariates[mt.s].Cov2]) - single_results = dict(hl.tuple([ht_single_pheno.locus.position, ht_single_pheno.row]).collect()) - multi_results = dict(hl.tuple([ht_multi_pheno.locus.position, ht_multi_pheno.row]).collect()) - self.assertEqual(len(multi_results[1001].logistic_regression),2) - self.assertAlmostEqual(multi_results[1001].logistic_regression[0].beta, single_results[1001].beta, places=6) - self.assertAlmostEqual(multi_results[1001].logistic_regression[0].standard_error,single_results[1001].standard_error, places=6) - self.assertAlmostEqual(multi_results[1001].logistic_regression[0].z_stat, single_results[1001].z_stat, places=6) - self.assertAlmostEqual(multi_results[1001].logistic_regression[0].p_value,single_results[1001].p_value, places=6) - #TODO test handling of missingness + ht_multi_pheno = logistic_regression_function('wald', + y=[pheno[mt.s].Pheno1, pheno[mt.s].Pheno2], + x=mt.dosage, + covariates=[1.0, covariates[mt.s].Cov1, covariates[mt.s].Cov2]) + + single_results = dict(hl.tuple([ht_single_pheno.locus.position, ht_single_pheno.row]).collect()) + multi_results = dict(hl.tuple([ht_multi_pheno.locus.position, ht_multi_pheno.row]).collect()) + self.assertEqual(len(multi_results[1001].logistic_regression),2) + self.assertAlmostEqual(multi_results[1001].logistic_regression[0].beta, single_results[1001].beta, places=6) + self.assertAlmostEqual(multi_results[1001].logistic_regression[0].standard_error,single_results[1001].standard_error, places=6) + self.assertAlmostEqual(multi_results[1001].logistic_regression[0].z_stat, single_results[1001].z_stat, places=6) + self.assertAlmostEqual(multi_results[1001].logistic_regression[0].p_value,single_results[1001].p_value, places=6) + #TODO test handling of missingness - @fails_service_backend() - @fails_local_backend() def test_logistic_regression_wald_test_pl(self): covariates = hl.import_table(resource('regressionLogistic.cov'), key='Sample', @@ -593,33 +596,36 @@ def test_logistic_regression_wald_test_pl(self): missing='0', types={'isCase': hl.tbool}) mt = hl.import_vcf(resource('regressionLogistic.vcf')) - ht = hl.logistic_regression_rows( - test='wald', - y=pheno[mt.s].isCase, - x=hl.pl_dosage(mt.PL), - covariates=[1.0, 
covariates[mt.s].Cov1, covariates[mt.s].Cov2]) - results = dict(hl.tuple([ht.locus.position, ht.row]).collect()) + for logistic_regression_function in self.logreg_functions: - self.assertAlmostEqual(results[1].beta, -0.8286774, places=6) - self.assertAlmostEqual(results[1].standard_error, 2.151145, places=6) - self.assertAlmostEqual(results[1].z_stat, -0.3852261, places=6) - self.assertAlmostEqual(results[1].p_value, 0.7000699, places=6) + ht = logistic_regression_function( + test='wald', + y=pheno[mt.s].isCase, + x=hl.pl_dosage(mt.PL), + covariates=[1.0, covariates[mt.s].Cov1, covariates[mt.s].Cov2]) - self.assertAlmostEqual(results[2].beta, -0.4431764, places=6) - self.assertAlmostEqual(results[2].standard_error, 1.045213, places=6) - self.assertAlmostEqual(results[2].z_stat, -0.4240058, places=6) - self.assertAlmostEqual(results[2].p_value, 0.6715616, places=6) + results = dict(hl.tuple([ht.locus.position, ht.row]).collect()) - def is_constant(r): - return (not r.fit.converged) or np.isnan(r.p_value) or abs(r.p_value - 1) < 1e-4 + self.assertAlmostEqual(results[1].beta, -0.8286774, places=6) + self.assertAlmostEqual(results[1].standard_error, 2.151145, places=6) + self.assertAlmostEqual(results[1].z_stat, -0.3852261, places=6) + self.assertAlmostEqual(results[1].p_value, 0.7000699, places=6) - self.assertFalse(results[3].fit.converged) # separable - self.assertTrue(is_constant(results[6])) - self.assertTrue(is_constant(results[7])) - self.assertTrue(is_constant(results[8])) - self.assertTrue(is_constant(results[9])) - self.assertTrue(is_constant(results[10])) + self.assertAlmostEqual(results[2].beta, -0.4431764, places=6) + self.assertAlmostEqual(results[2].standard_error, 1.045213, places=6) + self.assertAlmostEqual(results[2].z_stat, -0.4240058, places=6) + self.assertAlmostEqual(results[2].p_value, 0.6715616, places=6) + + def is_constant(r): + return (not r.fit.converged) or np.isnan(r.p_value) or abs(r.p_value - 1) < 1e-4 + + self.assertFalse(results[3].fit.converged) # separable + self.assertTrue(is_constant(results[6])) + self.assertTrue(is_constant(results[7])) + self.assertTrue(is_constant(results[8])) + self.assertTrue(is_constant(results[9])) + self.assertTrue(is_constant(results[10])) @fails_service_backend() @fails_local_backend() @@ -633,33 +639,36 @@ def test_logistic_regression_wald_dosage(self): types={'isCase': hl.tbool}) mt = hl.import_gen(resource('regressionLogistic.gen'), sample_file=resource('regressionLogistic.sample')) - ht = hl.logistic_regression_rows( - test='wald', - y=pheno[mt.s].isCase, - x=hl.gp_dosage(mt.GP), - covariates=[1.0, covariates[mt.s].Cov1, covariates[mt.s].Cov2]) - results = dict(hl.tuple([ht.locus.position, ht.row]).collect()) + for logistic_regression_function in self.logreg_functions: - self.assertAlmostEqual(results[1].beta, -0.8286774, places=4) - self.assertAlmostEqual(results[1].standard_error, 2.151145, places=4) - self.assertAlmostEqual(results[1].z_stat, -0.3852261, places=4) - self.assertAlmostEqual(results[1].p_value, 0.7000699, places=4) + ht = logistic_regression_function( + test='wald', + y=pheno[mt.s].isCase, + x=hl.gp_dosage(mt.GP), + covariates=[1.0, covariates[mt.s].Cov1, covariates[mt.s].Cov2]) - self.assertAlmostEqual(results[2].beta, -0.4431764, places=4) - self.assertAlmostEqual(results[2].standard_error, 1.045213, places=4) - self.assertAlmostEqual(results[2].z_stat, -0.4240058, places=4) - self.assertAlmostEqual(results[2].p_value, 0.6715616, places=4) + results = dict(hl.tuple([ht.locus.position, ht.row]).collect()) - 
def is_constant(r): - return (not r.fit.converged) or np.isnan(r.p_value) or abs(r.p_value - 1) < 1e-4 + self.assertAlmostEqual(results[1].beta, -0.8286774, places=4) + self.assertAlmostEqual(results[1].standard_error, 2.151145, places=4) + self.assertAlmostEqual(results[1].z_stat, -0.3852261, places=4) + self.assertAlmostEqual(results[1].p_value, 0.7000699, places=4) - self.assertFalse(results[3].fit.converged) # separable - self.assertTrue(is_constant(results[6])) - self.assertTrue(is_constant(results[7])) - self.assertTrue(is_constant(results[8])) - self.assertTrue(is_constant(results[9])) - self.assertTrue(is_constant(results[10])) + self.assertAlmostEqual(results[2].beta, -0.4431764, places=4) + self.assertAlmostEqual(results[2].standard_error, 1.045213, places=4) + self.assertAlmostEqual(results[2].z_stat, -0.4240058, places=4) + self.assertAlmostEqual(results[2].p_value, 0.6715616, places=4) + + def is_constant(r): + return (not r.fit.converged) or np.isnan(r.p_value) or abs(r.p_value - 1) < 1e-4 + + self.assertFalse(results[3].fit.converged) # separable + self.assertTrue(is_constant(results[6])) + self.assertTrue(is_constant(results[7])) + self.assertTrue(is_constant(results[8])) + self.assertTrue(is_constant(results[9])) + self.assertTrue(is_constant(results[10])) # comparing to output of R code: # x = c(0, 1, 0, 0, 0, 1, 0, 0, 0, 0) @@ -672,8 +681,6 @@ def is_constant(r): # lrtest <- anova(logfitnull, logfit, test="LRT") # chi2 <- lrtest[["Deviance"]][2] # pval <- lrtest[["Pr(>Chi)"]][2] - @fails_service_backend() - @fails_local_backend() def test_logistic_regression_lrt(self): covariates = hl.import_table(resource('regressionLogistic.cov'), key='Sample', @@ -683,31 +690,33 @@ def test_logistic_regression_lrt(self): missing='0', types={'isCase': hl.tbool}) mt = hl.import_vcf(resource('regressionLogistic.vcf')) - ht = hl.logistic_regression_rows( - test='lrt', - y=pheno[mt.s].isCase, - x=mt.GT.n_alt_alleles(), - covariates=[1.0, covariates[mt.s].Cov1, covariates[mt.s].Cov2]) - results = dict(hl.tuple([ht.locus.position, ht.row]).collect()) + for logistic_regression_function in self.logreg_functions: + ht = logistic_regression_function( + test='lrt', + y=pheno[mt.s].isCase, + x=mt.GT.n_alt_alleles(), + covariates=[1.0, covariates[mt.s].Cov1, covariates[mt.s].Cov2]) - self.assertAlmostEqual(results[1].beta, -0.81226793796, places=6) - self.assertAlmostEqual(results[1].chi_sq_stat, 0.1503349167, places=6) - self.assertAlmostEqual(results[1].p_value, 0.6982155052, places=6) + results = dict(hl.tuple([ht.locus.position, ht.row]).collect()) - self.assertAlmostEqual(results[2].beta, -0.43659460858, places=6) - self.assertAlmostEqual(results[2].chi_sq_stat, 0.1813968574, places=6) - self.assertAlmostEqual(results[2].p_value, 0.6701755415, places=6) + self.assertAlmostEqual(results[1].beta, -0.81226793796, places=6) + self.assertAlmostEqual(results[1].chi_sq_stat, 0.1503349167, places=6) + self.assertAlmostEqual(results[1].p_value, 0.6982155052, places=6) - def is_constant(r): - return (not r.fit.converged) or np.isnan(r.p_value) or abs(r.p_value - 1) < 1e-4 + self.assertAlmostEqual(results[2].beta, -0.43659460858, places=6) + self.assertAlmostEqual(results[2].chi_sq_stat, 0.1813968574, places=6) + self.assertAlmostEqual(results[2].p_value, 0.6701755415, places=6) - self.assertFalse(results[3].fit.converged) # separable - self.assertTrue(is_constant(results[6])) - self.assertTrue(is_constant(results[7])) - self.assertTrue(is_constant(results[8])) - self.assertTrue(is_constant(results[9])) 
- self.assertTrue(is_constant(results[10])) + def is_constant(r): + return (not r.fit.converged) or np.isnan(r.p_value) or abs(r.p_value - 1) < 1e-4 + + self.assertFalse(results[3].fit.converged) # separable + self.assertTrue(is_constant(results[6])) + self.assertTrue(is_constant(results[7])) + self.assertTrue(is_constant(results[8])) + self.assertTrue(is_constant(results[9])) + self.assertTrue(is_constant(results[10])) # comparing to output of R code: # x = c(0, 1, 0, 0, 0, 1, 0, 0, 0, 0) diff --git a/hail/python/test/hail/table/test_table.py b/hail/python/test/hail/table/test_table.py index 97be3a580d6..641639c3b3b 100644 --- a/hail/python/test/hail/table/test_table.py +++ b/hail/python/test/hail/table/test_table.py @@ -479,13 +479,11 @@ def test_multiple_entry_joins(self): b=mt2[mt.row_idx, mt.col_idx].x) @fails_service_backend() - @fails_local_backend() def test_multi_way_zip_join(self): d1 = [{"id": 0, "name": "a", "data": 0.0}, {"id": 1, "name": "b", "data": 3.14}, {"id": 2, "name": "c", "data": 2.78}] d2 = [{"id": 0, "name": "d", "data": 1.1}, - {"id": 0, "name": "x", "data": 2.2}, {"id": 2, "name": "v", "data": 7.89}] d3 = [{"id": 1, "name": "f", "data": 9.99}, {"id": 2, "name": "g", "data": -1.0}, @@ -496,9 +494,6 @@ def test_multi_way_zip_join(self): dexpected = [{"id": 0, "__data": [{"name": "a", "data": 0.0}, {"name": "d", "data": 1.1}, None]}, - {"id": 0, "__data": [None, - {"name": "x", "data": 2.2}, - None]}, {"id": 1, "__data": [{"name": "b", "data": 3.14}, None, {"name": "f", "data": 9.99}]}, @@ -519,7 +514,7 @@ def test_multi_way_zip_join(self): self.assertTrue(expected2._same(joined_same_name)) joined_nothing = hl.Table.multi_way_zip_join(ts, 'data', 'globals').drop('data', 'globals') - self.assertEqual(joined_nothing._force_count(), 5) + self.assertEqual(joined_nothing._force_count(), 4) def test_multi_way_zip_join_globals(self): t1 = hl.utils.range_table(1).annotate_globals(x=hl.missing(hl.tint32)) diff --git a/hail/python/test/hailtop/aiotools/test_copy.py b/hail/python/test/hailtop/aiotools/test_copy.py index e7a8bea3646..77585e6837b 100644 --- a/hail/python/test/hailtop/aiotools/test_copy.py +++ b/hail/python/test/hailtop/aiotools/test_copy.py @@ -2,10 +2,13 @@ import secrets from concurrent.futures import ThreadPoolExecutor import asyncio +import functools import pytest from hailtop.utils import url_scheme, bounded_gather2 from hailtop.aiotools import LocalAsyncFS, RouterAsyncFS, Transfer, FileAndDirectoryError -from hailtop.aiogoogle import StorageClient, GoogleStorageAsyncFS +from hailtop.aiogoogle import GoogleStorageAsyncFS +from hailtop.aiotools.s3asyncfs import S3AsyncFS + from .generate_copy_test_specs import ( run_test_spec, create_test_file, create_test_dir) @@ -39,27 +42,36 @@ async def router_filesystem(request): with ThreadPoolExecutor() as thread_pool: async with RouterAsyncFS( - 'file', [LocalAsyncFS(thread_pool), GoogleStorageAsyncFS()]) as fs: + 'file', [LocalAsyncFS(thread_pool), + GoogleStorageAsyncFS(), + S3AsyncFS(thread_pool)]) as fs: file_base = f'/tmp/{token}/' await fs.mkdir(file_base) - bucket = os.environ['HAIL_TEST_BUCKET'] - gs_base = f'gs://{bucket}/tmp/{token}/' + gs_bucket = os.environ['HAIL_TEST_GCS_BUCKET'] + gs_base = f'gs://{gs_bucket}/tmp/{token}/' + + s3_bucket = os.environ['HAIL_TEST_S3_BUCKET'] + s3_base = f's3://{s3_bucket}/tmp/{token}/' bases = { 'file': file_base, - 'gs': gs_base + 'gs': gs_base, + 's3': s3_base } sema = asyncio.Semaphore(50) async with sema: yield (sema, fs, bases) await bounded_gather2(sema, - 
fs.rmtree(sema, file_base), - fs.rmtree(sema, gs_base)) + functools.partial(fs.rmtree, sema, file_base), + functools.partial(fs.rmtree, sema, gs_base), + functools.partial(fs.rmtree, sema, s3_base)) assert not await fs.isdir(file_base) assert not await fs.isdir(gs_base) + assert not await fs.isdir(s3_base) + async def fresh_dir(fs, bases, scheme): token = secrets.token_hex(16) @@ -68,7 +80,9 @@ async def fresh_dir(fs, bases, scheme): return dir -@pytest.fixture(params=['file/file', 'file/gs', 'gs/file', 'gs/gs']) +@pytest.fixture(params=['file/file', 'file/gs', 'file/s3', + 'gs/file', 'gs/gs', 'gs/s3', + 's3/file', 's3/gs', 's3/s3']) async def copy_test_context(request, router_filesystem): sema, fs, bases = router_filesystem @@ -93,7 +107,7 @@ async def test_copy_behavior(copy_test_context, test_spec): expected = test_spec['result'] dest_scheme = url_scheme(dest_base) - if (dest_scheme == 'gs' + if ((dest_scheme == 'gs' or dest_scheme == 's3') and 'files' in result and expected.get('exception') in ('IsADirectoryError', 'NotADirectoryError')): return @@ -118,18 +132,19 @@ class RaisedWrongExceptionError(Exception): pass -class RaisesOrGS: +class RaisesOrObjectStore: def __init__(self, dest_base, expected_type): - self._gs = url_scheme(dest_base) == 'gs' + scheme = url_scheme(dest_base) + self._object_store = (scheme == 'gs' or scheme == 's3') self._expected_type = expected_type def __enter__(self): return self def __exit__(self, type, value, traceback): - # gs can succeed or throw + # object stores can succeed or throw if type is None: - if not self._gs: + if not self._object_store: raise DidNotRaiseError() elif type != self._expected_type: raise RaisedWrongExceptionError(type) @@ -137,6 +152,7 @@ def __exit__(self, type, value, traceback): # suppress exception return True + @pytest.mark.asyncio async def test_copy_doesnt_exist(copy_test_context): sema, fs, src_base, dest_base = copy_test_context @@ -294,7 +310,7 @@ async def test_copy_dest_target_file_is_dir(copy_test_context): await create_test_file(fs, 'src', src_base, 'a') - with RaisesOrGS(dest_base, IsADirectoryError): + with RaisesOrObjectStore(dest_base, IsADirectoryError): await fs.copy(sema, Transfer(f'{src_base}a', dest_base.rstrip('/'), treat_dest_as=Transfer.DEST_IS_TARGET)) @@ -366,7 +382,7 @@ async def test_copy_multiple_dest_target_file(copy_test_context): await create_test_file(fs, 'src', src_base, 'a') await create_test_file(fs, 'src', src_base, 'b') - with RaisesOrGS(dest_base, NotADirectoryError): + with RaisesOrObjectStore(dest_base, NotADirectoryError): await fs.copy(sema, Transfer([f'{src_base}a', f'{src_base}b'], dest_base.rstrip('/'), treat_dest_as=Transfer.DEST_IS_TARGET)) @@ -378,7 +394,7 @@ async def test_copy_multiple_dest_file(copy_test_context): await create_test_file(fs, 'src', src_base, 'b') await create_test_file(fs, 'dest', dest_base, 'x') - with RaisesOrGS(dest_base, NotADirectoryError): + with RaisesOrObjectStore(dest_base, NotADirectoryError): await fs.copy(sema, Transfer([f'{src_base}a', f'{src_base}b'], f'{dest_base}x')) @@ -388,7 +404,7 @@ async def test_file_overwrite_dir(copy_test_context): await create_test_file(fs, 'src', src_base, 'a') - with RaisesOrGS(dest_base, IsADirectoryError): + with RaisesOrObjectStore(dest_base, IsADirectoryError): await fs.copy(sema, Transfer(f'{src_base}a', dest_base.rstrip('/'), treat_dest_as=Transfer.DEST_IS_TARGET)) @@ -416,3 +432,152 @@ async def test_copy_src_parts(copy_test_context): await expect_file(fs, f'{dest_base}file1', 'src/a/file1') await expect_file(fs, 
f'{dest_base}subdir/file2', 'src/a/subdir/file2') + + +async def write_file(fs, url, data): + async with await fs.create(url) as f: + await f.write(data) + + +async def collect_files(it): + return [await x.url() async for x in it] + + +@pytest.mark.asyncio +async def test_file_and_directory_error_with_slash_empty_file(router_filesystem): + sema, fs, bases = router_filesystem + + src_base = await fresh_dir(fs, bases, 'gs') + + await write_file(fs, f'{src_base}empty/', '') + await write_file(fs, f'{src_base}empty/foo', b'foo') + + await collect_files(await fs.listfiles(f'{src_base}')) + await collect_files(await fs.listfiles(f'{src_base}', recursive=True)) + await collect_files(await fs.listfiles(f'{src_base}empty/')) + await collect_files(await fs.listfiles(f'{src_base}empty/', recursive=True)) + + for transfer_type in (Transfer.DEST_IS_TARGET, Transfer.DEST_DIR, Transfer.INFER_DEST): + dest_base = await fresh_dir(fs, bases, 'gs') + + await fs.copy(sema, Transfer(f'{src_base}', dest_base.rstrip('/'), treat_dest_as=transfer_type)) + + dest_base = await fresh_dir(fs, bases, 'gs') + + await fs.copy(sema, Transfer(f'{src_base}empty/', dest_base.rstrip('/'), treat_dest_as=transfer_type)) + + await collect_files(await fs.listfiles(f'{dest_base}')) + await collect_files(await fs.listfiles(f'{dest_base}', recursive=True)) + + if transfer_type == Transfer.DEST_DIR: + exp_dest = f'{dest_base}empty/foo' + await expect_file(fs, exp_dest, 'foo') + assert not await fs.isfile(f'{dest_base}empty/') + assert await fs.isdir(f'{dest_base}empty/') + await collect_files(await fs.listfiles(f'{dest_base}empty/')) + await collect_files(await fs.listfiles(f'{dest_base}empty/', recursive=True)) + else: + exp_dest = f'{dest_base}foo' + await expect_file(fs, exp_dest, 'foo') + + +@pytest.mark.asyncio +async def test_file_and_directory_error_with_slash_non_empty_file(router_filesystem): + sema, fs, bases = router_filesystem + + src_base = await fresh_dir(fs, bases, 'gs') + + await write_file(fs, f'{src_base}not-empty/', b'not-empty') + await write_file(fs, f'{src_base}not-empty/bar', b'bar') + + with pytest.raises(FileAndDirectoryError): + await collect_files(await fs.listfiles(f'{src_base}')) + + with pytest.raises(FileAndDirectoryError): + await collect_files(await fs.listfiles(f'{src_base}', recursive=True)) + + with pytest.raises(FileAndDirectoryError): + await collect_files(await fs.listfiles(f'{src_base}not-empty/')) + + with pytest.raises(FileAndDirectoryError): + await collect_files(await fs.listfiles(f'{src_base}not-empty/', recursive=True)) + + for transfer_type in (Transfer.DEST_IS_TARGET, Transfer.DEST_DIR, Transfer.INFER_DEST): + dest_base = await fresh_dir(fs, bases, 'gs') + + await fs.copy(sema, Transfer(f'{src_base}not-empty/bar', dest_base.rstrip('/'), treat_dest_as=transfer_type)) + if transfer_type == Transfer.DEST_DIR: + exp_dest = f'{dest_base}bar' + await expect_file(fs, exp_dest, 'bar') + assert not await fs.isfile(f'{dest_base}not-empty/') + assert not await fs.isdir(f'{dest_base}not-empty/') + x = await collect_files(await fs.listfiles(f'{dest_base}')) + assert x == [f'{dest_base}bar'], x + else: + await expect_file(fs, dest_base.rstrip('/'), 'bar') + + with pytest.raises(FileAndDirectoryError): + dest_base = await fresh_dir(fs, bases, 'gs') + await fs.copy(sema, Transfer(f'{src_base}not-empty/', dest_base.rstrip('/'), treat_dest_as=transfer_type)) + + with pytest.raises(FileAndDirectoryError): + dest_base = await fresh_dir(fs, bases, 'gs') + await fs.copy(sema, Transfer(f'{src_base}', 
dest_base.rstrip('/'), treat_dest_as=transfer_type)) + + +@pytest.mark.asyncio +async def test_file_and_directory_error_with_slash_empty_file_only(router_filesystem): + sema, fs, bases = router_filesystem + + src_base = await fresh_dir(fs, bases, 'gs') + + await write_file(fs, f'{src_base}empty-only/', '') + + await collect_files(await fs.listfiles(f'{src_base}')) + await collect_files(await fs.listfiles(f'{src_base}', recursive=True)) + await collect_files(await fs.listfiles(f'{src_base}empty-only/')) + await collect_files(await fs.listfiles(f'{src_base}empty-only/', recursive=True)) + + for transfer_type in (Transfer.DEST_IS_TARGET, Transfer.DEST_DIR, Transfer.INFER_DEST): + dest_base = await fresh_dir(fs, bases, 'gs') + await fs.copy(sema, Transfer(f'{src_base}empty-only/', dest_base.rstrip('/'), treat_dest_as=transfer_type)) + + # We ignore empty directories when copying + with pytest.raises(FileNotFoundError): + await collect_files(await fs.listfiles(f'{dest_base}empty-only/')) + + with pytest.raises(FileNotFoundError): + await collect_files(await fs.listfiles(f'{dest_base}empty-only/', recursive=True)) + + dest_base = await fresh_dir(fs, bases, 'gs') + await fs.copy(sema, Transfer(f'{src_base}', dest_base.rstrip('/'), treat_dest_as=transfer_type)) + + +@pytest.mark.asyncio +async def test_file_and_directory_error_with_slash_non_empty_file_only(router_filesystem): + sema, fs, bases = router_filesystem + + src_base = await fresh_dir(fs, bases, 'gs') + + await write_file(fs, f'{src_base}not-empty-file-w-slash/', b'not-empty') + + with pytest.raises(FileAndDirectoryError): + await collect_files(await fs.listfiles(f'{src_base}')) + + with pytest.raises(FileAndDirectoryError): + await collect_files(await fs.listfiles(f'{src_base}', recursive=True)) + + with pytest.raises(FileAndDirectoryError): + await collect_files(await fs.listfiles(f'{src_base}not-empty-file-w-slash/')) + + with pytest.raises(FileAndDirectoryError): + await collect_files(await fs.listfiles(f'{src_base}not-empty-file-w-slash/', recursive=True)) + + for transfer_type in (Transfer.DEST_IS_TARGET, Transfer.DEST_DIR, Transfer.INFER_DEST): + with pytest.raises(FileAndDirectoryError): + dest_base = await fresh_dir(fs, bases, 'gs') + await fs.copy(sema, Transfer(f'{src_base}not-empty-file-w-slash/', dest_base.rstrip('/'), treat_dest_as=transfer_type)) + + with pytest.raises(FileAndDirectoryError): + dest_base = await fresh_dir(fs, bases, 'gs') + await fs.copy(sema, Transfer(f'{src_base}', dest_base.rstrip('/'), treat_dest_as=transfer_type)) diff --git a/hail/python/test/hailtop/batch/test_batch.py b/hail/python/test/hailtop/batch/test_batch.py index 3b3ccac34a6..9f692de8f09 100644 --- a/hail/python/test/hailtop/batch/test_batch.py +++ b/hail/python/test/hailtop/batch/test_batch.py @@ -8,6 +8,7 @@ import google.cloud.storage from hailtop.batch import Batch, ServiceBackend, LocalBackend +from hailtop.batch.exceptions import BatchException from hailtop.batch.globals import arg_max from hailtop.utils import grouped from hailtop.config import get_user_config @@ -609,6 +610,14 @@ def test_gcsfuse_implicit_dirs(self): res = b.run() assert res.status()['state'] == 'success', debug_info(res) + def test_gcsfuse_empty_string_bucket_fails(self): + b = self.batch() + j = b.new_job() + with self.assertRaises(BatchException): + j.gcsfuse('', '/empty_bucket') + with self.assertRaises(BatchException): + j.gcsfuse(self.bucket_name, '') + def test_requester_pays(self): b = self.batch(requester_pays_project='hail-vdc') input = 
b.read_input('gs://hail-services-requester-pays/hello') @@ -787,7 +796,7 @@ def test_fail_fast(self): b = self.batch(cancel_after_n_failures=1) j1 = b.new_job() - j1.command(f'false') + j1.command('false') j2 = b.new_job() j2.command('sleep 300') @@ -795,3 +804,30 @@ def test_fail_fast(self): res = b.run() job_status = res.get_job(2).status() assert job_status['state'] == 'Cancelled', str(job_status) + + def test_service_backend_bucket_parameter(self): + backend = ServiceBackend(bucket='hail-test-dmk9z') + b = Batch(backend=backend) + j1 = b.new_job() + j1.command(f'echo hello > {j1.ofile}') + j2 = b.new_job() + j2.command(f'cat {j1.ofile}') + b.run() + + def test_service_backend_remote_tempdir_with_trailing_slash(self): + backend = ServiceBackend(remote_tmpdir='gs://hail-test-dmk9z/temporary-files/') + b = Batch(backend=backend) + j1 = b.new_job() + j1.command(f'echo hello > {j1.ofile}') + j2 = b.new_job() + j2.command(f'cat {j1.ofile}') + b.run() + + def test_service_backend_remote_tempdir_with_no_trailing_slash(self): + backend = ServiceBackend(remote_tmpdir='gs://hail-test-dmk9z/temporary-files') + b = Batch(backend=backend) + j1 = b.new_job() + j1.command(f'echo hello > {j1.ofile}') + j2 = b.new_job() + j2.command(f'cat {j1.ofile}') + b.run() diff --git a/hail/python/test/hailtop/test_aiogoogle.py b/hail/python/test/hailtop/test_aiogoogle.py index c10d0307910..c2c83460374 100644 --- a/hail/python/test/hailtop/test_aiogoogle.py +++ b/hail/python/test/hailtop/test_aiogoogle.py @@ -1,3 +1,4 @@ +from typing import Optional import os import secrets import shutil @@ -6,30 +7,28 @@ import asyncio import pytest import concurrent +import urllib.parse +import functools from hailtop.utils import secret_alnum_string, bounded_gather2 from hailtop.aiotools import LocalAsyncFS, RouterAsyncFS from hailtop.aiogoogle import StorageClient, GoogleStorageAsyncFS -@pytest.fixture(params=['file', 'gs', 'router/file', 'router/gs']) -async def filesystem(request): +@pytest.fixture(params=['gs', 'router/gs']) +async def gs_filesystem(request): token = secret_alnum_string() with ThreadPoolExecutor() as thread_pool: if request.param.startswith('router/'): fs = RouterAsyncFS( - 'file', [LocalAsyncFS(thread_pool), GoogleStorageAsyncFS()]) - elif request.param == 'file': - fs = LocalAsyncFS(thread_pool) + 'file', [LocalAsyncFS(thread_pool), + GoogleStorageAsyncFS()]) else: + assert request.param.endswith('gs') fs = GoogleStorageAsyncFS() async with fs: - if request.param.endswith('file'): - base = f'/tmp/{token}/' - else: - assert request.param.endswith('gs') - bucket = os.environ['HAIL_TEST_BUCKET'] - base = f'gs://{bucket}/tmp/{token}/' + bucket = os.environ['HAIL_TEST_GCS_BUCKET'] + base = f'gs://{bucket}/tmp/{token}/' await fs.mkdir(base) sema = asyncio.Semaphore(50) @@ -39,150 +38,9 @@ async def filesystem(request): assert not await fs.isdir(base) -@pytest.fixture -async def local_filesystem(request): - token = secret_alnum_string() - - with ThreadPoolExecutor() as thread_pool: - async with LocalAsyncFS(thread_pool) as fs: - base = f'/tmp/{token}/' - await fs.mkdir(base) - sema = asyncio.Semaphore(50) - async with sema: - yield (sema, fs, base) - await fs.rmtree(sema, base) - assert not await fs.isdir(base) - - -@pytest.fixture(params=['small', 'multipart', 'large']) -async def file_data(request): - if request.param == 'small': - return [b'foo'] - elif request.param == 'multipart': - return [b'foo', b'bar', b'baz'] - else: - assert request.param == 'large' - return [secrets.token_bytes(1_000_000)] - - 
-@pytest.mark.asyncio -async def test_write_read(filesystem, file_data): - sema, fs, base = filesystem - - file = f'{base}foo' - - async with await fs.create(file) as f: - for b in file_data: - await f.write(b) - - expected = b''.join(file_data) - async with await fs.open(file) as f: - actual = await f.read() - - assert expected == actual - - -@pytest.mark.asyncio -async def test_open_from(filesystem): - sema, fs, base = filesystem - - file = f'{base}foo' - - async with await fs.create(file) as f: - await f.write(b'abcde') - - async with await fs.open_from(file, 2) as f: - r = await f.read() - assert r == b'cde' - - -@pytest.mark.asyncio -async def test_isfile(filesystem): - sema, fs, base = filesystem - - file = f'{base}foo' - - # doesn't exist yet - assert not await fs.isfile(file) - - await fs.touch(file) - - assert await fs.isfile(file) - - -@pytest.mark.asyncio -async def test_isdir(filesystem): - sema, fs, base = filesystem - - # mkdir with trailing slash - dir = f'{base}dir/' - await fs.mkdir(dir) - - await fs.touch(f'{dir}foo') - - # can't test this until after creating foo - assert await fs.isdir(dir) - - # mkdir without trailing slash - dir2 = f'{base}dir2' - await fs.mkdir(dir2) - - await fs.touch(f'{dir2}/foo') - - assert await fs.isdir(dir) - - -@pytest.mark.asyncio -async def test_isdir_subdir_only(filesystem): - sema, fs, base = filesystem - - dir = f'{base}dir/' - await fs.mkdir(dir) - - subdir = f'{dir}subdir/' - await fs.mkdir(subdir) - - await fs.touch(f'{subdir}foo') - - # can't test this until after creating foo - assert await fs.isdir(dir) - assert await fs.isdir(subdir) - - -@pytest.mark.asyncio -async def test_remove(filesystem): - sema, fs, base = filesystem - - file = f'{base}foo' - - await fs.touch(file) - assert await fs.isfile(file) - - await fs.remove(file) - - assert not await fs.isfile(file) - - -@pytest.mark.asyncio -async def test_rmtree(filesystem): - sema, fs, base = filesystem - - dir = f'{base}foo/' - - await fs.mkdir(dir) - await fs.touch(f'{dir}a') - await fs.touch(f'{dir}b') - - assert await fs.isdir(dir) - - await fs.rmtree(sema, dir) - - assert not await fs.isdir(dir) - - @pytest.mark.asyncio async def test_get_object_metadata(): - bucket = os.environ['HAIL_TEST_BUCKET'] + bucket = os.environ['HAIL_TEST_GCS_BUCKET'] file = secrets.token_hex(16) async with StorageClient() as client: @@ -197,7 +55,7 @@ async def test_get_object_metadata(): @pytest.mark.asyncio async def test_get_object_headers(): - bucket = os.environ['HAIL_TEST_BUCKET'] + bucket = os.environ['HAIL_TEST_GCS_BUCKET'] file = secrets.token_hex(16) async with StorageClient() as client: @@ -212,7 +70,7 @@ async def test_get_object_headers(): @pytest.mark.asyncio async def test_compose(): - bucket = os.environ['HAIL_TEST_BUCKET'] + bucket = os.environ['HAIL_TEST_GCS_BUCKET'] token = secret_alnum_string() part_data = [b'a', b'bb', b'ccc'] @@ -230,126 +88,16 @@ async def test_compose(): @pytest.mark.asyncio -async def test_statfile_nonexistent_file(filesystem): - sema, fs, base = filesystem - - with pytest.raises(FileNotFoundError): - await fs.statfile(f'{base}foo') - - -@pytest.mark.asyncio -async def test_statfile_directory(filesystem): - sema, fs, base = filesystem - - await fs.mkdir(f'{base}dir/') - await fs.touch(f'{base}dir/foo') - - with pytest.raises(FileNotFoundError): - # statfile raises FileNotFound on directories - await fs.statfile(f'{base}dir') - - -@pytest.mark.asyncio -async def test_statfile(filesystem): - sema, fs, base = filesystem - - n = 37 - file = f'{base}bar' - async 
with await fs.create(file) as f:
-        await f.write(secrets.token_bytes(n))
-
-    status = await fs.statfile(file)
-    assert await status.size() == n
-
-@pytest.mark.asyncio
-async def test_listfiles(filesystem):
-    sema, fs, base = filesystem
-
-    with pytest.raises(FileNotFoundError):
-        await fs.listfiles(f'{base}does/not/exist')
-
-    with pytest.raises(FileNotFoundError):
-        await fs.listfiles(f'{base}does/not/exist', recursive=True)
-
-    # create the following directory structure in base:
-    # foobar
-    # foo/a
-    # foo/b/c
-    a = f'{base}foo/a'
-    b = f'{base}foo/b/'
-    c = f'{base}foo/b/c'
-    await fs.touch(f'{base}foobar')
-    await fs.mkdir(f'{base}foo/')
-    await fs.touch(a)
-    await fs.mkdir(b)
-    await fs.touch(c)
-
-    async def listfiles(dir, recursive):
-        return {(await entry.url(), await entry.is_file()) async for entry in await fs.listfiles(dir, recursive)}
-
-    assert await listfiles(f'{base}foo/', recursive=True) == {(a, True), (c, True)}
-    assert await listfiles(f'{base}foo/', recursive=False) == {(a, True), (b, False)}
-
-    # without trailing slash
-    assert await listfiles(f'{base}foo', recursive=True) == {(a, True), (c, True)}
-    assert await listfiles(f'{base}foo', recursive=False) == {(a, True), (b, False)}
-
-    # test FileListEntry.status raises on directory
-    async for entry in await fs.listfiles(f'{base}foo/', recursive=False):
-        if await entry.is_dir():
-            with pytest.raises(ValueError):
-                await entry.status()
-        else:
-            stat = await entry.status()
-            assert await stat.size() == 0
-
-@pytest.mark.asyncio
-@pytest.mark.parametrize("permutation", [
-    None,
-    [0, 1, 2],
-    [0, 2, 1],
-    [1, 2, 0],
-    [2, 1, 0]
-])
-async def test_multi_part_create(filesystem, permutation):
-    sema, fs, base = filesystem
-
-    part_data = [secrets.token_bytes(s) for s in [8192, 600, 20000]]
-
-    s = 0
-    part_start = []
-    for b in part_data:
-        part_start.append(s)
-        s += len(b)
-
-    path = f'{base}a'
-    async with await fs.multi_part_create(sema, path, len(part_data)) as c:
-        async def create_part(i):
-            async with await c.create_part(i, part_start[i]) as f:
-                await f.write(part_data[i])
-
-        if permutation:
-            # do it in a fixed order
-            for i in permutation:
-                await create_part(i)
-        else:
-            # do in parallel
-            await asyncio.gather(*[
-                create_part(i) for i in range(len(part_data))])
-
-    expected = b''.join(part_data)
-    async with await fs.open(path) as f:
-        actual = await f.read()
-    assert expected == actual
-
-
-@pytest.mark.asyncio
-async def test_multi_part_create_many(filesystem):
+async def test_multi_part_create_many_two_level_merge(gs_filesystem):
+    # This is a white-box test. compose has a maximum of 32 inputs,
+    # so if we're composing more than 32 parts, the
+    # GoogleStorageAsyncFS does a multi-level hierarchical merge.
    
try: - sema, fs, base = filesystem + sema, fs, base = gs_filesystem - # > 32 so we perform at least 2 layers of merging - part_data = [secrets.token_bytes(100) for _ in range(80)] + # > 32 so we perform at least 2 levels of merging + part_data_size = [100 for _ in range(40)] + part_data = [secrets.token_bytes(s) for s in part_data_size] s = 0 part_start = [] @@ -365,11 +113,10 @@ async def create_part(i): # do in parallel await bounded_gather2(sema, *[ - create_part(i) for i in range(len(part_data))]) + functools.partial(create_part, i) for i in range(len(part_data))]) expected = b''.join(part_data) - async with await fs.open(path) as f: - actual = await f.read() + actual = await fs.read(path) assert expected == actual except (concurrent.futures._base.CancelledError, asyncio.CancelledError) as err: raise AssertionError('uncaught cancelled error') from err diff --git a/hail/python/test/hailtop/test_fs.py b/hail/python/test/hailtop/test_fs.py new file mode 100644 index 00000000000..c106cfcb929 --- /dev/null +++ b/hail/python/test/hailtop/test_fs.py @@ -0,0 +1,338 @@ +from typing import Optional +import os +import secrets +import shutil +from itertools import accumulate +from concurrent.futures import ThreadPoolExecutor +import asyncio +import pytest +import concurrent +import urllib.parse +from hailtop.utils import secret_alnum_string +from hailtop.aiotools import LocalAsyncFS, RouterAsyncFS +from hailtop.aiotools.s3asyncfs import S3AsyncFS +from hailtop.aiogoogle import GoogleStorageAsyncFS + + +@pytest.fixture(params=['file', 'gs', 's3', 'router/file', 'router/gs', 'router/s3']) +async def filesystem(request): + token = secret_alnum_string() + + with ThreadPoolExecutor() as thread_pool: + if request.param.startswith('router/'): + fs = RouterAsyncFS( + 'file', [LocalAsyncFS(thread_pool), + GoogleStorageAsyncFS(), + S3AsyncFS(thread_pool)]) + elif request.param == 'file': + fs = LocalAsyncFS(thread_pool) + elif request.param.endswith('gs'): + fs = GoogleStorageAsyncFS() + else: + assert request.param.endswith('s3') + fs = S3AsyncFS(thread_pool) + async with fs: + if request.param.endswith('file'): + base = f'/tmp/{token}/' + elif request.param.endswith('gs'): + bucket = os.environ['HAIL_TEST_GCS_BUCKET'] + base = f'gs://{bucket}/tmp/{token}/' + else: + assert request.param.endswith('s3') + bucket = os.environ['HAIL_TEST_S3_BUCKET'] + base = f's3://{bucket}/tmp/{token}/' + + await fs.mkdir(base) + sema = asyncio.Semaphore(50) + async with sema: + yield (sema, fs, base) + await fs.rmtree(sema, base) + assert not await fs.isdir(base) + + +@pytest.fixture +async def local_filesystem(request): + token = secret_alnum_string() + + with ThreadPoolExecutor() as thread_pool: + async with LocalAsyncFS(thread_pool) as fs: + base = f'/tmp/{token}/' + await fs.mkdir(base) + sema = asyncio.Semaphore(50) + async with sema: + yield (sema, fs, base) + await fs.rmtree(sema, base) + assert not await fs.isdir(base) + + +@pytest.fixture(params=['small', 'multipart', 'large']) +async def file_data(request): + if request.param == 'small': + return [b'foo'] + elif request.param == 'multipart': + return [b'foo', b'bar', b'baz'] + else: + assert request.param == 'large' + return [secrets.token_bytes(1_000_000)] + + +@pytest.mark.asyncio +async def test_write_read(filesystem, file_data): + sema, fs, base = filesystem + + file = f'{base}foo' + + async with await fs.create(file) as f: + for b in file_data: + await f.write(b) + + expected = b''.join(file_data) + async with await fs.open(file) as f: + actual = await 
f.read() + + assert expected == actual + + +@pytest.mark.asyncio +async def test_open_from(filesystem): + sema, fs, base = filesystem + + file = f'{base}foo' + + async with await fs.create(file) as f: + await f.write(b'abcde') + + async with await fs.open_from(file, 2) as f: + r = await f.read() + assert r == b'cde' + + +@pytest.mark.asyncio +async def test_read_from(filesystem): + sema, fs, base = filesystem + + file = f'{base}foo' + + await fs.write(file, b'abcde') + r = await fs.read_from(file, 2) + assert r == b'cde' + + +@pytest.mark.asyncio +async def test_read_range(filesystem): + sema, fs, base = filesystem + + file = f'{base}foo' + + await fs.write(file, b'abcde') + + r = await fs.read_range(file, 2, 2) + assert r == b'c' + + r = await fs.read_range(file, 2, 4) + assert r == b'cde' + + r = await fs.read_range(file, 2, 10) + assert r == b'cde' + + +@pytest.mark.asyncio +async def test_isfile(filesystem): + sema, fs, base = filesystem + + file = f'{base}foo' + + # doesn't exist yet + assert not await fs.isfile(file) + + await fs.touch(file) + + assert await fs.isfile(file) + + +@pytest.mark.asyncio +async def test_isdir(filesystem): + sema, fs, base = filesystem + + # mkdir with trailing slash + dir = f'{base}dir/' + await fs.mkdir(dir) + + await fs.touch(f'{dir}foo') + + # can't test this until after creating foo + assert await fs.isdir(dir) + + # mkdir without trailing slash + dir2 = f'{base}dir2' + await fs.mkdir(dir2) + + await fs.touch(f'{dir2}/foo') + + assert await fs.isdir(dir) + + +@pytest.mark.asyncio +async def test_isdir_subdir_only(filesystem): + sema, fs, base = filesystem + + dir = f'{base}dir/' + await fs.mkdir(dir) + + subdir = f'{dir}subdir/' + await fs.mkdir(subdir) + + await fs.touch(f'{subdir}foo') + + # can't test this until after creating foo + assert await fs.isdir(dir) + assert await fs.isdir(subdir) + + +@pytest.mark.asyncio +async def test_remove(filesystem): + sema, fs, base = filesystem + + file = f'{base}foo' + + await fs.touch(file) + assert await fs.isfile(file) + + await fs.remove(file) + + assert not await fs.isfile(file) + + +@pytest.mark.asyncio +async def test_rmtree(filesystem): + sema, fs, base = filesystem + + dir = f'{base}foo/' + + await fs.mkdir(dir) + await fs.touch(f'{dir}a') + await fs.touch(f'{dir}b') + + assert await fs.isdir(dir) + + await fs.rmtree(sema, dir) + + assert not await fs.isdir(dir) + + +@pytest.mark.asyncio +async def test_statfile_nonexistent_file(filesystem): + sema, fs, base = filesystem + + with pytest.raises(FileNotFoundError): + await fs.statfile(f'{base}foo') + + +@pytest.mark.asyncio +async def test_statfile_directory(filesystem): + sema, fs, base = filesystem + + await fs.mkdir(f'{base}dir/') + await fs.touch(f'{base}dir/foo') + + with pytest.raises(FileNotFoundError): + # statfile raises FileNotFound on directories + await fs.statfile(f'{base}dir') + + +@pytest.mark.asyncio +async def test_statfile(filesystem): + sema, fs, base = filesystem + + n = 37 + file = f'{base}bar' + await fs.write(file, secrets.token_bytes(n)) + status = await fs.statfile(file) + assert await status.size() == n + +@pytest.mark.asyncio +async def test_listfiles(filesystem): + sema, fs, base = filesystem + + with pytest.raises(FileNotFoundError): + await fs.listfiles(f'{base}does/not/exist') + + with pytest.raises(FileNotFoundError): + await fs.listfiles(f'{base}does/not/exist', recursive=True) + + # create the following directory structure in base: + # foobar + # foo/a + # foo/b/c + a = f'{base}foo/a' + b = f'{base}foo/b/' + c = 
f'{base}foo/b/c'
+    await fs.touch(f'{base}foobar')
+    await fs.mkdir(f'{base}foo/')
+    await fs.touch(a)
+    await fs.mkdir(b)
+    await fs.touch(c)
+
+    async def listfiles(dir, recursive):
+        return {(await entry.url(), await entry.is_file()) async for entry in await fs.listfiles(dir, recursive)}
+
+    assert await listfiles(f'{base}foo/', recursive=True) == {(a, True), (c, True)}
+    assert await listfiles(f'{base}foo/', recursive=False) == {(a, True), (b, False)}
+
+    # without trailing slash
+    assert await listfiles(f'{base}foo', recursive=True) == {(a, True), (c, True)}
+    assert await listfiles(f'{base}foo', recursive=False) == {(a, True), (b, False)}
+
+    # test FileListEntry.status raises on directory
+    async for entry in await fs.listfiles(f'{base}foo/', recursive=False):
+        if await entry.is_dir():
+            with pytest.raises(IsADirectoryError):
+                await entry.status()
+        else:
+            stat = await entry.status()
+            assert await stat.size() == 0
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("permutation", [
+    None,
+    [0, 1, 2],
+    [0, 2, 1],
+    [1, 2, 0],
+    [2, 1, 0]
+])
+async def test_multi_part_create(filesystem, permutation):
+    sema, fs, base = filesystem
+
+    # S3 has a minimum part size (except for the last part) of 5MiB
+    if base.startswith('s3'):
+        min_part_size = 5 * 1024 * 1024
+        part_data_size = [min_part_size, min_part_size, min_part_size]
+    else:
+        part_data_size = [8192, 600, 20000]
+    part_data = [secrets.token_bytes(s) for s in part_data_size]
+
+    s = 0
+    part_start = []
+    for b in part_data:
+        part_start.append(s)
+        s += len(b)
+
+    path = f'{base}a'
+    async with await fs.multi_part_create(sema, path, len(part_data)) as c:
+        async def create_part(i):
+            async with await c.create_part(i, part_start[i]) as f:
+                await f.write(part_data[i])
+
+        if permutation:
+            # do it in a fixed order
+            for i in permutation:
+                await create_part(i)
+        else:
+            # do in parallel
+            await asyncio.gather(*[
+                create_part(i) for i in range(len(part_data))])
+
+    expected = b''.join(part_data)
+    async with await fs.open(path) as f:
+        actual = await f.read()
+    assert expected == actual
diff --git a/hail/src/main/scala/is/hail/HailContext.scala b/hail/src/main/scala/is/hail/HailContext.scala
index d6f1a6d8773..5846d92c09b 100644
--- a/hail/src/main/scala/is/hail/HailContext.scala
+++ b/hail/src/main/scala/is/hail/HailContext.scala
@@ -477,7 +477,8 @@ object HailFeatureFlags {
     ("use_spicy_ptypes", ("HAIL_USE_SPICY_PTYPES" -> null)),
     ("log_service_timing", ("HAIL_DEV_LOG_SERVICE_TIMING" -> null)),
     ("cache_service_input", ("HAIL_DEV_CACHE_SERVICE_INPUT" -> null)),
-    ("write_ir_files", ("HAIL_WRITE_IR_FILES" -> null))
+    ("write_ir_files", ("HAIL_WRITE_IR_FILES" -> null)),
+    ("method_split_ir_limit", ("HAIL_DEV_METHOD_SPLIT_LIMIT" -> "16"))
   )
 }
diff --git a/hail/src/main/scala/is/hail/asm4s/ClassBuilder.scala b/hail/src/main/scala/is/hail/asm4s/ClassBuilder.scala
index 8bde2e6a2a8..375bf850ca7 100644
--- a/hail/src/main/scala/is/hail/asm4s/ClassBuilder.scala
+++ b/hail/src/main/scala/is/hail/asm4s/ClassBuilder.scala
@@ -76,6 +76,8 @@ class ClassesBytes(classesBytes: Array[(String, Array[Byte])]) extends Serializa
 }
 
 class AsmTuple[C](val cb: ClassBuilder[C], val fields: IndexedSeq[Field[_]], val ctor: MethodBuilder[C]) {
+  val ti: TypeInfo[_] = cb.ti
+
   def newTuple(elems: IndexedSeq[Code[_]]): Code[C] = Code.newInstance(cb, ctor, elems)
 
   def loadElementsAny(t: Value[_]): IndexedSeq[Code[_]] = fields.map(_.get(coerce[C](t) ))
@@ -108,18 +110,29 @@ class ModuleBuilder() {
   def tupleClass(fieldTypes: IndexedSeq[TypeInfo[_]]): AsmTuple[_] 
= { tuples.getOrElseUpdate(fieldTypes, { - val cb = genClass[AnyRef]("Tuple") + val kb = genClass[Unit](s"Tuple${fieldTypes.length}") val fields = fieldTypes.zipWithIndex.map { case (ti, i) => - cb.newField(s"_$i")(ti) + kb.newField(s"_$i")(ti) } - val ctor = cb.newMethod("", fieldTypes, UnitInfo) + val ctor = kb.newMethod("", fieldTypes, UnitInfo) ctor.emitWithBuilder { cb => + // FIXME, maybe a more elegant way to do this? + val L = new lir.Block() + L.append( + lir.methodStmt(INVOKESPECIAL, + "java/lang/Object", + "", + "()V", + false, + UnitInfo, + FastIndexedSeq(lir.load(ctor._this.asInstanceOf[LocalRef[_]].l)))) + cb += new VCode(L, L, null) fields.zipWithIndex.foreach { case (f, i) => cb += f.putAny(ctor._this, ctor.getArg(i + 1)(f.ti).get) } Code._empty } - new AsmTuple(cb, fields, ctor) + new AsmTuple(kb, fields, ctor) }) } @@ -216,7 +229,7 @@ class ClassBuilder[C]( val sourceFile: Option[String] ) extends WrappedModuleBuilder { - val ti: TypeInfo[C] = new ClassInfo[C](className) + val ti: ClassInfo[C] = new ClassInfo[C](className) val lclass = new lir.Classx[C](className, "java/lang/Object", sourceFile) @@ -291,13 +304,6 @@ class ClassBuilder[C]( mb } - def genDependentFunction[A1 : TypeInfo, R : TypeInfo](baseName: String): DependentFunctionBuilder[AsmFunction1[A1, R]] = { - val depCB = modb.genClass[AsmFunction1[A1, R]](baseName) - val apply = depCB.newMethod("apply", Array(GenericTypeInfo[A1]), GenericTypeInfo[R]) - val dep_apply_method = new DependentMethodBuilder(apply) - new DependentFunctionBuilder[AsmFunction1[A1, R]](dep_apply_method) - } - def newField[T: TypeInfo](name: String): Field[T] = new Field[T](this, name) def newStaticField[T: TypeInfo](name: String): StaticField[T] = new StaticField[T](this, name) @@ -494,7 +500,8 @@ class MethodBuilder[C]( assert(ti == cb.ti, s"$ti != ${ cb.ti }") else { val static = (!isStatic).toInt - assert(ti == parameterTypeInfo(i - static), s"$ti != ${ parameterTypeInfo(i - static) }") + assert(ti == parameterTypeInfo(i - static), + s"$ti != ${ parameterTypeInfo(i - static) }\n params: $parameterTypeInfo") } new LocalRef(lmethod.getParam(i)) } @@ -550,58 +557,6 @@ class MethodBuilder[C]( } } -class DependentMethodBuilder[C](val mb: MethodBuilder[C]) extends WrappedMethodBuilder[C] { - var setFields: mutable.ArrayBuffer[(lir.ValueX) => Code[Unit]] = new mutable.ArrayBuffer() - - def newDepField[T : TypeInfo](value: Code[T]): Value[T] = { - val cfr = genFieldThisRef[T]() - setFields += { (obj: lir.ValueX) => - value.end.append(lir.putField(cb.className, cfr.name, typeInfo[T], obj, value.v)) - val newC = new VCode(value.start, value.end, null) - value.clear() - newC - } - cfr - } - - def newDepFieldAny[T: TypeInfo](value: Code[_]): Value[T] = - newDepField(value.asInstanceOf[Code[T]]) - - def newInstance(mb: MethodBuilder[_]): Code[C] = { - val L = new lir.Block() - - val obj = new lir.Local(null, "new_dep_fun", cb.ti) - L.append(lir.store(obj, lir.newInstance(cb.ti, cb.lInit, FastIndexedSeq.empty[lir.ValueX]))) - - var end = L - setFields.foreach { f => - val c = f(lir.load(obj)) - end.append(lir.goto(c.start)) - end = c.end - } - new VCode(L, end, lir.load(obj)) - } - - override def result(pw: Option[PrintWriter]): () => C = - throw new UnsupportedOperationException("cannot call result() on a dependent function") -} - -trait WrappedDependentMethodBuilder[C] extends WrappedMethodBuilder[C] { - def dmb: DependentMethodBuilder[C] - - def mb: MethodBuilder[C] = dmb.mb - - def newDepField[T : TypeInfo](value: Code[T]): Value[T] = 
dmb.newDepField(value) - - def newDepFieldAny[T: TypeInfo](value: Code[_]): Value[T] = dmb.newDepFieldAny[T](value) - - def newInstance(mb: MethodBuilder[_]): Code[C] = dmb.newInstance(mb) -} - -class DependentFunctionBuilder[F](apply_method: DependentMethodBuilder[F]) extends WrappedDependentMethodBuilder[F] { - def dmb: DependentMethodBuilder[F] = apply_method -} - class FunctionBuilder[F]( val apply_method: MethodBuilder[F] ) extends WrappedMethodBuilder[F] { diff --git a/hail/src/main/scala/is/hail/asm4s/Code.scala b/hail/src/main/scala/is/hail/asm4s/Code.scala index a166aa42060..e322912a823 100644 --- a/hail/src/main/scala/is/hail/asm4s/Code.scala +++ b/hail/src/main/scala/is/hail/asm4s/Code.scala @@ -315,6 +315,7 @@ object Code { invokeStatic[S](tct.runtimeClass, method, Array[Class[_]](a1ct.runtimeClass, a2ct.runtimeClass, a3ct.runtimeClass, a4ct.runtimeClass, a5ct.runtimeClass), Array[Code[_]](a1, a2, a3, a4, a5))(sct) def _null[T >: Null](implicit tti: TypeInfo[T]): Code[T] = Code(lir.insn0(ACONST_NULL, tti)) + def _uncheckednull(tti: TypeInfo[_]): Code[_] = Code(lir.insn0(ACONST_NULL, tti)) def _empty: Code[Unit] = Code[Unit](null: lir.ValueX) @@ -1051,6 +1052,25 @@ class CodeArray[T](val lhs: Code[Array[T]])(implicit tti: TypeInfo[T]) { Code(lhs, lir.insn1(ARRAYLENGTH)) } +class UntypedCodeArray(val lhs: Code[_], tti: TypeInfo[_]) { + def apply(i: Code[Int]): Code[_] = + Code(lhs, i, lir.insn2(tti.aloadOp)) + + def update(i: Code[Int], x: Code[_]): Code[Unit] = { + lhs.start.append(lir.goto(i.end)) + i.start.append(lir.goto(x.start)) + x.end.append(lir.stmtOp(tti.astoreOp, lhs.v, i.v, x.v)) + val newC = new VCode(lhs.start, x.end, null) + lhs.clear() + i.clear() + x.clear() + newC + } + + def length(): Code[Int] = + Code(lhs, lir.insn1(ARRAYLENGTH)) +} + object CodeLabel { def apply(): CodeLabel = { val L = new lir.Block() diff --git a/hail/src/main/scala/is/hail/asm4s/package.scala b/hail/src/main/scala/is/hail/asm4s/package.scala index a15d094ef77..0268b2b4165 100644 --- a/hail/src/main/scala/is/hail/asm4s/package.scala +++ b/hail/src/main/scala/is/hail/asm4s/package.scala @@ -28,6 +28,8 @@ package asm4s { } override def toString: String = desc + + def uninitializedValue: Code[_] } class ClassInfo[C](className: String) extends TypeInfo[C] { @@ -40,6 +42,8 @@ package asm4s { val returnOp = ARETURN def newArray(): AbstractInsnNode = new TypeInsnNode(ANEWARRAY, iname) + + override def uninitializedValue: Code[_] = Code._uncheckednull(this) } class ArrayInfo[T](implicit val tti: TypeInfo[T]) extends TypeInfo[Array[T]] { @@ -52,6 +56,8 @@ package asm4s { val returnOp = ARETURN def newArray() = new TypeInsnNode(ANEWARRAY, iname) + + override def uninitializedValue: Code[_] = Code._null[Array[T]](this) } } @@ -105,12 +111,14 @@ package object asm4s { val desc = "Z" val loadOp = ILOAD val storeOp = ISTORE - val aloadOp = IALOAD - val astoreOp = IASTORE + val aloadOp = BALOAD + val astoreOp = BASTORE val returnOp = IRETURN val newarrayOp = NEWARRAY def newArray() = new IntInsnNode(NEWARRAY, T_BOOLEAN) + + override def uninitializedValue: Code[_] = const(false) } implicit object ByteInfo extends TypeInfo[Byte] { @@ -123,6 +131,8 @@ package object asm4s { val newarrayOp = NEWARRAY def newArray() = new IntInsnNode(NEWARRAY, T_BYTE) + + override def uninitializedValue: Code[_] = const(0.toByte) } implicit object ShortInfo extends TypeInfo[Short] { @@ -135,6 +145,8 @@ package object asm4s { val newarrayOp = NEWARRAY def newArray() = new IntInsnNode(NEWARRAY, T_SHORT) + + override def 
uninitializedValue: Code[_] = const(0.toShort) } implicit object IntInfo extends TypeInfo[Int] { @@ -146,6 +158,8 @@ package object asm4s { val returnOp = IRETURN def newArray() = new IntInsnNode(NEWARRAY, T_INT) + + override def uninitializedValue: Code[_] = const(0) } implicit object LongInfo extends TypeInfo[Long] { @@ -158,6 +172,8 @@ package object asm4s { override val slots = 2 def newArray() = new IntInsnNode(NEWARRAY, T_LONG) + + override def uninitializedValue: Code[_] = const(0L) } implicit object FloatInfo extends TypeInfo[Float] { @@ -170,6 +186,8 @@ package object asm4s { def newArray() = new IntInsnNode(NEWARRAY, T_FLOAT) + + override def uninitializedValue: Code[_] = const(0f) } implicit object DoubleInfo extends TypeInfo[Double] { @@ -182,6 +200,8 @@ package object asm4s { override val slots = 2 def newArray() = new IntInsnNode(NEWARRAY, T_DOUBLE) + + override def uninitializedValue: Code[_] = const(0d) } implicit object CharInfo extends TypeInfo[Char] { @@ -194,6 +214,8 @@ package object asm4s { override val slots = 2 def newArray() = new IntInsnNode(NEWARRAY, T_CHAR) + + override def uninitializedValue: Code[_] = const(0.toChar) } implicit object UnitInfo extends TypeInfo[Unit] { @@ -206,6 +228,8 @@ package object asm4s { override def slots = ??? def newArray() = ??? + + override def uninitializedValue: Code[_] = Code._empty } def classInfoFromClass[C](c: Class[C]): ClassInfo[C] = { diff --git a/hail/src/main/scala/is/hail/backend/local/LocalBackend.scala b/hail/src/main/scala/is/hail/backend/local/LocalBackend.scala index e872ac6d40c..182cb1b87b8 100644 --- a/hail/src/main/scala/is/hail/backend/local/LocalBackend.scala +++ b/hail/src/main/scala/is/hail/backend/local/LocalBackend.scala @@ -15,7 +15,8 @@ import is.hail.io.{BufferSpec, TypedCodecSpec} import is.hail.linalg.BlockMatrix import is.hail.types._ import is.hail.types.encoded.EType -import is.hail.types.physical.{PTuple, PType, PTypeReferenceSingleCodeType, PVoid, SingleCodeType} +import is.hail.types.physical.stypes.{PTypeReferenceSingleCodeType, SingleCodeType} +import is.hail.types.physical.{PTuple, PType, PVoid} import is.hail.types.virtual.TVoid import is.hail.utils._ import is.hail.variant.ReferenceGenome diff --git a/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala b/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala index f1820f28aff..57eba44f1c9 100644 --- a/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala +++ b/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala @@ -22,6 +22,7 @@ import is.hail.services.shuffler.ShuffleClient import is.hail.types._ import is.hail.types.encoded._ import is.hail.types.physical._ +import is.hail.types.physical.stypes.PTypeReferenceSingleCodeType import is.hail.types.virtual._ import is.hail.utils._ import is.hail.variant.ReferenceGenome diff --git a/hail/src/main/scala/is/hail/backend/spark/SparkBackend.scala b/hail/src/main/scala/is/hail/backend/spark/SparkBackend.scala index ed28cb96b92..f8c57cba6cd 100644 --- a/hail/src/main/scala/is/hail/backend/spark/SparkBackend.scala +++ b/hail/src/main/scala/is/hail/backend/spark/SparkBackend.scala @@ -9,9 +9,10 @@ import is.hail.HailContext import is.hail.expr.{JSONAnnotationImpex, SparkAnnotationImpex, Validate} import is.hail.expr.ir.lowering._ import is.hail.expr.ir._ -import is.hail.types.physical.{PStruct, PTuple, PType, PTypeReferenceSingleCodeType} -import is.hail.types.virtual.{TStruct, TVoid, Type} +import is.hail.types.physical.{PStruct, PTuple, PType} +import 
is.hail.types.virtual.{TArray, TInterval, TStruct, TVoid, Type} import is.hail.backend.{Backend, BackendContext, BroadcastValue, HailTaskContext} +import is.hail.expr.ir.IRParser.parseType import is.hail.io.fs.{FS, HadoopFS} import is.hail.utils._ import is.hail.io.bgen.IndexBgen @@ -31,10 +32,12 @@ import is.hail.linalg.{BlockMatrix, RowMatrix} import is.hail.rvd.RVD import is.hail.stats.LinearMixedModel import is.hail.types._ +import is.hail.types.physical.stypes.PTypeReferenceSingleCodeType import is.hail.variant.ReferenceGenome import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel import org.apache.spark.util.TaskCompletionListener +import org.json4s import org.json4s.JsonAST.{JInt, JObject} @@ -549,6 +552,28 @@ class SparkBackend( } } + def pyReadMultipleMatrixTables(jsonQuery: String): java.util.List[MatrixIR] = { + log.info("pyReadMultipleMatrixTables: got query") + val kvs = JsonMethods.parse(jsonQuery) match { + case json4s.JObject(values) => values.toMap + } + + val paths = kvs("paths").asInstanceOf[json4s.JArray].arr.toArray.map { case json4s.JString(s) => s } + + val intervalPointType = parseType(kvs("intervalPointType").asInstanceOf[json4s.JString].s) + val intervalObjects = JSONAnnotationImpex.importAnnotation(kvs("intervals"), TArray(TInterval(intervalPointType))) + .asInstanceOf[IndexedSeq[Interval]] + + val opts = NativeReaderOptions(intervalObjects, intervalPointType, filterIntervals = false) + val matrixReaders: IndexedSeq[MatrixIR] = paths.map { p => + log.info(s"creating MatrixRead node for $p") + val mnr = MatrixNativeReader(fs, p, Some(opts)) + MatrixRead(mnr.fullMatrixType, false, false, mnr): MatrixIR + } + log.info("pyReadMultipleMatrixTables: returning N matrix tables") + matrixReaders.asJava + } + def pyReferenceAddLiftover(name: String, chainFile: String, destRGName: String): Unit = { ExecutionTimer.logTime("SparkBackend.pyReferenceAddLiftover") { timer => withExecuteContext(timer) { ctx => diff --git a/hail/src/main/scala/is/hail/experimental/ExperimentalFunctions.scala b/hail/src/main/scala/is/hail/experimental/ExperimentalFunctions.scala index 41b4476815f..b93be8b0d41 100644 --- a/hail/src/main/scala/is/hail/experimental/ExperimentalFunctions.scala +++ b/hail/src/main/scala/is/hail/experimental/ExperimentalFunctions.scala @@ -1,6 +1,8 @@ package is.hail.experimental import is.hail.expr.ir.functions._ +import is.hail.types.physical.stypes.SType +import is.hail.types.physical.stypes.concrete.SIndexablePointer import is.hail.types.physical.{PCanonicalArray, PFloat64, PType} import is.hail.types.virtual.{TArray, TFloat64, TInt32, Type} @@ -9,7 +11,7 @@ object ExperimentalFunctions extends RegistryFunctions { def registerAll() { val experimentalPackageClass = Class.forName("is.hail.experimental.package$") - registerScalaFunction("filtering_allele_frequency", Array(TInt32, TInt32, TFloat64), TFloat64, (_: Type, pt: Seq[PType]) => PFloat64())(experimentalPackageClass, "calcFilterAlleleFreq") - registerWrappedScalaFunction1("haplotype_freq_em", TArray(TInt32), TArray(TFloat64), (_: Type, pt: PType) => PCanonicalArray(PFloat64(true)))(experimentalPackageClass, "haplotypeFreqEM") + registerScalaFunction("filtering_allele_frequency", Array(TInt32, TInt32, TFloat64), TFloat64, null)(experimentalPackageClass, "calcFilterAlleleFreq") + registerWrappedScalaFunction1("haplotype_freq_em", TArray(TInt32), TArray(TFloat64), (_: Type, pt: SType) => SIndexablePointer(PCanonicalArray(PFloat64(true))))(experimentalPackageClass, "haplotypeFreqEM") } } \ No 
newline at end of file diff --git a/hail/src/main/scala/is/hail/expr/ir/ArraySorter.scala b/hail/src/main/scala/is/hail/expr/ir/ArraySorter.scala index 72be5592af3..3e60eec73eb 100644 --- a/hail/src/main/scala/is/hail/expr/ir/ArraySorter.scala +++ b/hail/src/main/scala/is/hail/expr/ir/ArraySorter.scala @@ -2,75 +2,208 @@ package is.hail.expr.ir import is.hail.annotations.Region import is.hail.asm4s._ -import is.hail.types.physical.{PCanonicalArray, PCanonicalDict, PCanonicalSet, PCode, PIndexableCode, PType, typeToTypeInfo} +import is.hail.types.physical.stypes.interfaces.SIndexableCode +import is.hail.types.physical.{PCanonicalArray, PCanonicalDict, PCanonicalSet} +import is.hail.types.virtual.{TArray, TDict, TSet, Type} +import is.hail.utils.FastIndexedSeq + +import scala.language.existentials class ArraySorter(r: EmitRegion, array: StagedArrayBuilder) { - val typ: PType = array.elt - val ti: TypeInfo[_] = typeToTypeInfo(typ) + val ti: TypeInfo[_] = array.elt.ti val mb: EmitMethodBuilder[_] = r.mb - def sort(sorter: DependentEmitFunctionBuilder[_]): Code[Unit] = { - val localF = ti match { - case BooleanInfo => mb.genFieldThisRef[AsmFunction2[Boolean, Boolean, Boolean]]() - case IntInfo => mb.genFieldThisRef[AsmFunction2[Int, Int, Boolean]]() - case LongInfo => mb.genFieldThisRef[AsmFunction2[Int, Int, Boolean]]() - case FloatInfo => mb.genFieldThisRef[AsmFunction2[Long, Long, Boolean]]() - case DoubleInfo => mb.genFieldThisRef[AsmFunction2[Double, Double, Boolean]]() + private[this] var prunedMissing: Boolean = false + + private[this] val workingArrayInfo = arrayInfo(array.ti) + private[this] val workingArray1 = mb.genFieldThisRef("sorter_working_array")(workingArrayInfo) + private[this] val workingArray2 = mb.genFieldThisRef("sorter_working_array")(workingArrayInfo) + + private[this] def arrayRef(workingArray: Code[Array[_]]): UntypedCodeArray = new UntypedCodeArray(workingArray, array.ti) + + def sort(cb: EmitCodeBuilder, region: Value[Region], comparesLessThan: (EmitCodeBuilder, Value[Region], Code[_], Code[_]) => Code[Boolean]): Unit = { + + val sortMB = cb.emb.ecb.genEmitMethod("arraySorter_outer", FastIndexedSeq[ParamType](classInfo[Region]), UnitInfo) + sortMB.voidWithBuilder { cb => + + val newEnd = cb.newLocal[Int]("newEnd", 0) + val i = cb.newLocal[Int]("i", 0) + val size = cb.newLocal[Int]("size", array.size) + + cb.whileLoop(i < size, { + cb.ifx(!array.isMissing(i), { + cb.ifx(newEnd.cne(i), cb += array.update(newEnd, array.apply(i))) + cb.assign(newEnd, newEnd + 1) + }) + cb.assign(i, i + 1) + }) + cb.assign(i, newEnd) + cb.whileLoop(i < size, { + cb += array.setMissing(i, true) + cb.assign(i, i + 1) + }) + + // sort elements in [0, newEnd] + + // merging into B + val mergeMB = cb.emb.ecb.genEmitMethod("arraySorter_merge", FastIndexedSeq[ParamType](classInfo[Region], IntInfo, IntInfo, IntInfo, workingArrayInfo, workingArrayInfo), UnitInfo) + mergeMB.voidWithBuilder { cb => + val r = mergeMB.getCodeParam[Region](1) + val begin = mergeMB.getCodeParam[Int](2) + val mid = mergeMB.getCodeParam[Int](3) + val end = mergeMB.getCodeParam[Int](4) + + def arrayA = new UntypedCodeArray(mergeMB.getCodeParam(5)(workingArrayInfo), array.ti) + + def arrayB = new UntypedCodeArray(mergeMB.getCodeParam(6)(workingArrayInfo), array.ti) + + val i = cb.newLocal[Int]("mergemb_i", begin) + val j = cb.newLocal[Int]("mergemb_j", mid) + + val k = cb.newLocal[Int]("mergemb_k", i) + cb.whileLoop(k < end, { + + val LtakeFromLeft = CodeLabel() + val LtakeFromRight = CodeLabel() + val Ldone = CodeLabel() + 
+ cb.ifx(j < end, { + cb.ifx(i >= mid, cb.goto(LtakeFromRight)) + cb.ifx(comparesLessThan(cb, r, arrayA(j), arrayA(i)), cb.goto(LtakeFromRight), cb.goto(LtakeFromLeft)) + }, cb.goto(LtakeFromLeft)) + + cb.define(LtakeFromLeft) + cb += arrayB.update(k, arrayA(i)) + cb.assign(i, i + 1) + cb.goto(Ldone) + + cb.define(LtakeFromRight) + cb += arrayB.update(k, arrayA(j)) + cb.assign(j, j + 1) + cb.goto(Ldone) + + cb.define(Ldone) + cb.assign(k, k + 1) + }) + } + + val splitMergeMB = cb.emb.ecb.genEmitMethod("arraySorter_splitMerge", FastIndexedSeq[ParamType](classInfo[Region], IntInfo, IntInfo, workingArrayInfo, workingArrayInfo), UnitInfo) + splitMergeMB.voidWithBuilder { cb => + val r = splitMergeMB.getCodeParam[Region](1) + val begin = splitMergeMB.getCodeParam[Int](2) + val end = splitMergeMB.getCodeParam[Int](3) + + val arrayB = splitMergeMB.getCodeParam(4)(workingArrayInfo) + val arrayA = splitMergeMB.getCodeParam(5)(workingArrayInfo) + + cb.ifx(end - begin > 1, { + val mid = cb.newLocal[Int]("splitMerge_mid", (begin + end) / 2) + + cb.invokeVoid(splitMergeMB, r, begin, mid, arrayA, arrayB) + cb.invokeVoid(splitMergeMB, r, mid, end, arrayA, arrayB) + + // result goes in A + cb.invokeVoid(mergeMB, r, begin, mid, end, arrayB, arrayA) + }) + } + + // these arrays should be allocated once and reused + cb.ifx(workingArray1.isNull || arrayRef(workingArray1).length() < newEnd, { + cb.assignAny(workingArray1, Code.newArray(newEnd)(array.ti)) + cb.assignAny(workingArray2, Code.newArray(newEnd)(array.ti)) + }) + + cb.assign(i, 0) + cb.whileLoop(i < newEnd, { + cb += arrayRef(workingArray1).update(i, array(i)) + cb += arrayRef(workingArray2).update(i, array(i)) + cb.assign(i, i + 1) + }) + + // elements are sorted in workingArray2 after calling splitMergeMB + cb.invokeVoid(splitMergeMB, sortMB.getCodeParam[Region](1), const(0), newEnd, workingArray1, workingArray2) + + cb.assign(i, 0) + cb.whileLoop(i < newEnd, { + cb += array.update(i, arrayRef(workingArray2)(i)) + cb.assign(i, i + 1) + }) + } - Code(localF.storeAny(Code.checkcast(sorter.newInstance(mb))(localF.ti)), array.sort(localF)) + cb.invokeVoid(sortMB, region) + + } - def toRegion(cb: EmitCodeBuilder, t: PType): PIndexableCode = { + def toRegion(cb: EmitCodeBuilder, t: Type): SIndexableCode = { t match { - case pca: PCanonicalArray => + case pca: TArray => val len = cb.newLocal[Int]("arraysorter_to_region_len", array.size) - pca.constructFromElements(cb, r.region, len, deepCopy = false) { (cb, idx) => - IEmitCode(cb, array.isMissing(idx), PCode(typ, array(idx))) + // fixme element requiredness should be set here + val arrayType = PCanonicalArray(array.elt.loadedSType.canonicalPType().setRequired(this.prunedMissing || array.eltRequired)) + + arrayType.constructFromElements(cb, r.region, len, deepCopy = false) { (cb, idx) => + array.loadFromIndex(cb, r.region, idx) } - case td: PCanonicalDict => - td.construct(toRegion(cb, td.arrayRep)) - case ts: PCanonicalSet => - ts.construct(toRegion(cb, ts.arrayRep)) + case td: TDict => + PCanonicalDict.coerceArrayCode(toRegion(cb, TArray(td.elementType))) + case ts: TSet => + PCanonicalSet.coerceArrayCode(toRegion(cb, TArray(ts.elementType))) } } - def pruneMissing: Code[Unit] = { - val i = mb.newLocal[Int]() - val n = mb.newLocal[Int]() - - Code( - n := 0, - i := 0, - Code.whileLoop(i < array.size, - Code( - array.isMissing(i).mux( - Code._empty, - i.ceq(n).mux( - n += 1, - Code(array.setMissing(n, false), array.update(n, array(i)), n += 1))), - i += 1)), - array.setSize(n)) + def pruneMissing(cb: 
EmitCodeBuilder): Unit = { + this.prunedMissing = true + + val i = cb.newLocal[Int]("i", 0) + val n = cb.newLocal[Int]("n", 0) + val size = cb.newLocal[Int]("size", array.size) + cb.whileLoop(i < size, { + cb.ifx(!array.isMissing(i), { + cb.ifx(i.cne(n), + cb += array.update(n, array(i))) + cb.assign(n, n + 1) + }) + cb.assign(i, i + 1) + }) + cb += array.setSize(n) } - def distinctFromSorted(discardNext: (Code[Region], Code[_], Code[Boolean], Code[_], Code[Boolean]) => Code[Boolean]): Code[Unit] = { - val i = mb.newLocal[Int]() - val n = mb.newLocal[Int]() - - Code( - i := 0, - n := 0, - Code.whileLoop(i < array.size, - i += 1, - Code.whileLoop(i < array.size && discardNext(r.region, array(n), array.isMissing(n), array(i), array.isMissing(i)), - i += 1), - n += 1, - (i < array.size && i.cne(n)).mux( - Code( - array.setMissing(n, array.isMissing(i)), - array.isMissing(n).mux( - Code._empty, - array.update(n, array(i)))), - Code._empty)), - array.setSize(n)) + def distinctFromSorted(cb: EmitCodeBuilder, region: Value[Region], discardNext: (EmitCodeBuilder, Value[Region], EmitCode, EmitCode) => Code[Boolean]): Unit = { + + val distinctMB = cb.emb.genEmitMethod("distinctFromSorted", FastIndexedSeq[ParamType](classInfo[Region]), UnitInfo) + distinctMB.voidWithBuilder { cb => + val region = distinctMB.getCodeParam[Region](1) + val i = cb.newLocal[Int]("i", 0) + val n = cb.newLocal[Int]("n", 0) + val size = cb.newLocal[Int]("size", array.size) + cb.whileLoop(i < size, { + cb.assign(i, i + 1) + + val LskipLoopBegin = CodeLabel() + val LskipLoopEnd = CodeLabel() + cb.define(LskipLoopBegin) + cb.ifx(i >= size, cb.goto(LskipLoopEnd)) + cb.ifx(!discardNext(cb, region, + EmitCode.fromI(distinctMB)(cb => array.loadFromIndex(cb, region, n)), + EmitCode.fromI(distinctMB)(cb => array.loadFromIndex(cb, region, i))), + cb.goto(LskipLoopEnd)) + cb.assign(i, i + 1) + cb.goto(LskipLoopBegin) + + cb.define(LskipLoopEnd) + + cb.assign(n, n + 1) + + cb.ifx(i < size && i.cne(n), { + cb += array.setMissing(n, array.isMissing(i)) + cb.ifx(!array.isMissing(n), cb += array.update(n, array(i))) + }) + + }) + cb += array.setSize(n) + } + + cb.invokeVoid(distinctMB, region) } } diff --git a/hail/src/main/scala/is/hail/expr/ir/BinaryOp.scala b/hail/src/main/scala/is/hail/expr/ir/BinaryOp.scala index 8e19bdc98e9..6282610b13b 100644 --- a/hail/src/main/scala/is/hail/expr/ir/BinaryOp.scala +++ b/hail/src/main/scala/is/hail/expr/ir/BinaryOp.scala @@ -2,13 +2,15 @@ package is.hail.expr.ir import is.hail.asm4s._ import is.hail.types._ +import is.hail.types.physical.stypes.{SCode, SType} +import is.hail.types.physical.stypes.interfaces._ import is.hail.types.virtual._ import is.hail.utils._ object BinaryOp { private val returnType: ((BinaryOp, Type, Type)) => Option[Type] = lift { - case (FloatingPointDivide(), TInt32, TInt32) => TFloat32 - case (FloatingPointDivide(), TInt64, TInt64) => TFloat32 + case (FloatingPointDivide(), TInt32, TInt32) => TFloat64 + case (FloatingPointDivide(), TInt64, TInt64) => TFloat64 case (FloatingPointDivide(), TFloat32, TFloat32) => TFloat32 case (FloatingPointDivide(), TFloat64, TFloat64) => TFloat64 case (Add() | Subtract() | Multiply() | RoundToNegInfDivide() | BitAnd() | BitOr() | BitXOr(), TInt32, TInt32) => TInt32 @@ -32,7 +34,15 @@ object BinaryOp { private def incompatible[T](lt: Type, rt: Type, op: BinaryOp): T = throw new RuntimeException(s"Cannot apply $op to $lt and $rt") - def emit(op: BinaryOp, lt: Type, rt: Type, l: Code[_], r: Code[_]): Code[_] = + def emit(cb: EmitCodeBuilder, op: 
BinaryOp, l: SCode, r: SCode): SCode = { + val lt = l.st.virtualType + val rt = r.st.virtualType + + val retCode = emit(op, lt, rt, SType.extractPrimCode(cb, l), SType.extractPrimCode(cb, r)) + primitive(getReturnType(op, lt, rt), retCode) + } + + private[this] def emit(op: BinaryOp, lt: Type, rt: Type, l: Code[_], r: Code[_]): Code[_] = (lt, rt) match { case (TInt32, TInt32) => val ll = coerce[Int](l) @@ -41,7 +51,7 @@ object BinaryOp { case Add() => ll + rr case Subtract() => ll - rr case Multiply() => ll * rr - case FloatingPointDivide() => ll.toF / rr.toF + case FloatingPointDivide() => ll.toD / rr.toD case RoundToNegInfDivide() => Code.invokeStatic2[Math, Int, Int, Int]("floorDiv", ll, rr) case BitAnd() => ll & rr case BitOr() => ll | rr @@ -67,7 +77,7 @@ object BinaryOp { case Add() => ll + rr case Subtract() => ll - rr case Multiply() => ll * rr - case FloatingPointDivide() => ll.toF / rr.toF + case FloatingPointDivide() => ll.toD / rr.toD case RoundToNegInfDivide() => Code.invokeStatic2[Math, Long, Long, Long]("floorDiv", ll, rr) case BitAnd() => ll & rr case BitOr() => ll | rr diff --git a/hail/src/main/scala/is/hail/expr/ir/BinarySearch.scala b/hail/src/main/scala/is/hail/expr/ir/BinarySearch.scala index b7934e543d0..34d30e98ad1 100644 --- a/hail/src/main/scala/is/hail/expr/ir/BinarySearch.scala +++ b/hail/src/main/scala/is/hail/expr/ir/BinarySearch.scala @@ -1,91 +1,82 @@ package is.hail.expr.ir -import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir.orderings.CodeOrdering import is.hail.types.physical._ import is.hail.types.physical.stypes._ +import is.hail.types.physical.stypes.interfaces.{SBaseStruct, SBaseStructCode, SContainer, SInterval, SIntervalCode} import is.hail.utils.FastIndexedSeq import scala.language.existentials -class BinarySearch[C](mb: EmitMethodBuilder[C], typ: PContainer, eltType: PType, keyOnly: Boolean) { +class BinarySearch[C](mb: EmitMethodBuilder[C], containerType: SContainer, eltType: EmitType, keyOnly: Boolean) { - val elt: PType = typ.elementType - val ti: TypeInfo[_] = typeToTypeInfo(elt) + val containerElementType: EmitType = containerType.elementEmitType val (compare: CodeOrdering.F[Int], equiv: CodeOrdering.F[Boolean], findElt: EmitMethodBuilder[C]) = if (keyOnly) { - val kt = elt match { - case t: PBaseStruct => - require(t.size == 2) - t.types(0) - case t: PCanonicalInterval => - t.pointType + val kt: EmitType = containerElementType.st match { + case s: SBaseStruct => + require(s.size == 2) + s.fieldEmitTypes(0) + case interval: SInterval => + interval.pointEmitType } - val findMB = mb.genEmitMethod("findElt", FastIndexedSeq[ParamType](typeInfo[Long], typeInfo[Boolean], typeToTypeInfo(kt)), typeInfo[Int]) + val findMB = mb.genEmitMethod("findElt", FastIndexedSeq[ParamType](containerType.paramType, eltType.paramType), typeInfo[Int]) val comp: CodeOrdering.F[Int] = { (cb: EmitCodeBuilder, ec1: EmitCode, _ec2: EmitCode) => val ec2 = EmitCode.fromI(cb.emb) { cb => val iec = _ec2.toI(cb) iec.flatMap(cb) { - case v2: PBaseStructCode => + case v2: SBaseStructCode => v2.memoize(cb, "bs_comp_v2").loadField(cb, 0) - case v2: PIntervalCode => + case v2: SIntervalCode => v2.memoize(cb, "bs_comp_v2").loadStart(cb) - }.map(cb)(_.asPCode) + } } - findMB.ecb.getOrderingFunction(eltType.sType, kt.sType, CodeOrdering.Compare())(cb, ec1, ec2) + findMB.ecb.getOrderingFunction(eltType.st, kt.st, CodeOrdering.Compare())(cb, ec1, ec2) } val ceq: CodeOrdering.F[Boolean] = { (cb: EmitCodeBuilder, ec1: EmitCode, _ec2: EmitCode) => val ec2 = 
EmitCode.fromI(cb.emb) { cb => val iec = _ec2.toI(cb) iec.flatMap(cb) { - case v2: PBaseStructCode => + case v2: SBaseStructCode => v2.memoize(cb, "bs_eq_v2").loadField(cb, 0) - case v2: PIntervalCode => + case v2: SIntervalCode => v2.memoize(cb, "bs_comp_v2").loadStart(cb) - }.map(cb)(_.asPCode) + } } - findMB.ecb.getOrderingFunction(eltType.sType, kt.sType, CodeOrdering.Equiv())(cb, ec1, ec2) + findMB.ecb.getOrderingFunction(eltType.st, kt.st, CodeOrdering.Equiv())(cb, ec1, ec2) } (comp, ceq, findMB) } else - (mb.ecb.getOrderingFunction(eltType.sType, elt.sType, CodeOrdering.Compare()), - mb.ecb.getOrderingFunction(eltType.sType, elt.sType, CodeOrdering.Equiv()), - mb.genEmitMethod("findElt", FastIndexedSeq[ParamType](typeInfo[Long], typeInfo[Boolean], elt.ti), typeInfo[Int])) + (mb.ecb.getOrderingFunction(eltType.st, containerElementType.st, CodeOrdering.Compare()), + mb.ecb.getOrderingFunction(eltType.st, containerElementType.st, CodeOrdering.Equiv()), + mb.genEmitMethod("findElt", FastIndexedSeq[ParamType](containerType.paramType, eltType.paramType), typeInfo[Int])) - private[this] val array = findElt.getCodeParam[Long](1) - private[this] val m = findElt.getCodeParam[Boolean](2) - private[this] val e = findElt.getCodeParam(3)(eltType.ti) - private[this] val len = findElt.newLocal[Int]() - private[this] val i = findElt.newLocal[Int]() - private[this] val low = findElt.newLocal[Int]() - private[this] val high = findElt.newLocal[Int]() + // Returns smallest i, 0 <= i < n, for which a(i) >= key, or returns n if a(i) < key for all i + findElt.emitWithBuilder[Int] { cb => + val indexable = findElt.getSCodeParam(1).asIndexable.memoize(cb, "findElt_indexable") - def cmp(i: Code[Int]): Code[Int] = EmitCodeBuilder.scopedCode(findElt) { cb => - val ec1 = EmitCode(Code._empty, m, PCode(eltType, e)) - val ec2 = EmitCode.fromI(findElt) { cb => - PCode(typ, array).asIndexable.memoize(cb, "binsearch_cmp_i").loadElement(cb, i).map(cb)(_.asPCode) - } - compare(cb, ec1, ec2) - } + val elt = findElt.getEmitParam(2, null) // no streams - // Returns smallest i, 0 <= i < n, for which a(i) >= key, or returns n if a(i) < key for all i - findElt.emit(Code( - len := typ.loadLength(array), - low := 0, - high := len, - Code.whileLoop(low < high, - i := (low + high) / 2, - (cmp(i) <= 0).mux( - high := i, - low := i + 1)), - low)) + val len = cb.newLocal[Int]("findelt_length", indexable.loadLength()) + val low = cb.newLocal("findelt_low", 0) + val high = cb.newLocal("findelt_high", len) + + cb.whileLoop(low < high, { + val i = cb.newLocal("findelt_i", (low + high) / 2) + cb.ifx(compare(cb, elt, EmitCode.fromI(findElt)(cb => indexable.loadElement(cb, i))) <= 0, + cb.assign(high, i), + cb.assign(low, i + 1) + ) + }) + low + } // check missingness of v before calling - def getClosestIndex(array: Code[Long], m: Code[Boolean], v: Code[_]): Code[Int] = { - findElt.invokeCode[Int](array, m, v) + def getClosestIndex(cb: EmitCodeBuilder, array: SCode, v: EmitCode): Code[Int] = { + cb.invokeCode[Int](findElt, array, v) } } diff --git a/hail/src/main/scala/is/hail/expr/ir/BlockMatrixIR.scala b/hail/src/main/scala/is/hail/expr/ir/BlockMatrixIR.scala index 2a0f3ab93af..6afeafdc96b 100644 --- a/hail/src/main/scala/is/hail/expr/ir/BlockMatrixIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/BlockMatrixIR.scala @@ -243,8 +243,6 @@ class BlockMatrixLiteral(value: BlockMatrix) extends BlockMatrixIR { } case class BlockMatrixMap(child: BlockMatrixIR, eltName: String, f: IR, needsDense: Boolean) extends BlockMatrixIR { - 
assert(f.isInstanceOf[ApplyUnaryPrimOp] || f.isInstanceOf[Apply] || f.isInstanceOf[ApplyBinaryPrimOp]) - override lazy val typ: BlockMatrixType = child.typ assert(!needsDense || !typ.isSparse) @@ -268,6 +266,7 @@ case class BlockMatrixMap(child: BlockMatrixIR, eltName: String, f: IR, needsDen f(_, scalar) override protected[ir] def execute(ctx: ExecuteContext): BlockMatrix = { + assert(f.isInstanceOf[ApplyUnaryPrimOp] || f.isInstanceOf[Apply] || f.isInstanceOf[ApplyBinaryPrimOp]) val prev = child.execute(ctx) val functionArgs = f match { @@ -359,7 +358,6 @@ case object NeedsDense extends SparsityStrategy { } case class BlockMatrixMap2(left: BlockMatrixIR, right: BlockMatrixIR, leftName: String, rightName: String, f: IR, sparsityStrategy: SparsityStrategy) extends BlockMatrixIR { - assert(f.isInstanceOf[ApplyBinaryPrimOp] || f.isInstanceOf[Apply]) assert( left.typ.nRows == right.typ.nRows && left.typ.nCols == right.typ.nCols && @@ -382,6 +380,8 @@ case class BlockMatrixMap2(left: BlockMatrixIR, right: BlockMatrixIR, leftName: } override protected[ir] def execute(ctx: ExecuteContext): BlockMatrix = { + assert(f.isInstanceOf[ApplyBinaryPrimOp] || f.isInstanceOf[Apply]) + left match { case BlockMatrixBroadcast(vectorIR: BlockMatrixIR, IndexedSeq(x), _, _) => val vector = coerceToVector(ctx , vectorIR) @@ -421,6 +421,11 @@ case class BlockMatrixMap2(left: BlockMatrixIR, right: BlockMatrixIR, leftName: case ValueToBlockMatrix(child, _, _) => Interpret[Any](ctx, child) match { case vector: IndexedSeq[_] => vector.asInstanceOf[IndexedSeq[Double]].toArray + case vector: NDArray => { + val IndexedSeq(numRows, numCols) = vector.shape + assert(numRows == 1L || numCols == 1L) + vector.getRowMajorElements().asInstanceOf[IndexedSeq[Double]].toArray + } } case _ => ir.execute(ctx).toBreezeMatrix().data } diff --git a/hail/src/main/scala/is/hail/expr/ir/Casts.scala b/hail/src/main/scala/is/hail/expr/ir/Casts.scala index 484e482e018..6eb7bdcbe87 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Casts.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Casts.scala @@ -2,32 +2,33 @@ package is.hail.expr.ir import is.hail.asm4s._ import is.hail.types._ +import is.hail.types.physical.stypes.SCode +import is.hail.types.physical.stypes.interfaces._ import is.hail.types.virtual._ import scala.language.existentials object Casts { - private val casts: Map[(Type, Type), (Code[T] => Code[_]) forSome {type T}] = Map( - (TInt32, TInt32) -> ((x: Code[Int]) => x), - (TInt32, TInt64) -> ((x: Code[Int]) => x.toL), - (TInt32, TFloat32) -> ((x: Code[Int]) => x.toF), - (TInt32, TFloat64) -> ((x: Code[Int]) => x.toD), - (TInt64, TInt32) -> ((x: Code[Long]) => x.toI), - (TInt64, TInt64) -> ((x: Code[Long]) => x), - (TInt64, TFloat32) -> ((x: Code[Long]) => x.toF), - (TInt64, TFloat64) -> ((x: Code[Long]) => x.toD), - (TFloat32, TInt32) -> ((x: Code[Float]) => x.toI), - (TFloat32, TInt64) -> ((x: Code[Float]) => x.toL), - (TFloat32, TFloat32) -> ((x: Code[Float]) => x), - (TFloat32, TFloat64) -> ((x: Code[Float]) => x.toD), - (TFloat64, TInt32) -> ((x: Code[Double]) => x.toI), - (TFloat64, TInt64) -> ((x: Code[Double]) => x.toL), - (TFloat64, TFloat32) -> ((x: Code[Double]) => x.toF), - (TFloat64, TFloat64) -> ((x: Code[Double]) => x), - (TInt32, TCall) -> ((x: Code[Int]) => x)) + private val casts: Map[(Type, Type), (EmitCodeBuilder, SCode) => SCode] = Map( + (TInt32, TInt32) -> ((cb: EmitCodeBuilder, x: SCode) => x), + (TInt32, TInt64) -> ((cb: EmitCodeBuilder, x: SCode) => primitive(x.asInt.intCode(cb).toL)), + (TInt32, TFloat32) 
-> ((cb: EmitCodeBuilder, x: SCode) => primitive(x.asInt.intCode(cb).toF)), + (TInt32, TFloat64) -> ((cb: EmitCodeBuilder, x: SCode) => primitive(x.asInt.intCode(cb).toD)), + (TInt64, TInt32) -> ((cb: EmitCodeBuilder, x: SCode) => primitive(x.asLong.longCode(cb).toI)), + (TInt64, TInt64) -> ((cb: EmitCodeBuilder, x: SCode) => x), + (TInt64, TFloat32) -> ((cb: EmitCodeBuilder, x: SCode) => primitive(x.asLong.longCode(cb).toF)), + (TInt64, TFloat64) -> ((cb: EmitCodeBuilder, x: SCode) => primitive(x.asLong.longCode(cb).toD)), + (TFloat32, TInt32) -> ((cb: EmitCodeBuilder, x: SCode) => primitive(x.asFloat.floatCode(cb).toI)), + (TFloat32, TInt64) -> ((cb: EmitCodeBuilder, x: SCode) => primitive(x.asFloat.floatCode(cb).toL)), + (TFloat32, TFloat32) -> ((cb: EmitCodeBuilder, x: SCode) => x), + (TFloat32, TFloat64) -> ((cb: EmitCodeBuilder, x: SCode) => primitive(x.asFloat.floatCode(cb).toD)), + (TFloat64, TInt32) -> ((cb: EmitCodeBuilder, x: SCode) => primitive(x.asDouble.doubleCode(cb).toI)), + (TFloat64, TInt64) -> ((cb: EmitCodeBuilder, x: SCode) => primitive(x.asDouble.doubleCode(cb).toL)), + (TFloat64, TFloat32) -> ((cb: EmitCodeBuilder, x: SCode) => primitive(x.asDouble.doubleCode(cb).toF)), + (TFloat64, TFloat64) -> ((cb: EmitCodeBuilder, x: SCode) => x)) - def get(from: Type, to: Type): Code[_] => Code[_] = - casts(from -> to).asInstanceOf[Code[_] => Code[_]] + def get(from: Type, to: Type): (EmitCodeBuilder, SCode) => SCode = + casts(from -> to) def valid(from: Type, to: Type): Boolean = casts.contains(from -> to) diff --git a/hail/src/main/scala/is/hail/expr/ir/Children.scala b/hail/src/main/scala/is/hail/expr/ir/Children.scala index 3a677369ac6..a88734dbeb8 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Children.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Children.scala @@ -186,6 +186,7 @@ object Children { none case Die(message, typ, errorId) => Array(message) + case Trap(child) => Array(child) case ApplyIR(_, _, args) => args.toFastIndexedSeq case Apply(_, _, args, _) => diff --git a/hail/src/main/scala/is/hail/expr/ir/Compile.scala b/hail/src/main/scala/is/hail/expr/ir/Compile.scala index e4760eda286..75c19409e32 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Compile.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Compile.scala @@ -7,8 +7,9 @@ import is.hail.expr.ir.lowering.LoweringPipeline import is.hail.expr.ir.streams.{EmitStream, StreamArgType} import is.hail.io.fs.FS import is.hail.rvd.RVDContext +import is.hail.types.physical.stypes.{PTypeReferenceSingleCodeType, SingleCodeType, StreamSingleCodeType} import is.hail.types.physical.stypes.interfaces.SStream -import is.hail.types.physical.{PStream, PStruct, PType, PTypeReferenceSingleCodeType, SingleCodeType, StreamSingleCodeType} +import is.hail.types.physical.{PStream, PStruct, PType} import is.hail.types.virtual.Type import is.hail.utils._ @@ -48,10 +49,6 @@ object Compile { TypeCheck(ir, BindingEnv.empty) - val usesAndDefs = ComputeUsesAndDefs(ir, errorIfFreeVariables = false) - val requiredness = Requiredness.apply(ir, usesAndDefs, null, Env.empty) // Value IR inference doesn't need context - InferPType(ir, Env.empty, requiredness, usesAndDefs) - val returnParam = CodeParamType(SingleCodeType.typeInfoFromType(ir.typ)) val fb = EmitFunctionBuilder[F](ctx, "Compiled", @@ -75,8 +72,8 @@ object Compile { assert(fb.mb.parameterTypeInfo == expectedCodeParamTypes, s"expected $expectedCodeParamTypes, got ${ fb.mb.parameterTypeInfo }") assert(fb.mb.returnTypeInfo == expectedCodeReturnType, s"expected 
$expectedCodeReturnType, got ${ fb.mb.returnTypeInfo }") - val emitContext = new EmitContext(ctx, requiredness) - val rt = Emit(emitContext, ir, fb, expectedCodeReturnType) + val emitContext = EmitContext.analyze(ctx, ir) + val rt = Emit(emitContext, ir, fb, expectedCodeReturnType, params.length) val f = fb.resultWithIndex(print) codeCache += k -> CodeCacheValue(rt, f) @@ -114,10 +111,6 @@ object CompileWithAggregators { TypeCheck(ir, BindingEnv(Env.fromSeq[Type](params.map { case (name, t) => name -> t.virtualType }))) - val usesAndDefs = ComputeUsesAndDefs(ir, errorIfFreeVariables = false) - val requiredness = Requiredness.apply(ir, usesAndDefs, null, Env.empty) // Value IR inference doesn't need context - InferPType(ir, Env.empty, requiredness, usesAndDefs) - val fb = EmitFunctionBuilder[F](ctx, "CompiledWithAggs", CodeParamType(typeInfo[Region]) +: params.map { case (_, pt) => pt }, SingleCodeType.typeInfoFromType(ir.typ), Some("Emit.scala")) @@ -135,8 +128,8 @@ object CompileWithAggregators { } */ - val emitContext = new EmitContext(ctx, requiredness) - val rt = Emit(emitContext, ir, fb, expectedCodeReturnType, Some(aggSigs)) + val emitContext = EmitContext.analyze(ctx, ir) + val rt = Emit(emitContext, ir, fb, expectedCodeReturnType, params.length, Some(aggSigs)) val f = fb.resultWithIndex() codeCache += k -> CodeCacheValue(rt, f) @@ -206,27 +199,26 @@ object CompileIterator { val ir = LoweringPipeline.compileLowerer(true)(ctx, body).asInstanceOf[IR].noSharing TypeCheck(ir) - val usesAndDefs = ComputeUsesAndDefs(ir, errorIfFreeVariables = false) - val requiredness = Requiredness.apply(ir, usesAndDefs, null, Env.empty) // Value IR inference doesn't need context - InferPType(ir, Env.empty, requiredness, usesAndDefs) - - val emitContext = new EmitContext(ctx, requiredness) - val emitter = new Emit(emitContext, stepFECB) + var elementAddress: Settable[Long] = null + var returnType: PType = null - val returnType = ir.pType.asInstanceOf[PStream].elementType.asInstanceOf[PStruct].setRequired(true) + stepF.emitWithBuilder[Boolean] { cb => + val emitContext = EmitContext.analyze(ctx, ir) + val emitter = new Emit(emitContext, stepFECB) - val optStream = EmitCode.fromI(stepF)(cb => EmitStream.produce(emitter, ir, cb, outerRegion, Env.empty, None)) - val returnPType = optStream.st.asInstanceOf[SStream].elementType.canonicalPType() + val env = EmitEnv(Env.empty, argTypeInfo.indices.filter(i => argTypeInfo(i).isInstanceOf[EmitParamType]).map(i => stepF.storeEmitParam(i + 1, cb))) + val optStream = EmitCode.fromI(stepF)(cb => EmitStream.produce(emitter, ir, cb, outerRegion, env, None)) + returnType = optStream.st.asInstanceOf[SStream].elementEmitType.canonicalPType.setRequired(true) + val returnPType = optStream.st.asInstanceOf[SStream].elementType.canonicalPType() - val elementAddress = stepF.genFieldThisRef[Long]("elementAddr") + elementAddress = stepF.genFieldThisRef[Long]("elementAddr") - val didSetup = stepF.genFieldThisRef[Boolean]("didSetup") - stepF.cb.emitInit(didSetup := false) + val didSetup = stepF.genFieldThisRef[Boolean]("didSetup") + stepF.cb.emitInit(didSetup := false) - val eosField = stepF.genFieldThisRef[Boolean]("eos") + val eosField = stepF.genFieldThisRef[Boolean]("eos") - val producer = optStream.pv.asStream.producer - stepF.emitWithBuilder[Boolean] { cb => + val producer = optStream.pv.asStream.producer val ret = cb.newLocal[Boolean]("stepf_ret") val Lreturn = CodeLabel() @@ -234,11 +226,7 @@ object CompileIterator { cb.ifx(!didSetup, { optStream.toI(cb).get(cb) // handle 
missing, but bound stream producer above - if (producer.requiresMemoryManagementPerElement) - cb.assign(producer.elementRegion, Region.stagedCreate(Region.REGULAR, outerRegion.getPool())) - else - cb.assign(producer.elementRegion, outerRegion) - + cb.assign(producer.elementRegion, eltRegionField) producer.initialize(cb) cb.assign(didSetup, true) cb.assign(eosField, false) @@ -253,8 +241,6 @@ object CompileIterator { stepF.implementLabel(producer.LendOfStream) { cb => producer.close(cb) - if (producer.requiresMemoryManagementPerElement) - cb += producer.elementRegion.invalidate() cb.assign(eosField, true) cb.assign(ret, false) cb.goto(Lreturn) diff --git a/hail/src/main/scala/is/hail/expr/ir/CompileAndEvaluate.scala b/hail/src/main/scala/is/hail/expr/ir/CompileAndEvaluate.scala index 7c8b9ac7f20..3c705b655bd 100644 --- a/hail/src/main/scala/is/hail/expr/ir/CompileAndEvaluate.scala +++ b/hail/src/main/scala/is/hail/expr/ir/CompileAndEvaluate.scala @@ -3,7 +3,8 @@ package is.hail.expr.ir import is.hail.annotations.{Region, SafeRow} import is.hail.asm4s._ import is.hail.expr.ir.lowering.LoweringPipeline -import is.hail.types.physical.{PBaseStruct, PTuple, PTypeReferenceSingleCodeType, SingleCodeType} +import is.hail.types.physical.stypes.PTypeReferenceSingleCodeType +import is.hail.types.physical.{PBaseStruct, PTuple} import is.hail.types.virtual.TVoid import is.hail.utils.{FastIndexedSeq, FastSeq} diff --git a/hail/src/main/scala/is/hail/expr/ir/Copy.scala b/hail/src/main/scala/is/hail/expr/ir/Copy.scala index 0264b37dbca..75ce84ea45f 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Copy.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Copy.scala @@ -280,6 +280,9 @@ object Copy { case Die(_, typ, errorId) => assert(newChildren.length == 1) Die(newChildren(0).asInstanceOf[IR], typ, errorId) + case Trap(child) => + assert(newChildren.length == 1) + Trap(newChildren(0).asInstanceOf[IR]) case x@ApplyIR(fn, typeArgs, args) => val r = ApplyIR(fn, typeArgs, newChildren.map(_.asInstanceOf[IR])) r.conversion = x.conversion diff --git a/hail/src/main/scala/is/hail/expr/ir/Emit.scala b/hail/src/main/scala/is/hail/expr/ir/Emit.scala index 57d49df0981..9b98956f382 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Emit.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Emit.scala @@ -3,82 +3,57 @@ package is.hail.expr.ir import is.hail.annotations._ import is.hail.asm4s._ import is.hail.backend.BackendContext -import is.hail.expr.ir.Emit.E import is.hail.expr.ir.agg.{AggStateSig, ArrayAggStateSig, GroupedStateSig} -import is.hail.expr.ir.functions.StringFunctions +import is.hail.expr.ir.analyses.{ComputeMethodSplits, ControlFlowPreventsSplit, ParentPointers} import is.hail.expr.ir.lowering.TableStageDependency +import is.hail.expr.ir.ndarrays.EmitNDArray import is.hail.expr.ir.streams.{EmitStream, StreamProducer, StreamUtils} -import is.hail.io.{BufferSpec, InputBuffer, OutputBuffer} +import is.hail.io.{BufferSpec, InputBuffer, OutputBuffer, TypedCodecSpec} import is.hail.linalg.{BLAS, LAPACK, LinalgCodeUtils} import is.hail.services.shuffler._ -import is.hail.types.TypeWithRequiredness import is.hail.types.physical._ -import is.hail.types.physical.stypes.concrete.{SBaseStructPointerCode, SCanonicalShufflePointer, SCanonicalShufflePointerCode, SCanonicalShufflePointerSettable} -import is.hail.types.physical.stypes.interfaces.{SBaseStructCode, SNDArray, SNDArrayCode, SStreamCode} -import is.hail.types.physical.stypes.primitives.{SFloat32, SFloat64, SInt32, SInt32Code, SInt64} -import 
is.hail.types.physical.stypes.{EmitType, SCode, SType} +import is.hail.types.physical.stypes._ +import is.hail.types.physical.stypes.concrete._ +import is.hail.types.physical.stypes.interfaces._ +import is.hail.types.physical.stypes.primitives._ import is.hail.types.virtual._ +import is.hail.types.{TypeWithRequiredness, VirtualTypeWithReq} import is.hail.utils._ -import is.hail.utils.richUtils.RichCodeRegion import java.io._ import scala.collection.mutable import scala.language.{existentials, postfixOps} // class for holding all information computed ahead-of-time that we need in the emitter -class EmitContext(val executeContext: ExecuteContext, val req: RequirednessAnalysis) - - -object SetupBuilder { - def apply(mb: EmitMethodBuilder[_]): SetupBuilder = new SetupBuilder(mb, Code._empty) - - def apply(mb: EmitMethodBuilder[_], setup: Code[Unit]): SetupBuilder = new SetupBuilder(mb, setup) - - def map[T, U](mb: EmitMethodBuilder[_])(is: IndexedSeq[T])(f: (SetupBuilder, T) => U): (Code[Unit], IndexedSeq[U]) = { - val sb = SetupBuilder(mb) - val rs = sb.map(is)(f) - (sb.setup, rs) - } - - def map[T, U](mb: EmitMethodBuilder[_], setup: Code[Unit])(is: IndexedSeq[T])(f: (SetupBuilder, T) => U): (Code[Unit], IndexedSeq[U]) = { - val sb = SetupBuilder(mb, setup) - val rs = sb.map(is)(f) - (sb.setup, rs) +object EmitContext { + def analyze(ctx: ExecuteContext, ir: IR): EmitContext = { + ctx.timer.time("EmitContext.analyze") { + val usesAndDefs = ComputeUsesAndDefs(ir, errorIfFreeVariables = false) + val requiredness = Requiredness.apply(ir, usesAndDefs, null, Env.empty) // Value IR inference doesn't need context + val inLoopCriticalPath = ControlFlowPreventsSplit(ir, ParentPointers(ir), usesAndDefs) + val methodSplits = ComputeMethodSplits(ir,inLoopCriticalPath) + new EmitContext(ctx, requiredness, usesAndDefs, methodSplits, inLoopCriticalPath, Memo.empty[Unit]) + } } } -class SetupBuilder(mb: EmitMethodBuilder[_], var setup: Code[Unit]) { - def append(c: Code[Unit]): Unit = { - setup = Code(setup, c) - } - - def +=(c: Code[Unit]): Unit = append(c) - - def memoize[T](e: Code[T], name: String)(implicit tti: TypeInfo[T]): Value[T] = { - val l = mb.newLocal[T](name) - append(l := e) - l - } - - def memoizeField[T](e: Code[T], name: String)(implicit tti: TypeInfo[T]): Value[T] = { - val l = mb.genFieldThisRef[T](name) - append(l := e) - l - } +class EmitContext( + val executeContext: ExecuteContext, + val req: RequirednessAnalysis, + val usesAndDefs: UsesAndDefs, + val methodSplits: Memo[Unit], + val inLoopCriticalPath: Memo[Unit], + val tryingToSplit: Memo[Unit] +) - def map[T, U](is: IndexedSeq[T])(f: (SetupBuilder, T) => U): IndexedSeq[U] = is.map(f(this, _)) +case class EmitEnv(bindings: Env[EmitValue], inputValues: IndexedSeq[Value[Region] => EmitValue]) { + def bind(name: String, v: EmitValue): EmitEnv = copy(bindings = bindings.bind(name, v)) - def result(): Code[Unit] = { - val r = setup - setup = null - r - } + def bind(newBindings: (String, EmitValue)*): EmitEnv = copy(bindings = bindings.bindIterable(newBindings)) } object Emit { - type E = Env[EmitValue] - - def apply[C](ctx: EmitContext, ir: IR, fb: EmitFunctionBuilder[C], rti: TypeInfo[_], aggs: Option[Array[AggStateSig]] = None): Option[SingleCodeType] = { + def apply[C](ctx: EmitContext, ir: IR, fb: EmitFunctionBuilder[C], rti: TypeInfo[_], nParams: Int, aggs: Option[Array[AggStateSig]] = None): Option[SingleCodeType] = { TypeCheck(ir) val mb = fb.apply_method @@ -90,19 +65,21 @@ object Emit { val region = mb.getCodeParam[Region](1) 
val returnTypeOption: Option[SingleCodeType] = if (ir.typ == TVoid) { fb.apply_method.voidWithBuilder { cb => - emitter.emitVoid(cb, ir, region, Env.empty, container, None) + val env = EmitEnv(Env.empty, (0 until nParams).map(i => mb.storeEmitParam(i + 2, cb))) // this, region, ... + emitter.emitVoid(cb, ir, region, env, container, None) } None } else { var sct: SingleCodeType = null fb.emitWithBuilder { cb => - val pcode = emitter.emitI(ir, cb, region, Env.empty, container, None).handle(cb, { + val env = EmitEnv(Env.empty, (0 until nParams).map(i => mb.storeEmitParam(i + 2, cb))) // this, region, ... + val sc = emitter.emitI(ir, cb, region, env, container, None).handle(cb, { cb._throw[RuntimeException]( Code.newInstance[RuntimeException, String]("cannot return empty")) }) - val scp = SingleCodePCode.fromPCode(cb, pcode, region) + val scp = SingleCodeSCode.fromSCode(cb, sc, region) assert(scp.typ.ti == rti, s"type info mismatch: expect $rti, got ${ scp.typ.ti }") sct = scp.typ scp.code @@ -127,21 +104,21 @@ object AggContainer { aggState } - val cleanup = { cb: EmitCodeBuilder => + val cleanup = { cb: EmitCodeBuilder => aggState.store(cb) cb += region.load().invalidate() - cb.assign(region, Code._null) + cb.assign(region, Code._null[Region]) } (AggContainer(aggs, aggState, () => ()), (cb: EmitCodeBuilder) => cb += setup, cleanup) } def fromMethodBuilder[C](aggs: Array[AggStateSig], mb: EmitMethodBuilder[C], varPrefix: String): (AggContainer, EmitCodeBuilder => Unit, EmitCodeBuilder => Unit) = - fromVars(aggs, mb, mb.genFieldThisRef[Region](s"${varPrefix}_top_region"), mb.genFieldThisRef[Long](s"${varPrefix}_off")) + fromVars(aggs, mb, mb.genFieldThisRef[Region](s"${ varPrefix }_top_region"), mb.genFieldThisRef[Long](s"${ varPrefix }_off")) def fromBuilder[C](cb: EmitCodeBuilder, aggs: Array[AggStateSig], varPrefix: String): AggContainer = { - val off = cb.newField[Long](s"${varPrefix}_off") - val region = cb.newField[Region](s"${varPrefix}_top_region", Region.stagedCreate(Region.REGULAR, cb.emb.ecb.pool())) + val off = cb.newField[Long](s"${ varPrefix }_off") + val region = cb.newField[Region](s"${ varPrefix }_top_region", Region.stagedCreate(Region.REGULAR, cb.emb.ecb.pool())) val states = agg.StateTuple(aggs.map(a => agg.AggStateSig.getState(a, cb.emb.ecb))) val aggState = new agg.TupleAggregatorState(cb.emb.ecb, states, region, off) cb += region.load().setNumParents(aggs.length) @@ -151,7 +128,7 @@ object AggContainer { AggContainer(aggs, aggState, { () => aggState.store(cb) cb += region.load().invalidate() - cb.assign(region, Code._null) + cb.assign(region, Code._null[Region]) }) } } @@ -182,16 +159,19 @@ case class EmitRegion(mb: EmitMethodBuilder[_], region: Value[Region]) { } abstract class EmitValue { - def pt: PType + def emitType: EmitType + + def st: SType = emitType.st def load: EmitCode - def get(cb: EmitCodeBuilder): PCode + def get(cb: EmitCodeBuilder): SCode } class EmitUnrealizableValue(private val ec: EmitCode) extends EmitValue { - val pt: PType = ec.pt - assert(!pt.isRealizable) + val emitType: EmitType = ec.emitType + + assert(st.isInstanceOf[SStream]) private[this] var used: Boolean = false def load: EmitCode = { @@ -200,17 +180,17 @@ class EmitUnrealizableValue(private val ec: EmitCode) extends EmitValue { ec } - override def get(cb: EmitCodeBuilder): PCode = throw new UnsupportedOperationException(s"Can't make PValue for unrealizable type ${pt}") + override def get(cb: EmitCodeBuilder): SCode = throw new UnsupportedOperationException(s"Can't make PValue for 
unrealizable type ${ ec.st }") } /** - * Notes on IEmitCode; - * 1. It is the responsibility of the producers of IEmitCode to emit the relevant - * jumps for the Lmissing and Lpresent labels (cb.goto or similar) - * 2. It is the responsibility of consumers to define these labels and to - * prevent the pcode from being used on any code path taken as a result of - * jumping to Lmissing. - */ + * Notes on IEmitCode; + * 1. It is the responsibility of the producers of IEmitCode to emit the relevant + * jumps for the Lmissing and Lpresent labels (cb.goto or similar) + * 2. It is the responsibility of consumers to define these labels and to + * prevent the SCode from being used on any code path taken as a result of + * jumping to Lmissing. + */ object IEmitCode { def apply[A](cb: EmitCodeBuilder, m: Code[Boolean], value: => A): IEmitCodeGen[A] = { Code.constBoolValue(m) match { @@ -241,7 +221,7 @@ object IEmitCode { IEmitCodeGen(Lmissing, CodeLabel(), defaultValue, false) } - def multiMapEmitCodes(cb: EmitCodeBuilder, seq: IndexedSeq[EmitCode])(f: IndexedSeq[PCode] => PCode): IEmitCode = { + def multiMapEmitCodes(cb: EmitCodeBuilder, seq: IndexedSeq[EmitCode])(f: IndexedSeq[SCode] => SCode): IEmitCode = { val Lmissing = CodeLabel() val Lpresent = CodeLabel() @@ -261,7 +241,7 @@ object IEmitCode { } def multiFlatMap[A, B, C](seq: IndexedSeq[A], toIec: A => IEmitCodeGen[B], cb: EmitCodeBuilder) - (f: IndexedSeq[B] => IEmitCodeGen[C]): IEmitCodeGen[C] = { + (f: IndexedSeq[B] => IEmitCodeGen[C]): IEmitCodeGen[C] = { val Lmissing = CodeLabel() var required: Boolean = true @@ -286,26 +266,31 @@ object IEmitCode { } object IEmitCodeGen { - implicit class IEmitCode(val iec: IEmitCodeGen[PCode]) extends AnyVal { - def pc: PCode = iec.value - def pt: PType = pc.pt + + implicit class IEmitCode(val iec: IEmitCodeGen[SCode]) extends AnyVal { + def pc: SCode = iec.value + + def st: SType = pc.st def memoize(cb: EmitCodeBuilder, name: String): EmitValue = cb.memoize(iec, name) + + def memoizeField(cb: EmitCodeBuilder, name: String): EmitValue = + cb.memoizeField(iec, name) } + } -case class IEmitCodeGen[+A](Lmissing: CodeLabel, Lpresent: CodeLabel, value: A, val required: Boolean) { +case class IEmitCodeGen[+A](Lmissing: CodeLabel, Lpresent: CodeLabel, value: A, required: Boolean) { lazy val emitType: EmitType = { value match { - case pc: PCode => EmitType(pc.st, required) + case pc: SCode => EmitType(pc.st, required) case _ => throw new UnsupportedOperationException(s"emitType on $value") } } - // This method is a very temporary patch until we can properly separate SCode and PCode - def typecast[T]: IEmitCodeGen[T] = IEmitCodeGen(Lmissing, Lpresent, value.asInstanceOf[T], required) + def setOptional: IEmitCodeGen[A] = copy(required = false) def map[B](cb: EmitCodeBuilder)(f: (A) => B): IEmitCodeGen[B] = { val Lpresent2 = CodeLabel() @@ -338,7 +323,10 @@ case class IEmitCodeGen[+A](Lmissing: CodeLabel, Lpresent: CodeLabel, value: A, value } - def get(cb: EmitCodeBuilder, errorMsg: String = "expected non-missing"): A = + def get(cb: EmitCodeBuilder, errorMsg: String = s"expected non-missing"): A = + handle(cb, cb._fatal(errorMsg)) + + def get(cb: EmitCodeBuilder, errorMsg: Code[String]): A = handle(cb, cb._fatal(errorMsg)) def consume(cb: EmitCodeBuilder, ifMissing: => Unit, ifPresent: (A) => Unit): Unit = { @@ -351,17 +339,17 @@ case class IEmitCodeGen[+A](Lmissing: CodeLabel, Lpresent: CodeLabel, value: A, cb.define(Lafter) } - def consumePCode(cb: EmitCodeBuilder, ifMissing: => PCode, ifPresent: (A) => 
PCode): PCode = { + def consumeSCode(cb: EmitCodeBuilder, ifMissing: => SCode, ifPresent: (A) => SCode): SCode = { val Lafter = CodeLabel() cb.define(Lmissing) val missingValue = ifMissing - val pt = missingValue.pt - val ret = cb.emb.newPLocal(pt) + val st = missingValue.st + val ret = cb.emb.newPLocal(st) cb.assign(ret, missingValue) cb.goto(Lafter) cb.define(Lpresent) val presentValue = ifPresent(value) - assert(presentValue.pt == pt) + assert(presentValue.st == st) cb.assign(ret, presentValue) cb.define(Lafter) ret @@ -375,7 +363,7 @@ case class IEmitCodeGen[+A](Lmissing: CodeLabel, Lpresent: CodeLabel, value: A, } object EmitCode { - def apply(setup: Code[Unit], m: Code[Boolean], pv: PCode): EmitCode = { + def apply(setup: Code[Unit], m: Code[Boolean], pv: SCode): EmitCode = { Code.constBoolValue(m) match { case Some(false) => val Lpresent = CodeLabel() @@ -388,7 +376,7 @@ object EmitCode { } } - def unapply(ec: EmitCode): Option[(Code[Boolean], PCode)] = + def unapply(ec: EmitCode): Option[(Code[Boolean], SCode)] = Some((ec.m, ec.pv)) def apply(setup: Code[Unit], ec: EmitCode): EmitCode = { @@ -397,9 +385,9 @@ object EmitCode { new EmitCode(Lstart, ec.iec) } - def present(mb: EmitMethodBuilder[_], pc: PCode): EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, pc)) + def present(mb: EmitMethodBuilder[_], pc: SCode): EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, pc)) - def missing(mb: EmitMethodBuilder[_], pt: PType): EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.missing(cb, pt.defaultValue(mb))) + def missing(mb: EmitMethodBuilder[_], pt: SType): EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.missing(cb, pt.defaultValue)) def fromI(mb: EmitMethodBuilder[_])(f: (EmitCodeBuilder) => IEmitCode): EmitCode = { val cb = EmitCodeBuilder(mb) @@ -413,35 +401,39 @@ class EmitCode(private val start: CodeLabel, private val iec: IEmitCode) { def required: Boolean = iec.required + def setOptional: EmitCode = new EmitCode(start, iec.setOptional) + lazy val emitType: EmitType = iec.emitType - def emitParamType: PCodeEmitParamType = PCodeEmitParamType(st.pType) + def emitParamType: SCodeEmitParamType = emitType.paramType def st: SType = iec.value.st - def pv: PCode = iec.value + def pv: SCode = iec.value val m: Code[Boolean] = new CCode(start.L, iec.Lmissing.L, iec.Lpresent.L) - def pt: PType = pv.pt - - def v: Code[_] = pv.code - def toI(cb: EmitCodeBuilder): IEmitCode = { cb.goto(start) iec } - def castTo(mb: EmitMethodBuilder[_], region: Value[Region], destType: PType, deepCopy: Boolean = false): EmitCode = { + def castTo(mb: EmitMethodBuilder[_], region: Value[Region], destType: SType, deepCopy: Boolean = false): EmitCode = { EmitCode.fromI(mb)(cb => toI(cb).map(cb)(_.castTo(cb, region, destType))) } - def codeTuple(): IndexedSeq[Code[_]] = { - val tc = pv.codeTuple() - if (pt.required) - tc - else - tc :+ m + def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = { + val ct = if (required) { + toI(cb).get(cb).makeCodeTuple(cb) + } else { + val es = cb.emb.newEmitLocal("ec_makecodetuple", emitType) + cb.assign(es, toI(cb)) + es.pv.makeCodeTuple(cb) :+ es.m + } + + assert(ct.zip(emitParamType.codeTupleTypes).forall { case (p, pt) => p.ti == pt.ti}, + s"ctt mismatch: $emitType\n param: ${ct.map(_.ti)}\n types: ${emitParamType.codeTupleTypes}") + ct } def missingIf(mb: EmitMethodBuilder[_], cond: Code[Boolean]): EmitCode = @@ -457,19 +449,63 @@ class EmitCode(private val start: CodeLabel, private val iec: IEmitCode) { } def asVoid(): Code[Unit] = { - require(pv.pt == PVoid) + 
require(pv.st == SVoid) Code.toUnit(m) } } -abstract class EmitSettable extends EmitValue { - def store(cb: EmitCodeBuilder, ec: EmitCode): Unit - - def store(cb: EmitCodeBuilder, iec: IEmitCode): Unit +object EmitSettable { + def present(vs: SSettable): EmitSettable = new EmitSettable(None, vs) } -abstract class PresentEmitSettable extends EmitValue { - def store(cb: EmitCodeBuilder, pc: PCode): Unit +class EmitSettable( + missing: Option[Settable[Boolean]], // required if None + vs: SSettable) extends EmitValue { + + lazy val required: Boolean = missing.isEmpty + + lazy val emitType: EmitType = EmitType(vs.st, required) + + def settableTuple(): IndexedSeq[Settable[_]] = { + missing match { + case Some(m) => vs.settableTuple() :+ m + case None => vs.settableTuple() + } + } + + def m: Code[Boolean] = missing.map(_.load()).getOrElse(const(false)) + + def load: EmitCode = { + val ec = EmitCode(Code._empty, + if (required) const(false) else missing.get.load(), + vs.get) + assert(ec.required == required) + ec + } + + def store(cb: EmitCodeBuilder, ec: EmitCode): Unit = { + store(cb, ec.toI(cb)) + } + + def store(cb: EmitCodeBuilder, iec: IEmitCode): Unit = + if (required) + cb.assign(vs, iec.get(cb, s"Required EmitSettable cannot be missing ${ st }")) + else + iec.consume(cb, { + cb.assign(missing.get, true) + }, { value => + cb.assign(missing.get, false) + cb.assign(vs, value) + }) + + override def get(cb: EmitCodeBuilder): SCode = { + if (required) { + vs + } else { + cb.ifx(missing.get, cb._fatal(s"Can't convert missing ${ st } to PValue")) + vs + } + } } class RichIndexedSeqEmitSettable(is: IndexedSeq[EmitSettable]) { @@ -477,9 +513,9 @@ class RichIndexedSeqEmitSettable(is: IndexedSeq[EmitSettable]) { } object LoopRef { - def apply(cb: EmitCodeBuilder, L: CodeLabel, args: IndexedSeq[(String, PType)], pool: Value[RegionPool], resultType: EmitType): LoopRef = { - val (loopArgs, tmpLoopArgs) = args.zipWithIndex.map { case ((name, pt), i) => - (cb.emb.newEmitField(s"$name$i", pt, pt.required), cb.emb.newEmitField(s"tmp$name$i", pt, pt.required)) + def apply(cb: EmitCodeBuilder, L: CodeLabel, args: IndexedSeq[(String, EmitType)], pool: Value[RegionPool], resultType: EmitType): LoopRef = { + val (loopArgs, tmpLoopArgs) = args.zipWithIndex.map { case ((name, et), i) => + (cb.emb.newEmitField(s"$name$i", et), cb.emb.newEmitField(s"tmp$name$i", et)) }.unzip val r1: Settable[Region] = cb.newLocal[Region]("loop_ref_r1") @@ -494,7 +530,7 @@ object LoopRef { class LoopRef( val L: CodeLabel, - val loopTypes: IndexedSeq[PType], + val loopTypes: IndexedSeq[EmitType], val loopArgs: IndexedSeq[EmitSettable], val tmpLoopArgs: IndexedSeq[EmitSettable], val r1: Settable[Region], @@ -509,24 +545,68 @@ abstract class EstimableEmitter[C] { class Emit[C]( val ctx: EmitContext, - val cb: EmitClassBuilder[C]) { emitSelf => + val cb: EmitClassBuilder[C]) { + emitSelf => + + val methods: mutable.Map[(String, Seq[Type], Seq[SType], SType), EmitMethodBuilder[C]] = mutable.Map() + + def emitVoidInSeparateMethod(context: String, cb: EmitCodeBuilder, ir: IR, region: Value[Region], env: EmitEnv, container: Option[AggContainer], loopEnv: Option[Env[LoopRef]]): Unit = { + assert(!ctx.inLoopCriticalPath.contains(ir)) + val mb = cb.emb.genEmitMethod(context, FastIndexedSeq[ParamType](), UnitInfo) + val r = cb.newField[Region]("emitVoidSeparate_region", region) + mb.voidWithBuilder { cb => + ctx.tryingToSplit.bind(ir, ()) + emitVoid(cb, ir, r, env, container, loopEnv) + } + cb.invokeVoid(mb) + } + + def emitSplitMethod(context: 
String, cb: EmitCodeBuilder, ir: IR, region: Value[Region], env: EmitEnv, container: Option[AggContainer], loopEnv: Option[Env[LoopRef]]): (EmitSettable, EmitMethodBuilder[_]) = { + val mb = cb.emb.genEmitMethod(context, FastIndexedSeq[ParamType](), UnitInfo) + val r = cb.newField[Region]("emitInSeparate_region", region) + + var ev: EmitSettable = null + mb.voidWithBuilder { cb => + ctx.tryingToSplit.bind(ir, ()) + val result = emitI(ir, cb, r, env, container, loopEnv) + + ev = cb.emb.ecb.newEmitField(s"${context}_result", result.emitType) + cb.assign(ev, result) + } + (ev, mb) + } - val methods: mutable.Map[(String, Seq[Type], Seq[PType], PType), EmitMethodBuilder[C]] = mutable.Map() + def emitInSeparateMethod(context: String, cb: EmitCodeBuilder, ir: IR, region: Value[Region], env: EmitEnv, container: Option[AggContainer], loopEnv: Option[Env[LoopRef]]): IEmitCode = { + if (ir.typ == TVoid) { + emitVoidInSeparateMethod(context, cb, ir, region, env, container, loopEnv) + return IEmitCode.present(cb, SVoidCode) + } + assert(!ctx.inLoopCriticalPath.contains(ir)) + val (ev, mb) = emitSplitMethod(context, cb, ir, region, env, container, loopEnv) + cb.invokeVoid(mb) + ev.toI(cb) + } + + private[ir] def emitVoid(cb: EmitCodeBuilder, ir: IR, region: Value[Region], env: EmitEnv, container: Option[AggContainer], loopEnv: Option[Env[LoopRef]]): Unit = { + if (ctx.methodSplits.contains(ir) && !ctx.tryingToSplit.contains(ir)) { + emitVoidInSeparateMethod(s"split_${ir.getClass.getSimpleName}", cb, ir, region, env, container, loopEnv) + return + } - private[ir] def emitVoid(cb: EmitCodeBuilder, ir: IR, region: Value[Region], env: E, container: Option[AggContainer], loopEnv: Option[Env[LoopRef]]): Unit = { val mb: EmitMethodBuilder[C] = cb.emb.asInstanceOf[EmitMethodBuilder[C]] - def emit(ir: IR, mb: EmitMethodBuilder[C] = mb, region: Value[Region] = region, env: E = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): EmitCode = + + def emit(ir: IR, mb: EmitMethodBuilder[C] = mb, region: Value[Region] = region, env: EmitEnv = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): EmitCode = this.emit(ir, mb, region, env, container, loopEnv) def emitStream(ir: IR, outerRegion: Value[Region], mb: EmitMethodBuilder[C] = mb): EmitCode = EmitCode.fromI(mb)(cb => EmitStream.produce(this, ir, cb, outerRegion, env, container)) - def emitVoid(ir: IR, cb: EmitCodeBuilder = cb, region: Value[Region] = region, env: E = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): Unit = + def emitVoid(ir: IR, cb: EmitCodeBuilder = cb, region: Value[Region] = region, env: EmitEnv = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): Unit = this.emitVoid(cb, ir, region, env, container, loopEnv) - def emitI(ir: IR, region: Value[Region] = region, env: E = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): IEmitCode = + def emitI(ir: IR, region: Value[Region] = region, env: EmitEnv = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): IEmitCode = this.emitI(ir, cb, region, env, container, loopEnv) (ir: @unchecked) match { @@ -548,8 +628,6 @@ class Emit[C]( } case StreamFor(a, valueName, body) => - val streamType = coerce[PStream](a.pType) - emitStream(a, region).toI(cb).consume(cb, {}, { case stream: SStreamCode => @@ -619,15 +697,15 @@ class Emit[C]( Array.range(start, start + ns).foreach(i => 
sc.newState(cb, i)) cb.assign(ib, spec.buildCodeInputBuffer( - Code.newInstance[ByteArrayInputStream, Array[Byte]]( - mb.getSerializedAgg(sIdx)))) + Code.newInstance[ByteArrayInputStream, Array[Byte]]( + mb.getSerializedAgg(sIdx)))) cb += mb.freeSerializedAgg(sIdx) (0 until ns).foreach { j => deserializers(j)(cb, ib) } - cb.assign(ib, Code._null) + cb.assign(ib, Code._null[InputBuffer]) case Die(m, typ, errorId) => val cm = emitI(m) @@ -641,12 +719,12 @@ class Emit[C]( val AggContainer(_, sc, _) = container.get val rvAgg = agg.Extract.getAgg(aggSig) val tempState = AggStateSig.getState(aggSig.state, mb.ecb) - val aggStateOffset = mb.genFieldThisRef[Long](s"combOpValue_${i}_state"); + val aggStateOffset = mb.genFieldThisRef[Long](s"combOpValue_${ i }_state"); val v = emitI(value) v.consume(cb, cb._fatal("cannot combOp a missing value"), - { case serializedValue: PBinaryCode => + { case serializedValue: SBinaryCode => cb.assign(aggStateOffset, region.allocate(tempState.storageType.alignment, tempState.storageType.byteSize)) tempState.createState(cb) tempState.newState(cb) @@ -662,7 +740,7 @@ class Emit[C]( val v = emitI(value) v.consume(cb, cb._fatal("cannot initialize aggs from a missing value"), - { case serializedValue: PBinaryCode => + { case serializedValue: SBinaryCode => sc.states(i).createState(cb) sc.newState(cb, i) sc.states(i).deserializeFromBytes(cb, serializedValue) @@ -671,102 +749,113 @@ class Emit[C]( } } - private[ir] def emitI(ir: IR, cb: EmitCodeBuilder, env: E, container: Option[AggContainer]): IEmitCode = { + private[ir] def emitI(ir: IR, cb: EmitCodeBuilder, env: EmitEnv, container: Option[AggContainer]): IEmitCode = { val region = cb.emb.getCodeParam[Region](1) emitI(ir, cb, region, env, container, None) } - private[ir] def emitI(ir: IR, cb: EmitCodeBuilder, region: Value[Region], env: E, + private[ir] def emitI(ir: IR, cb: EmitCodeBuilder, region: Value[Region], env: EmitEnv, container: Option[AggContainer], loopEnv: Option[Env[LoopRef]] ): IEmitCode = { + if (ctx.methodSplits.contains(ir) && !ctx.tryingToSplit.contains(ir)) { + return emitInSeparateMethod(s"split_${ir.getClass.getSimpleName}", cb, ir, region, env, container, loopEnv) + } + val mb: EmitMethodBuilder[C] = cb.emb.asInstanceOf[EmitMethodBuilder[C]] - def emitI(ir: IR, region: Value[Region] = region, env: E = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): IEmitCode = + def emitI(ir: IR, region: Value[Region] = region, env: EmitEnv = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): IEmitCode = this.emitI(ir, cb, region, env, container, loopEnv) - def emitInNewBuilder(cb: EmitCodeBuilder, ir: IR, region: Value[Region] = region, env: E = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): IEmitCode = + def emitInNewBuilder(cb: EmitCodeBuilder, ir: IR, region: Value[Region] = region, env: EmitEnv = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): IEmitCode = this.emitI(ir, cb, region, env, container, loopEnv) - def emitInMethod(cb: EmitCodeBuilder, ir: IR): IEmitCode = - this.emitI(ir, cb, Env.empty, container) - - def emitStream(ir: IR, cb: EmitCodeBuilder, outerRegion: Value[Region] ): IEmitCode = + def emitStream(ir: IR, cb: EmitCodeBuilder, outerRegion: Value[Region]): IEmitCode = EmitStream.produce(this, ir, cb, outerRegion, env, container) - def emitVoid(ir: IR, env: E = env, container: Option[AggContainer] = container, loopEnv: 
Option[Env[LoopRef]] = loopEnv): Unit = + def emitVoid(ir: IR, env: EmitEnv = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): Unit = this.emitVoid(cb, ir: IR, region, env, container, loopEnv) - def emitFallback(ir: IR, env: E = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): IEmitCode = + def emitFallback(ir: IR, env: EmitEnv = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): IEmitCode = this.emit(ir, mb, region, env, container, loopEnv, fallingBackFromEmitI = true).toI(cb) - def emitDeforestedNDArrayI(ir: IR): IEmitCode = - deforestNDArrayI(ir, cb, region, env) + def emitDeforestedNDArrayI(ir: IR): IEmitCode = EmitNDArray(this, ir, cb, region, env, container, loopEnv) def emitNDArrayColumnMajorStrides(ir: IR): IEmitCode = { - emitI(ir).map(cb){case pNDCode: PNDArrayCode => + emitI(ir).map(cb) { case pNDCode: SNDArrayCode => val pNDValue = pNDCode.memoize(cb, "ndarray_column_major_check") LinalgCodeUtils.checkColMajorAndCopyIfNeeded(pNDValue, cb, region) } } - val pt = ir.pType + // Returns an IEmitCode along with a Boolean that is true if the returned value is column major. If false it's row + // major instead. + def emitNDArrayStandardStriding(ir: IR): IEmitCodeGen[(SNDArrayValue, Value[Boolean])] = { + emitI(ir).map(cb) { case pNDCode: SNDArrayCode => + val pNDValue = pNDCode.memoize(cb, "ndarray_standard_striding_check") + LinalgCodeUtils.checkStandardStriding(pNDValue, cb, region) + } + } + + def typeWithReqx(node: IR): VirtualTypeWithReq = VirtualTypeWithReq(node.typ, ctx.req.lookup(node).asInstanceOf[TypeWithRequiredness]) + + def typeWithReq: VirtualTypeWithReq = typeWithReqx(ir) - if (pt == PVoid) { + if (ir.typ == TVoid) { emitVoid(ir) - return IEmitCode(CodeLabel(), CodeLabel(), PCode._empty, required = true) + return IEmitCode.present(cb, SVoidCode) } - def presentPC(pc: PCode): IEmitCode = IEmitCode.present(cb, pc) - - def presentC(c: Code[_]): IEmitCode = presentPC(PCode(pt, c)) + def presentPC(pc: SCode): IEmitCode = IEmitCode.present(cb, pc) val result: IEmitCode = (ir: @unchecked) match { case I32(x) => - presentC(const(x)) + presentPC(primitive(const(x))) case I64(x) => - presentC(const(x)) + presentPC(primitive(const(x))) case F32(x) => - presentC(const(x)) + presentPC(primitive(const(x))) case F64(x) => - presentC(const(x)) + presentPC(primitive(const(x))) case s@Str(x) => - presentPC(mb.addLiteral(x, coerce[PString](s.pType))) + presentPC(mb.addLiteral(x, typeWithReq)) case x@UUID4(_) => - presentPC(PCode(x.pType, coerce[PString](x.pType). + val pt = PCanonicalString() + presentPC(pt.loadCheapSCode(cb, pt. 
allocateAndStoreString(mb, region, Code.invokeScalaObject0[String]( Class.forName("is.hail.expr.ir.package$"), "uuid4")))) case x@Literal(t, v) => - presentPC(mb.addLiteral(v, x.pType)) + presentPC(mb.addLiteral(v, typeWithReq)) case x@EncodedLiteral(codec, value) => - assert(x.pType == codec.decodedPType()) presentPC(mb.addEncodedLiteral(x)) case True() => - presentC(const(true)) + presentPC(primitive(const(true))) case False() => - presentC(const(false)) + presentPC(primitive(const(false))) case Consume(value) => - emitI(value).map(cb){pc => + emitI(value).map(cb) { pc => cb.memoizeField(pc, "consumed_field") // Ignore pc, just return a 1 - PCode(ir.pType, 1L) + primitive(const(1L)) } case Cast(v, typ) => val iec = emitI(v) val cast = Casts.get(v.typ, typ) - iec.map(cb)(pc => PCode(pt, cast(pc.code))) + iec.map(cb)(pc => cast(cb, pc)) case CastRename(v, _typ) => emitI(v) - .map(cb)(pc => PCode(pt, pc.code)) + .map(cb)(pc => pc.st.castRename(_typ).fromCodes(pc.makeCodeTuple(cb))) case NA(typ) => - IEmitCode(cb, const(true), pt.defaultValue(cb.emb)) + IEmitCode(cb, const(true), typeWithReq.canonicalEmitType.st.defaultValue) case IsNA(v) => val m = emitI(v).consumeCode(cb, true, _ => false) - presentC(m) + presentPC(primitive(m)) case Coalesce(values) => - val coalescedValue = mb.newPLocal("coalesce_value", pt) val emittedValues = values.map(v => EmitCode.fromI(cb.emb)(cb => emitInNewBuilder(cb, v))) + val unifiedType = SType.chooseCompatibleType(typeWithReq, emittedValues.map(_.st): _*) + val coalescedValue = mb.newPLocal("coalesce_value", unifiedType) + val Ldefined = CodeLabel() val Lmissing = CodeLabel() @@ -774,7 +863,7 @@ class Emit[C]( value.toI(cb).consume(cb, {}, // fall through to next check { sc => - cb.assign(coalescedValue, sc.castTo(cb, region, pt)) + cb.assign(coalescedValue, sc.castTo(cb, region, unifiedType)) cb.goto(Ldefined) }) } @@ -788,25 +877,27 @@ class Emit[C]( emitI(cond).flatMap(cb) { condValue => + val codeCnsq = EmitCode.fromI(cb.emb)(cb => emitInNewBuilder(cb, cnsq)) val codeAltr = EmitCode.fromI(cb.emb)(cb => emitInNewBuilder(cb, altr)) + val outType = SType.chooseCompatibleType(typeWithReq, codeCnsq.st, codeAltr.st) val Lmissing = CodeLabel() val Ldefined = CodeLabel() - val out = mb.newPLocal(pt) + val out = mb.newPLocal(outType) cb.ifx(condValue.asBoolean.boolCode(cb), { codeCnsq.toI(cb).consume(cb, { cb.goto(Lmissing) - }, {sc => - cb.assign(out, sc.castTo(cb, region, pt)) + }, { sc => + cb.assign(out, sc.castTo(cb, region, outType)) }) }, { codeAltr.toI(cb).consume(cb, { cb.goto(Lmissing) - }, {sc => - cb.assign(out, sc.castTo(cb, region, pt)) + }, { sc => + cb.assign(out, sc.castTo(cb, region, outType)) }) }) cb.goto(Ldefined) @@ -815,100 +906,73 @@ class Emit[C]( } case x@MakeStruct(fields) => - val scode = x.pType.asInstanceOf[PCanonicalBaseStruct].constructFromFields(cb, - region, - fields.map { case (name, x) => + presentPC(SStackStruct.constructFromArgs(cb, region, x.typ.asInstanceOf[TBaseStruct], + fields.map { case (_, x) => EmitCode.fromI(cb.emb)(cb => emitInNewBuilder(cb, x)) - }.toFastIndexedSeq, - deepCopy = false) - presentPC(scode) + }: _* + )) case x@MakeTuple(fields) => - val scode = x.pType.asInstanceOf[PCanonicalBaseStruct].constructFromFields(cb, - region, + presentPC(SStackStruct.constructFromArgs(cb, region, x.typ.asInstanceOf[TBaseStruct], fields.map { case (_, x) => - EmitCode.fromI(cb.emb)(emitInNewBuilder(_, x)) - }.toFastIndexedSeq, - deepCopy = false) - presentPC(scode) + EmitCode.fromI(cb.emb)(cb => emitInNewBuilder(cb, x)) + }: 
_* + )) case x@SelectFields(oldStruct, fields) => emitI(oldStruct) - .map(cb) { case sc: SBaseStructCode => - val sv = sc.memoize(cb, "select_fields_scode") - x.pType.asInstanceOf[PCanonicalBaseStruct].constructFromFields(cb, - region, - fields.map { field => - EmitCode.fromI(cb.emb)(cb => sv.loadField(cb, field).typecast[PCode]) - }.toFastIndexedSeq, - deepCopy = false) - } + .map(cb) { case sc: SBaseStructCode => sc.subset(fields: _*) } - case x@InsertFields(old, fields, fieldOrder) => + case x@InsertFields(old, fields, _) => if (fields.isEmpty) emitI(old) else { - val codeOld = emitI(old) - val updateMap = Map(fields: _*) - - codeOld.map(cb) { oldPC => - val oldPV = oldPC.asBaseStruct.memoize(cb, "insert_fields_old") - - val itemsEC = x.pType.fields.map { f => - updateMap.get(f.name) match { - case Some(vir) => - EmitCode.fromI(mb)(emitInNewBuilder(_, vir)) - case None => - EmitCode.fromI(mb)(oldPV.loadField(_, f.name).typecast[PCode]) - } - } - - x.pType.asInstanceOf[PCanonicalBaseStruct] - .constructFromFields(cb, region, itemsEC, deepCopy = false) - .asPCode + emitI(old).map(cb) { old => + old.asBaseStruct.insert(cb, region, x.typ, + fields.map { case (name, x) => (name, EmitCode.fromI(cb.emb)(cb => emitInNewBuilder(cb, x))) }: _*) } } case ApplyBinaryPrimOp(op, l, r) => emitI(l).flatMap(cb) { pcL => - emitI(r).map(cb)(pcR => PCode(pt, BinaryOp.emit(op, l.typ, r.typ, pcL.code, pcR.code))) + emitI(r).map(cb)(pcR => BinaryOp.emit(cb, op, pcL, pcR)) } case ApplyUnaryPrimOp(op, x) => - emitI(x).map(cb)(pc => PCode(pt, UnaryOp.emit(op, x.typ, pc.code))) + emitI(x).map(cb)(pc => UnaryOp.emit(cb, op, pc)) case ApplyComparisonOp(op, l, r) => if (op.strict) { emitI(l).flatMap(cb) { l => emitI(r).map(cb) { r => val f = op.codeOrdering(cb.emb.ecb, l.st, r.st) - PCode(pt, f(cb, EmitCode.present(cb.emb, l), EmitCode.present(cb.emb, r))) + primitive(ir.typ, f(cb, EmitCode.present(cb.emb, l), EmitCode.present(cb.emb, r))) } } } else { val lc = emitI(l).memoize(cb, "l") val rc = emitI(r).memoize(cb, "r") val f = op.codeOrdering(cb.emb.ecb, lc.st, rc.st) - presentC(f(cb, lc, rc)) + presentPC(primitive(ir.typ, f(cb, lc, rc))) } case x@MakeArray(args, _) => - val pType = x.pType.asInstanceOf[PCanonicalArray] + val emittedArgs = args.map(a => EmitCode.fromI(mb)(cb => emitInNewBuilder(cb, a))) + val pType = typeWithReq.canonicalPType.asInstanceOf[PCanonicalArray] val (pushElement, finish) = pType.constructFromFunctions(cb, region, args.size, deepCopy = false) - for (arg <- args) { - val v = emitI(arg) - pushElement(cb, v) + for (arg <- emittedArgs) { + pushElement(cb, arg.toI(cb)) } presentPC(finish(cb)) case ArrayZeros(length) => emitI(length).map(cb) { case n: SInt32Code => - val outputPType = coerce[PArray](ir.pType) + val outputPType = PCanonicalArray(PInt32Required) val elementSize = outputPType.elementByteSize val numElements = cb.newLocal[Int]("n_elements", n.intCode(cb)) val arrayAddress = cb.newLocal[Long]("array_addr", outputPType.allocate(region, numElements)) cb += outputPType.stagedInitialize(arrayAddress, numElements) cb += Region.setMemory(outputPType.firstElementOffset(arrayAddress), numElements.toL * elementSize, 0.toByte) - PCode(pt, arrayAddress) + outputPType.loadCheapSCode(cb, arrayAddress) } case x@ArrayRef(a, i, s) => @@ -952,102 +1016,175 @@ class Emit[C]( val av = ac.asIndexable.memoize(cb, "aref_a") val iv = cb.newLocal("i", ic.asInt.intCode(cb)) boundsCheck(cb, iv, av.loadLength()) - av.loadElement(cb, iv).typecast[PCode] + av.loadElement(cb, iv) } } - case CastToArray(a) => - 
emitI(a).map(cb)(pc => pt.fromCodeTuple(pc.codeTuple())) - case ArrayLen(a) => emitI(a).map(cb) { (ac) => - PCode(pt, ac.asIndexable.loadLength()) + primitive(ac.asIndexable.loadLength()) } case GetField(o, name) => emitI(o).flatMap(cb) { oc => - val ov = oc.asBaseStruct.memoize(cb, "get_tup_elem_o") - ov.loadField(cb, name).typecast[PCode] + oc.asBaseStruct.loadSingleField(cb, name) } case GetTupleElement(o, i) => emitI(o).flatMap(cb) { oc => - val ov = oc.asBaseStruct.memoize(cb, "get_tup_elem_o") - ov.loadField(cb, oc.pt.asInstanceOf[PTuple].fieldIndex(i)).typecast[PCode] + oc.asBaseStruct.loadSingleField(cb, o.typ.asInstanceOf[TTuple].fieldIndex(i)) } case x@LowerBoundOnOrderedCollection(orderedCollection, elem, onKey) => emitI(orderedCollection).map(cb) { a => - val typ: PContainer = coerce[PIterable](a.pt).asPContainer val e = EmitCode.fromI(cb.emb)(cb => this.emitI(elem, cb, region, env, container, loopEnv)) - val bs = new BinarySearch[C](mb, typ, e.pt, keyOnly = onKey) - val arr = SingleCodePCode.fromPCode(cb, a, region) - PCode(pt, bs.getClosestIndex(arr.code.asInstanceOf[Code[Long]], e.m, e.v)) + val bs = new BinarySearch[C](mb, a.st.asInstanceOf[SContainer], e.emitType, keyOnly = onKey) + primitive(bs.getClosestIndex(cb, a, e)) } - case GroupByKey(collection) => - // sort collection by group - val collectionTyp = coerce[PStream](collection.pType) - val keyValTyp = coerce[PBaseStruct](collectionTyp.elementType) - val keyTyp = keyValTyp.types(0) - val valTyp = keyValTyp.types(1) - val dictTyp = coerce[PCanonicalDict](ir.pType) - val groupTyp = dictTyp.elementType - val arrayTyp = PCanonicalArray(groupTyp, required = true) - - val sortedElts = new StagedArrayBuilder(keyValTyp, mb, 16) - val sorter = new ArraySorter(EmitRegion(mb, region), sortedElts) - - val (k1, k2) = keyValTyp match { - case t: PStruct => GetField(In(0, PCodeEmitParamType(t)), "key") -> GetField(In(1, PCodeEmitParamType(t)), "key") - case t: PTuple => - assert(t.fields(0).index == 0) - GetTupleElement(In(0, PCodeEmitParamType(t)), 0) -> GetTupleElement(In(1, PCodeEmitParamType(t)), 0) - } + case x@ArraySort(a, left, right, lessThan) => + emitStream(a, cb, region).map(cb) { case stream: SStreamCode => + val producer = stream.producer + + val sct = SingleCodeType.fromSType(producer.element.st) - val compare = ApplyComparisonOp(Compare(keyValTyp.types(0).virtualType), k1, k2) < 0 - InferPType(compare) - val leftRightComparatorNames = Array.empty[String] - val sortF = sortedElts.ti match { - case BooleanInfo => makeDependentSortingFunction[Boolean](region, keyValTyp, compare, env, leftRightComparatorNames) - case IntInfo => makeDependentSortingFunction[Int](region, keyValTyp, compare, env, leftRightComparatorNames) - case LongInfo => makeDependentSortingFunction[Long](region, keyValTyp, compare, env, leftRightComparatorNames) - case FloatInfo => makeDependentSortingFunction[Float](region, keyValTyp, compare, env, leftRightComparatorNames) - case DoubleInfo => makeDependentSortingFunction[Double](region, keyValTyp, compare, env, leftRightComparatorNames) + val vab = new StagedArrayBuilder(sct, producer.element.required, mb, 0) + StreamUtils.writeToArrayBuilder(cb, stream.producer, vab, region) + val sorter = new ArraySorter(EmitRegion(mb, region), vab) + sorter.sort(cb, region, makeDependentSortingFunction(cb, sct, lessThan, env, emitSelf, Array(left, right))) + sorter.toRegion(cb, x.typ) } - val groupSizes = new StagedArrayBuilder(PInt32(), mb, 0) + case x@ToSet(a) => + emitStream(a, cb, region).map(cb) { case 
stream: SStreamCode => + val producer = stream.producer + + val sct = SingleCodeType.fromSType(producer.element.st) - val (lastKey, currKey) = (keyValTyp.virtualType: @unchecked) match { - case ts: TStruct => - GetField(In(0, PCodeEmitParamType(keyValTyp)), ts.fieldNames(0)) -> GetField(In(1, PCodeEmitParamType(keyValTyp)), ts.fieldNames(0)) - case tt: TTuple => - GetTupleElement(In(0, PCodeEmitParamType(keyValTyp)), tt.fields(0).index) -> GetTupleElement(In(1, PCodeEmitParamType(keyValTyp)), tt.fields(0).index) - } - val compare2 = ApplyComparisonOp(EQWithNA(keyTyp.virtualType), lastKey, currKey) - InferPType(compare2) - val isSame = mb.genEmitMethod("isSame", - FastIndexedSeq(typeInfo[Region], PCodeEmitParamType(keyValTyp), PCodeEmitParamType(keyValTyp)), - BooleanInfo) - isSame.emitWithBuilder { cb => - emitInMethod(cb, compare2).consumeCode[Boolean](cb, true, _.asBoolean.boolCode(cb)) + val vab = new StagedArrayBuilder(sct, producer.element.required, mb, 0) + StreamUtils.writeToArrayBuilder(cb, stream.producer, vab, region) + val sorter = new ArraySorter(EmitRegion(mb, region), vab) + + def lessThan(cb: EmitCodeBuilder, region: Value[Region], l: Code[_], r: Code[_]): Code[Boolean] = { + cb.emb.ecb.getOrdering(sct.loadedSType, sct.loadedSType) + .ltNonnull(cb, sct.loadToSCode(cb, region, l), sct.loadToSCode(cb, region, r)) + } + + sorter.sort(cb, region, lessThan) + + def skipNext(cb: EmitCodeBuilder, region: Value[Region], l: EmitCode, r: EmitCode): Code[Boolean] = { + cb.newLocal[Boolean]("skipNext_eq", cb.emb.ecb.getOrdering(l.st, r.st) + .equiv(cb, l, r, missingEqual = true)) + } + + sorter.distinctFromSorted(cb, region, skipNext) + sorter.toRegion(cb, x.typ) } - val eltIdx = mb.newLocal[Int]("groupByKey_eltIdx") - val grpIdx = mb.newLocal[Int]("groupByKey_grpIdx") - val withinGrpIdx = mb.newLocal[Int]("groupByKey_withinGrpIdx") - val outerSize = mb.newLocal[Int]("groupByKey_outerSize") - val groupSize = mb.newLocal[Int]("groupByKey_groupSize") + case x@ToDict(a) => + emitStream(a, cb, region).map(cb) { case stream: SStreamCode => + val producer = stream.producer + + val sct = SingleCodeType.fromSType(producer.element.st) + + val vab = new StagedArrayBuilder(sct, producer.element.required, mb, 0) + StreamUtils.writeToArrayBuilder(cb, stream.producer, vab, region) + val sorter = new ArraySorter(EmitRegion(mb, region), vab) + + def lessThan(cb: EmitCodeBuilder, region: Value[Region], l: Code[_], r: Code[_]): Code[Boolean] = { + val lk = EmitCode.fromI(cb.emb)(cb => sct.loadToSCode(cb, region, l) + .asBaseStruct.memoize(cb, "lt_l") + .loadField(cb, 0)) + + val rk = EmitCode.fromI(cb.emb)(cb => sct.loadToSCode(cb, region, r) + .asBaseStruct.memoize(cb, "lt_r") + .loadField(cb, 0)) + + cb.emb.ecb.getOrdering(lk.st, rk.st) + .lt(cb, lk, rk, missingEqual = true) + } + + sorter.sort(cb, region, lessThan) + sorter.pruneMissing(cb) def skipNext(cb: EmitCodeBuilder, region: Value[Region], l: EmitCode, r: EmitCode): Code[Boolean] = { + + val lk = EmitCode.fromI(cb.emb) { cb => + l.toI(cb).flatMap(cb) { x => + x.asBaseStruct.memoize(cb, "lt_l") + .loadField(cb, 0) + } + } + + val rk = EmitCode.fromI(cb.emb) { cb => + r.toI(cb).flatMap(cb) { x => + x.asBaseStruct.memoize(cb, "lt_r") + .loadField(cb, 0) + } + } + + cb.emb.ecb.getOrdering(lk.st, rk.st) + .equiv(cb, lk, rk, missingEqual = true) + } + + sorter.distinctFromSorted(cb, region, skipNext) + sorter.toRegion(cb, x.typ) } + case GroupByKey(collection) => emitStream(collection, cb, region).map(cb) { case stream: SStreamCode => + val sct =
SingleCodeType.fromSType(stream.producer.element.st) + val sortedElts = new StagedArrayBuilder(sct, stream.producer.element.required, mb, 16) StreamUtils.writeToArrayBuilder(cb, stream.producer, sortedElts, region) - cb += sorter.sort(sortF) - cb += sorter.pruneMissing + val sorter = new ArraySorter(EmitRegion(mb, region), sortedElts) + + def lt(cb: EmitCodeBuilder, region: Value[Region], l: Code[_], r: Code[_]): Code[Boolean] = { + val lk = EmitCode.fromI(cb.emb)(cb => sct.loadToSCode(cb, region, l) + .asBaseStruct.memoize(cb, "lt_l") + .loadField(cb, 0)) + val rk = EmitCode.fromI(cb.emb)(cb => sct.loadToSCode(cb, region, r) + .asBaseStruct.memoize(cb, "lt_r") + .loadField(cb, 0)) + cb.emb.ecb.getOrdering(lk.st, rk.st) + .lt(cb, lk, rk, missingEqual = true) + } + + sorter.sort(cb, region, lt) + sorter.pruneMissing(cb) + + val groupSizes = new StagedArrayBuilder(Int32SingleCodeType, true, mb, 0) + + val eltIdx = mb.newLocal[Int]("groupByKey_eltIdx") + val grpIdx = mb.newLocal[Int]("groupByKey_grpIdx") + val withinGrpIdx = mb.newLocal[Int]("groupByKey_withinGrpIdx") + val outerSize = mb.newLocal[Int]("groupByKey_outerSize") + val groupSize = mb.newLocal[Int]("groupByKey_groupSize") + + cb += groupSizes.clear cb.assign(eltIdx, 0) cb.assign(groupSize, 0) + def sameKeyAtIndices(cb: EmitCodeBuilder, region: Value[Region], idx1: Code[Int], idx2: Code[Int]): Code[Boolean] = { + val lk = EmitCode.fromI(cb.emb) { cb => + sortedElts.loadFromIndex(cb, region, idx1).flatMap(cb) { x => + x.asBaseStruct.memoize(cb, "lt_l") + .loadField(cb, 0) + } + } + + val rk = EmitCode.fromI(cb.emb) { cb => + sortedElts.loadFromIndex(cb, region, idx2).flatMap(cb) { x => + x.asBaseStruct.memoize(cb, "lt_r") + .loadField(cb, 0) + } + } + + cb.emb.ecb.getOrdering(lk.st, rk.st) + .equiv(cb, lk, rk, missingEqual = true) + } + cb.whileLoop(eltIdx < sortedElts.size, { val bottomOfLoop = CodeLabel() val newGroup = CodeLabel() @@ -1056,7 +1193,7 @@ class Emit[C]( cb.ifx(eltIdx.ceq(sortedElts.size - 1), { cb.goto(newGroup) }, { - cb.ifx(cb.invokeCode[Boolean](isSame, region, sortedElts.applyEV(mb, eltIdx), sortedElts.applyEV(mb, eltIdx + 1)), { + cb.ifx(sameKeyAtIndices(cb, region, eltIdx, eltIdx + 1), { cb.goto(bottomOfLoop) }, { cb.goto(newGroup) @@ -1071,7 +1208,12 @@ class Emit[C]( }) cb.assign(outerSize, groupSizes.size) - val (addGroup, finishOuter) = arrayTyp.constructFromFunctions(cb, region, outerSize, deepCopy = false) + val loadedElementType = sct.loadedSType.asInstanceOf[SBaseStruct] + val innerType = PCanonicalArray(loadedElementType.fieldEmitTypes(1).canonicalPType, true) + val kt = loadedElementType.fieldEmitTypes(0).canonicalPType + val groupType = PCanonicalStruct(true, ("key", kt), ("value", innerType)) + val dictType = PCanonicalDict(kt, innerType, false) + val (addGroup, finishOuter) = dictType.arrayRep.constructFromFunctions(cb, region, outerSize, deepCopy = false) cb.assign(eltIdx, 0) cb.assign(grpIdx, 0) @@ -1079,25 +1221,25 @@ class Emit[C]( cb.whileLoop(grpIdx < outerSize, { cb.assign(groupSize, coerce[Int](groupSizes(grpIdx))) cb.assign(withinGrpIdx, 0) - val firstStruct = sortedElts.applyEV(mb, eltIdx).get(cb).asBaseStruct.memoize(cb, "GroupByKey_firstStruct") - val key = EmitCode.fromI(mb) { cb => firstStruct.loadField(cb, 0).typecast[PCode] } + val firstStruct = sortedElts.loadFromIndex(cb, region, eltIdx).get(cb).asBaseStruct.memoize(cb, "GroupByKey_firstStruct") + val key = EmitCode.fromI(mb) { cb => firstStruct.loadField(cb, 0) } val group = EmitCode.fromI(mb) { cb => - val (addElt, finishInner) 
= PCanonicalArray(valTyp, required = true) + val (addElt, finishInner) = innerType .constructFromFunctions(cb, region, groupSize, deepCopy = false) cb.whileLoop(withinGrpIdx < groupSize, { - val struct = sortedElts.applyEV(mb, eltIdx).get(cb).asBaseStruct.memoize(cb, "GroupByKey_struct") - addElt(cb, struct.loadField(cb, 1).typecast[PCode]) + val struct = sortedElts.loadFromIndex(cb, region, eltIdx).get(cb).asBaseStruct.memoize(cb, "GroupByKey_struct") + addElt(cb, struct.loadField(cb, 1)) cb.assign(eltIdx, eltIdx + 1) cb.assign(withinGrpIdx, withinGrpIdx + 1) }) IEmitCode.present(cb, finishInner(cb)) } - val elt = groupTyp.constructFromFields(cb, region, FastIndexedSeq(key, group), deepCopy = false) + val elt = groupType.constructFromFields(cb, region, FastIndexedSeq(key, group), deepCopy = false) addGroup(cb, IEmitCode.present(cb, elt)) cb.assign(grpIdx, grpIdx + 1) }) - dictTyp.construct(finishOuter(cb)) + dictType.construct(finishOuter(cb)) } case x@StreamLen(a) => @@ -1108,7 +1250,7 @@ class Emit[C]( producer.initialize(cb) val xLen = cb.newLocal[Int]("streamlen_x", compLen(cb)) producer.close(cb) - PCode(x.pType, xLen) + primitive(xLen) case None => val count = cb.newLocal[Int]("stream_length", 0) producer.memoryManagedConsume(region, cb) { cb => @@ -1119,25 +1261,28 @@ class Emit[C]( case SStreamCode(_, nested) => StreamProducer.defineUnusedLabels(nested, mb) case _ => } - PCode(x.pType, count) + primitive(count) } } case x@MakeNDArray(dataIR, shapeIR, rowMajorIR, errorId) => - val xP = coerce[PCanonicalNDArray](x.pType) - val shapePType = coerce[PTuple](shapeIR.pType) - val nDims = shapePType.size emitI(rowMajorIR).flatMap(cb) { isRowMajorCode => - emitI(shapeIR).flatMap(cb) { case shapeTupleCode: PBaseStructCode => - emitI(dataIR).map(cb) { case dataCode: PIndexableCode => + emitI(shapeIR).flatMap(cb) { case shapeTupleCode: SBaseStructCode => + emitI(dataIR).map(cb) { case dataCode: SIndexableCode => + + val shapeSType = shapeTupleCode.st + val nDims = shapeSType.size + val xP = PCanonicalNDArray(dataCode.st.elementType.canonicalPType().setRequired(true), nDims) + val shapeTupleValue = shapeTupleCode.memoize(cb, "make_ndarray_shape") val memoData = dataCode.memoize(cb, "make_nd_array_memoized_data") cb.ifx(memoData.hasMissingValues(cb), { cb._throw(Code.newInstance[HailException, String, Int]( - "Cannot construct an ndarray with missing values.", errorId - ))}) + "Cannot construct an ndarray with missing values.", errorId + )) + }) (0 until nDims).foreach { index => cb.ifx(shapeTupleValue.isFieldMissing(index), @@ -1147,7 +1292,8 @@ class Emit[C]( val stridesSettables = (0 until nDims).map(i => cb.newLocal[Long](s"make_ndarray_stride_$i")) val shapeValues = (0 until nDims).map { i => - shapeTupleValue.loadField(cb, i).get(cb).memoize(cb, s"make_ndarray_shape_${i}").asPValue.value.asInstanceOf[Value[Long]] + val shape = SingleCodeSCode.fromSCode(cb, shapeTupleValue.loadField(cb, i).get(cb), region) + cb.newLocalAny[Long](s"make_ndarray_shape_${ i }", shape.code) } cb.ifx(isRowMajorCode.asBoolean.boolCode(cb), { @@ -1163,22 +1309,22 @@ class Emit[C]( } }) - xP.constructByCopyingArray(shapeValues, stridesSettables, memoData.pc.asIndexable, cb, region) + xP.constructByCopyingArray(shapeValues, stridesSettables, memoData.sc.asIndexable, cb, region) } } } case NDArrayShape(ndIR) => - emitI(ndIR).map(cb){ case pc: PNDArrayCode => pc.shape(cb).asPCode} + emitI(ndIR).map(cb) { case pc: SNDArrayCode => pc.shape(cb) } case x@NDArrayReindex(child, indexMap) => val childEC = emitI(child) - val 
childPType = coerce[PCanonicalNDArray](child.pType) - childEC.map(cb){ case pndCode: PNDArrayCode => + childEC.map(cb) { case pndCode: SNDArrayPointerCode => + val childPType = pndCode.st.pType val pndVal = pndCode.memoize(cb, "ndarray_reindex_child") val childShape = pndVal.shapes(cb) val childStrides = pndVal.strides(cb) - val pndAddr = SingleCodePCode.fromPCode(cb, pndVal, region) - val dataArray = childPType.dataType.loadCheapPCode(cb, childPType.dataPArrayPointer(pndAddr.code.asInstanceOf[Code[Long]])) + val pndAddr = SingleCodeSCode.fromSCode(cb, pndVal, region) + val dataArray = childPType.dataType.loadCheapSCode(cb, childPType.dataPArrayPointer(pndAddr.code.asInstanceOf[Code[Long]])) val newShape = indexMap.map { childIndex => if (childIndex < childPType.nDims) childShape(childIndex) else const(1L) @@ -1187,7 +1333,8 @@ class Emit[C]( if (childIndex < childPType.nDims) childStrides(childIndex) else const(0L) } - x.pType.constructByCopyingArray( + val newPType = childPType.copy(nDims = indexMap.length) + newPType.constructByCopyingArray( newShape, newStrides, dataArray, @@ -1198,29 +1345,25 @@ class Emit[C]( case NDArrayRef(nd, idxs, errorId) => val ndt = emitI(nd) - ndt.flatMap(cb) { case ndCode: PNDArrayCode => + ndt.flatMap(cb) { case ndCode: SNDArrayCode => val indexEmitCodes = idxs.map(idx => EmitCode.fromI(cb.emb)(emitInNewBuilder(_, idx))) - IEmitCode.multiMapEmitCodes(cb, indexEmitCodes) { idxPCodes: IndexedSeq[PCode] => - val memoizedIndices = idxPCodes.zipWithIndex.map { case (pc, idx) => - pc.memoize(cb,s"ref_idx_$idx") + IEmitCode.multiMapEmitCodes(cb, indexEmitCodes) { idxPCodes: IndexedSeq[SCode] => + val idxValues = idxPCodes.zipWithIndex.map { case (pc, idx) => + cb.newLocal(s"ref_idx_$idx", pc.asInt64.longCode(cb)) } val ndValue = ndCode.memoize(cb, "reffed_ndarray") - val idxValues = memoizedIndices.map(_.value.asInstanceOf[Value[Long]]) cb.append(ndValue.assertInBounds(idxValues, cb, errorId)) - ndValue.loadElement(idxValues, cb).asPCode + ndValue.loadElement(idxValues, cb) } } case NDArrayMatMul(lChild, rChild) => - emitNDArrayColumnMajorStrides(lChild).flatMap(cb) { case leftPCode: PNDArrayCode => - emitNDArrayColumnMajorStrides(rChild).map(cb) { case rightPCode: PNDArrayCode => - val lPType = leftPCode.pt - val rPType = rightPCode.pt - - val leftPVal = leftPCode.memoize(cb, "left_ndarray_matmul") - val rightPVal = rightPCode.memoize(cb, "right_ndarray_matmul") + emitNDArrayStandardStriding(lChild).flatMap(cb) { case (leftPVal: SNDArrayValue, leftIsColumnMajor: Value[Boolean]) => + emitNDArrayStandardStriding(rChild).map(cb) { case (rightPVal: SNDArrayValue, rightIsColumnMajor: Value[Boolean]) => + val lPType = leftPVal.st.asInstanceOf[SNDArrayPointer].pType + val rPType = rightPVal.st.asInstanceOf[SNDArrayPointer].pType val lShape = leftPVal.shapes(cb) val rShape = rightPVal.shapes(cb) @@ -1230,11 +1373,11 @@ class Emit[C]( val leftBroadcastMask = if (lPType.nDims > 2) NDArrayEmitter.broadcastMask(lShape) else IndexedSeq[Value[Long]]() val rightBroadcastMask = if (rPType.nDims > 2) NDArrayEmitter.broadcastMask(rShape) else IndexedSeq[Value[Long]]() - val outputPType = PCanonicalNDArray(lPType.elementType, TNDArray.matMulNDims(lPType.nDims, rPType.nDims), pt.required) + val outputPType = PCanonicalNDArray(lPType.elementType, TNDArray.matMulNDims(lPType.nDims, rPType.nDims)) if ((lPType.elementType.isInstanceOf[PFloat64] || lPType.elementType.isInstanceOf[PFloat32]) && lPType.nDims == 2 && rPType.nDims == 2) { - val leftPValAddr = SingleCodePCode.fromPCode(cb, 
leftPVal, region) - val rightPValAddr = SingleCodePCode.fromPCode(cb, rightPVal, region) + val leftPValAddr = SingleCodeSCode.fromSCode(cb, leftPVal, region) + val rightPValAddr = SingleCodeSCode.fromSCode(cb, rightPVal, region) val leftDataAddress = lPType.dataFirstElementPointer(leftPValAddr.code.asInstanceOf[Code[Long]]) val rightDataAddress = rPType.dataFirstElementPointer(rightPValAddr.code.asInstanceOf[Code[Long]]) @@ -1242,10 +1385,13 @@ class Emit[C]( val N = rShape(rPType.nDims - 1) val K = lShape(lPType.nDims - 1) - val LDA = M - val LDB = K + val LDA = leftIsColumnMajor.mux(M, K) + val LDB = rightIsColumnMajor.mux(K, N) val LDC = M + val TRANSA: Code[String] = leftIsColumnMajor.mux("N", "T") + val TRANSB: Code[String] = rightIsColumnMajor.mux("N", "T") + val (answerFirstElementAddr, answerFinisher) = outputPType.constructDataFunction( IndexedSeq(M, N), outputPType.makeColumnMajorStrides(IndexedSeq(M, N), region, cb), @@ -1256,8 +1402,8 @@ class Emit[C]( cb.append(lPType.elementType match { case PFloat32(_) => Code.invokeScalaObject13[String, String, Int, Int, Int, Float, Long, Int, Long, Int, Float, Long, Int, Unit](BLAS.getClass, method = "sgemm", - "N", - "N", + TRANSA, + TRANSB, M.toI, N.toI, K.toI, @@ -1272,8 +1418,8 @@ class Emit[C]( ) case PFloat64(_) => Code.invokeScalaObject13[String, String, Int, Int, Int, Double, Long, Int, Long, Int, Double, Long, Int, Unit](BLAS.getClass, method = "dgemm", - "N", - "N", + TRANSA, + TRANSB, M.toI, N.toI, K.toI, @@ -1298,8 +1444,8 @@ class Emit[C]( val numericElementType = coerce[PNumeric](lPType.elementType) val eVti = typeToTypeInfo(numericElementType) - val emitter = new NDArrayEmitter(unifiedShape) { - override def outputElement(cb: EmitCodeBuilder, idxVars: IndexedSeq[Value[Long]]): PCode = { + val emitter = new NDArrayEmitter(unifiedShape, leftPVal.st.elementType) { + override def outputElement(cb: EmitCodeBuilder, idxVars: IndexedSeq[Value[Long]]): SCode = { val element = coerce[Any](cb.newField("matmul_element")(eVti)) val k = cb.newField[Long]("ndarray_matmul_k") @@ -1319,15 +1465,15 @@ class Emit[C]( val kLen = cb.newField[Long]("ndarray_matmul_kLen") - def multiply(l: PCode, r: PCode): Code[_] = { + def multiply(l: SCode, r: SCode): Code[_] = { (l.st, r.st) match { - case (_: SInt32, _: SInt32) => + case (SInt32, SInt32) => l.asInt.intCode(cb) * r.asInt.intCode(cb) - case (_: SInt64, _: SInt64) => + case (SInt64, SInt64) => l.asLong.longCode(cb) * r.asLong.longCode(cb) - case (_: SFloat32, _: SFloat32) => + case (SFloat32, SFloat32) => l.asFloat.floatCode(cb) * r.asFloat.floatCode(cb) - case (_: SFloat64, _: SFloat64) => + case (SFloat64, SFloat64) => l.asDouble.doubleCode(cb) * r.asDouble.doubleCode(cb) } } @@ -1337,10 +1483,10 @@ class Emit[C]( cb.forLoop(cb.assign(k, 0L), k < kLen, cb.assign(k, k + 1L), { val lElem = leftPVal.loadElement(lIndices, cb) val rElem = rightPVal.loadElement(rIndices, cb) - cb.assign(element, numericElementType.add(multiply(lElem.asPCode, rElem.asPCode), element)) + cb.assign(element, numericElementType.add(multiply(lElem, rElem), element)) }) - PCode(outputPType.elementType, element) + primitive(outputPType.elementType.virtualType, element) } } emitter.emit(cb, outputPType, region) @@ -1349,9 +1495,9 @@ class Emit[C]( } case NDArrayInv(nd) => // Based on https://github.com/numpy/numpy/blob/v1.19.0/numpy/linalg/linalg.py#L477-L547 - emitNDArrayColumnMajorStrides(nd).map(cb) { case pNDCode: PNDArrayCode => + emitNDArrayColumnMajorStrides(nd).map(cb) { case pNDCode: SNDArrayCode => val pndVal = 
pNDCode.memoize(cb, "ndarray_inverse_nd") - val ndPT = pndVal.pt.asInstanceOf[PCanonicalNDArray] + val ndPT = pndVal.st.asInstanceOf[SNDArrayPointer].pType val shapeArray = pndVal.shapes(cb) val stridesArray = ndPT.makeColumnMajorStrides(shapeArray, region, cb) @@ -1373,7 +1519,7 @@ class Emit[C]( val INFOdgetrf = mb.newLocal[Int]() val INFOdgetri = mb.newLocal[Int]() val INFOerror = (fun: String, info: LocalRef[Int]) => (info cne 0) - .orEmpty(Code._fatal[Unit](const(s"LAPACK error ${fun}. Error code = ").concat(info.toS))) + .orEmpty(Code._fatal[Unit](const(s"LAPACK error ${ fun }. Error code = ").concat(info.toS))) cb.append((N cne M).orEmpty(Code._fatal[Unit](const("Can only invert square matrix")))) @@ -1410,11 +1556,11 @@ class Emit[C]( finish(cb) } case x@NDArraySVD(nd, full_matrices, computeUV) => - emitNDArrayColumnMajorStrides(nd).flatMap(cb){ case ndPCode: PNDArrayCode => + emitNDArrayColumnMajorStrides(nd).flatMap(cb) { case ndPCode: SNDArrayCode => val ndPVal = ndPCode.memoize(cb, "nd_svd_value") val infoDGESDDResult = cb.newLocal[Int]("infoDGESDD") - val infoDGESDDErrorTest = (extraErrorMsg: String) => (infoDGESDDResult cne 0) + val infoDGESDDErrorTest = (extraErrorMsg: String) => (infoDGESDDResult cne 0) .orEmpty(Code._fatal[Unit](const(s"LAPACK error DGESDD. $extraErrorMsg Error code = ").concat(infoDGESDDResult.toS))) val LWORKAddress = mb.newLocal[Long]("svd_lwork_address") @@ -1431,10 +1577,12 @@ class Emit[C]( val A = cb.newLocal[Long]("dgesdd_A_address") val firstElementDataAddress = ndPVal.firstDataAddress(cb) - cb.assign(LWORKAddress, Code.invokeStatic1[Memory, Long, Long]("malloc", 8L)) + cb.assign(LWORKAddress, Code.invokeStatic1[Memory, Long, Long]("malloc", 8L)) + + val retPTypeUncast = NDArraySVD.pTypes(computeUV, false) val (jobz, sPType, uData, uFinisher, vtData, vtFinisher) = if (computeUV) { - val outputPType = x.pType.asInstanceOf[PTuple] + val outputPType = retPTypeUncast.asInstanceOf[PTuple] val uPType = outputPType.fields(0).typ.asInstanceOf[PCanonicalNDArray] val sPType = outputPType.fields(1).typ.asInstanceOf[PCanonicalNDArray] val vtPType = outputPType.fields(2).typ.asInstanceOf[PCanonicalNDArray] @@ -1447,8 +1595,13 @@ class Emit[C]( (if (full_matrices) "A" else "S", sPType, uData, uFinisher, vtData, vtFinisher) } else { - def noOp(cb: EmitCodeBuilder): SNDArrayCode = { throw new IllegalStateException("Can't happen")} - ("N", x.pType.asInstanceOf[PCanonicalNDArray], const(0L), noOp(_), const(0L), noOp(_)) + val outputPType = retPTypeUncast.asInstanceOf[PCanonicalNDArray] + + def noOp(cb: EmitCodeBuilder): SNDArrayCode = { + throw new IllegalStateException("Can't happen") + } + + ("N", outputPType.asInstanceOf[PCanonicalNDArray], const(0L), noOp(_), const(0L), noOp(_)) } val (sDataAddress, sFinisher) = sPType.constructDataFunction(IndexedSeq(K), sPType.makeColumnMajorStrides(IndexedSeq(K), region, cb), cb, region) @@ -1477,6 +1630,7 @@ class Emit[C]( cb.append(Region.copyFrom(firstElementDataAddress, A, (M * N) * 8L)) def LWORK = Region.loadDouble(LWORKAddress).toI + val WORK = cb.newLocal[Long]("dgesdd_work_address") cb.assign(WORK, Code.invokeStatic1[Memory, Long, Long]("malloc", LWORK.toL * 8L)) @@ -1510,8 +1664,8 @@ class Emit[C]( val u = uFinisher(cb) val vt = vtFinisher(cb) - val outputPType = x.pType.asInstanceOf[PCanonicalTuple] - outputPType.constructFromFields(cb, region, FastIndexedSeq(EmitCode.present(cb.emb, u.asPCode), EmitCode.present(cb.emb, s), EmitCode.present(cb.emb, vt.asPCode)), deepCopy = false) + val outputPType = 
NDArraySVD.pTypes(true, false).asInstanceOf[PCanonicalTuple] + outputPType.constructFromFields(cb, region, FastIndexedSeq(EmitCode.present(cb.emb, u), EmitCode.present(cb.emb, s), EmitCode.present(cb.emb, vt)), deepCopy = false) } else { s } @@ -1520,11 +1674,14 @@ class Emit[C]( } case x@NDArrayQR(nd, mode) => // See here to understand different modes: https://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.qr.html - emitNDArrayColumnMajorStrides(nd).map(cb) { case pndCode: PNDArrayCode => + emitNDArrayColumnMajorStrides(nd).map(cb) { case pndCode: SNDArrayCode => + + val resultPType = NDArrayQR.pType(mode, false) + val pndValue = pndCode.memoize(cb, "ndarray_qr_nd") // This does a lot of byte level copying currently, so only trust // the PCanonicalNDArray representation. - assert(pndValue.pt.isInstanceOf[PCanonicalNDArray]) + val pType = pndValue.st.asInstanceOf[SNDArrayPointer].pType val shapeArray = pndValue.shapes(cb) @@ -1541,7 +1698,7 @@ class Emit[C]( def LWORK = (Region.loadDouble(LWORKAddress).toI > 0).mux(Region.loadDouble(LWORKAddress).toI, 1) - val ndPT = pndValue.pt.asInstanceOf[PCanonicalNDArray] + val ndPT = pType val dataFirstElementAddress = pndValue.firstDataAddress(cb) val hPType = ndPT @@ -1592,10 +1749,8 @@ class Emit[C]( val h = hFinisher(cb) val hMemo = h.memoize(cb, "ndarray_qr_h_memo") - val result: PCode = if (mode == "raw") { - val resultType = x.pType.asInstanceOf[PCanonicalBaseStruct] - val rawPType = x.pType.asInstanceOf[PTuple] - assert(hPType equalModuloRequired rawPType.types(0).asInstanceOf[PCanonicalNDArray], s"hPType = ${hPType}, other = ${rawPType.types(0).asInstanceOf[PCanonicalNDArray]}") + val result: SCode = if (mode == "raw") { + val resultType = resultPType.asInstanceOf[PCanonicalBaseStruct] val tau = tauFinisher(cb) resultType.constructFromFields(cb, region, FastIndexedSeq( @@ -1605,11 +1760,11 @@ class Emit[C]( } else { val (rPType, rRows, rCols) = if (mode == "r") { - (x.pType.asInstanceOf[PCanonicalNDArray], K, N) + (resultPType.asInstanceOf[PCanonicalNDArray], K, N) } else if (mode == "complete") { - (x.pType.asInstanceOf[PTuple].types(1).asInstanceOf[PCanonicalNDArray], M, N) + (resultPType.asInstanceOf[PTuple].types(1).asInstanceOf[PCanonicalNDArray], M, N) } else if (mode == "reduced") { - (x.pType.asInstanceOf[PTuple].types(1).asInstanceOf[PCanonicalNDArray], K, N) + (resultPType.asInstanceOf[PTuple].types(1).asInstanceOf[PCanonicalNDArray], K, N) } else { throw new AssertionError(s"Unsupported QR mode $mode") } @@ -1621,15 +1776,23 @@ class Emit[C]( val (rDataAddress, rFinisher) = rPType.constructDataFunction(rShapeArray, rStridesArray, cb, region) // This block assumes that `rDataAddress` and `aAddressDGEQRF` point to column major arrays. - // TODO: Abstract this into ndarray ptype/pcode interface methods. + // TODO: Abstract this into ndarray ptype/SCode interface methods. 
val currRow = cb.newLocal[Long]("ndarray_qr_currRow") val currCol = cb.newLocal[Long]("ndarray_qr_currCol") val curWriteAddress = cb.newLocal[Long]("ndarray_qr_curr_write_addr", rDataAddress) // I think this just copies out the upper triangle into new ndarray in column major order - cb.forLoop({cb.assign(currCol, 0L)}, currCol < rCols, {cb.assign(currCol, currCol + 1L)}, { - cb.forLoop({cb.assign(currRow, 0L)}, currRow < rRows, {cb.assign(currRow, currRow + 1L)}, { + cb.forLoop({ + cb.assign(currCol, 0L) + }, currCol < rCols, { + cb.assign(currCol, currCol + 1L) + }, { + cb.forLoop({ + cb.assign(currRow, 0L) + }, currRow < rRows, { + cb.assign(currRow, currRow + 1L) + }, { cb.append(Region.storeDouble( curWriteAddress, (currCol >= currRow).mux( @@ -1647,7 +1810,7 @@ class Emit[C]( computeR } else { - val crPType = x.pType.asInstanceOf[PCanonicalTuple] + val crPType = resultPType.asInstanceOf[PCanonicalTuple] val qPType = crPType.types(0).asInstanceOf[PCanonicalNDArray] val qShapeArray = if (mode == "complete") Array(M, M) else Array(M, K) @@ -1714,8 +1877,8 @@ class Emit[C]( } result } - case x: NDArrayMap => emitDeforestedNDArrayI(x) - case x: NDArrayMap2 => emitDeforestedNDArrayI(x) + case x: NDArrayMap => emitDeforestedNDArrayI(x) + case x: NDArrayMap2 => emitDeforestedNDArrayI(x) case x: NDArrayReshape => emitDeforestedNDArrayI(x) case x: NDArrayConcat => emitDeforestedNDArrayI(x) case x: NDArraySlice => emitDeforestedNDArrayI(x) @@ -1735,7 +1898,7 @@ class Emit[C]( case x@ResultOp(start, sig) => val AggContainer(aggs, sc, _) = container.get - val pt = x.pType.asInstanceOf[PCanonicalTuple] + val pt = PCanonicalTuple(false, sig.map(_.pResultType): _*) val addr = cb.newLocal("resultop_tuple_addr", pt.allocate(region)) cb += pt.stagedInitialize(addr, setMissing = false) @@ -1748,17 +1911,17 @@ class Emit[C]( (cb: EmitCodeBuilder) => cb += pt.setFieldMissing(addr, j)) } - presentPC(pt.loadCheapPCode(cb, addr)) + presentPC(pt.loadCheapSCode(cb, addr)) case x@ApplySeeded(fn, args, seed, rt) => - val codeArgs = args.map(a => (a.pType, EmitCode.fromI(cb.emb)(emitInNewBuilder(_, a)))) + val codeArgs = args.map(a => EmitCode.fromI(cb.emb)(emitInNewBuilder(_, a))) val impl = x.implementation val unified = impl.unify(Array.empty[Type], args.map(_.typ), rt) assert(unified) - impl.applySeededI(seed, cb, region, pt, codeArgs: _*) + impl.applySeededI(seed, cb, region, impl.computeReturnEmitType(x.typ, codeArgs.map(_.emitType)).st, codeArgs: _*) case AggStateValue(i, _) => val AggContainer(_, sc, _) = container.get - presentC(sc.states(i).serializeToRegion(cb, coerce[PBinary](pt), region)) + presentPC(sc.states(i).serializeToRegion(cb, PCanonicalBinary(), region)) case ToArray(a) => EmitStream.produce(this, a, cb, region, env, container) @@ -1769,7 +1932,9 @@ class Emit[C]( .flatMap(cb) { case (stream: SStreamCode) => val producer = stream.producer - val xAcc = mb.newEmitField(accumName, x.accPType, x.accPType.required) // in future, will choose compatible type for zero/body with requiredness + val stateEmitType = VirtualTypeWithReq(zero.typ, ctx.req.lookupState(x).head.asInstanceOf[TypeWithRequiredness]).canonicalEmitType + + val xAcc = mb.newEmitField(accumName, stateEmitType) val xElt = mb.newEmitField(valueName, producer.element.emitType) var tmpRegion: Settable[Region] = null @@ -1781,11 +1946,11 @@ class Emit[C]( cb.assign(tmpRegion, Region.stagedCreate(Region.REGULAR, region.getPool())) cb.assign(xAcc, emitI(zero, tmpRegion) - .map(cb)(pc => pc.castTo(cb, tmpRegion, x.accPType))) + .map(cb)(pc 
=> pc.castTo(cb, tmpRegion, stateEmitType.st))) } else { cb.assign(producer.elementRegion, region) cb.assign(xAcc, emitI(zero, producer.elementRegion) - .map(cb)(pc => pc.castTo(cb, producer.elementRegion, x.accPType))) + .map(cb)(pc => pc.castTo(cb, producer.elementRegion, stateEmitType.st))) } producer.unmanagedConsume(cb) { cb => @@ -1793,19 +1958,19 @@ class Emit[C]( if (producer.requiresMemoryManagementPerElement) { cb.assign(xAcc, emitI(body, producer.elementRegion, env.bind(accumName -> xAcc, valueName -> xElt)) - .map(cb)(pc => pc.castTo(cb, tmpRegion, x.accPType, deepCopy = true))) + .map(cb)(pc => pc.castTo(cb, tmpRegion, stateEmitType.st, deepCopy = true))) cb += producer.elementRegion.clearRegion() val swapRegion = cb.newLocal[Region]("streamfold_swap_region", producer.elementRegion) cb.assign(producer.elementRegion, tmpRegion.load()) cb.assign(tmpRegion, swapRegion.load()) } else { cb.assign(xAcc, emitI(body, producer.elementRegion, env.bind(accumName -> xAcc, valueName -> xElt)) - .map(cb)(pc => pc.castTo(cb, producer.elementRegion, x.accPType, deepCopy = false))) + .map(cb)(pc => pc.castTo(cb, producer.elementRegion, stateEmitType.st, deepCopy = false))) } } if (producer.requiresMemoryManagementPerElement) { - cb.assign(xAcc, xAcc.toI(cb).map(cb)(pc => pc.castTo(cb, region, pc.pt, deepCopy = true))) + cb.assign(xAcc, xAcc.toI(cb).map(cb)(pc => pc.castTo(cb, region, pc.st, deepCopy = true))) cb += producer.elementRegion.invalidate() cb += tmpRegion.invalidate() } @@ -1819,9 +1984,13 @@ class Emit[C]( var tmpRegion: Settable[Region] = null + val accTypes = ctx.req.lookupState(x).zip(acc.map(_._2.typ)) + .map { case (btwr, t) => VirtualTypeWithReq(t, btwr.asInstanceOf[TypeWithRequiredness]) + .canonicalEmitType + } + val xElt = mb.newEmitField(valueName, producer.element.emitType) val names = acc.map(_._1) - val accTypes = x.accPTypes.map(pt => EmitType(pt.sType, pt.required)) val accVars = (names, accTypes).zipped.map(mb.newEmitField) val resEnv = env.bind(names.zip(accVars): _*) @@ -1834,12 +2003,12 @@ class Emit[C]( cb.assign(tmpRegion, Region.stagedCreate(Region.REGULAR, region.getPool())) (accVars, acc).zipped.foreach { case (xAcc, (_, x)) => - cb.assign(xAcc, emitI(x, tmpRegion).map(cb)(_.castTo(cb, tmpRegion, xAcc.pt))) + cb.assign(xAcc, emitI(x, tmpRegion).map(cb)(_.castTo(cb, tmpRegion, xAcc.st))) } } else { cb.assign(producer.elementRegion, region) (accVars, acc).zipped.foreach { case (xAcc, (_, x)) => - cb.assign(xAcc, emitI(x, region).map(cb)(_.castTo(cb, region, xAcc.pt))) + cb.assign(xAcc, emitI(x, region).map(cb)(_.castTo(cb, region, xAcc.st))) } } @@ -1849,7 +2018,7 @@ class Emit[C]( (accVars, seq).zipped.foreach { (accVar, ir) => cb.assign(accVar, emitI(ir, producer.elementRegion, env = seqEnv) - .map(cb)(pc => pc.castTo(cb, tmpRegion, accVar.pt, deepCopy = true))) + .map(cb)(pc => pc.castTo(cb, tmpRegion, accVar.st, deepCopy = true))) } cb += producer.elementRegion.clearRegion() val swapRegion = cb.newLocal[Region]("streamfold2_swap_region", producer.elementRegion) @@ -1859,34 +2028,57 @@ class Emit[C]( (accVars, seq).zipped.foreach { (accVar, ir) => cb.assign(accVar, emitI(ir, producer.elementRegion, env = seqEnv) - .map(cb)(pc => pc.castTo(cb, producer.elementRegion, accVar.pt, deepCopy = false))) + .map(cb)(pc => pc.castTo(cb, producer.elementRegion, accVar.st, deepCopy = false))) } } } if (producer.requiresMemoryManagementPerElement) { accVars.foreach { xAcc => - cb.assign(xAcc, xAcc.toI(cb).map(cb)(pc => pc.castTo(cb, region, pc.pt, deepCopy = true))) + 
cb.assign(xAcc, xAcc.toI(cb).map(cb)(pc => pc.castTo(cb, region, pc.st, deepCopy = true))) } cb += producer.elementRegion.invalidate() cb += tmpRegion.invalidate() } emitI(res, env = resEnv) } + case t@Trap(child) => + val (ev, mb) = emitSplitMethod("trap", cb, child, region, env, container, loopEnv) + val maybeException = cb.newLocal[(String, java.lang.Integer)]("trap_msg", cb.emb.ecb.runMethodWithHailExceptionHandler(mb.mb.methodName)) + val sst = SStringPointer(PCanonicalString(false)) + + val tt = t.typ.asInstanceOf[TTuple] + val errTupleType = tt.types(0).asInstanceOf[TTuple] + val errTuple = SStackStruct(errTupleType, FastIndexedSeq(EmitType(sst, true), EmitType(SInt32, true))) + val tv = cb.emb.newEmitField("trap_errTuple", EmitType(errTuple, false)) + + val maybeMissingEV = cb.emb.newEmitField("trap_value", ev.emitType.copy(required = false)) + cb.ifx(maybeException.isNull, { + cb.assign(tv, EmitCode.missing(cb.emb, errTuple)) + cb.assign(maybeMissingEV, ev) + }, { + val str = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, sst.constructFromString(cb, region, maybeException.invoke[String]("_1")))) + val errorId = EmitCode.fromI(mb)(cb => + IEmitCode.present(cb, primitive(maybeException.invoke[java.lang.Integer]("_2").invoke[Int]("intValue")))) + cb.assign(tv, IEmitCode.present(cb, SStackStruct.constructFromArgs(cb, region, errTupleType, str, errorId))) + cb.assign(maybeMissingEV, EmitCode.missing(cb.emb, ev.st)) + }) + IEmitCode.present(cb, { + SStackStruct.constructFromArgs(cb, region, t.typ.asInstanceOf[TBaseStruct], tv, maybeMissingEV) + }) case Die(m, typ, errorId) => val cm = emitI(m) val msg = cb.newLocal[String]("die_msg") cm.consume(cb, cb.assign(msg, ""), - { sc => cb.assign(msg, sc.asString.loadString())}) + { sc => cb.assign(msg, sc.asString.loadString()) }) cb._throw[HailException](Code.newInstance[HailException, String, Int](msg, errorId)) - val t = PType.canonical(typ, true).deepInnerRequired(true) - IEmitCode.present(cb, t.defaultValue(cb.emb)) + IEmitCode.present(cb, typeWithReq.canonicalEmitType.st.defaultValue) case CastToArray(a) => - emitI(a).map(cb) { ind => ind.asIndexable.castToArray(cb) }.typecast[PCode] + emitI(a).map(cb) { ind => ind.asIndexable.castToArray(cb) } case x@ShuffleWith( keyFields, @@ -1899,15 +2091,15 @@ class Emit[C]( ) => val shuffleType = x.shuffleType - val shuffleST = SCanonicalShufflePointer(PCanonicalShuffle(shuffleType, true)) - val settable = mb.newPField(shuffleST.pType).asInstanceOf[SCanonicalShufflePointerSettable] + val shuffleST = SCanonicalShufflePointer(PCanonicalShuffle(shuffleType, false)) + val settable = mb.newPField(shuffleST).asInstanceOf[SCanonicalShufflePointerSettable] val shuffle = CompileTimeShuffleClient.create(cb, settable) shuffle.start(cb, region) - val shuffleEnv = env.bind(name -> mb.newPresentEmitSettable(settable)) + val shuffleEnv = env.bind(name -> EmitSettable.present(settable)) - val successfulShuffleIds: PValue = emitI(writerIR, env = shuffleEnv) + val successfulShuffleIds: SValue = emitI(writerIR, env = shuffleEnv) .get(cb, "shuffle ID must be non-missing") // just store it so the writer gets run .memoize(cb, "shuffleSuccessfulShuffleIds") @@ -1942,10 +2134,10 @@ class Emit[C]( shuffle.finishPut(cb) shuffle.close(cb) - val resPType = pt.asInstanceOf[PCanonicalBinary] + val resPType = PCanonicalBinary() // FIXME: server needs to send uuid for the successful partition - val boff = cb.memoize(resPType.loadCheapPCode(cb, resPType.allocate(region, 0)), "shuffleWriteBOff") - val baddr = 
SingleCodePCode.fromPCode(cb, boff, region) + val boff = cb.memoize(resPType.loadCheapSCode(cb, resPType.allocate(region, 0)), "shuffleWriteBOff") + val baddr = SingleCodeSCode.fromSCode(cb, boff, region) cb += resPType.storeLength(baddr.code.asInstanceOf[Code[Long]], 0) presentPC(boff) @@ -1957,7 +2149,7 @@ class Emit[C]( } case WriteValue(value, path, spec) => - emitI(path).flatMap(cb) { case p: PStringCode => + emitI(path).flatMap(cb) { case p: SStringCode => val pv = p.memoize(cb, "write_path") emitI(value).map(cb) { v => val ob = cb.newLocal[OutputBuffer]("write_ob") @@ -1971,7 +2163,13 @@ class Emit[C]( case x@TailLoop(name, args, body) => val loopStartLabel = CodeLabel() - val inits = args.zip(x.accPTypes) + + val accTypes = ctx.req.lookupState(x).zip(args.map(_._2.typ)) + .map { case (btwr, t) => VirtualTypeWithReq(t, btwr.asInstanceOf[TypeWithRequiredness]) + .canonicalEmitType + } + + val inits = args.zip(accTypes) val stagedPool = cb.newLocal[RegionPool]("tail_loop_pool_ref") cb.assign(stagedPool, region.getPool()) @@ -1985,14 +2183,14 @@ class Emit[C]( val newLoopEnv = loopEnv.getOrElse(Env.empty) // Emit into LoopRef's current region. (region 1) - loopRef.loopArgs.zip(inits).foreach { case (settable, ((_, x), pt)) => - settable.store(cb, emitI(x, loopRef.r1).map(cb)(_.castTo(cb, loopRef.r1, pt))) + loopRef.loopArgs.zip(inits).foreach { case (settable, ((_, x), et)) => + settable.store(cb, emitI(x, loopRef.r1).map(cb)(_.castTo(cb, loopRef.r1, et.st))) } cb.define(loopStartLabel) val result = emitI(body, env = argEnv, loopEnv = Some(newLoopEnv.bind(name, loopRef))).map(cb) { pc => - val answerInRightRegion = pc.copyToRegion(cb, region) + val answerInRightRegion = pc.copyToRegion(cb, region, pc.st) cb.append(loopRef.r1.clearRegion()) cb.append(loopRef.r2.clearRegion()) answerInRightRegion @@ -2004,8 +2202,8 @@ class Emit[C]( val loopRef = loopEnv.get.lookup(name) // Need to emit into region 2, clear region 1, then swap them. 
- (loopRef.tmpLoopArgs, loopRef.loopTypes, args).zipped.map { case (tmpLoopArg, pt, arg) => - tmpLoopArg.store(cb, emitI(arg, loopEnv = None, region = loopRef.r2).map(cb)(_.castTo(cb, loopRef.r2, pt))) + (loopRef.tmpLoopArgs, loopRef.loopTypes, args).zipped.map { case (tmpLoopArg, et, arg) => + tmpLoopArg.store(cb, emitI(arg, loopEnv = None, region = loopRef.r2).map(cb)(_.castTo(cb, loopRef.r2, et.st))) } cb.append(loopRef.r1.clearRegion()) @@ -2025,55 +2223,65 @@ class Emit[C]( cb.define(deadLabel) val rt = loopRef.resultType - IEmitCode(CodeLabel(), CodeLabel(), rt.st.pType.defaultValue(mb), rt.required) + IEmitCode(CodeLabel(), CodeLabel(), rt.st.defaultValue, rt.required) case x@CollectDistributedArray(contexts, globals, cname, gname, body, tsd) => - val ctxsType = coerce[PStream](contexts.pType) val parentCB = mb.ecb + emitStream(contexts, cb, region).map(cb) { case ctxStream: SStreamCode => - val functionID: String = { - val bodyFB = EmitFunctionBuilder[Region, Array[Byte], Array[Byte], Array[Byte]](ctx.executeContext, "collect_distributed_array") + def wrapInTuple(cb: EmitCodeBuilder, region: Value[Region], et: EmitCode): SBaseStructPointerCode = { + PCanonicalTuple(true, et.emitType.canonicalPType).constructFromFields(cb, region, FastIndexedSeq(et), deepCopy = false) + } + + val bufferSpec: BufferSpec = BufferSpec.defaultUncompressed + + val emitGlobals = EmitCode.fromI(mb)(cb => emitInNewBuilder(cb, globals)) - // FIXME this is terrible - val m = MakeTuple.ordered(FastSeq(body)) - val bodyReturnPType = PCanonicalTuple(true, body.pType) - m._pType = bodyReturnPType + val ctxType = ctxStream.st.elementEmitType + val contextPTuple: PTuple = PCanonicalTuple(required = true, ctxType.canonicalPType) + val globalPTuple: PTuple = PCanonicalTuple(required = true, emitGlobals.emitType.canonicalPType) + val contextSpec: TypedCodecSpec = TypedCodecSpec(contextPTuple, bufferSpec) + val globalSpec: TypedCodecSpec = TypedCodecSpec(globalPTuple, bufferSpec) + // emit body in new FB + val bodyFB = EmitFunctionBuilder[Region, Array[Byte], Array[Byte], Array[Byte]](ctx.executeContext, "collect_distributed_array") + + var bodySpec: TypedCodecSpec = null bodyFB.emitWithBuilder { cb => - val ctxIB = cb.newLocal[InputBuffer]("cda_ctx_ib", x.contextSpec.buildCodeInputBuffer( + val region = bodyFB.getCodeParam[Region](1) + val ctxIB = cb.newLocal[InputBuffer]("cda_ctx_ib", contextSpec.buildCodeInputBuffer( Code.newInstance[ByteArrayInputStream, Array[Byte]](bodyFB.getCodeParam[Array[Byte]](2)))) - val gIB = cb.newLocal[InputBuffer]("cda_g_ib", x.globalSpec.buildCodeInputBuffer( + val gIB = cb.newLocal[InputBuffer]("cda_g_ib", globalSpec.buildCodeInputBuffer( Code.newInstance[ByteArrayInputStream, Array[Byte]](bodyFB.getCodeParam[Array[Byte]](3)))) - val decodedContext = x.contextSpec.encodedType.buildDecoder(x.contextSpec.encodedVirtualType, bodyFB.ecb) - .apply(cb, bodyFB.getCodeParam[Region](1), ctxIB) + val decodedContext = contextSpec.encodedType.buildDecoder(contextSpec.encodedVirtualType, bodyFB.ecb) + .apply(cb, region, ctxIB) .asBaseStruct .memoize(cb, "decoded_context_tuple") .loadField(cb, 0) - .typecast[PCode] - .memoize(cb, "decoded_context") + .memoizeField(cb, "decoded_context") - val decodedGlobal = x.globalSpec.encodedType.buildDecoder(x.globalSpec.encodedVirtualType, bodyFB.ecb) - .apply(cb, bodyFB.getCodeParam[Region](1), gIB) + val decodedGlobal = globalSpec.encodedType.buildDecoder(globalSpec.encodedVirtualType, bodyFB.ecb) + .apply(cb, region, gIB) .asBaseStruct .memoize(cb, 
"decoded_global_tuple") .loadField(cb, 0) - .typecast[PCode] - .memoize(cb, "decoded_global") + .memoizeField(cb, "decoded_global") - val env = Env[EmitValue]( + val env = EmitEnv(Env[EmitValue]( (cname, decodedContext), - (gname, decodedGlobal)) + (gname, decodedGlobal)), FastIndexedSeq()) - val bodyResult = new Emit(ctx, bodyFB.ecb) - .emitI(m, cb, env, None) - .get(cb, "cda return cannot be missing!") - .memoize(cb, "cda_body_result") + val bodyResult = wrapInTuple(cb, + region, + EmitCode.fromI(cb.emb)(cb => new Emit(ctx, bodyFB.ecb).emitI(body, cb, env, None))) + + bodySpec = TypedCodecSpec(bodyResult.st.canonicalPType().setRequired(true), bufferSpec) val bOS = cb.newLocal[ByteArrayOutputStream]("cda_baos", Code.newInstance[ByteArrayOutputStream]()) - val bOB = cb.newLocal[OutputBuffer]("cda_ob", x.bodySpec.buildCodeOutputBuffer(bOS)) - x.bodySpec.encodedType.buildEncoder(bodyResult.st, cb.emb.ecb) - .apply(cb, bodyResult, bOB) + val bOB = cb.newLocal[OutputBuffer]("cda_ob", bodySpec.buildCodeOutputBuffer(bOS)) + bodySpec.encodedType.buildEncoder(bodyResult.st, cb.emb.ecb) + .apply(cb, bodyResult, bOB) cb += bOB.invoke[Unit]("flush") cb += bOB.invoke[Unit]("close") bOS.invoke[Array[Byte]]("toByteArray") @@ -2081,59 +2289,55 @@ class Emit[C]( val fID = genUID() parentCB.addModule(fID, bodyFB.resultWithIndex()) - fID - } - - val spark = parentCB.backend() - - val baos = mb.genFieldThisRef[ByteArrayOutputStream]() - val buf = mb.genFieldThisRef[OutputBuffer]() - val ctxab = mb.genFieldThisRef[ByteArrayArrayBuilder]() - val encRes = mb.genFieldThisRef[Array[Array[Byte]]]() - - def wrapInTuple(cb: EmitCodeBuilder, et: EmitCode): SBaseStructPointerCode = { - PCanonicalTuple(true, et.pt).constructFromFields(cb, region, FastIndexedSeq(et), deepCopy = false) - } + val functionID = fID + + val spark = parentCB.backend() + + val baos = mb.genFieldThisRef[ByteArrayOutputStream]() + val buf = mb.genFieldThisRef[OutputBuffer]() + val ctxab = mb.genFieldThisRef[ByteArrayArrayBuilder]() + val encRes = mb.genFieldThisRef[Array[Array[Byte]]]() + + + def addContexts(cb: EmitCodeBuilder, ctxStream: StreamProducer): Unit = { + ctxStream.memoryManagedConsume(region, cb, setup = { cb => + cb += ctxab.invoke[Int, Unit]("ensureCapacity", ctxStream.length.map(_.apply(cb)).getOrElse(16)) + }) { cb => + cb += baos.invoke[Unit]("reset") + val ctxTuple = wrapInTuple(cb, region, ctxStream.element) + .memoize(cb, "cda_add_contexts_addr") + contextSpec.encodedType.buildEncoder(ctxTuple.st, parentCB) + .apply(cb, ctxTuple, buf) + cb += buf.invoke[Unit]("flush") + cb += ctxab.invoke[Array[Byte], Unit]("add", baos.invoke[Array[Byte]]("toByteArray")) + } + } - def addContexts(cb: EmitCodeBuilder, ctxStream: StreamProducer): Unit = { - ctxStream.memoryManagedConsume(region, cb, setup = { cb => - cb += ctxab.invoke[Int, Unit]("ensureCapacity", ctxStream.length.map(_.apply(cb)).getOrElse(16)) - }) { cb => - cb += baos.invoke[Unit]("reset") - val ctxTuple = wrapInTuple(cb, ctxStream.element) - .memoize(cb, "cda_add_contexts_addr") - x.contextSpec.encodedType.buildEncoder(ctxTuple.st, parentCB) - .apply(cb, ctxTuple, buf) + def addGlobals(cb: EmitCodeBuilder): Unit = { + val wrapped = wrapInTuple(cb, region, emitGlobals) + globalSpec.encodedType.buildEncoder(wrapped.st, parentCB) + .apply(cb, wrapped, buf) cb += buf.invoke[Unit]("flush") - cb += ctxab.invoke[Array[Byte], Unit]("add", baos.invoke[Array[Byte]]("toByteArray")) } - } - - def addGlobals(cb: EmitCodeBuilder): Unit = { - val g = wrapInTuple(cb, EmitCode.fromI(mb)(cb 
=> emitInNewBuilder(cb, globals))).memoize(cb, "cda_g") - x.globalSpec.encodedType.buildEncoder(g.st, parentCB) - .apply(cb, g, buf) - cb += buf.invoke[Unit]("flush") - } - def decodeResult(cb: EmitCodeBuilder): PCode = { - val len = mb.newLocal[Int]("cda_result_length") - val ib = mb.newLocal[InputBuffer]("decode_ib") - - cb.assign(len, encRes.length()) - x.pType.asInstanceOf[PCanonicalArray].constructFromElements(cb, region, len, deepCopy = false) { (cb, i) => - cb.assign(ib, x.bodySpec.buildCodeInputBuffer(Code.newInstance[ByteArrayInputStream, Array[Byte]](encRes(i)))) - val eltTupled = x.bodySpec.encodedType.buildDecoder(x.bodySpec.encodedVirtualType, parentCB) - .apply(cb, region, ib) - .asBaseStruct - .memoize(cb, "cda_eltTupled") - eltTupled.loadField(cb, 0) + def decodeResult(cb: EmitCodeBuilder): SCode = { + val len = mb.newLocal[Int]("cda_result_length") + val ib = mb.newLocal[InputBuffer]("decode_ib") + + cb.assign(len, encRes.length()) + val pt = PCanonicalArray(bodySpec.encodedType.decodedSType(bodySpec.encodedVirtualType).asInstanceOf[SBaseStruct].fieldEmitTypes(0).canonicalPType) + pt.asInstanceOf[PCanonicalArray].constructFromElements(cb, region, len, deepCopy = false) { (cb, i) => + cb.assign(ib, bodySpec.buildCodeInputBuffer(Code.newInstance[ByteArrayInputStream, Array[Byte]](encRes(i)))) + val eltTupled = bodySpec.encodedType.buildDecoder(bodySpec.encodedVirtualType, parentCB) + .apply(cb, region, ib) + .asBaseStruct + .memoize(cb, "cda_eltTupled") + eltTupled.loadField(cb, 0) + } } - } - emitStream(contexts, cb, region).map(cb) { case ctxStream: SStreamCode => cb.assign(baos, Code.newInstance[ByteArrayOutputStream]()) - cb.assign(buf, x.contextSpec.buildCodeOutputBuffer(baos)) // TODO: take a closer look at whether we need two codec buffers? + cb.assign(buf, contextSpec.buildCodeOutputBuffer(baos)) // TODO: take a closer look at whether we need two codec buffers? cb.assign(ctxab, Code.newInstance[ByteArrayArrayBuilder, Int](16)) addContexts(cb, ctxStream.producer) cb += baos.invoke[Unit]("reset") @@ -2155,21 +2359,17 @@ class Emit[C]( ctx.req.lookupOpt(ir) match { case Some(r) => if (result.required != r.required) { - throw new RuntimeException(s"requiredness mismatch: EC=${ result.required } / Analysis=${ r.required }\n${ result.pt }\n${ Pretty(ir) }") + throw new RuntimeException(s"requiredness mismatch: EC=${ result.required } / Analysis=${ r.required }\n${ result.st }\n${ Pretty(ir) }") } case _ => // we dynamically generate some IRs in emission. Ignore these... } - if (result.pt != pt) { - if (!result.pt.equalModuloRequired(pt)) - throw new RuntimeException(s"ptype mismatch:\n emitted: ${ result.pt }\n inferred: ${ ir.pType }\n ir: $ir") - (result.pt.required, pt.required) match { - case (true, false) => result.map(cb)(pc => PCode(pc.pt.setRequired(pt.required), pc.code)) - case (false, true) => IEmitCode.present(cb, result.get(cb)) - } - } else result + if (result.st.virtualType != ir.typ) + throw new RuntimeException(s"type mismatch:\n EC=${ result.st.virtualType }\n IR=${ ir.typ }\n node: ${ Pretty(ir).take(50) }") + + result } /** @@ -2205,50 +2405,50 @@ class Emit[C]( * {@code tAggIn.elementType}. {@code tAggIn.symTab} is not used by Emit. 
* **/ - private[ir] def emit(ir: IR, mb: EmitMethodBuilder[C], env: E, container: Option[AggContainer]): EmitCode = { + private[ir] def emit(ir: IR, mb: EmitMethodBuilder[C], env: EmitEnv, container: Option[AggContainer]): EmitCode = { val region = mb.getCodeParam[Region](1) emit(ir, mb, region, env, container, None) } - private[ir] def emitWithRegion(ir: IR, mb: EmitMethodBuilder[C], region: Value[Region], env: E, container: Option[AggContainer]): EmitCode = + private[ir] def emitWithRegion(ir: IR, mb: EmitMethodBuilder[C], region: Value[Region], env: EmitEnv, container: Option[AggContainer]): EmitCode = emit(ir, mb, region, env, container, None) private def emit( ir: IR, mb: EmitMethodBuilder[C], region: Value[Region], - env: E, + env: EmitEnv, container: Option[AggContainer], loopEnv: Option[Env[LoopRef]], fallingBackFromEmitI: Boolean = false ): EmitCode = { - def emit(ir: IR, region: Value[Region] = region, env: E = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): EmitCode = - this.emit(ir, mb, region, env, container, loopEnv) + if (ctx.methodSplits.contains(ir) && !ctx.tryingToSplit.contains(ir)) { + return EmitCode.fromI(mb)(cb => emitInSeparateMethod(s"split_${ir.getClass.getSimpleName}", cb, ir, region, env, container, loopEnv)) + } + - def emitInMethod(ir: IR, mb: EmitMethodBuilder[C]): EmitCode = - this.emit(ir, mb, Env.empty, container) + def emit(ir: IR, region: Value[Region] = region, env: EmitEnv = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): EmitCode = + this.emit(ir, mb, region, env, container, loopEnv) - def emitI(ir: IR, cb: EmitCodeBuilder, env: E = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): IEmitCode = + def emitI(ir: IR, cb: EmitCodeBuilder, env: EmitEnv = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): IEmitCode = this.emitI(ir, cb, region, env, container, loopEnv) - def emitVoid(ir: IR, env: E = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): Code[Unit] = { + def emitVoid(ir: IR, env: EmitEnv = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): Code[Unit] = { EmitCodeBuilder.scopedVoid(mb) { cb => this.emitVoid(cb, ir, region, env, container, loopEnv) } } - def emitStream(ir: IR, outerRegion: Value[Region] ): EmitCode = + def emitStream(ir: IR, outerRegion: Value[Region]): EmitCode = EmitCode.fromI(mb)(cb => EmitStream.produce(this, ir, cb, outerRegion, env, container)) - val pt = ir.pType - // ideally, emit would not be called with void values, but initOp args can be void // working towards removing this - if (pt == PVoid) + if (ir.typ == TVoid) return EmitCode.fromI(mb) { cb => this.emitVoid(cb, ir, region, env, container, loopEnv) - IEmitCode.present(cb, PCode._empty) + IEmitCode.present(cb, SCode._empty) } val result: EmitCode = (ir: @unchecked) match { @@ -2261,90 +2461,15 @@ class Emit[C]( } } - case Ref(name, _) => - val ev = env.lookup(name) - if (!ev.pt.equalModuloRequired(pt)) - throw new RuntimeException(s"PValue type did not match inferred ptype:\n name: $name\n pv: ${ ev.pt }\n ir: $pt") + case Ref(name, t) => + val ev = env.bindings.lookup(name) + if (ev.st.virtualType != t) + throw new RuntimeException(s"emit value type did not match specified type:\n name: $name\n ev: ${ ev.st.virtualType }\n ir: ${ ir.typ }") ev.load - case x@(_: ArraySort | _: ToSet | _: ToDict) => - val 
resultTypeAsIterable = coerce[PIterable](x.pType) - val eltType = x.children(0).asInstanceOf[IR].pType.asInstanceOf[PIterable].elementType - val eltVType = eltType.virtualType - - val vab = new StagedArrayBuilder(resultTypeAsIterable.elementType, mb, 0) - val sorter = new ArraySorter(EmitRegion(mb, region), vab) - - val (array, lessThan, distinct, leftRightComparatorNames: Array[String]) = (x: @unchecked) match { - case ArraySort(a, l, r, lessThan) => (a, lessThan, Code._empty, Array(l, r)) - case ToSet(a) => - val discardNext = mb.genEmitMethod("discardNext", - FastIndexedSeq[ParamType](typeInfo[Region], PCodeEmitParamType(eltType), PCodeEmitParamType(eltType)), - typeInfo[Boolean]) - val cmp2 = ApplyComparisonOp(EQWithNA(eltVType), In(0, PCodeEmitParamType(eltType)), In(1, PCodeEmitParamType(eltType))) - InferPType(cmp2) - val EmitCode(m, pv) = emitInMethod(cmp2, discardNext) - discardNext.emitWithBuilder { cb => - m || pv.asBoolean.boolCode(cb) - } - val lessThan = ApplyComparisonOp(Compare(eltVType), In(0, PCodeEmitParamType(eltType)), In(1, PCodeEmitParamType(eltType))) < 0 - InferPType(lessThan) - (a, lessThan, sorter.distinctFromSorted { (r, v1, m1, v2, m2) => - EmitCodeBuilder.scopedCode[Boolean](mb) { cb => - cb.invokeCode[Boolean](discardNext, r, - EmitCode(Code._empty, m1, PCode(eltType, v1)), - EmitCode(Code._empty, m2, PCode(eltType, v2))) - } - }, Array.empty[String]) - case ToDict(a) => - val (k0, k1, keyType) = eltType match { - case t: PStruct => (GetField(In(0, PCodeEmitParamType(eltType)), "key"), GetField(In(1, PCodeEmitParamType(eltType)), "key"), t.fieldType("key")) - case t: PTuple => (GetTupleElement(In(0, PCodeEmitParamType(eltType)), 0), GetTupleElement(In(1, PCodeEmitParamType(eltType)), 0), t.types(0)) - } - val discardNext = mb.genEmitMethod("discardNext", - FastIndexedSeq[ParamType](typeInfo[Region], PCodeEmitParamType(eltType), PCodeEmitParamType(eltType)), - typeInfo[Boolean]) - - val cmp2 = ApplyComparisonOp(EQWithNA(keyType.virtualType), k0, k1).deepCopy() - InferPType(cmp2) - val EmitCode(m, pv) = emitInMethod(cmp2, discardNext) - discardNext.emitWithBuilder { cb => - m || pv.asBoolean.boolCode(cb) - } - val lessThan = (ApplyComparisonOp(Compare(keyType.virtualType), k0, k1) < 0).deepCopy() - InferPType(lessThan) - (a, lessThan, Code(sorter.pruneMissing, sorter.distinctFromSorted { (r, v1, m1, v2, m2) => - EmitCodeBuilder.scopedCode[Boolean](mb) { cb => - cb.invokeCode[Boolean](discardNext, r, - EmitCode(Code._empty, m1, PCode(eltType, v1)), - EmitCode(Code._empty, m2, PCode(eltType, v2))) - } - }), Array.empty[String]) - } - - val sort = vab.ti match { - case BooleanInfo => sorter.sort(makeDependentSortingFunction[Boolean]( - region, eltType, lessThan, env, leftRightComparatorNames)) - case IntInfo => sorter.sort(makeDependentSortingFunction[Int](region, eltType, lessThan, env, leftRightComparatorNames)) - case LongInfo => sorter.sort(makeDependentSortingFunction[Long]( - region, eltType, lessThan, env, leftRightComparatorNames)) - case FloatInfo => sorter.sort(makeDependentSortingFunction[Float]( - region, eltType, lessThan, env, leftRightComparatorNames)) - case DoubleInfo => sorter.sort(makeDependentSortingFunction[Double]( - region, eltType, lessThan, env, leftRightComparatorNames)) - } - - val optStream = emitStream(array, region) - EmitCode.fromI(mb)(cb => optStream.toI(cb).map(cb) { case stream: SStreamCode => - StreamUtils.writeToArrayBuilder(cb, stream.producer, vab, region) - cb += sort - cb += distinct - sorter.toRegion(cb, x.pType) - }) - 
case In(i, expectedPType) => // this, Code[Region], ... - val ev = mb.getEmitParam(2 + i, region) + val ev = env.inputValues(i).apply(region) ev case ir@Apply(fn, typeArgs, args, rt) => @@ -2352,25 +2477,24 @@ class Emit[C]( val unified = impl.unify(typeArgs, args.map(_.typ), rt) assert(unified) - val argPTypes = args.map(_.pType) - val k = (fn, typeArgs, argPTypes, pt) + val emitArgs = args.map(a => EmitCode.fromI(mb)(emitI(a, _))).toFastIndexedSeq + + val argSTypes = emitArgs.map(_.st) + val retType = impl.computeStrictReturnEmitType(ir.typ, argSTypes) + val k = (fn, typeArgs, argSTypes, retType) val meth = methods.get(k) match { case Some(funcMB) => funcMB case None => - val funcMB = impl.getAsMethod(mb.ecb, pt, typeArgs, argPTypes: _*) + val funcMB = impl.getAsMethod(mb.ecb, retType, typeArgs, argSTypes: _*) methods.update(k, funcMB) funcMB } - val vars = args.map { a => coerce[Any](mb.newLocal()(typeToTypeInfo(a.pType))) } EmitCode.fromI(mb) { cb => val emitArgs = args.map(a => EmitCode.fromI(cb.emb)(emitI(a, _))).toFastIndexedSeq IEmitCode.multiMapEmitCodes(cb, emitArgs) { codeArgs => - for ((l, i) <- vars.zip(codeArgs)) { - cb.assign(l, i.code) - } - PCode(pt, meth.invokeCode[Any](CodeParam(region) +: vars.map(_.get: Param): _*)) + cb.invokeSCode(meth, FastIndexedSeq[Param](CodeParam(region)) ++ codeArgs.map(pc => pc: Param): _*) } } case x@ApplySpecial(_, typeArgs, args, rt) => @@ -2378,7 +2502,8 @@ class Emit[C]( val impl = x.implementation val unified = impl.unify(typeArgs, args.map(_.typ), rt) assert(unified) - impl.apply(EmitRegion(mb, region), pt, typeArgs, codeArgs: _*) + val retType = impl.computeReturnEmitType(x.typ, codeArgs.map(_.emitType)) + impl.apply(EmitRegion(mb, region), retType.st, typeArgs, codeArgs: _*) case x@WritePartition(stream, pctx, writer) => val ctxCode = emit(pctx) @@ -2401,463 +2526,44 @@ class Emit[C]( ctx.req.lookupOpt(ir) match { case Some(r) => if (result.required != r.required) { - throw new RuntimeException(s"requiredness mismatch: EC=${ result.required } / Analysis=${ r.required }\n${ result.pt }\n${ Pretty(ir) }") + throw new RuntimeException(s"requiredness mismatch: EC=${ result.required } / Analysis=${ r.required }\n${ result.emitType }\n${ Pretty(ir) }") } case _ => // we dynamically generate some IRs in emission. Ignore these... 
} - if (result.pt != pt) { - if (!result.pt.equalModuloRequired(pt)) - throw new RuntimeException(s"ptype mismatch:\n emitted: ${ result.pt }\n inferred: ${ ir.pType }\n ir: $ir") - (result.pt.required, pt.required) match { - case (true, false) => EmitCode.fromI(mb)(cb => result.toI(cb).map(cb)(pc => PCode(pc.pt.setRequired(pt.required), pc.code))) - case (false, true) => EmitCode.fromI(mb) { cb => IEmitCode.present(cb, result.toI(cb).get(cb)) } - } - } else result + if (result.st.virtualType != ir.typ) + throw new RuntimeException(s"type mismatch: EC=${ result.st.virtualType } / IR=${ ir.typ }\n") + result } - private def capturedReferences(ir: IR): (IR, (Emit.E, DependentEmitFunctionBuilder[_]) => Emit.E) = { - var ids = Set[String]() - - VisitIR(ir) { - case Ref(id, _) => - ids += id - case _ => - } - - (ir, { (env: Emit.E, f: DependentEmitFunctionBuilder[_]) => - Env[EmitValue](ids.toFastSeq.flatMap { id => - env.lookupOption(id).map { e => - (id, f.newDepEmitField(e.load)) - } - }: _*) - }) - } + private def makeDependentSortingFunction( + cb: EmitCodeBuilder, + elemSCT: SingleCodeType, ir: IR, env: EmitEnv, emitter: Emit[_], leftRightComparatorNames: Array[String]): (EmitCodeBuilder, Value[Region], Code[_], Code[_]) => Code[Boolean] = { + val fb = cb.emb.ecb - private def makeDependentSortingFunction[T: TypeInfo]( - region: Code[Region], - elemPType: PType, ir: IR, env: Emit.E, leftRightComparatorNames: Array[String]): DependentEmitFunctionBuilder[AsmFunction2[T, T, Boolean]] = { - val (newIR, getEnv) = capturedReferences(ir) - val f = cb.genDependentFunction[T, T, Boolean](baseName = "sort_compare") - val fregion = f.newDepField[Region](region) - var newEnv = getEnv(env, f) - - val leftEC = EmitCode(Code._empty, false, PCode(elemPType, f.getCodeParam[T](1))) - val rightEC = EmitCode(Code._empty, false, PCode(elemPType, f.getCodeParam[T](2))) - val sort = f.genEmitMethod("sort", - FastIndexedSeq(typeInfo[Region], leftEC.emitParamType, rightEC.emitParamType), + var newEnv = env + val sort = fb.genEmitMethod("dependent_sorting_func", + FastIndexedSeq(typeInfo[Region], CodeParamType(elemSCT.ti), CodeParamType(elemSCT.ti)), BooleanInfo) - if (leftRightComparatorNames.nonEmpty) { - assert(leftRightComparatorNames.length == 2) - newEnv = newEnv.bindIterable( - IndexedSeq( - (leftRightComparatorNames(0), sort.getEmitParam(2, fregion)), - (leftRightComparatorNames(1), sort.getEmitParam(3, fregion)))) - } - - val EmitCode(m, v) = new Emit(ctx, f.ecb).emit(newIR, sort, newEnv, None) - - sort.emit(m.mux(Code._fatal[Boolean]("Result of sorting function cannot be missing."), v.code)) - f.apply_method.emitWithBuilder(cb => cb.invokeCode[Boolean](sort, fregion, leftEC, rightEC)) - f - } - - private def present(pv: PCode): EmitCode = EmitCode(Code._empty, false, pv) - - private def present(pt: PType, c: Code[_]): EmitCode = - EmitCode(Code._empty, false, PCode(pt, c)) - - def deforestNDArrayI(x0: IR, cb: EmitCodeBuilder, region: Value[Region], env: E): IEmitCode = { - - def emit(ir: IR, env: E = env): IEmitCode = - this.emitI(ir, cb, region, env, None, None) - - def dEmit(ir: IR, env: E = env): IEmitCode = emit(ir, env) - - def deforest(x: IR): IEmitCodeGen[NDArrayEmitter] = { - val xType = coerce[PNDArray](x.pType) - val outputNDims = xType.nDims - - x match { - case NDArrayMap(child, elemName, body) => - deforest(child).map(cb) { childEmitter => - val childP = child.pType.asInstanceOf[PNDArray] - val elemPType = childP.elementType - - new NDArrayEmitter(childEmitter.outputShape) { - override def 
outputElement(cb: EmitCodeBuilder, idxVars: IndexedSeq[Value[Long]]): PCode = { - val elemRef = cb.emb.newPresentEmitField("ndarray_map_element_name", elemPType) - - cb.assign(elemRef, childEmitter.outputElement(cb, idxVars)) - val bodyEnv = env.bind(elemName, elemRef) - val bodyI = dEmit(body, bodyEnv) - - bodyI.get(cb, "NDArray map body cannot be missing") - } - } - } - case NDArrayMap2(lChild, rChild, lName, rName, body) => - deforest(lChild).flatMap(cb) { leftChildEmitter => - deforest(rChild).map(cb) { rightChildEmitter => - val lP = coerce[PNDArray](lChild.pType) - val rP = coerce[PNDArray](rChild.pType) - - val leftShapeValues = leftChildEmitter.outputShape - val rightShapeValues = rightChildEmitter.outputShape - - val (newSetupShape, shapeArray) = NDArrayEmitter.unifyShapes2(cb.emb, leftShapeValues, rightShapeValues) - - cb.append(newSetupShape) - - new NDArrayEmitter(shapeArray) { - override def outputElement(cb: EmitCodeBuilder, idxVars: IndexedSeq[Value[Long]]): PCode = { - val lElemRef = cb.emb.newPresentEmitField(lName, lP.elementType) - val rElemRef = cb.emb.newPresentEmitField(rName, rP.elementType) - - val bodyEnv = env.bind(lName, lElemRef) - .bind(rName, rElemRef) - - val lIdxVars2 = NDArrayEmitter.zeroBroadcastedDims2(cb.emb, idxVars, nDims, leftShapeValues) - val rIdxVars2 = NDArrayEmitter.zeroBroadcastedDims2(cb.emb, idxVars, nDims, rightShapeValues) - - cb.assign(lElemRef, leftChildEmitter.outputElement(cb, lIdxVars2)) - cb.assign(rElemRef, rightChildEmitter.outputElement(cb, rIdxVars2)) - - dEmit(body, bodyEnv).get(cb, "NDArrayMap2 body cannot be missing") - } - } - } - } - case NDArrayReindex(child, indexExpr) => - deforest(child).map(cb) { childEmitter => - val childPType = child.pType.asInstanceOf[PNDArray] - - val shapeSeq = indexExpr.map { childIndex => - if (childIndex < childPType.nDims) - childEmitter.outputShape(childIndex) - else - const(1L) - } - - new NDArrayEmitter(shapeSeq) { - override def outputElement(cb: EmitCodeBuilder, idxVars: IndexedSeq[Value[Long]]): PCode = { - val concreteIdxsForChild = Array.tabulate(childEmitter.nDims) { childDim => - val parentDim = indexExpr.indexOf(childDim) - idxVars(parentDim) - } - childEmitter.outputElement(cb, concreteIdxsForChild) - } - } - } - case x@NDArrayReshape(childND, shape) => - deforest(childND).flatMap(cb) { childEmitter => - val outputNDims = x.pType.nDims - - val childShapeValues = childEmitter.outputShape - - val requestedShapeValues = Array.tabulate(x.pType.nDims)(i => cb.newLocal[Long](s"ndarray_reindex_request_shape_$i")).toIndexedSeq - - dEmit(shape, env).map(cb) { pc => - val tupleCode = pc.asBaseStruct - val tupleValue = tupleCode.memoize(cb, "ndarray_reshape_requested") - - val hasNegativeOne = cb.newLocal[Boolean]("ndarray_reshape_has_neg_one") - val runningProduct = cb.newLocal[Long]("ndarray_reshape_running_product") - val replacesNegativeOne = cb.newLocal[Long]("ndarray_reshape_replaces_neg_one") - val tempShapeElement = cb.newLocal[Long]("ndarray_reshape_temp_shape_element") - - cb.assign(hasNegativeOne, false) - cb.assign(runningProduct, 1L) - - (0 until outputNDims).foreach { i => - cb.assign(tempShapeElement, tupleValue.loadField(cb, i).get(cb, "Can't reshape if elements of reshape tuple are missing.").asLong.longCode(cb)) - cb.ifx(tempShapeElement < 0L, - { - cb.ifx(tempShapeElement ceq -1L, - { - cb.ifx(hasNegativeOne, { - cb._fatal("Can't infer shape, more than one -1") - }, { - cb.assign(hasNegativeOne, true) - }) - }, - { - cb._fatal("Can't reshape, new shape must contain only 
nonnegative numbers or -1") - } - ) - }, - { - cb.assign(runningProduct, runningProduct * tempShapeElement) - } - ) - } - - val numElements = cb.newLocal[Long]("ndarray_reshape_child_num_elements") - cb.assign(numElements, childND.pType.asInstanceOf[PNDArray].numElements(childShapeValues)) - - cb.ifx(hasNegativeOne.mux( - (runningProduct ceq 0L) || (numElements % runningProduct) > 0L, - numElements cne runningProduct - ), { - cb._fatal("Can't reshape since requested shape is incompatible with number of elements") - }) - cb.assign(replacesNegativeOne, (runningProduct ceq 0L).mux(0L, numElements / runningProduct)) - - (0 until outputNDims).foreach { i => - cb.assign(tempShapeElement, tupleValue.loadField(cb, i).get(cb, "Can't reshape if elements of reshape tuple are missing.").asLong.longCode(cb)) - cb.assign(requestedShapeValues(i), (tempShapeElement ceq -1L).mux(replacesNegativeOne, tempShapeElement)) - } - - new NDArrayEmitter(requestedShapeValues) { - override def outputElement(cb: EmitCodeBuilder, idxVars: IndexedSeq[Value[Long]]): PCode = { - val storeElementIndex = cb.newLocal[Long]("ndarray_reshape_index_store") - cb.assign(storeElementIndex, LinalgCodeUtils.linearizeIndicesRowMajor(idxVars, requestedShapeValues, cb.emb)) - - val (newIdxVarsSetup, newIdxVars) = LinalgCodeUtils.unlinearizeIndexRowMajor(storeElementIndex, childShapeValues, cb.emb) - cb.append(newIdxVarsSetup) - assert(newIdxVars.length == childEmitter.nDims) + sort.emitWithBuilder[Boolean] { cb => + val region = sort.getCodeParam[Region](1) + val leftEC = cb.memoize(EmitCode.present(sort, elemSCT.loadToSCode(cb, region, sort.getCodeParam(2)(elemSCT.ti))), "sort_leftEC") + val rightEC = cb.memoize(EmitCode.present(sort, elemSCT.loadToSCode(cb, region, sort.getCodeParam(3)(elemSCT.ti))), "sort_rightEC") - childEmitter.outputElement(cb, newIdxVars) - } - } - } - } - case x@NDArrayFilter(child, filters) => - deforest(child).map(cb) { childEmitter => - - val filterWasMissing = (0 until filters.size).map(i => cb.newField[Boolean](s"ndarray_filter_${i}_was_missing")) - val filtPValues = new Array[PIndexableValue](filters.size) - val outputShape = childEmitter.outputShape.map(_ => cb.newField[Long]("ndarray_filter_output_shapes")) - - filters.zipWithIndex.foreach { case (filt, i) => - // Each filt is a sequence that may be missing with elements that may not be missing. 
- emit(filt).consume(cb, - { - cb.assign(outputShape(i), childEmitter.outputShape(i)) - cb.assign(filterWasMissing(i), true) - }, - { - filtArrayPC => { - val filtArrayPValue = filtArrayPC.asIndexable.memoize(cb, s"ndarray_filt_array_${i}") - filtPValues(i) = filtArrayPValue - cb.assign(outputShape(i), filtArrayPValue.loadLength().toL) - cb.assign(filterWasMissing(i), false) - } - } - ) - } - - new NDArrayEmitter(outputShape) { - override def outputElement(cb: EmitCodeBuilder, idxVars: IndexedSeq[Value[Long]]): PCode = { - val newIdxVars: IndexedSeq[Settable[Long]] = Array.tabulate(x.pType.nDims) { _ => cb.newField[Long]("ndarray_filter_new_idx_val") } - newIdxVars.zipWithIndex.foreach { case (newIdxVar, i) => - cb.ifx(filterWasMissing(i), { - cb.assign(newIdxVar, idxVars(i)) - }, - { - cb.assign(newIdxVar, filtPValues(i).loadElement(cb, idxVars(i).toI).get(cb, s"NDArrayFilter: can't filter on missing index (axis=$i)").asLong.longCode(cb)) - }) - } - - childEmitter.outputElement(cb, newIdxVars) - } - } - } - case NDArraySlice(child, slicesIR) => - deforest(child).flatMap(cb) { childEmitter => - emit(slicesIR).flatMap(cb) { slicesPC => - val slicesValue = slicesPC.asBaseStruct.memoize(cb, "ndarray_slice_tuple_pv") - - val (indexingIndices, slicingIndices) = slicesValue.pt.types.zipWithIndex.partition { case (pFieldType, idx) => - pFieldType.isPrimitive - } match { - case (a, b) => (a.map(_._2), b.map(_._2)) - } - - IEmitCode.multiFlatMap[Int, SCode, NDArrayEmitter](indexingIndices, indexingIndex => slicesValue.loadField(cb, indexingIndex), cb) { indexingSCodes => - val indexingValues = indexingSCodes.map(sCode => sCode.memoize(cb, "ndarray_slice_indexer")) - val slicingValueTriples = new BoxedArrayBuilder[(Value[Long], Value[Long], Value[Long])]() - val outputShape = { - IEmitCode.multiFlatMap[Int, SCode, IndexedSeq[Value[Long]]](slicingIndices, - valueIdx => slicesValue.loadField(cb, valueIdx), cb) { sCodeSlices: IndexedSeq[SCode] => - IEmitCode.multiFlatMap(sCodeSlices, { sCodeSlice: SCode => - val sValueSlice = sCodeSlice.asBaseStruct.memoize(cb, "ndarray_slice_sCodeSlice") - // I know I have a tuple of three elements here, start, stop, step - - val newDimSizeI = sValueSlice.loadField(cb, 0).flatMap(cb) { startC => - sValueSlice.loadField(cb, 1).flatMap(cb) { stopC => - sValueSlice.loadField(cb, 2).map(cb) { stepC => - val start = cb.newLocal[Long]("ndarray_slice_start", startC.asLong.longCode(cb)) - val stop = cb.newLocal[Long]("ndarray_slice_stop", stopC.asLong.longCode(cb)) - val step = cb.newLocal[Long]("ndarray_slice_step", stepC.asLong.longCode(cb)) - - slicingValueTriples.push((start, stop, step)) - - val newDimSize = cb.newLocal[Long]("new_dim_size") - cb.ifx(step >= 0L && start <= stop, { - cb.assign(newDimSize, const(1L) + ((stop - start) - 1L) / step) - }, { - cb.ifx(step < 0L && start >= stop, { - cb.assign(newDimSize, (((stop - start) + 1L) / step) + 1L) - }, { - cb.assign(newDimSize, 0L) - }) - }) - - newDimSize - - } - } - } - newDimSizeI - }, cb)(x => IEmitCode(cb, false, x)) - } - } - - outputShape.map(cb) { outputShapeSeq => - new NDArrayEmitter(outputShapeSeq) { - override def outputElement(cb: EmitCodeBuilder, idxVars: IndexedSeq[Value[Long]]): PCode = { - // Iterate through the slices tuple given in. For each single integer, should just copy that integer into - // an indexed seq. For each range, should use start and step to modify. 
- val oldIdxVarsIterator = idxVars.toIterator - val indexingIterator = indexingValues.toIterator - val slicingIterator = slicingValueTriples.result().toIterator - - val newIdxVars = slicesValue.pt.types.map { fieldType => fieldType match { - case indexer: PInt64 => { - indexingIterator.next().asPValue.value.asInstanceOf[Value[Long]] - } - case slicer: PBaseStruct => { - val (start, stop, step) = slicingIterator.next() - - cb.memoize(PCode.apply(PInt64Required, start + oldIdxVarsIterator.next() * step), "ndarray_slice_adjusted_lookup").value.asInstanceOf[Value[Long]] - } - }} - - childEmitter.outputElement(cb, newIdxVars) - } - } - } - } - } - } - case NDArrayConcat(nds, axis) => - emit(nds).flatMap(cb) { ndsPCode => - val ndsArrayPValue = ndsPCode.asIndexable.memoize(cb, "ndarray_concat_array_of_nds") - val arrLength = ndsArrayPValue.loadLength() - cb.ifx(arrLength ceq 0, { - cb._fatal("need at least one ndarray to concatenate") - }) - - val missing: Code[Boolean] = { - if (ndsArrayPValue.st.elementEmitType.required) - const(false) - else { - val missing = cb.newLocal[Boolean]("ndarray_concat_result_missing") - cb.assign(missing, false) - // Need to check if the any of the ndarrays are missing. - val missingCheckLoopIdx = cb.newLocal[Int]("ndarray_concat_missing_check_idx") - cb.forLoop(cb.assign(missingCheckLoopIdx, 0), missingCheckLoopIdx < arrLength, cb.assign(missingCheckLoopIdx, missingCheckLoopIdx + 1), - cb.assign(missing, missing | ndsArrayPValue.isElementMissing(missingCheckLoopIdx)) - ) - missing - } - } - - IEmitCode(cb, missing, { - val loopIdx = cb.newLocal[Int]("ndarray_concat_shape_check_idx") - val firstND = ndsArrayPValue.loadElement(cb, 0).map(cb) { sCode => sCode.asNDArray }.get(cb).memoize(cb, "ndarray_concat_input_0") - val newShape = (0 until outputNDims).map { dimIdx => - val localDim = cb.newLocal[Long](s"ndarray_concat_output_shape_element_${dimIdx}") - val ndShape = firstND.shapes(cb) - cb.assign(localDim, ndShape(dimIdx)) - cb.forLoop(cb.assign(loopIdx, 1), loopIdx < arrLength, cb.assign(loopIdx, loopIdx + 1), { - val shapeOfNDAtIdx = ndsArrayPValue.loadElement(cb, loopIdx).map(cb) { sCode => sCode.asNDArray }.get(cb).shape(cb).memoize(cb, "ndarray_concat_input_shape") - val dimLength = shapeOfNDAtIdx.loadField(cb, dimIdx).get(cb).toPCode(cb, region).memoize(cb, "dimLength").value.asInstanceOf[Value[Long]] - - if (dimIdx == axis) { - cb.assign(localDim, localDim + dimLength) - } - else { - cb.ifx(dimLength.cne(localDim), - cb._fatal(const(s"NDArrayConcat: mismatched dimensions of input NDArrays along axis ").concat(loopIdx.toS).concat(": expected ") - .concat(localDim.toS).concat(", got ") - .concat(dimLength.toS)) - ) - } - }) - localDim - } - - new NDArrayEmitter(newShape) { - override def outputElement(cb: EmitCodeBuilder, idxVars: IndexedSeq[Value[Long]]): PCode = { - val concatAxisIdx = cb.newLocal[Long]("ndarray_concat_axis_id") - val whichNDArrayToRead = cb.newLocal[Int]("ndarray_concat_outputElement_i") - - cb.assign(concatAxisIdx, idxVars(axis)) - cb.assign(whichNDArrayToRead, 0) - val condition = EmitCodeBuilder.scopedCode[Boolean](cb.emb) { cb => - (concatAxisIdx >= ndsArrayPValue.loadElement(cb, whichNDArrayToRead).get(cb).asNDArray.shape(cb).memoize(cb, "ndarray_concat_condition").loadField(cb, axis).get(cb).asLong.longCode(cb)) - } - cb.whileLoop(condition, { - cb.assign(concatAxisIdx, concatAxisIdx - ndsArrayPValue.loadElement(cb, whichNDArrayToRead).get(cb).asNDArray.shape(cb).memoize(cb, "ndarray_concat_output_subtract").loadField(cb, 
axis).get(cb).asLong.longCode(cb)) - cb.assign(whichNDArrayToRead, whichNDArrayToRead + 1) - }) - cb.ifx(whichNDArrayToRead >= arrLength, cb._fatal(const("NDArrayConcat: trying to access element greater than length of concatenation axis: ").concat(whichNDArrayToRead.toS).concat(" > ").concat((arrLength - 1).toS))) - val transformedIdxs = Array.tabulate(nDims) { idx => - if (idx == axis) concatAxisIdx else idxVars(idx) - }.toFastIndexedSeq - ndsArrayPValue.loadElement(cb, whichNDArrayToRead).get(cb).asNDArray.memoize(cb, "ndarray_to_load_element_from").loadElement(transformedIdxs, cb).toPCode(cb, region) - } - } - }) - } - case NDArrayAgg(child, axesToSumOut) => - deforest(child).map(cb) { childEmitter => - val childDims = child.typ.asInstanceOf[TNDArray].nDims - val axesToKeep = (0 until childDims).filter(axis => !axesToSumOut.contains(axis)) - val newOutputShape = axesToKeep.map(idx => childEmitter.outputShape(idx)) - val newOutputShapeComplement = axesToSumOut.map(idx => childEmitter.outputShape(idx)) - - new NDArrayEmitter(newOutputShape) { - override def outputElement(cb: EmitCodeBuilder, idxVars: IndexedSeq[Value[Long]]): PCode = { - val numericElementType = coerce[PNumeric](child.pType.asInstanceOf[PNDArray].elementType) - val runningSum = NumericPrimitives.newLocal(cb, "ndarray_agg_running_sum", numericElementType.virtualType) - cb.assign(runningSum, numericElementType.zero) - - SNDArray.forEachIndex(cb, newOutputShapeComplement, "NDArrayAgg_Sum_loop"){ case (cb, coordsBeingSummedOut) => - // Build the new list we need to pass down into child - val idxVarsIt = idxVars.toIterator - val summedOutIt = coordsBeingSummedOut.toIterator - val fullIndicesForChild = (0 until childDims).map(idx => - if (axesToSumOut.contains(idx)) summedOutIt.next() else idxVarsIt.next() - ) - cb.assign(runningSum, numericElementType.add(runningSum, childEmitter.outputElement(cb, fullIndicesForChild).code)) - } - - PCode.apply(numericElementType, runningSum) - } - } - } - case _ => - val ndI = emit(x) - ndI.map(cb){ ndPCode => - val ndPv = ndPCode.asNDArray.memoize(cb, "deforestNDArray_fall_through_ndarray") - val shape = ndPv.shapes(cb) - - new NDArrayEmitter(shape) { - override def outputElement(cb: EmitCodeBuilder, idxVars: IndexedSeq[Value[Long]]): PCode = { - ndPv.asInstanceOf[PNDArrayValue].loadElement(idxVars, cb).toPCode(cb, region) - } - } - } + if (leftRightComparatorNames.nonEmpty) { + assert(leftRightComparatorNames.length == 2) + newEnv = newEnv.bind( + (leftRightComparatorNames(0), leftEC), + (leftRightComparatorNames(1), rightEC)) } - } - deforest(x0).map(cb)(emitter => emitter.emit(cb, coerce[PCanonicalNDArray](x0.pType), region)) + val iec = emitter.emitI(ir, cb, newEnv, None) + iec.get(cb, "Result of sorting function cannot be missing").asBoolean.boolCode(cb) + } + (cb: EmitCodeBuilder, region: Value[Region], l: Code[_], r: Code[_]) => cb.invokeCode[Boolean](sort, region, l, r) } } @@ -2882,15 +2588,15 @@ object NDArrayEmitter { def zeroBroadcastedDims(indices: IndexedSeq[Code[Long]], broadcastMask: IndexedSeq[Code[Long]]): IndexedSeq[Value[Long]] = { indices.zip(broadcastMask).map { case (index, flag) => new Value[Long] { def get: Code[Long] = index * flag - }} + } + } } - def unifyShapes2(mb: EmitMethodBuilder[_], leftShape: IndexedSeq[Value[Long]], rightShape: IndexedSeq[Value[Long]]): (Code[Unit], IndexedSeq[Value[Long]]) = { - val sb = SetupBuilder(mb) - + def unifyShapes2(cb: EmitCodeBuilder, leftShape: IndexedSeq[Value[Long]], rightShape: IndexedSeq[Value[Long]]): 
IndexedSeq[Value[Long]] = { val shape = leftShape.zip(rightShape).zipWithIndex.map { case ((left, right), i) => val notSameAndNotBroadcastable = !((left ceq right) || (left ceq 1L) || (right ceq 1L)) - sb.memoizeField( + cb.newField[Long]( + s"unify_shapes2_shape$i", notSameAndNotBroadcastable.mux( Code._fatal[Long](rightShape.foldLeft[Code[String]]( leftShape.foldLeft[Code[String]]( @@ -2899,16 +2605,14 @@ object NDArrayEmitter { .concat("] vs [ ") )((accum, v) => accum.concat(v.toS).concat(" ")) .concat("]")), - (left > right).mux(left, right)), - s"unify_shapes2_shape$i") + (left > right).mux(left, right))) } - (sb.result(), shape) + shape } def matmulShape(cb: EmitCodeBuilder, leftShape: IndexedSeq[Value[Long]], rightShape: IndexedSeq[Value[Long]]): IndexedSeq[Value[Long]] = { val mb = cb.emb - val sb = SetupBuilder(mb) assert(leftShape.nonEmpty) assert(rightShape.nonEmpty) @@ -2916,7 +2620,6 @@ object NDArrayEmitter { var lK: Value[Long] = null var rK: Value[Long] = null var shape: IndexedSeq[Value[Long]] = null - var setup: Code[Unit] = Code._empty if (leftShape.length == 1) { lK = leftShape.head @@ -2934,10 +2637,9 @@ object NDArrayEmitter { shape = leftShape.slice(0, leftShape.length - 1) } else { rK = rightShape(rightShape.length - 2) - val (unifiedSetup, unifiedShape) = unifyShapes2(mb, + val unifiedShape = unifyShapes2(cb, leftShape.slice(0, leftShape.length - 2), rightShape.slice(0, rightShape.length - 2)) - setup = Code(setup, unifiedSetup) shape = unifiedShape :+ leftShape(leftShape.length - 2) :+ rightShape.last } } @@ -2946,25 +2648,23 @@ object NDArrayEmitter { val rightShapeString = const("(").concat(rightShape.map(_.toS).reduce((a, b) => a.concat(", ").concat(b))).concat(")") - setup = Code(setup, - (lK cne rK).orEmpty( - Code._fatal[Unit](const("Matrix dimensions incompatible: ") - .concat(leftShapeString) - .concat(" can't be multiplied by matrix with dimensions ") - .concat(rightShapeString)))) + cb.ifx(lK.cne(rK), { + cb._fatal("Matrix dimensions incompatible: ", + leftShapeString, + " can't be multiplied by matrix with dimensions ", + rightShapeString) + }) - cb.append(setup) shape } } -abstract class NDArrayEmitter(val outputShape: IndexedSeq[Value[Long]]) -{ +abstract class NDArrayEmitter(val outputShape: IndexedSeq[Value[Long]], val elementType: SType) { val nDims = outputShape.length - def outputElement(cb: EmitCodeBuilder, idxVars: IndexedSeq[Value[Long]]): PCode + def outputElement(cb: EmitCodeBuilder, idxVars: IndexedSeq[Value[Long]]): SCode - def emit(cb: EmitCodeBuilder, targetType: PCanonicalNDArray, region: Value[Region]): PCode = { + def emit(cb: EmitCodeBuilder, targetType: PCanonicalNDArray, region: Value[Region]): SCode = { val shapeArray = outputShape val idx = cb.newLocal[Int]("ndarrayemitter_emitloops_idx", 0) @@ -2975,9 +2675,11 @@ abstract class NDArrayEmitter(val outputShape: IndexedSeq[Value[Long]]) cb, region) - SNDArray.forEachIndex(cb, shapeArray, "ndarrayemitter_emitloops") { case (cb, idxVars) => - val element = IEmitCode.present(cb, outputElement(cb, idxVars)).consume(cb, {cb._fatal("NDArray elements cannot be missing")}, { elementPc => - targetType.elementType.storeAtAddress(cb, firstElementAddress + (idx.toL * targetType.elementType.byteSize), region, elementPc, true) + SNDArray.forEachIndexColMajor(cb, shapeArray, "ndarrayemitter_emitloops") { case (cb, idxVars) => + val element = IEmitCode.present(cb, outputElement(cb, idxVars)).consume(cb, { + cb._fatal("NDArray elements cannot be missing") + }, { elementPc => + 
targetType.elementType.storeAtAddress(cb, firstElementAddress + (idx.toL * targetType.elementType.byteSize), region, elementPc, true) }) cb.assign(idx, idx + 1) } diff --git a/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala b/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala index f85479051d1..c008e2505cb 100644 --- a/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala +++ b/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala @@ -2,22 +2,21 @@ package is.hail.expr.ir import is.hail.annotations.{Region, RegionPool, RegionValueBuilder} import is.hail.asm4s._ -import is.hail.backend.{BackendUtils, HailTaskContext} +import is.hail.backend.BackendUtils import is.hail.expr.ir.functions.IRRandomness import is.hail.expr.ir.orderings.CodeOrdering import is.hail.io.fs.FS import is.hail.io.{BufferSpec, InputBuffer, TypedCodecSpec} -import is.hail.lir -import is.hail.types.physical.stypes.{EmitType, SType} -import is.hail.types.physical.stypes.interfaces.PVoidCode.pt -import is.hail.types.physical.{PCanonicalTuple, PCode, PSettable, PStream, PType, PValue, typeToTypeInfo} +import is.hail.types.VirtualTypeWithReq +import is.hail.types.physical.stypes._ +import is.hail.types.physical.{PCanonicalTuple, PType} import is.hail.types.virtual.Type import is.hail.utils._ import is.hail.variant.ReferenceGenome import org.apache.spark.TaskContext import java.io._ -import java.util.Base64 +import java.lang.reflect.InvocationTargetException import scala.collection.mutable import scala.language.existentials @@ -84,27 +83,19 @@ trait WrappedEmitClassBuilder[C] extends WrappedEmitModuleBuilder { def getOrDefineLazyField[T: TypeInfo](setup: Code[T], id: Any): Value[T] = ecb.getOrDefineLazyField(setup, id) - def newPSettable(sb: SettableBuilder, pt: PType, name: String = null): PSettable = ecb.newPSettable(sb, pt, name) + def newPSettable(sb: SettableBuilder, pt: SType, name: String = null): SSettable = ecb.newPSettable(sb, pt, name) - def newPField(pt: PType): PSettable = ecb.newPField(pt) + def newPField(pt: SType): SSettable = ecb.newPField(pt) - def newPField(name: String, pt: PType): PSettable = ecb.newPField(name, pt) + def newPField(name: String, pt: SType): SSettable = ecb.newPField(name, pt) - def newEmitField(et: EmitType): EmitSettable = ecb.newEmitField(et.st.pType, et.required) + def newEmitField(et: EmitType): EmitSettable = ecb.newEmitField(et.st, et.required) - def newEmitField(pt: PType, required: Boolean): EmitSettable = ecb.newEmitField(pt, required) + def newEmitField(pt: SType, required: Boolean): EmitSettable = ecb.newEmitField(pt, required) - def newEmitField(name: String, et: EmitType): EmitSettable = ecb.newEmitField(name, et.st.pType, et.required) + def newEmitField(name: String, et: EmitType): EmitSettable = ecb.newEmitField(name, et.st, et.required) - def newEmitField(name: String, pt: PType, required: Boolean): EmitSettable = ecb.newEmitField(name, pt, required) - - def newEmitSettable(pt: PType, ms: Settable[Boolean], vs: PSettable, required: Boolean): EmitSettable = ecb.newEmitSettable(pt, ms, vs, required) - - def newPresentEmitField(pt: PType): PresentEmitSettable = ecb.newPresentEmitField(pt) - - def newPresentEmitField(name: String, pt: PType): PresentEmitSettable = ecb.newPresentEmitField(name, pt) - - def newPresentEmitSettable(ps: PSettable): PresentEmitSettable = ecb.newPresentEmitSettable(ps) + def newEmitField(name: String, pt: SType, required: Boolean): EmitSettable = ecb.newEmitField(name, pt, required) def fieldBuilder: SettableBuilder = 
cb.fieldBuilder @@ -127,7 +118,7 @@ trait WrappedEmitClassBuilder[C] extends WrappedEmitModuleBuilder { def partitionRegion: Settable[Region] = ecb.partitionRegion - def addLiteral(v: Any, t: PType): PValue = ecb.addLiteral(v, t) + def addLiteral(v: Any, t: VirtualTypeWithReq): SValue = ecb.addLiteral(v, t) def addEncodedLiteral(encodedLiteral: EncodedLiteral) = ecb.addEncodedLiteral(encodedLiteral) @@ -152,11 +143,6 @@ trait WrappedEmitClassBuilder[C] extends WrappedEmitModuleBuilder { def addAggStates(aggSigs: Array[agg.AggStateSig]): agg.TupleAggregatorState = ecb.addAggStates(aggSigs) - def genDependentFunction[F](baseName: String, - maybeGenericParameterTypeInfo: IndexedSeq[MaybeGenericTypeInfo[_]], - maybeGenericReturnTypeInfo: MaybeGenericTypeInfo[_])(implicit fti: TypeInfo[F]): DependentEmitFunctionBuilder[F] = - ecb.genDependentFunction(baseName, maybeGenericParameterTypeInfo, maybeGenericReturnTypeInfo) - def newRNG(seed: Long): Value[IRRandomness] = ecb.newRNG(seed) def resultWithIndex(print: Option[PrintWriter] = None): (FS, Int, Region) => C = ecb.resultWithIndex(print) @@ -188,14 +174,6 @@ trait WrappedEmitClassBuilder[C] extends WrappedEmitModuleBuilder { def create(path: Code[String]): Code[OutputStream] = getFS.invoke[String, OutputStream]("create", path) - - def genDependentFunction[A1: TypeInfo, A2: TypeInfo, R: TypeInfo]( - baseName: String = null - ): DependentEmitFunctionBuilder[AsmFunction2[A1, A2, R]] = - genDependentFunction[AsmFunction2[A1, A2, R]](baseName, Array(GenericTypeInfo[A1], GenericTypeInfo[A2]), GenericTypeInfo[R]) - - def genDependentFunction[A1: TypeInfo, A2: TypeInfo, A3: TypeInfo, R: TypeInfo]: DependentEmitFunctionBuilder[AsmFunction3[A1, A2, A3, R]] = - genDependentFunction[AsmFunction3[A1, A2, A3, R]](null, Array(GenericTypeInfo[A1], GenericTypeInfo[A2], GenericTypeInfo[A3]), GenericTypeInfo[R]) } class EmitClassBuilder[C]( @@ -225,75 +203,19 @@ class EmitClassBuilder[C]( // EmitClassBuilder methods - def newPSettable(sb: SettableBuilder, pt: PType, name: String = null): PSettable = PSettable(sb, pt, name) - - def newPField(pt: PType): PSettable = newPSettable(fieldBuilder, pt) - - def newPField(name: String, pt: PType): PSettable = newPSettable(fieldBuilder, pt, name) - - def newEmitField(pt: PType, required: Boolean): EmitSettable = - newEmitSettable(pt, genFieldThisRef[Boolean](), newPField(pt), required) - - def newEmitField(name: String, emitType: EmitType): EmitSettable = newEmitField(name, emitType.st.pType, emitType.required) - - def newEmitField(name: String, pt: PType, required: Boolean): EmitSettable = - newEmitSettable(pt, genFieldThisRef[Boolean](name + "_missing"), newPField(name, pt), required) - - def newEmitSettable(_pt: PType, ms: Settable[Boolean], vs: PSettable, required: Boolean): EmitSettable = new EmitSettable { - if (!_pt.isRealizable) { - throw new UnsupportedOperationException(s"newEmitSettable can only be called on realizable PTypes. 
Called on ${_pt}") - } - - def pt: PType = _pt - - def load: EmitCode = { - val ec = EmitCode(Code._empty, - if (required) const(false) else ms.get, - vs.get) - assert(ec.required == required) - ec - } - - def store(cb: EmitCodeBuilder, ec: EmitCode): Unit = { - store(cb, ec.toI(cb)) - } - - def store(cb: EmitCodeBuilder, iec: IEmitCode): Unit = - if (required) - cb.assign(vs, iec.get(cb, s"Required EmitSettable cannot be missing ${ _pt }")) - else - iec.consume(cb, { - cb.assign(ms, true) - }, { value => - cb.assign(ms, false) - cb.assign(vs, value) - }) - - override def get(cb: EmitCodeBuilder): PCode = { - if (required) { - vs - } else { - cb.ifx(ms, cb._fatal(s"Can't convert missing ${_pt} to PValue")) - vs - } - } - } + def newPSettable(sb: SettableBuilder, st: SType, name: String = null): SSettable = SSettable(sb, st, name) - def newPresentEmitField(pt: PType): PresentEmitSettable = - newPresentEmitSettable(newPField(pt)) + def newPField(st: SType): SSettable = newPSettable(fieldBuilder, st) - def newPresentEmitField(name: String, pt: PType): PresentEmitSettable = - newPresentEmitSettable(newPField(name, pt)) + def newPField(name: String, st: SType): SSettable = newPSettable(fieldBuilder, st, name) - def newPresentEmitSettable(ps: PSettable): PresentEmitSettable = new PresentEmitSettable { - def pt: PType = ps.pt + def newEmitField(st: SType, required: Boolean): EmitSettable = + new EmitSettable(if (required) None else Some(genFieldThisRef[Boolean]("emitfield_missing")), newPField(st)) - def load: EmitCode = EmitCode(Code._empty, const(false), ps.load()) + def newEmitField(name: String, emitType: EmitType): EmitSettable = newEmitField(name, emitType.st, emitType.required) - def store(cb: EmitCodeBuilder, pv: PCode): Unit = ps.store(cb, pv) - - override def get(cb: EmitCodeBuilder): PCode = ps - } + def newEmitField(name: String, st: SType, required: Boolean): EmitSettable = + new EmitSettable(if (required) None else Some(genFieldThisRef[Boolean](name + "_missing")), newPField(name, st)) private[this] val typMap: mutable.Map[Type, Value[_ <: Type]] = mutable.Map() @@ -313,29 +235,28 @@ class EmitClassBuilder[C]( rgExists.mux(Code._empty, addRG) } - private[this] val literalsMap: mutable.Map[(PType, Any), PSettable] = - mutable.Map[(PType, Any), PSettable]() - private[this] val encodedLiteralsMap: mutable.Map[EncodedLiteral, PSettable] = - mutable.Map[EncodedLiteral, PSettable]() + private[this] val literalsMap: mutable.Map[(VirtualTypeWithReq, Any), SSettable] = + mutable.Map[(VirtualTypeWithReq, Any), SSettable]() + private[this] val encodedLiteralsMap: mutable.Map[EncodedLiteral, SSettable] = + mutable.Map[EncodedLiteral, SSettable]() private[this] lazy val encLitField: Settable[Array[Byte]] = genFieldThisRef[Array[Byte]]("encodedLiterals") lazy val partitionRegion: Settable[Region] = genFieldThisRef[Region]("partitionRegion") private[this] lazy val poolField: Settable[RegionPool] = genFieldThisRef[RegionPool]() - def addLiteral(v: Any, t: PType): PValue = { + def addLiteral(v: Any, t: VirtualTypeWithReq): SValue = { assert(v != null) - assert(t.isCanonical) - literalsMap.getOrElseUpdate(t -> v, PSettable(fieldBuilder, t, "literal")) + + literalsMap.getOrElseUpdate(t -> v, SSettable(fieldBuilder, t.canonicalEmitType.st, "literal")) } - def addEncodedLiteral(encodedLiteral: EncodedLiteral): PValue = { - assert(encodedLiteral._pType.isCanonical) - encodedLiteralsMap.getOrElseUpdate(encodedLiteral, PSettable(fieldBuilder, encodedLiteral._pType, "encodedLiteral")) + def 
addEncodedLiteral(encodedLiteral: EncodedLiteral): SValue = { + encodedLiteralsMap.getOrElseUpdate(encodedLiteral, SSettable(fieldBuilder, encodedLiteral.codec.encodedType.decodedSType(encodedLiteral.typ), "encodedLiteral")) } private[this] def encodeLiterals(): Array[Array[Byte]] = { val literals = literalsMap.toArray - val litType = PCanonicalTuple(true, literals.map(_._1._1): _*) + val litType = PCanonicalTuple(true, literals.map(_._1._1.canonicalPType.setRequired(true)): _*) val spec = TypedCodecSpec(litType, BufferSpec.defaultUncompressed) cb.addInterface(typeInfo[FunctionWithLiterals].iname) @@ -357,7 +278,7 @@ class EmitClassBuilder[C]( lits.loadField(cb, i) .consume(cb, cb._fatal("expect non-missing literals!"), - { pc => f.store(cb, pc.asPCode) }) + { pc => f.store(cb, pc) }) } // Handle the pre-encoded literals, which only need to be decoded. preEncodedLiterals.zipWithIndex.foreach { case ((encLit, f), index) => @@ -378,7 +299,7 @@ class EmitClassBuilder[C]( val rvb = new RegionValueBuilder(region) rvb.start(litType) rvb.startTuple() - literals.foreach { case ((typ, a), _) => rvb.addAnnotation(typ.virtualType, a) } + literals.foreach { case ((typ, a), _) => rvb.addAnnotation(typ.t, a) } rvb.endTuple() enc.writeRegionValue(rvb.end()) } @@ -477,6 +398,12 @@ class EmitClassBuilder[C]( _aggSerialized.load().update(i, Code._null) } + def runMethodWithHailExceptionHandler(mname: String): Code[(String, java.lang.Integer)] = { + Code.invokeScalaObject2[AnyRef, String, (String, java.lang.Integer)](CodeExceptionHandler.getClass, + "handleUserException", + cb._this.get.asInstanceOf[Code[AnyRef]], mname) + } + def backend(): Code[BackendUtils] = { if (_backendField == null) { cb.addInterface(typeInfo[FunctionWithBackend].iname) @@ -582,64 +509,49 @@ class EmitClassBuilder[C]( ): CodeOrdering.F[op.ReturnType] = getOrderingFunction(t, t, sortOrder, op) - private def getCodeArgsInfo(argsInfo: IndexedSeq[ParamType], returnInfo: ParamType): (IndexedSeq[TypeInfo[_]], TypeInfo[_]) = { + private def getCodeArgsInfo(argsInfo: IndexedSeq[ParamType], returnInfo: ParamType): (IndexedSeq[TypeInfo[_]], TypeInfo[_], AsmTuple[_]) = { val codeArgsInfo = argsInfo.flatMap { case CodeParamType(ti) => FastIndexedSeq(ti) case t: EmitParamType => t.codeTupleTypes - case PCodeParamType(pt) => pt.codeTupleTypes() + case SCodeParamType(pt) => pt.codeTupleTypes() } - val codeReturnInfo = returnInfo match { - case CodeParamType(ti) => ti - case PCodeParamType(pt) => pt.ti + val (codeReturnInfo, asmTuple) = returnInfo match { + case CodeParamType(ti) => ti -> null + case SCodeParamType(pt) if pt.nCodes == 1 => pt.codeTupleTypes().head -> null + case SCodeParamType(pt) => + val asmTuple = modb.tupleClass(pt.codeTupleTypes()) + asmTuple.ti -> asmTuple case t: EmitParamType => val ts = t.codeTupleTypes if (ts.length == 1) - ts.head + ts.head -> null else { - throw new UnsupportedOperationException + val asmTuple = modb.tupleClass(ts) + asmTuple.ti -> asmTuple } } - (codeArgsInfo, codeReturnInfo) + (codeArgsInfo, codeReturnInfo, asmTuple) } def newEmitMethod(name: String, argsInfo: IndexedSeq[ParamType], returnInfo: ParamType): EmitMethodBuilder[C] = { - val (codeArgsInfo, codeReturnInfo) = getCodeArgsInfo(argsInfo, returnInfo) + val (codeArgsInfo, codeReturnInfo, asmTuple) = getCodeArgsInfo(argsInfo, returnInfo) - new EmitMethodBuilder[C]( - argsInfo, returnInfo, - this, - cb.newMethod(name, codeArgsInfo, codeReturnInfo)) + new EmitMethodBuilder[C](argsInfo, returnInfo, this, cb.newMethod(name, codeArgsInfo, 
codeReturnInfo), asmTuple) } def newEmitMethod(name: String, argsInfo: IndexedSeq[MaybeGenericTypeInfo[_]], returnInfo: MaybeGenericTypeInfo[_]): EmitMethodBuilder[C] = { new EmitMethodBuilder[C]( argsInfo.map(ai => CodeParamType(ai.base)), CodeParamType(returnInfo.base), - this, - cb.newMethod(name, argsInfo, returnInfo)) + this, cb.newMethod(name, argsInfo, returnInfo), asmTuple = null) } def newStaticEmitMethod(name: String, argsInfo: IndexedSeq[ParamType], returnInfo: ParamType): EmitMethodBuilder[C] = { - val (codeArgsInfo, codeReturnInfo) = getCodeArgsInfo(argsInfo, returnInfo) + val (codeArgsInfo, codeReturnInfo, asmTuple) = getCodeArgsInfo(argsInfo, returnInfo) - new EmitMethodBuilder[C]( - argsInfo, returnInfo, - this, - cb.newStaticMethod(name, codeArgsInfo, codeReturnInfo)) - } - - def genDependentFunction[F](baseName: String, - maybeGenericParameterTypeInfo: IndexedSeq[MaybeGenericTypeInfo[_]], - maybeGenericReturnTypeInfo: MaybeGenericTypeInfo[_])(implicit fti: TypeInfo[F]): DependentEmitFunctionBuilder[F] = { - val depCB = emodb.genEmitClass[F](baseName) - val apply_method = depCB.cb.newMethod("apply", maybeGenericParameterTypeInfo, maybeGenericReturnTypeInfo) - val dep_apply_method = new DependentMethodBuilder(apply_method) - val emit_apply_method = new EmitMethodBuilder[F]( - maybeGenericParameterTypeInfo.map(pi => CodeParamType(pi.base)), - CodeParamType(maybeGenericReturnTypeInfo.base), - depCB, - apply_method) - new DependentEmitFunctionBuilder[F](this, dep_apply_method, emit_apply_method) + new EmitMethodBuilder[C](argsInfo, returnInfo, this, + cb.newStaticMethod(name, codeArgsInfo, codeReturnInfo), + asmTuple) } val rngs: BoxedArrayBuilder[(Settable[IRRandomness], Code[IRRandomness])] = new BoxedArrayBuilder() @@ -789,14 +701,6 @@ class EmitClassBuilder[C]( def getUnsafeWriter(path: Code[String]): Code[OutputStream] = getFS.invoke[String, OutputStream]("unsafeWriter", path) - - def genDependentFunction[A1: TypeInfo, A2: TypeInfo, R: TypeInfo]( - baseName: String = null - ): DependentEmitFunctionBuilder[AsmFunction2[A1, A2, R]] = - genDependentFunction[AsmFunction2[A1, A2, R]](baseName, Array(GenericTypeInfo[A1], GenericTypeInfo[A2]), GenericTypeInfo[R]) - - def genDependentFunction[A1: TypeInfo, A2: TypeInfo, A3: TypeInfo, R: TypeInfo]: DependentEmitFunctionBuilder[AsmFunction3[A1, A2, A3, R]] = - genDependentFunction[AsmFunction3[A1, A2, A3, R]](null, Array(GenericTypeInfo[A1], GenericTypeInfo[A2], GenericTypeInfo[A3]), GenericTypeInfo[R]) } object EmitFunctionBuilder { @@ -887,11 +791,32 @@ trait FunctionWithBackend { def setBackend(spark: BackendUtils): Unit } +object CodeExceptionHandler { + /** + * This method assumes that the method referred to by `methodName` + * is a 0-argument class method (only takes the class itself as an arg) + * which returns void. 
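+   * It returns null when the invocation completes normally. If the invoked method throws,
+   * reflection wraps the error in an InvocationTargetException; a HailException is unwrapped
+   * and returned as its (message, errorId) pair, while any other exception is rethrown.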
+ */ + def handleUserException(obj: AnyRef, methodName: String): (String, java.lang.Integer) = { + try { + obj.getClass.getMethod(methodName).invoke(obj) + null + } catch { + case e: InvocationTargetException => + e.getTargetException match { + case ue: HailException => (ue.msg, ue.errorId) + case e => throw e + } + } + } +} + class EmitMethodBuilder[C]( val emitParamTypes: IndexedSeq[ParamType], val emitReturnType: ParamType, val ecb: EmitClassBuilder[C], - val mb: MethodBuilder[C] + val mb: MethodBuilder[C], + private[ir] val asmTuple: AsmTuple[_] ) extends WrappedEmitClassBuilder[C] { // wrapped MethodBuilder methods def newLocal[T: TypeInfo](name: String = null): LocalRef[T] = mb.newLocal[T](name) @@ -919,18 +844,57 @@ class EmitMethodBuilder[C]( } } - def getPCodeParam(emitIndex: Int): PCode = { + def getSCodeParam(emitIndex: Int): SCode = { assert(mb.isStatic || emitIndex != 0) val static = (!mb.isStatic).toInt - val _pt = emitParamTypes(emitIndex - static).asInstanceOf[PCodeParamType].pt - assert(!_pt.isInstanceOf[PStream]) + val _st = emitParamTypes(emitIndex - static).asInstanceOf[SCodeParamType].st + assert(_st.isRealizable) - val ts = _pt.codeTupleTypes() + val ts = _st.codeTupleTypes() val codeIndex = emitParamCodeIndex(emitIndex - static) - _pt.sType.fromCodes(ts.zipWithIndex.map { case (t, i) => + _st.fromCodes(ts.zipWithIndex.map { case (t, i) => mb.getArg(codeIndex + i)(t).load() - }).asPCode + }) + } + + def storeEmitParam(emitIndex: Int, cb: EmitCodeBuilder): Value[Region] => EmitValue = { + assert(mb.isStatic || emitIndex != 0) + val static = (!mb.isStatic).toInt + val et = emitParamTypes(emitIndex - static) match { + case t: EmitParamType => t + case _ => throw new RuntimeException(s"isStatic=${ mb.isStatic }, emitIndex=$emitIndex, params=$emitParamTypes") + } + val codeIndex = emitParamCodeIndex(emitIndex - static) + + et match { + case SingleCodeEmitParamType(required, sct) => + val field = cb.newFieldAny(s"storeEmitParam_sct_$emitIndex", mb.getArg(codeIndex)(sct.ti).get)(sct.ti); + { region: Value[Region] => + val emitCode = EmitCode.fromI(this) { cb => + if (required) { + IEmitCode.present(cb, sct.loadToSCode(cb, region, field.load())) + } else { + IEmitCode(cb, mb.getArg[Boolean](codeIndex + 1).get, sct.loadToSCode(cb, null, field.load())) + } + } + + new EmitValue { + evSelf => + + override def emitType: EmitType = emitCode.emitType + + override def load: EmitCode = emitCode + + override def get(cb: EmitCodeBuilder): SCode = emitCode.toI(cb).get(cb) + } + } + + case SCodeEmitParamType(et) => + val fd = cb.memoizeField(getEmitParam(emitIndex, null), s"storeEmitParam_$emitIndex") + _ => fd + } + } // needs region to support stream arguments @@ -948,48 +912,47 @@ class EmitMethodBuilder[C]( val emitCode = EmitCode.fromI(this) { cb => if (required) { - IEmitCode.present(cb, sct.loadToPCode(cb, r, mb.getArg(codeIndex)(sct.ti).get)) + IEmitCode.present(cb, sct.loadToSCode(cb, r, mb.getArg(codeIndex)(sct.ti).get)) } else { - IEmitCode(cb, mb.getArg[Boolean](codeIndex + 1).get, sct.loadToPCode(cb, null, mb.getArg(codeIndex)(sct.ti).get)) + IEmitCode(cb, mb.getArg[Boolean](codeIndex + 1).get, sct.loadToSCode(cb, null, mb.getArg(codeIndex)(sct.ti).get)) } } new EmitValue { evSelf => - val pt: PType = emitCode.pt + + override def emitType: EmitType = emitCode.emitType override def load: EmitCode = emitCode - override def get(cb: EmitCodeBuilder): PCode = emitCode.toI(cb).get(cb) + override def get(cb: EmitCodeBuilder): SCode = emitCode.toI(cb).get(cb) } - case 
PCodeEmitParamType(_pt) => - val ts = _pt.codeTupleTypes() + case SCodeEmitParamType(et) => + val ts = et.st.codeTupleTypes() new EmitValue { evSelf => - val pt: PType = _pt + val emitType: EmitType = et def load: EmitCode = { EmitCode(Code._empty, - if (pt.required) + if (et.required) const(false) else mb.getArg[Boolean](codeIndex + ts.length), - pt.fromCodeTuple(ts.zipWithIndex.map { case (t, i) => + st.fromCodes(ts.zipWithIndex.map { case (t, i) => mb.getArg(codeIndex + i)(t).get })) } - override def get(cb: EmitCodeBuilder): PCode = { - new PValue { - override def pt: PType = evSelf.pt - - override def get: PCode = pt.fromCodeTuple(ts.zipWithIndex.map { case (t, i) => + override def get(cb: EmitCodeBuilder): SCode = { + new SValue { + override def get: SCode = st.fromCodes(ts.zipWithIndex.map { case (t, i) => mb.getArg(codeIndex + i)(t).get }) - override def st: SType = evSelf.pt.sType + override def st: SType = evSelf.st } } } @@ -1009,33 +972,29 @@ class EmitMethodBuilder[C]( case EmitParam(ec) => fatal("EmitParam passed to invokeCode") }: _*) } - def newPLocal(pt: PType): PSettable = newPSettable(localBuilder, pt) - - def newPLocal(name: String, pt: PType): PSettable = newPSettable(localBuilder, pt, name) + def newPLocal(st: SType): SSettable = newPSettable(localBuilder, st) - def newEmitLocal(emitType: EmitType): EmitSettable = newEmitLocal(emitType.st.pType, emitType.required) - def newEmitLocal(pt: PType, required: Boolean): EmitSettable = - newEmitSettable(pt, if (required) null else newLocal[Boolean](), newPLocal(pt), required) + def newPLocal(name: String, st: SType): SSettable = newPSettable(localBuilder, st, name) - def newEmitLocal(name: String, emitType: EmitType): EmitSettable = newEmitLocal(name, emitType.st.pType, emitType.required) - def newEmitLocal(name: String, pt: PType, required: Boolean): EmitSettable = - newEmitSettable(pt, if (required) null else newLocal[Boolean](name + "_missing"), newPLocal(name, pt), required) + def newEmitLocal(emitType: EmitType): EmitSettable = newEmitLocal(emitType.st, emitType.required) + def newEmitLocal(st: SType, required: Boolean): EmitSettable = + new EmitSettable(if (required) None else Some(newLocal[Boolean]("anon_emitlocal_m")), newPLocal("anon_emitlocal_v", st)) - def newPresentEmitLocal(pt: PType): PresentEmitSettable = - newPresentEmitSettable(newPLocal(pt)) - - def newPresentEmitLocal(name: String, pt: PType): PresentEmitSettable = - newPresentEmitSettable(newPLocal(name, pt)) + def newEmitLocal(name: String, emitType: EmitType): EmitSettable = newEmitLocal(name, emitType.st, emitType.required) + def newEmitLocal(name: String, st: SType, required: Boolean): EmitSettable = + new EmitSettable(if (required) None else Some(newLocal[Boolean](name + "_missing")), newPLocal(name, st)) def emitWithBuilder[T](f: (EmitCodeBuilder) => Code[T]): Unit = emit(EmitCodeBuilder.scopedCode[T](this)(f)) def voidWithBuilder(f: (EmitCodeBuilder) => Unit): Unit = emit(EmitCodeBuilder.scopedVoid(this)(f)) - def emitPCode(f: (EmitCodeBuilder) => PCode): Unit = { - // FIXME: this should optionally construct a tuple to support multiple-code SCodes + def emitSCode(f: (EmitCodeBuilder) => SCode): Unit = { emit(EmitCodeBuilder.scopedCode(this) { cb => val res = f(cb) - res.code + if (res.st.nCodes == 1) + res.makeCodeTuple(cb).head + else + asmTuple.newTuple(res.makeCodeTuple(cb)) }) } @@ -1085,91 +1044,13 @@ trait WrappedEmitMethodBuilder[C] extends WrappedEmitClassBuilder[C] { def getEmitParam(emitIndex: Int, r: Value[Region]): EmitValue = 
emb.getEmitParam(emitIndex, r) - def newPLocal(pt: PType): PSettable = emb.newPLocal(pt) - - def newPLocal(name: String, pt: PType): PSettable = emb.newPLocal(name, pt) + def newPLocal(st: SType): SSettable = emb.newPLocal(st) - def newEmitLocal(pt: PType, required: Boolean): EmitSettable = emb.newEmitLocal(pt, required) + def newPLocal(name: String, st: SType): SSettable = emb.newPLocal(name, st) - def newEmitLocal(name: String, pt: PType, required: Boolean): EmitSettable = emb.newEmitLocal(name, pt, required) + def newEmitLocal(st: SType, required: Boolean): EmitSettable = emb.newEmitLocal(st, required) - def newPresentEmitLocal(pt: PType): PresentEmitSettable = emb.newPresentEmitLocal(pt) -} - -class DependentEmitFunctionBuilder[F]( - parentcb: EmitClassBuilder[_], - val dep_apply_method: DependentMethodBuilder[F], - val apply_method: EmitMethodBuilder[F] -) extends WrappedEmitMethodBuilder[F] { - def emb: EmitMethodBuilder[F] = apply_method - - // wrapped DependentMethodBuilder - def newDepField[T : TypeInfo](value: Code[T]): Value[T] = dep_apply_method.newDepField[T](value) - - def newDepFieldAny[T: TypeInfo](value: Code[_]): Value[T] = dep_apply_method.newDepFieldAny[T](value) - - def newInstance(mb: EmitMethodBuilder[_]): Code[F] = dep_apply_method.newInstance(mb.mb) - - private[this] val typMap: mutable.Map[Type, Value[Type]] = - mutable.Map[Type, Value[Type]]() - - private[this] val literalsMap: mutable.Map[(PType, Any), PValue] = - mutable.Map[(PType, Any), PValue]() - - override def getType(t: Type): Code[Type] = - typMap.getOrElseUpdate(t, { - val fromParent = parentcb.getType(t) - val field = newDepField[Type](fromParent) - field - }) - - override def addLiteral(v: Any, t: PType): PValue = { - assert(v != null) - literalsMap.getOrElseUpdate(t -> v, { - val fromParent = parentcb.addLiteral(v, t) - newDepPField(fromParent.get) - }) - } - - def newDepPField(pc: PCode): PValue = { - val ti = typeToTypeInfo(pc.pt) - val field = newPField(pc.pt) - dep_apply_method.setFields += { (obj: lir.ValueX) => - val code = pc.code - // XXX below assumes that the first settable is the 'base' of the PSettable - val baseField = field.settableTuple()(0).asInstanceOf[ThisFieldRef[_]] - code.end.append(lir.putField(className, baseField.name, ti, obj, code.v)) - // FIXME need to initialize other potential settables in the PSettable here - val newC = new VCode(code.start, code.end, null) - code.clear() - newC - } - field - } - - def newDepEmitField(ec: EmitCode): EmitValue = { - val _pt = ec.pt - val ti = typeToTypeInfo(_pt) - val m = genFieldThisRef[Boolean]() - val v = genFieldThisRef()(ti) - dep_apply_method.setFields += { (obj: lir.ValueX) => - ec.m.end.append(lir.putField(className, m.name, typeInfo[Boolean], obj, ec.m.v)) - ec.m.end.append(lir.putField(className, v.name, ti, obj, ec.v.v)) - val newC = new VCode(ec.m.start, ec.m.end, null) - ec.m.clear() - ec.v.clear() - newC - } - new EmitValue { - def pt: PType = _pt - - def get(cb: EmitCodeBuilder): PCode = load.toI(cb).get( - cb, - "Can't convert missing value to PValue.").memoize(cb, "newDepEmitField_memo") - - def load: EmitCode = EmitCode(Code._empty, m.load(), PCode(_pt, v.load())) - } - } + def newEmitLocal(name: String, pt: SType, required: Boolean): EmitSettable = emb.newEmitLocal(name, pt, required) } class EmitFunctionBuilder[F](val apply_method: EmitMethodBuilder[F]) extends WrappedEmitMethodBuilder[F] { diff --git a/hail/src/main/scala/is/hail/expr/ir/EmitCodeBuilder.scala b/hail/src/main/scala/is/hail/expr/ir/EmitCodeBuilder.scala 
index 77055a3b868..257f511216d 100644 --- a/hail/src/main/scala/is/hail/expr/ir/EmitCodeBuilder.scala +++ b/hail/src/main/scala/is/hail/expr/ir/EmitCodeBuilder.scala @@ -5,9 +5,8 @@ import is.hail.asm4s.{coerce => _, _} import is.hail.expr.ir.functions.StringFunctions import is.hail.expr.ir.streams.StreamProducer import is.hail.lir -import is.hail.types.physical.stypes.SCode +import is.hail.types.physical.stypes.{SCode, SSettable, SValue} import is.hail.types.physical.stypes.interfaces.SStreamCode -import is.hail.types.physical.{PCode, PSettable, PType, PValue} import is.hail.utils._ object EmitCodeBuilder { @@ -55,8 +54,8 @@ class EmitCodeBuilder(val emb: EmitMethodBuilder[_], var code: Code[Unit]) exten tmp } - def assign(s: PSettable, v: PCode): Unit = { - assert(s.pt.equalModuloRequired(v.pt), s"type mismatch!\n settable=${s.pt}\n passed=${v.pt}") + def assign(s: SSettable, v: SCode): Unit = { + assert(s.st == v.st, s"type mismatch!\n settable=${s.st}\n passed=${v.st}") s.store(this, v) } @@ -72,41 +71,37 @@ class EmitCodeBuilder(val emb: EmitMethodBuilder[_], var code: Code[Unit]) exten (is, ix).zipped.foreach { case (s, c) => s.store(this, c) } } - def assign(s: PresentEmitSettable, v: PCode): Unit = { - s.store(this, v) - } - - def memoize(pc: PCode, name: String): PValue = pc.memoize(this, name) + def memoize(pc: SCode, name: String): SValue = pc.memoize(this, name) - def memoizeField(pc: PCode, name: String): PValue = { - val f = emb.newPField(name, pc.pt) + def memoizeField(pc: SCode, name: String): SValue = { + val f = emb.newPField(name, pc.st) assign(f, pc) f } def memoize(v: EmitCode, name: String): EmitValue = { - require(v.pt.isRealizable) + require(v.st.isRealizable) val l = emb.newEmitLocal(name, v.emitType) assign(l, v) l } def memoize(v: IEmitCode, name: String): EmitValue = { - require(v.pt.isRealizable) + require(v.st.isRealizable) val l = emb.newEmitLocal(name, v.emitType) assign(l, v) l } def memoizeField[T](ec: EmitCode, name: String): EmitValue = { - require(ec.pt.isRealizable) + require(ec.st.isRealizable) val l = emb.newEmitField(name, ec.emitType) l.store(this, ec) l } def withScopedMaybeStreamValue[T](ec: EmitCode, name: String)(f: EmitValue => T): T = { - if (ec.pt.isRealizable) { + if (ec.st.isRealizable) { f(memoizeField(ec, name)) } else { val ev = new EmitUnrealizableValue(ec) @@ -119,7 +114,7 @@ class EmitCodeBuilder(val emb: EmitMethodBuilder[_], var code: Code[Unit]) exten } def memoizeField(v: IEmitCode, name: String): EmitValue = { - require(v.pt.isRealizable) + require(v.st.isRealizable) val l = emb.newEmitField(name, v.emitType) assign(l, v) l @@ -139,40 +134,29 @@ class EmitCodeBuilder(val emb: EmitMethodBuilder[_], var code: Code[Unit]) exten if (c.ti != cpt.ti) throw new RuntimeException(s"invoke ${ callee.mb.methodName }: arg $i: type mismatch:" + s"\n got ${ c.ti }" + - s"\n expected ${ cpt.ti }") + s"\n expected ${ cpt.ti }" + + s"\n all param types: ${expectedArgs}-") FastIndexedSeq(c) - case (PCodeParam(pc), pcpt: PCodeParamType) => - if (pc.pt != pcpt.pt) + case (SCodeParam(pc), pcpt: SCodeParamType) => + if (pc.st != pcpt.st) throw new RuntimeException(s"invoke ${ callee.mb.methodName }: arg $i: type mismatch:" + - s"\n got ${ pc.pt }" + - s"\n expected ${ pcpt.pt }") - pc.codeTuple() - case (EmitParam(ec), PCodeEmitParamType(pt)) => - if (!ec.pt.equalModuloRequired(pt)) { + s"\n got ${ pc.st }" + + s"\n expected ${ pcpt.st }") + pc.makeCodeTuple(this) + case (EmitParam(ec), SCodeEmitParamType(et)) => + if 
(!ec.emitType.equalModuloRequired(et)) { throw new RuntimeException(s"invoke ${callee.mb.methodName}: arg $i: type mismatch:" + - s"\n got ${ec.pt}" + - s"\n expected ${pt}") + s"\n got ${ec.st}" + + s"\n expected ${et.st}") } - val castEc = (ec.pt.required, pt.required) match { - case (true, false) => - EmitCode.fromI(emb)(cb => ec.toI(cb).map(cb)(pc => PCode(pc.pt.setRequired(pt.required), pc.code))) + val castEc = (ec.required, et.required) match { + case (true, false) => ec.setOptional case (false, true) => - EmitCode.fromI(emb) { cb => - val presentPC = ec.toI(cb).get(cb) - IEmitCode.present(cb, PCode(presentPC.pt.setRequired(pt.required), presentPC.code)) - } + EmitCode.fromI(emb) { cb => IEmitCode.present(cb, ec.toI(cb).get(cb)) } case _ => ec } - - if (castEc.pt.required) { - append(Code.toUnit(castEc.m)) - castEc.codeTuple() - } else { - val ev = memoize(castEc, "cb_invoke_setup_params") - ev.codeTuple() - } - + castEc.makeCodeTuple(this) case (arg, expected) => throw new RuntimeException(s"invoke ${ callee.mb.methodName }: arg $i: type mismatch:" + s"\n got ${ arg }" + @@ -197,25 +181,25 @@ class EmitCodeBuilder(val emb: EmitMethodBuilder[_], var code: Code[Unit]) exten _invoke[T](callee, args: _*) } - // FIXME: this should be invokeSCode and should allocate/destructure a tuple when more than one code is present - def invokePCode(callee: EmitMethodBuilder[_], args: Param*): PCode = { - val pt = callee.emitReturnType.asInstanceOf[PCodeParamType].pt - PCode(pt, _invoke(callee, args: _*)) - } - - // for debugging - def printRegionValue(value: Code[_], typ: PType, region: Value[Region]): Unit = { - append(Code._println(StringFunctions.boxArg(EmitRegion(emb, region), typ)(value))) + def invokeSCode(callee: EmitMethodBuilder[_], args: Param*): SCode = { + val st = callee.emitReturnType.asInstanceOf[SCodeParamType].st + if (st.nCodes == 1) + st.fromCodes(FastIndexedSeq(_invoke(callee, args: _*))) + else { + val tup = newLocal("invokepcode_tuple", _invoke(callee, args: _*))(callee.asmTuple.ti) + st.fromCodes(callee.asmTuple.loadElementsAny(tup)) + } } // for debugging - def strValue(t: PType, code: Code[_]): Code[String] = { - StringFunctions.boxArg(EmitRegion(emb, emb.partitionRegion), t)(code).invoke[String]("toString") + def strValue(sc: SCode): Code[String] = { + StringFunctions.scodeToJavaValue(this, emb.partitionRegion, sc).invoke[String]("toString") } - def strValue(sc: SCode): Code[String] = { - val x = sc.asPCode - strValue(x.pt, x.code) + def strValue(ec: EmitCode): Code[String] = { + val s = newLocal[String]("s") + ec.toI(this).consume(this, assign(s, "NA"), sc => assign(s, strValue(sc))) + s } // for debugging diff --git a/hail/src/main/scala/is/hail/expr/ir/FoldConstants.scala b/hail/src/main/scala/is/hail/expr/ir/FoldConstants.scala index 4895a200a40..9729ed35281 100644 --- a/hail/src/main/scala/is/hail/expr/ir/FoldConstants.scala +++ b/hail/src/main/scala/is/hail/expr/ir/FoldConstants.scala @@ -33,6 +33,7 @@ object FoldConstants { _: NDArrayAgg | _: NDArrayWrite | _: NDArrayMatMul | + _: Trap | _: Die => None case ir: IR if ir.typ.isInstanceOf[TStream] => None case ir: IR if !IsConstant(ir) && diff --git a/hail/src/main/scala/is/hail/expr/ir/GenericTableValue.scala b/hail/src/main/scala/is/hail/expr/ir/GenericTableValue.scala index 52caa14d528..edd541cf14b 100644 --- a/hail/src/main/scala/is/hail/expr/ir/GenericTableValue.scala +++ b/hail/src/main/scala/is/hail/expr/ir/GenericTableValue.scala @@ -16,7 +16,7 @@ import is.hail.utils._ import org.apache.spark.rdd.RDD import 
org.apache.spark.sql.Row import org.apache.spark.{Partition, TaskContext} -import org.json4s.JsonAST.JObject +import org.json4s.JsonAST.{JObject, JString} import org.json4s.{Extraction, JValue} class PartitionIteratorLongReader( @@ -60,17 +60,18 @@ class PartitionIteratorLongReader( cb.goto(LproduceElementDone) } - override val element: EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, eltPType.loadCheapPCode(cb, rv))) + override val element: EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, eltPType.loadCheapSCode(cb, rv))) override def close(cb: EmitCodeBuilder): Unit = {} } - SStreamCode(SStream(producer.element.st, true), producer) + SStreamCode(SStream(producer.element.emitType), producer) } } def toJValue: JValue = { JObject( + "category" -> JString("PartitionIteratorLongReader"), "fullRowType" -> Extraction.decompose(fullRowType)(PartitionReader.formats), "contextType" -> Extraction.decompose(contextType)(PartitionReader.formats)) } diff --git a/hail/src/main/scala/is/hail/expr/ir/IR.scala b/hail/src/main/scala/is/hail/expr/ir/IR.scala index e5456c03d00..96c2c31a09a 100644 --- a/hail/src/main/scala/is/hail/expr/ir/IR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/IR.scala @@ -11,6 +11,7 @@ import is.hail.io.{AbstractTypedCodecSpec, BufferSpec, TypedCodecSpec} import is.hail.rvd.RVDSpecMaker import is.hail.types.encoded._ import is.hail.types.physical._ +import is.hail.types.physical.stypes.{BooleanSingleCodeType, Float32SingleCodeType, Float64SingleCodeType, Int32SingleCodeType, Int64SingleCodeType, PTypeReferenceSingleCodeType, SType} import is.hail.types.virtual._ import is.hail.utils.{FastIndexedSeq, _} import org.json4s.{DefaultFormats, Extraction, Formats, JValue, ShortTypeHints} @@ -18,15 +19,8 @@ import org.json4s.{DefaultFormats, Extraction, Formats, JValue, ShortTypeHints} import scala.language.existentials sealed trait IR extends BaseIR { - protected[ir] var _pType: PType = null private var _typ: Type = null - def pType = { - assert(_pType != null) - - _pType - } - def typ: Type = { if (_typ == null) try { @@ -64,9 +58,8 @@ sealed trait IR extends BaseIR { def unwrap: IR = _unwrap(this) } -sealed trait TypedIR[T <: Type, P <: PType] extends IR { +sealed trait TypedIR[T <: Type] extends IR { override def typ: T = coerce[T](super.typ) - override def pType: P = coerce[P](super.pType) } object Literal { @@ -174,7 +167,7 @@ final case class Ref(name: String, var _typ: Type) extends BaseRef // Recur can't exist outside of loop // Loops can be nested, but we can't call outer loops in terms of inner loops so there can only be one loop "active" in a given context -final case class TailLoop(name: String, params: IndexedSeq[(String, IR)], body: IR) extends IR with InferredState { +final case class TailLoop(name: String, params: IndexedSeq[(String, IR)], body: IR) extends IR { lazy val paramIdx: Map[String, Int] = params.map(_._1).zipWithIndex.toMap } final case class Recur(name: String, args: IndexedSeq[IR], _typ: Type) extends BaseRef @@ -297,20 +290,15 @@ final case class StreamZip(as: IndexedSeq[IR], names: IndexedSeq[String], body: } final case class StreamMultiMerge(as: IndexedSeq[IR], key: IndexedSeq[String]) extends IR { override def typ: TStream = coerce[TStream](super.typ) - override def pType: PStream = coerce[PStream](super.pType) } + +/** + * The StreamZipJoin node assumes that input streams have distinct keys. If input streams + * do not have distinct keys, the key that is included in the result is undefined, but + * is likely the last. 
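+ * That is, each input stream is expected to contain at most one element per key;
+ * ensuring keys are distinct within each input is the caller's responsibility.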
+ */ final case class StreamZipJoin(as: IndexedSeq[IR], key: IndexedSeq[String], curKey: String, curVals: String, joinF: IR) extends IR { override def typ: TStream = coerce[TStream](super.typ) - override def pType: PStream = coerce[PStream](super.pType) - private var _curValsType: PCanonicalArray = null - def getOrComputeCurValsType(valsType: => PType): PCanonicalArray = { - if (_curValsType == null) _curValsType = valsType.asInstanceOf[PCanonicalArray] - _curValsType - } - def curValsType: PCanonicalArray = { - assert(_curValsType != null) - _curValsType - } } final case class StreamFilter(a: IR, name: String, cond: IR) extends IR { override def typ: TStream = coerce[TStream](super.typ) @@ -319,11 +307,7 @@ final case class StreamFlatMap(a: IR, name: String, body: IR) extends IR { override def typ: TStream = coerce[TStream](super.typ) } -trait InferredState extends IR { var accPTypes: Array[PType] = null } - -final case class StreamFold(a: IR, zero: IR, accumName: String, valueName: String, body: IR) extends IR with InferredState { - def accPType: PType = accPTypes.head -} +final case class StreamFold(a: IR, zero: IR, accumName: String, valueName: String, body: IR) extends IR object StreamFold2 { def apply(a: StreamFold): StreamFold2 = { @@ -331,14 +315,12 @@ object StreamFold2 { } } -final case class StreamFold2(a: IR, accum: IndexedSeq[(String, IR)], valueName: String, seq: IndexedSeq[IR], result: IR) extends IR with InferredState { +final case class StreamFold2(a: IR, accum: IndexedSeq[(String, IR)], valueName: String, seq: IndexedSeq[IR], result: IR) extends IR { assert(accum.length == seq.length) val nameIdx: Map[String, Int] = accum.map(_._1).zipWithIndex.toMap } -final case class StreamScan(a: IR, zero: IR, accumName: String, valueName: String, body: IR) extends IR with InferredState { - def accPType: PType = accPTypes.head -} +final case class StreamScan(a: IR, zero: IR, accumName: String, valueName: String, body: IR) extends IR final case class StreamFor(a: IR, valueName: String, body: IR) extends IR @@ -395,7 +377,7 @@ object StreamJoin { final case class StreamJoinRightDistinct(left: IR, right: IR, lKey: IndexedSeq[String], rKey: IndexedSeq[String], l: String, r: String, joinF: IR, joinType: String) extends IR -sealed trait NDArrayIR extends TypedIR[TNDArray, PNDArray] { +sealed trait NDArrayIR extends TypedIR[TNDArray] { def elementTyp: Type = typ.elementType } @@ -519,8 +501,6 @@ object InsertFields { final case class InsertFields(old: IR, fields: Seq[(String, IR)], fieldOrder: Option[IndexedSeq[String]]) extends IR { override def typ: TStruct = coerce[TStruct](super.typ) - - override def pType: PStruct = coerce[PStruct](super.pType) } object GetFieldByIdx { @@ -562,6 +542,13 @@ object Die { def apply(message: String, typ: Type, errorId: Int): Die = Die(Str(message), typ, errorId) } +/** + * the Trap node runs the `child` node with an exception handler. If the child + * throws a HailException (user exception), then we return the tuple ((msg, errorId), NA). + * If the child throws any other exception, we raise that exception. If the + * child does not throw, then we return the tuple (NA, child value). 
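+ * For example, Trap(Str("ok")) evaluates to (NA, "ok"); with a child of Die("boom", TString, 5),
+ * the result is (("boom", 5), NA).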
+ */ +final case class Trap(child: IR) extends IR final case class Die(message: IR, _typ: Type, errorId: Int) extends IR final case class ApplyIR(function: String, typeArgs: Seq[Type], args: Seq[IR]) extends IR { @@ -629,25 +616,7 @@ final case class BlockMatrixWrite(child: BlockMatrixIR, writer: BlockMatrixWrite final case class BlockMatrixMultiWrite(blockMatrices: IndexedSeq[BlockMatrixIR], writer: BlockMatrixMultiWriter) extends IR -final case class CollectDistributedArray(contexts: IR, globals: IR, cname: String, gname: String, body: IR, tsd: Option[TableStageDependency] = None) extends IR { - val bufferSpec: BufferSpec = BufferSpec.defaultUncompressed - - lazy val contextPTuple: PTuple = PCanonicalTuple(required = true, coerce[PStream](contexts.pType).elementType) - lazy val globalPTuple: PTuple = PCanonicalTuple(required = true, globals.pType) - lazy val bodyPTuple: PTuple = PCanonicalTuple(required = true, body.pType) - - lazy val contextSpec: TypedCodecSpec = TypedCodecSpec(contextPTuple, bufferSpec) - lazy val globalSpec: TypedCodecSpec = TypedCodecSpec(globalPTuple, bufferSpec) - lazy val bodySpec: TypedCodecSpec = TypedCodecSpec(bodyPTuple, bufferSpec) - - lazy val decodedContextPTuple: PTuple = contextSpec.encodedType.decodedPType(contextPTuple.virtualType).asInstanceOf[PTuple] - lazy val decodedGlobalPTuple: PTuple = globalSpec.encodedType.decodedPType(globalPTuple.virtualType).asInstanceOf[PTuple] - lazy val decodedBodyPTuple: PTuple = bodySpec.encodedType.decodedPType(bodyPTuple.virtualType).asInstanceOf[PTuple] - - def decodedContextPType: PType = decodedContextPTuple.types(0) - def decodedGlobalPType: PType = decodedGlobalPTuple.types(0) - def decodedBodyPType: PType = decodedBodyPTuple.types(0) -} +final case class CollectDistributedArray(contexts: IR, globals: IR, cname: String, gname: String, body: IR, tsd: Option[TableStageDependency] = None) extends IR object PartitionReader { implicit val formats: Formats = new DefaultFormats() { diff --git a/hail/src/main/scala/is/hail/expr/ir/InferPType.scala b/hail/src/main/scala/is/hail/expr/ir/InferPType.scala index 9b8740d4a80..13d6210085c 100644 --- a/hail/src/main/scala/is/hail/expr/ir/InferPType.scala +++ b/hail/src/main/scala/is/hail/expr/ir/InferPType.scala @@ -1,22 +1,9 @@ package is.hail.expr.ir -import is.hail.types.physical.{Int32SingleCodeType, _} -import is.hail.types.virtual._ -import is.hail.utils._ -import is.hail.HailContext -import is.hail.types.{RDict, RIterable, TypeWithRequiredness} +import is.hail.types.TypeWithRequiredness +import is.hail.types.physical._ object InferPType { - - def clearPTypes(x: BaseIR): Unit = { - x match { - case x: IR => - x._pType = null - case _ => - } - x.children.foreach(clearPTypes) - } - def getCompatiblePType(pTypes: Seq[PType]): PType = { val r = TypeWithRequiredness.apply(pTypes.head.virtualType) pTypes.foreach(r.fromPType) @@ -29,323 +16,4 @@ object InferPType { pTypes.head else result.canonicalPType(pTypes.head.virtualType) } - - def apply(ir: IR): Unit = apply(ir, Env.empty) - - private type AAB[T] = Array[BoxedArrayBuilder[RecursiveArrayBuilderElement[T]]] - - case class RecursiveArrayBuilderElement[T](value: T, nested: Option[AAB[T]]) - - def newBuilder[T](n: Int): AAB[T] = Array.fill(n)(new BoxedArrayBuilder[RecursiveArrayBuilderElement[T]]) - - def apply(ir: IR, env: Env[PType], requiredness: RequirednessAnalysis, usesAndDefs: UsesAndDefs): Unit = { - try { - requiredness.states.m.foreach { case (ir, types) => - ir.t match { - case x: StreamFold => x.accPTypes = 
types.map(r => r.canonicalPType(x.zero.typ)).toArray - case x: StreamScan => x.accPTypes = types.map(r => r.canonicalPType(x.zero.typ)).toArray - case x: StreamFold2 => - x.accPTypes = x.accum.zip(types).map { case ((_, arg), r) => r.canonicalPType(arg.typ) }.toArray - case x: TailLoop => - x.accPTypes = x.params.zip(types).map { case ((_, arg), r) => r.canonicalPType(arg.typ) }.toArray - } - } - _inferWithRequiredness(ir, env, requiredness, usesAndDefs) - } catch { - case e: Exception => - throw new RuntimeException(s"error while inferring IR:\n${Pretty(ir)}", e) - } - VisitIR(ir) { case (node: IR) => - if (node._pType == null) - throw new RuntimeException(s"ptype inference failure: node not inferred:\n${Pretty(node)}\n ** Full IR: **\n${Pretty(ir)}") - } - } - - def apply(ir: IR, env: Env[PType]): Unit = { - val usesAndDefs = ComputeUsesAndDefs(ir, errorIfFreeVariables = false) - val requiredness = Requiredness.apply(ir, usesAndDefs, null, env) // Value IR inference doesn't need context - apply(ir, env, requiredness, usesAndDefs) - } - - private def lookup(name: String, r: TypeWithRequiredness, defNode: IR): PType = defNode match { - case Let(`name`, value, _) => value.pType - case TailLoop(`name`, _, body) => r.canonicalPType(body.typ) - case x: TailLoop => x.accPTypes(x.paramIdx(name)) - case ArraySort(a, l, r, c) => coerce[PStream](a.pType).elementType - case StreamMap(a, `name`, _) => coerce[PStream](a.pType).elementType - case x@StreamZip(as, _, _, _) => - coerce[PStream](as(x.nameIdx(name)).pType).elementType.setRequired(r.required) - case StreamZipJoin(as, key, `name`, _, joinF) => - assert(r.required) - getCompatiblePType(as.map { a => - PCanonicalStruct(true, key.map { k => - k -> coerce[PStruct](coerce[PStream](a.pType).elementType).fieldType(k) - }: _*) - }, r).setRequired(true) - case x@StreamZipJoin(as, key, _, `name`, joinF) => - assert(r.required) - assert(!r.asInstanceOf[RIterable].elementType.required) - x.getOrComputeCurValsType { - PCanonicalArray( - getCompatiblePType( - as.map(a => coerce[PStruct](coerce[PStream](a.pType).elementType)), - r.asInstanceOf[RIterable].elementType).setRequired(false), - required = true) - } - case StreamFilter(a, `name`, _) => coerce[PStream](a.pType).elementType - case StreamFlatMap(a, `name`, _) => coerce[PStream](a.pType).elementType - case StreamFor(a, `name`, _) => coerce[PStream](a.pType).elementType - case StreamFold(a, _, _, `name`, _) => coerce[PStream](a.pType).elementType - case x: StreamFold => x.accPType - case StreamScan(a, _, _, `name`, _) => coerce[PStream](a.pType).elementType - case x: StreamScan => x.accPType - case StreamFold2(a, _, `name`, _, _) => coerce[PStream](a.pType).elementType - case x: StreamFold2 => x.accPTypes(x.nameIdx(name)) - case StreamJoinRightDistinct(left, _, _, _, `name`, _, _, joinType) => - coerce[PStream](left.pType).elementType.orMissing(joinType == "left") - case StreamJoinRightDistinct(_, right, _, _, _, `name`, _, _) => - coerce[PStream](right.pType).elementType.setRequired(false) - case RunAggScan(a, `name`, _, _, _, _) => coerce[PStream](a.pType).elementType - case NDArrayMap(nd, `name`, _) => coerce[PNDArray](nd.pType).elementType - case NDArrayMap2(left, _, `name`, _, _) => coerce[PNDArray](left.pType ).elementType - case NDArrayMap2(_, right, _, `name`, _) => coerce[PNDArray](right.pType).elementType - case x@CollectDistributedArray(_, _, `name`, _, _, _) => x.decodedContextPType - case x@CollectDistributedArray(_, _, _, `name`, _, _) => x.decodedGlobalPType - case x@ShuffleWith(_, _, _, 
_, `name`, _, _) => x.shufflePType - case _ => throw new RuntimeException(s"$name not found in definition \n${ Pretty(defNode) }") - } - - private def _inferWithRequiredness(node: IR, env: Env[PType], requiredness: RequirednessAnalysis, usesAndDefs: UsesAndDefs): Unit = { - if (node._pType != null) - throw new RuntimeException(node.toString) - node.children.foreach { - case x: IR => _inferWithRequiredness(x, env, requiredness, usesAndDefs) - case c => throw new RuntimeException(s"unsupported node:\n${Pretty(c)}") - } - node._pType = node match { - case x if x.typ == TVoid => PVoid - case _: I32 | _: I64 | _: F32 | _: F64 | _: Str | _: UUID4 | _: Literal | _: True | _: False - | _: Cast | _: NA | _: Die | _: IsNA | _: ArrayZeros | _: ArrayLen | _: StreamLen - | _: LowerBoundOnOrderedCollection | _: ApplyBinaryPrimOp - | _: ApplyUnaryPrimOp | _: ApplyComparisonOp | _: WriteValue | _: Consume - | _: NDArrayAgg | _: ShuffleWrite | _: AggStateValue | _: CombOpValue | _: InitFromSerializedValue => - requiredness(node).canonicalPType(node.typ) - case EncodedLiteral(codec, _) => - codec.decodedPType() - case CastRename(v, typ) => v.pType.deepRename(typ) - case x: BaseRef if usesAndDefs.free.contains(RefEquality(x)) => - env.lookup(x.name) - case x: BaseRef => - lookup(x.name, requiredness(node), usesAndDefs.defs.lookup(node).asInstanceOf[IR]) - case MakeNDArray(data, shape, rowMajor, _) => - val nElem = shape.pType.asInstanceOf[PTuple].size - PCanonicalNDArray(coerce[PArray](data.pType).elementType.setRequired(true), nElem, requiredness(node).required) - case StreamRange(start: IR, stop: IR, step: IR, requiresMemoryManagementPerElement) => - assert(start.pType isOfType stop.pType) - assert(start.pType isOfType step.pType) - PCanonicalStream(start.pType.setRequired(true), required = requiredness(node).required) - case Let(_, _, body) => body.pType - case TailLoop(_, _, body) => body.pType - case a: AbstractApplyNode[_] => a.implementation.returnPType(a.returnType, a.args.map(_.pType)) - case ArrayRef(a, i, s) => - assert(i.pType isOfType PInt32()) - coerce[PArray](a.pType).elementType.setRequired(requiredness(node).required) - case ArraySort(a, leftName, rightName, lessThan) => - assert(lessThan.pType.isOfType(PBoolean())) - PCanonicalArray(coerce[PIterable](a.pType).elementType, requiredness(node).required) - case ToSet(a) => - PCanonicalSet(coerce[PIterable](a.pType).elementType, requiredness(node).required) - case ToDict(a) => - val elt = coerce[PBaseStruct](coerce[PIterable](a.pType).elementType) - PCanonicalDict(elt.types(0), elt.types(1), requiredness(node).required) - case ToArray(a) => - val elt = coerce[PIterable](a.pType).elementType - PCanonicalArray(elt, requiredness(node).required) - case CastToArray(a) => - val elt = coerce[PIterable](a.pType).elementType - PCanonicalArray(elt, requiredness(node).required) - case ToStream(a, requiresMemoryManagementPerElement) => - val elt = coerce[PIterable](a.pType).elementType - PCanonicalStream(elt, required = requiredness(node).required) - case GroupByKey(collection) => - val r = coerce[RDict](requiredness(node)) - val elt = coerce[PBaseStruct](coerce[PStream](collection.pType).elementType) - PCanonicalDict(elt.types(0), PCanonicalArray(elt.types(1), r.valueType.required), r.required) - case StreamTake(a, len) => - a.pType.setRequired(requiredness(node).required) - case StreamDrop(a, len) => - a.pType.setRequired(requiredness(node).required) - case StreamGrouped(a, size) => - val r = coerce[RIterable](requiredness(node)) - assert(size.pType 
isOfType PInt32()) - val innerPType = coerce[PStream](a.pType) - PCanonicalStream(innerPType.setRequired(r.elementType.required), r.required) - case StreamGroupByKey(a, key) => - val r = coerce[RIterable](requiredness(node)) - val innerPType = coerce[PStream](a.pType) - PCanonicalStream(innerPType.setRequired(r.elementType.required), r.required) - case StreamMap(a, name, body) => - PCanonicalStream(body.pType, requiredness(node).required) - case StreamZip(as, names, body, behavior) => - PCanonicalStream( - body.pType, - requiredness(node).required) - case StreamZipJoin(as, _, curKey, curVals, joinF) => - val r = requiredness(node).asInstanceOf[RIterable] - val rEltType = joinF.pType - PCanonicalStream( - rEltType, - r.required) - case StreamMultiMerge(as, _) => - val r = coerce[RIterable](requiredness(node)) - assert(r.elementType.required) - PCanonicalStream( - getCompatiblePType(as.map(_.pType.asInstanceOf[PStream].elementType), r.elementType), - r.required) - case StreamFilter(a, name, cond) => a.pType - case StreamFlatMap(a, name, body) => - val innerStreamType = coerce[PStream](body.pType) - PCanonicalStream( - innerStreamType.elementType, - requiredness(node).required) - case x: StreamFold => - x.accPType.setRequired(requiredness(node).required) - case x: StreamFold2 => - x.result.pType.setRequired(requiredness(node).required) - case x@StreamScan(a, _, _, _, body) => - val r = coerce[RIterable](requiredness(node)) - PCanonicalStream( - x.accPType.setRequired(r.elementType.required), - r.required) - case StreamJoinRightDistinct(left, right, _, _, _, _, join, _) => - PCanonicalStream( - join.pType, - requiredness(node).required) - case NDArrayShape(nd) => - val r = nd.pType.asInstanceOf[PCanonicalNDArray].shapeType - r.setRequired(requiredness(node).required) - case NDArrayReshape(nd, shape) => - val shapeT = shape.pType.asInstanceOf[PTuple] - PCanonicalNDArray(coerce[PNDArray](nd.pType).elementType, shapeT.size, - requiredness(node).required) - case NDArrayConcat(nds, _) => - val ndtyp = coerce[PNDArray](coerce[PArray](nds.pType).elementType) - ndtyp.setRequired(requiredness(node).required) - case NDArrayMap(nd, name, body) => - val ndPType = nd.pType.asInstanceOf[PNDArray] - PCanonicalNDArray(body.pType.setRequired(true), ndPType.nDims, requiredness(node).required) - case NDArrayMap2(l, r, lName, rName, body) => - val lPType = l.pType.asInstanceOf[PNDArray] - PCanonicalNDArray(body.pType.setRequired(true), lPType.nDims, requiredness(node).required) - case NDArrayReindex(nd, indexExpr) => - PCanonicalNDArray(coerce[PNDArray](nd.pType).elementType, indexExpr.length, requiredness(node).required) - case NDArrayRef(nd, idxs, _) => - coerce[PNDArray](nd.pType).elementType.setRequired(requiredness(node).required) - case NDArraySlice(nd, slices) => - val remainingDims = coerce[PTuple](slices.pType).types.filter(_.isInstanceOf[PTuple]) - PCanonicalNDArray(coerce[PNDArray](nd.pType).elementType, remainingDims.length, requiredness(node).required) - case NDArrayFilter(nd, filters) => coerce[PNDArray](nd.pType) - case NDArrayMatMul(l, r) => - val lTyp = coerce[PNDArray](l.pType) - val rTyp = coerce[PNDArray](r.pType) - PCanonicalNDArray(lTyp.elementType, TNDArray.matMulNDims(lTyp.nDims, rTyp.nDims), requiredness(node).required) - case NDArrayQR(child, mode) => NDArrayQR.pType(mode, child.pType.required) - case NDArraySVD(child, _, computeUV) => NDArraySVD.pTypes(computeUV, child.pType.required) - case NDArrayInv(_) => NDArrayInv.pType - case MakeStruct(fields) => - 
PCanonicalStruct(requiredness(node).required, - fields.map { case (name, a) => (name, a.pType) }: _ *) - case SelectFields(old, fields) => - if(HailContext.getFlag("use_spicy_ptypes") != null) { - PSubsetStruct(coerce[PStruct](old.pType), fields:_*) - } else { - val tbs = coerce[PStruct](old.pType) - tbs.selectFields(fields.toFastIndexedSeq) - } - case InsertFields(old, fields, fieldOrder) => - val tbs = coerce[PStruct](old.pType) - val s = tbs.insertFields(fields.map(f => { (f._1, f._2.pType) })) - fieldOrder.map { fds => - assert(fds.length == s.size) - PCanonicalStruct(tbs.required, fds.map(f => f -> s.fieldType(f)): _*) - }.getOrElse(s) - case GetField(o, name) => - val t = coerce[PStruct](o.pType) - if (t.index(name).isEmpty) - throw new RuntimeException(s"$name not in $t") - t.field(name).typ.setRequired(requiredness(node).required) - case MakeTuple(values) => - PCanonicalTuple(values.map { case (idx, v) => - PTupleField(idx, v.pType) - }.toFastIndexedSeq, requiredness(node).required) - case MakeArray(irs, t) => - val r = coerce[RIterable](requiredness(node)) - if (irs.isEmpty) r.canonicalPType(t) else - PCanonicalArray(getCompatiblePType(irs.map(_.pType), r.elementType), r.required) - case GetTupleElement(o, idx) => - val t = coerce[PTuple](o.pType) - t.fields(t.fieldIndex(idx)).typ.setRequired(requiredness(node).required) - case If(cond, cnsq, altr) => - assert(cond.pType isOfType PBoolean()) - val r = requiredness(node) - getCompatiblePType(FastIndexedSeq(cnsq.pType, altr.pType), r).setRequired(r.required) - case Coalesce(values) => - val r = requiredness(node) - getCompatiblePType(values.map(_.pType), r).setRequired(r.required) - case In(_, ept) => ept match { - case SingleCodeEmitParamType(required, sct) => sct match { - case StreamSingleCodeType(sr, eltType) => PCanonicalStream(eltType, required) - case Int32SingleCodeType => PInt32(required) - case Int64SingleCodeType => PInt64(required) - case Float32SingleCodeType => PFloat32(required) - case Float64SingleCodeType => PFloat64(required) - case BooleanSingleCodeType => PBoolean(required) - case PTypeReferenceSingleCodeType(pt) => pt.setRequired(required) - } - case PCodeEmitParamType(pt) => pt - } - case x: CollectDistributedArray => - PCanonicalArray(x.decodedBodyPType, requiredness(node).required) - case ReadPartition(context, rowType, reader) => - val child = reader.rowPType(rowType) - PCanonicalStream(child, required = requiredness(node).required) - case WritePartition(value, writeCtx, writer) => - writer.returnPType(writeCtx.pType, coerce[PStream](value.pType)) - case ReadValue(path, spec, requestedType) => - spec.decodedPType(requestedType).setRequired(requiredness(node).required) - case MakeStream(irs, t, requiresMemoryManagementPerElement) => - val r = coerce[RIterable](requiredness(node)) - if (irs.isEmpty) r.canonicalPType(t) else - PCanonicalStream(getCompatiblePType(irs.map(_.pType), r.elementType), r.required) - case x@ResultOp(resultIdx, sigs) => - PCanonicalTuple(true, sigs.map(_.pResultType): _*) - case x@RunAgg(body, result, signature) => result.pType - case x@RunAggScan(array, name, init, seq, result, signature) => - PCanonicalStream( - result.pType, - array.pType.required) - case ShuffleWith(keyFields, rowType, rowEType, keyEType, name, writer, readers) => - val r = requiredness(node) - assert(r.required == readers.pType.required) - readers.pType - case ShuffleWrite(id, rows) => - val r = requiredness(node) - assert(r.required) - PCanonicalBinary(true) - case ShufflePartitionBounds(id, nPartitions) => - val 
r = requiredness(node) - assert(r.required) - PCanonicalStream( - coerce[TShuffle](id.typ).keyDecodedPType, - required = true) - case ShuffleRead(id, keyRange) => - val r = requiredness(node) - assert(r.required) - PCanonicalStream( - coerce[TShuffle](id.typ).rowDecodedPType, - required = true) - } - if (node.pType.virtualType != node.typ) - throw new RuntimeException(s"pType.virtualType: ${node.pType.virtualType}, vType = ${node.typ}\n ir=$node") - } } diff --git a/hail/src/main/scala/is/hail/expr/ir/InferType.scala b/hail/src/main/scala/is/hail/expr/ir/InferType.scala index 254f8546852..53ddc49ac04 100644 --- a/hail/src/main/scala/is/hail/expr/ir/InferType.scala +++ b/hail/src/main/scala/is/hail/expr/ir/InferType.scala @@ -49,6 +49,7 @@ object InferType { case _: DeserializeAggs => TVoid case _: Begin => TVoid case Die(_, t, _) => t + case Trap(child) => TTuple(TTuple(TString, TInt32), child.typ) case If(cond, cnsq, altr) => assert(cond.typ == TBoolean) assert(cnsq.typ == altr.typ) @@ -90,7 +91,7 @@ object InferType { case ToDict(a) => val elt = coerce[TBaseStruct](coerce[TStream](a.typ).elementType) TDict(elt.types(0), elt.types(1)) - case ToArray(a) => + case ta@ToArray(a) => val elt = coerce[TStream](a.typ).elementType TArray(elt) case CastToArray(a) => diff --git a/hail/src/main/scala/is/hail/expr/ir/Interpret.scala b/hail/src/main/scala/is/hail/expr/ir/Interpret.scala index f80dc15f952..33e765ea877 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Interpret.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Interpret.scala @@ -3,15 +3,18 @@ package is.hail.expr.ir import is.hail.annotations._ import is.hail.asm4s._ import is.hail.expr.ir.lowering.LoweringPipeline -import is.hail.types.physical.{PTuple, PType, PTypeReferenceSingleCodeType, SingleCodeType} +import is.hail.types.physical.{PTuple, PType, stypes} import is.hail.types.virtual._ import is.hail.io.BufferSpec import is.hail.linalg.BlockMatrix import is.hail.rvd.RVDContext import is.hail.utils._ import is.hail.HailContext +import is.hail.types.physical.stypes.{PTypeReferenceSingleCodeType, SingleCodeType} import org.apache.spark.sql.Row +import scala.collection.mutable + object Interpret { type Agg = (IndexedSeq[Row], TStruct) @@ -130,7 +133,7 @@ object Interpret { case Add() => ll + rr case Subtract() => ll - rr case Multiply() => ll * rr - case FloatingPointDivide() => ll.toFloat / rr.toFloat + case FloatingPointDivide() => ll.toDouble / rr.toDouble case RoundToNegInfDivide() => java.lang.Math.floorDiv(ll, rr) case BitAnd() => ll & rr case BitOr() => ll | rr @@ -154,7 +157,7 @@ object Interpret { case Add() => ll + rr case Subtract() => ll - rr case Multiply() => ll * rr - case FloatingPointDivide() => ll.toFloat / rr.toFloat + case FloatingPointDivide() => ll.toDouble / rr.toDouble case RoundToNegInfDivide() => java.lang.Math.floorDiv(ll, rr) case BitAnd() => ll & rr case BitOr() => ll | rr @@ -514,7 +517,7 @@ object Interpret { for (i <- 0 until k) { advance(i) } - val builder = new BoxedArrayBuilder[Any]() + val builder = new mutable.ArrayBuffer[Any]() while (tournament(0) != k) { val i = tournament(0) val elt = Array.fill[Row](k)(null) @@ -529,7 +532,7 @@ object Interpret { } builder += interpret(joinF, env.bind(curKeyName -> curKey, curValsName -> elt.toFastIndexedSeq), args) } - builder.result().toFastIndexedSeq + builder.toFastIndexedSeq } case StreamFilter(a, name, cond) => val aValue = interpret(a, env, args) @@ -704,6 +707,12 @@ object Interpret { case Die(message, typ, errorId) => val message_ = 
interpret(message).asInstanceOf[String] fatal(if (message_ != null) message_ else "", errorId) + case Trap(child) => + try { + Row(null, interpret(child)) + } catch { + case e: HailException => Row(Row(e.msg, e.errorId), null) + } case ir@ApplyIR(function, _, functionArgs) => interpret(ir.explicitNode, env, args) case ApplySpecial("lor", _, Seq(left_, right_), _) => diff --git a/hail/src/main/scala/is/hail/expr/ir/MatrixIR.scala b/hail/src/main/scala/is/hail/expr/ir/MatrixIR.scala index 7a9aa9bf159..53f92888c70 100644 --- a/hail/src/main/scala/is/hail/expr/ir/MatrixIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/MatrixIR.scala @@ -20,6 +20,7 @@ import is.hail.variant._ import org.apache.spark.sql.Row import org.apache.spark.storage.StorageLevel import org.json4s._ +import org.json4s.jackson.JsonMethods object MatrixIR { def read(fs: FS, path: String, dropCols: Boolean = false, dropRows: Boolean = false, requestedType: Option[MatrixType] = None): MatrixIR = { @@ -105,11 +106,19 @@ trait MatrixReader { def lower(mr: MatrixRead): TableIR def toJValue: JValue + + def renderShort(): String + + def defaultRender(): String = { + StringEscapeUtils.escapeString(JsonMethods.compact(toJValue)) + } } abstract class MatrixHybridReader extends TableReader with MatrixReader { lazy val fullType: TableType = fullMatrixType.canonicalTableType + override def defaultRender(): String = super.defaultRender() + override def lower(mr: MatrixRead): TableIR = { var tr: TableIR = TableRead(mr.typ.canonicalTableType, mr.dropRows, this) if (mr.dropCols) { @@ -195,6 +204,8 @@ class MatrixNativeReader( ) extends MatrixReader { def pathsUsed: Seq[String] = FastSeq(params.path) + override def renderShort(): String = s"(MatrixNativeReader ${ params.path } ${ params.options.map(_.renderShort()).getOrElse("") })" + lazy val columnCount: Option[Int] = Some(spec.colsSpec .partitionCounts .sum @@ -301,6 +312,8 @@ class MatrixRangeReader( rowType = TStruct("row_idx" -> TInt32), entryType = TStruct.empty) + override def renderShort(): String = s"(MatrixRangeReader $params $nPartitionsAdj)" + val columnCount: Option[Int] = Some(params.nCols) lazy val partitionCounts: Option[IndexedSeq[Long]] = Some(partition(params.nRows, nPartitionsAdj).map(_.toLong)) diff --git a/hail/src/main/scala/is/hail/expr/ir/MatrixValue.scala b/hail/src/main/scala/is/hail/expr/ir/MatrixValue.scala index 4628869d701..490f9533b61 100644 --- a/hail/src/main/scala/is/hail/expr/ir/MatrixValue.scala +++ b/hail/src/main/scala/is/hail/expr/ir/MatrixValue.scala @@ -96,7 +96,7 @@ case class MatrixValue( FileFormat.version.rep, is.hail.HAIL_PRETTY_VERSION, "../references", - typ.colsTableType, + typ.colsTableType.copy(key = FastIndexedSeq[String]()), Map("globals" -> RVDComponentSpec("../globals/rows"), "rows" -> RVDComponentSpec("rows"), "partition_counts" -> PartitionCountsComponentSpec(partitionCounts))) diff --git a/hail/src/main/scala/is/hail/expr/ir/MatrixWriter.scala b/hail/src/main/scala/is/hail/expr/ir/MatrixWriter.scala index 1f1a3232ba5..5b38d8b0f1e 100644 --- a/hail/src/main/scala/is/hail/expr/ir/MatrixWriter.scala +++ b/hail/src/main/scala/is/hail/expr/ir/MatrixWriter.scala @@ -1,29 +1,33 @@ package is.hail.expr.ir -import java.io.OutputStream import is.hail.annotations.Region import is.hail.asm4s._ -import is.hail.expr.JSONAnnotationImpex import is.hail.expr.ir.functions.MatrixWriteBlockMatrix import is.hail.expr.ir.lowering.{LowererUnsupportedOperation, TableStage} import is.hail.expr.ir.streams.StreamProducer +import 
is.hail.expr.{JSONAnnotationImpex, Nat} import is.hail.io._ import is.hail.io.fs.FS import is.hail.io.gen.{ExportBGEN, ExportGen} import is.hail.io.index.StagedIndexWriter import is.hail.io.plink.ExportPlink import is.hail.io.vcf.ExportVCF -import is.hail.rvd.{RVDPartitioner, RVDSpecMaker} -import is.hail.types.encoded.{EBaseStruct, EType} +import is.hail.linalg.BlockMatrix +import is.hail.rvd.{IndexSpec, RVDPartitioner, RVDSpecMaker} +import is.hail.types.encoded.{EBaseStruct, EBlockMatrixNDArray, EType} +import is.hail.types.physical.stypes.SCode import is.hail.types.physical.stypes.interfaces._ -import is.hail.types.physical.{PBaseStructCode, PCanonicalBaseStruct, PCanonicalString, PCanonicalStruct, PCode, PIndexableValue, PInt64, PInt64Required, PStream, PStruct, PType} +import is.hail.types.physical.{PCanonicalBaseStruct, PCanonicalString, PCanonicalStruct, PInt64, PStream, PStruct, PType} import is.hail.types.virtual._ -import is.hail.types.{MatrixType, RTable, TableType} +import is.hail.types._ import is.hail.utils._ import is.hail.utils.richUtils.ByteTrackingOutputStream +import org.apache.spark.sql.Row import org.json4s.jackson.JsonMethods import org.json4s.{DefaultFormats, Formats, ShortTypeHints} +import java.io.OutputStream + object MatrixWriter { implicit val formats: Formats = new DefaultFormats() { override val typeHints = ShortTypeHints( @@ -114,12 +118,15 @@ case class MatrixNativeWriter( Str(partFile(1, 0)), globalWriter) val globalTableWriter = TableSpecWriter(s"$path/globals", TableType(tm.globalType, FastIndexedSeq(), TStruct.empty), "rows", "globals", "../references", log = false) - val colTableWriter = TableSpecWriter(s"$path/cols", tm.colsTableType, "rows", "../globals/rows", "../references", log = false) + val colTableWriter = TableSpecWriter(s"$path/cols", tm.colsTableType.copy(key = FastIndexedSeq[String]()), "rows", "../globals/rows", "../references", log = false) val rowTableWriter = TableSpecWriter(s"$path/rows", tm.rowsTableType, "rows", "../globals/rows", "../references", log = false) val entriesTableWriter = TableSpecWriter(s"$path/entries", TableType(tm.entriesRVType, FastIndexedSeq(), tm.globalType), "rows", "../globals/rows", "../references", log = false) val matrixWriter = MatrixSpecWriter(path, tm, "rows/rows", "globals/rows", "cols/rows", "entries/rows", "references", log = true) + val rowsIndexSpec = IndexSpec.defaultAnnotation("../../index", coerce[PStruct](pKey)) + val entriesIndexSpec = IndexSpec.defaultAnnotation("../../index", coerce[PStruct](pKey), withOffsetField = true) + RelationalWriter.scoped(path, overwrite = overwrite, Some(t.typ))( RelationalWriter.scoped(s"$path/globals", overwrite = false, None)( RelationalWriter.scoped(s"$path/cols", overwrite = false, None)( @@ -138,8 +145,8 @@ case class MatrixNativeWriter( WriteMetadata(MakeArray(GetField(colInfo, "partitionCounts")), colTableWriter), bindIR(ToArray(mapIR(ToStream(partInfo)) { fc => GetField(fc, "filePath") })) { files => Begin(FastIndexedSeq( - WriteMetadata(files, RVDSpecWriter(s"$path/rows/rows", RVDSpecMaker(rowSpec, lowered.partitioner))), - WriteMetadata(files, RVDSpecWriter(s"$path/entries/rows", RVDSpecMaker(entrySpec, RVDPartitioner.unkeyed(lowered.numPartitions)))))) + WriteMetadata(files, RVDSpecWriter(s"$path/rows/rows", RVDSpecMaker(rowSpec, lowered.partitioner, rowsIndexSpec))), + WriteMetadata(files, RVDSpecWriter(s"$path/entries/rows", RVDSpecMaker(entrySpec, RVDPartitioner.unkeyed(lowered.numPartitions), entriesIndexSpec))))) }, 
bindIR(ToArray(mapIR(ToStream(partInfo)) { fc => GetField(fc, "partitionCounts") })) { counts => Begin(FastIndexedSeq( @@ -179,12 +186,13 @@ case class SplitPartitionNativeWriter( context: EmitCode, region: Value[Region]): IEmitCode = { val keyType = ifIndexed { index.get._2 } - val iAnnotationType = PCanonicalStruct(required = true, "entries_offset" -> PInt64Required) + val iAnnotationType = PCanonicalStruct(required = true, "entries_offset" -> PInt64()) val mb = cb.emb + val indexWriter = ifIndexed { StagedIndexWriter.withDefaults(keyType, mb.ecb, annotationType = iAnnotationType) } - context.toI(cb).map(cb) { ctxCode: PCode => + context.toI(cb).map(cb) { ctxCode: SCode => val result = mb.newLocal[Long]("write_result") val filename1 = mb.newLocal[String]("filename1") val os1 = mb.newLocal[ByteTrackingOutputStream]("write_os1") @@ -202,7 +210,7 @@ case class SplitPartitionNativeWriter( indexWriter.add(cb, { IEmitCode.present(cb, keyType.asInstanceOf[PCanonicalBaseStruct] .constructFromFields(cb, stream.elementRegion, - keyType.fields.map(f => EmitCode.fromI(cb.emb)(cb => row.loadField(cb, f.name).typecast[PCode])), + keyType.fields.map(f => EmitCode.fromI(cb.emb)(cb => row.loadField(cb, f.name))), deepCopy = false)) }, ob1.invoke[Long]("indexOffset"), { IEmitCode.present(cb, @@ -223,39 +231,37 @@ case class SplitPartitionNativeWriter( cb.assign(n, n + 1L) } - PCode(pResultType, EmitCodeBuilder.scopedCode(mb) { cb: EmitCodeBuilder => - val pctx = ctxCode.memoize(cb, "context") - cb.assign(filename1, pctx.asString.loadString()) - if (hasIndex) { - val indexFile = cb.newLocal[String]("indexFile") - cb.assign(indexFile, const(index.get._1).concat(filename1)) - indexWriter.init(cb, indexFile) - } - cb.assign(filename2, const(partPrefix2).concat(filename1)) - cb.assign(filename1, const(partPrefix1).concat(filename1)) - cb.assign(os1, Code.newInstance[ByteTrackingOutputStream, OutputStream](mb.create(filename1))) - cb.assign(os2, Code.newInstance[ByteTrackingOutputStream, OutputStream](mb.create(filename2))) - cb.assign(ob1, spec1.buildCodeOutputBuffer(Code.checkcast[OutputStream](os1))) - cb.assign(ob2, spec2.buildCodeOutputBuffer(Code.checkcast[OutputStream](os2))) - cb.assign(n, 0L) - - stream.memoryManagedConsume(region, cb) { cb => - writeFile(cb, stream.element) - } + val pctx = ctxCode.memoize(cb, "context") + cb.assign(filename1, pctx.asString.loadString()) + if (hasIndex) { + val indexFile = cb.newLocal[String]("indexFile") + cb.assign(indexFile, const(index.get._1).concat(filename1).concat(".idx")) + indexWriter.init(cb, indexFile) + } + cb.assign(filename2, const(partPrefix2).concat(filename1)) + cb.assign(filename1, const(partPrefix1).concat(filename1)) + cb.assign(os1, Code.newInstance[ByteTrackingOutputStream, OutputStream](mb.create(filename1))) + cb.assign(os2, Code.newInstance[ByteTrackingOutputStream, OutputStream](mb.create(filename2))) + cb.assign(ob1, spec1.buildCodeOutputBuffer(Code.checkcast[OutputStream](os1))) + cb.assign(ob2, spec2.buildCodeOutputBuffer(Code.checkcast[OutputStream](os2))) + cb.assign(n, 0L) + + stream.memoryManagedConsume(region, cb) { cb => + writeFile(cb, stream.element) + } - cb += ob1.writeByte(0.asInstanceOf[Byte]) - cb += ob2.writeByte(0.asInstanceOf[Byte]) - cb.assign(result, pResultType.allocate(region)) - if (hasIndex) - indexWriter.close(cb) - cb += ob1.flush() - cb += ob2.flush() - cb += os1.invoke[Unit]("close") - cb += os2.invoke[Unit]("close") - filenameType.storeAtAddress(cb, pResultType.fieldOffset(result, "filePath"), region, pctx, 
false) - cb += Region.storeLong(pResultType.fieldOffset(result, "partitionCounts"), n) - result.get - }) + cb += ob1.writeByte(0.asInstanceOf[Byte]) + cb += ob2.writeByte(0.asInstanceOf[Byte]) + cb.assign(result, pResultType.allocate(region)) + if (hasIndex) + indexWriter.close(cb) + cb += ob1.flush() + cb += ob2.flush() + cb += os1.invoke[Unit]("close") + cb += os2.invoke[Unit]("close") + filenameType.storeAtAddress(cb, pResultType.fieldOffset(result, "filePath"), region, pctx, false) + cb += Region.storeLong(pResultType.fieldOffset(result, "partitionCounts"), n) + pResultType.loadCheapSCode(cb, result.get) } } } @@ -291,16 +297,16 @@ case class MatrixSpecWriter(path: String, typ: MatrixType, rowRelPath: String, g cb: EmitCodeBuilder, region: Value[Region]): Unit = { cb += cb.emb.getFS.invoke[String, Unit]("mkDir", path) - val pc = writeAnnotations.get(cb, "write annotations can't be missing!").asInstanceOf[PBaseStructCode] + val pc = writeAnnotations.get(cb, "write annotations can't be missing!").asBaseStruct val partCounts = cb.newLocal[Array[Long]]("partCounts") val c = pc.memoize(cb, "matrixPartCounts") - val a = c.loadField(cb, "rows").get(cb).memoize(cb, "rowCounts").asInstanceOf[PIndexableValue] + val a = c.loadField(cb, "rows").get(cb).asIndexable.memoize(cb, "rowCounts") val n = cb.newLocal[Int]("n", a.loadLength()) val i = cb.newLocal[Int]("i", 0) cb.assign(partCounts, Code.newArray[Long](n)) cb.whileLoop(i < n, { - val count = a.loadElement(cb, i).get(cb, "part count can't be missing!").asPCode + val count = a.loadElement(cb, i).get(cb, "part count can't be missing!") cb += partCounts.update(i, count.asInt64.longCode(cb)) cb.assign(i, i + 1) }) @@ -346,6 +352,119 @@ case class MatrixBlockMatrixWriter( blockSize: Int ) extends MatrixWriter { def apply(ctx: ExecuteContext, mv: MatrixValue): Unit = MatrixWriteBlockMatrix(ctx, mv, entryField, path, overwrite, blockSize) + + override def lower(colsFieldName: String, entriesFieldName: String, colKey: IndexedSeq[String], + ctx: ExecuteContext, ts: TableStage, t: TableIR, r: RTable, relationalLetsAbove: Map[String, IR]): IR = { + + val tm = MatrixType.fromTableType(t.typ, colsFieldName, entriesFieldName, colKey) + val rm = r.asMatrixType(colsFieldName, entriesFieldName) + + val countColumnsIR = ArrayLen(GetField(ts.getGlobals(), colsFieldName)) + val numCols: Int = CompileAndEvaluate(ctx, countColumnsIR, true).asInstanceOf[Int] + val numBlockCols: Int = (numCols - 1) / blockSize + 1 + val lastBlockNumCols = numCols % blockSize + + val rowCountIR = ts.mapCollect(relationalLetsAbove)(paritionIR => StreamLen(paritionIR)) + val inputRowCountPerPartition: IndexedSeq[Int] = CompileAndEvaluate(ctx, rowCountIR).asInstanceOf[IndexedSeq[Int]] + val inputPartStartsPlusLast = inputRowCountPerPartition.scanLeft(0L)(_ + _) + val inputPartStarts = inputPartStartsPlusLast.dropRight(1) + val inputPartStops = inputPartStartsPlusLast.tail + + val numRows = inputPartStartsPlusLast.last + val numBlockRows: Int = (numRows.toInt - 1) / blockSize + 1 + + // Zip contexts with partition starts and ends + val zippedWithStarts = ts.mapContexts{oldContextsStream => zipIR(IndexedSeq(oldContextsStream, ToStream(Literal(TArray(TInt64), inputPartStarts)), ToStream(Literal(TArray(TInt64), inputPartStops))), ArrayZipBehavior.AssertSameLength){ case IndexedSeq(oldCtx, partStart, partStop) => + MakeStruct(Seq[(String, IR)]("mwOld" -> oldCtx, "mwStartIdx" -> Cast(partStart, TInt32), "mwStopIdx" -> Cast(partStop, TInt32))) + }}(newCtx => GetField(newCtx, "mwOld")) + + // 
Now label each row with its idx. + val perRowIdxId = genUID() + val partsZippedWithIdx = zippedWithStarts.mapPartitionWithContext { (part, ctx) => + zip2(part, rangeIR(GetField(ctx, "mwStartIdx"), GetField(ctx, "mwStopIdx")), ArrayZipBehavior.AssertSameLength) { (partRow, idx) => + insertIR(partRow, (perRowIdxId, idx)) + } + } + + // Two steps, make a partitioner that works currently based on row_idx splits, then resplit accordingly. + val inputRowIntervals = inputPartStarts.zip(inputPartStops).map{ case (intervalStart, intervalEnd) => + Interval(Row(intervalStart.toInt), Row(intervalEnd.toInt), true, false) + } + val rowIdxPartitioner = RVDPartitioner.generate(TStruct((perRowIdxId, TInt32)), inputRowIntervals) + + val keyedByRowIdx = partsZippedWithIdx.changePartitionerNoRepartition(rowIdxPartitioner) + + // Now create a partitioner that makes appropriately sized blocks + val desiredRowStarts = (0 until numBlockRows).map(_ * blockSize) + val desiredRowStops = desiredRowStarts.drop(1) :+ numRows.toInt + val desiredRowIntervals = desiredRowStarts.zip(desiredRowStops).map{ + case (intervalStart, intervalEnd) => Interval(Row(intervalStart), Row(intervalEnd), true, false) + } + + val blockSizeGroupsPartitioner = RVDPartitioner.generate(TStruct((perRowIdxId, TInt32)), desiredRowIntervals) + val rowsInBlockSizeGroups: TableStage = keyedByRowIdx.repartitionNoShuffle(blockSizeGroupsPartitioner) + + def createBlockMakingContexts(tablePartsStreamIR: IR): IR = { + flatten(zip2(tablePartsStreamIR, rangeIR(numBlockRows), ArrayZipBehavior.AssertSameLength) { case (tableSinglePartCtx, blockColIdx) => + mapIR(rangeIR(I32(numBlockCols))){ blockColIdx => + MakeStruct(Seq("oldTableCtx" -> tableSinglePartCtx, "blockStart" -> (blockColIdx * I32(blockSize)), + "blockSize" -> If(blockColIdx ceq I32(numBlockCols - 1), I32(lastBlockNumCols), I32(blockSize)), + "blockColIdx" -> blockColIdx, + "blockRowIdx" -> blockColIdx)) + } + }) + } + + val tableOfNDArrays = rowsInBlockSizeGroups.mapContexts(createBlockMakingContexts)(ir => GetField(ir, "oldTableCtx")).mapPartitionWithContext{ (partIr, ctxRef) => + bindIR(GetField(ctxRef, "blockStart")){ blockStartRef => + val numColsOfBlock = GetField(ctxRef, "blockSize") + val arrayOfSlicesAndIndices = ToArray(mapIR(partIr) { singleRow => + val mappedSlice = ToArray(mapIR(ToStream(sliceArrayIR(GetField(singleRow, entriesFieldName), blockStartRef, blockStartRef + numColsOfBlock)))(entriesStructRef => + GetField(entriesStructRef, entryField) + )) + MakeStruct(Seq( + perRowIdxId -> GetField(singleRow, perRowIdxId), + "rowOfData" -> mappedSlice + )) + }) + bindIR(arrayOfSlicesAndIndices){ arrayOfSlicesAndIndicesRef => + val idxOfResult = GetField(ArrayRef(arrayOfSlicesAndIndicesRef, I32(0)), perRowIdxId) + val ndarrayData = ToArray(flatMapIR(ToStream(arrayOfSlicesAndIndicesRef)){idxAndSlice => + ToStream(GetField(idxAndSlice, "rowOfData")) + }) + val numRowsOfBlock = ArrayLen(arrayOfSlicesAndIndicesRef) + val shape = maketuple(Cast(numRowsOfBlock, TInt64), Cast(numColsOfBlock, TInt64)) + val ndarray = MakeNDArray(ndarrayData, shape, True(), ErrorIDs.NO_ERROR) + MakeStream(Seq(MakeStruct(Seq( + perRowIdxId -> idxOfResult, + "blockRowIdx" -> GetField(ctxRef, "blockRowIdx"), + "blockColIdx" -> GetField(ctxRef, "blockColIdx"), + "ndBlock" -> ndarray))), + TStream(TStruct(perRowIdxId -> TInt32, "blockRowIdx" -> TInt32, "blockColIdx" -> TInt32, "ndBlock" -> ndarray.typ))) + } + } + } + + val elementType = tm.entryType.fieldType(entryField) + val etype = 
EBlockMatrixNDArray(EType.fromTypeAndAnalysis(elementType, rm.entryType.field(entryField)), encodeRowMajor = true, required = true) + val spec = TypedCodecSpec(etype, TNDArray(tm.entryType.fieldType(entryField), Nat(2)), BlockMatrix.bufferSpec) + + val pathsWithColMajorIndices = tableOfNDArrays.mapCollect(relationalLetsAbove) { partition => + ToArray(mapIR(partition) { singleNDArrayTuple => + bindIR(GetField(singleNDArrayTuple, "blockRowIdx") + (GetField(singleNDArrayTuple, "blockColIdx") * numBlockRows)) { colMajorIndex => + val blockPath = + Str(s"$path/parts/part-") + + invoke("str", TString, colMajorIndex) + Str("-") + UUID4() + maketuple(colMajorIndex, WriteValue(GetField(singleNDArrayTuple, "ndBlock"), blockPath, spec)) + } + }) + } + val flatPathsAndIndices = flatMapIR(ToStream(pathsWithColMajorIndices))(ToStream(_)) + val sortedColMajorPairs = sortIR(flatPathsAndIndices){case (l, r) => ApplyComparisonOp(LT(TInt32), GetTupleElement(l, 0), GetTupleElement(r, 0))} + val flatPaths = ToArray(mapIR(ToStream(sortedColMajorPairs))(GetTupleElement(_, 1))) + val bmt = BlockMatrixType(elementType, IndexedSeq(numRows, numCols), numRows==1, blockSize, BlockMatrixSparsity.dense) + RelationalWriter.scoped(path, overwrite, None)(WriteMetadata(flatPaths, BlockMatrixNativeMetadataWriter(path, false, bmt))) + } } object MatrixNativeMultiWriter { diff --git a/hail/src/main/scala/is/hail/expr/ir/NativeReaderOptions.scala b/hail/src/main/scala/is/hail/expr/ir/NativeReaderOptions.scala index ea1e7451f97..61d359a7b14 100644 --- a/hail/src/main/scala/is/hail/expr/ir/NativeReaderOptions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/NativeReaderOptions.scala @@ -55,4 +55,6 @@ case class NativeReaderOptions( "intervalPointType" -> intervalPointType.parsableString(), "filterIntervals" -> filterIntervals) } + + def renderShort(): String = s"(IntervalRead: ${intervals.length} intervals, filter=${filterIntervals})" } diff --git a/hail/src/main/scala/is/hail/expr/ir/Param.scala b/hail/src/main/scala/is/hail/expr/ir/Param.scala index f415646a66b..0c7a266e386 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Param.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Param.scala @@ -1,7 +1,7 @@ package is.hail.expr.ir import is.hail.asm4s.{BooleanInfo, Code, TypeInfo, classInfo} -import is.hail.types.physical.{PCode, PType, SingleCodePCode, SingleCodeType} +import is.hail.types.physical.stypes.{EmitType, SCode, SType, SingleCodeType} import is.hail.types.virtual.Type import is.hail.utils.FastIndexedSeq @@ -17,10 +17,10 @@ case class CodeParamType(ti: TypeInfo[_]) extends ParamType { override def toString: String = s"CodeParam($ti)" } -case class PCodeParamType(pt: PType) extends ParamType { - def nCodes: Int = pt.nCodes +case class SCodeParamType(st: SType) extends ParamType { + def nCodes: Int = st.nCodes - override def toString: String = s"PCodeParam($pt, $nCodes)" + override def toString: String = s"SCodeParam($st, $nCodes)" } trait EmitParamType extends ParamType { @@ -49,18 +49,16 @@ case class SingleCodeEmitParamType(required: Boolean, sct: SingleCodeType) exten override def toString: String = s"SingleCodeEmitParamType($required, $sct)" } -case class PCodeEmitParamType(pt: PType) extends EmitParamType { - def required: Boolean = pt.required +case class SCodeEmitParamType(et: EmitType) extends EmitParamType { + def required: Boolean = et.required - def virtualType: Type = pt.virtualType + def virtualType: Type = et.st.virtualType - def definedTupleTypes(): IndexedSeq[TypeInfo[_]] = pt.codeTupleTypes() - - override 
def toString: String = s"PTypeEmitParamType($pt, $nCodes)" + def definedTupleTypes(): IndexedSeq[TypeInfo[_]] = et.st.codeTupleTypes() } sealed trait Param case class CodeParam(c: Code[_]) extends Param case class EmitParam(ec: EmitCode) extends Param -case class PCodeParam(pc: PCode) extends Param +case class SCodeParam(sc: SCode) extends Param diff --git a/hail/src/main/scala/is/hail/expr/ir/Parser.scala b/hail/src/main/scala/is/hail/expr/ir/Parser.scala index 2e69b5787c0..9f4d510402f 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Parser.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Parser.scala @@ -19,6 +19,7 @@ import org.json4s.{Formats, JObject} import org.json4s.jackson.{JsonMethods, Serialization} import scala.collection.JavaConverters._ +import scala.collection.mutable import scala.reflect.ClassTag import scala.util.parsing.combinator.JavaTokenParsers import scala.util.parsing.input.Positional @@ -286,25 +287,25 @@ object IRParser { f: (TokenIterator) => T, sep: Token, end: Token)(implicit tct: ClassTag[T]): Array[T] = { - val xs = new BoxedArrayBuilder[T]() + val xs = new mutable.ArrayBuffer[T]() while (it.hasNext && it.head != end) { xs += f(it) if (it.head == sep) consumeToken(it) } - xs.result() + xs.toArray } def repUntil[T](it: TokenIterator, f: (TokenIterator) => StackFrame[T], end: Token)(implicit tct: ClassTag[T]): StackFrame[Array[T]] = { - val xs = new BoxedArrayBuilder[T]() + val xs = new mutable.ArrayBuffer[T]() var cont: T => StackFrame[Array[T]] = null def loop(): StackFrame[Array[T]] = { if (it.hasNext && it.head != end) { f(it).flatMap(cont) } else { - done(xs.result()) + done(xs.toArray) } } cont = { t => @@ -317,11 +318,11 @@ object IRParser { def repUntilNonStackSafe[T](it: TokenIterator, f: (TokenIterator) => T, end: Token)(implicit tct: ClassTag[T]): Array[T] = { - val xs = new BoxedArrayBuilder[T]() + val xs = new mutable.ArrayBuffer[T]() while (it.hasNext && it.head != end) { xs += f(it) } - xs.result() + xs.toArray } def base_seq_parser[T : ClassTag](f: TokenIterator => T)(it: TokenIterator): Array[T] = { @@ -1258,6 +1259,11 @@ object IRParser { ir_value_expr(env)(it).map { msg => Die(msg, typ, errorId) } + case "Trap" => + ir_value_expr(env)(it).map { child => + Trap(child) + } + case "ApplySeeded" => val function = identifier(it) val seed = int64_literal(it) diff --git a/hail/src/main/scala/is/hail/expr/ir/Pretty.scala b/hail/src/main/scala/is/hail/expr/ir/Pretty.scala index 3f787559efa..bb8a79050b2 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Pretty.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Pretty.scala @@ -205,7 +205,7 @@ object Pretty { FastSeq(if (typ == reader.fullMatrixType) "None" else typ.parsableString(), prettyBooleanLiteral(dropCols), prettyBooleanLiteral(dropRows), - '"' + StringEscapeUtils.escapeString(JsonMethods.compact(reader.toJValue)) + '"') + if (elideLiterals) reader.renderShort() else '"' + StringEscapeUtils.escapeString(JsonMethods.compact(reader.toJValue)) + '"') case MatrixWrite(_, writer) => single('"' + StringEscapeUtils.escapeString(Serialization.write(writer)(MatrixWriter.formats)) + '"') case MatrixMultiWrite(_, writer) => @@ -256,7 +256,7 @@ object Pretty { case TableRead(typ, dropRows, tr) => FastSeq(if (typ == tr.fullType) "None" else typ.parsableString(), prettyBooleanLiteral(dropRows), - '"' + StringEscapeUtils.escapeString(JsonMethods.compact(tr.toJValue)) + '"') + if (elideLiterals) tr.renderShort() else '"' + StringEscapeUtils.escapeString(JsonMethods.compact(tr.toJValue)) + '"') case TableWrite(_, 
writer) => single('"' + StringEscapeUtils.escapeString(Serialization.write(writer)(TableWriter.formats)) + '"') case TableMultiWrite(_, writer) => @@ -293,6 +293,8 @@ object Pretty { single(prettyStringLiteral(Serialization.write(function)(RelationalFunctions.formats))) case MatrixToValueApply(_, function) => single(prettyStringLiteral(Serialization.write(function)(RelationalFunctions.formats))) + case BlockMatrixToValueApply(_, function) => + single(prettyStringLiteral(Serialization.write(function)(RelationalFunctions.formats))) case BlockMatrixToTableApply(_, _, function) => single(prettyStringLiteral(Serialization.write(function)(RelationalFunctions.formats))) case TableRename(_, rowMap, globalMap) => diff --git a/hail/src/main/scala/is/hail/expr/ir/TypeToIRIntermediateClassTag.scala b/hail/src/main/scala/is/hail/expr/ir/PrimitiveTypeToIRIntermediateClassTag.scala similarity index 73% rename from hail/src/main/scala/is/hail/expr/ir/TypeToIRIntermediateClassTag.scala rename to hail/src/main/scala/is/hail/expr/ir/PrimitiveTypeToIRIntermediateClassTag.scala index 5dbadb80899..c72c79dd6ba 100644 --- a/hail/src/main/scala/is/hail/expr/ir/TypeToIRIntermediateClassTag.scala +++ b/hail/src/main/scala/is/hail/expr/ir/PrimitiveTypeToIRIntermediateClassTag.scala @@ -5,14 +5,12 @@ import is.hail.types.virtual._ import scala.reflect.{ClassTag, classTag} -object TypeToIRIntermediateClassTag { +object PrimitiveTypeToIRIntermediateClassTag { def apply(t: Type): ClassTag[_] = t.fundamentalType match { - case TVoid => classTag[Unit] case TBoolean => classTag[Boolean] case TInt32 => classTag[Int] case TInt64 => classTag[Long] case TFloat32 => classTag[Float] case TFloat64 => classTag[Double] - case _: TBaseStruct | _: TArray | TBinary => classTag[Long] } } diff --git a/hail/src/main/scala/is/hail/expr/ir/PruneDeadFields.scala b/hail/src/main/scala/is/hail/expr/ir/PruneDeadFields.scala index 2c772477db6..2b035a5aaa6 100644 --- a/hail/src/main/scala/is/hail/expr/ir/PruneDeadFields.scala +++ b/hail/src/main/scala/is/hail/expr/ir/PruneDeadFields.scala @@ -1373,6 +1373,20 @@ object PruneDeadFields { ) memoizeMatrixIR(child, dep, memo) BindingEnv.empty + case TailLoop(name, params, body) => + val bodyEnv = memoizeValueIR(body, body.typ, memo) + val paramTypes = params.map{ case (paramName, paramIR) => + bodyEnv.eval.lookupOption(paramName) match { + case Some(ab) => unifySeq(paramIR.typ, ab.result()) + case None => minimal(paramIR.typ) + } + } + unifyEnvsSeq( + IndexedSeq(bodyEnv.deleteEval(params.map(_._1))) ++ + params.zip(paramTypes).map{ case ((paramName, paramIR), paramType) => + memoizeValueIR(paramIR, paramType, memo) + } + ) case CollectDistributedArray(contexts, globals, cname, gname, body, tsd) => val rArray = requestedType.asInstanceOf[TArray] val bodyEnv = memoizeValueIR(body, rArray.elementType, memo) diff --git a/hail/src/main/scala/is/hail/expr/ir/Requiredness.scala b/hail/src/main/scala/is/hail/expr/ir/Requiredness.scala index 36aeb81be98..40907aa4b2d 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Requiredness.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Requiredness.scala @@ -3,7 +3,8 @@ package is.hail.expr.ir import is.hail.expr.ir.functions.GetElement import is.hail.methods.ForceCountTable import is.hail.types._ -import is.hail.types.physical.{PCanonicalStream, PStream, PType, PTypeReferenceSingleCodeType, StreamSingleCodeType} +import is.hail.types.physical.stypes.{EmitType, PTypeReferenceSingleCodeType, StreamSingleCodeType} +import is.hail.types.physical.{PCanonicalStream, PStream, 
PType} import is.hail.types.virtual._ import is.hail.utils._ import org.apache.spark.sql.catalyst.expressions.GenericRow @@ -24,6 +25,7 @@ object Requiredness { case class RequirednessAnalysis(r: Memo[BaseTypeWithRequiredness], states: Memo[IndexedSeq[TypeWithRequiredness]]) { def lookup(node: BaseIR): BaseTypeWithRequiredness = r.lookup(node) + def lookupState(node: BaseIR): IndexedSeq[BaseTypeWithRequiredness] = states.lookup(node) def lookupOpt(node: BaseIR): Option[BaseTypeWithRequiredness] = r.get(node) def apply(node: IR): TypeWithRequiredness = coerce[TypeWithRequiredness](lookup(node)) def getState(node: IR): IndexedSeq[TypeWithRequiredness] = states(node) @@ -99,7 +101,8 @@ class Requiredness(val usesAndDefs: UsesAndDefs, ctx: ExecuteContext) { def addBindingRelations(node: BaseIR): Unit = { val refMap: Map[String, IndexedSeq[RefEquality[BaseRef]]] = usesAndDefs.uses(node).toFastIndexedSeq.groupBy(_.t.name) - def addElementBinding(name: String, d: IR, makeOptional: Boolean = false): Unit = { + def addElementBinding(name: String, d: IR, makeOptional: Boolean = false, makeRequired: Boolean = false): Unit = { + assert(!(makeOptional && makeRequired)) if (refMap.contains(name)) { val uses = refMap(name) val eltReq = coerce[RIterable](lookup(d)).elementType @@ -107,6 +110,10 @@ class Requiredness(val usesAndDefs: UsesAndDefs, ctx: ExecuteContext) { val optional = eltReq.copy(eltReq.children) optional.union(false) optional + } else if (makeRequired) { + val req = eltReq.copy(eltReq.children) + req.union(true) + req } else eltReq uses.foreach { u => defs.bind(u, Array(req)) } dependents.getOrElseUpdate(d, mutable.Set[RefEquality[BaseIR]]()) ++= uses @@ -169,8 +176,8 @@ class Requiredness(val usesAndDefs: UsesAndDefs, ctx: ExecuteContext) { case x@ApplyIR(_, _, args) => x.refIdx.foreach { case (n, i) => addBinding(n, args(i)) } case ArraySort(a, l, r, c) => - addElementBinding(l, a) - addElementBinding(r, a) + addElementBinding(l, a, makeRequired = true) + addElementBinding(r, a, makeRequired = true) case StreamMap(a, name, body) => addElementBinding(name, a) case x@StreamZip(as, names, body, behavior) => @@ -452,6 +459,15 @@ class Requiredness(val usesAndDefs: UsesAndDefs, ctx: ExecuteContext) { // always required case _: I32 | _: I64 | _: F32 | _: F64 | _: Str | True() | False() | _: IsNA | _: Die | _: UUID4 | _: Consume => case _: CombOpValue | _: AggStateValue => + case Trap(child) => + // error message field is missing if the child runs without error + requiredness.asInstanceOf[RTuple].field(0).union(false) + + val childField = requiredness.asInstanceOf[RTuple].field(1) + // trap can return optional if child throws exception + childField.union(false) + + childField.unionFrom(lookup(child)) case x if x.typ == TVoid => case ApplyComparisonOp(EQWithNA(_, _), _, _) | ApplyComparisonOp(NEQWithNA(_, _), _, _) | ApplyComparisonOp(Compare(_, _), _, _) => case ApplyComparisonOp(op, l, r) => @@ -658,8 +674,11 @@ class Requiredness(val usesAndDefs: UsesAndDefs, ctx: ExecuteContext) { requiredness.unionFrom(oldReq.field(idx)) case x: ApplyIR => requiredness.unionFrom(lookup(x.body)) case x: AbstractApplyNode[_] => //FIXME: round-tripping via PTypes. 
- val argP = x.args.map(a => lookup(a).canonicalPType(a.typ)) - requiredness.fromPType(x.implementation.returnPType(x.returnType, argP)) + val argP = x.args.map { a => + val pt = lookup(a).canonicalPType(a.typ) + EmitType(pt.sType, pt.required) + } + requiredness.fromPType(x.implementation.computeReturnEmitType(x.returnType, argP).canonicalPType) case CollectDistributedArray(ctxs, globs, _, _, body, _) => requiredness.union(lookup(ctxs).required) coerce[RIterable](requiredness).elementType.unionFrom(lookup(body)) @@ -674,7 +693,7 @@ class Requiredness(val usesAndDefs: UsesAndDefs, ctx: ExecuteContext) { requiredness.union(lookup(path).required) requiredness.fromPType(spec.encodedType.decodedPType(rt)) case In(_, t) => t match { - case PCodeEmitParamType(pt) => requiredness.fromPType(pt) + case SCodeEmitParamType(et) => requiredness.fromPType(et.canonicalPType) case SingleCodeEmitParamType(required, StreamSingleCodeType(_, eltType)) => requiredness.fromPType(PCanonicalStream(eltType, required)) // fixme hacky case SingleCodeEmitParamType(required, PTypeReferenceSingleCodeType(pt)) => requiredness.fromPType(pt.setRequired(required)) case SingleCodeEmitParamType(required, _) => requiredness.union(required) @@ -709,9 +728,9 @@ class Requiredness(val usesAndDefs: UsesAndDefs, ctx: ExecuteContext) { requiredness.unionFrom(lookup(readers)) case ShuffleWrite(id, rows) => // required case ShufflePartitionBounds(id, nPartitions) => - coerce[RIterable](requiredness).elementType.fromPType(coerce[TShuffle](id.typ).keyDecodedPType) + coerce[RIterable](requiredness).elementType.fromPType(coerce[TShuffle](id.typ).keyDecodedPType.setRequired(true)) case ShuffleRead(id, keyRange) => - coerce[RIterable](requiredness).elementType.fromPType(coerce[TShuffle](id.typ).rowDecodedPType) + coerce[RIterable](requiredness).elementType.fromPType(coerce[TShuffle](id.typ).rowDecodedPType.setRequired(true)) } requiredness.probeChangedAndReset() } diff --git a/hail/src/main/scala/is/hail/expr/ir/Simplify.scala b/hail/src/main/scala/is/hail/expr/ir/Simplify.scala index e9daf779107..1557ab833fc 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Simplify.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Simplify.scala @@ -143,6 +143,7 @@ object Simplify { case StreamZip(as, names, body, _) if as.length == 1 => StreamMap(as.head, names.head, body) case StreamMap(StreamZip(as, names, zipBody, b), name, mapBody) => StreamZip(as, names, Let(name, zipBody, mapBody), b) + case StreamMap(StreamFlatMap(child, flatMapName, flatMapBody), mapName, mapBody) => StreamFlatMap(child, flatMapName, StreamMap(flatMapBody, mapName, mapBody)) case x@StreamFlatMap(NA(_), _, _) => NA(x.typ) @@ -174,6 +175,7 @@ object Simplify { case Cast(Cast(x, _), t) if x.typ == t =>x case CastRename(x, t) if x.typ == t => x + case CastRename(CastRename(x, _), t) => CastRename(x, t) case ApplyBinaryPrimOp(Add(), I32(0), x) => x case ApplyBinaryPrimOp(Add(), x, I32(0)) => x @@ -565,9 +567,11 @@ object Simplify { canBeLifted(query) } => query - case BlockMatrixToValueApply(ValueToBlockMatrix(child, IndexedSeq(nrows, ncols), _), functions.GetElement(Seq(i, j))) => - if (child.typ.isInstanceOf[TArray]) ArrayRef(child, I32((i * ncols + j).toInt)) else child - + case BlockMatrixToValueApply(ValueToBlockMatrix(child, IndexedSeq(nrows, ncols), _), functions.GetElement(Seq(i, j))) => child.typ match { + case TArray(_) => ArrayRef(child, I32((i * ncols + j).toInt)) + case TNDArray(_, _) => NDArrayRef(child, IndexedSeq(i, j), ErrorIDs.NO_ERROR) + case TFloat64 => child + } 
case LiftMeOut(child) if IsConstant(child) => child } @@ -941,12 +945,12 @@ object Simplify { case BlockMatrixSlice(BlockMatrixMap2(l, r, ln, rn, f, sparsityStrategy), slices) => BlockMatrixMap2(BlockMatrixSlice(l, slices), BlockMatrixSlice(r, slices), ln, rn, f, sparsityStrategy) case BlockMatrixMap2(BlockMatrixBroadcast(scalarBM, IndexedSeq(), _, _), right, leftName, rightName, f, sparsityStrategy) => - val getElement = BlockMatrixToValueApply(scalarBM, functions.GetElement(Seq(0, 0))) + val getElement = BlockMatrixToValueApply(scalarBM, functions.GetElement(IndexedSeq(0, 0))) val needsDense = sparsityStrategy == NeedsDense || sparsityStrategy.exists(leftBlock = true, rightBlock = false) val maybeDense = if (needsDense) BlockMatrixDensify(right) else right BlockMatrixMap(maybeDense, rightName, Subst(f, BindingEnv.eval(leftName -> getElement)), needsDense) case BlockMatrixMap2(left, BlockMatrixBroadcast(scalarBM, IndexedSeq(), _, _), leftName, rightName, f, sparsityStrategy) => - val getElement = BlockMatrixToValueApply(scalarBM, functions.GetElement(Seq(0, 0))) + val getElement = BlockMatrixToValueApply(scalarBM, functions.GetElement(IndexedSeq(0, 0))) val needsDense = sparsityStrategy == NeedsDense || sparsityStrategy.exists(leftBlock = false, rightBlock = true) val maybeDense = if (needsDense) BlockMatrixDensify(left) else left BlockMatrixMap(maybeDense, leftName, Subst(f, BindingEnv.eval(rightName -> getElement)), needsDense) diff --git a/hail/src/main/scala/is/hail/expr/ir/SpecializedArrayBuilders.scala b/hail/src/main/scala/is/hail/expr/ir/SpecializedArrayBuilders.scala index a5b20e009e3..f369f9d8d48 100644 --- a/hail/src/main/scala/is/hail/expr/ir/SpecializedArrayBuilders.scala +++ b/hail/src/main/scala/is/hail/expr/ir/SpecializedArrayBuilders.scala @@ -1,15 +1,17 @@ package is.hail.expr.ir +import is.hail.annotations.Region import is.hail.asm4s._ -import is.hail.types.physical.{PCode, PType, PValue, typeToTypeInfo} +import is.hail.types.physical.stypes.SingleCodeType +import is.hail.types.physical.{PType, typeToTypeInfo} import is.hail.types.virtual.Type import is.hail.utils.BoxedArrayBuilder import scala.reflect.ClassTag -class StagedArrayBuilder(val elt: PType, mb: EmitMethodBuilder[_], len: Code[Int]) { +class StagedArrayBuilder(val elt: SingleCodeType, val eltRequired: Boolean, mb: EmitMethodBuilder[_], len: Code[Int]) { - val ti: TypeInfo[_] = typeToTypeInfo(elt) + val ti: TypeInfo[_] = elt.ti val ref: Value[Any] = coerce[Any](ti match { case BooleanInfo => mb.genLazyFieldThisRef[BooleanMissingArrayBuilder](Code.newInstance[BooleanMissingArrayBuilder, Int](len), "zab") @@ -44,26 +46,6 @@ class StagedArrayBuilder(val elt: PType, mb: EmitMethodBuilder[_], len: Code[Int case DoubleInfo => coerce[DoubleMissingArrayBuilder](ref).invoke[Int, Double, Unit]("update", i, coerce[Double](x)) } - def sort(compare: Code[AsmFunction2[_, _, _]]): Code[Unit] = { - ti match { - case BooleanInfo => - type F = AsmFunction2[Boolean, Boolean, Boolean] - coerce[BooleanMissingArrayBuilder](ref).invoke[F, Unit]("sort", coerce[F](compare)) - case IntInfo => - type F = AsmFunction2[Int, Int, Boolean] - coerce[IntMissingArrayBuilder](ref).invoke[F, Unit]("sort", coerce[F](compare)) - case LongInfo => - type F = AsmFunction2[Long, Long, Boolean] - coerce[LongMissingArrayBuilder](ref).invoke[F, Unit]("sort", coerce[F](compare)) - case FloatInfo => - type F = AsmFunction2[Float, Float, Boolean] - coerce[FloatMissingArrayBuilder](ref).invoke[F, Unit]("sort", coerce[F](compare)) - case DoubleInfo => - type 
F = AsmFunction2[Double, Double, Boolean] - coerce[DoubleMissingArrayBuilder](ref).invoke[F, Unit]("sort", coerce[F](compare)) - } - } - def addMissing(): Code[Unit] = coerce[MissingArrayBuilder](ref).invoke[Unit]("addMissing") @@ -81,19 +63,10 @@ class StagedArrayBuilder(val elt: PType, mb: EmitMethodBuilder[_], len: Code[Int def clear: Code[Unit] = coerce[MissingArrayBuilder](ref).invoke[Unit]("clear") - def applyEV(mb: EmitMethodBuilder[_], i: Code[Int]): EmitValue = - new EmitValue { - def pt: PType = elt - - def get(cb: EmitCodeBuilder): PCode = load.toI(cb).get( - cb, - s"Can't convert missing EmitValue of type ${pt} to PValue.") - - def load: EmitCode = { - val t = mb.newLocal[Int]("sab_applyEV_load_i") - EmitCode(t := i, isMissing(t), PCode(elt, apply(t))) - } - } + def loadFromIndex(cb: EmitCodeBuilder, r: Value[Region], i: Code[Int]): IEmitCode = { + val idx = cb.newLocal[Int]("loadFromIndex_idx", i) + IEmitCode(cb, isMissing(idx), elt.loadToSCode(cb, r, apply(idx))) + } } sealed abstract class MissingArrayBuilder(initialCapacity: Int) { @@ -493,6 +466,22 @@ final class LongArrayBuilder(initialCapacity: Int= 16) { } } + def +=(x: Long): Unit = add(x) + + def ++=(xs: Array[Long]) = { + val newLen = size + xs.length + ensureCapacity(newLen) + System.arraycopy(xs, 0, b, size_, xs.length) + size_ = newLen + } + + def ++=(xs: Array[Long], n: Int) = { + val newLen = size + n + ensureCapacity(newLen) + System.arraycopy(xs, 0, b, size_, n) + size_ = newLen + } + def add(x: Long): Unit = { ensureCapacity(size_ + 1) b(size_) = x @@ -551,6 +540,27 @@ final class IntArrayBuilder(initialCapacity: Int = 16) { } } + def setSizeUninitialized(n: Int) = { + ensureCapacity(n) + size_ = n + } + + def +=(x: Int): Unit = add(x) + + def ++=(xs: Array[Int]) = { + val newLen = size + xs.length + ensureCapacity(newLen) + System.arraycopy(xs, 0, b, size_, xs.length) + size_ = newLen + } + + def ++=(xs: Array[Int], n: Int) = { + val newLen = size + n + ensureCapacity(newLen) + System.arraycopy(xs, 0, b, size_, n) + size_ = newLen + } + def add(x: Int): Unit = { ensureCapacity(size_ + 1) b(size_) = x @@ -609,6 +619,27 @@ final class DoubleArrayBuilder(initialCapacity: Int = 16) { } } + def setSizeUninitialized(n: Int) = { + ensureCapacity(n) + size_ = n + } + + def +=(x: Double): Unit = add(x) + + def ++=(xs: Array[Double]) = { + val newLen = size + xs.length + ensureCapacity(newLen) + System.arraycopy(xs, 0, b, size_, xs.length) + size_ = newLen + } + + def ++=(xs: Array[Double], n: Int) = { + val newLen = size + n + ensureCapacity(newLen) + System.arraycopy(xs, 0, b, size_, n) + size_ = newLen + } + def add(x: Double): Unit = { ensureCapacity(size_ + 1) b(size_) = x @@ -641,6 +672,140 @@ final class DoubleArrayBuilder(initialCapacity: Int = 16) { } } +final class ByteArrayBuilder(initialCapacity: Int = 16) { + + var size_ : Int = 0 + var b: Array[Byte] = new Array[Byte](initialCapacity) + + def size: Int = size_ + + def setSize(n: Int) { + require(n >= 0 && n <= size) + size_ = n + } + + def apply(i: Int): Byte = { + require(i >= 0 && i < size) + b(i) + } + + def ensureCapacity(n: Int): Unit = { + if (b.length < n) { + val newCapacity = math.max(n, b.length * 2) + val newb = new Array[Byte](newCapacity) + Array.copy(b, 0, newb, 0, size_) + b = newb + } + } + + def +=(x: Byte) = add(x) + + def ++=(xs: Array[Byte]) = { + val newLen = size + xs.length + ensureCapacity(newLen) + System.arraycopy(xs, 0, b, size_, xs.length) + size_ = newLen + } + + def ++=(xs: Array[Byte], n: Int) = { + val newLen = size + n + 
ensureCapacity(newLen) + System.arraycopy(xs, 0, b, size_, n) + size_ = newLen + } + + def add(x: Byte): Unit = { + ensureCapacity(size_ + 1) + b(size_) = x + size_ += 1 + } + + def update(i: Int, x: Byte): Unit = { + require(i >= 0 && i < size) + b(i) = x + } + + def clear() { size_ = 0 } + + def result(): Array[Byte] = b.slice(0, size_) + + def clearAndResize(): Unit = { + size_ = 0 + if (b.length > initialCapacity) + b = new Array[Byte](initialCapacity) + } + def appendFrom(ab2: ByteArrayBuilder): Unit = { + ensureCapacity(size_ + ab2.size_) + System.arraycopy(ab2.b, 0, b, size_, ab2.size_) + size_ = size_ + ab2.size_ + } + + def pop(): Byte = { + size_ -= 1 + b(size) + } +} + +final class BooleanArrayBuilder(initialCapacity: Int = 16) { + + var size_ : Int = 0 + var b: Array[Boolean] = new Array[Boolean](initialCapacity) + + def size: Int = size_ + + def setSize(n: Int) { + require(n >= 0 && n <= size) + size_ = n + } + + def apply(i: Int): Boolean = { + require(i >= 0 && i < size) + b(i) + } + + def ensureCapacity(n: Int): Unit = { + if (b.length < n) { + val newCapacity = math.max(n, b.length * 2) + val newb = new Array[Boolean](newCapacity) + Array.copy(b, 0, newb, 0, size_) + b = newb + } + } + + def +=(x: Boolean) = add(x) + + def add(x: Boolean): Unit = { + ensureCapacity(size_ + 1) + b(size_) = x + size_ += 1 + } + + def update(i: Int, x: Boolean): Unit = { + require(i >= 0 && i < size) + b(i) = x + } + + def clear() { size_ = 0 } + + def result(): Array[Boolean] = b.slice(0, size_) + + def clearAndResize(): Unit = { + size_ = 0 + if (b.length > initialCapacity) + b = new Array[Boolean](initialCapacity) + } + def appendFrom(ab2: BooleanArrayBuilder): Unit = { + ensureCapacity(size_ + ab2.size_) + System.arraycopy(ab2.b, 0, b, size_, ab2.size_) + size_ = size_ + ab2.size_ + } + + def pop(): Boolean = { + size_ -= 1 + b(size) + } +} + final class AnyRefArrayBuilder[T <: AnyRef](initialCapacity: Int = 16)(implicit ct: ClassTag[T]) { var size_ : Int = 0 diff --git a/hail/src/main/scala/is/hail/expr/ir/TableIR.scala b/hail/src/main/scala/is/hail/expr/ir/TableIR.scala index bb001efac33..390464e1d0d 100644 --- a/hail/src/main/scala/is/hail/expr/ir/TableIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/TableIR.scala @@ -16,14 +16,16 @@ import is.hail.linalg.{BlockMatrix, BlockMatrixMetadata, BlockMatrixReadRowBlock import is.hail.rvd._ import is.hail.sparkextras.ContextRDD import is.hail.types._ -import is.hail.types.physical._ -import is.hail.types.physical.stypes.interfaces.{SStream, SStreamCode} +import is.hail.types.physical.{stypes, _} +import is.hail.types.physical.stypes.{BooleanSingleCodeType, Int32SingleCodeType, PTypeReferenceSingleCodeType, StreamSingleCodeType} +import is.hail.types.physical.stypes.interfaces.{SBaseStructValue, SStream, SStreamCode} import is.hail.types.virtual._ import is.hail.utils._ import org.apache.spark.TaskContext import org.apache.spark.executor.InputMetrics import org.apache.spark.sql.Row import org.json4s.JsonAST.JString +import org.json4s.jackson.JsonMethods import org.json4s.{DefaultFormats, Extraction, Formats, JValue, ShortTypeHints} import java.io.{ByteArrayInputStream, DataInputStream, DataOutputStream, InputStream} @@ -416,6 +418,12 @@ abstract class TableReader { Extraction.decompose(this)(TableReader.formats) } + def renderShort(): String + + def defaultRender(): String = { + StringEscapeUtils.escapeString(JsonMethods.compact(toJValue)) + } + def lowerGlobals(ctx: ExecuteContext, requestedGlobalsType: TStruct): IR = throw new 
LowererUnsupportedOperation(s"${ getClass.getSimpleName }.lowerGlobals not implemented") @@ -483,7 +491,6 @@ case class PartitionRVDReader(rvd: RVD) extends PartitionReader { val iterator = mb.genFieldThisRef[Iterator[Long]]("rvdreader_iterator") val next = mb.genFieldThisRef[Long]("rvdreader_next") - val first = mb.genFieldThisRef[Boolean]("rvdreader_first") val region = mb.genFieldThisRef[Region]("rvdreader_region") val upcastF = mb.genFieldThisRef[AsmFunction2RegionLongLong]("rvdreader_upcast") @@ -504,12 +511,12 @@ case class PartitionRVDReader(rvd: RVD) extends PartitionReader { cb.assign(next, upcastF.invoke[Region, Long, Long]("apply", region, Code.longValue(iterator.invoke[java.lang.Long]("next")))) cb.goto(LproduceElementDone) } - override val element: EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, upcastPType.loadCheapPCode(cb, next))) + override val element: EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, upcastPType.loadCheapSCode(cb, next))) override def close(cb: EmitCodeBuilder): Unit = {} } - SStreamCode(SStream(producer.element.st, true), producer) + SStreamCode(producer) } } @@ -539,7 +546,7 @@ case class PartitionNativeReader(spec: AbstractTypedCodecSpec) extends AbstractN context.toI(cb).map(cb) { path => val pathString = path.asString.loadString() val xRowBuf = mb.genFieldThisRef[InputBuffer]("pnr_xrowbuf") - val next = mb.newPSettable(mb.fieldBuilder, spec.decodedPType(requestedType), "pnr_next") + val next = mb.newPSettable(mb.fieldBuilder, spec.encodedType.decodedSType(requestedType), "pnr_next") val region = mb.genFieldThisRef[Region]("pnr_region") val producer = new StreamProducer { @@ -560,7 +567,7 @@ case class PartitionNativeReader(spec: AbstractTypedCodecSpec) extends AbstractN override def close(cb: EmitCodeBuilder): Unit = cb += xRowBuf.close() } - SStreamCode(SStream(producer.element.st, true), producer) + SStreamCode(producer) } } @@ -635,12 +642,13 @@ case class PartitionNativeReaderIndexed(spec: AbstractTypedCodecSpec, indexSpec: .consumeCode[Interval](cb, Code._fatal[Interval](""), { pc => - val pcm = pc.memoize(cb, "pnri_interval").asPValue + val pcm = pc.memoize(cb, "pnri_interval") + val pt = pcm.st.canonicalPType() Code.invokeScalaObject2[PType, Long, Interval]( PartitionBoundOrdering.getClass, "regionValueToJavaObject", - mb.getPType(pcm.pt), - coerce[Long](pcm.code)) + mb.getPType(pt), + pt.store(cb, region, pcm, false)) } ), Code._null[InputMetrics] @@ -654,11 +662,11 @@ case class PartitionNativeReaderIndexed(spec: AbstractTypedCodecSpec, indexSpec: cb.goto(LproduceElementDone) } - override val element: EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, eltType.loadCheapPCode(cb, next))) + override val element: EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, eltType.loadCheapSCode(cb, next))) override def close(cb: EmitCodeBuilder): Unit = cb += it.invoke[Unit]("close") } - SStreamCode(SStream(producer.element.st, true), producer) + SStreamCode(producer) } } @@ -747,7 +755,7 @@ case class PartitionZippedNativeReader(specLeft: AbstractTypedCodecSpec, specRig context.toI(cb).map(cb) { ctxStruct => - def getIndexReader(cb: EmitCodeBuilder, ctxMemo: PBaseStructValue): Code[IndexReader] = { + def getIndexReader(cb: EmitCodeBuilder, ctxMemo: SBaseStructValue): Code[IndexReader] = { makeIndexCode match { case Some(makeIndex) => val indexPath = ctxMemo @@ -762,19 +770,20 @@ case class PartitionZippedNativeReader(specLeft: AbstractTypedCodecSpec, specRig } } - def getInterval(cb: EmitCodeBuilder, ctxMemo: 
PBaseStructValue): Code[Interval] = { + def getInterval(cb: EmitCodeBuilder, region: Value[Region], ctxMemo: SBaseStructValue): Code[Interval] = { makeIndexCode match { case Some(_) => ctxMemo.loadField(cb, "interval") .consumeCode[Interval](cb, Code._fatal[Interval](""), { pc => - val pcm = pc.memoize(cb, "pnri_interval").asPValue + val pcm = pc.memoize(cb, "pnri_interval") + val pt = pcm.st.canonicalPType() Code.invokeScalaObject2[PType, Long, Interval]( PartitionBoundOrdering.getClass, "regionValueToJavaObject", - mb.getPType(pcm.pt), - coerce[Long](pcm.code)) + mb.getPType(pt), + pt.store(cb, region, pcm, false)) } ) case None => Code._null[Interval] @@ -818,7 +827,7 @@ case class PartitionZippedNativeReader(specLeft: AbstractTypedCodecSpec, specRig getIndexReader(cb, ctxMemo), leftOffsetField.map[Code[String]](const(_)).getOrElse(Code._null[String]), rightOffsetField.map[Code[String]](const(_)).getOrElse(Code._null[String]), - getInterval(cb, ctxMemo), + getInterval(cb, region, ctxMemo), Code._null[InputMetrics] )) } @@ -830,11 +839,11 @@ case class PartitionZippedNativeReader(specLeft: AbstractTypedCodecSpec, specRig cb.assign(next, it.invoke[Long]("_next")) cb.goto(LproduceElementDone) } - override val element: EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, eltType.loadCheapPCode(cb, next))) + override val element: EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, eltType.loadCheapSCode(cb, next))) override def close(cb: EmitCodeBuilder): Unit = cb += it.invoke[Unit]("close") } - SStreamCode(SStream(producer.element.st, true), producer) + SStreamCode(producer) } } @@ -889,6 +898,8 @@ class TableNativeReader( decomposeWithName(params, "TableNativeReader") } + override def renderShort(): String = s"(TableNativeReader ${ params.path } ${ params.options.map(_.renderShort()).getOrElse("") })" + override def hashCode(): Int = params.hashCode() override def equals(that: Any): Boolean = that match { @@ -913,7 +924,7 @@ class TableNativeReader( else params.options.map(opts => new RVDPartitioner(specPart.kType, opts.intervals)) - spec.rowsSpec.readTableStage(ctx, spec.rowsComponent.absolutePath(params.path), requestedType.rowType, partitioner, filterIntervals).apply(globals) + spec.rowsSpec.readTableStage(ctx, spec.rowsComponent.absolutePath(params.path), requestedType, partitioner, filterIntervals).apply(globals) } } @@ -926,7 +937,9 @@ case class TableNativeZippedReader( ) extends TableReader { def pathsUsed: Seq[String] = FastSeq(pathLeft, pathRight) - private lazy val filterIntervals = options.map(_.filterIntervals).getOrElse(false) + override def renderShort(): String = s"(TableNativeZippedReader $pathLeft $pathRight ${ options.map(_.renderShort()).getOrElse("") })" + + private lazy val filterIntervals = options.exists(_.filterIntervals) private def intervals = options.map(_.intervals) @@ -1044,7 +1057,7 @@ case class TableNativeZippedReader( AbstractRVDSpec.readZippedLowered(ctx, specLeft.rowsSpec, specRight.rowsSpec, pathLeft + "/rows", pathRight + "/rows", - partitioner, options.exists(_.filterIntervals), + partitioner, filterIntervals, requestedType.rowType, reqLeft, reqRight, requestedType.key).apply(globals) } @@ -1110,6 +1123,8 @@ case class TableFromBlockMatrixNativeReader(params: TableFromBlockMatrixNativeRe override def toJValue: JValue = { decomposeWithName(params, "TableFromBlockMatrixNativeReader")(TableReader.formats) } + + def renderShort(): String = defaultRender() } object TableRead { @@ -1684,6 +1699,11 @@ case class TableIntervalJoin( } } +/** + * 
The TableMultiWayZipJoin node assumes that input tables have distinct keys. If inputs + * do not have distinct keys, the key that is included in the result is undefined, but + * is likely the last. + */ case class TableMultiWayZipJoin(children: IndexedSeq[TableIR], fieldName: String, globalName: String) extends TableIR { require(children.length > 0, "there must be at least one table as an argument") diff --git a/hail/src/main/scala/is/hail/expr/ir/TableWriter.scala b/hail/src/main/scala/is/hail/expr/ir/TableWriter.scala index dbca1b280c4..445e2200ae9 100644 --- a/hail/src/main/scala/is/hail/expr/ir/TableWriter.scala +++ b/hail/src/main/scala/is/hail/expr/ir/TableWriter.scala @@ -11,8 +11,9 @@ import is.hail.io.index.StagedIndexWriter import is.hail.io.{AbstractTypedCodecSpec, BufferSpec, OutputBuffer, TypedCodecSpec} import is.hail.rvd.{AbstractRVDSpec, IndexSpec, RVDPartitioner, RVDSpecMaker} import is.hail.types.encoded.EType -import is.hail.types.physical.{PCanonicalBaseStruct, PCanonicalString, PCanonicalStruct, PCode, PIndexableCode, PInt64, PStream, PStringCode, PStruct, PType} -import is.hail.types.physical.stypes.interfaces.PVoidCode +import is.hail.types.physical.stypes.SCode +import is.hail.types.physical.stypes.interfaces.SVoidCode +import is.hail.types.physical.{PCanonicalBaseStruct, PCanonicalString, PCanonicalStruct, PInt64, PStream, PStruct, PType} import is.hail.types.virtual._ import is.hail.types.{RTable, TableType} import is.hail.utils._ @@ -174,7 +175,7 @@ case class PartitionNativeWriter(spec: AbstractTypedCodecSpec, partPrefix: Strin val keyType = ifIndexed { index.get._2 } val indexWriter = ifIndexed { StagedIndexWriter.withDefaults(keyType, mb.ecb) } - context.toI(cb).map(cb) { ctxCode: PCode => + context.toI(cb).map(cb) { ctxCode: SCode => val result = mb.newLocal[Long]("write_result") val filename = mb.newLocal[String]("filename") @@ -189,66 +190,65 @@ case class PartitionNativeWriter(spec: AbstractTypedCodecSpec, partPrefix: Strin indexWriter.add(cb, { IEmitCode.present(cb, keyType.asInstanceOf[PCanonicalBaseStruct] .constructFromFields(cb, stream.elementRegion, - keyType.fields.map(f => EmitCode.fromI(cb.emb)(cb => row.loadField(cb, f.name).typecast[PCode])), + keyType.fields.map(f => EmitCode.fromI(cb.emb)(cb => row.loadField(cb, f.name))), deepCopy = false)) }, ob.invoke[Long]("indexOffset"), - IEmitCode.present(cb, PCode(+PCanonicalStruct(), 0L))) + IEmitCode.present(cb, PCanonicalStruct().loadCheapSCode(cb, 0L))) } - cb += ob.writeByte(1.asInstanceOf[Byte]) + cb += ob.writeByte(1.asInstanceOf[Byte]) - spec.encodedType.buildEncoder(row.st, cb.emb.ecb) - .apply(cb, row, ob) + spec.encodedType.buildEncoder(row.st, cb.emb.ecb) + .apply(cb, row, ob) - cb.assign(n, n + 1L) + cb.assign(n, n + 1L) } - PCode(pResultType, EmitCodeBuilder.scopedCode(mb) { cb: EmitCodeBuilder => - val pctx = ctxCode.memoize(cb, "context") - cb.assign(filename, pctx.asString.loadString()) - if (hasIndex) { - val indexFile = cb.newLocal[String]("indexFile") - cb.assign(indexFile, const(index.get._1).concat(filename).concat(".idx")) - indexWriter.init(cb, indexFile) - } - cb.assign(filename, const(partPrefix).concat(filename)) - cb.assign(os, Code.newInstance[ByteTrackingOutputStream, OutputStream](mb.create(filename))) - cb.assign(ob, spec.buildCodeOutputBuffer(Code.checkcast[OutputStream](os))) - cb.assign(n, 0L) - - stream.memoryManagedConsume(region, cb) { cb => - writeFile(cb, stream.element) - } - - cb += ob.writeByte(0.asInstanceOf[Byte]) - cb.assign(result, 
pResultType.allocate(region)) - if (hasIndex) - indexWriter.close(cb) - cb += ob.flush() - cb += os.invoke[Unit]("close") - filenameType.storeAtAddress(cb, pResultType.fieldOffset(result, "filePath"), region, pctx, false) - cb += Region.storeLong(pResultType.fieldOffset(result, "partitionCounts"), n) - result.get - }) + val pctx = ctxCode.memoize(cb, "context") + cb.assign(filename, pctx.asString.loadString()) + if (hasIndex) { + val indexFile = cb.newLocal[String]("indexFile") + cb.assign(indexFile, const(index.get._1).concat(filename).concat(".idx")) + indexWriter.init(cb, indexFile) + } + cb.assign(filename, const(partPrefix).concat(filename)) + cb.assign(os, Code.newInstance[ByteTrackingOutputStream, OutputStream](mb.create(filename))) + cb.assign(ob, spec.buildCodeOutputBuffer(Code.checkcast[OutputStream](os))) + cb.assign(n, 0L) + + stream.memoryManagedConsume(region, cb) { cb => + writeFile(cb, stream.element) + } + + cb += ob.writeByte(0.asInstanceOf[Byte]) + cb.assign(result, pResultType.allocate(region)) + if (hasIndex) + indexWriter.close(cb) + cb += ob.flush() + cb += os.invoke[Unit]("close") + filenameType.storeAtAddress(cb, pResultType.fieldOffset(result, "filePath"), region, pctx, false) + cb += Region.storeLong(pResultType.fieldOffset(result, "partitionCounts"), n) + pResultType.loadCheapSCode(cb, result.get) } } } case class RVDSpecWriter(path: String, spec: RVDSpecMaker) extends MetadataWriter { def annotationType: Type = TArray(TString) + def writeMetadata( writeAnnotations: => IEmitCode, cb: EmitCodeBuilder, region: Value[Region]): Unit = { cb += cb.emb.getFS.invoke[String, Unit]("mkDir", path) - val pc = writeAnnotations.get(cb, "write annotations can't be missing!").asInstanceOf[PIndexableCode] + val pc = writeAnnotations.get(cb, "write annotations can't be missing!").asIndexable val a = pc.memoize(cb, "filePaths") val partFiles = cb.newLocal[Array[String]]("partFiles") val n = cb.newLocal[Int]("n", a.loadLength()) val i = cb.newLocal[Int]("i", 0) cb.assign(partFiles, Code.newArray[String](n)) cb.whileLoop(i < n, { - val s = a.loadElement(cb, i).get(cb, "file name can't be missing!").asInstanceOf[PStringCode] + val s = a.loadElement(cb, i).get(cb, "file name can't be missing!").asString cb += partFiles.update(i, s.loadString()) cb.assign(i, i + 1) }) @@ -286,7 +286,7 @@ case class TableSpecWriter(path: String, typ: TableType, rowRelPath: String, glo cb: EmitCodeBuilder, region: Value[Region]): Unit = { cb += cb.emb.getFS.invoke[String, Unit]("mkDir", path) - val pc = writeAnnotations.get(cb, "write annotations can't be missing!").asInstanceOf[PIndexableCode] + val pc = writeAnnotations.get(cb, "write annotations can't be missing!").asIndexable val partCounts = cb.newLocal[Array[Long]]("partCounts") val a = pc.memoize(cb, "writePartCounts") @@ -328,7 +328,7 @@ case class RelationalWriter(path: String, overwrite: Boolean, maybeRefs: Option[ } } - writeAnnotations.consume(cb, {}, { pc => assert(pc == PVoidCode) }) // PVoidCode.code is Code._empty + writeAnnotations.consume(cb, {}, { pc => assert(pc == SVoidCode) }) // PVoidCode.code is Code._empty cb += Code.invokeScalaObject2[FS, String, Unit](Class.forName("is.hail.utils.package$"), "writeNativeFileReadMe", cb.emb.getFS, path) cb += cb.emb.create(s"$path/_SUCCESS").invoke[Unit]("close") diff --git a/hail/src/main/scala/is/hail/expr/ir/TypeCheck.scala b/hail/src/main/scala/is/hail/expr/ir/TypeCheck.scala index 5636772d362..2d57dc4233e 100644 --- a/hail/src/main/scala/is/hail/expr/ir/TypeCheck.scala +++ 
b/hail/src/main/scala/is/hail/expr/ir/TypeCheck.scala @@ -404,6 +404,7 @@ object TypeCheck { } case Die(msg, typ, _) => assert(msg.typ == TString) + case Trap(child) => case x@ApplyIR(fn, typeArgs, args) => case x: AbstractApplyNode[_] => assert(x.implementation.unify(x.typeArgs, x.args.map(_.typ), x.returnType)) @@ -427,6 +428,8 @@ object TypeCheck { case BlockMatrixCollect(_) => case BlockMatrixWrite(_, _) => case BlockMatrixMultiWrite(_, _) => + case ValueToBlockMatrix(child, _, _) => + assert(child.typ.isInstanceOf[TArray] || child.typ.isInstanceOf[TNDArray] || child.typ == TFloat64) case CollectDistributedArray(ctxs, globals, cname, gname, body, _) => assert(ctxs.typ.isInstanceOf[TStream]) case x@ReadPartition(context, rowType, reader) => diff --git a/hail/src/main/scala/is/hail/expr/ir/UnaryOp.scala b/hail/src/main/scala/is/hail/expr/ir/UnaryOp.scala index 48bdceb92fa..eae7967b3bf 100644 --- a/hail/src/main/scala/is/hail/expr/ir/UnaryOp.scala +++ b/hail/src/main/scala/is/hail/expr/ir/UnaryOp.scala @@ -3,6 +3,8 @@ package is.hail.expr.ir import is.hail.asm4s._ import is.hail.expr._ import is.hail.types._ +import is.hail.types.physical.stypes.{SCode, SType} +import is.hail.types.physical.stypes.interfaces._ import is.hail.types.virtual._ import is.hail.utils._ @@ -23,7 +25,12 @@ object UnaryOp { private def incompatible[T](t: Type, op: UnaryOp): T = throw new RuntimeException(s"Cannot apply $op to values of type $t") - def emit(op: UnaryOp, t: Type, x: Code[_]): Code[_] = t match { + def emit(cb: EmitCodeBuilder, op: UnaryOp, x: SCode): SCode = { + + primitive(getReturnType(op, x.st.virtualType), emit(op, x.st.virtualType, SType.extractPrimCode(cb, x))) + } + + private def emit(op: UnaryOp, t: Type, x: Code[_]): Code[_] = t match { case TBoolean => val xx = coerce[Boolean](x) op match { diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/AggregatorState.scala b/hail/src/main/scala/is/hail/expr/ir/agg/AggregatorState.scala index b225d6f7a36..363164da111 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/AggregatorState.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/AggregatorState.scala @@ -7,7 +7,8 @@ import is.hail.io.{BufferSpec, InputBuffer, OutputBuffer, TypedCodecSpec} import is.hail.types.VirtualTypeWithReq import is.hail.types.physical._ import is.hail.types.physical.stypes.SCode -import is.hail.types.physical.stypes.concrete.{SBaseStructPointer, SBaseStructPointerCode} +import is.hail.types.physical.stypes.concrete.{SBaseStructPointer, SBaseStructPointerCode, SStackStruct} +import is.hail.types.physical.stypes.interfaces.SBinaryCode import is.hail.utils._ trait AggregatorState { @@ -34,14 +35,14 @@ trait AggregatorState { def deserialize(codec: BufferSpec): (EmitCodeBuilder, Value[InputBuffer]) => Unit - def deserializeFromBytes(cb: EmitCodeBuilder, bytes: PBinaryCode): Unit = { + def deserializeFromBytes(cb: EmitCodeBuilder, bytes: SBinaryCode): Unit = { val lazyBuffer = kb.getOrDefineLazyField[MemoryBufferWrapper](Code.newInstance[MemoryBufferWrapper](), (this, "bufferWrapper")) cb += lazyBuffer.invoke[Array[Byte], Unit]("set", bytes.loadBytes()) val ib = cb.newLocal("aggstate_deser_from_bytes_ib", lazyBuffer.invoke[InputBuffer]("buffer")) deserialize(BufferSpec.defaultUncompressed)(cb, ib) } - def serializeToRegion(cb: EmitCodeBuilder, t: PBinary, r: Code[Region]): Code[Long] = { + def serializeToRegion(cb: EmitCodeBuilder, t: PBinary, r: Code[Region]): SCode = { val lazyBuffer = kb.getOrDefineLazyField[MemoryWriterWrapper](Code.newInstance[MemoryWriterWrapper](), 
(this, "writerWrapper")) val addr = kb.genFieldThisRef[Long]("addr") cb += lazyBuffer.invoke[Unit]("clear") @@ -51,7 +52,7 @@ trait AggregatorState { cb += t.storeLength(addr, lazyBuffer.invoke[Int]("length")) cb += lazyBuffer.invoke[Long, Unit]("copyToAddress", t.bytesAddress(addr)) - addr + t.loadCheapSCode(cb, addr) } } @@ -141,18 +142,18 @@ abstract class AbstractTypedRegionBackedAggState(val ptype: PType) extends Regio } def get(cb: EmitCodeBuilder): IEmitCode = { - IEmitCode(cb, storageType.isFieldMissing(off, 0), ptype.loadCheapPCode(cb, storageType.loadField(off, 0))) + IEmitCode(cb, storageType.isFieldMissing(off, 0), ptype.loadCheapSCode(cb, storageType.loadField(off, 0))) } def copyFrom(cb: EmitCodeBuilder, src: Code[Long]): Unit = { newState(cb, off) - storageType.storeAtAddress(cb, off, region, storageType.loadCheapPCode(cb, src), deepCopy = true) + storageType.storeAtAddress(cb, off, region, storageType.loadCheapSCode(cb, src), deepCopy = true) } def serialize(codec: BufferSpec): (EmitCodeBuilder, Value[OutputBuffer]) => Unit = { val codecSpec = TypedCodecSpec(storageType, codec) val enc = codecSpec.encodedType.buildEncoder(storageType.sType, kb) - (cb, ob: Value[OutputBuffer]) => enc(cb, storageType.loadCheapPCode(cb, off), ob) + (cb, ob: Value[OutputBuffer]) => enc(cb, storageType.loadCheapSCode(cb, off), ob) } def deserialize(codec: BufferSpec): (EmitCodeBuilder, Value[InputBuffer]) => Unit = { @@ -166,12 +167,12 @@ abstract class AbstractTypedRegionBackedAggState(val ptype: PType) extends Regio class PrimitiveRVAState(val vtypes: Array[VirtualTypeWithReq], val kb: EmitClassBuilder[_]) extends AggregatorState { private[this] val emitTypes = vtypes.map(_.canonicalEmitType) - assert(emitTypes.forall(_.st.pType.isPrimitive)) + assert(emitTypes.forall(_.st.isPrimitive)) val nFields: Int = emitTypes.length val fields: Array[EmitSettable] = Array.tabulate(nFields) { i => kb.newEmitField(s"primitiveRVA_${ i }_v", emitTypes(i)) } val storageType = PCanonicalTuple(true, emitTypes.map(_.canonicalPType): _*) - val sStorageType = SBaseStructPointer(storageType) + val sStorageType = storageType.sType def foreachField(f: (Int, EmitSettable) => Unit): Unit = { (0 until nFields).foreach { i => @@ -186,7 +187,7 @@ class PrimitiveRVAState(val vtypes: Array[VirtualTypeWithReq], val kb: EmitClass private[this] def loadVarsFromRegion(cb: EmitCodeBuilder, srcc: Code[Long]): Unit = { val pv = new SBaseStructPointerCode(sStorageType, srcc).memoize(cb, "prim_rvastate_load_vars") foreachField { (i, es) => - cb.assign(es, pv.loadField(cb, i).map(cb)(_.asPCode)) + cb.assign(es, pv.loadField(cb, i)) } } @@ -196,7 +197,11 @@ class PrimitiveRVAState(val vtypes: Array[VirtualTypeWithReq], val kb: EmitClass def store(cb: EmitCodeBuilder, regionStorer: (EmitCodeBuilder, Value[Region]) => Unit, destc: Code[Long]): Unit = { val dest = cb.newLocal("prim_rvastate_store_dest", destc) - storageType.storeAtAddressFromFields(cb, dest, null, fields.map(_.load), false) + storageType.storeAtAddress(cb, + dest, + null, + SStackStruct.constructFromArgs(cb, null, storageType.virtualType, fields.map(_.load): _*), + false) } def copyFrom(cb: EmitCodeBuilder, src: Code[Long]): Unit = loadVarsFromRegion(cb, src) @@ -204,11 +209,15 @@ class PrimitiveRVAState(val vtypes: Array[VirtualTypeWithReq], val kb: EmitClass def serialize(codec: BufferSpec): (EmitCodeBuilder, Value[OutputBuffer]) => Unit = { (cb, ob: Value[OutputBuffer]) => foreachField { case (_, es) => - if (es.pt.required) { - cb += ob.writePrimitive(es.pt)(es.v) + 
if (es.emitType.required) { + ob.writePrimitive(cb, es.get(cb)) } else { - cb += ob.writeBoolean(es.m) - cb.ifx(!es.m, cb += ob.writePrimitive(es.pt)(es.v)) + es.toI(cb).consume(cb, + cb += ob.writeBoolean(true), + { sc => + cb += ob.writeBoolean(false) + ob.writePrimitive(cb, sc) + }) } } } @@ -216,12 +225,12 @@ class PrimitiveRVAState(val vtypes: Array[VirtualTypeWithReq], val kb: EmitClass def deserialize(codec: BufferSpec): (EmitCodeBuilder, Value[InputBuffer]) => Unit = { (cb, ib: Value[InputBuffer]) => foreachField { case (_, es) => - if (es.pt.required) { - cb.assign(es, EmitCode.present(cb.emb, PCode(es.pt, ib.readPrimitive(es.pt)))) + if (es.emitType.required) { + cb.assign(es, EmitCode.present(cb.emb, ib.readPrimitive(es.st.virtualType))) } else { cb.ifx(ib.readBoolean(), - cb.assign(es, EmitCode.missing(cb.emb, es.pt)), - cb.assign(es, EmitCode.present(cb.emb, PCode(es.pt, ib.readPrimitive(es.pt))))) + cb.assign(es, EmitCode.missing(cb.emb, es.st)), + cb.assign(es, EmitCode.present(cb.emb, ib.readPrimitive(es.st.virtualType)))) } } } diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/AppendOnlyBTree.scala b/hail/src/main/scala/is/hail/expr/ir/agg/AppendOnlyBTree.scala index 0255286df36..6f682c3a043 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/AppendOnlyBTree.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/AppendOnlyBTree.scala @@ -97,7 +97,7 @@ class AppendOnlyBTree(kb: EmitClassBuilder[_], val key: BTreeKey, region: Value[ private def insert(cb: EmitCodeBuilder, nodec: Code[Long], insertIdxc: Code[Int], kc: EmitCode, childC: Code[Long]): Code[Long] = { val kt = key.compType.sType - val castKCode = EmitCode.fromI(cb.emb)(cb => kc.toI(cb).map(cb)(k => kt.coerceOrCopy(cb, region, k, false).asPCode)) + val castKCode = EmitCode.fromI(cb.emb)(cb => kc.toI(cb).map(cb)(k => kt.coerceOrCopy(cb, region, k, false))) val insertAt = kb.getOrGenEmitMethod("btree_insert", (this, "insert", kt), FastIndexedSeq[ParamType](typeInfo[Long], typeInfo[Int], castKCode.emitParamType, typeInfo[Long]), typeInfo[Long]) { insertAt => val node: Value[Long] = insertAt.getCodeParam[Long](1) diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/ApproxCDFAggregator.scala b/hail/src/main/scala/is/hail/expr/ir/agg/ApproxCDFAggregator.scala index 95b22966383..d8300b75006 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/ApproxCDFAggregator.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/ApproxCDFAggregator.scala @@ -43,7 +43,7 @@ class ApproxCDFState(val kb: EmitClassBuilder[_]) extends AggregatorState { } def result(cb: EmitCodeBuilder, region: Value[Region]): SBaseStructPointerCode = { - QuantilesAggregator.resultType.loadCheapPCode(cb, aggr.invoke[Region, Long]("rvResult", region)) + QuantilesAggregator.resultType.loadCheapSCode(cb, aggr.invoke[Region, Long]("rvResult", region)) } def newState(cb: EmitCodeBuilder, off: Code[Long]): Unit = cb += region.getNewRegion(regionSize) diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/ArrayElementLengthCheckAggregator.scala b/hail/src/main/scala/is/hail/expr/ir/agg/ArrayElementLengthCheckAggregator.scala index 6913b9f9a10..d63fb61b9ba 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/ArrayElementLengthCheckAggregator.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/ArrayElementLengthCheckAggregator.scala @@ -257,7 +257,7 @@ class ArrayElementLengthCheckAggregator(nestedAggs: Array[StagedAggregator], kno cb.assign(i, i + 1) }) // don't need to deep copy because that's done in nested aggregators - pt.storeAtAddress(cb, addr, region, 
resultType.loadCheapPCode(cb, resultAddr), deepCopy = false) + pt.storeAtAddress(cb, addr, region, resultType.loadCheapSCode(cb, resultAddr), deepCopy = false) } ) diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/CallStatsAggregator.scala b/hail/src/main/scala/is/hail/expr/ir/agg/CallStatsAggregator.scala index fa2b3678127..37987eec69d 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/CallStatsAggregator.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/CallStatsAggregator.scala @@ -6,6 +6,7 @@ import is.hail.expr.ir.{EmitClassBuilder, EmitCode, EmitCodeBuilder, IEmitCode} import is.hail.io.{BufferSpec, InputBuffer, OutputBuffer, TypedCodecSpec} import is.hail.types.physical._ import is.hail.types.virtual.{TCall, TInt32, Type} +import is.hail.types.physical.stypes.interfaces._ import is.hail.utils._ import scala.language.existentials @@ -76,7 +77,7 @@ class CallStatsState(val kb: EmitClassBuilder[_]) extends PointerBasedRVAState { def serialize(codec: BufferSpec): (EmitCodeBuilder, Value[OutputBuffer]) => Unit = { (cb, ob) => val codecSpec = TypedCodecSpec(CallStatsState.stateType, codec) codecSpec.encodedType.buildEncoder(CallStatsState.stateType.sType, kb) - .apply(cb, CallStatsState.stateType.loadCheapPCode(cb, off), ob) + .apply(cb, CallStatsState.stateType.loadCheapSCode(cb, off), ob) } def deserialize(codec: BufferSpec): (EmitCodeBuilder, Value[InputBuffer]) => Unit = { @@ -91,7 +92,7 @@ class CallStatsState(val kb: EmitClassBuilder[_]) extends PointerBasedRVAState { } def copyFromAddress(cb: EmitCodeBuilder, src: Code[Long]): Unit = { - cb.assign(off, CallStatsState.stateType.store(cb, region, CallStatsState.stateType.loadCheapPCode(cb, src), deepCopy = true)) + cb.assign(off, CallStatsState.stateType.store(cb, region, CallStatsState.stateType.loadCheapSCode(cb, src), deepCopy = true)) loadNAlleles(cb) } } @@ -139,7 +140,7 @@ class CallStatsAggregator extends StagedAggregator { call.toI(cb).consume(cb, { /* do nothing if missing */ - }, { case callc: PCallCode => + }, { case callc: SCallCode => val call = callc.memoize(cb, "callstats_seqop_callv") val hom = cb.newLocal[Boolean]("hom", true) val lastAllele = cb.newLocal[Int]("lastAllele", -1) @@ -188,7 +189,7 @@ class CallStatsAggregator extends StagedAggregator { val ac = acType.constructFromElements(cb, region, state.nAlleles, deepCopy = true) { (cb, i) => val acAtIndex = cb.newLocal[Int]("callstats_result_acAtIndex", state.alleleCountAtIndex(i, state.nAlleles)) cb.assign(alleleNumber, alleleNumber + acAtIndex) - IEmitCode.present(cb, PCode(acType.elementType, acAtIndex)) + IEmitCode.present(cb, primitive(acAtIndex)) } acType.storeAtAddress(cb, rt.fieldOffset(addr, "AC"), region, ac, deepCopy = false) @@ -199,20 +200,20 @@ class CallStatsAggregator extends StagedAggregator { val afType = resultType.fieldType("AF").asInstanceOf[PCanonicalArray] val af = afType.constructFromElements(cb, region, state.nAlleles, deepCopy = true) { (cb, i) => val acAtIndex = cb.newLocal[Int]("callstats_result_acAtIndex", state.alleleCountAtIndex(i, state.nAlleles)) - IEmitCode.present(cb, PCode(afType.elementType, acAtIndex.toD / alleleNumber.toD)) + IEmitCode.present(cb, primitive(acAtIndex.toD / alleleNumber.toD)) } afType.storeAtAddress(cb, rt.fieldOffset(addr, "AF"), region, af, deepCopy = false) }) val anType = resultType.fieldType("AN") - val an = PCode(anType, alleleNumber) + val an = primitive(alleleNumber) anType.storeAtAddress(cb, rt.fieldOffset(addr, "AN"), region, an, deepCopy = false) val homCountType = 
resultType.fieldType("homozygote_count").asInstanceOf[PCanonicalArray] val homCount = homCountType.constructFromElements(cb, region, state.nAlleles, deepCopy = true) { (cb, i) => val homCountAtIndex = cb.newLocal[Int]("callstats_result_homCountAtIndex", state.homCountAtIndex(i, state.nAlleles)) - IEmitCode.present(cb, PCode(PInt32Required, homCountAtIndex)) + IEmitCode.present(cb, primitive(homCountAtIndex)) } homCountType.storeAtAddress(cb, rt.fieldOffset(addr, "homozygote_count"), region, homCount, deepCopy = false) diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/CollectAsSetAggregator.scala b/hail/src/main/scala/is/hail/expr/ir/agg/CollectAsSetAggregator.scala index a3439e2f1d9..7312df0c385 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/CollectAsSetAggregator.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/CollectAsSetAggregator.scala @@ -8,6 +8,7 @@ import is.hail.io._ import is.hail.types.VirtualTypeWithReq import is.hail.types.encoded.EType import is.hail.types.physical._ +import is.hail.types.physical.stypes.SCode import is.hail.types.virtual.Type import is.hail.utils._ @@ -17,8 +18,8 @@ class TypedKey(typ: PType, kb: EmitClassBuilder[_], region: Value[Region]) exten def isKeyMissing(src: Code[Long]): Code[Boolean] = storageType.isFieldMissing(src, 0) - def loadKey(cb: EmitCodeBuilder, src: Code[Long]): PCode = { - typ.loadCheapPCode(cb, storageType.loadField(src, 0)) + def loadKey(cb: EmitCodeBuilder, src: Code[Long]): SCode = { + typ.loadCheapSCode(cb, storageType.loadField(src, 0)) } def isEmpty(cb: EmitCodeBuilder, off: Code[Long]): Code[Boolean] = storageType.isFieldMissing(off, 1) @@ -45,7 +46,7 @@ class TypedKey(typ: PType, kb: EmitClassBuilder[_], region: Value[Region]) exten cb += Region.copyFrom(src, dest, storageType.byteSize) def deepCopy(cb: EmitCodeBuilder, er: EmitRegion, dest: Code[Long], src: Code[Long]): Unit = { - storageType.storeAtAddress(cb, dest, region, storageType.loadCheapPCode(cb, src), deepCopy = true) + storageType.storeAtAddress(cb, dest, region, storageType.loadCheapSCode(cb, src), deepCopy = true) } def compKeys(cb: EmitCodeBuilder, k1: EmitCode, k2: EmitCode): Code[Int] = { diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/CountAggregator.scala b/hail/src/main/scala/is/hail/expr/ir/agg/CountAggregator.scala index db94bd44587..89de267a373 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/CountAggregator.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/CountAggregator.scala @@ -4,6 +4,7 @@ import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir.{EmitCode, EmitCodeBuilder} import is.hail.types.physical._ +import is.hail.types.physical.stypes.interfaces.primitive import is.hail.types.virtual.Type object CountAggregator extends StagedAggregator { @@ -17,21 +18,21 @@ object CountAggregator extends StagedAggregator { assert(init.length == 0) assert(state.vtypes.head.r.required) val ev = state.fields(0) - cb.assign(ev, EmitCode.present(cb.emb, PCode(resultType, 0L))) + cb.assign(ev, EmitCode.present(cb.emb, primitive(const(0L)))) } protected def _seqOp(cb: EmitCodeBuilder, state: State, seq: Array[EmitCode]): Unit = { assert(seq.length == 0) assert(state.vtypes.head.r.required) val ev = state.fields(0) - cb.assign(ev, EmitCode.present(cb.emb, PCode(resultType, ev.pv.asInt64.longCode(cb) + 1L))) + cb.assign(ev, EmitCode.present(cb.emb, primitive(ev.pv.asInt64.longCode(cb) + 1L))) } protected def _combOp(cb: EmitCodeBuilder, state: State, other: State): Unit = { assert(state.vtypes.head.r.required) val v1 = 
state.fields(0) val v2 = other.fields(0) - cb.assign(v1, EmitCode.present(cb.emb, PCode(resultType, v1.pv.asInt64.longCode(cb) + v2.pv.asInt64.longCode(cb)))) + cb.assign(v1, EmitCode.present(cb.emb, primitive(v1.pv.asInt64.longCode(cb) + v2.pv.asInt64.longCode(cb)))) } protected def _storeResult(cb: EmitCodeBuilder, state: State, pt: PType, addr: Value[Long], region: Value[Region], ifMissing: EmitCodeBuilder => Unit): Unit = { diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/DensifyAggregator.scala b/hail/src/main/scala/is/hail/expr/ir/agg/DensifyAggregator.scala index 4f6d0f51c6c..f574a974f53 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/DensifyAggregator.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/DensifyAggregator.scala @@ -57,7 +57,7 @@ class DensifyState(val arrayVType: VirtualTypeWithReq, val kb: EmitClassBuilder[ val codecSpec = TypedCodecSpec(arrayStorageType, codec) (cb: EmitCodeBuilder, ob: Value[OutputBuffer]) => { - val arrayCode = arrayStorageType.loadCheapPCode(cb, arrayAddr) + val arrayCode = arrayStorageType.loadCheapSCode(cb, arrayAddr) codecSpec.encodedType.buildEncoder(arrayCode.st, kb) .apply(cb, arrayCode, ob) cb += ob.writeInt(const(DensifyAggregator.END_SERIALIZATION)) @@ -88,7 +88,7 @@ class DensifyState(val arrayVType: VirtualTypeWithReq, val kb: EmitClassBuilder[ private def gc(cb: EmitCodeBuilder): Unit = { cb.ifx(region.totalManagedBytes() > maxRegionSize, { val newRegion = cb.newLocal[Region]("densify_gc", Region.stagedCreate(regionSize, kb.pool())) - cb.assign(arrayAddr, arrayStorageType.store(cb, newRegion, arrayStorageType.loadCheapPCode(cb, arrayAddr), deepCopy = true)) + cb.assign(arrayAddr, arrayStorageType.store(cb, newRegion, arrayStorageType.loadCheapSCode(cb, arrayAddr), deepCopy = true)) cb += region.invalidate() cb.assign(r, newRegion) @@ -113,7 +113,7 @@ class DensifyState(val arrayVType: VirtualTypeWithReq, val kb: EmitClassBuilder[ def combine(cb: EmitCodeBuilder, other: DensifyState): Unit = { assert(other.arrayStorageType == this.arrayStorageType) - val arr = arrayStorageType.loadCheapPCode(cb, other.arrayAddr).memoize(cb, "densify_comb_other") + val arr = arrayStorageType.loadCheapSCode(cb, other.arrayAddr).memoize(cb, "densify_comb_other") arr.asInstanceOf[SIndexableValue].forEachDefined(cb) { case (cb, idx, element) => cb += arrayStorageType.setElementPresent(arrayAddr, idx) eltType.storeAtAddress(cb, arrayStorageType.elementOffset(arrayAddr, length, idx), region, element, deepCopy = true) @@ -122,14 +122,14 @@ class DensifyState(val arrayVType: VirtualTypeWithReq, val kb: EmitClassBuilder[ } def result(cb: EmitCodeBuilder, region: Value[Region]): SIndexablePointerCode = { - arrayStorageType.loadCheapPCode(cb, arrayAddr) + arrayStorageType.loadCheapSCode(cb, arrayAddr) } def copyFrom(cb: EmitCodeBuilder, srcCode: Code[Long]): Unit = { cb.assign(arrayAddr, arrayStorageType.store(cb, region, - arrayStorageType.loadCheapPCode(cb, arrayStorageType.loadFromNested(srcCode)), + arrayStorageType.loadCheapSCode(cb, arrayStorageType.loadFromNested(srcCode)), deepCopy = true)) cb.assign(length, arrayStorageType.loadLength(arrayAddr)) } diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/DownsampleAggregator.scala b/hail/src/main/scala/is/hail/expr/ir/agg/DownsampleAggregator.scala index 076da1e431c..368c097adfb 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/DownsampleAggregator.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/DownsampleAggregator.scala @@ -3,11 +3,12 @@ package is.hail.expr.ir.agg import 
is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir.orderings.CodeOrdering -import is.hail.expr.ir.{EmitClassBuilder, EmitCode, EmitCodeBuilder, EmitParamType, EmitRegion, IEmitCode, PCodeEmitParamType, ParamType} +import is.hail.expr.ir.{EmitClassBuilder, EmitCode, EmitCodeBuilder, EmitParamType, EmitRegion, IEmitCode, SCodeEmitParamType, ParamType} import is.hail.io.{BufferSpec, InputBuffer, OutputBuffer} import is.hail.types.VirtualTypeWithReq import is.hail.types.encoded.EType import is.hail.types.physical._ +import is.hail.types.physical.stypes.SingleCodeSCode import is.hail.types.physical.stypes.concrete.SIndexablePointerCode import is.hail.types.virtual._ import is.hail.utils._ @@ -22,7 +23,7 @@ class DownsampleBTreeKey(binType: PBaseStruct, pointType: PBaseStruct, kb: EmitC val compType: PType = binType private val kcomp = kb.getOrderingFunction(binType.sType, CodeOrdering.Compare()) - def isEmpty(cb: EmitCodeBuilder, off: Code[Long]): Code[Boolean] = PBooleanRequired.loadCheapPCode(cb, storageType.loadField(off, "empty")).boolCode(cb) + def isEmpty(cb: EmitCodeBuilder, off: Code[Long]): Code[Boolean] = PBooleanRequired.loadCheapSCode(cb, storageType.loadField(off, "empty")).boolCode(cb) def initializeEmpty(cb: EmitCodeBuilder, off: Code[Long]): Unit = cb += Region.storeBoolean(storageType.fieldOffset(off, "empty"), true) @@ -32,12 +33,12 @@ class DownsampleBTreeKey(binType: PBaseStruct, pointType: PBaseStruct, kb: EmitC val src = cb.newLocal[Long]("dsa_deep_copy_src", srcc) cb.ifx(Region.loadBoolean(storageType.loadField(src, "empty")), cb += Code._fatal[Unit]("key empty!")) - storageType.storeAtAddress(cb, dest, er.region, storageType.loadCheapPCode(cb, src), deepCopy = true) + storageType.storeAtAddress(cb, dest, er.region, storageType.loadCheapSCode(cb, src), deepCopy = true) } def compKeys(cb: EmitCodeBuilder, k1: EmitCode, k2: EmitCode): Code[Int] = kcomp(cb, k1, k2) - def loadCompKey(cb: EmitCodeBuilder, off: Value[Long]): EmitCode = EmitCode.present(cb.emb, binType.loadCheapPCode(cb, storageType.loadField(off, "bin"))) + def loadCompKey(cb: EmitCodeBuilder, off: Value[Long]): EmitCode = EmitCode.present(cb.emb, binType.loadCheapSCode(cb, storageType.loadField(off, "bin"))) } @@ -207,10 +208,10 @@ class DownsampleState(val kb: EmitClassBuilder[_], labelType: VirtualTypeWithReq tree.bulkStore(cb, ob) { (cb, ob, srcCode) => val src = cb.newLocal("downsample_state_ser_src", srcCode) cb += Region.loadBoolean(key.storageType.loadField(src, "empty")).orEmpty(Code._fatal[Unit]("bad")) - val binCode = binType.loadCheapPCode(cb, key.storageType.loadField(src, "bin")) + val binCode = binType.loadCheapSCode(cb, key.storageType.loadField(src, "bin")) binET.buildEncoder(binCode.st, kb).apply(cb, binCode, ob) - val pointCode = pointType.loadCheapPCode(cb, key.storageType.loadField(src, "point")) + val pointCode = pointType.loadCheapSCode(cb, key.storageType.loadField(src, "point")) pointET.buildEncoder(pointCode.st, kb).apply(cb, pointCode, ob) } cb += ob.writeInt(DownsampleState.serializationEndMarker) @@ -290,13 +291,13 @@ class DownsampleState(val kb: EmitClassBuilder[_], labelType: VirtualTypeWithReq cb += Region.storeInt(binType.fieldOffset(binStaging, "x"), binX) cb += Region.storeInt(binType.fieldOffset(binStaging, "y"), binY) cb.assign(insertOffset, - tree.getOrElseInitialize(cb, EmitCode.present(cb.emb, storageType.fieldType("binStaging").loadCheapPCode(cb, binStaging)))) + tree.getOrElseInitialize(cb, EmitCode.present(cb.emb, 
storageType.fieldType("binStaging").loadCheapSCode(cb, binStaging)))) cb.ifx(key.isEmpty(cb, insertOffset), { cb.assign(binOffset, key.storageType.loadField(insertOffset, "bin")) cb += Region.storeInt(binType.loadField(binOffset, "x"), binX) cb += Region.storeInt(binType.loadField(binOffset, "y"), binY) cb.assign(insertedPointOffset, key.storageType.loadField(insertOffset, "point")) - pointType.storeAtAddress(cb, insertedPointOffset, region, pointType.loadCheapPCode(cb, point), deepCopy = deepCopy) + pointType.storeAtAddress(cb, insertedPointOffset, region, pointType.loadCheapSCode(cb, point), deepCopy = deepCopy) cb += Region.storeBoolean(key.storageType.loadField(insertOffset, "empty"), false) cb.assign(treeSize, treeSize + 1) }) @@ -367,7 +368,7 @@ class DownsampleState(val kb: EmitClassBuilder[_], labelType: VirtualTypeWithReq val yc = point.loadField(cb, "y").get(cb).asFloat64.doubleCode(cb) val x = cb.newLocal[Double]("x", xc) val y = cb.newLocal[Double]("y", yc) - val pointc = SingleCodePCode.fromPCode(cb, point, region).code.asInstanceOf[Code[Long]] + val pointc = SingleCodeSCode.fromSCode(cb, point, region).code.asInstanceOf[Code[Long]] insertIntoTree(cb, xBinCoordinate(x), yBinCoordinate(y), pointc, deepCopy = true) }) cb.assign(i, i + 1) @@ -393,7 +394,7 @@ class DownsampleState(val kb: EmitClassBuilder[_], labelType: VirtualTypeWithReq cb.assign(bufferRight, max(bufferRight, x)) cb.assign(bufferBottom, min(bufferBottom, y)) cb.assign(bufferTop, max(bufferTop, y)) - buffer.append(cb, pointType.loadCheapPCode(cb, point)) + buffer.append(cb, pointType.loadCheapSCode(cb, point)) cb.ifx(buffer.size >= maxBufferSize, dumpBuffer(cb)) } } @@ -443,13 +444,13 @@ class DownsampleState(val kb: EmitClassBuilder[_], labelType: VirtualTypeWithReq def insert(cb: EmitCodeBuilder, x: EmitCode, y: EmitCode, l: EmitCode): Unit = { val name = "downsample_insert" - val mb = kb.getOrGenEmitMethod(name, (this, name), FastIndexedSeq[ParamType](x.pv.st.pType.asParam, y.pv.st.pType.asParam, PCodeEmitParamType(l.pv.st.pType)), UnitInfo) { mb => + val mb = kb.getOrGenEmitMethod(name, (this, name), FastIndexedSeq[ParamType](x.st.paramType, y.st.paramType, l.emitParamType), UnitInfo) { mb => val pointStaging = mb.newLocal[Long]("pointStaging") mb.voidWithBuilder { cb => - val x = mb.getPCodeParam(1) + val x = mb.getSCodeParam(1) .memoize(cb, "downsample_insert_x") - val y = mb.getPCodeParam(2) + val y = mb.getSCodeParam(2) .memoize(cb, "downsample_insert_y") val l = mb.getEmitParam(3, region) @@ -514,7 +515,7 @@ class DownsampleState(val kb: EmitClassBuilder[_], labelType: VirtualTypeWithReq mb.emitWithBuilder { cb => cb.assign(i, 0) cb.whileLoop(i < other.buffer.size, { - val point = SingleCodePCode.fromPCode(cb, other.buffer.loadElement(cb, i).pv, region) + val point = SingleCodeSCode.fromSCode(cb, other.buffer.loadElement(cb, i).pv, region) deepCopyAndInsertPoint(cb, point.code.asInstanceOf[Code[Long]]) cb.assign(i, i + 1) }) @@ -535,7 +536,7 @@ class DownsampleState(val kb: EmitClassBuilder[_], labelType: VirtualTypeWithReq val (pushElement, finish) = resType.constructFromFunctions(cb, region, treeSize, deepCopy = true) cb.ifx(treeSize > 0, { tree.foreach(cb) { (cb, tv) => - val pointCode = pointType.loadCheapPCode(cb, key.storageType.loadField(tv, "point")) + val pointCode = pointType.loadCheapSCode(cb, key.storageType.loadField(tv, "point")) pushElement(cb, IEmitCode.present(cb, pointCode)) } }) diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/GroupedAggregator.scala 
b/hail/src/main/scala/is/hail/expr/ir/agg/GroupedAggregator.scala index b9094408b04..3ecca5700cf 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/GroupedAggregator.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/GroupedAggregator.scala @@ -8,6 +8,7 @@ import is.hail.io._ import is.hail.types.VirtualTypeWithReq import is.hail.types.encoded.EType import is.hail.types.physical._ +import is.hail.types.physical.stypes.SCode import is.hail.types.virtual.{TVoid, Type} import is.hail.utils._ @@ -20,7 +21,7 @@ class GroupedBTreeKey(kt: PType, kb: EmitClassBuilder[_], region: Value[Region], override def compWithKey(cb: EmitCodeBuilder, off: Code[Long], k: EmitCode): Code[Int] = { val mb = kb.getOrGenEmitMethod("compWithKey", - ("compWithKey_grouped_btree", kt, k.pt), + ("compWithKey_grouped_btree", kt, k.emitType), FastIndexedSeq[ParamType](typeInfo[Long], k.emitParamType), typeInfo[Int] ) { mb => @@ -41,8 +42,8 @@ class GroupedBTreeKey(kt: PType, kb: EmitClassBuilder[_], region: Value[Region], def isKeyMissing(off: Code[Long]): Code[Boolean] = storageType.isFieldMissing(off, 0) - def loadKey(cb: EmitCodeBuilder, off: Code[Long]): PCode = { - kt.loadCheapPCode(cb, storageType.loadField(off, 0)) + def loadKey(cb: EmitCodeBuilder, off: Code[Long]): SCode = { + kt.loadCheapSCode(cb, storageType.loadField(off, 0)) } def initValue(cb: EmitCodeBuilder, destc: Code[Long], k: EmitCode, rIdx: Code[Int]): Unit = { @@ -81,11 +82,11 @@ class GroupedBTreeKey(kt: PType, kb: EmitClassBuilder[_], region: Value[Region], cb += Region.storeInt(storageType.fieldOffset(off, 1), -1) def copy(cb: EmitCodeBuilder, src: Code[Long], dest: Code[Long]): Unit = - storageType.storeAtAddress(cb, dest, region, storageType.loadCheapPCode(cb, src), deepCopy = false) + storageType.storeAtAddress(cb, dest, region, storageType.loadCheapSCode(cb, src), deepCopy = false) def deepCopy(cb: EmitCodeBuilder, er: EmitRegion, dest: Code[Long], srcCode: Code[Long]): Unit = { val src = cb.newLocal("ga_deep_copy_src", srcCode) - storageType.storeAtAddress(cb, dest, region, storageType.loadCheapPCode(cb, src), deepCopy = true) + storageType.storeAtAddress(cb, dest, region, storageType.loadCheapSCode(cb, src), deepCopy = true) container.copyFrom(cb, containerOffset(src)) container.store(cb) } @@ -312,6 +313,6 @@ class GroupedAggregator(ktV: VirtualTypeWithReq, nestedAggs: Array[StagedAggrega } // don't need to deep copy because that's done in nested aggregators - pt.storeAtAddress(cb, addr, region, resultType.loadCheapPCode(cb, resultAddr), deepCopy = false) + pt.storeAtAddress(cb, addr, region, resultType.loadCheapSCode(cb, resultAddr), deepCopy = false) } } diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/ImputeTypeAggregator.scala b/hail/src/main/scala/is/hail/expr/ir/agg/ImputeTypeAggregator.scala index ac361466dab..734c7991f85 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/ImputeTypeAggregator.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/ImputeTypeAggregator.scala @@ -4,6 +4,7 @@ import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir.{EmitClassBuilder, EmitCode, EmitCodeBuilder} import is.hail.types.physical._ +import is.hail.types.physical.stypes.interfaces._ import is.hail.types.virtual.{TInt32, TString, Type} import is.hail.types.{RPrimitive, VirtualTypeWithReq} import is.hail.utils._ @@ -80,7 +81,7 @@ class ImputeTypeState(kb: EmitClassBuilder[_]) extends PrimitiveRVAState(Array(V | (supportsI32.toI << 3) | (supportsI64.toI << 4) | (supportsF64.toI << 5)) - cb.assign(_repr, 
EmitCode.present(cb.emb, PCode(_repr.pt, value))) + cb.assign(_repr, EmitCode.present(cb.emb, primitive(value))) } def initialize(cb: EmitCodeBuilder): Unit = { @@ -90,8 +91,8 @@ class ImputeTypeState(kb: EmitClassBuilder[_]) extends PrimitiveRVAState(Array(V def seqOp(cb: EmitCodeBuilder, ec: EmitCode): Unit = { ec.toI(cb) .consume(cb, - cb.assign(_repr, EmitCode.present(cb.emb, PCode(_repr.pt, repr & (~(1 << 1))))), - { case (pc: PStringCode) => + cb.assign(_repr, EmitCode.present(cb.emb, primitive(repr & (~(1 << 1))))), + { case (pc: SStringCode) => val s = cb.newLocal[String]("impute_type_agg_seq_str") cb.assign(s, pc.loadString()) @@ -150,7 +151,7 @@ class ImputeTypeAggregator() extends StagedAggregator { Array(state.getAnyNonMissing, state.getAllDefined, state.getSupportsBool, state.getSupportsI32, state.getSupportsI64, state.getSupportsF64) .zipWithIndex.foreach { case (b, idx) => - rt.types(idx).storeAtAddress(cb, rt.fieldOffset(addr, idx), region, PCode(PBooleanRequired, b), deepCopy = true) + rt.types(idx).storeAtAddress(cb, rt.fieldOffset(addr, idx), region, primitive(b), deepCopy = true) } } } diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/LinearRegressionAggregator.scala b/hail/src/main/scala/is/hail/expr/ir/agg/LinearRegressionAggregator.scala index 37d3cda6bce..0eaf4065640 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/LinearRegressionAggregator.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/LinearRegressionAggregator.scala @@ -273,6 +273,6 @@ class LinearRegressionAggregator() extends StagedAggregator { stateType.loadField(state.off, 0), stateType.loadField(state.off, 1), Region.loadInt(stateType.loadField(state.off, 2)))) - pt.storeAtAddress(cb, addr, region, LinearRegressionAggregator.resultType.loadCheapPCode(cb, resAddr), deepCopy = false) + pt.storeAtAddress(cb, addr, region, LinearRegressionAggregator.resultType.loadCheapSCode(cb, resAddr), deepCopy = false) } } diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/MonoidAggregator.scala b/hail/src/main/scala/is/hail/expr/ir/agg/MonoidAggregator.scala index c2a63f4211c..a1616d00883 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/MonoidAggregator.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/MonoidAggregator.scala @@ -4,7 +4,8 @@ import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir.functions.UtilFunctions import is.hail.expr.ir.{coerce => _, _} -import is.hail.types.physical.{PCode, PType, typeToTypeInfo} +import is.hail.types.physical.stypes.interfaces._ +import is.hail.types.physical.{PType, typeToTypeInfo} import is.hail.types.virtual._ import scala.language.existentials @@ -31,12 +32,12 @@ class MonoidAggregator(monoid: StagedMonoidSpec) extends StagedAggregator { assert(init.length == 0) val stateRequired = state.vtypes.head.r.required val ev = state.fields(0) - if (!ev.pt.required) { + if (!ev.required) { assert(!stateRequired, s"monoid=$monoid, stateRequired=$stateRequired") - cb.assign(ev, EmitCode.missing(cb.emb, ev.pt)) + cb.assign(ev, EmitCode.missing(cb.emb, ev.st)) } else { assert(stateRequired, s"monoid=$monoid, stateRequired=$stateRequired") - cb.assign(ev, EmitCode.present(cb.emb, PCode(ev.pt, monoid.neutral.get))) + cb.assign(ev, EmitCode.present(cb.emb, primitive(ev.st.virtualType, monoid.neutral.get))) } } @@ -64,10 +65,11 @@ class MonoidAggregator(monoid: StagedMonoidSpec) extends StagedAggregator { ev1: EmitSettable, ev2: EmitValue ): Unit = { + val combined = primitive(monoid.typ, monoid(ev1.pv.asPrimitive.primitiveCode, 
ev2.pv.asPrimitive.primitiveCode)) cb.ifx(ev1.m, cb.ifx(!ev2.m, cb.assign(ev1, ev2)), cb.ifx(!ev2.m, - cb.assign(ev1, EmitCode.present(cb.emb, PCode(ev1.pt, monoid(ev1.v, ev2.v)))))) + cb.assign(ev1, EmitCode.present(cb.emb, combined)))) } } diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/NDArraySumAggregator.scala b/hail/src/main/scala/is/hail/expr/ir/agg/NDArraySumAggregator.scala index 8d8d78d0b22..00816d997f8 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/NDArraySumAggregator.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/NDArraySumAggregator.scala @@ -2,11 +2,12 @@ package is.hail.expr.ir.agg import is.hail.annotations.Region import is.hail.asm4s._ -import is.hail.expr.ir.{CodeParamType, EmitCode, EmitCodeBuilder, EmitParamType, PCodeEmitParamType} +import is.hail.expr.ir.{CodeParamType, EmitCode, EmitCodeBuilder, EmitParamType, SCodeEmitParamType} import is.hail.types.VirtualTypeWithReq import is.hail.types.physical.stypes.SCode -import is.hail.types.physical.stypes.interfaces.SNDArray -import is.hail.types.physical.{PCanonicalNDArray, PNDArrayCode, PNDArrayValue, PType} +import is.hail.types.physical.stypes.concrete.SNDArrayPointerSettable +import is.hail.types.physical.stypes.interfaces.{SNDArray, SNDArrayCode, SNDArrayValue} +import is.hail.types.physical.{PCanonicalNDArray, PType} import is.hail.types.virtual.Type import is.hail.utils._ @@ -33,13 +34,13 @@ class NDArraySumAggregator(ndVTyp: VirtualTypeWithReq) extends StagedAggregator override protected def _seqOp(cb: EmitCodeBuilder, state: State, seq: Array[EmitCode]): Unit = { val Array(nextNDCode) = seq - val seqOpMethod = cb.emb.genEmitMethod("ndarray_sum_aggregator_seq_op", FastIndexedSeq(PCodeEmitParamType(nextNDCode.pt)), CodeParamType(UnitInfo)) + val seqOpMethod = cb.emb.genEmitMethod("ndarray_sum_aggregator_seq_op", FastIndexedSeq(nextNDCode.emitParamType), CodeParamType(UnitInfo)) seqOpMethod.voidWithBuilder { cb => val nextNDInput = seqOpMethod.getEmitParam(1, null) // no streams here - nextNDInput.toI(cb).consume(cb, {}, { case nextNDArrayPCode: PNDArrayCode => + nextNDInput.toI(cb).consume(cb, {}, { case nextNDArrayPCode: SNDArrayCode => val nextNDPV = nextNDArrayPCode.memoize(cb, "ndarray_sum_seqop_next") - val statePV = state.storageType.loadCheapPCode(cb, state.off).asBaseStruct.memoize(cb, "ndarray_sum_seq_op_state") + val statePV = state.storageType.loadCheapSCode(cb, state.off).asBaseStruct.memoize(cb, "ndarray_sum_seq_op_state") statePV.loadField(cb, ndarrayFieldNumber).consume(cb, { cb += (state.region.getNewRegion(Region.TINY)) @@ -60,11 +61,11 @@ class NDArraySumAggregator(ndVTyp: VirtualTypeWithReq) extends StagedAggregator val combOpMethod = cb.emb.genEmitMethod[Unit]("ndarray_sum_aggregator_comb_op") combOpMethod.voidWithBuilder { cb => - val rightPV = other.storageType.loadCheapPCode(cb, other.off).asBaseStruct.memoize(cb, "ndarray_sum_comb_op_right") + val rightPV = other.storageType.loadCheapSCode(cb, other.off).asBaseStruct.memoize(cb, "ndarray_sum_comb_op_right") rightPV.loadField(cb, ndarrayFieldNumber).consume(cb, {}, { rightNDPC => val rightNdValue = rightNDPC.asNDArray.memoize(cb, "right_ndarray_sum_agg") - val leftPV = state.storageType.loadCheapPCode(cb, state.off).asBaseStruct.memoize(cb, "ndarray_sum_comb_op_left") + val leftPV = state.storageType.loadCheapSCode(cb, state.off).asBaseStruct.memoize(cb, "ndarray_sum_comb_op_left") leftPV.loadField(cb, ndarrayFieldNumber).consume(cb, { state.storeNonmissing(cb, rightNdValue) @@ -80,15 +81,16 @@ class 
NDArraySumAggregator(ndVTyp: VirtualTypeWithReq) extends StagedAggregator cb.invokeVoid(combOpMethod) } - private def addValues(cb: EmitCodeBuilder, region: Value[Region], leftNdValue: PNDArrayValue, rightNdValue: PNDArrayValue): Unit = { + private def addValues(cb: EmitCodeBuilder, region: Value[Region], leftNdValue: SNDArrayValue, rightNdValue: SNDArrayValue): Unit = { cb.ifx(!leftNdValue.sameShape(rightNdValue, cb), cb += Code._fatal[Unit]("Can't sum ndarrays of different shapes.")) - SNDArray.forEachIndex(cb, leftNdValue.shapes(cb), "ndarray_sum_addvalues") { case (cb, indices) => - val newElement = SCode.add(cb, leftNdValue.loadElement(indices, cb), rightNdValue.loadElement(indices, cb), true) - ndTyp.setElement(cb, region, indices, leftNdValue.value.asInstanceOf[Value[Long]], newElement, deepCopy = true) - } + SNDArray.coiterate(cb, region, FastIndexedSeq((leftNdValue.get, "left"), (rightNdValue.get, "right")), { + case Seq(l, r) => + val newElement = SCode.add(cb, l, r, true) + cb.assign(l, newElement.copyToRegion(cb, region, leftNdValue.st.elementType)) + }) } protected def _storeResult(cb: EmitCodeBuilder, state: State, pt: PType, addr: Value[Long], region: Value[Region], ifMissing: EmitCodeBuilder => Unit): Unit = { diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/StagedArrayBuilder.scala b/hail/src/main/scala/is/hail/expr/ir/agg/StagedArrayBuilder.scala index 2dad65a2df5..cae640bd80d 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/StagedArrayBuilder.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/StagedArrayBuilder.scala @@ -37,11 +37,11 @@ class StagedArrayBuilder(eltType: PType, kb: EmitClassBuilder[_], region: Value[ cb.assign(tmpOff, src) cb.assign(size, Region.loadInt(currentSizeOffset(tmpOff))) cb.assign(capacity, Region.loadInt(capacityOffset(tmpOff))) - cb.assign(data, eltArray.store(cb, region, eltArray.loadCheapPCode(cb, Region.loadAddress(dataOffset(tmpOff))), deepCopy = true)) + cb.assign(data, eltArray.store(cb, region, eltArray.loadCheapSCode(cb, Region.loadAddress(dataOffset(tmpOff))), deepCopy = true)) } def reallocateData(cb: EmitCodeBuilder): Unit = { - cb.assign(data, eltArray.store(cb, region, eltArray.loadCheapPCode(cb, data), deepCopy = true)) + cb.assign(data, eltArray.store(cb, region, eltArray.loadCheapSCode(cb, data), deepCopy = true)) } def storeTo(cb: EmitCodeBuilder, dest: Code[Long]): Unit = { @@ -58,7 +58,7 @@ class StagedArrayBuilder(eltType: PType, kb: EmitClassBuilder[_], region: Value[ cb += ob.writeInt(size) cb += ob.writeInt(capacity) codecSpec.encodedType.buildEncoder(eltArray.sType, kb) - .apply(cb, eltArray.loadCheapPCode(cb, data), ob) + .apply(cb, eltArray.loadCheapSCode(cb, data), ob) cb += ob.writeInt(const(StagedArrayBuilder.END_SERIALIZATION)) } } @@ -112,7 +112,7 @@ class StagedArrayBuilder(eltType: PType, kb: EmitClassBuilder[_], region: Value[ def loadElement(cb: EmitCodeBuilder, idx: Value[Int]): EmitCode = { val m = eltArray.isElementMissing(data, idx) - EmitCode(Code._empty, m, eltType.loadCheapPCode(cb, eltArray.loadElement(data, capacity, idx))) + EmitCode(Code._empty, m, eltType.loadCheapSCode(cb, eltArray.loadElement(data, capacity, idx))) } private def resize(cb: EmitCodeBuilder): Unit = { diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/StagedBlockLinkedList.scala b/hail/src/main/scala/is/hail/expr/ir/agg/StagedBlockLinkedList.scala index 53cdfd1fa71..bd75c85f1bc 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/StagedBlockLinkedList.scala +++ 
b/hail/src/main/scala/is/hail/expr/ir/agg/StagedBlockLinkedList.scala @@ -6,6 +6,7 @@ import is.hail.expr.ir._ import is.hail.io.{InputBuffer, OutputBuffer} import is.hail.types.encoded._ import is.hail.types.physical._ +import is.hail.types.physical.stypes.SCode import is.hail.types.physical.stypes.concrete.{SIndexablePointerCode, SIndexablePointerSettable} import is.hail.utils._ @@ -141,7 +142,7 @@ class StagedBlockLinkedList(val elemType: PType, val kb: EmitClassBuilder[_]) { cb.whileLoop(i < count(n), { f(cb, EmitCode(Code._empty, bufferType.isElementMissing(buffer(n), i), - elemType.loadCheapPCode(cb, bufferType.loadElement(buffer(n), capacity(n), i)))) + elemType.loadCheapSCode(cb, bufferType.loadElement(buffer(n), capacity(n), i)))) cb.assign(i, i + 1) }) } @@ -208,7 +209,7 @@ class StagedBlockLinkedList(val elemType: PType, val kb: EmitClassBuilder[_]) { foreachNode(cb, n) { cb => cb += ob.writeBoolean(true) cb.assign(b, buffer(n)) - bufferEType.buildPrefixEncoder(cb, bufferType.loadCheapPCode(cb, b).memoize(cb, "sbll_serialize_v"), ob, count(n)) + bufferEType.buildPrefixEncoder(cb, bufferType.loadCheapSCode(cb, b).memoize(cb, "sbll_serialize_v"), ob, count(n)) } cb += ob.writeBoolean(false) } @@ -230,7 +231,7 @@ class StagedBlockLinkedList(val elemType: PType, val kb: EmitClassBuilder[_]) { cb.invokeVoid(desF, region, inputBuffer) } - private def appendShallow(cb: EmitCodeBuilder, r: Code[Region], aCode: PCode): Unit = { + private def appendShallow(cb: EmitCodeBuilder, r: Code[Region], aCode: SCode): Unit = { val buff = cb.memoize(aCode, "sbll_append_shallow_a").asInstanceOf[SIndexablePointerSettable] val newNode = cb.newLocal[Long]("sbll_append_shallow_newnode", nodeType.allocate(r)) cb += initNode(newNode, buf = buff.a, count = buff.length) diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/TakeByAggregator.scala b/hail/src/main/scala/is/hail/expr/ir/agg/TakeByAggregator.scala index 5635f88109f..6ac443bf8fa 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/TakeByAggregator.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/TakeByAggregator.scala @@ -7,8 +7,9 @@ import is.hail.expr.ir.{Ascending, EmitClassBuilder, EmitCode, EmitCodeBuilder, import is.hail.io.{BufferSpec, InputBuffer, OutputBuffer} import is.hail.types.VirtualTypeWithReq import is.hail.types.physical._ +import is.hail.types.physical.stypes.SCode import is.hail.types.physical.stypes.concrete.{SBaseStructPointerCode, SIndexablePointerCode} -import is.hail.types.physical.stypes.interfaces.SBaseStruct +import is.hail.types.physical.stypes.interfaces._ import is.hail.types.virtual.{TInt32, Type} import is.hail.utils._ @@ -56,7 +57,7 @@ class TakeByRVAS(val valueVType: VirtualTypeWithReq, val keyVType: VirtualTypeWi ord.compare(cb, k1, k2, true) } - private def compareIndexedKey(cb: EmitCodeBuilder, k1: PCode, k2: PCode): Code[Int] = { + private def compareIndexedKey(cb: EmitCodeBuilder, k1: SCode, k2: SCode): Code[Int] = { val ord = StructOrdering.make(k1.st.asInstanceOf[SBaseStruct], k2.st.asInstanceOf[SBaseStruct], cb.emb.ecb, Array(so, Ascending), true) ord.compareNonnull(cb, k1, k2) } @@ -198,7 +199,7 @@ class TakeByRVAS(val valueVType: VirtualTypeWithReq, val keyVType: VirtualTypeWi private def keyIsMissing(offset: Code[Long]): Code[Boolean] = indexedKeyType.isFieldMissing(offset, 0) - private def loadKeyValue(cb: EmitCodeBuilder, offset: Code[Long]): PCode = keyType.loadCheapPCode(cb, indexedKeyType.loadField(offset, 0)) + private def loadKeyValue(cb: EmitCodeBuilder, offset: Code[Long]): SCode = 
keyType.loadCheapSCode(cb, indexedKeyType.loadField(offset, 0)) private def loadKey(cb: EmitCodeBuilder, offset: Value[Long]): EmitCode = EmitCode(Code._empty, keyIsMissing(offset), loadKeyValue(cb, offset)) @@ -209,8 +210,8 @@ class TakeByRVAS(val valueVType: VirtualTypeWithReq, val keyVType: VirtualTypeWi val j = mb.getCodeParam[Long](2) mb.emitWithBuilder(cb => compareIndexedKey(cb, - indexedKeyType.loadCheapPCode(cb, eltTuple.fieldOffset(i, 0)), - indexedKeyType.loadCheapPCode(cb, eltTuple.fieldOffset(j, 0)))) + indexedKeyType.loadCheapSCode(cb, eltTuple.fieldOffset(i, 0)), + indexedKeyType.loadCheapSCode(cb, eltTuple.fieldOffset(j, 0)))) mb.invokeCode(_, _) } @@ -328,7 +329,7 @@ class TakeByRVAS(val valueVType: VirtualTypeWithReq, val keyVType: VirtualTypeWi indexedKeyType.storeAtAddress(cb, eltTuple.fieldOffset(staging, 0), region, - indexedKeyType.loadCheapPCode(cb, indexedKey), + indexedKeyType.loadCheapSCode(cb, indexedKey), deepCopy = false) value.toI(cb) .consume(cb, @@ -342,12 +343,12 @@ class TakeByRVAS(val valueVType: VirtualTypeWithReq, val keyVType: VirtualTypeWi } private def swapStaging(cb: EmitCodeBuilder): Unit = { - eltTuple.storeAtAddress(cb, ab.elementOffset(0), region, eltTuple.loadCheapPCode(cb, staging), true) + eltTuple.storeAtAddress(cb, ab.elementOffset(0), region, eltTuple.loadCheapSCode(cb, staging), true) rebalanceDown(cb, 0) } private def enqueueStaging(cb: EmitCodeBuilder): Unit = { - ab.append(cb, eltTuple.loadCheapPCode(cb, staging)) + ab.append(cb, eltTuple.loadCheapSCode(cb, staging)) rebalanceUp(cb, ab.size - 1) } @@ -382,8 +383,8 @@ class TakeByRVAS(val valueVType: VirtualTypeWithReq, val keyVType: VirtualTypeWi // for tests def seqOp(cb: EmitCodeBuilder, vm: Code[Boolean], v: Code[_], km: Code[Boolean], k: Code[_]): Unit = { - val vec = EmitCode(Code._empty, vm, PCode(valueType, v)) - val kec = EmitCode(Code._empty, km, PCode(keyType, k)) + val vec = EmitCode(Code._empty, vm, if (valueType.isPrimitive) primitive(valueType.virtualType, v) else valueType.loadCheapSCode(cb, coerce[Long](v))) + val kec = EmitCode(Code._empty, km, if (keyType.isPrimitive) primitive(keyType.virtualType, k) else keyType.loadCheapSCode(cb, coerce[Long](k))) seqOp(cb, vec, kec) } @@ -530,11 +531,11 @@ class TakeByRVAS(val valueVType: VirtualTypeWithReq, val keyVType: VirtualTypeWi val sortedIdx = cb.newLocal[Int]("tba_result_sortedidx", Region.loadInt(indexOffset(idx))) ab.loadElement(cb, sortedIdx).toI(cb) .flatMap(cb) { case pct: SBaseStructPointerCode => - pct.memoize(cb, "takeby_result_tuple").loadField(cb, 1).typecast[PCode] + pct.memoize(cb, "takeby_result_tuple").loadField(cb, 1) } }.a } - resultType.loadCheapPCode(cb, cb.invokeCode[Long](mb, _r)) + resultType.loadCheapSCode(cb, cb.invokeCode[Long](mb, _r)) } } diff --git a/hail/src/main/scala/is/hail/expr/ir/analyses/ComputeMethodSplits.scala b/hail/src/main/scala/is/hail/expr/ir/analyses/ComputeMethodSplits.scala new file mode 100644 index 00000000000..ce9f1eb6c15 --- /dev/null +++ b/hail/src/main/scala/is/hail/expr/ir/analyses/ComputeMethodSplits.scala @@ -0,0 +1,45 @@ +package is.hail.expr.ir.analyses + +import is.hail.HailContext +import is.hail.expr.ir._ + +object ComputeMethodSplits { + def apply(ir: IR, controlFlowPreventsSplit: Memo[Unit]): Memo[Unit] = { + val m = Memo.empty[Unit] + + val splitThreshold = HailContext.getFlag("method_split_ir_limit").toInt + require(splitThreshold > 0, s"invalid method_split_ir_limit") + + def recurAndComputeSizeUnderneath(x: IR): Int = { + val sizeUnderneath = 
x.children.iterator.map { case child: IR => recurAndComputeSizeUnderneath(child) }.sum + + val shouldSplit = !controlFlowPreventsSplit.contains(x) && (x match { + case _: TailLoop => true + + // stream consumers + case _: ToArray => true + case _: ToSet => true + case _: ToDict => true + case _: StreamFold => true + case _: StreamFold2 => true + case _: StreamLen => true + case _: StreamFor => true + + case _ => sizeUnderneath > splitThreshold + }) + if (shouldSplit) { + m.bind(x, ()) + 0 // method call is small + } else { + sizeUnderneath + (x match { + case _: Ref => 0 + case _: In => 0 + case _ if IsConstant(x) => 0 + case _ => 1 + }) + } + } + recurAndComputeSizeUnderneath(ir) + m + } +} diff --git a/hail/src/main/scala/is/hail/expr/ir/analyses/ControlFlowPreventsSplit.scala b/hail/src/main/scala/is/hail/expr/ir/analyses/ControlFlowPreventsSplit.scala new file mode 100644 index 00000000000..afee062cd8e --- /dev/null +++ b/hail/src/main/scala/is/hail/expr/ir/analyses/ControlFlowPreventsSplit.scala @@ -0,0 +1,33 @@ +package is.hail.expr.ir.analyses + +import is.hail.expr.ir.{BaseIR, Memo, Recur, Ref, TailLoop, UsesAndDefs, VisitIR} +import is.hail.types.virtual.TStream + +object ControlFlowPreventsSplit { + + def apply(x: BaseIR, parentPointers: Memo[BaseIR], usesAndDefs: UsesAndDefs): Memo[Unit] = { + val m = Memo.empty[Unit] + VisitIR(x) { + case r@Recur(name, _, _) => + var parent: BaseIR = r + while (parent match { + case TailLoop(`name`, _, _) => false + case _ => true + }) { + if (!m.contains(parent)) + m.bind(parent, ()) + parent = parentPointers.lookup(parent) + } + case r@Ref(name, t) if t.isInstanceOf[TStream] => + val declaration = usesAndDefs.defs.lookup(r) + var parent: BaseIR = r + while (!(parent.eq(declaration))) { + if (!m.contains(parent)) + m.bind(parent, ()) + parent = parentPointers.lookup(parent) + } + case _ => + } + m + } +} diff --git a/hail/src/main/scala/is/hail/expr/ir/analyses/ParentPointers.scala b/hail/src/main/scala/is/hail/expr/ir/analyses/ParentPointers.scala new file mode 100644 index 00000000000..fb6f52b6552 --- /dev/null +++ b/hail/src/main/scala/is/hail/expr/ir/analyses/ParentPointers.scala @@ -0,0 +1,17 @@ +package is.hail.expr.ir.analyses + +import is.hail.expr.ir.{BaseIR, Memo} + +object ParentPointers { + def apply(x: BaseIR): Memo[BaseIR] = { + val m = Memo.empty[BaseIR] + + def recur(ir: BaseIR, parent: BaseIR): Unit = { + m.bind(ir, parent) + ir.children.foreach(recur(_, ir)) + } + + recur(x, null) + m + } +} diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/ArrayFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/ArrayFunctions.scala index aaad04eb331..92c77f32490 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/ArrayFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/ArrayFunctions.scala @@ -4,7 +4,9 @@ import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir._ import is.hail.types.coerce -import is.hail.types.physical.{PArray, PCode, PFloat64, PIndexableCode, PType} +import is.hail.types.physical.stypes.EmitType +import is.hail.types.physical.stypes.primitives.SFloat64 +import is.hail.types.physical.stypes.interfaces._ import is.hail.types.virtual._ import is.hail.utils._ @@ -305,10 +307,10 @@ object ArrayFunctions extends RegistryFunctions { } registerIEmitCode2("corr", TArray(TFloat64), TArray(TFloat64), TFloat64, { - (_: Type, _: PType, _: PType) => PFloat64() + (_: Type, _: EmitType, _: EmitType) => EmitType(SFloat64, false) }) { case (cb, r, rt, ec1, ec2) => - 
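The three new analyses added above work together: ParentPointers records each node's parent, ControlFlowPreventsSplit walks those pointers upward from every Recur and from every stream-typed Ref to its definition and marks the whole path as unsplittable, and ComputeMethodSplits then marks a node for extraction into its own method when it is a tail loop or a stream consumer, or when the IR beneath it exceeds method_split_ir_limit. A plain-Scala sketch of the parent-pointer and size-threshold parts over a toy tree (the Node type, the names, and the limit are illustrative):

object MethodSplitSketch {
  // A toy IR: interior nodes with children.
  final case class Node(name: String, children: List[Node] = Nil)

  // Parent pointers: map each node to its parent (the root maps to null),
  // mirroring ParentPointers above.
  def parents(root: Node): Map[Node, Node] = {
    val m = scala.collection.mutable.Map.empty[Node, Node]
    def recur(n: Node, p: Node): Unit = { m(n) = p; n.children.foreach(recur(_, n)) }
    recur(root, null)
    m.toMap
  }

  // Size-threshold splitting: a node becomes its own "method" when the tree
  // beneath it exceeds `limit`; a split node then contributes 0 to its parent,
  // like the "method call is small" case in ComputeMethodSplits.
  def splits(root: Node, limit: Int): Set[Node] = {
    val out = scala.collection.mutable.Set.empty[Node]
    def size(n: Node): Int = {
      val underneath = n.children.map(size).sum
      if (underneath > limit) { out += n; 0 }
      else underneath + 1
    }
    size(root)
    out.toSet
  }

  def main(args: Array[String]): Unit = {
    val big = Node("f", List.tabulate(4)(i => Node(s"leaf$i")))
    val root = Node("root", List(big, Node("g")))
    println(parents(root)(big).name)            // root
    println(splits(root, limit = 3).map(_.name)) // Set(f)
  }
}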
ec1.toI(cb).flatMap(cb) { case pc1: PIndexableCode => - ec2.toI(cb).flatMap(cb) { case pc2: PIndexableCode => + ec1.toI(cb).flatMap(cb) { case pc1: SIndexableCode => + ec2.toI(cb).flatMap(cb) { case pc2: SIndexableCode => val pv1 = pc1.memoize(cb, "corr_a1") val pv2 = pc2.memoize(cb, "corr_a2") val l1 = cb.newLocal("len1", pv1.loadLength()) @@ -346,7 +348,7 @@ object ArrayFunctions extends RegistryFunctions { MathFunctions.mathPackageClass, "sqrt", (n.toD * xSqSum - xSum * xSum) * (n.toD * ySqSum - ySum * ySum)) - PCode(rt, res) + primitive(res) }) } } diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/CallFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/CallFunctions.scala index db99a38d660..d0a86ae95b3 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/CallFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/CallFunctions.scala @@ -1,44 +1,96 @@ package is.hail.expr.ir.functions -import is.hail.expr.ir.InferPType -import is.hail.types._ -import is.hail.types.physical.{PBoolean, PCanonicalArray, PCanonicalCall, PInt32, PType} +import is.hail.asm4s.Code +import is.hail.types.physical.stypes._ +import is.hail.types.physical.stypes.concrete.{SCanonicalCall, SIndexablePointer} +import is.hail.types.physical.stypes.interfaces._ +import is.hail.types.physical.stypes.primitives.{SBoolean, SInt32} +import is.hail.types.physical.{PCanonicalArray, PInt32} import is.hail.types.virtual._ -import is.hail.utils.FastSeq import is.hail.variant._ +import scala.reflect.classTag + object CallFunctions extends RegistryFunctions { def registerAll() { - registerWrappedScalaFunction1("Call", TString, TCall, (rt: Type, st: PType) => PCanonicalCall(st.required))(Call.getClass, "parse") + registerWrappedScalaFunction1("Call", TString, TCall, (rt: Type, st: SType) => SCanonicalCall)(Call.getClass, "parse") + + registerSCode1("callFromRepr", TInt32, TCall, (rt: Type, _: SType) => SCanonicalCall) { + case (er, cb, rt, repr) => SCanonicalCall.constructFromIntRepr(repr.asInt.intCode(cb)) + } - registerScalaFunction("Call", Array(TBoolean), TCall, (rt: Type, _: Seq[PType]) => PCanonicalCall())(Call0.getClass, "apply") + registerSCode1("Call", TBoolean, TCall, (rt: Type, _: SType) => SCanonicalCall) { + case (er, cb, rt, phased) => + SCanonicalCall.constructFromIntRepr(Code.invokeScalaObject[Int]( + Call0.getClass, "apply", Array(classTag[Boolean].runtimeClass), Array(phased.asBoolean.boolCode(cb)))) + } - registerScalaFunction("Call", Array(TInt32, TBoolean), TCall, (rt: Type, _: Seq[PType]) => PCanonicalCall())(Call1.getClass, "apply") + registerSCode2("Call", TInt32, TBoolean, TCall, (rt: Type, _: SType, _: SType) => SCanonicalCall) { + case (er, cb, rt, a1, phased) => + SCanonicalCall.constructFromIntRepr(Code.invokeScalaObject[Int]( + Call1.getClass, "apply", Array(classTag[Int].runtimeClass, classTag[Boolean].runtimeClass), Array(a1.asInt.intCode(cb), phased.asBoolean.boolCode(cb)))) + } - registerScalaFunction("Call", Array(TInt32, TInt32, TBoolean), TCall, (rt: Type, _: Seq[PType]) => PCanonicalCall())(Call2.getClass, "apply") + registerSCode3("Call", TInt32, TInt32, TBoolean, TCall, (rt: Type, _: SType, _: SType, _: SType) => SCanonicalCall) { + case (er, cb, rt, a1, a2, phased) => + SCanonicalCall.constructFromIntRepr(Code.invokeScalaObject[Int]( + Call2.getClass, "apply", Array(classTag[Int].runtimeClass, classTag[Int].runtimeClass, classTag[Boolean].runtimeClass), Array(a1.asInt.intCode(cb), a2.asInt.intCode(cb), phased.asBoolean.boolCode(cb)))) + } + + 
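The corr registration above keeps the same running-sum computation of the Pearson correlation; only the return-type plumbing changed, to EmitType(SFloat64, false). The formula itself, as a plain un-staged sketch with the same accumulators:

object CorrSketch {
  // Pearson correlation of two equal-length sequences:
  //   r = (n*Σxy − Σx*Σy) / sqrt((n*Σx² − (Σx)²) * (n*Σy² − (Σy)²))
  def corr(x: IndexedSeq[Double], y: IndexedSeq[Double]): Double = {
    require(x.length == y.length && x.nonEmpty)
    val n = x.length
    var xSum, ySum, xSqSum, ySqSum, xySum = 0.0
    var i = 0
    while (i < n) {
      xSum += x(i); ySum += y(i)
      xSqSum += x(i) * x(i); ySqSum += y(i) * y(i)
      xySum += x(i) * y(i)
      i += 1
    }
    (n * xySum - xSum * ySum) /
      math.sqrt((n * xSqSum - xSum * xSum) * (n * ySqSum - ySum * ySum))
  }

  def main(args: Array[String]): Unit =
    println(corr(IndexedSeq(1.0, 2.0, 3.0), IndexedSeq(2.0, 4.0, 6.0))) // 1.0 (perfect linear fit)
}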
registerSCode1("UnphasedDiploidGtIndexCall", TInt32, TCall, (rt: Type, _: SType) => SCanonicalCall) { + case (er, cb, rt, x) => + SCanonicalCall.constructFromIntRepr(Code.invokeScalaObject[Int]( + Call2.getClass, "fromUnphasedDiploidGtIndex", Array(classTag[Int].runtimeClass), Array(x.asInt.intCode(cb)))) + } - registerScalaFunction("UnphasedDiploidGtIndexCall", Array(TInt32), TCall, (rt: Type, _: Seq[PType]) => PCanonicalCall())(Call2.getClass, "fromUnphasedDiploidGtIndex") registerWrappedScalaFunction2("Call", TArray(TInt32), TBoolean, TCall, { - case(rt: Type, _: PType, _: PType) => PCanonicalCall() + case (rt: Type, _: SType, _: SType) => SCanonicalCall })(CallN.getClass, "apply") val qualities = Array("isPhased", "isHomRef", "isHet", "isHomVar", "isNonRef", "isHetNonRef", "isHetRef") - for (q <- qualities) registerScalaFunction(q, Array(TCall), TBoolean, (rt: Type, _: Seq[PType]) => PBoolean())(Call.getClass, q) + for (q <- qualities) { + registerSCode1(q, TCall, TBoolean, (rt: Type, _: SType) => SBoolean) { + case (er, cb, rt, call) => + primitive(Code.invokeScalaObject[Boolean]( + Call.getClass, q, Array(classTag[Int].runtimeClass), Array(call.asCall.loadCanonicalRepresentation(cb)))) + } + } + + registerSCode1("ploidy", TCall, TInt32, (rt: Type, _: SType) => SInt32) { + case (er, cb, rt, call) => + primitive(Code.invokeScalaObject[Int]( + Call.getClass, "ploidy", Array(classTag[Int].runtimeClass), Array(call.asCall.loadCanonicalRepresentation(cb)))) + } - registerScalaFunction("ploidy", Array(TCall), TInt32, (rt: Type, _: Seq[PType]) => PInt32())(Call.getClass, "ploidy") + registerSCode1("nNonRefAlleles", TCall, TInt32, (rt: Type, _: SType) => SInt32) { + case (er, cb, rt, call) => + primitive(Code.invokeScalaObject[Int]( + Call.getClass, "nNonRefAlleles", Array(classTag[Int].runtimeClass), Array(call.asCall.loadCanonicalRepresentation(cb)))) + } - registerScalaFunction("nNonRefAlleles", Array(TCall), TInt32, (rt: Type, _: Seq[PType]) => PInt32())(Call.getClass, "nNonRefAlleles") + registerSCode1("unphasedDiploidGtIndex", TCall, TInt32, (rt: Type, _: SType) => SInt32) { + case (er, cb, rt, call) => + primitive(Code.invokeScalaObject[Int]( + Call.getClass, "unphasedDiploidGtIndex", Array(classTag[Int].runtimeClass), Array(call.asCall.loadCanonicalRepresentation(cb)))) + } - registerScalaFunction("unphasedDiploidGtIndex", Array(TCall), TInt32, (rt: Type, _: Seq[PType]) => PInt32())(Call.getClass, "unphasedDiploidGtIndex") + registerSCode2("index", TCall, TInt32, TInt32, (rt: Type, _: SType, _: SType) => SInt32) { + case (er, cb, rt, call, idx) => + primitive(Code.invokeScalaObject[Int]( + Call.getClass, "alleleByIndex", Array(classTag[Int].runtimeClass, classTag[Int].runtimeClass), Array(call.asCall.loadCanonicalRepresentation(cb), idx.asInt.intCode(cb)))) + } - registerScalaFunction("index", Array(TCall, TInt32), TInt32, (rt: Type, _: Seq[PType]) => PInt32())(Call.getClass, "alleleByIndex") - registerScalaFunction("downcode", Array(TCall, TInt32), TCall, (rt: Type, _: Seq[PType]) => PCanonicalCall())(Call.getClass, "downcode") + registerSCode2("downcode", TCall, TInt32, TCall, (rt: Type, _: SType, _: SType) => SCanonicalCall) { + case (er, cb, rt, call, downcodedAllele) => + SCanonicalCall.constructFromIntRepr(Code.invokeScalaObject[Int]( + Call.getClass, "downcode", Array(classTag[Int].runtimeClass, classTag[Int].runtimeClass), Array(call.asCall.loadCanonicalRepresentation(cb), downcodedAllele.asInt.intCode(cb)))) + } registerWrappedScalaFunction2("oneHotAlleles", TCall, TInt32, 
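The quality predicates just above (isPhased, isHomRef, isHet, …) are registered in one loop, each delegating by name to the like-named static method on Call and reading the call's canonical int representation. A small model of that name-keyed dispatch; the predicate bodies and the packed-int layout below are placeholders, not Hail's actual Call encoding:

object CallQualitySketch {
  // Placeholder predicates over an opaque packed-int call value; in the patch
  // these are the static methods on the Call object, invoked by name.
  val isPhased: Int => Boolean = c => (c & 1) == 1 // illustrative bit only
  val isHomRef: Int => Boolean = c => c == 0       // illustrative only

  // Registry keyed by quality name, mirroring `for (q <- qualities) registerSCode1(q, ...)`.
  val qualities: Map[String, Int => Boolean] =
    Map("isPhased" -> isPhased, "isHomRef" -> isHomRef)

  def main(args: Array[String]): Unit = {
    val call = 0
    qualities.foreach { case (name, p) => println(s"$name -> ${p(call)}") }
  }
}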
TArray(TInt32), { - case(rt: Type, _: PType, _: PType) => PCanonicalArray(PInt32(true)) + case (rt: Type, _: SType, _: SType) => SIndexablePointer(PCanonicalArray(PInt32(true))) })(Call.getClass, "oneHotAlleles") } } diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/Functions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/Functions.scala index e6d72ecc860..60727916e77 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/Functions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/Functions.scala @@ -8,8 +8,8 @@ import is.hail.utils._ import is.hail.asm4s.coerce import is.hail.experimental.ExperimentalFunctions import is.hail.types.physical._ -import is.hail.types.physical.stypes.SCode -import is.hail.types.physical.stypes.concrete.{SBaseStructPointer, SBaseStructPointerCode, SStringPointer} +import is.hail.types.physical.stypes.{EmitType, SCode, SType} +import is.hail.types.physical.stypes.concrete.{SBaseStructPointer, SBaseStructPointerCode, SCanonicalCall, SCanonicalCallCode, SIndexablePointer, SStringPointer} import is.hail.types.physical.stypes.interfaces._ import is.hail.types.physical.stypes.primitives._ import is.hail.types.virtual._ @@ -250,22 +250,6 @@ abstract class RegistryFunctions { def tnum(name: String): TVariable = tv(name, "numeric") - def wrapArg(r: EmitRegion, t: PType): Code[_] => Code[_] = t match { - case _: PBoolean => coerce[Boolean] - case _: PInt32 => coerce[Int] - case _: PInt64 => coerce[Long] - case _: PFloat32 => coerce[Float] - case _: PFloat64 => coerce[Double] - case _: PCall => coerce[Int] - case t: PString => c => t.loadString(coerce[Long](c)) - case t: PLocus => c => EmitCodeBuilder.scopedCode(r.mb)(cb => PCode(t, c).asLocus.getLocusObj(cb)) - case _ => c => - Code.invokeScalaObject3[PType, Region, Long, Any]( - UnsafeRow.getClass, "read", - r.mb.getPType(t), - r.region, coerce[Long](c)) - } - def boxedTypeInfo(t: Type): TypeInfo[_ >: Null] = t match { case TBoolean => classInfo[java.lang.Boolean] case TInt32 => classInfo[java.lang.Integer] @@ -280,12 +264,12 @@ abstract class RegistryFunctions { def scodeToJavaValue(cb: EmitCodeBuilder, r: Value[Region], sc: SCode): Code[AnyRef] = { sc.st match { - case _: SInt32 => Code.boxInt(sc.asInt32.intCode(cb)) - case _: SInt64 => Code.boxLong(sc.asInt64.longCode(cb)) - case _: SFloat32 => Code.boxFloat(sc.asFloat32.floatCode(cb)) - case _: SFloat64 => Code.boxDouble(sc.asFloat64.doubleCode(cb)) - case _: SBoolean => Code.boxBoolean(sc.asBoolean.boolCode(cb)) - case _: SCall => Code.boxInt(coerce[Int](sc.asPCode.code)) + case SInt32 => Code.boxInt(sc.asInt32.intCode(cb)) + case SInt64 => Code.boxLong(sc.asInt64.longCode(cb)) + case SFloat32 => Code.boxFloat(sc.asFloat32.floatCode(cb)) + case SFloat64 => Code.boxDouble(sc.asFloat64.doubleCode(cb)) + case SBoolean => Code.boxBoolean(sc.asBoolean.boolCode(cb)) + case _: SCall => Code.boxInt(sc.asCall.loadCanonicalRepresentation(cb)) case _: SString => sc.asString.loadString() case _: SLocus => sc.asLocus.getLocusObj(cb) case t => @@ -299,34 +283,21 @@ abstract class RegistryFunctions { } } - def boxArg(r: EmitRegion, t: PType): Code[_] => Code[AnyRef] = t match { - case _: PBoolean => c => Code.boxBoolean(coerce[Boolean](c)) - case _: PInt32 => c => Code.boxInt(coerce[Int](c)) - case _: PInt64 => c => Code.boxLong(coerce[Long](c)) - case _: PFloat32 => c => Code.boxFloat(coerce[Float](c)) - case _: PFloat64 => c => Code.boxDouble(coerce[Double](c)) - case _: PCall => c => Code.boxInt(coerce[Int](c)) - case t: PString => c => 
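The removed wrapArg/boxArg helpers and the surviving scodeToJavaValue/unwrapReturn all do the same job: shuttle values between staged primitives and the boxed JVM objects that wrapped Scala functions accept and return, dispatching on the value's type. A minimal un-staged model of that by-type boxing (the VType enum is a stand-in for the virtual/SType hierarchy):

object BoxingSketch {
  sealed trait VType
  case object VInt32 extends VType
  case object VFloat64 extends VType
  case object VBoolean extends VType

  // Box a primitive into the object a wrapped Scala function expects,
  // dispatching on type the way scodeToJavaValue matches on SType.
  def box(t: VType, v: Any): AnyRef = t match {
    case VInt32   => java.lang.Integer.valueOf(v.asInstanceOf[Int])
    case VFloat64 => java.lang.Double.valueOf(v.asInstanceOf[Double])
    case VBoolean => java.lang.Boolean.valueOf(v.asInstanceOf[Boolean])
  }

  // Unwrap the returned object back into a primitive, as unwrapReturn does.
  def unbox(t: VType, v: AnyRef): Any = t match {
    case VInt32   => v.asInstanceOf[java.lang.Integer].intValue
    case VFloat64 => v.asInstanceOf[java.lang.Double].doubleValue
    case VBoolean => v.asInstanceOf[java.lang.Boolean].booleanValue
  }

  def main(args: Array[String]): Unit =
    println(unbox(VInt32, box(VInt32, 7))) // 7
}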
t.loadString(coerce[Long](c)) - case t: PLocus => c => EmitCodeBuilder.scopedCode(r.mb)(cb => PCode(t, c).asLocus.getLocusObj(cb)) - case _ => c => - Code.invokeScalaObject3[PType, Region, Long, AnyRef]( - UnsafeRow.getClass, "readAnyRef", - r.mb.getPType(t), - r.region, coerce[Long](c)) - } - - def unwrapReturn(cb: EmitCodeBuilder, r: Value[Region], pt: PType, value: Code[_]): PCode = pt.virtualType match { - case TBoolean => PCode(pt, value) - case TInt32 => PCode(pt, value) - case TInt64 => PCode(pt, value) - case TFloat32 => PCode(pt, value) - case TFloat64 => PCode(pt, value) + def unwrapReturn(cb: EmitCodeBuilder, r: Value[Region], st: SType, value: Code[_]): SCode = st.virtualType match { + case TBoolean => primitive(coerce[Boolean](value)) + case TInt32 => primitive(coerce[Int](value)) + case TInt64 => primitive(coerce[Long](value)) + case TFloat32 => primitive(coerce[Float](value)) + case TFloat64 => primitive(coerce[Double](value)) case TString => - val st = SStringPointer(pt.asInstanceOf[PCanonicalString]) - st.constructFromString(cb, r, coerce[String](value)) - case TCall => PCode(pt, value) + val sst = st.asInstanceOf[SStringPointer] + sst.constructFromString(cb, r, coerce[String](value)) + case TCall => + assert(st == SCanonicalCall) + new SCanonicalCallCode(coerce[Int](value)) case TArray(TInt32) => - val pca = pt.asInstanceOf[PCanonicalArray] + val ast = st.asInstanceOf[SIndexablePointer] + val pca = ast.pType.asInstanceOf[PCanonicalArray] val arr = cb.newLocal[IndexedSeq[Int]]("unrwrap_return_array_int32_arr", coerce[IndexedSeq[Int]](value)) val len = cb.newLocal[Int]("unwrap_return_array_int32_len", arr.invoke[Int]("length")) pca.constructFromElements(cb, r, len, deepCopy = false) { (cb, idx) => @@ -335,7 +306,8 @@ abstract class RegistryFunctions { IEmitCode(cb, elt.isNull, primitive(elt.invoke[Int]("intValue"))) } case TArray(TFloat64) => - val pca = pt.asInstanceOf[PCanonicalArray] + val ast = st.asInstanceOf[SIndexablePointer] + val pca = ast.pType.asInstanceOf[PCanonicalArray] val arr = cb.newLocal[IndexedSeq[Double]]("unrwrap_return_array_float64_arr", coerce[IndexedSeq[Double]](value)) val len = cb.newLocal[Int]("unwrap_return_array_float64_len", arr.invoke[Int]("length")) pca.constructFromElements(cb, r, len, deepCopy = false) { (cb, idx) => @@ -344,38 +316,37 @@ abstract class RegistryFunctions { IEmitCode(cb, elt.isNull, primitive(elt.invoke[Double]("doubleValue"))) } case TArray(TString) => - val pca = pt.asInstanceOf[PCanonicalArray] + val ast = st.asInstanceOf[SIndexablePointer] + val pca = ast.pType.asInstanceOf[PCanonicalArray] val arr = cb.newLocal[IndexedSeq[String]]("unrwrap_return_array_str_arr", coerce[IndexedSeq[String]](value)) val len = cb.newLocal[Int]("unwrap_return_array_str_len", arr.invoke[Int]("length")) pca.constructFromElements(cb, r, len, deepCopy = false) { (cb, idx) => - val st = SStringPointer(pca.elementType.asInstanceOf[PCanonicalString]) + val st = SStringPointer(pca.elementType.setRequired(false).asInstanceOf[PCanonicalString]) val elt = cb.newLocal[String]("unwrap_return_array_str_elt", Code.checkcast[String](arr.invoke[Int, java.lang.Object]("apply", idx))) IEmitCode(cb, elt.isNull, st.constructFromString(cb, r, elt)) } case t: TBaseStruct => + val sst = st.asInstanceOf[SBaseStructPointer] + val pt = sst.pType.asInstanceOf[PCanonicalBaseStruct] val addr = Code.invokeScalaObject3[Region, Row, PType, Long]( RegistryHelpers.getClass, "stupidUnwrapStruct", r.region, coerce[Row](value), cb.emb.ecb.getPType(pt)) - new 
SBaseStructPointerCode(SBaseStructPointer(pt.asInstanceOf[PBaseStruct]), addr) + new SBaseStructPointerCode(SBaseStructPointer(pt.setRequired(false).asInstanceOf[PBaseStruct]), addr) } - def registerPCode( + def registerSCode( name: String, valueParameterTypes: Array[Type], returnType: Type, - calculateReturnPType: (Type, Seq[PType]) => PType, + calculateReturnType: (Type, Seq[SType]) => SType, typeParameters: Array[Type] = Array.empty )( - impl: (EmitRegion, EmitCodeBuilder, Seq[Type], PType, Array[PCode]) => PCode + impl: (EmitRegion, EmitCodeBuilder, Seq[Type], SType, Array[SCode]) => SCode ) { IRFunctionRegistry.addJVMFunction( - new UnseededMissingnessObliviousJVMFunction(name, typeParameters, valueParameterTypes, returnType, calculateReturnPType) { - override def apply(r: EmitRegion, cb: EmitCodeBuilder, returnPType: PType, typeParameters: Seq[Type], args: PCode*): PCode = - impl(r, cb, typeParameters, returnPType, args.toArray) - override def apply(r: EmitRegion, cb: EmitCodeBuilder, returnPType: PType, typeParameters: Seq[Type], args: (PType, Code[_])*): Code[_] = { - assert(unify(typeParameters, args.map(_._1.virtualType), returnPType.virtualType)) - apply(r, cb, returnPType, typeParameters, args.map { case (t, a) => PCode(t, a) }: _*).code - } + new UnseededMissingnessObliviousJVMFunction(name, typeParameters, valueParameterTypes, returnType, calculateReturnType) { + override def apply(r: EmitRegion, cb: EmitCodeBuilder, returnSType: SType, typeParameters: Seq[Type], args: SCode*): SCode = + impl(r, cb, typeParameters, returnSType, args.toArray) }) } @@ -383,16 +354,16 @@ abstract class RegistryFunctions { name: String, valueParameterTypes: Array[Type], returnType: Type, - calculateReturnPType: (Type, Seq[PType]) => PType, + calculateReturnType: (Type, Seq[SType]) => SType, typeParameters: Array[Type] = Array.empty )( - impl: (EmitRegion, EmitCodeBuilder, PType, Array[Type], Array[(PType, Code[_])]) => Code[_] + impl: (EmitRegion, EmitCodeBuilder, SType, Array[Type], Array[SCode]) => Code[_] ) { IRFunctionRegistry.addJVMFunction( - new UnseededMissingnessObliviousJVMFunction(name, typeParameters, valueParameterTypes, returnType, calculateReturnPType) { - override def apply(r: EmitRegion, cb: EmitCodeBuilder, returnPType: PType, typeParameters: Seq[Type], args: (PType, Code[_])*): Code[_] = { - assert(unify(typeParameters, args.map(_._1.virtualType), returnPType.virtualType)) - impl(r, cb, returnPType, typeParameters.toArray, args.toArray) + new UnseededMissingnessObliviousJVMFunction(name, typeParameters, valueParameterTypes, returnType, calculateReturnType) { + override def apply(r: EmitRegion, cb: EmitCodeBuilder, returnSType: SType, typeParameters: Seq[Type], args: SCode*): SCode = { + assert(unify(typeParameters, args.map(_.st.virtualType), returnSType.virtualType)) + returnSType.fromCodes(FastIndexedSeq(impl(r, cb, returnSType, typeParameters.toArray, args.toArray))) } }) } @@ -401,15 +372,15 @@ abstract class RegistryFunctions { name: String, valueParameterTypes: Array[Type], returnType: Type, - calculateReturnPType: (Type, Seq[PType]) => PType, + calculateReturnType: (Type, Seq[EmitType]) => EmitType, typeParameters: Array[Type] = Array.empty )( - impl: (EmitRegion,PType, Array[EmitCode]) => EmitCode + impl: (EmitRegion, SType, Array[EmitCode]) => EmitCode ) { IRFunctionRegistry.addJVMFunction( - new UnseededMissingnessAwareJVMFunction(name, typeParameters, valueParameterTypes, returnType, calculateReturnPType) { - override def apply(r: EmitRegion, rpt: PType, 
typeParameters: Seq[Type], args: EmitCode*): EmitCode = { - assert(unify(typeParameters, args.map(_.pt.virtualType), rpt.virtualType)) + new UnseededMissingnessAwareJVMFunction(name, typeParameters, valueParameterTypes, returnType, calculateReturnType) { + override def apply(r: EmitRegion, rpt: SType, typeParameters: Seq[Type], args: EmitCode*): EmitCode = { + assert(unify(typeParameters, args.map(_.st.virtualType), rpt.virtualType)) impl(r, rpt, args.toArray) } }) @@ -419,22 +390,22 @@ abstract class RegistryFunctions { name: String, valueParameterTypes: Array[Type], returnType: Type, - calculateReturnPType: (Type, Seq[PType]) => PType, + calculateReturnType: (Type, Seq[EmitType]) => EmitType, typeParameters: Array[Type] = Array.empty )( - impl: (EmitCodeBuilder, Value[Region], PType, Array[EmitCode]) => IEmitCode + impl: (EmitCodeBuilder, Value[Region], SType, Array[EmitCode]) => IEmitCode ) { IRFunctionRegistry.addJVMFunction( - new UnseededMissingnessAwareJVMFunction(name, typeParameters, valueParameterTypes, returnType, calculateReturnPType) { + new UnseededMissingnessAwareJVMFunction(name, typeParameters, valueParameterTypes, returnType, calculateReturnType) { override def apply( cb: EmitCodeBuilder, r: Value[Region], - rpt: PType, + rpt: SType, typeParameters: Seq[Type], args: EmitCode* ): IEmitCode = impl(cb, r, rpt, args.toArray) - override def apply(r: EmitRegion, rpt: PType, typeParameters: Seq[Type], args: EmitCode*): EmitCode = { + override def apply(r: EmitRegion, rpt: SType, typeParameters: Seq[Type], args: EmitCode*): EmitCode = { EmitCode.fromI(r.mb) { cb => apply(cb, r.region, rpt, typeParameters, args: _*) } @@ -446,14 +417,16 @@ abstract class RegistryFunctions { name: String, valueParameterTypes: Array[Type], returnType: Type, - calculateReturnPType: (Type, Seq[PType]) => PType + calculateReturnType: (Type, Seq[SType]) => SType )( cls: Class[_], method: String ) { - registerCode(name, valueParameterTypes, returnType, calculateReturnPType) { case (r, cb, rt, _, args) => - val cts = valueParameterTypes.map(TypeToIRIntermediateClassTag(_).runtimeClass) - Code.invokeScalaObject(cls, method, cts, args.map(_._2))(TypeToIRIntermediateClassTag(returnType)) + registerSCode(name, valueParameterTypes, returnType, calculateReturnType) { case (r, cb, _, rt, args) => + val cts = valueParameterTypes.map(PrimitiveTypeToIRIntermediateClassTag(_).runtimeClass) + rt.fromCodes(FastIndexedSeq( + Code.invokeScalaObject(cls, method, cts, args.map { a => SType.extractPrimCode(cb, a) })(PrimitiveTypeToIRIntermediateClassTag(returnType)) + )) } } @@ -461,7 +434,7 @@ abstract class RegistryFunctions { name: String, valueParameterTypes: Array[Type], returnType: Type, - calculateReturnPType: (Type, Seq[PType]) => PType + calculateReturnType: (Type, Seq[SType]) => SType )( cls: Class[_], method: String @@ -473,133 +446,128 @@ abstract class RegistryFunctions { case TArray(TString) => classTag[IndexedSeq[String]] case TSet(TString) => classTag[Set[String]] case TDict(TString, TString) => classTag[Map[String, String]] - case t => TypeToIRIntermediateClassTag(t) + case TCall => classTag[Int] + case t => PrimitiveTypeToIRIntermediateClassTag(t) + } + + def wrap(cb: EmitCodeBuilder, r: Value[Region], code: SCode): Code[_] = code.st match { + case t if t.isPrimitive => SType.extractPrimCode(cb, code) + case call: SCall => code.asCall.loadCanonicalRepresentation(cb) + case _ => scodeToJavaValue(cb, r, code) } - registerCode(name, valueParameterTypes, returnType, calculateReturnPType) { case (r, cb, rt, _, 
args) => + registerSCode(name, valueParameterTypes, returnType, calculateReturnType) { case (r, cb, _, rt, args) => val cts = valueParameterTypes.map(ct(_).runtimeClass) - val out = Code.invokeScalaObject(cls, method, cts, args.map { case (t, a) => wrapArg(r, t)(a) })(ct(returnType)) - unwrapReturn(cb, r.region, rt, out).code + unwrapReturn(cb, r.region, rt, + Code.invokeScalaObject(cls, method, cts, args.map { a => wrap(cb, r.region, a) })(ct(returnType))) } } - def registerWrappedScalaFunction1(name: String, a1: Type, returnType: Type, pt: (Type, PType) => PType)(cls: Class[_], method: String): Unit = + def registerWrappedScalaFunction1(name: String, a1: Type, returnType: Type, pt: (Type, SType) => SType)(cls: Class[_], method: String): Unit = registerWrappedScalaFunction(name, Array(a1), returnType, unwrappedApply(pt))(cls, method) - def registerWrappedScalaFunction2(name: String, a1: Type, a2: Type, returnType: Type, pt: (Type, PType, PType) => PType)(cls: Class[_], method: String): Unit = + def registerWrappedScalaFunction2(name: String, a1: Type, a2: Type, returnType: Type, pt: (Type, SType, SType) => SType)(cls: Class[_], method: String): Unit = registerWrappedScalaFunction(name, Array(a1, a2), returnType, unwrappedApply(pt))(cls, method) def registerWrappedScalaFunction3(name: String, a1: Type, a2: Type, a3: Type, returnType: Type, - pt: (Type, PType, PType, PType) => PType)(cls: Class[_], method: String): Unit = + pt: (Type, SType, SType, SType) => SType)(cls: Class[_], method: String): Unit = registerWrappedScalaFunction(name, Array(a1, a2, a3), returnType, unwrappedApply(pt))(cls, method) - def registerJavaStaticFunction(name: String, valueParameterTypes: Array[Type], returnType: Type, pt: (Type, Seq[PType]) => PType)(cls: Class[_], method: String) { + def registerJavaStaticFunction(name: String, valueParameterTypes: Array[Type], returnType: Type, pt: (Type, Seq[SType]) => SType)(cls: Class[_], method: String) { registerCode(name, valueParameterTypes, returnType, pt) { case (r, cb, rt, _, args) => - val cts = valueParameterTypes.map(TypeToIRIntermediateClassTag(_).runtimeClass) - Code.invokeStatic(cls, method, cts, args.map(_._2))(TypeToIRIntermediateClassTag(returnType)) + val cts = valueParameterTypes.map(PrimitiveTypeToIRIntermediateClassTag(_).runtimeClass) + Code.invokeStatic(cls, method, cts, args.map(a => SType.extractPrimCode(cb, a)))(PrimitiveTypeToIRIntermediateClassTag(returnType)) } } def registerIR(name: String, valueParameterTypes: Array[Type], returnType: Type, inline: Boolean = false, typeParameters: Array[Type] = Array.empty)(f: (Seq[Type], Seq[IR]) => IR): Unit = IRFunctionRegistry.addIR(name, typeParameters, valueParameterTypes, returnType, inline, f) - def registerPCode1(name: String, mt1: Type, rt: Type, pt: (Type, PType) => PType)(impl: (EmitRegion, EmitCodeBuilder, PType, PCode) => PCode): Unit = - registerPCode(name, Array(mt1), rt, unwrappedApply(pt)) { + def registerSCode1(name: String, mt1: Type, rt: Type, pt: (Type, SType) => SType)(impl: (EmitRegion, EmitCodeBuilder, SType, SCode) => SCode): Unit = + registerSCode(name, Array(mt1), rt, unwrappedApply(pt)) { case (r, cb, _, rt, Array(a1)) => impl(r, cb, rt, a1) } - def registerPCode2(name: String, mt1: Type, mt2: Type, rt: Type, pt: (Type, PType, PType) => PType) - (impl: (EmitRegion, EmitCodeBuilder, PType, PCode, PCode) => PCode): Unit = - registerPCode(name, Array(mt1, mt2), rt, unwrappedApply(pt)) { + def registerSCode1t(name: String, typeParams: Array[Type], mt1: Type, rt: Type, pt: (Type, SType) 
=> SType)(impl: (EmitRegion, EmitCodeBuilder, Seq[Type], SType, SCode) => SCode): Unit = + registerSCode(name, Array(mt1), rt, unwrappedApply(pt), typeParameters = typeParams) { + case (r, cb, typeParams, rt, Array(a1)) => impl(r, cb, typeParams, rt, a1) + } + + def registerSCode2(name: String, mt1: Type, mt2: Type, rt: Type, pt: (Type, SType, SType) => SType) + (impl: (EmitRegion, EmitCodeBuilder, SType, SCode, SCode) => SCode): Unit = + registerSCode(name, Array(mt1, mt2), rt, unwrappedApply(pt)) { case (r, cb, _, rt, Array(a1, a2)) => impl(r, cb, rt, a1, a2) } - def registerPCode3(name: String, mt1: Type, mt2: Type, mt3: Type, rt: Type, pt: (Type, PType, PType, PType) => PType) - (impl: (EmitRegion, EmitCodeBuilder, PType, PCode, PCode, PCode) => PCode): Unit = - registerPCode(name, Array(mt1, mt2, mt3), rt, unwrappedApply(pt)) { + def registerSCode2t(name: String, typeParams: Array[Type], mt1: Type, mt2: Type, rt: Type, pt: (Type, SType, SType) => SType) + (impl: (EmitRegion, EmitCodeBuilder, Seq[Type], SType, SCode, SCode) => SCode): Unit = + registerSCode(name, Array(mt1, mt2), rt, unwrappedApply(pt), typeParameters = typeParams) { + case (r, cb, typeParams, rt, Array(a1, a2)) => impl(r, cb, typeParams, rt, a1, a2) + } + + def registerSCode3(name: String, mt1: Type, mt2: Type, mt3: Type, rt: Type, pt: (Type, SType, SType, SType) => SType) + (impl: (EmitRegion, EmitCodeBuilder, SType, SCode, SCode, SCode) => SCode): Unit = + registerSCode(name, Array(mt1, mt2, mt3), rt, unwrappedApply(pt)) { case (r, cb, _, rt, Array(a1, a2, a3)) => impl(r, cb, rt, a1, a2, a3) } - def registerPCode4(name: String, mt1: Type, mt2: Type, mt3: Type, mt4: Type, rt: Type, pt: (Type, PType, PType, PType, PType) => PType) - (impl: (EmitRegion, EmitCodeBuilder, PType, PCode, PCode, PCode, PCode) => PCode): Unit = - registerPCode(name, Array(mt1, mt2, mt3, mt4), rt, unwrappedApply(pt)) { + def registerSCode4(name: String, mt1: Type, mt2: Type, mt3: Type, mt4: Type, rt: Type, pt: (Type, SType, SType, SType, SType) => SType) + (impl: (EmitRegion, EmitCodeBuilder, SType, SCode, SCode, SCode, SCode) => SCode): Unit = + registerSCode(name, Array(mt1, mt2, mt3, mt4), rt, unwrappedApply(pt)) { case (r, cb, _, rt, Array(a1, a2, a3, a4)) => impl(r, cb, rt, a1, a2, a3, a4) } - def registerPCode4t(name: String, typeParams: Array[Type], mt1: Type, mt2: Type, mt3: Type, mt4: Type, rt: Type, - pt: (Type, PType, PType, PType, PType) => PType) - (impl: (EmitRegion, EmitCodeBuilder, Seq[Type], PType, PCode, PCode, PCode, PCode) => PCode): Unit = - registerPCode(name, Array(mt1, mt2, mt3, mt4), rt, unwrappedApply(pt), typeParams) { + def registerSCode4t(name: String, typeParams: Array[Type], mt1: Type, mt2: Type, mt3: Type, mt4: Type, rt: Type, + pt: (Type, SType, SType, SType, SType) => SType) + (impl: (EmitRegion, EmitCodeBuilder, Seq[Type], SType, SCode, SCode, SCode, SCode) => SCode): Unit = + registerSCode(name, Array(mt1, mt2, mt3, mt4), rt, unwrappedApply(pt), typeParams) { case (r, cb, typeParams, rt, Array(a1, a2, a3, a4)) => impl(r, cb, typeParams, rt, a1, a2, a3, a4) } - def registerPCode5(name: String, mt1: Type, mt2: Type, mt3: Type, mt4: Type, mt5: Type, rt: Type, pt: (Type, PType, PType, PType, PType, PType) => PType) - (impl: (EmitRegion, EmitCodeBuilder, PType, PCode, PCode, PCode, PCode, PCode) => PCode): Unit = - registerPCode(name, Array(mt1, mt2, mt3, mt4, mt5), rt, unwrappedApply(pt)) { + def registerSCode5(name: String, mt1: Type, mt2: Type, mt3: Type, mt4: Type, mt5: Type, rt: Type, pt: (Type, SType, 
SType, SType, SType, SType) => SType) + (impl: (EmitRegion, EmitCodeBuilder, SType, SCode, SCode, SCode, SCode, SCode) => SCode): Unit = + registerSCode(name, Array(mt1, mt2, mt3, mt4, mt5), rt, unwrappedApply(pt)) { case (r, cb, _, rt, Array(a1, a2, a3, a4, a5)) => impl(r, cb, rt, a1, a2, a3, a4, a5) } - def registerCode1[A1](name: String, mt1: Type, rt: Type, pt: (Type, PType) => PType)(impl: (EmitRegion, PType, (PType, Code[A1])) => Code[_]): Unit = + def registerCode1(name: String, mt1: Type, rt: Type, pt: (Type, SType) => SType)(impl: (EmitCodeBuilder, EmitRegion, SType, SCode) => Code[_]): Unit = registerCode(name, Array(mt1), rt, unwrappedApply(pt)) { - case (r, xb, rt, _, Array(a1: (PType, Code[A1]) @unchecked)) => impl(r, rt, a1) + case (r, cb, rt, _, Array(a1)) => impl(cb, r, rt, a1) } - def registerCode1t[A1](name: String, typeParam: Type, mt1: Type, rt: Type, pt: (Type, PType) => PType)(impl: (EmitRegion, PType, Type, (PType, Code[A1])) => Code[_]): Unit = - registerCode(name, Array(mt1), rt, unwrappedApply(pt), typeParameters = Array(typeParam)) { - case (r, cb, rt, Array(t), Array(a1: (PType, Code[A1]) @unchecked)) => impl(r, rt, t, a1) - } - - - def registerCode2[A1, A2](name: String, mt1: Type, mt2: Type, rt: Type, pt: (Type, PType, PType) => PType) - (impl: (EmitRegion, PType, (PType, Code[A1]), (PType, Code[A2])) => Code[_]): Unit = + def registerCode2(name: String, mt1: Type, mt2: Type, rt: Type, pt: (Type, SType, SType) => SType) + (impl: (EmitCodeBuilder, EmitRegion, SType, SCode, SCode) => Code[_]): Unit = registerCode(name, Array(mt1, mt2), rt, unwrappedApply(pt)) { - case (r, cb, rt, _, Array( - a1: (PType, Code[A1]) @unchecked, - a2: (PType, Code[A2]) @unchecked)) => impl(r, rt, a1, a2) - } - - def registerCode2t[A1, A2](name: String, typeParam1: Type, arg1: Type, arg2: Type, rt: Type, pt: (Type, PType, PType) => PType) - (impl: (EmitRegion, PType, Type, (PType, Code[A1]), (PType, Code[A2])) => Code[_]): Unit = - registerCode(name, Array(arg1, arg2), rt, unwrappedApply(pt), Array(typeParam1)) { - case (r, cb, rt, Array(t1), Array(a1: (PType, Code[A1]) @unchecked, a2: (PType, Code[A2]) @unchecked)) => impl(r, rt, t1, a1, a2) - } - - def registerCode3[A1, A2, A3](name: String, mt1: Type, mt2: Type, mt3: Type, rt: Type, pt: (Type, PType, PType, PType) => PType) - (impl: (EmitRegion, PType, (PType, Code[A1]), (PType, Code[A2]), (PType, Code[A3])) => Code[_]): Unit = - registerCode(name, Array(mt1, mt2, mt3), rt, unwrappedApply(pt)) { - case (r, cb, rt, _, Array( - a1: (PType, Code[A1]) @unchecked, - a2: (PType, Code[A2]) @unchecked, - a3: (PType, Code[A3]) @unchecked)) => impl(r, rt, a1, a2, a3) + case (r, cb, rt, _, Array(a1, a2)) => impl(cb, r, rt, a1, a2) } - def registerIEmitCode1(name: String, mt1: Type, rt: Type, pt: (Type, PType) => PType) - (impl: (EmitCodeBuilder, Value[Region], PType, EmitCode) => IEmitCode): Unit = + def registerIEmitCode1(name: String, mt1: Type, rt: Type, pt: (Type, EmitType) => EmitType) + (impl: (EmitCodeBuilder, Value[Region], SType, EmitCode) => IEmitCode): Unit = registerIEmitCode(name, Array(mt1), rt, unwrappedApply(pt)) { case (cb, r, rt, Array(a1)) => impl(cb, r, rt, a1) } - def registerIEmitCode2(name: String, mt1: Type, mt2: Type, rt: Type, pt: (Type, PType, PType) => PType) - (impl: (EmitCodeBuilder, Value[Region], PType, EmitCode, EmitCode) => IEmitCode): Unit = + def registerIEmitCode2(name: String, mt1: Type, mt2: Type, rt: Type, pt: (Type, EmitType, EmitType) => EmitType) + (impl: (EmitCodeBuilder, Value[Region], SType, 
EmitCode, EmitCode) => IEmitCode): Unit = registerIEmitCode(name, Array(mt1, mt2), rt, unwrappedApply(pt)) { case (cb, r, rt, Array(a1, a2)) => impl(cb, r, rt, a1, a2) } - def registerIEmitCode4(name: String, mt1: Type, mt2: Type, mt3: Type, mt4: Type, rt: Type, pt: (Type, PType, PType, PType, PType) => PType) - (impl: (EmitCodeBuilder, Value[Region], PType, EmitCode, EmitCode, EmitCode, EmitCode) => IEmitCode): Unit = + def registerIEmitCode4(name: String, mt1: Type, mt2: Type, mt3: Type, mt4: Type, rt: Type, pt: (Type, EmitType, EmitType, EmitType, EmitType) => EmitType) + (impl: (EmitCodeBuilder, Value[Region], SType, EmitCode, EmitCode, EmitCode, EmitCode) => IEmitCode): Unit = registerIEmitCode(name, Array(mt1, mt2, mt3, mt4), rt, unwrappedApply(pt)) { case (cb, r, rt, Array(a1, a2, a3, a4)) => impl(cb, r, rt, a1, a2, a3, a4) } - def registerIEmitCode6(name: String, mt1: Type, mt2: Type, mt3: Type, mt4: Type, mt5: Type, mt6: Type, rt: Type, pt: (Type, PType, PType, PType, PType, PType, PType) => PType) - (impl: (EmitCodeBuilder, Value[Region], PType, EmitCode, EmitCode, EmitCode, EmitCode, EmitCode, EmitCode) => IEmitCode): Unit = + def registerIEmitCode6(name: String, mt1: Type, mt2: Type, mt3: Type, mt4: Type, mt5: Type, mt6: Type, rt: Type, pt: (Type, EmitType, EmitType, EmitType, EmitType, EmitType, EmitType) => EmitType) + (impl: (EmitCodeBuilder, Value[Region], SType, EmitCode, EmitCode, EmitCode, EmitCode, EmitCode, EmitCode) => IEmitCode): Unit = registerIEmitCode(name, Array(mt1, mt2, mt3, mt4, mt5, mt6), rt, unwrappedApply(pt)) { case (cb, r, rt, Array(a1, a2, a3, a4, a5, a6)) => impl(cb, r, rt, a1, a2, a3, a4, a5, a6) } - def registerEmitCode2(name: String, mt1: Type, mt2: Type, rt: Type, pt: (Type, PType, PType) => PType) - (impl: (EmitRegion, PType, EmitCode, EmitCode) => EmitCode): Unit = + def registerEmitCode2(name: String, mt1: Type, mt2: Type, rt: Type, pt: (Type, EmitType, EmitType) => EmitType) + (impl: (EmitRegion, SType, EmitCode, EmitCode) => EmitCode): Unit = registerEmitCode(name, Array(mt1, mt2), rt, unwrappedApply(pt)) { case (r, rt, Array(a1, a2)) => impl(r, rt, a1, a2) } def registerIR1(name: String, mt1: Type, returnType: Type, typeParameters: Array[Type] = Array.empty)(f: (Seq[Type], IR) => IR): Unit = @@ -618,22 +586,22 @@ abstract class RegistryFunctions { name: String, valueParameterTypes: Array[Type], returnType: Type, - calculateReturnPType: (Type, Seq[PType]) => PType + computeReturnType: (Type, Seq[SType]) => SType )( - impl: (EmitCodeBuilder, Value[Region], PType, Long, Array[SCode]) => SCode + impl: (EmitCodeBuilder, Value[Region], SType, Long, Array[SCode]) => SCode ) { IRFunctionRegistry.addJVMFunction( - new SeededMissingnessObliviousJVMFunction(name, valueParameterTypes, returnType, calculateReturnPType) { + new SeededMissingnessObliviousJVMFunction(name, valueParameterTypes, returnType, computeReturnType) { val isDeterministic: Boolean = false - def applySeeded(cb: EmitCodeBuilder, seed: Long, r: Value[Region], rpt: PType, args: SCode*): SCode = { + def applySeeded(cb: EmitCodeBuilder, seed: Long, r: Value[Region], rpt: SType, args: SCode*): SCode = { assert(unify(Array.empty[Type], args.map(_.st.virtualType), rpt.virtualType)) impl(cb, r, rpt, seed, args.toArray) } - def applySeededI(seed: Long, cb: EmitCodeBuilder, r: Value[Region], rpt: PType, args: (PType, EmitCode)*): IEmitCode = { - IEmitCode.multiMapEmitCodes(cb, args.map(_._2).toFastIndexedSeq) { - argPCs => applySeeded(cb, seed, r, rpt, argPCs: _*).asPCode + def applySeededI(seed: 
Long, cb: EmitCodeBuilder, r: Value[Region], rpt: SType, args: EmitCode*): IEmitCode = { + IEmitCode.multiMapEmitCodes(cb, args.toFastIndexedSeq) { + argPCs => applySeeded(cb, seed, r, rpt, argPCs: _*) } } @@ -641,23 +609,23 @@ abstract class RegistryFunctions { }) } - def registerSeeded0(name: String, returnType: Type, pt: PType)(impl: (EmitCodeBuilder, Value[Region], PType, Long) => SCode): Unit = - registerSeeded(name, Array[Type](), returnType, if (pt == null) null else (_: Type, _: Seq[PType]) => pt) { case (cb, r, rt, seed, _) => impl(cb, r, rt, seed) } + def registerSeeded0(name: String, returnType: Type, pt: SType)(impl: (EmitCodeBuilder, Value[Region], SType, Long) => SCode): Unit = + registerSeeded(name, Array[Type](), returnType, if (pt == null) null else (_: Type, _: Seq[SType]) => pt) { case (cb, r, rt, seed, _) => impl(cb, r, rt, seed) } - def registerSeeded1(name: String, arg1: Type, returnType: Type, pt: (Type, PType) => PType)(impl: (EmitCodeBuilder, Value[Region], PType, Long, SCode) => SCode): Unit = + def registerSeeded1(name: String, arg1: Type, returnType: Type, pt: (Type, SType) => SType)(impl: (EmitCodeBuilder, Value[Region], SType, Long, SCode) => SCode): Unit = registerSeeded(name, Array(arg1), returnType, unwrappedApply(pt)) { case (cb, r, rt, seed, Array(a1)) => impl(cb, r, rt, seed, a1) } - def registerSeeded2(name: String, arg1: Type, arg2: Type, returnType: Type, pt: (Type, PType, PType) => PType) - (impl: (EmitCodeBuilder, Value[Region], PType, Long, SCode, SCode) => SCode): Unit = + def registerSeeded2(name: String, arg1: Type, arg2: Type, returnType: Type, pt: (Type, SType, SType) => SType) + (impl: (EmitCodeBuilder, Value[Region], SType, Long, SCode, SCode) => SCode): Unit = registerSeeded(name, Array(arg1, arg2), returnType, unwrappedApply(pt)) { case (cb, r, rt, seed, Array(a1, a2)) => impl(cb, r, rt, seed, a1, a2) } - def registerSeeded4(name: String, arg1: Type, arg2: Type, arg3: Type, arg4: Type, returnType: Type, pt: (Type, PType, PType, PType, PType) => PType) - (impl: (EmitCodeBuilder, Value[Region], PType, Long, SCode, SCode, SCode, SCode) => SCode): Unit = + def registerSeeded4(name: String, arg1: Type, arg2: Type, arg3: Type, arg4: Type, returnType: Type, pt: (Type, SType, SType, SType, SType) => SType) + (impl: (EmitCodeBuilder, Value[Region], SType, Long, SCode, SCode, SCode, SCode) => SCode): Unit = registerSeeded(name, Array(arg1, arg2, arg3, arg4), returnType, unwrappedApply(pt)) { case (cb, r, rt, seed, Array(a1, a2, a3, a4)) => impl(cb, r, rt, seed, a1, a2, a3, a4) } @@ -672,11 +640,9 @@ sealed abstract class JVMFunction { def returnType: Type - def returnPType(returnType: Type, valueParameterTypes: Seq[PType]): PType + def computeReturnEmitType(returnType: Type, valueParameterTypes: Seq[EmitType]): EmitType - def apply(mb: EmitRegion, returnType: PType, typeParameters: Seq[Type], args: EmitCode*): EmitCode - - def getAsMethod[C](cb: EmitClassBuilder[C], rpt: PType, typeParameters: Seq[Type], args: PType*): EmitMethodBuilder[C] = ??? 
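applySeededI above lifts a strict function over possibly-missing arguments with IEmitCode.multiMapEmitCodes: if any argument is missing the result is missing, otherwise the function is applied to the present values. The same rule shows up statically in computeReturnEmitType, where the result is required only when every argument is required. The runtime behaviour, modeled over Option:

object MultiMapSketch {
  // Sequence a list of possibly-missing values: Some only if all are present,
  // which is the missingness rule multiMapEmitCodes enforces for strict functions.
  def multiMap[A, B](args: Seq[Option[A]])(f: Seq[A] => B): Option[B] =
    if (args.forall(_.isDefined)) Some(f(args.map(_.get))) else None

  def main(args: Array[String]): Unit = {
    println(multiMap(Seq(Some(1), Some(2)))(_.sum)) // Some(3)
    println(multiMap(Seq(Some(1), None))(_.sum))    // None
  }
}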
+ def apply(mb: EmitRegion, returnType: SType, typeParameters: Seq[Type], args: EmitCode*): EmitCode override def toString: String = s"$name[${ typeParameters.mkString(", ") }](${ valueParameterTypes.mkString(", ") }): $returnType" @@ -691,11 +657,11 @@ sealed abstract class JVMFunction { } object MissingnessObliviousJVMFunction { - def returnPType(calculateReturnPType: (Type, Seq[PType]) => PType)(returnType: Type, valueParameterTypes: Seq[PType]): PType = { - val returnPType = - if (calculateReturnPType == null) PType.canonical(returnType) - else calculateReturnPType(returnType, valueParameterTypes) - returnPType.setRequired(valueParameterTypes.forall(_.required)) + def returnSType(computeStrictReturnEmitType: (Type, Seq[SType]) => SType)(returnType: Type, valueParameterTypes: Seq[SType]): SType = { + if (computeStrictReturnEmitType == null) + SType.canonical(returnType) + else + computeStrictReturnEmitType(returnType, valueParameterTypes) } } @@ -704,42 +670,39 @@ abstract class UnseededMissingnessObliviousJVMFunction ( override val typeParameters: Seq[Type], override val valueParameterTypes: Seq[Type], override val returnType: Type, - missingnessObliviousReturnPType: (Type, Seq[PType]) => PType + missingnessObliviousComputeReturnType: (Type, Seq[SType]) => SType ) extends JVMFunction { - override def returnPType(returnType: Type, valueParameterTypes: Seq[PType]): PType = - MissingnessObliviousJVMFunction.returnPType(missingnessObliviousReturnPType)(returnType, valueParameterTypes) - - def apply(r: EmitRegion, cb: EmitCodeBuilder, returnPType: PType, typeParameters: Seq[Type], args: (PType, Code[_])*): Code[_] + override def computeReturnEmitType(returnType: Type, valueParameterTypes: Seq[EmitType]): EmitType = { + EmitType(computeStrictReturnEmitType(returnType, valueParameterTypes.map(_.st)), valueParameterTypes.forall(_.required)) + } + def computeStrictReturnEmitType(returnType: Type, valueParameterTypes: Seq[SType]): SType = + MissingnessObliviousJVMFunction.returnSType(missingnessObliviousComputeReturnType)(returnType, valueParameterTypes) - def apply(r: EmitRegion, cb: EmitCodeBuilder, returnPType: PType, typeParameters: Seq[Type], args: PCode*): PCode = - PCode(returnPType, apply(r, cb, returnPType, typeParameters, args.map(pc => pc.pt -> pc.code): _*)) + def apply(r: EmitRegion, cb: EmitCodeBuilder, returnSType: SType, typeParameters: Seq[Type], args: SCode*): SCode - def apply(r: EmitRegion, returnPType: PType, typeParameters: Seq[Type], args: EmitCode*): EmitCode = { + def apply(r: EmitRegion, returnType: SType, typeParameters: Seq[Type], args: EmitCode*): EmitCode = { EmitCode.fromI(r.mb)(cb => IEmitCode.multiMapEmitCodes(cb, args.toFastIndexedSeq) { args => - apply(r, cb, returnPType, typeParameters, args: _*) + apply(r, cb, returnType, typeParameters, args: _*) }) } - override def getAsMethod[C](cb: EmitClassBuilder[C], rpt: PType, typeParameters: Seq[Type], args: PType*): EmitMethodBuilder[C] = { + def getAsMethod[C](cb: EmitClassBuilder[C], rpt: SType, typeParameters: Seq[Type], args: SType*): EmitMethodBuilder[C] = { val unified = unify(typeParameters, args.map(_.virtualType), rpt.virtualType) assert(unified) - val argTIs = args.toFastIndexedSeq.map(typeToTypeInfo) - val methodbuilder = cb.genEmitMethod(name, (typeInfo[Region] +: argTIs).map(ti => ti: CodeParamType), typeToTypeInfo(rpt)) - methodbuilder.emitWithBuilder(cb => apply(EmitRegion.default(methodbuilder), + val methodbuilder = cb.genEmitMethod(name, FastIndexedSeq[ParamType](typeInfo[Region]) ++ 
args.map(_.paramType), rpt.paramType) + methodbuilder.emitSCode(cb => apply(EmitRegion.default(methodbuilder), cb, rpt, typeParameters, - args.zip(argTIs.zipWithIndex.map { case (ti, i) => - methodbuilder.getCodeParam(i + 2)(ti).get - }): _*)) + (0 until args.length).map(i => methodbuilder.getSCodeParam(i + 2)): _*)) methodbuilder } } object MissingnessAwareJVMFunction { - def returnPType(calculateReturnPType: (Type, Seq[PType]) => PType)(returnType: Type, valueParameterTypes: Seq[PType]): PType= - if (calculateReturnPType == null) PType.canonical(returnType) - else calculateReturnPType(returnType, valueParameterTypes) + def returnSType(calculateReturnType: (Type, Seq[EmitType]) => EmitType)(returnType: Type, valueParameterTypes: Seq[EmitType]): EmitType = + if (calculateReturnType == null) EmitType(SType.canonical(returnType), false) + else calculateReturnType(returnType, valueParameterTypes) } abstract class UnseededMissingnessAwareJVMFunction ( @@ -747,14 +710,14 @@ abstract class UnseededMissingnessAwareJVMFunction ( override val typeParameters: Seq[Type], override val valueParameterTypes: Seq[Type], override val returnType: Type, - missingnessAwareReturnPType: (Type, Seq[PType]) => PType + missingnessAwareComputeReturnSType: (Type, Seq[EmitType]) => EmitType ) extends JVMFunction { - override def returnPType(returnType: Type, valueParameterTypes: Seq[PType]): PType = - MissingnessAwareJVMFunction.returnPType(missingnessAwareReturnPType)(returnType, valueParameterTypes) + override def computeReturnEmitType(returnType: Type, valueParameterTypes: Seq[EmitType]): EmitType = + MissingnessAwareJVMFunction.returnSType(missingnessAwareComputeReturnSType)(returnType, valueParameterTypes) def apply(cb: EmitCodeBuilder, r: Value[Region], - rpt: PType, + rpt: SType, typeParameters: Seq[Type], args: EmitCode* ): IEmitCode = { @@ -773,12 +736,12 @@ abstract class SeededJVMFunction ( def setSeed(s: Long): Unit = { seed = s } - def applySeededI(seed: Long, cb: EmitCodeBuilder, region: Value[Region], rpt: PType, args: (PType, EmitCode)*): IEmitCode + def applySeededI(seed: Long, cb: EmitCodeBuilder, region: Value[Region], rpt: SType, args: EmitCode*): IEmitCode - def apply(region: EmitRegion, rpt: PType, typeParameters: Seq[Type], args: EmitCode*): EmitCode = + def apply(region: EmitRegion, rpt: SType, typeParameters: Seq[Type], args: EmitCode*): EmitCode = fatal("seeded functions must go through IEmitCode path") - def apply(region: EmitRegion, rpt: PType, args: EmitCode*): EmitCode = + def apply(region: EmitRegion, rpt: SType, args: EmitCode*): EmitCode = fatal("seeded functions must go through IEmitCode path") def isStrict: Boolean = false @@ -788,18 +751,22 @@ abstract class SeededMissingnessObliviousJVMFunction ( override val name: String, override val valueParameterTypes: Seq[Type], override val returnType: Type, - missingnessObliviousReturnPType: (Type, Seq[PType]) => PType + missingnessObliviousreturnSType: (Type, Seq[SType]) => SType ) extends SeededJVMFunction(name, valueParameterTypes, returnType) { - override def returnPType(returnType: Type, valueParameterTypes: Seq[PType]): PType = - MissingnessObliviousJVMFunction.returnPType(missingnessObliviousReturnPType)(returnType, valueParameterTypes) + override def computeReturnEmitType(returnType: Type, valueParameterTypes: Seq[EmitType]): EmitType = { + EmitType(computeStrictReturnEmitType(returnType, valueParameterTypes.map(_.st)), valueParameterTypes.forall(_.required)) + } + + def computeStrictReturnEmitType(returnType: Type, 
valueParameterTypes: Seq[SType]): SType = + MissingnessObliviousJVMFunction.returnSType(missingnessObliviousreturnSType)(returnType, valueParameterTypes) } abstract class SeededMissingnessAwareJVMFunction ( override val name: String, override val valueParameterTypes: Seq[Type], override val returnType: Type, - missingnessAwareReturnPType: (Type, Seq[PType]) => PType + missingnessAwarereturnSType: (Type, Seq[EmitType]) => EmitType ) extends SeededJVMFunction(name, valueParameterTypes, returnType) { - override def returnPType(returnType: Type, valueParameterTypes: Seq[PType]): PType = - MissingnessAwareJVMFunction.returnPType(missingnessAwareReturnPType)(returnType, valueParameterTypes) + override def computeReturnEmitType(returnType: Type, valueParameterTypes: Seq[EmitType]): EmitType = + MissingnessAwareJVMFunction.returnSType(missingnessAwarereturnSType)(returnType, valueParameterTypes) } diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/GenotypeFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/GenotypeFunctions.scala index 0cc3f0110b7..8ffb6c9d933 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/GenotypeFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/GenotypeFunctions.scala @@ -4,15 +4,16 @@ import is.hail.annotations.Region import is.hail.asm4s.{coerce => _, _} import is.hail.types.{coerce => _, _} import is.hail.expr.ir._ -import is.hail.types.physical.stypes.SCode -import is.hail.types.physical.{PArray, PCode, PFloat64, PIndexableCode, PInt32, PType} +import is.hail.types.physical.stypes.{EmitType, SCode, SType} +import is.hail.types.physical.stypes.primitives.{SFloat64, SInt32} +import is.hail.types.physical.stypes.interfaces._ import is.hail.types.virtual.{TArray, TFloat64, TInt32, Type} object GenotypeFunctions extends RegistryFunctions { def registerAll() { - registerPCode1("gqFromPL", TArray(tv("N", "int32")), TInt32, (_: Type, _: PType) => PInt32()) - { case (r, cb, rt, _pl: PIndexableCode) => + registerSCode1("gqFromPL", TArray(tv("N", "int32")), TInt32, (_: Type, _: SType) => SInt32) + { case (r, cb, rt, _pl: SIndexableCode) => val code = EmitCodeBuilder.scopedCode(r.mb) { cb => val pl = _pl.memoize(cb, "plv") val m = cb.newLocal[Int]("m", 99) @@ -33,13 +34,13 @@ object GenotypeFunctions extends RegistryFunctions { }) m2 - m } - PCode(rt, code) + primitive(code) } registerIEmitCode1("dosage", TArray(tv("N", "float64")), TFloat64, - (_: Type, arrayType: PType) => PFloat64(arrayType.required && arrayType.asInstanceOf[PArray].elementType.required) + (_: Type, arrayType: EmitType) => EmitType(SFloat64, arrayType.required && arrayType.st.asInstanceOf[SContainer].elementEmitType.required) ) { case (cb, r, rt, gp) => - gp.toI(cb).flatMap(cb) { case (gpc: PIndexableCode) => + gp.toI(cb).flatMap(cb) { case (gpc: SIndexableCode) => val gpv = gpc.memoize(cb, "dosage_gp") cb.ifx(gpv.loadLength().cne(3), @@ -47,7 +48,7 @@ object GenotypeFunctions extends RegistryFunctions { gpv.loadElement(cb, 1).flatMap(cb) { (_1: SCode) => gpv.loadElement(cb, 2).map(cb) { (_2: SCode) => - PCode(rt, _1.asDouble.doubleCode(cb) + _2.asDouble.doubleCode(cb) * 2.0) + primitive(_1.asDouble.doubleCode(cb) + _2.asDouble.doubleCode(cb) * 2.0) } } } diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/GetElement.scala b/hail/src/main/scala/is/hail/expr/ir/functions/GetElement.scala index ec45cae7768..93017184fda 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/GetElement.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/GetElement.scala 
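In the GenotypeFunctions hunks above, gqFromPL keeps the smallest and second-smallest PL entries in one pass and returns their difference, and dosage requires a length-3 GP array and returns gp(1) + 2*gp(2). Both in plain Scala (the staged version seeds its smallest-seen value with 99 and reports a fatal error for a bad GP length; this sketch just uses require):

object GenotypeSketch {
  // GQ from PL: the gap between the smallest and second-smallest likelihood,
  // found with a single two-minima scan as in gqFromPL above.
  def gqFromPL(pl: IndexedSeq[Int]): Int = {
    require(pl.nonEmpty)
    var m = Int.MaxValue   // smallest seen so far
    var m2 = Int.MaxValue  // second smallest seen so far
    for (v <- pl) {
      if (v < m) { m2 = m; m = v }
      else if (v < m2) { m2 = v }
    }
    m2 - m
  }

  // Dosage from a length-3 genotype-probability vector: P(het) + 2 * P(hom alt).
  def dosage(gp: IndexedSeq[Double]): Double = {
    require(gp.length == 3, "length of gp array must be 3")
    gp(1) + gp(2) * 2.0
  }

  def main(args: Array[String]): Unit = {
    println(gqFromPL(IndexedSeq(0, 30, 60)))       // 30
    println(dosage(IndexedSeq(0.25, 0.5, 0.25)))   // 1.0
  }
}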
@@ -5,7 +5,7 @@ import is.hail.types.BlockMatrixType import is.hail.types.virtual.Type import is.hail.linalg.BlockMatrix -case class GetElement(index: Seq[Long]) extends BlockMatrixToValueFunction { +case class GetElement(index: IndexedSeq[Long]) extends BlockMatrixToValueFunction { assert(index.length == 2) override def typ(childType: BlockMatrixType): Type = childType.elementType diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/IntervalFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/IntervalFunctions.scala index a2ce9bf5d36..d292b5ec2ff 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/IntervalFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/IntervalFunctions.scala @@ -4,6 +4,10 @@ import is.hail.asm4s.{Code, _} import is.hail.expr.ir._ import is.hail.expr.ir.orderings.CodeOrdering import is.hail.types.physical._ +import is.hail.types.physical.stypes.{EmitType, SType} +import is.hail.types.physical.stypes.concrete.SIntervalPointer +import is.hail.types.physical.stypes.interfaces._ +import is.hail.types.physical.stypes.primitives.SBoolean import is.hail.types.virtual._ object IntervalFunctions extends RegistryFunctions { @@ -11,18 +15,18 @@ object IntervalFunctions extends RegistryFunctions { def registerAll(): Unit = { registerIEmitCode4("Interval", tv("T"), tv("T"), TBoolean, TBoolean, TInterval(tv("T")), - { case (_: Type, startpt, endpt, includesStartPT, includesEndPT) => - PCanonicalInterval( - InferPType.getCompatiblePType(Seq(startpt, endpt)), - required = includesStartPT.required && includesEndPT.required - ) + { case (_: Type, startpt, endpt, includesStartET, includesEndET) => + EmitType(PCanonicalInterval( + InferPType.getCompatiblePType(Seq(startpt.canonicalPType, endpt.canonicalPType)), + required = includesStartET.required && includesEndET.required + ).sType, includesStartET.required && includesEndET.required) }) { - case (cb, r, rt: PCanonicalInterval, start, end, includesStart, includesEnd) => + case (cb, r, SIntervalPointer(pt: PCanonicalInterval), start, end, includesStart, includesEnd) => includesStart.toI(cb).flatMap(cb) { includesStart => includesEnd.toI(cb).map(cb) { includesEnd => - rt.constructFromCodes(cb, r, + pt.constructFromCodes(cb, r, start, end, EmitCode.present(cb.emb, includesStart), @@ -32,83 +36,83 @@ object IntervalFunctions extends RegistryFunctions { } registerIEmitCode1("start", TInterval(tv("T")), tv("T"), - (_: Type, x: PType) => x.asInstanceOf[PInterval].pointType.orMissing(x.required)) { + (_: Type, x: EmitType) => EmitType(x.st.asInstanceOf[SInterval].pointType, x.required && x.st.asInstanceOf[SInterval].pointEmitType.required)) { case (cb, r, rt, interval) => - interval.toI(cb).flatMap(cb) { case pi: PIntervalCode => + interval.toI(cb).flatMap(cb) { case pi: SIntervalCode => val pv = pi.memoize(cb, "interval") - pv.loadStart(cb).typecast[PCode] + pv.loadStart(cb) } } registerIEmitCode1("end", TInterval(tv("T")), tv("T"), - (_: Type, x: PType) => x.asInstanceOf[PInterval].pointType.orMissing(x.required)) { + (_: Type, x: EmitType) => EmitType(x.st.asInstanceOf[SInterval].pointType, x.required && x.st.asInstanceOf[SInterval].pointEmitType.required)) { case (cb, r, rt, interval) => - interval.toI(cb).flatMap(cb) { case pi: PIntervalCode => + interval.toI(cb).flatMap(cb) { case pi: SIntervalCode => val pv = pi.memoize(cb, "interval") - pv.loadEnd(cb).typecast[PCode] + pv.loadEnd(cb) } } - registerPCode1("includesStart", TInterval(tv("T")), TBoolean, (_: Type, x: PType) => - PBoolean(x.required) + 
registerSCode1("includesStart", TInterval(tv("T")), TBoolean, (_: Type, x: SType) => + SBoolean ) { - case (r, cb, rt, interval: PIntervalCode) => PCode(rt, interval.includesStart()) + case (r, cb, rt, interval: SIntervalCode) => primitive(interval.includesStart()) } - registerPCode1("includesEnd", TInterval(tv("T")), TBoolean, (_: Type, x: PType) => - PBoolean(x.required) + registerSCode1("includesEnd", TInterval(tv("T")), TBoolean, (_: Type, x: SType) => + SBoolean ) { - case (r, cb, rt, interval: PIntervalCode) => PCode(rt, interval.includesEnd()) + case (r, cb, rt, interval: SIntervalCode) => primitive(interval.includesEnd()) } registerIEmitCode2("contains", TInterval(tv("T")), tv("T"), TBoolean, { - case(_: Type, intervalT: PInterval, _: PType) => PBoolean(intervalT.required) + case(_: Type, intervalT: EmitType, _: EmitType) => EmitType(SBoolean, intervalT.required) }) { case (cb, r, rt, int, point) => - int.toI(cb).map(cb) { case (intc: PIntervalCode) => - val interval: PIntervalValue = intc.memoize(cb, "interval") + int.toI(cb).map(cb) { case (intc: SIntervalCode) => + val interval: SIntervalValue = intc.memoize(cb, "interval") val pointv = cb.memoize(point.toI(cb), "point") val compare = cb.emb.ecb.getOrderingFunction(pointv.st, interval.st.pointType, CodeOrdering.Compare()) - val start = EmitCode.fromI(cb.emb)(cb => interval.loadStart(cb).typecast[PCode]) + val start = EmitCode.fromI(cb.emb)(cb => interval.loadStart(cb)) val cmp = cb.newLocal("cmp", compare(cb, pointv, start)) val contains = cb.newLocal[Boolean]("contains", false) cb.ifx(cmp > 0 || (cmp.ceq(0) && interval.includesStart()), { - val end = EmitCode.fromI(cb.emb)(cb => interval.loadEnd(cb).typecast[PCode]) + val end = EmitCode.fromI(cb.emb)(cb => interval.loadEnd(cb)) cb.assign(cmp, compare(cb, pointv, end)) cb.assign(contains, cmp < 0 || (cmp.ceq(0) && interval.includesEnd())) }) - PCode(rt, contains) + primitive(contains) } } - registerPCode1("isEmpty", TInterval(tv("T")), TBoolean, (_: Type, pt: PType) => PBoolean(pt.required)) { - case (r, cb, rt, interval: PIntervalCode) => + registerSCode1("isEmpty", TInterval(tv("T")), TBoolean, (_: Type, pt: SType) => SBoolean) { + case (r, cb, rt, interval: SIntervalCode) => val empty = EmitCodeBuilder.scopedCode(r.mb) { cb => val intv = interval.memoize(cb, "interval") intv.isEmpty(cb) } - PCode(rt, empty) + primitive(empty) } - registerPCode2("overlaps", TInterval(tv("T")), TInterval(tv("T")), TBoolean, (_: Type, i1t: PType, i2t: PType) => PBoolean(i1t.required && i2t.required)) { - case (r, cb, rt, int1: PIntervalCode, int2: PIntervalCode) => + registerSCode2("overlaps", TInterval(tv("T")), TInterval(tv("T")), TBoolean, (_: Type, i1t: SType, i2t: SType) => SBoolean) { + case (r, cb, rt, int1: SIntervalCode, int2: SIntervalCode) => val overlap = EmitCodeBuilder.scopedCode(r.mb) { cb => val interval1 = int1.memoize(cb, "interval1") val interval2 = int2.memoize(cb, "interval2") val compare = cb.emb.ecb.getOrderingFunction(int1.st.pointType, int2.st.pointType, CodeOrdering.Compare()) - def isAboveOnNonempty(cb: EmitCodeBuilder, lhs: PIntervalValue, rhs: PIntervalValue): Code[Boolean] = { - val start = EmitCode.fromI(cb.emb)(cb => lhs.loadStart(cb).typecast[PCode]) - val end = EmitCode.fromI(cb.emb)(cb => rhs.loadEnd(cb).typecast[PCode]) + def isAboveOnNonempty(cb: EmitCodeBuilder, lhs: SIntervalValue, rhs: SIntervalValue): Code[Boolean] = { + val start = EmitCode.fromI(cb.emb)(cb => lhs.loadStart(cb)) + val end = EmitCode.fromI(cb.emb)(cb => rhs.loadEnd(cb)) val cmp = 
cb.newLocal("cmp", compare(cb, start, end)) cmp > 0 || (cmp.ceq(0) && (!lhs.includesStart() || !rhs.includesEnd())) } - def isBelowOnNonempty(cb: EmitCodeBuilder, lhs: PIntervalValue, rhs: PIntervalValue): Code[Boolean] = { - val end = EmitCode.fromI(cb.emb)(cb => lhs.loadEnd(cb).typecast[PCode]) - val start = EmitCode.fromI(cb.emb)(cb => rhs.loadStart(cb).typecast[PCode]) + def isBelowOnNonempty(cb: EmitCodeBuilder, lhs: SIntervalValue, rhs: SIntervalValue): Code[Boolean] = { + val end = EmitCode.fromI(cb.emb)(cb => lhs.loadEnd(cb)) + val start = EmitCode.fromI(cb.emb)(cb => rhs.loadStart(cb)) val cmp = cb.newLocal("cmp", compare(cb, end, start)) cmp < 0 || (cmp.ceq(0) && (!lhs.includesEnd() || !rhs.includesStart())) } @@ -117,7 +121,7 @@ object IntervalFunctions extends RegistryFunctions { isBelowOnNonempty(cb, interval1, interval2) || isAboveOnNonempty(cb, interval1, interval2)) } - PCode(rt, overlap) + primitive(overlap) } registerIR2("sortedNonOverlappingIntervalsContain", diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/LocusFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/LocusFunctions.scala index 30dc4e4498d..26be380affb 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/LocusFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/LocusFunctions.scala @@ -5,9 +5,10 @@ import is.hail.asm4s import is.hail.asm4s._ import is.hail.expr.ir.{EmitMethodBuilder, _} import is.hail.types.physical._ -import is.hail.types.physical.stypes.concrete.{SBaseStructPointerCode, SCanonicalLocusPointerCode, SIntervalPointerCode, SStringPointer} +import is.hail.types.physical.stypes.{EmitType, SType} +import is.hail.types.physical.stypes.concrete.{SBaseStructPointer, SBaseStructPointerCode, SCanonicalLocusPointer, SCanonicalLocusPointerCode, SIntervalPointer, SIntervalPointerCode, SStringPointer} import is.hail.types.physical.stypes.interfaces._ -import is.hail.types.physical.stypes.primitives.SFloat64Code +import is.hail.types.physical.stypes.primitives.{SFloat64Code, SInt32, SInt64} import is.hail.types.virtual._ import is.hail.utils._ import is.hail.variant._ @@ -39,7 +40,7 @@ object LocusFunctions extends RegistryFunctions { val pAlleles = rt.types(1).asInstanceOf[PCanonicalArray] val all = cb.newLocal[IndexedSeq[String]]("locus_alleles_parsed_alleles", variant.invoke[IndexedSeq[String]]("_2")) val len = cb.newLocal[Int]("locus_alleles_n_alleles", all.invoke[Int]("length")) - val ps = pAlleles.elementType.asInstanceOf[PCanonicalString] + val ps = pAlleles.elementType.setRequired(false).asInstanceOf[PCanonicalString] val ss = SStringPointer(ps) val (push, finish) = pAlleles.constructFromFunctions(cb, r, len, deepCopy = false) val i = cb.newLocal[Int]("locus_alleles_i", 0) @@ -93,16 +94,16 @@ object LocusFunctions extends RegistryFunctions { def registerAll() { val locusClass = Locus.getClass - registerPCode1("contig", tlocus("T"), TString, - (_: Type, x: PType) => x.asInstanceOf[PLocus].contigType) { - case (r, cb, rt, locus: PLocusCode) => - locus.contig(cb).asPCode + registerSCode1("contig", tlocus("T"), TString, + (_: Type, x: SType) => x.asInstanceOf[SLocus].contigType) { + case (r, cb, rt, locus: SLocusCode) => + locus.contig(cb) } - registerPCode1("position", tlocus("T"), TInt32, (_: Type, x: PType) => x.asInstanceOf[PLocus].positionType) { + registerSCode1("position", tlocus("T"), TInt32, (_: Type, x: SType) => SInt32) { case (r, cb, rt, pc: SLocusCode) => val locus = pc.memoize(cb, "locus_position_locus") - PCode(rt, locus.position(cb)) + 
primitive(locus.position(cb)) } registerLocusCode("isAutosomalOrPseudoAutosomal") { locus => isAutosomal(locus) || ((inX(locus) || inY(locus)) && inPar(locus)) @@ -114,13 +115,13 @@ object LocusFunctions extends RegistryFunctions { registerLocusCode("inXNonPar") { locus => inX(locus) && !inPar(locus) } registerLocusCode("inYNonPar") { locus => inY(locus) && !inPar(locus) } - registerPCode2("min_rep", tlocus("T"), TArray(TString), TStruct("locus" -> tv("T"), "alleles" -> TArray(TString)), { - (returnType: Type, _: PType, _: PType) => { + registerSCode2("min_rep", tlocus("T"), TArray(TString), TStruct("locus" -> tv("T"), "alleles" -> TArray(TString)), { + (returnType: Type, _: SType, _: SType) => { val locusPT = PCanonicalLocus(returnType.asInstanceOf[TStruct].field("locus").typ.asInstanceOf[TLocus].rg, true) - PCanonicalStruct("locus" -> locusPT, "alleles" -> PCanonicalArray(PCanonicalString(true), true)) + PCanonicalStruct("locus" -> locusPT, "alleles" -> PCanonicalArray(PCanonicalString(true), true)).sType } }) { - case (r, cb, rt: PCanonicalStruct, locus: PLocusCode, alleles: PIndexableCode) => + case (r, cb, SBaseStructPointer(rt: PCanonicalStruct), locus: SLocusCode, alleles: SIndexableCode) => val variantTuple = Code.invokeScalaObject2[Locus, IndexedSeq[String], (Locus, IndexedSeq[String])]( VariantMethods.getClass, "minRep", locus.getLocusObj(cb), @@ -129,11 +130,11 @@ object LocusFunctions extends RegistryFunctions { emitVariant(cb, r.region, variantTuple, rt) } - registerPCode2("locus_windows_per_contig", TArray(TArray(TFloat64)), TFloat64, TTuple(TArray(TInt32), TArray(TInt32)), { - (_: Type, _: PType, _: PType) => - PCanonicalTuple(false, PCanonicalArray(PInt32(true), true), PCanonicalArray(PInt32(true), true)) + registerSCode2("locus_windows_per_contig", TArray(TArray(TFloat64)), TFloat64, TTuple(TArray(TInt32), TArray(TInt32)), { + (_: Type, _: SType, _: SType) => + PCanonicalTuple(false, PCanonicalArray(PInt32(true), true), PCanonicalArray(PInt32(true), true)).sType }) { - case (r: EmitRegion, cb: EmitCodeBuilder, rt: PCanonicalTuple, groupedCode: PIndexableCode, radiusCode: SFloat64Code) => + case (r: EmitRegion, cb: EmitCodeBuilder, SBaseStructPointer(rt: PCanonicalTuple), groupedCode: SIndexableCode, radiusCode: SFloat64Code) => val grouped = groupedCode.memoize(cb, "locuswindows_grouped") val radius = cb.newLocal("locuswindows_radius", radiusCode.doubleCode(cb)) @@ -200,7 +201,7 @@ object LocusFunctions extends RegistryFunctions { ) cb.define(Lbreak) - pushElement(cb, IEmitCode.present(cb, PCode(arrayType.elementType, offset + idx))) + pushElement(cb, IEmitCode.present(cb, primitive(offset + idx))) cb.assign(i, i + 1) }) @@ -229,10 +230,10 @@ object LocusFunctions extends RegistryFunctions { ), deepCopy = false) } - registerPCode1("Locus", TString, tlocus("T"), { - (returnType: Type, _: PType) => PCanonicalLocus(returnType.asInstanceOf[TLocus].rg) + registerSCode1("Locus", TString, tlocus("T"), { + (returnType: Type, _: SType) => PCanonicalLocus(returnType.asInstanceOf[TLocus].rg).sType }) { - case (r, cb, rt: PCanonicalLocus, str: PStringCode) => + case (r, cb, SCanonicalLocusPointer(rt: PCanonicalLocus), str: SStringCode) => val slocus = str.loadString() emitLocus(cb, r.region, @@ -240,23 +241,23 @@ object LocusFunctions extends RegistryFunctions { rt) } - registerPCode2("Locus", TString, TInt32, tlocus("T"), { - (returnType: Type, _: PType, _: PType) => PCanonicalLocus(returnType.asInstanceOf[TLocus].rg) + registerSCode2("Locus", TString, TInt32, tlocus("T"), { + 
(returnType: Type, _: SType, _: SType) => PCanonicalLocus(returnType.asInstanceOf[TLocus].rg).sType }) { - case (r, cb, rt: PCanonicalLocus, contig, pos) => + case (r, cb, SCanonicalLocusPointer(rt: PCanonicalLocus), contig, pos) => val contigMemo = contig.memoize(cb, "locus_contig") val posMemo = pos.memoize(cb, "locus_pos") cb += rgCode(r.mb, rt.rg).invoke[String, Int, Unit]("checkLocus", contigMemo.asString.loadString(), posMemo.asInt.intCode(cb)) rt.constructFromPositionAndString(cb, r.region, contigMemo.asString.loadString(), posMemo.asInt.intCode(cb)) } - registerPCode1("LocusAlleles", TString, tvariant("T"), { - (returnType: Type, _: PType) => { + registerSCode1("LocusAlleles", TString, tvariant("T"), { + (returnType: Type, _: SType) => { val lTyp = returnType.asInstanceOf[TStruct].field("locus").typ.asInstanceOf[TLocus] - PCanonicalStruct("locus" -> PCanonicalLocus(lTyp.rg, true), "alleles" -> PCanonicalArray(PCanonicalString(true), true)) + PCanonicalStruct("locus" -> PCanonicalLocus(lTyp.rg, true), "alleles" -> PCanonicalArray(PCanonicalString(true), true)).sType } }) { - case (r, cb, rt: PCanonicalStruct, variantStr) => + case (r, cb, SBaseStructPointer(rt: PCanonicalStruct), variantStr) => val svar = variantStr.asString.loadString() val plocus = rt.types(0).asInstanceOf[PCanonicalLocus] @@ -267,11 +268,11 @@ object LocusFunctions extends RegistryFunctions { } registerIEmitCode2("LocusInterval", TString, TBoolean, tinterval("T"), { - (returnType: Type, _: PType, _: PType) => { + (returnType: Type, _: EmitType, _: EmitType) => { val lPTyp = returnType.asInstanceOf[TInterval].pointType.asInstanceOf[TLocus] - PCanonicalInterval(PCanonicalLocus(lPTyp.asInstanceOf[TLocus].rg)) + EmitType(PCanonicalInterval(PCanonicalLocus(lPTyp.asInstanceOf[TLocus].rg)).sType, false) } - }) { case (cb: EmitCodeBuilder, r: Value[Region], rt: PCanonicalInterval, locusStrEC: EmitCode, invalidMissingEC: EmitCode) => + }) { case (cb: EmitCodeBuilder, r: Value[Region], SIntervalPointer(rt: PCanonicalInterval), locusStrEC: EmitCode, invalidMissingEC: EmitCode) => val plocus = rt.pointType.asInstanceOf[PLocus] @@ -298,13 +299,13 @@ object LocusFunctions extends RegistryFunctions { } registerIEmitCode6("LocusInterval", TString, TInt32, TInt32, TBoolean, TBoolean, TBoolean, tinterval("T"), { - (returnType: Type, _: PType, _: PType, _: PType, _: PType, _: PType, _: PType) => { + (returnType: Type, _: EmitType, _: EmitType, _: EmitType, _: EmitType, _: EmitType, _: EmitType) => { val lPTyp = returnType.asInstanceOf[TInterval].pointType.asInstanceOf[TLocus] - PCanonicalInterval(PCanonicalLocus(lPTyp.rg)) + EmitType(PCanonicalInterval(PCanonicalLocus(lPTyp.rg)).sType, false) } }) { case (cb: EmitCodeBuilder, r: Value[Region], - rt: PCanonicalInterval, + SIntervalPointer(rt: PCanonicalInterval), locusString: EmitCode, pos1: EmitCode, pos2: EmitCode, @@ -347,38 +348,38 @@ object LocusFunctions extends RegistryFunctions { } } - registerPCode1("globalPosToLocus", TInt64, tlocus("T"), { - (returnType: Type, _: PType) => - PCanonicalLocus(returnType.asInstanceOf[TLocus].rg) + registerSCode1("globalPosToLocus", TInt64, tlocus("T"), { + (returnType: Type, _: SType) => + PCanonicalLocus(returnType.asInstanceOf[TLocus].rg).sType }) { - case (r, cb, rt: PCanonicalLocus, globalPos) => + case (r, cb, SCanonicalLocusPointer(rt: PCanonicalLocus), globalPos) => val locus = cb.newLocal[Locus]("global_pos_locus", rgCode(r.mb, rt.rg).invoke[Long, Locus]("globalPosToLocus", globalPos.asLong.longCode(cb))) 
rt.constructFromPositionAndString(cb, r.region, locus.invoke[String]("contig"), locus.invoke[Int]("position")) } - registerPCode1("locusToGlobalPos", tlocus("T"), TInt64, (_: Type, _: PType) => PInt64()) { - case (r, cb, rt, locus: PLocusCode) => + registerSCode1("locusToGlobalPos", tlocus("T"), TInt64, (_: Type, _: SType) => SInt64) { + case (r, cb, rt, locus: SLocusCode) => val locusObject = locus.memoize(cb, "locus_to_global_pos") .getLocusObj(cb) - val globalPos = rgCode(r.mb, locus.pt.rg).invoke[Locus, Long]("locusToGlobalPos", locusObject) - PCode(rt, globalPos) + val globalPos = rgCode(r.mb, locus.st.rg).invoke[Locus, Long]("locusToGlobalPos", locusObject) + primitive(globalPos) } registerIEmitCode2("liftoverLocus", tlocus("T"), TFloat64, TStruct("result" -> tv("U", "locus"), "is_negative_strand" -> TBoolean), { - (returnType: Type, _: PType, _: PType) => { + (returnType: Type, _: EmitType, _: EmitType) => { val lTyp = returnType.asInstanceOf[TStruct].field("result").typ.asInstanceOf[TLocus] - PCanonicalStruct("result" -> PCanonicalLocus(lTyp.rg, true), "is_negative_strand" -> PBoolean(true)) + EmitType(PCanonicalStruct("result" -> PCanonicalLocus(lTyp.rg, true), "is_negative_strand" -> PBoolean(true)).sType, false) } }) { - case (cb, r, rt: PCanonicalStruct, loc, minMatch) => + case (cb, r, SBaseStructPointer(rt: PCanonicalStruct), loc, minMatch) => loc.toI(cb).flatMap(cb) { loc => minMatch.toI(cb).flatMap(cb) { minMatch => val Lmissing = CodeLabel() val Ldefined = CodeLabel() - val locT = loc.pt.asInstanceOf[PLocus] + val locT = loc.asLocus.st val srcRG = locT.rg val destRG = rt.types(0).asInstanceOf[PLocus].rg @@ -405,12 +406,12 @@ object LocusFunctions extends RegistryFunctions { } registerIEmitCode2("liftoverLocusInterval", tinterval("T"), TFloat64, TStruct("result" -> tinterval("U"), "is_negative_strand" -> TBoolean), { - (returnType: Type, _: PType, _: PType) => { + (returnType: Type, _: EmitType, _: EmitType) => { val lTyp = returnType.asInstanceOf[TStruct].field("result").typ.asInstanceOf[TInterval].pointType.asInstanceOf[TLocus] - PCanonicalStruct("result" -> PCanonicalInterval(PCanonicalLocus(lTyp.rg, true), true), "is_negative_strand" -> PBoolean(true)) + EmitType(PCanonicalStruct("result" -> PCanonicalInterval(PCanonicalLocus(lTyp.rg, true), true), "is_negative_strand" -> PBoolean(true)).sType, false) } }) { - case (cb, r, rt: PCanonicalStruct, interval, minMatch) => + case (cb, r, SBaseStructPointer(rt: PCanonicalStruct), interval, minMatch) => interval.toI(cb).flatMap(cb) { interval => minMatch.toI(cb).flatMap(cb) { minMatch => @@ -418,8 +419,8 @@ object LocusFunctions extends RegistryFunctions { val Ldefined = CodeLabel() - val iT = interval.pt.asInstanceOf[PInterval] - val srcRG = iT.pointType.asInstanceOf[PLocus].rg + val iT = interval.st.asInstanceOf[SInterval] + val srcRG = iT.pointType.asInstanceOf[SLocus].rg val destRG = rt.types(0).asInstanceOf[PInterval].pointType.asInstanceOf[PLocus].rg val er = EmitRegion(cb.emb, r) val intervalObj = Code.checkcast[Interval](scodeToJavaValue(cb, r, interval)) diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/MathFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/MathFunctions.scala index fddb45ac23c..745f995a58c 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/MathFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/MathFunctions.scala @@ -3,7 +3,8 @@ package is.hail.expr.ir.functions import is.hail.asm4s.Code import is.hail.expr.ir._ import is.hail.stats._ -import 
is.hail.types.physical.stypes.primitives.{SFloat64Code, SInt32Code} +import is.hail.types.physical.stypes._ +import is.hail.types.physical.stypes.primitives._ import is.hail.types.physical.{PBoolean, PFloat32, PFloat64, PInt32, PInt64, PType} import is.hail.types.virtual._ import is.hail.utils._ @@ -100,76 +101,76 @@ object MathFunctions extends RegistryFunctions { registerIR1("toFloat32", tnum("T"), TFloat32)((_, x) => Cast(x, TFloat32)) registerIR1("toFloat64", tnum("T"), TFloat64)((_, x) => Cast(x, TFloat64)) - registerScalaFunction("abs", Array(TInt32), TInt32, (_: Type, _: Seq[PType]) => PInt32())(mathPackageClass, "abs") - registerScalaFunction("abs", Array(TInt64), TInt64, (_: Type, _: Seq[PType]) => PInt64())(mathPackageClass, "abs") - registerScalaFunction("abs", Array(TFloat32), TFloat32, (_: Type, _: Seq[PType]) => PFloat32())(mathPackageClass, "abs") - registerScalaFunction("abs", Array(TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(mathPackageClass, "abs") + registerScalaFunction("abs", Array(TInt32), TInt32, null)(mathPackageClass, "abs") + registerScalaFunction("abs", Array(TInt64), TInt64, null)(mathPackageClass, "abs") + registerScalaFunction("abs", Array(TFloat32), TFloat32, null)(mathPackageClass, "abs") + registerScalaFunction("abs", Array(TFloat64), TFloat64, null)(mathPackageClass, "abs") - registerScalaFunction("pow", Array(TInt32, TInt32), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(thisClass, "pow") - registerScalaFunction("pow", Array(TInt64, TInt64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(thisClass, "pow") - registerScalaFunction("pow", Array(TFloat32, TFloat32), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(thisClass, "pow") - registerScalaFunction("pow", Array(TFloat64, TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(thisClass, "pow") + registerScalaFunction("pow", Array(TInt32, TInt32), TFloat64, null)(thisClass, "pow") + registerScalaFunction("pow", Array(TInt64, TInt64), TFloat64, null)(thisClass, "pow") + registerScalaFunction("pow", Array(TFloat32, TFloat32), TFloat64, null)(thisClass, "pow") + registerScalaFunction("pow", Array(TFloat64, TFloat64), TFloat64, null)(thisClass, "pow") - registerScalaFunction("exp", Array(TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(mathPackageClass, "exp") - registerScalaFunction("log10", Array(TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(mathPackageClass, "log10") - registerScalaFunction("sqrt", Array(TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(mathPackageClass, "sqrt") - registerScalaFunction("log", Array(TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(mathPackageClass, "log") - registerScalaFunction("log", Array(TFloat64, TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(thisClass, "log") - registerScalaFunction("gamma", Array(TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(thisClass, "gamma") - registerScalaFunction("binomTest", Array(TInt32, TInt32, TFloat64, TInt32), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(statsPackageClass, "binomTest") + registerScalaFunction("exp", Array(TFloat64), TFloat64, null)(mathPackageClass, "exp") + registerScalaFunction("log10", Array(TFloat64), TFloat64, null)(mathPackageClass, "log10") + registerScalaFunction("sqrt", Array(TFloat64), TFloat64, null)(mathPackageClass, "sqrt") + registerScalaFunction("log", Array(TFloat64), TFloat64, null)(mathPackageClass, "log") + registerScalaFunction("log", Array(TFloat64, TFloat64), TFloat64, null)(thisClass, 
"log") + registerScalaFunction("gamma", Array(TFloat64), TFloat64, null)(thisClass, "gamma") + registerScalaFunction("binomTest", Array(TInt32, TInt32, TFloat64, TInt32), TFloat64, null)(statsPackageClass, "binomTest") - registerScalaFunction("dbeta", Array(TFloat64, TFloat64, TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(statsPackageClass, "dbeta") + registerScalaFunction("dbeta", Array(TFloat64, TFloat64, TFloat64), TFloat64, null)(statsPackageClass, "dbeta") - registerScalaFunction("pnorm", Array(TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(statsPackageClass, "pnorm") - registerScalaFunction("qnorm", Array(TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(statsPackageClass, "qnorm") + registerScalaFunction("pnorm", Array(TFloat64), TFloat64, null)(statsPackageClass, "pnorm") + registerScalaFunction("qnorm", Array(TFloat64), TFloat64, null)(statsPackageClass, "qnorm") - registerScalaFunction("pT", Array(TFloat64, TFloat64, TBoolean, TBoolean), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(statsPackageClass, "pT") - registerScalaFunction("pF", Array(TFloat64, TFloat64, TFloat64, TBoolean, TBoolean), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(statsPackageClass, "pF") + registerScalaFunction("pT", Array(TFloat64, TFloat64, TBoolean, TBoolean), TFloat64, null)(statsPackageClass, "pT") + registerScalaFunction("pF", Array(TFloat64, TFloat64, TFloat64, TBoolean, TBoolean), TFloat64, null)(statsPackageClass, "pF") - registerScalaFunction("dpois", Array(TFloat64, TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(statsPackageClass, "dpois") - registerScalaFunction("dpois", Array(TFloat64, TFloat64, TBoolean), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(statsPackageClass, "dpois") + registerScalaFunction("dpois", Array(TFloat64, TFloat64), TFloat64, null)(statsPackageClass, "dpois") + registerScalaFunction("dpois", Array(TFloat64, TFloat64, TBoolean), TFloat64, null)(statsPackageClass, "dpois") - registerScalaFunction("ppois", Array(TFloat64, TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(statsPackageClass, "ppois") - registerScalaFunction("ppois", Array(TFloat64, TFloat64, TBoolean, TBoolean), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(statsPackageClass, "ppois") + registerScalaFunction("ppois", Array(TFloat64, TFloat64), TFloat64, null)(statsPackageClass, "ppois") + registerScalaFunction("ppois", Array(TFloat64, TFloat64, TBoolean, TBoolean), TFloat64, null)(statsPackageClass, "ppois") - registerScalaFunction("qpois", Array(TFloat64, TFloat64), TInt32, (_: Type, _: Seq[PType]) => PInt32())(statsPackageClass, "qpois") - registerScalaFunction("qpois", Array(TFloat64, TFloat64, TBoolean, TBoolean), TInt32, (_: Type, _: Seq[PType]) => PInt32())(statsPackageClass, "qpois") + registerScalaFunction("qpois", Array(TFloat64, TFloat64), TInt32, null)(statsPackageClass, "qpois") + registerScalaFunction("qpois", Array(TFloat64, TFloat64, TBoolean, TBoolean), TInt32, null)(statsPackageClass, "qpois") - registerScalaFunction("pchisqtail", Array(TFloat64, TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(statsPackageClass, "chiSquaredTail") - registerScalaFunction("pnchisqtail", Array(TFloat64, TFloat64, TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(statsPackageClass, "nonCentralChiSquaredTail") - registerScalaFunction("qchisqtail", Array(TFloat64, TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(statsPackageClass, "inverseChiSquaredTail") + registerScalaFunction("pchisqtail", 
Array(TFloat64, TFloat64), TFloat64, null)(statsPackageClass, "chiSquaredTail") + registerScalaFunction("pnchisqtail", Array(TFloat64, TFloat64, TFloat64), TFloat64, null)(statsPackageClass, "nonCentralChiSquaredTail") + registerScalaFunction("qchisqtail", Array(TFloat64, TFloat64), TFloat64, null)(statsPackageClass, "inverseChiSquaredTail") - registerScalaFunction("floor", Array(TFloat32), TFloat32, (_: Type, _: Seq[PType]) => PFloat32())(thisClass, "floor") - registerScalaFunction("floor", Array(TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(thisClass, "floor") + registerScalaFunction("floor", Array(TFloat32), TFloat32, null)(thisClass, "floor") + registerScalaFunction("floor", Array(TFloat64), TFloat64, null)(thisClass, "floor") - registerScalaFunction("ceil", Array(TFloat32), TFloat32, (_: Type, _: Seq[PType]) => PFloat32())(thisClass, "ceil") - registerScalaFunction("ceil", Array(TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(thisClass, "ceil") + registerScalaFunction("ceil", Array(TFloat32), TFloat32, null)(thisClass, "ceil") + registerScalaFunction("ceil", Array(TFloat64), TFloat64, null)(thisClass, "ceil") - registerScalaFunction("mod", Array(TInt32, TInt32), TInt32, (_: Type, _: Seq[PType]) => PInt32())(thisClass, "mod") - registerScalaFunction("mod", Array(TInt64, TInt64), TInt64, (_: Type, _: Seq[PType]) => PInt64())(thisClass, "mod") - registerScalaFunction("mod", Array(TFloat32, TFloat32), TFloat32, (_: Type, _: Seq[PType]) => PFloat32())(thisClass, "mod") - registerScalaFunction("mod", Array(TFloat64, TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(thisClass, "mod") + registerScalaFunction("mod", Array(TInt32, TInt32), TInt32, null)(thisClass, "mod") + registerScalaFunction("mod", Array(TInt64, TInt64), TInt64, null)(thisClass, "mod") + registerScalaFunction("mod", Array(TFloat32, TFloat32), TFloat32, null)(thisClass, "mod") + registerScalaFunction("mod", Array(TFloat64, TFloat64), TFloat64, null)(thisClass, "mod") - registerJavaStaticFunction("isnan", Array(TFloat32), TBoolean, (_: Type, _: Seq[PType]) => PBoolean())(jFloatClass, "isNaN") - registerJavaStaticFunction("isnan", Array(TFloat64), TBoolean, (_: Type, _: Seq[PType]) => PBoolean())(jDoubleClass, "isNaN") + registerJavaStaticFunction("isnan", Array(TFloat32), TBoolean, null)(jFloatClass, "isNaN") + registerJavaStaticFunction("isnan", Array(TFloat64), TBoolean, null)(jDoubleClass, "isNaN") - registerJavaStaticFunction("is_finite", Array(TFloat32), TBoolean, (_: Type, _: Seq[PType]) => PBoolean())(jFloatClass, "isFinite") - registerJavaStaticFunction("is_finite", Array(TFloat64), TBoolean, (_: Type, _: Seq[PType]) => PBoolean())(jDoubleClass, "isFinite") + registerJavaStaticFunction("is_finite", Array(TFloat32), TBoolean, null)(jFloatClass, "isFinite") + registerJavaStaticFunction("is_finite", Array(TFloat64), TBoolean, null)(jDoubleClass, "isFinite") - registerJavaStaticFunction("is_infinite", Array(TFloat32), TBoolean, (_: Type, _: Seq[PType]) => PBoolean())(jFloatClass, "isInfinite") - registerJavaStaticFunction("is_infinite", Array(TFloat64), TBoolean, (_: Type, _: Seq[PType]) => PBoolean())(jDoubleClass, "isInfinite") + registerJavaStaticFunction("is_infinite", Array(TFloat32), TBoolean, null)(jFloatClass, "isInfinite") + registerJavaStaticFunction("is_infinite", Array(TFloat64), TBoolean, null)(jDoubleClass, "isInfinite") - registerJavaStaticFunction("sign", Array(TInt32), TInt32, (_: Type, _: Seq[PType]) => PInt32())(jIntegerClass, "signum") - 
registerScalaFunction("sign", Array(TInt64), TInt64, (_: Type, _: Seq[PType]) => PInt64())(mathPackageClass, "signum") - registerJavaStaticFunction("sign", Array(TFloat32), TFloat32, (_: Type, _: Seq[PType]) => PFloat32())(jMathClass, "signum") - registerJavaStaticFunction("sign", Array(TFloat64), TFloat64, (_: Type, _: Seq[PType]) => PFloat64())(jMathClass, "signum") + registerJavaStaticFunction("sign", Array(TInt32), TInt32, null)(jIntegerClass, "signum") + registerScalaFunction("sign", Array(TInt64), TInt64, null)(mathPackageClass, "signum") + registerJavaStaticFunction("sign", Array(TFloat32), TFloat32, null)(jMathClass, "signum") + registerJavaStaticFunction("sign", Array(TFloat64), TFloat64, null)(jMathClass, "signum") - registerScalaFunction("approxEqual", Array(TFloat64, TFloat64, TFloat64, TBoolean, TBoolean), TBoolean, (_: Type, _: Seq[PType]) => PBoolean())(thisClass, "approxEqual") + registerScalaFunction("approxEqual", Array(TFloat64, TFloat64, TFloat64, TBoolean, TBoolean), TBoolean, null)(thisClass, "approxEqual") - registerWrappedScalaFunction1("entropy", TString, TFloat64, (_: Type, _: PType) => PFloat64())(thisClass, "irentropy") + registerWrappedScalaFunction1("entropy", TString, TFloat64, null)(thisClass, "irentropy") - registerPCode4("fisher_exact_test", TInt32, TInt32, TInt32, TInt32, fetStruct.virtualType, - (_, _, _, _, _) => fetStruct + registerSCode4("fisher_exact_test", TInt32, TInt32, TInt32, TInt32, fetStruct.virtualType, + (_, _, _, _, _) => fetStruct.sType ) { case (r, cb, rt, a: SInt32Code, b: SInt32Code, c: SInt32Code, d: SInt32Code) => val res = cb.newLocal[Array[Double]]("fisher_exact_test_res", Code.invokeScalaObject4[Int, Int, Int, Int, Array[Double]](statsPackageClass, "fisherExactTest", @@ -186,8 +187,8 @@ object MathFunctions extends RegistryFunctions { ), deepCopy = false) } - registerPCode4("chi_squared_test", TInt32, TInt32, TInt32, TInt32, chisqStruct.virtualType, - (_, _, _, _, _) => chisqStruct + registerSCode4("chi_squared_test", TInt32, TInt32, TInt32, TInt32, chisqStruct.virtualType, + (_, _, _, _, _) => chisqStruct.sType ) { case (r, cb, rt, a: SInt32Code, b: SInt32Code, c: SInt32Code, d: SInt32Code) => val res = cb.newLocal[Array[Double]]("chi_squared_test_res", Code.invokeScalaObject4[Int, Int, Int, Int, Array[Double]](statsPackageClass, "chiSquaredTest", @@ -202,8 +203,8 @@ object MathFunctions extends RegistryFunctions { ), deepCopy = false) } - registerPCode5("contingency_table_test", TInt32, TInt32, TInt32, TInt32, TInt32, chisqStruct.virtualType, - (_, _, _, _, _, _) => chisqStruct + registerSCode5("contingency_table_test", TInt32, TInt32, TInt32, TInt32, TInt32, chisqStruct.virtualType, + (_, _, _, _, _, _) => chisqStruct.sType ) { case (r, cb, rt, a: SInt32Code, b: SInt32Code, c: SInt32Code, d: SInt32Code, mcc: SInt32Code) => val res = cb.newLocal[Array[Double]]("contingency_table_test_res", Code.invokeScalaObject5[Int, Int, Int, Int, Int, Array[Double]](statsPackageClass, "contingencyTableTest", @@ -219,8 +220,8 @@ object MathFunctions extends RegistryFunctions { ), deepCopy = false) } - registerPCode3("hardy_weinberg_test", TInt32, TInt32, TInt32, hweStruct.virtualType, - (_, _, _, _) => hweStruct + registerSCode3("hardy_weinberg_test", TInt32, TInt32, TInt32, hweStruct.virtualType, + (_, _, _, _) => hweStruct.sType ) { case (r, cb, rt, nHomRef: SInt32Code, nHet: SInt32Code, nHomVar: SInt32Code) => val res = cb.newLocal[Array[Double]]("hardy_weinberg_test_res", Code.invokeScalaObject3[Int, Int, Int, 
Array[Double]](statsPackageClass, "hardyWeinbergTest", diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/NDArrayFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/NDArrayFunctions.scala index 1ba22a949a3..9e37321e4a7 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/NDArrayFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/NDArrayFunctions.scala @@ -2,14 +2,15 @@ package is.hail.expr.ir.functions import is.hail.annotations.{Memory, Region} import is.hail.asm4s.{Code, Value} -import is.hail.expr.{Nat, NatVariable} import is.hail.expr.ir._ +import is.hail.expr.{Nat, NatVariable} import is.hail.linalg.{LAPACK, LinalgCodeUtils} import is.hail.types.coerce -import is.hail.types.physical.PCanonicalNDArray -import is.hail.types.physical.stypes.concrete.SNDArrayPointerSettable +import is.hail.types.physical.stypes.EmitType +import is.hail.types.physical.stypes.concrete.{SBaseStructPointer, SNDArrayPointer} +import is.hail.types.physical.stypes.interfaces._ +import is.hail.types.physical.{PBooleanRequired, PCanonicalNDArray, PCanonicalStruct, PFloat64Required, PType} import is.hail.types.virtual._ -import is.hail.utils._ object NDArrayFunctions extends RegistryFunctions { override def registerAll() { @@ -35,58 +36,75 @@ object NDArrayFunctions extends RegistryFunctions { } } - registerIEmitCode2("linear_solve", TNDArray(TFloat64, Nat(2)), TNDArray(TFloat64, Nat(2)), TNDArray(TFloat64, Nat(2)), { (t, p1, p2) => p2 }) { case (cb, region, pt, aec, bec) => - aec.toI(cb).flatMap(cb){ apc => - bec.toI(cb).map(cb){ bpc => - val aInput = apc.asNDArray.memoize(cb, "A") - val bInput = bpc.asNDArray.memoize(cb, "B") + def linear_solve(a: SNDArrayCode, b: SNDArrayCode, outputPt: PType, cb: EmitCodeBuilder, region: Value[Region]): (SNDArrayCode, Value[Int]) = { + val aInput = a.asNDArray.memoize(cb, "A") + val bInput = b.asNDArray.memoize(cb, "B") + + val aColMajor = LinalgCodeUtils.checkColMajorAndCopyIfNeeded(aInput, cb, region) + val bColMajor = LinalgCodeUtils.checkColMajorAndCopyIfNeeded(bInput, cb, region) + + val IndexedSeq(n0, n1) = aColMajor.shapes(cb) - val aColMajor = LinalgCodeUtils.checkColMajorAndCopyIfNeeded(aInput, cb, region) - val bColMajor = LinalgCodeUtils.checkColMajorAndCopyIfNeeded(bInput, cb, region) + cb.ifx(n0 cne n1, cb._fatal("hail.nd.solve: matrix a must be square.")) - val IndexedSeq(n0, n1) = aColMajor.shapes(cb) + val IndexedSeq(n, nrhs) = bColMajor.shapes(cb) - cb.ifx(n0 cne n1, cb._fatal("hail.nd.solve: matrix a must be square.")) + cb.ifx(n0 cne n, cb._fatal("hail.nd.solve: Solve dimensions incompatible")) - val IndexedSeq(n, nrhs) = bColMajor.shapes(cb) + val infoDGESVResult = cb.newLocal[Int]("dgesv_result") + val ipiv = cb.newLocal[Long]("dgesv_ipiv") + cb.assign(ipiv, Code.invokeStatic1[Memory, Long, Long]("malloc", n * 4L)) - cb.ifx(n0 cne n, cb._fatal("hail.nd.solve: Solve dimensions incompatible")) + val aCopy = cb.newLocal[Long]("dgesv_a_copy") - val infoDGESVResult = cb.newLocal[Int]("dgesv_result") - val ipiv = cb.newLocal[Long]("dgesv_ipiv") - cb.assign(ipiv, Code.invokeStatic1[Memory, Long, Long]("malloc", n * 4L)) + def aNumBytes = n * n * 8L - val aCopy = cb.newLocal[Long]("dgesv_a_copy") - def aNumBytes = n * n * 8L - cb.assign(aCopy, Code.invokeStatic1[Memory, Long, Long]("malloc", aNumBytes)) - val aColMajorFirstElement = aColMajor.firstDataAddress(cb) + cb.assign(aCopy, Code.invokeStatic1[Memory, Long, Long]("malloc", aNumBytes)) + val aColMajorFirstElement = aColMajor.firstDataAddress(cb) - 
cb.append(Region.copyFrom(aColMajorFirstElement, aCopy, aNumBytes)) + cb.append(Region.copyFrom(aColMajorFirstElement, aCopy, aNumBytes)) - val outputPType = coerce[PCanonicalNDArray](pt) - val outputShape = IndexedSeq(n, nrhs) - val (outputAddress, outputFinisher) = outputPType.constructDataFunction(outputShape, outputPType.makeColumnMajorStrides(outputShape, region, cb), cb, region) + val outputPType = coerce[PCanonicalNDArray](outputPt) + val outputShape = IndexedSeq(n, nrhs) + val (outputAddress, outputFinisher) = outputPType.constructDataFunction(outputShape, outputPType.makeColumnMajorStrides(outputShape, region, cb), cb, region) - cb.append(Region.copyFrom(bColMajor.firstDataAddress(cb), outputAddress, n * nrhs * 8L)) + cb.append(Region.copyFrom(bColMajor.firstDataAddress(cb), outputAddress, n * nrhs * 8L)) - cb.assign(infoDGESVResult, Code.invokeScalaObject7[Int, Int, Long, Int, Long, Long, Int, Int](LAPACK.getClass, "dgesv", - n.toI, - nrhs.toI, - aCopy, - n.toI, - ipiv, - outputAddress, - n.toI - )) + cb.assign(infoDGESVResult, Code.invokeScalaObject7[Int, Int, Long, Int, Long, Long, Int, Int](LAPACK.getClass, "dgesv", + n.toI, + nrhs.toI, + aCopy, + n.toI, + ipiv, + outputAddress, + n.toI + )) - cb.ifx(infoDGESVResult cne 0, cb._fatal(s"hl.nd.solve: Could not solve, matrix was singular. dgesv error code ", infoDGESVResult.toS)) + cb.append(Code.invokeStatic1[Memory, Long, Unit]("free", ipiv.load())) + cb.append(Code.invokeStatic1[Memory, Long, Unit]("free", aCopy.load())) - cb.append(Code.invokeStatic1[Memory, Long, Unit]("free", ipiv.load())) - cb.append(Code.invokeStatic1[Memory, Long, Unit]("free", aCopy.load())) + (outputFinisher(cb), infoDGESVResult) + } - outputFinisher(cb) + registerIEmitCode2("linear_solve_no_crash", TNDArray(TFloat64, Nat(2)), TNDArray(TFloat64, Nat(2)), TStruct(("solution", TNDArray(TFloat64, Nat(2))), ("failed", TBoolean)), + { (t, p1, p2) => EmitType(PCanonicalStruct(false, ("solution", PCanonicalNDArray(PFloat64Required, 2, false)), ("failed", PBooleanRequired)).sType, false) }) { + case (cb, region, SBaseStructPointer(outputStructType: PCanonicalStruct), aec, bec) => + aec.toI(cb).flatMap(cb) { apc => + bec.toI(cb).map(cb) { bpc => + val outputNDArrayPType = outputStructType.fieldType("solution") + val (resNDPCode, info) = linear_solve(apc.asNDArray, bpc.asNDArray, outputNDArrayPType, cb, region) + val ndEmitCode = EmitCode(Code._empty, info cne 0, resNDPCode) + outputStructType.constructFromFields(cb, region, IndexedSeq[EmitCode](ndEmitCode, EmitCode(Code._empty, false, primitive(info cne 0))), false) + } } - } + } + + registerSCode2("linear_solve", TNDArray(TFloat64, Nat(2)), TNDArray(TFloat64, Nat(2)), TNDArray(TFloat64, Nat(2)), + { (t, p1, p2) => PCanonicalNDArray(PFloat64Required, 2, true).sType }) { + case (er, cb, SNDArrayPointer(pt), apc, bpc) => + val (resPCode, info) = linear_solve(apc.asNDArray, bpc.asNDArray, pt, cb, er.region) + cb.ifx(info cne 0, cb._fatal(s"hl.nd.solve: Could not solve, matrix was singular. 
dgesv error code ", info.toS)) + resPCode } } } diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/RandomSeededFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/RandomSeededFunctions.scala index e995a4a7f21..20e5ea4ecaf 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/RandomSeededFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/RandomSeededFunctions.scala @@ -2,8 +2,11 @@ package is.hail.expr.ir.functions import is.hail.asm4s._ import is.hail.expr.ir.IEmitCode +import is.hail.types.physical.stypes._ +import is.hail.types.physical.stypes.concrete.SIndexablePointer import is.hail.types.physical.stypes.interfaces._ -import is.hail.types.physical.{PBoolean, PCanonicalArray, PCode, PFloat64, PInt32, PType} +import is.hail.types.physical.stypes.primitives._ +import is.hail.types.physical.{PBoolean, PCanonicalArray, PFloat64, PInt32, PType} import is.hail.types.virtual._ import net.sourceforge.jdistlib.rng.MersenneTwister import net.sourceforge.jdistlib.{Beta, Gamma, Poisson} @@ -56,28 +59,28 @@ object RandomSeededFunctions extends RegistryFunctions { def registerAll() { registerSeeded2("rand_unif", TFloat64, TFloat64, TFloat64, { - case (_: Type, _: PType, _: PType) => PFloat64() + case (_: Type, _: SType, _: SType) => SFloat64 }) { case (cb, r, rt, seed, min, max) => - PCode(rt, cb.emb.newRNG(seed).invoke[Double, Double, Double]("runif", min.asDouble.doubleCode(cb), max.asDouble.doubleCode(cb))) + primitive(cb.emb.newRNG(seed).invoke[Double, Double, Double]("runif", min.asDouble.doubleCode(cb), max.asDouble.doubleCode(cb))) } registerSeeded2("rand_norm", TFloat64, TFloat64, TFloat64, { - case (_: Type, _: PType, _: PType) => PFloat64() + case (_: Type, _: SType, _: SType) => SFloat64 }) { case (cb, r, rt, seed, mean, sd) => - PCode(rt, cb.emb.newRNG(seed).invoke[Double, Double, Double]("rnorm", mean.asDouble.doubleCode(cb), sd.asDouble.doubleCode(cb))) + primitive(cb.emb.newRNG(seed).invoke[Double, Double, Double]("rnorm", mean.asDouble.doubleCode(cb), sd.asDouble.doubleCode(cb))) } - registerSeeded1("rand_bool", TFloat64, TBoolean, (_: Type, _: PType) => PBoolean()) { case (cb, r, rt, seed, p) => - PCode(rt, cb.emb.newRNG(seed).invoke[Double, Boolean]("rcoin", p.asDouble.doubleCode(cb))) + registerSeeded1("rand_bool", TFloat64, TBoolean, (_: Type, _: SType) => SBoolean) { case (cb, r, rt, seed, p) => + primitive(cb.emb.newRNG(seed).invoke[Double, Boolean]("rcoin", p.asDouble.doubleCode(cb))) } - registerSeeded1("rand_pois", TFloat64, TFloat64, (_: Type, _: PType) => PFloat64()) { case (cb, r, rt, seed, lambda) => - PCode(rt, cb.emb.newRNG(seed).invoke[Double, Double]("rpois", lambda.asDouble.doubleCode(cb))) + registerSeeded1("rand_pois", TFloat64, TFloat64, (_: Type, _: SType) => SFloat64) { case (cb, r, rt, seed, lambda) => + primitive(cb.emb.newRNG(seed).invoke[Double, Double]("rpois", lambda.asDouble.doubleCode(cb))) } registerSeeded2("rand_pois", TInt32, TFloat64, TArray(TFloat64), { - case (_: Type, _: PType, _: PType) => PCanonicalArray(PFloat64(true)) - }) { case (cb, r, rt: PCanonicalArray, seed, n, lambdaCode) => + case (_: Type, _: SType, _: SType) => PCanonicalArray(PFloat64(true)).sType + }) { case (cb, r, SIndexablePointer(rt: PCanonicalArray), seed, n, lambdaCode) => val len = cb.newLocal[Int]("rand_pos_len", n.asInt.intCode(cb)) val lambda = cb.newLocal[Double]("rand_pois_lambda", lambdaCode.asDouble.doubleCode(cb)) @@ -87,15 +90,15 @@ object RandomSeededFunctions extends RegistryFunctions { } registerSeeded2("rand_beta", 
TFloat64, TFloat64, TFloat64, { - case (_: Type, _: PType, _: PType) => PFloat64() + case (_: Type, _: SType, _: SType) => SFloat64 }) { case (cb, r, rt, seed, a, b) => - PCode(rt, + primitive( cb.emb.newRNG(seed).invoke[Double, Double, Double]("rbeta", a.asDouble.doubleCode(cb), b.asDouble.doubleCode(cb))) } registerSeeded4("rand_beta", TFloat64, TFloat64, TFloat64, TFloat64, TFloat64, { - case (_: Type, _: PType, _: PType, _: PType, _: PType) => PFloat64() + case (_: Type, _: SType, _: SType, _: SType, _: SType) => SFloat64 }) { case (cb, r, rt, seed, a, b, min, max) => val rng = cb.emb.newRNG(seed) @@ -107,19 +110,18 @@ object RandomSeededFunctions extends RegistryFunctions { cb.whileLoop(value < lmin || value > lmax, { cb.assign(value, rng.invoke[Double, Double, Double]("rbeta", la, lb)) }) - PCode(rt, value) + primitive(value) } registerSeeded2("rand_gamma", TFloat64, TFloat64, TFloat64, { - case (_: Type, _: PType, _: PType) => PFloat64() + case (_: Type, _: SType, _: SType) => SFloat64 }) { case (cb, r, rt, seed, a, scale) => - PCode( - rt, + primitive( cb.emb.newRNG(seed).invoke[Double, Double, Double]("rgamma", a.asDouble.doubleCode(cb), scale.asDouble.doubleCode(cb)) ) } - registerSeeded1("rand_cat", TArray(TFloat64), TInt32, (_: Type, _: PType) => PInt32()) { case (cb, r, rt, seed, aCode) => + registerSeeded1("rand_cat", TArray(TFloat64), TInt32, (_: Type, _: SType) => SInt32) { case (cb, r, rt, seed, aCode) => val weights = aCode.asIndexable.memoize(cb, "rand_cat_weights") val len = weights.loadLength() @@ -133,7 +135,7 @@ object RandomSeededFunctions extends RegistryFunctions { ) cb.assign(i, i + 1) }) - PCode(rt, cb.emb.newRNG(seed).invoke[Array[Double], Int]("rcat", a)) + primitive(cb.emb.newRNG(seed).invoke[Array[Double], Int]("rcat", a)) } } } diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/ReferenceGenomeFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/ReferenceGenomeFunctions.scala index 64fe56424ae..d563e778803 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/ReferenceGenomeFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/ReferenceGenomeFunctions.scala @@ -3,6 +3,10 @@ package is.hail.expr.ir.functions import is.hail.asm4s import is.hail.asm4s._ import is.hail.expr.ir._ +import is.hail.types.physical.stypes.SType +import is.hail.types.physical.stypes.concrete.SStringPointer +import is.hail.types.physical.stypes.primitives.{SBoolean, SInt32} +import is.hail.types.physical.stypes.interfaces._ import is.hail.types.physical.{PBoolean, PCanonicalString, PInt32, PLocus, PString, PType} import is.hail.types.virtual._ import is.hail.variant.ReferenceGenome @@ -11,25 +15,25 @@ object ReferenceGenomeFunctions extends RegistryFunctions { def rgCode(mb: EmitMethodBuilder[_], rg: ReferenceGenome): Code[ReferenceGenome] = mb.getReferenceGenome(rg) def registerAll() { - registerCode1t("isValidContig", LocusFunctions.tlocus("R"), TString, TBoolean, (_: Type, _: PType) => PBoolean()) { - case (r, rt, tlocus, (contigT, contig: Code[Long])) => - val scontig = asm4s.coerce[String](wrapArg(r, contigT)(contig)) - rgCode(r.mb, tlocus.asInstanceOf[TLocus].rg).invoke[String, Boolean]("isValidContig", scontig) + registerSCode1t("isValidContig", Array(LocusFunctions.tlocus("R")), TString, TBoolean, (_: Type, _: SType) => SBoolean) { + case (r, cb, Seq(tlocus: TLocus), _, contig) => + val scontig = contig.asString.loadString() + primitive(rgCode(r.mb, tlocus.asInstanceOf[TLocus].rg).invoke[String, Boolean]("isValidContig", scontig)) } - 
registerCode2t("isValidLocus", LocusFunctions.tlocus("R"), TString, TInt32, TBoolean, (_: Type, _: PType, _: PType) => PBoolean()) { - case (r, rt, typeArg: TLocus, (contigT, contig: Code[Long]), (posT, pos: Code[Int])) => - val scontig = asm4s.coerce[String](wrapArg(r, contigT)(contig)) - rgCode(r.mb, typeArg.rg).invoke[String, Int, Boolean]("isValidLocus", scontig, pos) + registerSCode2t("isValidLocus", Array(LocusFunctions.tlocus("R")), TString, TInt32, TBoolean, (_: Type, _: SType, _: SType) => SBoolean) { + case (r, cb, Seq(tlocus: TLocus), _, contig, pos) => + val scontig = contig.asString.loadString() + primitive(rgCode(r.mb, tlocus.rg).invoke[String, Int, Boolean]("isValidLocus", scontig, pos.asInt.intCode(cb))) } - registerPCode4t("getReferenceSequenceFromValidLocus", + registerSCode4t("getReferenceSequenceFromValidLocus", Array(LocusFunctions.tlocus("R")), TString, TInt32, TInt32, TInt32, TString, - (_: Type, _: PType, _: PType, _: PType, _: PType) => PCanonicalString()) { - case (r, cb, Seq(typeParam: TLocus), rt: PString, contig, pos, before, after) => + (_: Type, _: SType, _: SType, _: SType, _: SType) => SStringPointer(PCanonicalString())) { + case (r, cb, Seq(typeParam: TLocus), st, contig, pos, before, after) => val scontig = contig.asString.loadString() - unwrapReturn(cb, r.region, rt, + unwrapReturn(cb, r.region, st, rgCode(cb.emb, typeParam.rg).invoke[String, Int, Int, Int, String]("getSequence", scontig, pos.asInt.intCode(cb), @@ -37,10 +41,10 @@ object ReferenceGenomeFunctions extends RegistryFunctions { after.asInt.intCode(cb))) } - registerCode1t("contigLength", LocusFunctions.tlocus("R"), TString, TInt32, (_: Type, _: PType) => PInt32()) { - case (r, rt, typeArg: TLocus, (contigT, contig: Code[Long])) => - val scontig = asm4s.coerce[String](wrapArg(r, contigT)(contig)) - rgCode(r.mb, typeArg.rg).invoke[String, Int]("contigLength", scontig) + registerSCode1t("contigLength", Array(LocusFunctions.tlocus("R")), TString, TInt32, (_: Type, _: SType) => SInt32) { + case (r, cb, Seq(tlocus: TLocus), _, contig) => + val scontig = contig.asString.loadString() + primitive(rgCode(r.mb, tlocus.rg).invoke[String, Int]("contigLength", scontig)) } registerIR("getReferenceSequence", Array(TString, TInt32, TInt32, TInt32), TString, typeParameters = Array(LocusFunctions.tlocus("R"))) { diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/StringFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/StringFunctions.scala index e79b3e6752c..1f2f018be60 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/StringFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/StringFunctions.scala @@ -3,13 +3,15 @@ package is.hail.expr.ir.functions import java.time.temporal.ChronoField import java.time.{Instant, ZoneId} import java.util.Locale - import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.JSONAnnotationImpex import is.hail.expr.ir._ import is.hail.types.physical._ -import is.hail.types.physical.stypes.concrete.SStringPointer +import is.hail.types.physical.stypes._ +import is.hail.types.physical.stypes.concrete.{SIndexablePointer, SStringPointer} +import is.hail.types.physical.stypes.interfaces._ +import is.hail.types.physical.stypes.primitives.{SBoolean, SInt32, SInt64} import is.hail.types.virtual._ import is.hail.utils._ import org.apache.spark.sql.Row @@ -98,17 +100,16 @@ object StringFunctions extends RegistryFunctions { def registerAll(): Unit = { val thisClass = getClass - registerPCode1("length", TString, TInt32, (_: Type, _: 
PType) => PInt32()) { case (r: EmitRegion, cb, rt, s: PStringCode) => - PCode(rt, s.loadString().invoke[Int]("length")) + registerSCode1("length", TString, TInt32, (_: Type, _: SType) => SInt32) { case (r: EmitRegion, cb, _, s: SStringCode) => + primitive(s.loadString().invoke[Int]("length")) } - registerPCode3("substring", TString, TInt32, TInt32, TString, { - (_: Type, _: PType, _: PType, _: PType) => PCanonicalString() + registerSCode3("substring", TString, TInt32, TInt32, TString, { + (_: Type, _: SType, _: SType, _: SType) => SStringPointer(PCanonicalString()) }) { - case (r: EmitRegion, cb, rt: PString, s, start, end) => + case (r: EmitRegion, cb, st: SString, s, start, end) => val str = s.asString.loadString().invoke[Int, Int, String]("substring", start.asInt.intCode(cb), end.asInt.intCode(cb)) - val st = SStringPointer(rt) st.constructFromString(cb, r.region, str) } @@ -140,116 +141,112 @@ object StringFunctions extends RegistryFunctions { registerIR2("sliceRight", TString, TInt32, TString) { (_, s, start) => invoke("slice", TString, s, start, invoke("length", TInt32, s)) } registerIR2("sliceLeft", TString, TInt32, TString) { (_, s, end) => invoke("slice", TString, s, I32(0), end) } - registerPCode1("str", tv("T"), TString, (_: Type, _: PType) => PCanonicalString()) { case (r, cb, rt: PString, a) => + registerSCode1("str", tv("T"), TString, (_: Type, _: SType) => SStringPointer(PCanonicalString())) { case (r, cb, st: SString, a) => val annotation = scodeToJavaValue(cb, r.region, a) - val str = cb.emb.getType(a.pt.virtualType).invoke[Any, String]("str", annotation) - val st = SStringPointer(rt) + val str = cb.emb.getType(a.st.virtualType).invoke[Any, String]("str", annotation) st.constructFromString(cb, r.region, str) } registerIEmitCode1("showStr", tv("T"), TString, { - (_: Type, _: PType) => PCanonicalString(true) - }) { case (cb, r, rt: PCanonicalString, a) => - val jObj = cb.newLocal("showstr_java_obj")(boxedTypeInfo(a.pt.virtualType)) + (_: Type, _: EmitType) => EmitType(SStringPointer(PCanonicalString()), true) + }) { case (cb, r, st: SString, a) => + val jObj = cb.newLocal("showstr_java_obj")(boxedTypeInfo(a.st.virtualType)) a.toI(cb).consume(cb, - cb.assignAny(jObj, Code._null(boxedTypeInfo(a.pt.virtualType))), + cb.assignAny(jObj, Code._null(boxedTypeInfo(a.st.virtualType))), sc => cb.assignAny(jObj, scodeToJavaValue(cb, r, sc))) - val str = cb.emb.getType(a.pt.virtualType).invoke[Any, String]("showStr", jObj) - val st = SStringPointer(rt) + val str = cb.emb.getType(a.st.virtualType).invoke[Any, String]("showStr", jObj) IEmitCode.present(cb, st.constructFromString(cb, r, str)) } registerIEmitCode2("showStr", tv("T"), TInt32, TString, { - (_: Type, _: PType, truncType: PType) => PCanonicalString(truncType.required) - }) { case (cb, r, rt: PCanonicalString, a, trunc) => - val jObj = cb.newLocal("showstr_java_obj")(boxedTypeInfo(a.pt.virtualType)) + (_: Type, _: EmitType, truncType: EmitType) => EmitType(SStringPointer(PCanonicalString()), truncType.required) + }) { case (cb, r, st: SString, a, trunc) => + val jObj = cb.newLocal("showstr_java_obj")(boxedTypeInfo(a.st.virtualType)) trunc.toI(cb).map(cb) { trunc => a.toI(cb).consume(cb, - cb.assignAny(jObj, Code._null(boxedTypeInfo(a.pt.virtualType))), + cb.assignAny(jObj, Code._null(boxedTypeInfo(a.st.virtualType))), sc => cb.assignAny(jObj, scodeToJavaValue(cb, r, sc))) - val str = cb.emb.getType(a.pt.virtualType).invoke[Any, Int, String]("showStr", jObj, trunc.asInt.intCode(cb)) - val st = SStringPointer(rt) - + val str = 
cb.emb.getType(a.st.virtualType).invoke[Any, Int, String]("showStr", jObj, trunc.asInt.intCode(cb)) st.constructFromString(cb, r, str) } } - registerIEmitCode1("json", tv("T"), TString, (_: Type, _: PType) => PCanonicalString(true)) { case (cb, r, rt: PString, a) => - val ti = boxedTypeInfo(a.pt.sType.virtualType) - val inputJavaValue = cb.newLocal("json_func_input_jv")(ti) - a.toI(cb).consume(cb, - cb.assignAny(inputJavaValue, Code._null(ti)), - { sc => - val jv = scodeToJavaValue(cb, r, sc) - cb.assignAny(inputJavaValue, jv) - }) - val json = cb.emb.getType(a.pt.sType.virtualType).invoke[Any, JValue]("toJSON", inputJavaValue) - val str = Code.invokeScalaObject1[JValue, String](JsonMethods.getClass, "compact", json) - val st = SStringPointer(rt) - IEmitCode.present(cb, st.constructFromString(cb, r, str)) + registerIEmitCode1("json", tv("T"), TString, (_: Type, _: EmitType) => EmitType(SStringPointer(PCanonicalString()), true)) { + case (cb, r, st: SString, a) => + val ti = boxedTypeInfo(a.st.virtualType) + val inputJavaValue = cb.newLocal("json_func_input_jv")(ti) + a.toI(cb).consume(cb, + cb.assignAny(inputJavaValue, Code._null(ti)), + { sc => + val jv = scodeToJavaValue(cb, r, sc) + cb.assignAny(inputJavaValue, jv) + }) + val json = cb.emb.getType(a.st.virtualType).invoke[Any, JValue]("toJSON", inputJavaValue) + val str = Code.invokeScalaObject1[JValue, String](JsonMethods.getClass, "compact", json) + IEmitCode.present(cb, st.constructFromString(cb, r, str)) } - registerWrappedScalaFunction1("reverse", TString, TString, (_: Type, _: PType) => PCanonicalString())(thisClass, "reverse") - registerWrappedScalaFunction1("upper", TString, TString, (_: Type, _: PType) => PCanonicalString())(thisClass, "upper") - registerWrappedScalaFunction1("lower", TString, TString, (_: Type, _: PType) => PCanonicalString())(thisClass, "lower") - registerWrappedScalaFunction1("strip", TString, TString, (_: Type, _: PType) => PCanonicalString())(thisClass, "strip") + registerWrappedScalaFunction1("reverse", TString, TString, (_: Type, _: SType) => SStringPointer(PCanonicalString()))(thisClass, "reverse") + registerWrappedScalaFunction1("upper", TString, TString, (_: Type, _: SType) => SStringPointer(PCanonicalString()))(thisClass, "upper") + registerWrappedScalaFunction1("lower", TString, TString, (_: Type, _: SType) => SStringPointer(PCanonicalString()))(thisClass, "lower") + registerWrappedScalaFunction1("strip", TString, TString, (_: Type, _: SType) => SStringPointer(PCanonicalString()))(thisClass, "strip") registerWrappedScalaFunction2("contains", TString, TString, TBoolean, { - case (_: Type, _: PType, _: PType) => PBoolean() + case (_: Type, _: SType, _: SType) => SBoolean })(thisClass, "contains") registerWrappedScalaFunction2("translate", TString, TDict(TString, TString), TString, { - case (_: Type, _: PType, _: PType) => PCanonicalString() + case (_: Type, _: SType, _: SType) => SStringPointer(PCanonicalString()) })(thisClass, "translate") registerWrappedScalaFunction2("startswith", TString, TString, TBoolean, { - case (_: Type, _: PType, _: PType) => PBoolean() + case (_: Type, _: SType, _: SType) => SBoolean })(thisClass, "startswith") registerWrappedScalaFunction2("endswith", TString, TString, TBoolean, { - case (_: Type, _: PType, _: PType) => PBoolean() + case (_: Type, _: SType, _: SType) => SBoolean })(thisClass, "endswith") registerWrappedScalaFunction2("regexMatch", TString, TString, TBoolean, { - case (_: Type, _: PType, _: PType) => PBoolean() + case (_: Type, _: SType, _: SType) => 
SBoolean })(thisClass, "regexMatch") registerWrappedScalaFunction2("concat", TString, TString, TString, { - case (_: Type, _: PType, _: PType) => PCanonicalString() + case (_: Type, _: SType, _: SType) => SStringPointer(PCanonicalString()) })(thisClass, "concat") registerWrappedScalaFunction2("split", TString, TString, TArray(TString), { - case (_: Type, _: PType, _: PType) => - PCanonicalArray(PCanonicalString(true)) + case (_: Type, _: SType, _: SType) => + PCanonicalArray(PCanonicalString(true)).sType })(thisClass, "split") registerWrappedScalaFunction3("split", TString, TString, TInt32, TArray(TString), { - case (_: Type, _: PType, _: PType, _: PType) => - PCanonicalArray(PCanonicalString(true)) + case (_: Type, _: SType, _: SType, _: SType) => + PCanonicalArray(PCanonicalString(true)).sType })(thisClass, "splitLimited") registerWrappedScalaFunction3("replace", TString, TString, TString, TString, { - case (_: Type, _: PType, _: PType, _: PType) => PCanonicalString() + case (_: Type, _: SType, _: SType, _: SType) => SStringPointer(PCanonicalString()) })(thisClass, "replace") registerWrappedScalaFunction2("mkString", TSet(TString), TString, TString, { - case (_: Type, _: PType, _: PType) => PCanonicalString() + case (_: Type, _: SType, _: SType) => SStringPointer(PCanonicalString()) })(thisClass, "setMkString") registerWrappedScalaFunction2("mkString", TArray(TString), TString, TString, { - case (_: Type, _: PType, _: PType) => PCanonicalString() + case (_: Type, _: SType, _: SType) => SStringPointer(PCanonicalString()) })(thisClass, "arrayMkString") registerIEmitCode2("firstMatchIn", TString, TString, TArray(TString), { - case (_: Type, _: PType, _: PType) => PCanonicalArray(PCanonicalString(true)) - }) { case (cb: EmitCodeBuilder, region: Value[Region], rt: PCanonicalArray, + case (_: Type, _: EmitType, _: EmitType) => EmitType(PCanonicalArray(PCanonicalString(true)).sType, false) + }) { case (cb: EmitCodeBuilder, region: Value[Region], SIndexablePointer(rt: PCanonicalArray), s: EmitCode, r: EmitCode) => - s.toI(cb).flatMap(cb) { case sc: PStringCode => - r.toI(cb).flatMap(cb) { case rc: PStringCode => + s.toI(cb).flatMap(cb) { case sc: SStringCode => + r.toI(cb).flatMap(cb) { case rc: SStringCode => val out = cb.newLocal[IndexedSeq[String]]("out", Code.invokeScalaObject2[String, String, IndexedSeq[String]]( thisClass, "firstMatchIn", sc.loadString(), rc.loadString())) IEmitCode(cb, out.isNull, { val len = cb.newLocal[Int]("len", out.invoke[Int]("size")) - val eltType = rt.elementType.asInstanceOf[PCanonicalString] + val eltType = rt.elementType.setRequired(false).asInstanceOf[PCanonicalString] val sstring = SStringPointer(eltType) rt.constructFromElements(cb, region, len, deepCopy = false) { (cb, idx) => val elt = cb.newLocal[String]("first_match_elt", out.invoke[Int, String]("apply", idx)) @@ -261,11 +258,11 @@ object StringFunctions extends RegistryFunctions { } registerEmitCode2("hamming", TString, TString, TInt32, { - case (_: Type, _: PType, _: PType) => PInt32() + case (_: Type, _: EmitType, _: EmitType) => EmitType(SInt32, false) }) { case (r: EmitRegion, rt, e1: EmitCode, e2: EmitCode) => EmitCode.fromI(r.mb) { cb => - e1.toI(cb).flatMap(cb) { case (sc1: PStringCode) => - e2.toI(cb).flatMap(cb) { case (sc2: PStringCode) => + e1.toI(cb).flatMap(cb) { case (sc1: SStringCode) => + e2.toI(cb).flatMap(cb) { case (sc2: SStringCode) => val n = cb.newLocal("hamming_n", 0) val i = cb.newLocal("hamming_i", 0) @@ -280,24 +277,24 @@ object StringFunctions extends RegistryFunctions { 
cb.assign(n, n + 1)) cb.assign(i, i + 1) }) - PCode(rt, n) + primitive(n) }) } } } } - registerWrappedScalaFunction1("escapeString", TString, TString, (_: Type, _: PType) => PCanonicalString())(thisClass, "escapeString") + registerWrappedScalaFunction1("escapeString", TString, TString, (_: Type, _: SType) => SStringPointer(PCanonicalString()))(thisClass, "escapeString") registerWrappedScalaFunction3("strftime", TString, TInt64, TString, TString, { - case (_: Type, _: PType, _: PType, _: PType) => PCanonicalString() + case (_: Type, _: SType, _: SType, _: SType) => SStringPointer(PCanonicalString()) })(thisClass, "strftime") registerWrappedScalaFunction3("strptime", TString, TString, TString, TInt64, { - case (_: Type, _: PType, _: PType, _: PType) => PInt64() + case (_: Type, _: SType, _: SType, _: SType) => SInt64 })(thisClass, "strptime") - registerPCode("parse_json", Array(TString), TTuple(tv("T")), - (rType: Type, _: Seq[PType]) => PType.canonical(rType, true), typeParameters = Array(tv("T")) - ) { case (er, cb, _, resultType, Array(s: PStringCode)) => + registerSCode("parse_json", Array(TString), TTuple(tv("T")), + (rType: Type, _: Seq[SType]) => SType.canonical(rType), typeParameters = Array(tv("T")) + ) { case (er, cb, _, resultType, Array(s: SStringCode)) => val warnCtx = cb.emb.genFieldThisRef[mutable.HashSet[String]]("parse_json_context") cb.ifx(warnCtx.load().isNull, cb.assign(warnCtx, Code.newInstance[mutable.HashSet[String]]())) diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/UtilFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/UtilFunctions.scala index 1d558fcc68b..7197afbe67c 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/UtilFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/UtilFunctions.scala @@ -4,11 +4,12 @@ import is.hail.asm4s import is.hail.asm4s.{coerce => _, _} import is.hail.expr.ir._ import is.hail.types.physical._ +import is.hail.types.physical.stypes._ +import is.hail.types.physical.stypes.primitives._ import is.hail.types.physical.stypes.concrete.SStringPointer import is.hail.utils._ import is.hail.types.virtual._ import is.hail.types.physical.stypes.interfaces._ -import is.hail.types.physical.stypes.primitives.SPrimitive import org.apache.spark.sql.Row import scala.reflect.ClassTag @@ -21,53 +22,91 @@ object UtilFunctions extends RegistryFunctions { def parseInt64(s: String): Long = s.toLong - private val NAN = 1 - private val POS_INF = 2 - private val NEG_INF = 3 - - def parseSpecialNum(s: String): Int = s.length match { - case 3 if s equalsCI "nan" => NAN - case 4 if (s equalsCI "+nan") || (s equalsCI "-nan") => NAN - case 3 if s equalsCI "inf" => POS_INF - case 4 if s equalsCI "+inf" => POS_INF - case 4 if s equalsCI "-inf" => NEG_INF - case 8 if s equalsCI "infinity" => POS_INF - case 9 if s equalsCI "+infinity" => POS_INF - case 9 if s equalsCI "-infinity" => NEG_INF - case _ => 0 + def parseSpecialNum32(s: String): Float = { + s.length match { + case 3 => + if (s.equalsCaseInsensitive("nan")) return Float.NaN + if (s.equalsCaseInsensitive("inf")) return Float.PositiveInfinity + case 4 => + if (s.equalsCaseInsensitive("+nan") || s.equalsCaseInsensitive("-nan")) return Float.NaN + if (s.equalsCaseInsensitive("+inf")) return Float.PositiveInfinity + if (s.equalsCaseInsensitive("-inf")) return Float.NegativeInfinity + case 8 => + if (s.equalsCaseInsensitive("infinity")) return Float.PositiveInfinity + case 9 => + if (s.equalsCaseInsensitive("+infinity")) return Float.PositiveInfinity + if 
(s.equalsCaseInsensitive("-infinity")) return Float.NegativeInfinity + case _ => + } + throw new NumberFormatException(s"cannot parse float32 from $s") } - def parseFloat32(s: String): Float = parseSpecialNum(s) match { - case NAN => Float.NaN - case POS_INF => Float.PositiveInfinity - case NEG_INF => Float.NegativeInfinity - case _ => s.toFloat + def parseSpecialNum64(s: String): Double = { + s.length match { + case 3 => + if (s.equalsCaseInsensitive("nan")) return Double.NaN + if (s.equalsCaseInsensitive("inf")) return Double.PositiveInfinity + case 4 => + if (s.equalsCaseInsensitive("+nan") || s.equalsCaseInsensitive("-nan")) return Double.NaN + if (s.equalsCaseInsensitive("+inf")) return Double.PositiveInfinity + if (s.equalsCaseInsensitive("-inf")) return Double.NegativeInfinity + case 8 => + if (s.equalsCaseInsensitive("infinity")) return Double.PositiveInfinity + case 9 => + if (s.equalsCaseInsensitive("+infinity")) return Double.PositiveInfinity + if (s.equalsCaseInsensitive("-infinity")) return Double.NegativeInfinity + case _ => + } + throw new NumberFormatException(s"cannot parse float64 from $s") } - def parseFloat64(s: String): Double = parseSpecialNum(s) match { - case NAN => Double.NaN - case POS_INF => Double.PositiveInfinity - case NEG_INF => Double.NegativeInfinity - case _ => s.toDouble + def parseFloat32(s: String): Float = { + try { + s.toFloat + } catch { + case _: NumberFormatException => + parseSpecialNum32(s) + } + } + + def parseFloat64(s: String): Double = { + try { + s.toDouble + } catch { + case _: NumberFormatException => + parseSpecialNum64(s) + } } def isValidBoolean(s: String): Boolean = - (s equalsCI "true") || (s equalsCI "false") + (s.equalsCaseInsensitive("true") || s.equalsCaseInsensitive("false")) def isValidInt32(s: String): Boolean = - try { s.toInt; true } catch { case _: NumberFormatException => false } + try { + s.toInt; true + } catch { + case _: NumberFormatException => false + } def isValidInt64(s: String): Boolean = - try { s.toLong; true } catch { case _: NumberFormatException => false } + try { + s.toLong; true + } catch { + case _: NumberFormatException => false + } - def isValidFloat32(s: String): Boolean = parseSpecialNum(s) match { - case 0 => try { s.toFloat; true } catch { case _: NumberFormatException => false } - case _ => true + def isValidFloat32(s: String): Boolean = try { + parseFloat32(s) + true + } catch { + case _: NumberFormatException => false } - def isValidFloat64(s: String): Boolean = parseSpecialNum(s) match { - case 0 => try { s.toDouble; true } catch { case _: NumberFormatException => false } - case _ => true + def isValidFloat64(s: String): Boolean = try { + parseFloat64(s) + true + } catch { + case _: NumberFormatException => false } def min_ignore_missing(l: Int, lMissing: Boolean, r: Int, rMissing: Boolean): Int = @@ -128,47 +167,46 @@ object UtilFunctions extends RegistryFunctions { def registerAll() { val thisClass = getClass - registerPCode4("valuesSimilar", tv("T"), tv("U"), TFloat64, TBoolean, TBoolean, { - case (_: Type, _: PType, _: PType, _: PType, _: PType) => PBoolean() + registerSCode4("valuesSimilar", tv("T"), tv("U"), TFloat64, TBoolean, TBoolean, { + case (_: Type, _: SType, _: SType, _: SType, _: SType) => SBoolean }) { case (er, cb, rt, l, r, tol, abs) => - assert(l.pt.virtualType == r.pt.virtualType, s"\n lt=${ l.pt.virtualType }\n rt=${ r.pt.virtualType }") + assert(l.st.virtualType == r.st.virtualType, s"\n lt=${ l.st.virtualType }\n rt=${ r.st.virtualType }") val lb = scodeToJavaValue(cb, 
er.region, l) val rb = scodeToJavaValue(cb, er.region, r) primitive(er.mb.getType(l.st.virtualType).invoke[Any, Any, Double, Boolean, Boolean]("valuesSimilar", lb, rb, tol.asDouble.doubleCode(cb), abs.asBoolean.boolCode(cb))) } - registerCode1[Int]("triangle", TInt32, TInt32, (_: Type, n: PType) => n) { case (_, rt, (nT, n: Code[Int])) => - Code.memoize(n, "triangle_n") { n => - (n * (n + 1)) / 2 - } + registerCode1("triangle", TInt32, TInt32, (_: Type, _: SType) => SInt32) { case (cb, _, rt, nn) => + val n = cb.newLocal[Int]("triangle_n", nn.asInt.intCode(cb)) + (n * (n + 1)) / 2 } - registerCode1[Boolean]("toInt32", TBoolean, TInt32, (_: Type, _: PType) => PInt32()) { case (_, rt, (xT, x: Code[Boolean])) => x.toI } - registerCode1[Boolean]("toInt64", TBoolean, TInt64, (_: Type, _: PType) => PInt64()) { case (_, rt, (xT, x: Code[Boolean])) => x.toI.toL } - registerCode1[Boolean]("toFloat32", TBoolean, TFloat32, (_: Type, _: PType) => PFloat32()) { case (_, rt, (xT, x: Code[Boolean])) => x.toI.toF } - registerCode1[Boolean]("toFloat64", TBoolean, TFloat64, (_: Type, _: PType) => PFloat64()) { case (_, rt, (xT, x: Code[Boolean])) => x.toI.toD } - - for ((name, t, rpt, ct) <- Seq[(String, Type, PType, ClassTag[_])]( - ("Boolean", TBoolean, PBoolean(), implicitly[ClassTag[Boolean]]), - ("Int32", TInt32, PInt32(), implicitly[ClassTag[Int]]), - ("Int64", TInt64, PInt64(), implicitly[ClassTag[Long]]), - ("Float64", TFloat64, PFloat64(), implicitly[ClassTag[Double]]), - ("Float32", TFloat32, PFloat32(), implicitly[ClassTag[Float]]) + registerSCode1("toInt32", TBoolean, TInt32, (_: Type, _: SType) => SInt32) { case (_, cb, _, x) => primitive(x.asBoolean.boolCode(cb).toI) } + registerSCode1("toInt64", TBoolean, TInt64, (_: Type, _: SType) => SInt64) { case (_, cb, _, x) => primitive(x.asBoolean.boolCode(cb).toI.toL) } + registerSCode1("toFloat32", TBoolean, TFloat32, (_: Type, _: SType) => SFloat32) { case (_, cb, _, x) => primitive(x.asBoolean.boolCode(cb).toI.toF) } + registerSCode1("toFloat64", TBoolean, TFloat64, (_: Type, _: SType) => SFloat64) { case (_, cb, _, x) => primitive(x.asBoolean.boolCode(cb).toI.toD) } + + for ((name, t, rpt, ct) <- Seq[(String, Type, SType, ClassTag[_])]( + ("Boolean", TBoolean, SBoolean, implicitly[ClassTag[Boolean]]), + ("Int32", TInt32, SInt32, implicitly[ClassTag[Int]]), + ("Int64", TInt64, SInt64, implicitly[ClassTag[Long]]), + ("Float64", TFloat64, SFloat64, implicitly[ClassTag[Double]]), + ("Float32", TFloat32, SFloat32, implicitly[ClassTag[Float]]) )) { - val ctString: ClassTag[String] = implicitly - registerPCode1(s"to$name", TString, t, (_: Type, _: PType) => rpt) { - case (r, cb, rt, x: PStringCode) => + val ctString: ClassTag[String] = implicitly[ClassTag[String]] + registerSCode1(s"to$name", TString, t, (_: Type, _: SType) => rpt) { + case (r, cb, rt, x: SStringCode) => val s = x.loadString() - PCode(rt, Code.invokeScalaObject1(thisClass, s"parse$name", s)(ctString, ct)) + primitive(rt.virtualType, Code.invokeScalaObject1(thisClass, s"parse$name", s)(ctString, ct)) } - registerIEmitCode1(s"to${name}OrMissing", TString, t, (_: Type, xPT: PType) => rpt.setRequired(xPT.required)) { + registerIEmitCode1(s"to${name}OrMissing", TString, t, (_: Type, xPT: EmitType) => EmitType(rpt, xPT.required)) { case (cb, r, rt, x) => - x.toI(cb).flatMap(cb) { case (sc: PStringCode) => + x.toI(cb).flatMap(cb) { case (sc: SStringCode) => val sv = cb.newLocal[String]("s", sc.loadString()) IEmitCode(cb, !Code.invokeScalaObject1[String, Boolean](thisClass, s"isValid$name", 
sv), - PCode(rt, Code.invokeScalaObject1(thisClass, s"parse$name", sv)(ctString, ct))) + primitive(rt.virtualType, Code.invokeScalaObject1(thisClass, s"parse$name", sv)(ctString, ct))) } } } @@ -179,31 +217,31 @@ object UtilFunctions extends RegistryFunctions { } Array("min", "max").foreach { name => - registerCode2(name, TFloat32, TFloat32, TFloat32, (_: Type, _: PType, _: PType) => PFloat32()) { - case (r, rt, (t1, v1: Code[Float]), (t2, v2: Code[Float])) => - Code.invokeStatic2[Math, Float, Float, Float](name, v1, v2) + registerCode2(name, TFloat32, TFloat32, TFloat32, (_: Type, _: SType, _: SType) => SFloat32) { + case (cb, r, rt, v1, v2) => + Code.invokeStatic2[Math, Float, Float, Float](name, v1.asFloat.floatCode(cb), v2.asFloat.floatCode(cb)) } - registerCode2(name, TFloat64, TFloat64, TFloat64, (_: Type, _: PType, _: PType) => PFloat64()) { - case (r, rt, (t1, v1: Code[Double]), (t2, v2: Code[Double])) => - Code.invokeStatic2[Math, Double, Double, Double](name, v1, v2) + registerCode2(name, TFloat64, TFloat64, TFloat64, (_: Type, _: SType, _: SType) => SFloat64) { + case (cb, r, rt, v1, v2) => + Code.invokeStatic2[Math, Double, Double, Double](name, v1.asDouble.doubleCode(cb), v2.asDouble.doubleCode(cb)) } val ignoreMissingName = name + "_ignore_missing" val ignoreNanName = "nan" + name val ignoreBothName = ignoreNanName + "_ignore_missing" - registerCode2(ignoreNanName, TFloat32, TFloat32, TFloat32, (_: Type, _: PType, _: PType) => PFloat32()) { - case (r, rt, (t1, v1: Code[Float]), (t2, v2: Code[Float])) => - Code.invokeScalaObject2[Float, Float, Float](thisClass, ignoreNanName, v1, v2) + registerCode2(ignoreNanName, TFloat32, TFloat32, TFloat32, (_: Type, _: SType, _: SType) => SFloat32) { + case (cb, r, rt, v1, v2) => + Code.invokeScalaObject2[Float, Float, Float](thisClass, ignoreNanName, v1.asFloat.floatCode(cb), v2.asFloat.floatCode(cb)) } - registerCode2(ignoreNanName, TFloat64, TFloat64, TFloat64, (_: Type, _: PType, _: PType) => PFloat64()) { - case (r, rt, (t1, v1: Code[Double]), (t2, v2: Code[Double])) => - Code.invokeScalaObject2[Double, Double, Double](thisClass, ignoreNanName, v1, v2) + registerCode2(ignoreNanName, TFloat64, TFloat64, TFloat64, (_: Type, _: SType, _: SType) => SFloat64) { + case (cb, r, rt, v1, v2) => + Code.invokeScalaObject2[Double, Double, Double](thisClass, ignoreNanName, v1.asDouble.doubleCode(cb), v2.asDouble.doubleCode(cb)) } - def ignoreMissingTriplet[T](cb: EmitCodeBuilder, rt: PType, v1: EmitCode, v2: EmitCode, name: String, f: (Code[T], Code[T]) => Code[T])(implicit ct: ClassTag[T], ti: TypeInfo[T]): IEmitCode = { + def ignoreMissingTriplet[T](cb: EmitCodeBuilder, rt: SType, v1: EmitCode, v2: EmitCode, name: String, f: (Code[T], Code[T]) => Code[T])(implicit ct: ClassTag[T], ti: TypeInfo[T]): IEmitCode = { val value = cb.newLocal[T](s"ignore_missing_${ name }_value") val v1Value = v1.toI(cb).memoize(cb, "ignore_missing_v1") val v2Value = v2.toI(cb).memoize(cb, "ignore_missing_v2") @@ -227,43 +265,43 @@ object UtilFunctions extends RegistryFunctions { }) cb.goto(Ldefined) - IEmitCode(Lmissing, Ldefined, PCode(rt, value.load()), v1.required || v2.required) + IEmitCode(Lmissing, Ldefined, primitive(rt.virtualType, value.load()), v1.required || v2.required) } - registerIEmitCode2(ignoreMissingName, TInt32, TInt32, TInt32, (_: Type, t1: PType, t2: PType) => PInt32(t1.required || t2.required)) { + registerIEmitCode2(ignoreMissingName, TInt32, TInt32, TInt32, (_: Type, t1: EmitType, t2: EmitType) => EmitType(SInt32, t1.required || t2.required)) { 
case (cb, r, rt, v1, v2) => ignoreMissingTriplet[Int](cb, rt, v1, v2, name, Code.invokeStatic2[Math, Int, Int, Int](name, _, _)) } - registerIEmitCode2(ignoreMissingName, TInt64, TInt64, TInt64, (_: Type, t1: PType, t2: PType) => PInt64(t1.required || t2.required)) { + registerIEmitCode2(ignoreMissingName, TInt64, TInt64, TInt64, (_: Type, t1: EmitType, t2: EmitType) => EmitType(SInt64, t1.required || t2.required)) { case (cb, r, rt, v1, v2) => ignoreMissingTriplet[Long](cb, rt, v1, v2, name, Code.invokeStatic2[Math, Long, Long, Long](name, _, _)) } - registerIEmitCode2(ignoreMissingName, TFloat32, TFloat32, TFloat32, (_: Type, t1: PType, t2: PType) => PFloat32(t1.required || t2.required)) { + registerIEmitCode2(ignoreMissingName, TFloat32, TFloat32, TFloat32, (_: Type, t1: EmitType, t2: EmitType) => EmitType(SFloat32, t1.required || t2.required)) { case (cb, r, rt, v1, v2) => ignoreMissingTriplet[Float](cb, rt, v1, v2, name, Code.invokeStatic2[Math, Float, Float, Float](name, _, _)) } - registerIEmitCode2(ignoreMissingName, TFloat64, TFloat64, TFloat64, (_: Type, t1: PType, t2: PType) => PFloat64(t1.required || t2.required)) { + registerIEmitCode2(ignoreMissingName, TFloat64, TFloat64, TFloat64, (_: Type, t1: EmitType, t2: EmitType) => EmitType(SFloat64, t1.required || t2.required)) { case (cb, r, rt, v1, v2) => ignoreMissingTriplet[Double](cb, rt, v1, v2, name, Code.invokeStatic2[Math, Double, Double, Double](name, _, _)) } - registerIEmitCode2(ignoreBothName, TFloat32, TFloat32, TFloat32, (_: Type, t1: PType, t2: PType) => PFloat32(t1.required || t2.required)) { + registerIEmitCode2(ignoreBothName, TFloat32, TFloat32, TFloat32, (_: Type, t1: EmitType, t2: EmitType) => EmitType(SFloat32, t1.required || t2.required)) { case (cb, r, rt, v1, v2) => ignoreMissingTriplet[Float](cb, rt, v1, v2, ignoreNanName, Code.invokeScalaObject2[Float, Float, Float](thisClass, ignoreNanName, _, _)) } - registerIEmitCode2(ignoreBothName, TFloat64, TFloat64, TFloat64, (_: Type, t1: PType, t2: PType) => PFloat64(t1.required || t2.required)) { + registerIEmitCode2(ignoreBothName, TFloat64, TFloat64, TFloat64, (_: Type, t1: EmitType, t2: EmitType) => EmitType(SFloat64, t1.required || t2.required)) { case (cb, r, rt, v1, v2) => ignoreMissingTriplet[Double](cb, rt, v1, v2, ignoreNanName, Code.invokeScalaObject2[Double, Double, Double](thisClass, ignoreNanName, _, _)) } } - registerPCode2("format", TString, tv("T", "tuple"), TString, (_: Type, _: PType, _: PType) => PCanonicalString()) { - case (r, cb, rt: PCanonicalString, format, args) => + registerSCode2("format", TString, tv("T", "tuple"), TString, (_: Type, _: SType, _: SType) => PCanonicalString().sType) { + case (r, cb, SStringPointer(rt: PCanonicalString), format, args) => val javaObjArgs = Code.checkcast[Row](scodeToJavaValue(cb, r.region, args)) val formatted = Code.invokeScalaObject2[String, Row, String](thisClass, "format", format.asString.loadString(), javaObjArgs) val st = SStringPointer(rt) st.constructFromString(cb, r.region, formatted) } - registerIEmitCode2("land", TBoolean, TBoolean, TBoolean, (_: Type, tl: PType, tr: PType) => PBoolean(tl.required && tr.required)) { + registerIEmitCode2("land", TBoolean, TBoolean, TBoolean, (_: Type, tl: EmitType, tr: EmitType) => EmitType(SBoolean, tl.required && tr.required)) { case (cb, _, rt, l, r) => // 00 ... 
00 rv rm lv lm @@ -291,10 +329,10 @@ object UtilFunctions extends RegistryFunctions { val Lpresent = CodeLabel() val Lmissing = CodeLabel() cb.ifx(((M >> w) & 1).cne(0), cb.goto(Lmissing), cb.goto(Lpresent)) - IEmitCode(Lmissing, Lpresent, PCode(rt, w.ceq(10)), l.required && r.required) + IEmitCode(Lmissing, Lpresent, primitive(w.ceq(10)), l.required && r.required) } - registerIEmitCode2("lor", TBoolean, TBoolean, TBoolean, (_: Type, tl: PType, tr: PType) => PBoolean(tl.required && tr.required)) { + registerIEmitCode2("lor", TBoolean, TBoolean, TBoolean, (_: Type, tl: EmitType, tr: EmitType) => EmitType(SBoolean, tl.required && tr.required)) { case (cb, _, rt, l, r) => // 00 ... 00 rv rm lv lm val w = cb.newLocal[Int]("lor_w") @@ -321,7 +359,7 @@ object UtilFunctions extends RegistryFunctions { val Lpresent = CodeLabel() val Lmissing = CodeLabel() cb.ifx(((M >> w) & 1).cne(0), cb.goto(Lmissing), cb.goto(Lpresent)) - IEmitCode(Lmissing, Lpresent, PCode(rt, w.cne(0)), l.required && r.required) + IEmitCode(Lmissing, Lpresent, primitive(w.cne(0)), l.required && r.required) } } } diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerBlockMatrixIR.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerBlockMatrixIR.scala index 8de84949ca7..97a911a4c18 100644 --- a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerBlockMatrixIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerBlockMatrixIR.scala @@ -3,6 +3,7 @@ package is.hail.expr.ir.lowering import is.hail.expr.Nat import is.hail.expr.ir._ import is.hail.expr.ir.functions.GetElement +import is.hail.rvd.RVDPartitioner import is.hail.types.{BlockMatrixSparsity, BlockMatrixType, TypeWithRequiredness} import is.hail.types.virtual._ import is.hail.utils._ @@ -88,11 +89,14 @@ abstract class BlockMatrixStage(val globalVals: Array[(String, IR)], val ctxType }, coerce[TArray](cda.typ)), 1) }, coerce[TArray](cda.typ)) } else { - val i = Ref(genUID(), TInt32) - val j = Ref(genUID(), TInt32) - val cols = ToArray(StreamMap(StreamRange(0, typ.nColBlocks, 1), j.name, ArrayRef(blockResults, i * typ.nColBlocks + j))) - ToArray(StreamMap(StreamRange(0, typ.nRowBlocks, 1), i.name, NDArrayConcat(cols, 1))) + ToArray(mapIR(rangeIR(I32(typ.nRowBlocks))){ rowIdxRef => + val blocksInOneRow = ToArray(mapIR(rangeIR(I32(typ.nColBlocks))) { colIdxRef => + ArrayRef(blockResults, rowIdxRef * typ.nColBlocks + colIdxRef) + }) + NDArrayConcat(blocksInOneRow, 1) + }) } + Let(blockResults.name, cda, NDArrayConcat(rows, 0)) } @@ -167,25 +171,89 @@ abstract class BlockMatrixStage(val globalVals: Array[(String, IR)], val ctxType object LowerBlockMatrixIR { def apply(node: IR, typesToLower: DArrayLowering.Type, ctx: ExecuteContext, r: RequirednessAnalysis, relationalLetsAbove: Map[String, IR]): IR = { - def unimplemented[T](node: BaseIR): T = - throw new LowererUnsupportedOperation(s"unimplemented: \n${ Pretty(node) }") + def lower(bmir: BlockMatrixIR) = LowerBlockMatrixIR.lower(bmir, typesToLower, ctx, r, relationalLetsAbove) - def lowerIR(node: IR): IR = LowerToCDA.lower(node, typesToLower, ctx, r, relationalLetsAbove: Map[String, IR]) + node match { + case BlockMatrixCollect(child) => + lower(child).collectLocal(relationalLetsAbove, child.typ) + case BlockMatrixToValueApply(child, GetElement(IndexedSeq(i, j))) => + val rowBlock = child.typ.getBlockIdx(i) + val colBlock = child.typ.getBlockIdx(j) - def lower(bmir: BlockMatrixIR): BlockMatrixStage = { - if (!DArrayLowering.lowerBM(typesToLower)) - throw new LowererUnsupportedOperation("found BlockMatrixIR 
in lowering; lowering only TableIRs.") - bmir.children.foreach { - case c: BlockMatrixIR if c.typ.blockSize != bmir.typ.blockSize => - throw new LowererUnsupportedOperation(s"Can't lower node with mismatched block sizes: ${ bmir.typ.blockSize } vs child ${ c.typ.blockSize }\n\n ${ Pretty(bmir) }") - case _ => - } - if (bmir.typ.nDefinedBlocks == 0) - BlockMatrixStage.empty(bmir.typ.elementType) - else lowerNonEmpty(bmir) + val iInBlock = i - rowBlock * child.typ.blockSize + val jInBlock = j - colBlock * child.typ.blockSize + + val lowered = lower(child) + + val elt = bindIR(lowered.blockContext(rowBlock -> colBlock)) { ctx => + NDArrayRef(lowered.blockBody(ctx), FastIndexedSeq(I64(iInBlock), I64(jInBlock)), -1) + } + + lowered.globalVals.foldRight[IR](elt) { case ((f, v), accum) => Let(f, v, accum) } + case BlockMatrixWrite(child, writer) => + writer.lower(ctx, lower(child), child, relationalLetsAbove, TypeWithRequiredness(child.typ.elementType)) //FIXME: BlockMatrixIR is currently ignored in Requiredness inference since all eltTypes are +TFloat64 + case BlockMatrixMultiWrite(blockMatrices, writer) => unimplemented(node) + case node if node.children.exists(_.isInstanceOf[BlockMatrixIR]) => + throw new LowererUnsupportedOperation(s"IR nodes with BlockMatrixIR children need explicit rules: \n${ Pretty(node) }") + + case node => + throw new LowererUnsupportedOperation(s"Value IRs with no BlockMatrixIR children must be lowered through LowerIR: \n${ Pretty(node) }") + } + } + + // This lowers a BlockMatrixIR to an unkeyed TableStage with rows of (blockRow, blockCol, block) + def lowerToTableStage( + bmir: BlockMatrixIR, typesToLower: DArrayLowering.Type, ctx: ExecuteContext, + r: RequirednessAnalysis, relationalLetsAbove: Map[String, IR] + ): TableStage = { + val bms = lower(bmir, typesToLower, ctx, r, relationalLetsAbove) + val typ = bmir.typ + val bmsWithCtx = bms.addContext(TTuple(TInt32, TInt32)){ case (i, j) => MakeTuple(Seq(0 -> i, 1 -> j))} + val blocksRowMajor = Array.range(0, typ.nRowBlocks).flatMap { i => + Array.tabulate(typ.nColBlocks)(j => i -> j).filter(typ.hasBlock) + } + val emptyGlobals = MakeStruct(Seq()) + val globalsId = genUID() + val letBindings = bmsWithCtx.globalVals :+ globalsId -> emptyGlobals + val contextsIR = MakeStream(blocksRowMajor.map{ case (i, j) => bmsWithCtx.blockContext((i, j)) }, TStream(bmsWithCtx.ctxType)) + + val ctxRef = Ref(genUID(), bmsWithCtx.ctxType) + val body = bmsWithCtx.blockBody(ctxRef) + val bodyFreeVars = FreeVariables(body, supportsAgg = false, supportsScan = false) + val bcFields = bmsWithCtx.globalVals.filter { case (f, _) => bodyFreeVars.eval.lookupOption(f).isDefined } :+ globalsId -> Ref(globalsId, emptyGlobals.typ) + + def tsPartitionFunction(ctxRef: Ref): IR = { + val s = MakeStruct(Seq("blockRow" -> GetTupleElement(GetField(ctxRef, "new"), 0), "blockCol" -> GetTupleElement(GetField(ctxRef, "new"), 1), "block" -> bmsWithCtx.blockBody(ctxRef))) + MakeStream(Seq( + s + ), TStream(s.typ)) } + val ts = TableStage(letBindings, bcFields, Ref(globalsId, emptyGlobals.typ), RVDPartitioner.unkeyed(blocksRowMajor.size), TableStageDependency.none, contextsIR, tsPartitionFunction) + ts + } + + private def unimplemented[T](node: BaseIR): T = + throw new LowererUnsupportedOperation(s"unimplemented: \n${ Pretty(node) }") - def lowerNonEmpty(bmir: BlockMatrixIR): BlockMatrixStage = bmir match { + def lower(bmir: BlockMatrixIR, typesToLower: DArrayLowering.Type, ctx: ExecuteContext, r: RequirednessAnalysis, relationalLetsAbove: Map[String, IR]): 
BlockMatrixStage = { + if (!DArrayLowering.lowerBM(typesToLower)) + throw new LowererUnsupportedOperation("found BlockMatrixIR in lowering; lowering only TableIRs.") + bmir.children.foreach { + case c: BlockMatrixIR if c.typ.blockSize != bmir.typ.blockSize => + throw new LowererUnsupportedOperation(s"Can't lower node with mismatched block sizes: ${ bmir.typ.blockSize } vs child ${ c.typ.blockSize }\n\n ${ Pretty(bmir) }") + case _ => + } + if (bmir.typ.nDefinedBlocks == 0) + BlockMatrixStage.empty(bmir.typ.elementType) + else lowerNonEmpty(bmir, typesToLower, ctx, r, relationalLetsAbove) + } + + def lowerNonEmpty(bmir: BlockMatrixIR, typesToLower: DArrayLowering.Type, ctx: ExecuteContext, r: RequirednessAnalysis, relationalLetsAbove: Map[String, IR]): BlockMatrixStage = { + def lower(ir: BlockMatrixIR) = LowerBlockMatrixIR.lower(ir, typesToLower, ctx, r, relationalLetsAbove) + + def lowerIR(node: IR): IR = LowerToCDA.lower(node, typesToLower, ctx, r, relationalLetsAbove: Map[String, IR]) + + bmir match { case BlockMatrixRead(reader) => reader.lower(ctx) case x@BlockMatrixRandom(seed, gaussian, shape, blockSize) => val generator = invokeSeeded(if (gaussian) "rand_norm" else "rand_unif", seed, TFloat64, F64(0.0), F64(1.0)) @@ -298,20 +366,31 @@ object LowerBlockMatrixIR { lower(child).condenseBlocks(child.typ, rowDependents, colDependents) .addContext(TTuple(TTuple(TInt64, TInt64, TInt64), TTuple(TInt64, TInt64, TInt64))) { idx => - val i = idx._1 - val j = idx._2 - val rowStartIdx = rowDependents(i).head.toLong * x.typ.blockSize - val colStartIdx = colDependents(j).head.toLong * x.typ.blockSize + val (i, j) = idx + + // Aligned with the edges of blocks in child BM. + val blockAlignedRowStartIdx = rowDependents(i).head.toLong * x.typ.blockSize + val blockAlignedColStartIdx = colDependents(j).head.toLong * x.typ.blockSize + val blockAlignedRowEndIdx = math.min(child.typ.nRows, (rowDependents(i).last + 1L) * x.typ.blockSize * rStep) + val blockAlignedColEndIdx = math.min(child.typ.nCols, (colDependents(j).last + 1L) * x.typ.blockSize * cStep) - val rowEndIdx = java.lang.Math.min(child.typ.nRows, (rowDependents(i).last + 1L) * x.typ.blockSize) - val colEndIdx = java.lang.Math.min(child.typ.nCols, (colDependents(i).last + 1L) * x.typ.blockSize) + // condenseBlocks can give the same data to multiple partitions. Need to make sure we don't use data + // that's already included in an earlier block. 
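The comment above notes that condenseBlocks may hand the same block-aligned data to several output partitions, so each partition has to offset its slice by whatever earlier partitions already covered. A minimal standalone sketch of that arithmetic with hypothetical scalar inputs (plain Scala, not the IR builders used in the hunk):

object SliceOffsetSketch {
  // For output block i of a strided row slice (start, stop, step) over a block
  // matrix, compute the slice to apply to the condensed, block-aligned data.
  def localRowSlice(i: Int, start: Long, stop: Long, step: Long, blockSize: Int,
                    firstDepBlock: Int, lastDepBlock: Int, nRows: Long): (Long, Long, Long) = {
    val alignedStart = firstDepBlock.toLong * blockSize
    val alignedEnd = math.min(nRows, (lastDepBlock + 1L) * blockSize * step)
    // rows of the slice already consumed by output blocks 0 .. i-1
    val seenAlready = start + i.toLong * blockSize * step
    val localStart = seenAlready - alignedStart
    val localEnd = math.min(math.min(stop, alignedEnd) - alignedStart, localStart + blockSize.toLong * step)
    (localStart, localEnd, step)
  }
}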
+ val rStartPlusSeenAlready = rStart + i * x.typ.blockSize * rStep + val cStartPlusSeenAlready = cStart + j * x.typ.blockSize * cStep + + val rowTrueStart = rStartPlusSeenAlready - blockAlignedRowStartIdx + val rowTrueEnd = math.min(math.min(rEnd, blockAlignedRowEndIdx) - blockAlignedRowStartIdx, rowTrueStart + x.typ.blockSize * rStep) val rows = MakeTuple.ordered(FastSeq[IR]( - if (rStart >= rowStartIdx) rStart - rowStartIdx else (rowStartIdx - rStart) % rStep, - java.lang.Math.min(rEnd, rowEndIdx) - rowStartIdx, + rowTrueStart, + rowTrueEnd, rStep)) + + val colTrueStart = cStartPlusSeenAlready - blockAlignedColStartIdx + val colTrueEnd = math.min(java.lang.Math.min(cEnd, blockAlignedColEndIdx) - blockAlignedColStartIdx, colTrueStart + x.typ.blockSize * cStep) val cols = MakeTuple.ordered(FastSeq[IR]( - if (cStart >= colStartIdx) cStart - colStartIdx else (colStartIdx - cStart) % cStep, - java.lang.Math.min(cEnd, colEndIdx) - colStartIdx, + colTrueStart, + colTrueEnd, cStep)) MakeTuple.ordered(FastSeq(rows, cols)) }.mapBody { (ctx, body) => NDArraySlice(body, GetField(ctx, "new")) } @@ -378,32 +457,5 @@ object LowerBlockMatrixIR { } } } - - node match { - case BlockMatrixCollect(child) => - lower(child).collectLocal(relationalLetsAbove, child.typ) - case BlockMatrixToValueApply(child, GetElement(IndexedSeq(i, j))) => - val rowBlock = child.typ.getBlockIdx(i) - val colBlock = child.typ.getBlockIdx(j) - - val iInBlock = i - rowBlock * child.typ.blockSize - val jInBlock = j - colBlock * child.typ.blockSize - - val lowered = lower(child) - - val elt = bindIR(lowered.blockContext(rowBlock -> colBlock)) { ctx => - NDArrayRef(lowered.blockBody(ctx), FastIndexedSeq(I64(iInBlock), I64(jInBlock)), -1) - } - - lowered.globalVals.foldRight[IR](elt) { case ((f, v), accum) => Let(f, v, accum) } - case BlockMatrixWrite(child, writer) => - writer.lower(ctx, lower(child), child, relationalLetsAbove, TypeWithRequiredness(child.typ.elementType)) //FIXME: BlockMatrixIR is currently ignored in Requiredness inference since all eltTypes are +TFloat64 - case BlockMatrixMultiWrite(blockMatrices, writer) => unimplemented(node) - case node if node.children.exists(_.isInstanceOf[BlockMatrixIR]) => - throw new LowererUnsupportedOperation(s"IR nodes with BlockMatrixIR children need explicit rules: \n${ Pretty(node) }") - - case node => - throw new LowererUnsupportedOperation(s"Value IRs with no BlockMatrixIR children must be lowered through LowerIR: \n${ Pretty(node) }") - } } } diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerDistributedSort.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerDistributedSort.scala index 7f64ef62a75..6c1eb8a41c7 100644 --- a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerDistributedSort.scala +++ b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerDistributedSort.scala @@ -3,9 +3,10 @@ package is.hail.expr.ir.lowering import is.hail.annotations.{Annotation, ExtendedOrdering, Region, SafeRow, UnsafeRow} import is.hail.asm4s.{AsmFunction1RegionLong, LongInfo, classInfo} import is.hail.expr.ir._ -import is.hail.types.physical.{PArray, PStruct, PTuple, PTypeReferenceSingleCodeType} +import is.hail.types.physical.{PArray, PStruct, PTuple} import is.hail.types.virtual.{TStream, TStruct, Type} import is.hail.rvd.RVDPartitioner +import is.hail.types.physical.stypes.PTypeReferenceSingleCodeType import is.hail.utils._ import org.apache.spark.sql.Row diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala 
b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala index 4691adef824..f55b2d0c597 100644 --- a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala @@ -271,7 +271,7 @@ class TableStage( GetField(ctxRef, "partitionBound"), StreamFilter( StreamFlatMap( - ToStream(GetField(ctxRef, "oldContexts")), + ToStream(GetField(ctxRef, "oldContexts"), true), prevContextUIDPartition, body ), @@ -440,7 +440,7 @@ object LowerTableIR { RVDPartitioner.unkeyed(nPartitionsAdj), TableStageDependency.none, context, - ctxRef => ToStream(ctxRef)) + ctxRef => ToStream(ctxRef, true)) case TableRange(n, nPartitions) => val nPartitionsAdj = math.max(math.min(n, nPartitions), 1) @@ -465,7 +465,7 @@ object LowerTableIR { MakeStruct(FastIndexedSeq("start" -> start, "end" -> end)) }, TStream(contextType)), - (ctxRef: Ref) => mapIR(rangeIR(GetField(ctxRef, "start"), GetField(ctxRef, "end"))) { i => + (ctxRef: Ref) => mapIR(StreamRange(GetField(ctxRef, "start"), GetField(ctxRef, "end"), I32(1), true)) { i => MakeStruct(FastSeq("idx" -> i)) }) @@ -656,6 +656,12 @@ object LowerTableIR { case TableHead(child, targetNumRows) => val loweredChild = lower(child) + def streamLenOrMax(a: IR): IR = + if (targetNumRows <= Integer.MAX_VALUE) + StreamLen(StreamTake(a, targetNumRows.toInt)) + else + StreamLen(a) + def partitionSizeArray(childContexts: Ref): IR = { val partitionSizeArrayFunc = genUID() val howManyPartsToTry = Ref(genUID(), TInt32) @@ -664,7 +670,7 @@ object LowerTableIR { partitionSizeArrayFunc, FastIndexedSeq(howManyPartsToTry.name -> 4), bindIR(loweredChild.mapContexts(_ => StreamTake(ToStream(childContexts), howManyPartsToTry)){ ctx: IR => ctx } - .mapCollect(relationalLetsAbove)(StreamLen)) { counts => + .mapCollect(relationalLetsAbove)(streamLenOrMax)) { counts => If((Cast(streamSumIR(ToStream(counts)), TInt64) >= targetNumRows) || (ArrayLen(childContexts) <= ArrayLen(counts)), counts, Recur(partitionSizeArrayFunc, FastIndexedSeq(howManyPartsToTry * 4), TArray(TInt32))) @@ -1094,13 +1100,37 @@ object LowerTableIR { case TableLiteral(typ, rvd, enc, encodedGlobals) => RVDToTableStage(rvd, EncodedLiteral(enc, encodedGlobals)) + case bmtt@BlockMatrixToTable(bmir) => + val bmStage = LowerBlockMatrixIR.lower(bmir, typesToLower, ctx, r, relationalLetsAbove) + val ts = LowerBlockMatrixIR.lowerToTableStage(bmir, typesToLower, ctx, r, relationalLetsAbove) + // I now have an unkeyed table of (blockRow, blockCol, block). 
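The code that follows expands each (blockRow, blockCol, block) record into (i, j, entry) rows, where the global coordinate is simply blockRow * blockSize plus the within-block index (and likewise for columns). A small standalone sketch of that mapping with hypothetical names, using plain collections instead of IR:

object BlockToEntriesSketch {
  // Expand one (blockRow, blockCol, block) record into (i, j, entry) rows.
  def blockToEntries(blockRow: Int, blockCol: Int, blockSize: Int,
                     block: Array[Array[Double]]): Seq[(Long, Long, Double)] =
    for {
      r <- block.indices
      c <- block(r).indices
    } yield (blockRow.toLong * blockSize + r, blockCol.toLong * blockSize + c, block(r)(c))
}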
+ val entriesUnkeyed = ts.mapPartitionWithContext { (partition, ctxRef) => + flatMapIR(partition)(singleRowRef => + bindIR(GetField(singleRowRef, "block")) { singleNDRef => + bindIR(NDArrayShape(singleNDRef)) { shapeTupleRef => + flatMapIR(rangeIR(Cast(GetTupleElement(shapeTupleRef, 0), TInt32))) { withinNDRowIdx => + mapIR(rangeIR(Cast(GetTupleElement(shapeTupleRef, 1), TInt32))) { withinNDColIdx => + val entry = NDArrayRef(singleNDRef, IndexedSeq(Cast(withinNDRowIdx, TInt64), Cast(withinNDColIdx, TInt64)), ErrorIDs.NO_ERROR) + val blockStartRow = GetField(singleRowRef, "blockRow") * bmir.typ.blockSize + val blockStartCol = GetField(singleRowRef, "blockCol") * bmir.typ.blockSize + makestruct("i" -> Cast(withinNDRowIdx + blockStartRow, TInt64), "j" -> Cast(withinNDColIdx + blockStartCol, TInt64), "entry" -> entry) + } + } + } + } + ) + } + + val rowR = r.lookup(bmtt).asInstanceOf[RTable].rowType + ctx.backend.lowerDistributedSort(ctx, entriesUnkeyed, IndexedSeq(SortField("i", Ascending), SortField("j", Ascending)), relationalLetsAbove, rowR) + case node => throw new LowererUnsupportedOperation(s"undefined: \n${ Pretty(node) }") } assert(tir.typ.globalType == lowered.globalType, s"\n ir global: ${tir.typ.globalType}\n lowered global: ${lowered.globalType}") assert(tir.typ.rowType == lowered.rowType, s"\n ir row: ${tir.typ.rowType}\n lowered row: ${lowered.rowType}") - assert(lowered.key startsWith tir.typ.keyType.fieldNames, s"\n ir key: ${tir.typ.keyType.fieldNames}\n lowered key: ${lowered.key}") + assert(lowered.key startsWith tir.typ.keyType.fieldNames, s"\n ir key: ${tir.typ.keyType.fieldNames.toSeq}\n lowered key: ${lowered.key}") lowered } diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerToCDA.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerToCDA.scala index b819fb5c116..3f1d990a68b 100644 --- a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerToCDA.scala +++ b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerToCDA.scala @@ -3,7 +3,8 @@ package is.hail.expr.ir.lowering import is.hail.annotations.{Region, SafeRow, UnsafeRow} import is.hail.asm4s.{AsmFunction1RegionLong, AsmFunction1RegionUnit, LongInfo, UnitInfo, classInfo} import is.hail.expr.ir._ -import is.hail.types.physical.{PTuple, PType, PTypeReferenceSingleCodeType} +import is.hail.types.physical.stypes.PTypeReferenceSingleCodeType +import is.hail.types.physical.{PTuple, PType} import is.hail.types.virtual.Type import is.hail.utils.{FastIndexedSeq, FastSeq} import org.apache.spark.sql.Row diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/RVDToTableStage.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/RVDToTableStage.scala index af1e87019bc..a93bbecda9e 100644 --- a/hail/src/main/scala/is/hail/expr/ir/lowering/RVDToTableStage.scala +++ b/hail/src/main/scala/is/hail/expr/ir/lowering/RVDToTableStage.scala @@ -1,14 +1,14 @@ package is.hail.expr.ir.lowering import java.io.{ByteArrayInputStream, ByteArrayOutputStream} - import is.hail.annotations.{BroadcastRow, Region, RegionValue} import is.hail.asm4s._ import is.hail.expr.ir.{Compile, CompileIterator, ExecuteContext, GetField, IR, In, Let, MakeStruct, PartitionRVDReader, ReadPartition, StreamRange, ToArray, _} import is.hail.io.{BufferSpec, TypedCodecSpec} import is.hail.rvd.{RVD, RVDType} import is.hail.sparkextras.ContextRDD -import is.hail.types.physical.{PArray, PStruct, PTypeReferenceSingleCodeType} +import is.hail.types.physical.stypes.PTypeReferenceSingleCodeType +import is.hail.types.physical.{PArray, PStruct, stypes} import 
is.hail.utils.{FastIndexedSeq, FastSeq} object RVDToTableStage { diff --git a/hail/src/main/scala/is/hail/expr/ir/ndarrays/EmitNDArray.scala b/hail/src/main/scala/is/hail/expr/ir/ndarrays/EmitNDArray.scala new file mode 100644 index 00000000000..07546639948 --- /dev/null +++ b/hail/src/main/scala/is/hail/expr/ir/ndarrays/EmitNDArray.scala @@ -0,0 +1,661 @@ +package is.hail.expr.ir.ndarrays + +import is.hail.annotations.Region +import is.hail.expr.ir._ +import is.hail.types.physical.{PCanonicalArray, PCanonicalNDArray, PFloat32, PFloat32Required, PFloat64, PFloat64Required, PInt32, PInt32Required, PInt64, PInt64Required, PNumeric, PType} +import is.hail.types.physical.stypes.interfaces.{SNDArray, SNDArrayCode} +import is.hail.types.physical.stypes.{SCode, SType} +import is.hail.utils._ +import is.hail.asm4s._ +import is.hail.types.physical.stypes.interfaces._ +import is.hail.types.physical.stypes.primitives.{SFloat32, SFloat64, SInt32, SInt64} +import is.hail.types.virtual.{TFloat32, TFloat64, TInt32, TInt64, TNDArray} + +abstract class NDArrayProducer { + outer => + + def elementType: PType + val shape: IndexedSeq[Value[Long]] + def nDims = shape.size + + val initAll: EmitCodeBuilder => Unit + val initAxis: IndexedSeq[(EmitCodeBuilder) => Unit] + val stepAxis: IndexedSeq[(EmitCodeBuilder, Value[Long]) => Unit] + def loadElementAtCurrentAddr(cb: EmitCodeBuilder): SCode + + def copy( + aElementType: PType = elementType, + aShape: IndexedSeq[Value[Long]] = shape, + ainitAll: EmitCodeBuilder => Unit = initAll, + ainitAxis: IndexedSeq[(EmitCodeBuilder) => Unit] = initAxis, + astepAxis: IndexedSeq[(EmitCodeBuilder, Value[Long]) => Unit] = stepAxis + ): NDArrayProducer = { + new NDArrayProducer() { + override def elementType: PType = aElementType + + override val shape: IndexedSeq[Value[Long]] = aShape + override val initAll: EmitCodeBuilder => Unit = ainitAll + override val initAxis: IndexedSeq[EmitCodeBuilder => Unit] = ainitAxis + override val stepAxis: IndexedSeq[(EmitCodeBuilder, Value[Long]) => Unit] = astepAxis + + override def loadElementAtCurrentAddr(cb: EmitCodeBuilder): SCode = outer.loadElementAtCurrentAddr(cb) + } + } + + def toSCode(cb: EmitCodeBuilder, targetType: PCanonicalNDArray, region: Value[Region], rowMajor: Boolean = false): SNDArrayCode = { + val (firstElementAddress, finish) = targetType.constructDataFunction( + shape, + targetType.makeColumnMajorStrides(shape, region, cb), + cb, + region) + + val currentWriteAddr = cb.newLocal[Long]("ndarray_producer_to_scode_cur_write_addr") + cb.assign(currentWriteAddr, firstElementAddress) + + initAll(cb) + val idxGenerator = if (rowMajor) SNDArray.forEachIndexWithInitAndIncRowMajor _ else SNDArray.forEachIndexWithInitAndIncColMajor _ + idxGenerator(cb, shape, initAxis, stepAxis.map(stepper => (cb: EmitCodeBuilder) => stepper(cb, 1L)), "ndarray_producer_toSCode"){ (cb, indices) => + targetType.elementType.storeAtAddress(cb, currentWriteAddr, region, loadElementAtCurrentAddr(cb), true) + cb.assign(currentWriteAddr, currentWriteAddr + targetType.elementType.byteSize) + } + + finish(cb) + } +} + +object EmitNDArray { + + def apply( + emitter: Emit[_], + ndIR: IR, + cb: EmitCodeBuilder, + region: Value[Region], + env: EmitEnv, + container: Option[AggContainer], + loopEnv: Option[Env[LoopRef]] + ): IEmitCode = { + + def emitNDInSeparateMethod(context: String, cb: EmitCodeBuilder, ir: IR, region: Value[Region], env: EmitEnv, container: Option[AggContainer], loopEnv: Option[Env[LoopRef]]): IEmitCode = { + + 
assert(!emitter.ctx.inLoopCriticalPath.contains(ir)) + val mb = cb.emb.genEmitMethod(context, FastIndexedSeq[ParamType](), UnitInfo) + val r = cb.newField[Region]("emitInSeparate_region", region) + + var ev: EmitSettable = null + mb.voidWithBuilder { cb => + emitter.ctx.tryingToSplit.update(ir, ()) + val result: IEmitCode = deforest(ir, cb, r, env, container, loopEnv).map(cb)(ndap => ndap.toSCode(cb, PCanonicalNDArray(ndap.elementType.setRequired(true), ndap.nDims), r)) + + ev = cb.emb.ecb.newEmitField(s"${context}_result", result.emitType) + cb.assign(ev, result) + } + cb.invokeVoid(mb) + ev.toI(cb) + } + + def deforest(x: IR, cb: EmitCodeBuilder, region: Value[Region], env: EmitEnv, container: Option[AggContainer], loopEnv: Option[Env[LoopRef]]): IEmitCodeGen[NDArrayProducer] = { + def deforestRecur(x: IR, cb: EmitCodeBuilder = cb, region: Value[Region] = region, env: EmitEnv = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): IEmitCodeGen[NDArrayProducer] = { + + def emitI(ir: IR, cb: EmitCodeBuilder, region: Value[Region] = region, env: EmitEnv = env, container: Option[AggContainer] = container, loopEnv: Option[Env[LoopRef]] = loopEnv): IEmitCode = { + emitter.emitI(ir, cb, region, env, container, loopEnv) + } + + x match { + case NDArrayMap(child, elemName, body) => { + deforestRecur(child, cb).map(cb) { childProducer => + val elemRef = cb.emb.newEmitField("ndarray_map_element_name", childProducer.elementType.sType, required = true) + val bodyEnv = env.bind(elemName, elemRef) + val bodyEC = EmitCode.fromI(cb.emb)(cb => emitI(body, cb, env = bodyEnv)) + + new NDArrayProducer { + override def elementType: PType = bodyEC.st.canonicalPType() + + override val shape: IndexedSeq[Value[Long]] = childProducer.shape + override val initAll: EmitCodeBuilder => Unit = childProducer.initAll + override val initAxis: IndexedSeq[EmitCodeBuilder => Unit] = childProducer.initAxis + override val stepAxis: IndexedSeq[(EmitCodeBuilder, Value[Long]) => Unit] = childProducer.stepAxis + + override def loadElementAtCurrentAddr(cb: EmitCodeBuilder): SCode = { + cb.assign(elemRef, EmitCode.present(cb.emb, childProducer.loadElementAtCurrentAddr(cb))) + bodyEC.toI(cb).get(cb, "NDArray map body cannot be missing") + } + } + } + } + case NDArrayMap2(lChild, rChild, lName, rName, body) => { + deforestRecur(lChild, cb).flatMap(cb) { leftProducer => + deforestRecur(rChild, cb).map(cb) { rightProducer => + val leftShapeValues = leftProducer.shape + val rightShapeValues = rightProducer.shape + + val shapeArray = NDArrayEmitter.unifyShapes2(cb, leftShapeValues, rightShapeValues) + + val lElemRef = cb.emb.newEmitField(lName, leftProducer.elementType.sType, required = true) + val rElemRef = cb.emb.newEmitField(rName, rightProducer.elementType.sType, required = true) + val bodyEnv = env.bind(lName, lElemRef) + .bind(rName, rElemRef) + val bodyEC = EmitCode.fromI(cb.emb)(cb => emitI(body, cb, env = bodyEnv)) + + val leftBroadcasted = broadcast(cb, leftProducer, "left") + val rightBroadcasted = broadcast(cb, rightProducer, "right") + + new NDArrayProducer { + override def elementType: PType = bodyEC.st.canonicalPType() + + override val shape: IndexedSeq[Value[Long]] = shapeArray + override val initAll: EmitCodeBuilder => Unit = { + cb => { + leftBroadcasted.initAll(cb) + rightBroadcasted.initAll(cb) + } + } + override val initAxis: IndexedSeq[EmitCodeBuilder => Unit] = shape.indices.map { idx => { cb: EmitCodeBuilder => + leftBroadcasted.initAxis(idx)(cb) + 
rightBroadcasted.initAxis(idx)(cb) + } + } + override val stepAxis: IndexedSeq[(EmitCodeBuilder, Value[Long]) => Unit] = shape.indices.map { idx => { (cb: EmitCodeBuilder, axis: Value[Long]) => + leftBroadcasted.stepAxis(idx)(cb, axis) + rightBroadcasted.stepAxis(idx)(cb, axis) + } + } + + override def loadElementAtCurrentAddr(cb: EmitCodeBuilder): SCode = { + cb.assign(lElemRef, EmitCode.present(cb.emb, leftBroadcasted.loadElementAtCurrentAddr(cb))) + cb.assign(rElemRef, EmitCode.present(cb.emb, rightBroadcasted.loadElementAtCurrentAddr(cb))) + + bodyEC.toI(cb).get(cb, "NDArrayMap2 body cannot be missing") + } + } + } + } + } + case NDArrayReindex(child, indexExpr) => + deforestRecur(child, cb).map(cb) { childProducer => + + new NDArrayProducer { + override def elementType: PType = childProducer.elementType + + override val shape: IndexedSeq[Value[Long]] = indexExpr.map { childIndex => + if (childIndex < childProducer.nDims) + childProducer.shape(childIndex) + else + const(1L) + } + override val initAll: EmitCodeBuilder => Unit = childProducer.initAll + override val initAxis: IndexedSeq[EmitCodeBuilder => Unit] = { + indexExpr.map { childIndex => + (cb: EmitCodeBuilder) => + if (childIndex < childProducer.nDims) { + childProducer.initAxis(childIndex)(cb) + } + } + } + override val stepAxis: IndexedSeq[(EmitCodeBuilder, Value[Long]) => Unit] = { + indexExpr.map { childIndex => + (cb: EmitCodeBuilder, step: Value[Long]) => + if (childIndex < childProducer.nDims) { + childProducer.stepAxis(childIndex)(cb, step) + } + } + } + + override def loadElementAtCurrentAddr(cb: EmitCodeBuilder): SCode = childProducer.loadElementAtCurrentAddr(cb) + } + } + case x@NDArrayReshape(childND, shape) => + emitI(childND, cb).flatMap(cb) { case childND: SNDArrayCode => + // Plan: Run through the child row major, make an array. Then jump around it as needed. 
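The plan spelled out above relies on the fact that, once the elements sit contiguously in row-major order, a reshape is just a reread of that buffer with the row-major strides of the requested shape. A minimal sketch of that stride computation, with hypothetical names:

object RowMajorStridesSketch {
  // Byte strides such that walking the last axis fastest visits a contiguous buffer.
  def rowMajorStrides(shape: IndexedSeq[Long], elementByteSize: Long): IndexedSeq[Long] =
    shape.indices.map(i => shape.drop(i + 1).product * elementByteSize)
}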
+ val childMemo = childND.memoize(cb, "ndarray_reshape_child") + + val childShapeValues = childMemo.shapes(cb) + val outputNDims = x.typ.nDims + + val requestedShapeValues = Array.tabulate(outputNDims)(i => cb.newLocal[Long](s"ndarray_reindex_request_shape_$i")).toIndexedSeq + + emitI(shape, cb, env = env).map(cb) { sc: SCode => + val tupleCode = sc.asBaseStruct + val tupleValue = tupleCode.memoize(cb, "ndarray_reshape_requested") + + val hasNegativeOne = cb.newLocal[Boolean]("ndarray_reshape_has_neg_one") + val runningProduct = cb.newLocal[Long]("ndarray_reshape_running_product") + val replacesNegativeOne = cb.newLocal[Long]("ndarray_reshape_replaces_neg_one") + val tempShapeElement = cb.newLocal[Long]("ndarray_reshape_temp_shape_element") + + cb.assign(hasNegativeOne, false) + cb.assign(runningProduct, 1L) + + (0 until outputNDims).foreach { i => + cb.assign(tempShapeElement, tupleValue.loadField(cb, i).get(cb, "Can't reshape if elements of reshape tuple are missing.").asLong.longCode(cb)) + cb.ifx(tempShapeElement < 0L, + { + cb.ifx(tempShapeElement ceq -1L, + { + cb.ifx(hasNegativeOne, { + cb._fatal("Can't infer shape, more than one -1") + }, { + cb.assign(hasNegativeOne, true) + }) + }, + { + cb._fatal("Can't reshape, new shape must contain only nonnegative numbers or -1") + } + ) + }, + { + cb.assign(runningProduct, runningProduct * tempShapeElement) + } + ) + } + + val numElements = cb.newLocal[Long]("ndarray_reshape_child_num_elements") + cb.assign(numElements, SNDArray.numElements(childShapeValues)) + + cb.ifx(hasNegativeOne.mux( + (runningProduct ceq 0L) || (numElements % runningProduct) > 0L, + numElements cne runningProduct + ), { + cb._fatal("Can't reshape since requested shape is incompatible with number of elements") + }) + cb.assign(replacesNegativeOne, (runningProduct ceq 0L).mux(0L, numElements / runningProduct)) + + (0 until outputNDims).foreach { i => + cb.assign(tempShapeElement, tupleValue.loadField(cb, i).get(cb, "Can't reshape if elements of reshape tuple are missing.").asLong.longCode(cb)) + cb.assign(requestedShapeValues(i), (tempShapeElement ceq -1L).mux(replacesNegativeOne, tempShapeElement)) + } + + val childPType = childND.st.canonicalPType().asInstanceOf[PCanonicalNDArray] + val rowMajor = fromSValue(childMemo, cb).toSCode(cb, childPType, region, true).memoize(cb, "ndarray_reshape_row_major_layout") + // The canonical row major thing is now in the order we want. We just need to read this with the row major striding that + // would be generated for something of the new shape. + val outputPType = PCanonicalNDArray(rowMajor.st.elementPType.setRequired(true), x.typ.nDims, true) // TODO Should it be required? + val rowMajorStriding = outputPType.makeRowMajorStrides(requestedShapeValues, region, cb) + fromShapeStridesFirstAddress(rowMajor.st.elementPType, requestedShapeValues, rowMajorStriding, rowMajor.firstDataAddress(cb), cb) + } + } + + case x@NDArrayConcat(nds, axis) => + emitI(nds, cb).flatMap(cb) { ndsPCode => + val ndsArraySValue = ndsPCode.asIndexable.memoize(cb, "ndarray_concat_array_of_nds") + val arrLength = ndsArraySValue.loadLength() + cb.ifx(arrLength ceq 0, { + cb._fatal("need at least one ndarray to concatenate") + }) + + val missing: Code[Boolean] = { + if (ndsArraySValue.st.elementEmitType.required) + const(false) + else { + val missing = cb.newLocal[Boolean]("ndarray_concat_result_missing") + cb.assign(missing, false) + // Need to check if the any of the ndarrays are missing. 
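The loop that follows builds the output shape of NDArrayConcat: lengths are summed along the concatenation axis and must agree on every other axis. A standalone sketch of that rule, with hypothetical names and plain Scala collections:

object ConcatShapeSketch {
  // Sum along `axis`, require equality elsewhere.
  def concatShape(shapes: IndexedSeq[IndexedSeq[Long]], axis: Int): IndexedSeq[Long] = {
    val first = shapes.head
    first.indices.map { d =>
      if (d == axis) shapes.map(_(d)).sum
      else {
        require(shapes.forall(_(d) == first(d)), s"mismatched dimensions along axis $d")
        first(d)
      }
    }
  }
}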
+ val missingCheckLoopIdx = cb.newLocal[Int]("ndarray_concat_missing_check_idx") + cb.forLoop(cb.assign(missingCheckLoopIdx, 0), missingCheckLoopIdx < arrLength, cb.assign(missingCheckLoopIdx, missingCheckLoopIdx + 1), + cb.assign(missing, missing | ndsArraySValue.isElementMissing(missingCheckLoopIdx)) + ) + missing + } + } + + IEmitCode(cb, missing, { + val loopIdx = cb.newLocal[Int]("ndarray_concat_shape_check_idx") + val firstND = ndsArraySValue.loadElement(cb, 0).map(cb) { sCode => sCode.asNDArray }.get(cb).memoize(cb, "ndarray_concat_input_0") + + val stagedArrayOfSizesPType = PCanonicalArray(PInt64(), true) + val (pushElement, finish) = stagedArrayOfSizesPType.constructFromFunctions(cb, region, arrLength, false) + + val newShape = (0 until x.typ.nDims).map { dimIdx => + val localDim = cb.newLocal[Long](s"ndarray_concat_output_shape_element_${dimIdx}") + val ndShape = firstND.shapes(cb) + cb.assign(localDim, ndShape(dimIdx)) + if (dimIdx == axis) { + pushElement(cb, EmitCode(Code._empty, false, primitive(localDim)).toI(cb)) + } + + cb.forLoop(cb.assign(loopIdx, 1), loopIdx < arrLength, cb.assign(loopIdx, loopIdx + 1), { + val shapeOfNDAtIdx = ndsArraySValue.loadElement(cb, loopIdx).map(cb) { sCode => sCode.asNDArray }.get(cb).shape(cb).memoize(cb, "ndarray_concat_input_shape") + val dimLength = cb.newLocal[Long]("dimLength", shapeOfNDAtIdx.loadField(cb, dimIdx).get(cb).asInt64.longCode(cb)) + + if (dimIdx == axis) { + pushElement(cb, EmitCode(Code._empty, false, primitive(dimLength)).toI(cb)) + cb.assign(localDim, localDim + dimLength) + } + else { + cb.ifx(dimLength.cne(localDim), + cb._fatal(const(s"NDArrayConcat: mismatched dimensions of input NDArrays along axis ").concat(loopIdx.toS).concat(": expected ") + .concat(localDim.toS).concat(", got ") + .concat(dimLength.toS)) + ) + } + }) + localDim + } + + val stagedArrayOfSizes = finish(cb).memoize(cb, "ndarray_concat_staged_array_of_sizes") + + new NDArrayProducer { + override def elementType: PType = firstND.st.elementPType + + override val shape: IndexedSeq[Value[Long]] = newShape + + val idxVars = shape.indices.map(i => cb.newLocal[Long](s"ndarray_produceer_fall_through_idx_${i}")) + // Need to keep track of the current ndarray being read from. + val currentNDArrayIdx = cb.newLocal[Int]("ndarray_concat_current_active_ndarray_idx") + + override val initAll: EmitCodeBuilder => Unit = { cb => + idxVars.foreach(idxVar => cb.assign(idxVar, 0L)) + cb.assign(currentNDArrayIdx, 0) + } + override val initAxis: IndexedSeq[EmitCodeBuilder => Unit] = + shape.indices.map(i => (cb: EmitCodeBuilder) => { + cb.assign(idxVars(i), 0L) + if (i == axis) { + cb.assign(currentNDArrayIdx, 0) + } + }) + override val stepAxis: IndexedSeq[(EmitCodeBuilder, Value[Long]) => Unit] = { + // For all boring axes, just add to corresponding indexVar. For the single interesting axis, + // also consider updating the currently tracked ndarray. + shape.indices.map(idx => (cb: EmitCodeBuilder, step: Value[Long]) => { + // Start by updating the idxVar by the step + val curIdxVar = idxVars(idx) + cb.assign(curIdxVar, curIdxVar + step) + if (idx == axis) { + // If bigger than current ndarray, then we need to subtract out the size of this ndarray, increment to the next ndarray, and see if we are happy yet. 
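The stepping logic described above amounts to: given a position along the concatenation axis and the per-input sizes along that axis, subtract sizes until the position falls inside one input. The generated loop below does the same thing incrementally as the cursor advances; here is a standalone sketch of the lookup with hypothetical names:

object ConcatLocateSketch {
  // Which input holds position `pos` along the concat axis, and the index local to it.
  def locate(pos: Long, sizes: IndexedSeq[Long]): (Int, Long) = {
    var idx = 0
    var local = pos
    while (idx < sizes.length - 1 && local >= sizes(idx)) {
      local -= sizes(idx)
      idx += 1
    }
    (idx, local)
  }
}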
+ val shouldLoop = cb.newLocal[Boolean]("should_loop", curIdxVar >= stagedArrayOfSizes.loadElement(cb, currentNDArrayIdx).get(cb).asInt64.longCode(cb)) + cb.whileLoop(shouldLoop, + { + cb.assign(curIdxVar, curIdxVar - stagedArrayOfSizes.loadElement(cb, currentNDArrayIdx).get(cb).asInt64.longCode(cb)) + cb.assign(currentNDArrayIdx, currentNDArrayIdx + 1) + cb.ifx(currentNDArrayIdx < stagedArrayOfSizes.loadLength(), { + cb.assign(shouldLoop, curIdxVar >= stagedArrayOfSizes.loadElement(cb, currentNDArrayIdx).get(cb).asInt64.longCode(cb)) + }, { + cb.assign(shouldLoop, false) + }) + } + ) + } + }) + } + + override def loadElementAtCurrentAddr(cb: EmitCodeBuilder): SCode = { + val currentNDArray = ndsArraySValue.loadElement(cb, currentNDArrayIdx).get(cb).asNDArray.memoize(cb, "ndarray_concat_current_active_ndarray") + currentNDArray.loadElement(idxVars, cb) + } + } + }) + } + case NDArraySlice(child, slicesIR) => + deforestRecur(child, cb).flatMap(cb) { childProducer => + emitI(slicesIR, cb).flatMap(cb) { slicesPC => + val slicesValue = slicesPC.asBaseStruct.memoize(cb, "ndarray_slice_tuple_pv") + + val (indexingIndices, slicingIndices) = slicesValue.st.fieldTypes.zipWithIndex.partition { case (pFieldType, idx) => + pFieldType.isPrimitive + } match { + case (a, b) => (a.map(_._2), b.map(_._2)) + } + + IEmitCode.multiFlatMap[Int, SCode, NDArrayProducer](indexingIndices, indexingIndex => slicesValue.loadField(cb, indexingIndex), cb) { indexingSCodes => + val indexingValues = indexingSCodes.map(sCode => cb.newLocal("ndarray_slice_indexer", sCode.asInt64.longCode(cb))) + val slicingValueTriplesBuilder = new BoxedArrayBuilder[(Value[Long], Value[Long], Value[Long])]() + val outputShape = { + IEmitCode.multiFlatMap[Int, SCode, IndexedSeq[Value[Long]]](slicingIndices, + valueIdx => slicesValue.loadField(cb, valueIdx), cb) { sCodeSlices: IndexedSeq[SCode] => + IEmitCode.multiFlatMap(sCodeSlices, { sCodeSlice: SCode => + val sValueSlice = sCodeSlice.asBaseStruct.memoize(cb, "ndarray_slice_sCodeSlice") + // I know I have a tuple of three elements here, start, stop, step + + val newDimSizeI = sValueSlice.loadField(cb, 0).flatMap(cb) { startC => + sValueSlice.loadField(cb, 1).flatMap(cb) { stopC => + sValueSlice.loadField(cb, 2).map(cb) { stepC => + val start = cb.newLocal[Long]("ndarray_slice_start", startC.asLong.longCode(cb)) + val stop = cb.newLocal[Long]("ndarray_slice_stop", stopC.asLong.longCode(cb)) + val step = cb.newLocal[Long]("ndarray_slice_step", stepC.asLong.longCode(cb)) + + slicingValueTriplesBuilder.push((start, stop, step)) + + val newDimSize = cb.newLocal[Long]("new_dim_size") + cb.ifx(step >= 0L && start <= stop, { + cb.assign(newDimSize, const(1L) + ((stop - start) - 1L) / step) + }, { + cb.ifx(step < 0L && start >= stop, { + cb.assign(newDimSize, (((stop - start) + 1L) / step) + 1L) + }, { + cb.assign(newDimSize, 0L) + }) + }) + newDimSize + } + } + } + newDimSizeI + }, cb)(x => IEmitCode(cb, false, x)) + } + } + val slicingValueTriples = slicingValueTriplesBuilder.result() + + outputShape.map(cb) { outputShapeSeq => + new NDArrayProducer() { + override def elementType: PType = childProducer.elementType + + override val shape: IndexedSeq[Value[Long]] = outputShapeSeq + + override val initAll: EmitCodeBuilder => Unit = cb => { + childProducer.initAll(cb) + // Need to get the indexingIndices to the right starting points + indexingIndices.zipWithIndex.foreach { case (childIdx, ordinalIdx) => + childProducer.initAxis(childIdx) + childProducer.stepAxis(childIdx)(cb, 
indexingValues(ordinalIdx)) + } + } + + override val initAxis: IndexedSeq[EmitCodeBuilder => Unit] = shape.indices.map(idx => { (cb: EmitCodeBuilder) => + val whichSlicingAxis = slicingIndices(idx) + val slicingValue = slicingValueTriples(idx) + childProducer.initAxis(whichSlicingAxis)(cb) + childProducer.stepAxis(whichSlicingAxis)(cb, slicingValue._1) + }) + override val stepAxis: IndexedSeq[(EmitCodeBuilder, Value[Long]) => Unit] = shape.indices.map(idx => { (cb: EmitCodeBuilder, outerStep: Value[Long]) => + // SlicingIndices is a map from my coordinates to my child's coordinates. + val whichSlicingAxis = slicingIndices(idx) + val (start, stop, sliceStep) = slicingValueTriples(idx) + val innerStep = cb.newLocal[Long]("ndarray_producer_slice_child_step", sliceStep * outerStep) + childProducer.stepAxis(whichSlicingAxis)(cb, innerStep) + }) + + override def loadElementAtCurrentAddr(cb: EmitCodeBuilder): SCode = childProducer.loadElementAtCurrentAddr(cb) + } + } + } + } + } + case NDArrayFilter(child, filters) => + deforestRecur(child, cb).map(cb) { childProducer: NDArrayProducer => + + val filterWasMissing = (0 until filters.size).map(i => cb.newField[Boolean](s"ndarray_filter_${i}_was_missing")) + val filtPValues = new Array[SIndexableValue](filters.size) + val outputShape = childProducer.shape.indices.map(idx => cb.newField[Long](s"ndarray_filter_output_shapes_${idx}")) + + filters.zipWithIndex.foreach { case (filt, i) => + // Each filt is a sequence that may be missing with elements that may not be missing. + emitI(filt, cb).consume(cb, + { + cb.assign(outputShape(i), childProducer.shape(i)) + cb.assign(filterWasMissing(i), true) + }, + { + filtArrayPC => { + val filtArrayPValue = filtArrayPC.asIndexable.memoize(cb, s"ndarray_filt_array_${i}") + filtPValues(i) = filtArrayPValue + cb.assign(outputShape(i), filtArrayPValue.loadLength().toL) + cb.assign(filterWasMissing(i), false) + } + } + ) + } + + new NDArrayProducer { + override def elementType: PType = childProducer.elementType + + override val shape: IndexedSeq[Value[Long]] = outputShape + + // Plan: Keep track of current indices on each axis, use them to step through filtered + // dimensions accordingly. 
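On a filtered axis, the output cursor moves through the filter's index array while the child cursor must jump by the difference between the looked-up positions, which is what the stepAxis code below computes. A one-line sketch with hypothetical names:

object FilterStepSketch {
  // Child-coordinate distance corresponding to an output-coordinate step of `step`.
  def childStep(filterIdx: IndexedSeq[Long], outputPos: Int, step: Int): Long =
    filterIdx(outputPos + step) - filterIdx(outputPos)
}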
+ val idxVars = shape.indices.map(idx => cb.newLocal[Long](s"ndarray_producer_filter_index_${idx}")) + + override val initAll: EmitCodeBuilder => Unit = cb => { + idxVars.foreach(idxVar => cb.assign(idxVar, 0L)) + childProducer.initAll(cb) + } + override val initAxis: IndexedSeq[EmitCodeBuilder => Unit] = shape.indices.map { idx => + (cb: EmitCodeBuilder) => { + cb.assign(idxVars(idx), 0L) + childProducer.initAxis(idx)(cb) + cb.ifx(filterWasMissing(idx), { + /* pass */ + }, { + val startPoint = cb.newLocal[Long]("ndarray_producer_filter_init_axis", filtPValues(idx).loadElement(cb, idxVars(idx).toI).get( + cb, s"NDArrayFilter: can't filter on missing index (axis=$idx)").asLong.longCode(cb)) + childProducer.stepAxis(idx)(cb, startPoint) + }) + } + } + override val stepAxis: IndexedSeq[(EmitCodeBuilder, Value[Long]) => Unit] = shape.indices.map { idx => + (cb: EmitCodeBuilder, step: Value[Long]) => { + cb.ifx(filterWasMissing(idx), { + childProducer.stepAxis(idx)(cb, step) + cb.assign(idxVars(idx), idxVars(idx) + step) + }, { + val currentPos = filtPValues(idx).loadElement(cb, idxVars(idx).toI).get(cb, s"NDArrayFilter: can't filter on missing index (axis=$idx)").asLong.longCode(cb) + cb.assign(idxVars(idx), idxVars(idx) + step) + val newPos = filtPValues(idx).loadElement(cb, idxVars(idx).toI).get(cb, s"NDArrayFilter: can't filter on missing index (axis=$idx)").asLong.longCode(cb) + val stepSize = cb.newLocal[Long]("ndarray_producer_filter_step_size", newPos - currentPos) + childProducer.stepAxis(idx)(cb, stepSize) + }) + } + } + + override def loadElementAtCurrentAddr(cb: EmitCodeBuilder): SCode = childProducer.loadElementAtCurrentAddr(cb) + } + } + case NDArrayAgg(child, axesToSumOut) => + deforestRecur(child, cb).map(cb) { childProducer: NDArrayProducer => + val childDims = child.typ.asInstanceOf[TNDArray].nDims + val axesToKeep = (0 until childDims).filter(axis => !axesToSumOut.contains(axis)) + val newOutputShape = axesToKeep.map(idx => childProducer.shape(idx)) + val newOutputShapeComplement = axesToSumOut.map(idx => childProducer.shape(idx)) + + val newElementType: PType = child.typ.asInstanceOf[TNDArray].elementType match { + case TInt32 => PInt32Required + case TInt64 => PInt64Required + case TFloat32 => PFloat32Required + case TFloat64 => PFloat64Required + } + new NDArrayProducer { + override def elementType: PType = newElementType + + override val shape: IndexedSeq[Value[Long]] = newOutputShape + + override val initAll: EmitCodeBuilder => Unit = childProducer.initAll + // Important part here is that NDArrayAgg has fewer axes than its child. We need to map + between them. + override val initAxis: IndexedSeq[EmitCodeBuilder => Unit] = { + axesToKeep.map(idx => childProducer.initAxis(idx)) + } + + override val stepAxis: IndexedSeq[(EmitCodeBuilder, Value[Long]) => Unit] = { + axesToKeep.map(idx => childProducer.stepAxis(idx)) + } + + override def loadElementAtCurrentAddr(cb: EmitCodeBuilder): SCode = { + // Idea: For each axis that is being summed over, step through and keep a running sum. 
+ val numericElementType = elementType.asInstanceOf[PNumeric] + val runningSum = NumericPrimitives.newLocal(cb, "ndarray_agg_running_sum", numericElementType.virtualType) + cb.assign(runningSum, numericElementType.zero) + + val initsToSumOut = axesToSumOut.map(idx => childProducer.initAxis(idx)) + val stepsToSumOut = axesToSumOut.map(idx => (cb: EmitCodeBuilder) => childProducer.stepAxis(idx)(cb, 1L)) + + SNDArray.forEachIndexWithInitAndIncColMajor(cb, newOutputShapeComplement, initsToSumOut, stepsToSumOut, "ndarray_producer_ndarray_agg") { (cb, _) => + cb.assign(runningSum, numericElementType.add(runningSum, SType.extractPrimCode(cb, childProducer.loadElementAtCurrentAddr(cb)))) + } + primitive(numericElementType.virtualType, runningSum) + } + } + } + case _ => { + val ndI = emitI(x, cb) + ndI.map(cb) { ndPCode => + val ndPv = ndPCode.asNDArray.memoize(cb, "deforestNDArray_fall_through_ndarray") + fromSValue(ndPv, cb) + } + } + } + } + + deforestRecur(x) + } + + emitNDInSeparateMethod("foo", cb, ndIR, region, env, container, loopEnv) + } + + def fromSValue(ndSv: SNDArrayValue, cb: EmitCodeBuilder): NDArrayProducer = { + val ndSvShape = ndSv.shapes(cb) + val strides = ndSv.strides(cb) + + fromShapeStridesFirstAddress(ndSv.st.elementPType, ndSvShape, strides, ndSv.firstDataAddress(cb), cb) + } + + def fromShapeStridesFirstAddress(newElementType: PType, ndSvShape: IndexedSeq[Value[Long]], strides: IndexedSeq[Value[Long]], firstDataAddress: Value[Long], cb: EmitCodeBuilder): NDArrayProducer = { + val counters = ndSvShape.indices.map(i => cb.newLocal[Long](s"ndarray_producer_fall_through_idx_${i}")) + + assert(ndSvShape.size == strides.size, s"shape.size = ${ndSvShape.size} != strides.size = ${strides.size}") + + new NDArrayProducer { + override def elementType: PType = newElementType + override val shape: IndexedSeq[Value[Long]] = ndSvShape + + override val initAll: EmitCodeBuilder => Unit = cb => { + counters.foreach(ctr => cb.assign(ctr, 0L)) + } + override val initAxis: IndexedSeq[EmitCodeBuilder => Unit] = { + shape.indices.map(i => (cb: EmitCodeBuilder) => { + cb.assign(counters(i), 0L) + }) + } + override val stepAxis: IndexedSeq[(EmitCodeBuilder, Value[Long]) => Unit] = { + shape.indices.map{ i => + (cb: EmitCodeBuilder, step: Value[Long]) => { + cb.assign(counters(i), counters(i) + step * strides(i)) + } + } + } + + override def loadElementAtCurrentAddr(cb: EmitCodeBuilder): SCode = { + val offset = counters.foldLeft[Code[Long]](const(0L)){ (a, b) => a + b} + val loaded = elementType.loadCheapSCode(cb, firstDataAddress + offset) + val memoLoaded = loaded.memoize(cb, "temp_memo") + memoLoaded.get + } + } + } + + def createBroadcastMask(cb: EmitCodeBuilder, shape: IndexedSeq[Value[Long]]): IndexedSeq[Value[Long]] = { + val ffff = 0xFFFFFFFFFFFFFFFFL + shape.indices.map { idx => + cb.newLocal[Long](s"ndarray_producer_broadcast_mask_${idx}", (shape(idx) ceq 1L).mux(0L, ffff)) + } + } + + def broadcast(cb: EmitCodeBuilder, prod: NDArrayProducer,ctx: String): NDArrayProducer = { + val broadcastMask = createBroadcastMask(cb, prod.shape) + val newSteps = prod.stepAxis.indices.map { idx => + (cb: EmitCodeBuilder, step: Value[Long]) => { + val maskedStep = cb.newLocal[Long]("ndarray_producer_masked_step", step & broadcastMask(idx)) + prod.stepAxis(idx)(cb, maskedStep) + } + } + prod.copy(astepAxis = newSteps) + } +} diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/BinaryOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/BinaryOrdering.scala index 85152faa92c..4ec321abdf1 
100644 --- a/hail/src/main/scala/is/hail/expr/ir/orderings/BinaryOrdering.scala +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/BinaryOrdering.scala @@ -2,7 +2,7 @@ package is.hail.expr.ir.orderings import is.hail.asm4s._ import is.hail.expr.ir.{EmitClassBuilder, EmitCodeBuilder} -import is.hail.types.physical.PCode +import is.hail.types.physical.stypes.SCode import is.hail.types.physical.stypes.interfaces.{SBinary, SBinaryValue} object BinaryOrdering { @@ -13,7 +13,7 @@ object BinaryOrdering { val type1: SBinary = t1 val type2: SBinary = t2 - def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = { + def _compareNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Int] = { val xv: SBinaryValue = x.asBinary.memoize(cb, "xv") val yv: SBinaryValue = y.asBinary.memoize(cb, "yv") val xlen = cb.newLocal[Int]("xlen", xv.loadLength()) diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/CallOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/CallOrdering.scala index 813b807059b..8a13ba01f41 100644 --- a/hail/src/main/scala/is/hail/expr/ir/orderings/CallOrdering.scala +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/CallOrdering.scala @@ -2,8 +2,7 @@ package is.hail.expr.ir.orderings import is.hail.asm4s.Code import is.hail.expr.ir.{EmitClassBuilder, EmitCodeBuilder} -import is.hail.types.physical.PCode -import is.hail.types.physical.stypes.SType +import is.hail.types.physical.stypes.{SCode, SType} import is.hail.types.physical.stypes.interfaces.SCall object CallOrdering { @@ -15,7 +14,7 @@ object CallOrdering { override val type1: SType = t1 override val type2: SType = t2 - override def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = { + override def _compareNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Int] = { Code.invokeStatic2[java.lang.Integer, Int, Int, Int]("compare", x.asCall.loadCanonicalRepresentation(cb), y.asCall.loadCanonicalRepresentation(cb)) } diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/CodeOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/CodeOrdering.scala index 5de6939cdeb..e993c73fc3c 100644 --- a/hail/src/main/scala/is/hail/expr/ir/orderings/CodeOrdering.scala +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/CodeOrdering.scala @@ -3,7 +3,7 @@ package is.hail.expr.ir.orderings import is.hail.asm4s._ import is.hail.expr.ir.{EmitClassBuilder, EmitCode, EmitCodeBuilder} import is.hail.types.physical._ -import is.hail.types.physical.stypes.SType +import is.hail.types.physical.stypes.{SCode, SType} import is.hail.types.physical.stypes.interfaces._ import is.hail.types.physical.stypes.primitives._ import is.hail.types.virtual._ @@ -51,11 +51,11 @@ object CodeOrdering { } t1.virtualType match { - case TInt32 => Int32Ordering.make(t1.asInstanceOf[SInt32], t2.asInstanceOf[SInt32], ecb) - case TInt64 => Int64Ordering.make(t1.asInstanceOf[SInt64], t2.asInstanceOf[SInt64], ecb) - case TFloat32 => Float32Ordering.make(t1.asInstanceOf[SFloat32], t2.asInstanceOf[SFloat32], ecb) - case TFloat64 => Float64Ordering.make(t1.asInstanceOf[SFloat64], t2.asInstanceOf[SFloat64], ecb) - case TBoolean => BooleanOrdering.make(t1.asInstanceOf[SBoolean], t2.asInstanceOf[SBoolean], ecb) + case TInt32 => Int32Ordering.make(t1.asInstanceOf[SInt32.type], t2.asInstanceOf[SInt32.type], ecb) + case TInt64 => Int64Ordering.make(t1.asInstanceOf[SInt64.type], t2.asInstanceOf[SInt64.type], ecb) + case TFloat32 => Float32Ordering.make(t1.asInstanceOf[SFloat32.type], t2.asInstanceOf[SFloat32.type], ecb) + case 
TFloat64 => Float64Ordering.make(t1.asInstanceOf[SFloat64.type], t2.asInstanceOf[SFloat64.type], ecb) + case TBoolean => BooleanOrdering.make(t1.asInstanceOf[SBoolean.type], t2.asInstanceOf[SBoolean.type], ecb) case TCall => CallOrdering.make(t1.asInstanceOf[SCall], t2.asInstanceOf[SCall], ecb) case TString => StringOrdering.make(t1.asInstanceOf[SString], t2.asInstanceOf[SString], ecb) case TBinary => BinaryOrdering.make(t1.asInstanceOf[SBinary], t2.asInstanceOf[SBinary], ecb) @@ -77,11 +77,11 @@ abstract class CodeOrdering { def reversed: Boolean = false - final def checkedPCode[T](cb: EmitCodeBuilder, arg1: PCode, arg2: PCode, context: String, - f: (EmitCodeBuilder, PCode, PCode) => Code[T])(implicit ti: TypeInfo[T]): Code[T] = { - if (!arg1.st.equalsExceptTopLevelRequiredness(type1)) + final def checkedSCode[T](cb: EmitCodeBuilder, arg1: SCode, arg2: SCode, context: String, + f: (EmitCodeBuilder, SCode, SCode) => Code[T])(implicit ti: TypeInfo[T]): Code[T] = { + if (arg1.st != type1) throw new RuntimeException(s"CodeOrdering: $context: type mismatch (left)\n generated: $type1\n argument: ${ arg1.st }") - if (!arg2.st.equalsExceptTopLevelRequiredness(type2)) + if (arg2.st != type2) throw new RuntimeException(s"CodeOrdering: $context: type mismatch (right)\n generated: $type2\n argument: ${ arg2.st }") val cacheKey = ("ordering", reversed, type1, type2, context) @@ -89,8 +89,8 @@ abstract class CodeOrdering { FastIndexedSeq(arg1.st.paramType, arg2.st.paramType), ti) { mb => mb.emitWithBuilder[T] { cb => - val arg1 = mb.getPCodeParam(1) - val arg2 = mb.getPCodeParam(2) + val arg1 = mb.getSCodeParam(1) + val arg2 = mb.getSCodeParam(2) f(cb, arg1, arg2) } } @@ -99,12 +99,12 @@ abstract class CodeOrdering { final def checkedEmitCode[T](cb: EmitCodeBuilder, arg1: EmitCode, arg2: EmitCode, missingEqual: Boolean, context: String, f: (EmitCodeBuilder, EmitCode, EmitCode, Boolean) => Code[T])(implicit ti: TypeInfo[T]): Code[T] = { - if (!arg1.st.equalsExceptTopLevelRequiredness(type1)) + if (arg1.st != type1) throw new RuntimeException(s"CodeOrdering: $context: type mismatch (left)\n generated: $type1\n argument: ${ arg1.st }") - if (!arg2.st.equalsExceptTopLevelRequiredness(type2)) + if (arg2.st != type2) throw new RuntimeException(s"CodeOrdering: $context: type mismatch (right)\n generated: $type2\n argument: ${ arg2.st }") - val cacheKey = ("ordering", reversed, type1, type2, context, missingEqual) + val cacheKey = ("ordering", reversed, arg1.emitType, arg2.emitType, context, missingEqual) val mb = cb.emb.ecb.getOrGenEmitMethod(s"ord_$context", cacheKey, FastIndexedSeq(arg1.emitParamType, arg2.emitParamType), ti) { mb => @@ -118,28 +118,28 @@ abstract class CodeOrdering { } - final def compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = { - checkedPCode(cb, x, y, "compareNonnull", _compareNonnull) + final def compareNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Int] = { + checkedSCode(cb, x, y, "compareNonnull", _compareNonnull) } - final def ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { - checkedPCode(cb, x, y, "ltNonnull", _ltNonnull) + final def ltNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { + checkedSCode(cb, x, y, "ltNonnull", _ltNonnull) } - final def lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { - checkedPCode(cb, x, y, "lteqNonnull", _lteqNonnull) + final def lteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { + checkedSCode(cb, x, y, "lteqNonnull", _lteqNonnull) } - 
final def gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { - checkedPCode(cb, x, y, "gtNonnull", _gtNonnull) + final def gtNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { + checkedSCode(cb, x, y, "gtNonnull", _gtNonnull) } - final def gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { - checkedPCode(cb, x, y, "gteqNonnull", _gteqNonnull) + final def gteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { + checkedSCode(cb, x, y, "gteqNonnull", _gteqNonnull) } - final def equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { - checkedPCode(cb, x, y, "equivNonnull", _equivNonnull) + final def equivNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { + checkedSCode(cb, x, y, "equivNonnull", _equivNonnull) } final def lt(cb: EmitCodeBuilder, x: EmitCode, y: EmitCode, missingEqual: Boolean): Code[Boolean] = { @@ -166,17 +166,17 @@ abstract class CodeOrdering { checkedEmitCode(cb, x, y, missingEqual, "compare", _compare) } - def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] + def _compareNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Int] - def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] + def _ltNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] - def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] + def _lteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] - def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] + def _gtNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] - def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] + def _gteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] - def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] + def _equivNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] def _compare(cb: EmitCodeBuilder, x: EmitCode, y: EmitCode, missingEqual: Boolean = true): Code[Int] = { val xm = cb.newLocal("cord_compare_xm", x.m) @@ -269,28 +269,28 @@ abstract class CodeOrdering { override def reversed: Boolean = true - override def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = outer._compareNonnull(cb, y, x) + override def _compareNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Int] = outer._compareNonnull(cb, y, x) - override def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = outer._ltNonnull(cb, y, x) + override def _ltNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = outer._ltNonnull(cb, y, x) - override def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = outer._lteqNonnull(cb, y, x) + override def _lteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = outer._lteqNonnull(cb, y, x) - override def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = outer._gtNonnull(cb, y, x) + override def _gtNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = outer._gtNonnull(cb, y, x) - override def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = outer._gteqNonnull(cb, y, x) + override def _gteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = outer._gteqNonnull(cb, y, x) - override def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = outer._equivNonnull(cb, y, x) + override def _equivNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = outer._equivNonnull(cb, y, x) } } 
abstract class CodeOrderingCompareConsistentWithOthers extends CodeOrdering { - def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = compareNonnull(cb, x, y) < 0 + def _ltNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = compareNonnull(cb, x, y) < 0 - def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = compareNonnull(cb, x, y) <= 0 + def _lteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = compareNonnull(cb, x, y) <= 0 - def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = compareNonnull(cb, x, y) > 0 + def _gtNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = compareNonnull(cb, x, y) > 0 - def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = compareNonnull(cb, x, y) >= 0 + def _gteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = compareNonnull(cb, x, y) >= 0 - def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = compareNonnull(cb, x, y).ceq(0) + def _equivNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = compareNonnull(cb, x, y).ceq(0) } diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/IntervalOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/IntervalOrdering.scala index 06152f3280b..b8a8db77d74 100644 --- a/hail/src/main/scala/is/hail/expr/ir/orderings/IntervalOrdering.scala +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/IntervalOrdering.scala @@ -2,8 +2,8 @@ package is.hail.expr.ir.orderings import is.hail.asm4s.{Code, CodeLabel} import is.hail.expr.ir.{EmitClassBuilder, EmitCode, EmitCodeBuilder} -import is.hail.types.physical.stypes.interfaces.SInterval -import is.hail.types.physical.{PCode, PIntervalCode, PIntervalValue} +import is.hail.types.physical.stypes.SCode +import is.hail.types.physical.stypes.interfaces.{SInterval, SIntervalCode, SIntervalValue} object IntervalOrdering { @@ -12,25 +12,25 @@ object IntervalOrdering { val type1: SInterval = t1 val type2: SInterval = t2 - private val setup: (EmitCodeBuilder, PCode, PCode) => (PIntervalValue, PIntervalValue) = { - case (cb, lhs: PIntervalCode, rhs: PIntervalCode) => + private val setup: (EmitCodeBuilder, SCode, SCode) => (SIntervalValue, SIntervalValue) = { + case (cb, lhs: SIntervalCode, rhs: SIntervalCode) => lhs.memoize(cb, "intervalord_lhs") -> rhs.memoize(cb, "intervalord_rhs") } - override def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = { + override def _compareNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Int] = { val pointCompare = ecb.getOrderingFunction(t1.pointType, t2.pointType, CodeOrdering.Compare()) val cmp = cb.newLocal[Int]("intervalord_cmp", 0) val (lhs, rhs) = setup(cb, x, y) - val lstart = EmitCode.fromI(cb.emb)(lhs.loadStart(_).typecast) - val rstart = EmitCode.fromI(cb.emb)(rhs.loadStart(_).typecast) + val lstart = EmitCode.fromI(cb.emb)(lhs.loadStart(_)) + val rstart = EmitCode.fromI(cb.emb)(rhs.loadStart(_)) cb.assign(cmp, pointCompare(cb, lstart, rstart)) cb.ifx(cmp.ceq(0), { cb.ifx(lhs.includesStart().cne(rhs.includesStart()), { cb.assign(cmp, lhs.includesStart().mux(-1, 1)) }, { - val lend = EmitCode.fromI(cb.emb)(lhs.loadEnd(_).typecast) - val rend = EmitCode.fromI(cb.emb)(rhs.loadEnd(_).typecast) + val lend = EmitCode.fromI(cb.emb)(lhs.loadEnd(_)) + val rend = EmitCode.fromI(cb.emb)(rhs.loadEnd(_)) cb.assign(cmp, pointCompare(cb, lend, rend)) cb.ifx(cmp.ceq(0), { cb.ifx(lhs.includesEnd().cne(rhs.includesEnd()), { @@ -43,7 +43,7 @@ object 
IntervalOrdering { cmp } - override def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + override def _equivNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { val pointEq = ecb.getOrderingFunction(t1.pointType, t2.pointType, CodeOrdering.Equiv()) val Lout = CodeLabel() @@ -60,19 +60,19 @@ object IntervalOrdering { exitWith(false) }) - val lstart = EmitCode.fromI(cb.emb)(lhs.loadStart(_).typecast) - val rstart = EmitCode.fromI(cb.emb)(rhs.loadStart(_).typecast) + val lstart = EmitCode.fromI(cb.emb)(lhs.loadStart(_)) + val rstart = EmitCode.fromI(cb.emb)(rhs.loadStart(_)) cb.ifx(!pointEq(cb, lstart, rstart), exitWith(false)) - val lend = EmitCode.fromI(cb.emb)(lhs.loadEnd(_).typecast) - val rend = EmitCode.fromI(cb.emb)(rhs.loadEnd(_).typecast) + val lend = EmitCode.fromI(cb.emb)(lhs.loadEnd(_)) + val rend = EmitCode.fromI(cb.emb)(rhs.loadEnd(_)) cb.ifx(!pointEq(cb, lend, rend), exitWith(false)) cb.define(Lout) ret } - override def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + override def _ltNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { val pointLt = ecb.getOrderingFunction(t1.pointType, t2.pointType, CodeOrdering.Lt()) val pointEq = ecb.getOrderingFunction(t1.pointType, t2.pointType, CodeOrdering.Equiv()) @@ -84,16 +84,16 @@ object IntervalOrdering { } val (lhs, rhs) = setup(cb, x, y) - val lstart = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadStart(_).typecast), "linterval_start") - val rstart = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadStart(_).typecast), "rinterval_start") + val lstart = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadStart(_)), "linterval_start") + val rstart = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadStart(_)), "rinterval_start") cb.ifx(pointLt(cb, lstart, rstart), exitWith(true)) cb.ifx(!pointEq(cb, lstart, rstart), exitWith(false)) cb.ifx(lhs.includesStart() && !rhs.includesStart(), exitWith(true)) cb.ifx(lhs.includesStart().cne(rhs.includesStart()), exitWith(false)) - val lend = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadEnd(_).typecast), "linterval_end") - val rend = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadEnd(_).typecast), "rinterval_end") + val lend = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadEnd(_)), "linterval_end") + val rend = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadEnd(_)), "rinterval_end") cb.ifx(pointLt(cb, lend, rend), exitWith(true)) cb.assign(ret, pointEq(cb, lend, rend) && !lhs.includesEnd() && rhs.includesEnd()) @@ -102,7 +102,7 @@ object IntervalOrdering { ret } - override def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + override def _lteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { val pointLtEq = ecb.getOrderingFunction(t1.pointType, t2.pointType, CodeOrdering.Lteq()) val pointEq = ecb.getOrderingFunction(t1.pointType, t2.pointType, CodeOrdering.Equiv()) @@ -114,16 +114,16 @@ object IntervalOrdering { } val (lhs, rhs) = setup(cb, x, y) - val lstart = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadStart(_).typecast), "linterval_start") - val rstart = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadStart(_).typecast), "rinterval_start") + val lstart = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadStart(_)), "linterval_start") + val rstart = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadStart(_)), "rinterval_start") cb.ifx(!pointLtEq(cb, lstart, rstart), exitWith(false)) cb.ifx(!pointEq(cb, lstart, rstart), exitWith(true)) cb.ifx(lhs.includesStart() && !rhs.includesStart(), exitWith(true)) 
cb.ifx(lhs.includesStart().cne(rhs.includesStart()), exitWith(false)) - val lend = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadEnd(_).typecast), "linterval_end") - val rend = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadEnd(_).typecast), "rinterval_end") + val lend = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadEnd(_)), "linterval_end") + val rend = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadEnd(_)), "rinterval_end") cb.ifx(!pointLtEq(cb, lend, rend), exitWith(false)) cb.assign(ret, !pointEq(cb, lend, rend) || !lhs.includesEnd() || rhs.includesEnd()) @@ -131,7 +131,7 @@ object IntervalOrdering { ret } - override def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + override def _gtNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { val pointGt = ecb.getOrderingFunction(t1.pointType, t2.pointType, CodeOrdering.Gt()) val pointEq = ecb.getOrderingFunction(t1.pointType, t2.pointType, CodeOrdering.Equiv()) @@ -143,16 +143,16 @@ object IntervalOrdering { } val (lhs, rhs) = setup(cb, x, y) - val lstart = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadStart(_).typecast), "linterval_start") - val rstart = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadStart(_).typecast), "rinterval_start") + val lstart = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadStart(_)), "linterval_start") + val rstart = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadStart(_)), "rinterval_start") cb.ifx(pointGt(cb, lstart, rstart), exitWith(true)) cb.ifx(!pointEq(cb, lstart, rstart), exitWith(false)) cb.ifx(!lhs.includesStart() && rhs.includesStart(), exitWith(true)) cb.ifx(lhs.includesStart().cne(rhs.includesStart()), exitWith(false)) - val lend = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadEnd(_).typecast), "linterval_end") - val rend = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadEnd(_).typecast), "rinterval_end") + val lend = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadEnd(_)), "linterval_end") + val rend = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadEnd(_)), "rinterval_end") cb.ifx(pointGt(cb, lend, rend), exitWith(true)) cb.assign(ret, pointEq(cb, lend, rend) && lhs.includesEnd() && !rhs.includesEnd()) @@ -161,7 +161,7 @@ object IntervalOrdering { ret } - override def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + override def _gteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { val pointGtEq = ecb.getOrderingFunction(t1.pointType, t2.pointType, CodeOrdering.Gteq()) val pointEq = ecb.getOrderingFunction(t1.pointType, t2.pointType, CodeOrdering.Equiv()) @@ -173,16 +173,16 @@ object IntervalOrdering { } val (lhs, rhs) = setup(cb, x, y) - val lstart = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadStart(_).typecast), "linterval_start") - val rstart = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadStart(_).typecast), "rinterval_start") + val lstart = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadStart(_)), "linterval_start") + val rstart = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadStart(_)), "rinterval_start") cb.ifx(!pointGtEq(cb, lstart, rstart), exitWith(false)) cb.ifx(!pointEq(cb, lstart, rstart), exitWith(true)) cb.ifx(!lhs.includesStart() && rhs.includesStart(), exitWith(true)) cb.ifx(lhs.includesStart().cne(rhs.includesStart()), exitWith(false)) - val lend = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadEnd(_).typecast), "linterval_end") - val rend = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadEnd(_).typecast), "rinterval_end") + val lend = cb.memoize(EmitCode.fromI(cb.emb)(lhs.loadEnd(_)), "linterval_end") + val rend = cb.memoize(EmitCode.fromI(cb.emb)(rhs.loadEnd(_)), "rinterval_end") 
cb.ifx(!pointGtEq(cb, lend, rend), exitWith(false)) cb.assign(ret, !pointEq(cb, lend, rend) || lhs.includesEnd() || !rhs.includesEnd()) diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/IterableOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/IterableOrdering.scala index 447d04d1bfe..a746fd7bc5c 100644 --- a/hail/src/main/scala/is/hail/expr/ir/orderings/IterableOrdering.scala +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/IterableOrdering.scala @@ -2,8 +2,8 @@ package is.hail.expr.ir.orderings import is.hail.asm4s._ import is.hail.expr.ir.{EmitClassBuilder, EmitCode, EmitCodeBuilder} -import is.hail.types.physical.stypes.interfaces.SContainer -import is.hail.types.physical.{PCode, PIndexableValue} +import is.hail.types.physical.stypes.interfaces.{SContainer, SIndexableValue} +import is.hail.types.physical.stypes.SCode object IterableOrdering { @@ -12,25 +12,25 @@ object IterableOrdering { val type1: SContainer = t1 val type2: SContainer = t2 - private[this] def setup(cb: EmitCodeBuilder, lhs: PCode, rhs: PCode): (PIndexableValue, PIndexableValue) = { + private[this] def setup(cb: EmitCodeBuilder, lhs: SCode, rhs: SCode): (SIndexableValue, SIndexableValue) = { val lhsv = lhs.asIndexable.memoize(cb, "container_ord_lhs") val rhsv = rhs.asIndexable.memoize(cb, "container_ord_rhs") lhsv -> rhsv } - private[this] def loop(cb: EmitCodeBuilder, lhs: PIndexableValue, rhs: PIndexableValue)( + private[this] def loop(cb: EmitCodeBuilder, lhs: SIndexableValue, rhs: SIndexableValue)( f: (EmitCode, EmitCode) => Unit ): Unit = { val i = cb.newLocal[Int]("i") val lim = cb.newLocal("lim", lhs.loadLength().min(rhs.loadLength())) cb.forLoop(cb.assign(i, 0), i < lim, cb.assign(i, i + 1), { - val left = EmitCode.fromI(cb.emb)(lhs.loadElement(_, i).typecast) - val right = EmitCode.fromI(cb.emb)(rhs.loadElement(_, i).typecast) + val left = EmitCode.fromI(cb.emb)(lhs.loadElement(_, i)) + val right = EmitCode.fromI(cb.emb)(rhs.loadElement(_, i)) f(left, right) }) } - override def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = { + override def _compareNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Int] = { val elemCmp = ecb.getOrderingFunction(t1.elementType, t2.elementType, CodeOrdering.Compare()) val Lout = CodeLabel() @@ -50,7 +50,7 @@ object IterableOrdering { cmp } - override def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + override def _ltNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { val elemLt = ecb.getOrderingFunction(t1.elementType, t2.elementType, CodeOrdering.Lt()) val elemEq = ecb.getOrderingFunction(t1.elementType, t2.elementType, CodeOrdering.Equiv()) @@ -78,7 +78,7 @@ object IterableOrdering { ret } - override def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + override def _lteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { val elemLtEq = ecb.getOrderingFunction(t1.elementType, t2.elementType, CodeOrdering.Lteq()) val elemEq = ecb.getOrderingFunction(t1.elementType, t2.elementType, CodeOrdering.Equiv()) @@ -106,7 +106,7 @@ object IterableOrdering { ret } - override def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + override def _gtNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { val elemGt = ecb.getOrderingFunction(t1.elementType, t2.elementType, CodeOrdering.Gt()) val elemEq = ecb.getOrderingFunction(t1.elementType, t2.elementType, CodeOrdering.Equiv()) @@ -134,7 +134,7 @@ object IterableOrdering { 
ret } - override def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + override def _gteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { val elemGtEq = ecb.getOrderingFunction(t1.elementType, t2.elementType, CodeOrdering.Gteq()) val elemEq = ecb.getOrderingFunction(t1.elementType, t2.elementType, CodeOrdering.Equiv()) @@ -162,7 +162,7 @@ object IterableOrdering { ret } - override def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + override def _equivNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { val elemEq = ecb.getOrderingFunction(t1.elementType, t2.elementType, CodeOrdering.Equiv()) val ret = cb.newLocal[Boolean]("iterable_eq", true) val Lout = CodeLabel() diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/LocusOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/LocusOrdering.scala index 24d2574e203..b71c72e53b5 100644 --- a/hail/src/main/scala/is/hail/expr/ir/orderings/LocusOrdering.scala +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/LocusOrdering.scala @@ -2,9 +2,9 @@ package is.hail.expr.ir.orderings import is.hail.asm4s.Code import is.hail.expr.ir.{EmitClassBuilder, EmitCodeBuilder, EmitMethodBuilder} +import is.hail.types.physical.stypes.SCode import is.hail.types.physical.stypes.concrete.SCanonicalLocusPointer -import is.hail.types.physical.stypes.interfaces.{SLocus, SStringValue} -import is.hail.types.physical.{PBinary, PCode, PLocusValue} +import is.hail.types.physical.stypes.interfaces.{SLocus, SLocusValue, SStringValue} object LocusOrdering { def make(t1: SLocus, t2: SLocus, ecb: EmitClassBuilder[_]): CodeOrdering = { @@ -17,10 +17,10 @@ object LocusOrdering { require(t1.rg == t2.rg) - def _compareNonnull(cb: EmitCodeBuilder, lhsc: PCode, rhsc: PCode): Code[Int] = { + def _compareNonnull(cb: EmitCodeBuilder, lhsc: SCode, rhsc: SCode): Code[Int] = { val codeRG = cb.emb.getReferenceGenome(t1.rg) - val lhs: PLocusValue = lhsc.asLocus.memoize(cb, "locus_cmp_lhs") - val rhs: PLocusValue = rhsc.asLocus.memoize(cb, "locus_cmp_rhs") + val lhs: SLocusValue = lhsc.asLocus.memoize(cb, "locus_cmp_lhs") + val rhs: SLocusValue = rhsc.asLocus.memoize(cb, "locus_cmp_rhs") val lhsContig = lhs.contig(cb).memoize(cb, "locus_cmp_lcontig").asInstanceOf[SStringValue] val rhsContig = rhs.contig(cb).memoize(cb, "locus_cmp_rcontig").asInstanceOf[SStringValue] @@ -31,8 +31,8 @@ object LocusOrdering { val ret = cb.newLocal[Int]("locus_cmp_ret", 0) cb.ifx(bincmp.compareNonnull(cb, - lhsContig.get.asBytes().asPCode, - rhsContig.get.asBytes().asPCode).ceq(0), { + lhsContig.get.asBytes(), + rhsContig.get.asBytes()).ceq(0), { cb.assign(ret, Code.invokeStatic2[java.lang.Integer, Int, Int, Int]( "compare", lhs.position(cb), rhs.position(cb))) }, { diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/PrimitiveOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/PrimitiveOrdering.scala index 7d8c217e59e..fa9056877b5 100644 --- a/hail/src/main/scala/is/hail/expr/ir/orderings/PrimitiveOrdering.scala +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/PrimitiveOrdering.scala @@ -2,110 +2,110 @@ package is.hail.expr.ir.orderings import is.hail.asm4s.Code import is.hail.expr.ir.{EmitClassBuilder, EmitCodeBuilder} -import is.hail.types.physical.PCode +import is.hail.types.physical.stypes.SCode import is.hail.types.physical.stypes.primitives._ object Int32Ordering { - def make(t1: SInt32, t2: SInt32, ecb: EmitClassBuilder[_]): CodeOrdering = { + def make(t1: SInt32.type, t2: SInt32.type, 
ecb: EmitClassBuilder[_]): CodeOrdering = { new CodeOrdering { - val type1: SInt32 = t1 - val type2: SInt32 = t2 + val type1: SInt32.type = t1 + val type2: SInt32.type = t2 - def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = + def _compareNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Int] = Code.invokeStatic2[java.lang.Integer, Int, Int, Int]("compare", x.asInt.intCode(cb), y.asInt.intCode(cb)) - def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asInt.intCode(cb) < y.asInt.intCode(cb) + def _ltNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asInt.intCode(cb) < y.asInt.intCode(cb) - def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asInt.intCode(cb) <= y.asInt.intCode(cb) + def _lteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asInt.intCode(cb) <= y.asInt.intCode(cb) - def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asInt.intCode(cb) > y.asInt.intCode(cb) + def _gtNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asInt.intCode(cb) > y.asInt.intCode(cb) - def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asInt.intCode(cb) >= y.asInt.intCode(cb) + def _gteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asInt.intCode(cb) >= y.asInt.intCode(cb) - def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asInt.intCode(cb).ceq(y.asInt.intCode(cb)) + def _equivNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asInt.intCode(cb).ceq(y.asInt.intCode(cb)) } } } object Int64Ordering { - def make(t1: SInt64, t2: SInt64, ecb: EmitClassBuilder[_]): CodeOrdering = { + def make(t1: SInt64.type, t2: SInt64.type, ecb: EmitClassBuilder[_]): CodeOrdering = { new CodeOrdering { - val type1: SInt64 = t1 - val type2: SInt64 = t2 + val type1: SInt64.type = t1 + val type2: SInt64.type = t2 - def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = + def _compareNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Int] = Code.invokeStatic2[java.lang.Long, Long, Long, Int]("compare", x.asLong.longCode(cb), y.asLong.longCode(cb)) - def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asLong.longCode(cb) < y.asLong.longCode(cb) + def _ltNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asLong.longCode(cb) < y.asLong.longCode(cb) - def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asLong.longCode(cb) <= y.asLong.longCode(cb) + def _lteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asLong.longCode(cb) <= y.asLong.longCode(cb) - def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asLong.longCode(cb) > y.asLong.longCode(cb) + def _gtNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asLong.longCode(cb) > y.asLong.longCode(cb) - def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asLong.longCode(cb) >= y.asLong.longCode(cb) + def _gteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asLong.longCode(cb) >= y.asLong.longCode(cb) - def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asLong.longCode(cb).ceq(y.asLong.longCode(cb)) + def _equivNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asLong.longCode(cb).ceq(y.asLong.longCode(cb)) } } } object Float32Ordering { - def make(t1: SFloat32, t2: SFloat32, ecb: EmitClassBuilder[_]): 
CodeOrdering = { + def make(t1: SFloat32.type, t2: SFloat32.type, ecb: EmitClassBuilder[_]): CodeOrdering = { new CodeOrdering { - val type1: SFloat32 = t1 - val type2: SFloat32 = t2 + val type1: SFloat32.type = t1 + val type2: SFloat32.type = t2 - def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = + def _compareNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Int] = Code.invokeStatic2[java.lang.Float, Float, Float, Int]("compare", x.asFloat.floatCode(cb), y.asFloat.floatCode(cb)) - def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asFloat.floatCode(cb) < y.asFloat.floatCode(cb) + def _ltNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asFloat.floatCode(cb) < y.asFloat.floatCode(cb) - def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asFloat.floatCode(cb) <= y.asFloat.floatCode(cb) + def _lteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asFloat.floatCode(cb) <= y.asFloat.floatCode(cb) - def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asFloat.floatCode(cb) > y.asFloat.floatCode(cb) + def _gtNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asFloat.floatCode(cb) > y.asFloat.floatCode(cb) - def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asFloat.floatCode(cb) >= y.asFloat.floatCode(cb) + def _gteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asFloat.floatCode(cb) >= y.asFloat.floatCode(cb) - def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asFloat.floatCode(cb).ceq(y.asFloat.floatCode(cb)) + def _equivNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asFloat.floatCode(cb).ceq(y.asFloat.floatCode(cb)) } } } object Float64Ordering { - def make(t1: SFloat64, t2: SFloat64, ecb: EmitClassBuilder[_]): CodeOrdering = { + def make(t1: SFloat64.type, t2: SFloat64.type, ecb: EmitClassBuilder[_]): CodeOrdering = { new CodeOrdering { - val type1: SFloat64 = t1 - val type2: SFloat64 = t2 + val type1: SFloat64.type = t1 + val type2: SFloat64.type = t2 - def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = + def _compareNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Int] = Code.invokeStatic2[java.lang.Double, Double, Double, Int]("compare", x.asDouble.doubleCode(cb), y.asDouble.doubleCode(cb)) - def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asDouble.doubleCode(cb) < y.asDouble.doubleCode(cb) + def _ltNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asDouble.doubleCode(cb) < y.asDouble.doubleCode(cb) - def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asDouble.doubleCode(cb) <= y.asDouble.doubleCode(cb) + def _lteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asDouble.doubleCode(cb) <= y.asDouble.doubleCode(cb) - def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asDouble.doubleCode(cb) > y.asDouble.doubleCode(cb) + def _gtNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asDouble.doubleCode(cb) > y.asDouble.doubleCode(cb) - def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = x.asDouble.doubleCode(cb) >= y.asDouble.doubleCode(cb) + def _gteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asDouble.doubleCode(cb) >= y.asDouble.doubleCode(cb) - def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = 
x.asDouble.doubleCode(cb).ceq(y.asDouble.doubleCode(cb)) + def _equivNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = x.asDouble.doubleCode(cb).ceq(y.asDouble.doubleCode(cb)) } } } object BooleanOrdering { - def make(t1: SBoolean, t2: SBoolean, ecb: EmitClassBuilder[_]): CodeOrdering = { + def make(t1: SBoolean.type, t2: SBoolean.type, ecb: EmitClassBuilder[_]): CodeOrdering = { new CodeOrderingCompareConsistentWithOthers { - val type1: SBoolean = t1 - val type2: SBoolean = t2 + val type1: SBoolean.type = t1 + val type2: SBoolean.type = t2 - def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = + def _compareNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Int] = Code.invokeStatic2[java.lang.Boolean, Boolean, Boolean, Int]("compare", x.asBoolean.boolCode(cb), y.asBoolean.boolCode(cb)) } } diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/ShuffleOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/ShuffleOrdering.scala index cb44907593a..fc5efa62ba7 100644 --- a/hail/src/main/scala/is/hail/expr/ir/orderings/ShuffleOrdering.scala +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/ShuffleOrdering.scala @@ -2,7 +2,7 @@ package is.hail.expr.ir.orderings import is.hail.asm4s.Code import is.hail.expr.ir.{EmitClassBuilder, EmitCodeBuilder} -import is.hail.types.physical.PCode +import is.hail.types.physical.stypes.SCode import is.hail.types.physical.stypes.concrete.{SCanonicalShufflePointer, SCanonicalShufflePointerCode} import is.hail.types.physical.stypes.interfaces.SShuffle @@ -15,7 +15,7 @@ object ShuffleOrdering { val type1: SShuffle = t1 val type2: SShuffle = t2 - def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = { + def _compareNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Int] = { val bcode1 = x.asInstanceOf[SCanonicalShufflePointerCode].binaryRepr val bcode2 = y.asInstanceOf[SCanonicalShufflePointerCode].binaryRepr val ord = BinaryOrdering.make(bcode1.st, bcode2.st, ecb) diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/StringOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/StringOrdering.scala index 26e7609c0af..cb40c07f106 100644 --- a/hail/src/main/scala/is/hail/expr/ir/orderings/StringOrdering.scala +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/StringOrdering.scala @@ -2,7 +2,7 @@ package is.hail.expr.ir.orderings import is.hail.asm4s.Code import is.hail.expr.ir.{EmitClassBuilder, EmitCodeBuilder} -import is.hail.types.physical.PCode +import is.hail.types.physical.stypes.SCode import is.hail.types.physical.stypes.concrete.{SStringPointer, SStringPointerCode} import is.hail.types.physical.stypes.interfaces.SString @@ -15,7 +15,7 @@ object StringOrdering { val type1: SString = t1 val type2: SString = t2 - def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = { + def _compareNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Int] = { val bcode1 = x.asInstanceOf[SStringPointerCode].binaryRepr val bcode2 = y.asInstanceOf[SStringPointerCode].binaryRepr val ord = BinaryOrdering.make(bcode1.st, bcode2.st, ecb) diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/StructOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/StructOrdering.scala index 1f81620e316..d81a5445a7c 100644 --- a/hail/src/main/scala/is/hail/expr/ir/orderings/StructOrdering.scala +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/StructOrdering.scala @@ -2,8 +2,8 @@ package is.hail.expr.ir.orderings import is.hail.asm4s.{Code, CodeLabel} import 
is.hail.expr.ir.{Ascending, EmitClassBuilder, EmitCode, EmitCodeBuilder, SortOrder} -import is.hail.types.physical.stypes.interfaces.SBaseStruct -import is.hail.types.physical.{PBaseStructValue, PCode} +import is.hail.types.physical.stypes.SCode +import is.hail.types.physical.stypes.interfaces.{SBaseStruct, SBaseStructValue} object StructOrdering { def make( @@ -19,7 +19,7 @@ object StructOrdering { require(sortOrders == null || sortOrders.size == t1.size) - def setup(cb: EmitCodeBuilder, lhs: PCode, rhs: PCode): (PBaseStructValue, PBaseStructValue) = { + def setup(cb: EmitCodeBuilder, lhs: SCode, rhs: SCode): (SBaseStructValue, SBaseStructValue) = { lhs.asBaseStruct.memoize(cb, "structord_lhs") -> rhs.asBaseStruct.memoize(cb, "structord_rhs") } @@ -28,7 +28,7 @@ object StructOrdering { if (sortOrders == null) Ascending else sortOrders(i), op) - override def _compareNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Int] = { + override def _compareNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Int] = { val (lhs, rhs) = setup(cb, x, y) val Lout = CodeLabel() val cmp = cb.newLocal("cmp", 0) @@ -36,8 +36,8 @@ object StructOrdering { var i = 0 while (i < t1.size) { val fldCmp = fieldOrdering(i, CodeOrdering.Compare(missingFieldsEqual)) - val l = EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i).typecast } - val r = EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i).typecast } + val l = EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i) } + val r = EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i) } cb.assign(cmp, fldCmp(cb, l, r)) cb.ifx(cmp.cne(0), cb.goto(Lout)) i += 1 @@ -47,7 +47,7 @@ object StructOrdering { cmp } - override def _ltNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + override def _ltNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { val (lhs, rhs) = setup(cb, x, y) val Lout = CodeLabel() val lt = cb.newLocal("lt", true) @@ -58,8 +58,8 @@ object StructOrdering { val fldLt = fieldOrdering(i, CodeOrdering.Lt(missingFieldsEqual)) val fldEq = fieldOrdering(i, CodeOrdering.Equiv(missingFieldsEqual)) - val l = cb.memoize(EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i).typecast }, s"struct_lt_lhs_fld$i") - val r = cb.memoize(EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i).typecast }, s"struct_lt_rhs_fld$i") + val l = cb.memoize(EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i) }, s"struct_lt_lhs_fld$i") + val r = cb.memoize(EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i) }, s"struct_lt_rhs_fld$i") cb.assign(lt, fldLt(cb, l, r)) cb.assign(eq, !lt && fldEq(cb, l, r)) cb.ifx(!eq, cb.goto(Lout)) @@ -70,7 +70,7 @@ object StructOrdering { lt } - override def _lteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + override def _lteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { val (lhs, rhs) = setup(cb, x, y) val Lout = CodeLabel() val lteq = cb.newLocal("lteq", true) @@ -81,8 +81,8 @@ object StructOrdering { val fldLtEq = fieldOrdering(i, CodeOrdering.Lteq(missingFieldsEqual)) val fldEq = fieldOrdering(i, CodeOrdering.Equiv(missingFieldsEqual)) - val l = cb.memoize(EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i).typecast }, s"struct_lteq_lhs_fld$i") - val r = cb.memoize(EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i).typecast }, s"struct_lteq_rhs_fld$i") + val l = cb.memoize(EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i) }, s"struct_lteq_lhs_fld$i") + val r = cb.memoize(EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i) }, s"struct_lteq_rhs_fld$i") cb.assign(lteq, 
fldLtEq(cb, l, r)) cb.assign(eq, fldEq(cb, l, r)) cb.ifx(!eq, cb.goto(Lout)) @@ -93,7 +93,7 @@ object StructOrdering { lteq } - override def _gtNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + override def _gtNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { val (lhs, rhs) = setup(cb, x, y) val Lout = CodeLabel() val gt = cb.newLocal("gt", false) @@ -104,8 +104,8 @@ object StructOrdering { val fldGt = fieldOrdering(i, CodeOrdering.Gt(missingFieldsEqual)) val fldEq = fieldOrdering(i, CodeOrdering.Equiv(missingFieldsEqual)) - val l = cb.memoize(EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i).typecast }, s"struct_gt_lhs_fld$i") - val r = cb.memoize(EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i).typecast }, s"struct_gt_rhs_fld$i") + val l = cb.memoize(EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i) }, s"struct_gt_lhs_fld$i") + val r = cb.memoize(EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i) }, s"struct_gt_rhs_fld$i") cb.assign(gt, fldGt(cb, l, r)) cb.assign(eq, !gt && fldEq(cb, l, r)) cb.ifx(!eq, cb.goto(Lout)) @@ -116,7 +116,7 @@ object StructOrdering { gt } - override def _gteqNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + override def _gteqNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { val (lhs, rhs) = setup(cb, x, y) val Lout = CodeLabel() val gteq = cb.newLocal("gteq", true) @@ -127,8 +127,8 @@ object StructOrdering { val fldGtEq = fieldOrdering(i, CodeOrdering.Gteq(missingFieldsEqual)) val fldEq = fieldOrdering(i, CodeOrdering.Equiv(missingFieldsEqual)) - val l = cb.memoize(EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i).typecast }, s"struct_gteq_lhs_fld$i") - val r = cb.memoize(EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i).typecast }, s"struct_gteq_rhs_fld$i") + val l = cb.memoize(EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i) }, s"struct_gteq_lhs_fld$i") + val r = cb.memoize(EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i) }, s"struct_gteq_rhs_fld$i") cb.assign(gteq, fldGtEq(cb, l, r)) cb.assign(eq, fldEq(cb, l, r)) cb.ifx(!eq, cb.goto(Lout)) @@ -139,7 +139,7 @@ object StructOrdering { gteq } - override def _equivNonnull(cb: EmitCodeBuilder, x: PCode, y: PCode): Code[Boolean] = { + override def _equivNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Boolean] = { val (lhs, rhs) = setup(cb, x, y) val Lout = CodeLabel() val eq = cb.newLocal("cmp", true) @@ -147,8 +147,8 @@ object StructOrdering { var i = 0 while (i < t1.size) { val fldEq = fieldOrdering(i, CodeOrdering.Equiv(missingFieldsEqual)) - val l = EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i).typecast } - val r = EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i).typecast } + val l = EmitCode.fromI(cb.emb) { cb => lhs.loadField(cb, i) } + val r = EmitCode.fromI(cb.emb) { cb => rhs.loadField(cb, i) } cb.assign(eq, fldEq(cb, l, r)) cb.ifx(!eq, cb.goto(Lout)) i += 1 diff --git a/hail/src/main/scala/is/hail/expr/ir/package.scala b/hail/src/main/scala/is/hail/expr/ir/package.scala index 7c62e79ba28..a75f0de877d 100644 --- a/hail/src/main/scala/is/hail/expr/ir/package.scala +++ b/hail/src/main/scala/is/hail/expr/ir/package.scala @@ -4,7 +4,7 @@ import is.hail.asm4s import is.hail.asm4s._ import is.hail.expr.ir.functions.IRFunctionRegistry import is.hail.types.physical._ -import is.hail.types.physical.stypes.SCode +import is.hail.types.physical.stypes.{SCode, SValue} import is.hail.types.virtual._ import is.hail.types.{coerce => tycoerce, _} import is.hail.utils._ @@ -14,9 +14,7 @@ import 
scala.language.implicitConversions package object ir { type TokenIterator = BufferedIterator[Token] - - type IEmitCode = IEmitCodeGen[PCode] - type IEmitSCode = IEmitCodeGen[SCode] + type IEmitCode = IEmitCodeGen[SCode] var uidCounter: Long = 0 @@ -143,6 +141,10 @@ package object ir { ArraySort(stream, l.name, r.name, f(l, r)) } + def sliceArrayIR(arrayIR: IR, startIR: IR, stopIR: IR): IR = { + invoke("slice", arrayIR.typ, arrayIR, startIR, stopIR) + } + def joinIR(left: IR, right: IR, lkey: IndexedSeq[String], rkey: IndexedSeq[String], joinType: String)(f: (Ref, Ref) => IR): IR = { val lRef = Ref(genUID(), left.typ.asInstanceOf[TStream].elementType) val rRef = Ref(genUID(), right.typ.asInstanceOf[TStream].elementType) @@ -175,6 +177,7 @@ package object ir { } def makestruct(fields: (String, IR)*): MakeStruct = MakeStruct(fields) + def maketuple(fields: IR*): MakeTuple = MakeTuple(fields.zipWithIndex.map{ case (field, idx) => (idx, field)}) implicit def toRichIndexedSeqEmitSettable(s: IndexedSeq[EmitSettable]): RichIndexedSeqEmitSettable = new RichIndexedSeqEmitSettable(s) @@ -186,9 +189,9 @@ package object ir { implicit def valueToCodeParam(v: Value[_]): CodeParam = CodeParam(v) - implicit def toPCodeParam(pc: PCode): PCodeParam = PCodeParam(pc) + implicit def sCodeToSCodeParam(sc: SCode): SCodeParam = SCodeParam(sc) - implicit def pValueToPCodeParam(pv: PValue): PCodeParam = PCodeParam(pv) + implicit def sValueToSCodeParam(sv: SValue): SCodeParam = SCodeParam(sv) implicit def toEmitParam(ec: EmitCode): EmitParam = EmitParam(ec) diff --git a/hail/src/main/scala/is/hail/expr/ir/streams/EmitStream.scala b/hail/src/main/scala/is/hail/expr/ir/streams/EmitStream.scala index 04a76870f30..6b525776bad 100644 --- a/hail/src/main/scala/is/hail/expr/ir/streams/EmitStream.scala +++ b/hail/src/main/scala/is/hail/expr/ir/streams/EmitStream.scala @@ -5,11 +5,12 @@ import is.hail.asm4s._ import is.hail.expr.ir._ import is.hail.expr.ir.orderings.StructOrdering import is.hail.services.shuffler.CompileTimeShuffleClient -import is.hail.types.physical.stypes.EmitType +import is.hail.types.{TypeWithRequiredness, VirtualTypeWithReq} +import is.hail.types.physical.stypes.{EmitType, SType} import is.hail.types.physical.stypes.concrete.SCanonicalShufflePointerSettable import is.hail.types.physical.stypes.interfaces._ import is.hail.types.physical.stypes.primitives.{SInt32, SInt32Code} -import is.hail.types.physical.{PCanonicalStream, PCode, PInterval, PStruct, PType} +import is.hail.types.physical.{PCanonicalArray, PCanonicalStream, PCanonicalStruct, PInterval, PStruct, PType} import is.hail.types.virtual.{TInterval, TShuffle, TStream} import is.hail.utils._ @@ -142,31 +143,33 @@ object EmitStream { streamIR: IR, cb: EmitCodeBuilder, outerRegion: Value[Region], - env: Emit.E, + env: EmitEnv, container: Option[AggContainer] ): IEmitCode = { val mb = cb.emb - def emitVoid(ir: IR, cb: EmitCodeBuilder, region: Value[Region] = outerRegion, env: Emit.E = env, container: Option[AggContainer] = container): Unit = + def emitVoid(ir: IR, cb: EmitCodeBuilder, region: Value[Region] = outerRegion, env: EmitEnv = env, container: Option[AggContainer] = container): Unit = emitter.emitVoid(cb, ir, region, env, container, None) - def emit(ir: IR, cb: EmitCodeBuilder, region: Value[Region] = outerRegion, env: Emit.E = env, container: Option[AggContainer] = container): IEmitCode = { + def emit(ir: IR, cb: EmitCodeBuilder, region: Value[Region] = outerRegion, env: EmitEnv = env, container: Option[AggContainer] = container): IEmitCode 
= { ir.typ match { case _: TStream => produce(ir, cb, region, env, container) case _ => emitter.emitI(ir, cb, region, env, container, None) } } - def produce(streamIR: IR, cb: EmitCodeBuilder, region: Value[Region] = outerRegion, env: Emit.E = env, container: Option[AggContainer] = container): IEmitCode = + def produce(streamIR: IR, cb: EmitCodeBuilder, region: Value[Region] = outerRegion, env: EmitEnv = env, container: Option[AggContainer] = container): IEmitCode = EmitStream.produce(emitter, streamIR, cb, region, env, container) + def typeWithReqx(node: IR): VirtualTypeWithReq = VirtualTypeWithReq(node.typ, emitter.ctx.req.lookup(node).asInstanceOf[TypeWithRequiredness]) + def typeWithReq: VirtualTypeWithReq = typeWithReqx(streamIR) + streamIR match { - case NA(_typ) => - val eltType = streamIR.pType.asInstanceOf[PCanonicalStream].elementType - val st = SStream(eltType.sType, false) + case x@NA(_typ) => + val st = typeWithReq.canonicalEmitType.st.asInstanceOf[SStream] val region = mb.genFieldThisRef[Region]("na_region") val producer = new StreamProducer { override def initialize(cb: EmitCodeBuilder): Unit = {} @@ -177,15 +180,15 @@ object EmitStream { override val LproduceElement: CodeLabel = mb.defineAndImplementLabel { cb => cb.goto(LendOfStream) } - override val element: EmitCode = EmitCode.present(mb, eltType.defaultValue(mb)) + override val element: EmitCode = EmitCode.present(mb, st.elementType.defaultValue) override def close(cb: EmitCodeBuilder): Unit = {} } - IEmitCode.missing(cb, SStreamCode(st, producer)) + IEmitCode.missing(cb, SStreamCode(producer)) case Ref(name, _typ) => assert(_typ.isInstanceOf[TStream]) - env.lookup(name).toI(cb) + env.bindings.lookup(name).toI(cb) .map(cb) { case (stream: SStreamCode) => val childProducer = stream.producer val producer = new StreamProducer { @@ -216,7 +219,7 @@ object EmitStream { case In(n, _) => // this, Code[Region], ... - val param = mb.getEmitParam(2 + n, outerRegion) + val param = env.inputValues(n).apply(outerRegion) if (!param.st.isInstanceOf[SStream]) throw new RuntimeException(s"parameter ${ 2 + n } is not a stream! 
t=${ param.st } }, params=${ mb.emitParamTypes }") param.load.toI(cb) @@ -224,13 +227,12 @@ object EmitStream { case ToStream(a, _requiresMemoryManagementPerElement) => emit(a, cb).map(cb) { case ind: SIndexableCode => - val containerField = mb.newPField("tostream_arr", ind.pt) + val containerField = mb.newPField("tostream_arr", ind.st) val container = containerField.asInstanceOf[SIndexableValue] val idx = mb.genFieldThisRef[Int]("tostream_idx") val regionVar = mb.genFieldThisRef[Region]("tostream_region") SStreamCode( - SStream(ind.st.elementType, ind.pt.required), new StreamProducer { override def initialize(cb: EmitCodeBuilder): Unit = { cb.assign(containerField, ind) @@ -250,7 +252,7 @@ object EmitStream { } val element: EmitCode = EmitCode.fromI(mb) { cb => - container.loadElement(cb, idx).typecast[PCode] } + container.loadElement(cb, idx) } def close(cb: EmitCodeBuilder): Unit = {} }) @@ -261,14 +263,16 @@ object EmitStream { val region = mb.genFieldThisRef[Region]("makestream_region") val emittedArgs = args.map(a => EmitCode.fromI(mb)(cb => emit(a, cb, region))).toFastIndexedSeq - val unifiedType = x.pType.asInstanceOf[PCanonicalStream].elementType.sType // FIXME - val eltField = mb.newEmitField("makestream_elt", EmitType(unifiedType, emittedArgs.forall(_.required))) + // FIXME use SType.chooseCompatibleType + val st = typeWithReq.canonicalEmitType.st.asInstanceOf[SStream] + val unifiedType = st.elementEmitType + val eltField = mb.newEmitField("makestream_elt", unifiedType) val staticLen = args.size val current = mb.genFieldThisRef[Int]("makestream_current") IEmitCode.present(cb, SStreamCode( - SStream(unifiedType, required = true), + st, new StreamProducer { override def initialize(cb: EmitCodeBuilder): Unit = { cb.assign(current, 0) // switches on 1..N @@ -288,7 +292,7 @@ object EmitStream { }, emittedArgs.map { elem => EmitCodeBuilder.scopedVoid(mb) { cb => - cb.assign(eltField, elem.toI(cb).map(cb)(pc => pc.castTo(cb, region, unifiedType.pType, false))) + cb.assign(eltField, elem.toI(cb).map(cb)(pc => pc.castTo(cb, region, unifiedType.st, false))) cb.goto(LendOfSwitch) } }) @@ -317,7 +321,10 @@ object EmitStream { val leftProducer = leftEC.pv.asStream.producer val rightProducer = rightEC.pv.asStream.producer - val xElt = mb.newEmitField(x.pType.asInstanceOf[PCanonicalStream].elementType, leftEC.required && rightEC.required) // FIXME unify here + val unifiedStreamSType = typeWithReq.canonicalEmitType.st.asInstanceOf[SStream] + val unifiedElementType = unifiedStreamSType.elementEmitType + + val xElt = mb.newEmitField(unifiedElementType) val region = mb.genFieldThisRef[Region]("streamif_region") cb.ifx(xCond, @@ -350,11 +357,11 @@ object EmitStream { cb.ifx(xCond, cb.goto(leftProducer.LproduceElement), cb.goto(rightProducer.LproduceElement)) cb.define(leftProducer.LproduceElementDone) - cb.assign(xElt, leftProducer.element.toI(cb).map(cb)(_.castTo(cb, region, xElt.pt))) + cb.assign(xElt, leftProducer.element.toI(cb).map(cb)(_.castTo(cb, region, xElt.st))) cb.goto(LproduceElementDone) cb.define(rightProducer.LproduceElementDone) - cb.assign(xElt, rightProducer.element.toI(cb).map(cb)(_.castTo(cb, region, xElt.pt))) + cb.assign(xElt, rightProducer.element.toI(cb).map(cb)(_.castTo(cb, region, xElt.st))) cb.goto(LproduceElementDone) cb.define(leftProducer.LendOfStream) @@ -372,7 +379,7 @@ object EmitStream { } IEmitCode(Lmissing, Lpresent, - SStreamCode(SStream(xElt.st, required = leftEC.pt.required && rightEC.pt.required), producer), + SStreamCode(producer), leftEC.required && 
rightEC.required) } @@ -436,14 +443,11 @@ object EmitStream { cb.goto(LproduceElementDone) } - val element: EmitCode = EmitCode.present(mb, new SInt32Code(true, curr)) + val element: EmitCode = EmitCode.present(mb, new SInt32Code(curr)) def close(cb: EmitCodeBuilder): Unit = {} } - SStreamCode( - SStream(SInt32(true), required = true), - producer - ) + SStreamCode(producer) } } } @@ -509,9 +513,7 @@ object EmitStream { cb.goto(producer.LendOfStream) } - SStreamCode( - childStream.st, - producer) + SStreamCode(producer) } case StreamTake(a, num) => @@ -553,7 +555,7 @@ object EmitStream { } } - SStreamCode(childStream.st, producer) + SStreamCode(producer) } } @@ -600,7 +602,7 @@ object EmitStream { } } - SStreamCode(childStream.st, producer) + SStreamCode(producer) } } @@ -644,17 +646,15 @@ object EmitStream { cb.goto(producer.LendOfStream) } - SStreamCode( - SStream(bodyResult.st, required = childStream.st.required), - producer - ) + SStreamCode(producer) } case x@StreamScan(childIR, zeroIR, accName, eltName, bodyIR) => produce(childIR, cb).map(cb) { case (childStream: SStreamCode) => val childProducer = childStream.producer - val accEmitType = EmitType(x.accPType.sType, x.accPType.required) + val accEmitType = VirtualTypeWithReq(zeroIR.typ, emitter.ctx.req.lookupState(x).head.asInstanceOf[TypeWithRequiredness]).canonicalEmitType + val accValueAccRegion = mb.newEmitField(accEmitType) val accValueEltRegion = mb.newEmitField(accEmitType) @@ -686,7 +686,7 @@ object EmitStream { cb.ifx(first, { cb.assign(first, false) - cb.assign(accValueEltRegion, emit(zeroIR, cb, region = elementRegion)) + cb.assign(accValueEltRegion, emit(zeroIR, cb, region = elementRegion).map(cb)(sc => sc.castTo(cb, elementRegion, accValueAccRegion.st))) cb.goto(LcopyAndReturn) }) @@ -697,13 +697,13 @@ object EmitStream { if (requiresMemoryManagementPerElement) { // deep copy accumulator into element region, then clear accumulator region - cb.assign(accValueEltRegion, accValueAccRegion.toI(cb).map(cb)(_.castTo(cb, childProducer.elementRegion, x.accPType, deepCopy = true))) + cb.assign(accValueEltRegion, accValueAccRegion.toI(cb).map(cb)(_.castTo(cb, childProducer.elementRegion, accEmitType.st, deepCopy = true))) cb += accRegion.clearRegion() } val bodyCode = cb.withScopedMaybeStreamValue(childProducer.element, "scan_child_elt") { ev => emit(bodyIR, cb, env = env.bind((accName, accValueEltRegion), (eltName, ev)), region = childProducer.elementRegion) - .map(cb)(pc => pc.castTo(cb, childProducer.elementRegion, x.accPType, deepCopy = false)) + .map(cb)(pc => pc.castTo(cb, childProducer.elementRegion, accEmitType.st, deepCopy = false)) } cb.assign(accValueEltRegion, bodyCode) @@ -711,7 +711,7 @@ object EmitStream { cb.define(LcopyAndReturn) if (requiresMemoryManagementPerElement) { - cb.assign(accValueAccRegion, accValueEltRegion.toI(cb).map(cb)(pc => pc.castTo(cb, accRegion, x.accPType, deepCopy = true))) + cb.assign(accValueAccRegion, accValueEltRegion.toI(cb).map(cb)(pc => pc.castTo(cb, accRegion, accEmitType.st, deepCopy = true))) } cb.goto(LproduceElementDone) @@ -730,7 +730,7 @@ object EmitStream { cb.goto(producer.LendOfStream) } - SStreamCode(SStream(accValueEltRegion.st, childStream.st.required), producer) + SStreamCode(producer) } case RunAggScan(child, name, init, seqs, result, states) => @@ -776,7 +776,7 @@ object EmitStream { cb.goto(producer.LendOfStream) } - SStreamCode(SStream(producer.element.st, childStream.st.required), producer) + SStreamCode(producer) } case StreamFlatMap(a, name, body) => @@ -899,10 
+899,7 @@ object EmitStream { cb.goto(producer.LendOfStream) } - SStreamCode( - SStream(innerProducer.element.st, required = outerStream.st.required), - producer - ) + SStreamCode(producer) } case x@StreamJoinRightDistinct(leftIR, rightIR, lKey, rKey, leftName, rightName, joinIR, joinType) => @@ -921,8 +918,8 @@ object EmitStream { assert(lelt.emitType == lEltType) assert(relt.emitType == rEltType) - val lhs = EmitCode.fromI(mb)(cb => lelt.toI(cb).map(cb)(_.asBaseStruct.subset(lKey: _*).asPCode)) - val rhs = EmitCode.fromI(mb)(cb => relt.toI(cb).map(cb)(_.asBaseStruct.subset(rKey: _*).asPCode)) + val lhs = EmitCode.fromI(mb)(cb => lelt.toI(cb).map(cb)(_.asBaseStruct.subset(lKey: _*))) + val rhs = EmitCode.fromI(mb)(cb => relt.toI(cb).map(cb)(_.asBaseStruct.subset(rKey: _*))) StructOrdering.make(lhs.st.asInstanceOf[SBaseStruct], rhs.st.asInstanceOf[SBaseStruct], cb.emb.ecb, missingFieldsEqual = false) .compare(cb, lhs, rhs, missingEqual = false) @@ -987,7 +984,7 @@ object EmitStream { cb.ifx(c > 0, cb.goto(LpullRight)) cb.ifx(c < 0, { - cb.assign(rxOut, EmitCode.missing(mb, rxOut.pt)) + cb.assign(rxOut, EmitCode.missing(mb, rxOut.st)) }, { // c == 0 if (rightProducer.requiresMemoryManagementPerElement) { @@ -1010,7 +1007,7 @@ object EmitStream { // if right stream ends before left cb.define(rightProducer.LendOfStream) - cb.assign(rxOut, EmitCode.missing(mb, rxOut.pt)) + cb.assign(rxOut, EmitCode.missing(mb, rxOut.st)) cb.assign(rightEOS, true) if (leftProducer.requiresMemoryManagementPerElement) @@ -1032,7 +1029,7 @@ object EmitStream { } - SStreamCode(SStream(producer.element.st, leftStream.st.required && rightStream.st.required), producer) + SStreamCode(producer) case "outer" => @@ -1097,6 +1094,8 @@ object EmitStream { cb.goto(rightProducer.LproduceElement) cb.define(LpullLeft) + if (leftProducer.requiresMemoryManagementPerElement) + cb += leftProducer.elementRegion.clearRegion() cb.goto(leftProducer.LproduceElement) val Lcompare = CodeLabel() @@ -1110,7 +1109,6 @@ object EmitStream { cb.assign(lOutMissing, true) if (rightProducer.requiresMemoryManagementPerElement) { cb += elementRegion.trackAndIncrementReferenceCountOf(rightProducer.elementRegion) - cb += rightProducer.elementRegion.clearRegion() } cb.goto(Lpush) }, @@ -1125,7 +1123,6 @@ object EmitStream { cb.assign(rOutMissing, true) if (leftProducer.requiresMemoryManagementPerElement) { cb += elementRegion.trackAndIncrementReferenceCountOf(leftProducer.elementRegion) - cb += leftProducer.elementRegion.clearRegion() } cb.goto(Lpush) }, @@ -1133,11 +1130,9 @@ object EmitStream { // c == 0 if (leftProducer.requiresMemoryManagementPerElement) { cb += elementRegion.trackAndIncrementReferenceCountOf(leftProducer.elementRegion) - cb += leftProducer.elementRegion.clearRegion() } if (rightProducer.requiresMemoryManagementPerElement) { cb += elementRegion.trackAndIncrementReferenceCountOf(rightProducer.elementRegion) - cb += rightProducer.elementRegion.clearRegion() } cb.goto(Lpush) }) @@ -1146,11 +1141,11 @@ object EmitStream { mb.implementLabel(Lpush) { cb => cb.ifx(lOutMissing, - cb.assign(lxOut, EmitCode.missing(mb, lxOut.pt)), + cb.assign(lxOut, EmitCode.missing(mb, lxOut.st)), cb.assign(lxOut, lx) ) cb.ifx(rOutMissing, - cb.assign(rxOut, EmitCode.missing(mb, rxOut.pt)), + cb.assign(rxOut, EmitCode.missing(mb, rxOut.st)), cb.assign(rxOut, rx)) cb.goto(LproduceElementDone) } @@ -1168,7 +1163,6 @@ object EmitStream { { if (leftProducer.requiresMemoryManagementPerElement) { cb += 
elementRegion.trackAndIncrementReferenceCountOf(leftProducer.elementRegion) - cb += leftProducer.elementRegion.clearRegion() } cb.goto(Lpush) }, @@ -1195,7 +1189,6 @@ object EmitStream { { if (rightProducer.requiresMemoryManagementPerElement) { cb += elementRegion.trackAndIncrementReferenceCountOf(rightProducer.elementRegion) - cb += rightProducer.elementRegion.clearRegion() } cb.goto(Lpush) }, @@ -1233,7 +1226,7 @@ object EmitStream { } } - SStreamCode(SStream(producer.element.st, leftStream.st.required && rightStream.st.required), producer) + SStreamCode(producer) } } } @@ -1243,15 +1236,15 @@ object EmitStream { val childProducer = childStream.producer - val xCurElt = mb.newPField("st_grpby_curelt", childProducer.element.pt) + val xCurElt = mb.newPField("st_grpby_curelt", childProducer.element.st) val keyRegion = mb.genFieldThisRef[Region]("st_groupby_key_region") def subsetCode = xCurElt.asBaseStruct.subset(key: _*) - val curKey = mb.newPField("st_grpby_curkey", subsetCode.st.pType) - // FIXME: PType.canonical is the wrong infrastructure here. This should be some - // notion of "cheap stype with a copy". We don't want to use a subset struct, - // since we don't want to deep copy the parent. - val lastKey = mb.newPField("st_grpby_lastkey", PType.canonical(subsetCode.st.pType)) + val curKey = mb.newPField("st_grpby_curkey", subsetCode.st) + + // This type shouldn't be a subset struct, since it is copied deeply. + // We don't want to deep copy the parent. + val lastKey = mb.newPField("st_grpby_lastkey", SType.canonical(subsetCode.st)) val eos = mb.genFieldThisRef[Boolean]("st_grpby_eos") val nextGroupReady = mb.genFieldThisRef[Boolean]("streamgroupbykey_nextready") @@ -1267,7 +1260,7 @@ object EmitStream { val outerElementRegion = mb.genFieldThisRef[Region]("streamgroupbykey_outer_elt_region") def equiv(cb: EmitCodeBuilder, l: SBaseStructCode, r: SBaseStructCode): Code[Boolean] = - StructOrdering.make(l.st, r.st, cb.emb.ecb, missingFieldsEqual = false).equivNonnull(cb, l.asPCode, r.asPCode) + StructOrdering.make(l.st, r.st, cb.emb.ecb, missingFieldsEqual = false).equivNonnull(cb, l, r) val LchildProduceDoneInner = CodeLabel() val LchildProduceDoneOuter = CodeLabel() @@ -1298,7 +1291,7 @@ object EmitStream { if (requiresMemoryManagementPerElement) cb += keyRegion.clearRegion() - cb.assign(lastKey, subsetCode.castTo(cb, keyRegion, lastKey.pt, deepCopy = true)) + cb.assign(lastKey, subsetCode.castTo(cb, keyRegion, lastKey.st, deepCopy = true)) cb.assign(nextGroupReady, true) cb.assign(inOuter, true) cb.goto(LendOfStream) @@ -1374,14 +1367,14 @@ object EmitStream { if (requiresMemoryManagementPerElement) cb += keyRegion.clearRegion() - cb.assign(lastKey, subsetCode.castTo(cb, keyRegion, lastKey.pt, deepCopy = true)) + cb.assign(lastKey, subsetCode.castTo(cb, keyRegion, lastKey.st, deepCopy = true)) cb.define(LinnerStreamReady) cb.assign(nextGroupReady, false) cb.goto(LproduceElementDone) } - override val element: EmitCode = EmitCode.present(mb, SStreamCode(SStream(innerProducer.element.st, true), innerProducer)) + override val element: EmitCode = EmitCode.present(mb, SStreamCode(innerProducer)) override def close(cb: EmitCodeBuilder): Unit = { childProducer.close(cb) @@ -1406,7 +1399,7 @@ object EmitStream { cb.ifx(inOuter, cb.goto(LchildProduceDoneOuter), cb.goto(LchildProduceDoneInner)) } - SStreamCode(SStream(outerProducer.element.st, required = childStream.st.required), outerProducer) + SStreamCode(outerProducer) } case StreamGrouped(a, groupSize) => @@ -1464,7 +1457,7 @@ object 
EmitStream { override def close(cb: EmitCodeBuilder): Unit = {} } - val innerStreamCode = EmitCode.present(mb, SStreamCode(SStream(innerProducer.element.st, true), innerProducer)) + val innerStreamCode = EmitCode.present(mb, SStreamCode(innerProducer)) val outerProducer = new StreamProducer { override val length: Option[EmitCodeBuilder => Code[Int]] = @@ -1527,7 +1520,7 @@ object EmitStream { cb.ifx(inOuter, cb.goto(LchildProduceDoneOuter), cb.goto(LchildProduceDoneInner)) } - SStreamCode(SStream(outerProducer.element.st, required = childStream.st.required), outerProducer) + SStreamCode(outerProducer) } } @@ -1734,7 +1727,7 @@ object EmitStream { // this stream has ended before each other, so we set the eos flag and the element EmitSettable cb.assign(eosPerStream(i), true) - cb.assign(vars(i), EmitCode.missing(mb, vars(i).pt)) + cb.assign(vars(i), EmitCode.missing(mb, vars(i).st)) cb.goto(endProduce) @@ -1756,16 +1749,24 @@ object EmitStream { } - SStreamCode(SStream(producer.element.st, childStreams.forall(_.pt.required)), producer) + SStreamCode(producer) } case x@StreamZipJoin(as, key, keyRef, valsRef, joinIR) => IEmitCode.multiMapEmitCodes(cb, as.map(a => EmitCode.fromI(mb)(cb => emit(a, cb)))) { children => val producers = children.map(_.asStream.producer) - // FIXME: unify - val curValsType = x.curValsType - val eltType = curValsType.elementType.setRequired(true).asInstanceOf[PStruct] + val eltType = VirtualTypeWithReq.union(as.map(a => typeWithReqx(a))).canonicalEmitType + .st + .asInstanceOf[SStream] + .elementType + .canonicalPType() + .setRequired(false) + .asInstanceOf[PCanonicalStruct] + + val keyType = eltType.selectFields(key) + + val curValsType = PCanonicalArray(eltType) val _elementRegion = mb.genFieldThisRef[Region]("szj_region") val regionArray = mb.genFieldThisRef[Array[Region]]("szj_region_array") @@ -1813,11 +1814,10 @@ object EmitStream { val result = mb.genFieldThisRef[Array[Long]]("merge_result") val i = mb.genFieldThisRef[Int]("merge_i") - val keyType = eltType.selectFields(key) - val curKey = mb.newPField("st_grpby_curkey", keyType) + val curKey = mb.newPField("st_grpby_curkey", keyType.sType) - val xKey = mb.newPresentEmitField("zipjoin_key", keyType) - val xElts = mb.newPresentEmitField("zipjoin_elts", curValsType) + val xKey = mb.newEmitField("zipjoin_key", keyType.sType, required = true) + val xElts = mb.newEmitField("zipjoin_elts", curValsType.sType, required = true) val joinResult: EmitCode = EmitCode.fromI(mb) { cb => val newEnv = env.bind((keyRef -> xKey), (valsRef -> xElts)) @@ -1868,18 +1868,18 @@ object EmitStream { }) cb.define(Lpush) - cb.assign(xKey, curKey) - cb.assign(xElts, curValsType.constructFromElements(cb, elementRegion, k, false) { (cb, i) => - IEmitCode(cb, result(i).ceq(0L), eltType.loadCheapPCode(cb, result(i))) - }) + cb.assign(xKey, EmitCode.present(cb.emb, curKey)) + cb.assign(xElts, EmitCode.present(cb.emb, curValsType.constructFromElements(cb, elementRegion, k, false) { (cb, i) => + IEmitCode(cb, result(i).ceq(0L), eltType.loadCheapSCode(cb, result(i))) + })) cb.goto(LproduceElementDone) cb.define(LstartNewKey) cb.forLoop(cb.assign(i, 0), i < k, cb.assign(i, i + 1), { cb += (result(i) = 0L) }) - cb.assign(curKey, eltType.loadCheapPCode(cb, heads(winner)).subset(key: _*) - .castTo(cb, elementRegion, curKey.pt, true)) + cb.assign(curKey, eltType.loadCheapSCode(cb, heads(winner)).subset(key: _*) + .castTo(cb, elementRegion, curKey.st, true)) cb.goto(LaddToResult) cb.define(LaddToResult) @@ -1909,8 +1909,8 @@ object EmitStream { 
cb.ifx(winner.ceq(k), cb.goto(LchallengerWins)) - val left = eltType.loadCheapPCode(cb, heads(challenger)).subset(key: _*) - val right = eltType.loadCheapPCode(cb, heads(winner)).subset(key: _*) + val left = eltType.loadCheapSCode(cb, heads(challenger)).subset(key: _*) + val right = eltType.loadCheapSCode(cb, heads(winner)).subset(key: _*) val ord = StructOrdering.make(left.st, right.st, cb.emb.ecb, missingFieldsEqual = false) cb.ifx(ord.lteqNonnull(cb, left, right), cb.goto(LchallengerWins), @@ -1938,7 +1938,7 @@ object EmitStream { }) }, { cb.ifx(!winner.cne(k), cb.goto(Lpush)) - val left = eltType.loadCheapPCode(cb, heads(winner)).subset(key: _*) + val left = eltType.loadCheapSCode(cb, heads(winner)).subset(key: _*) val right = curKey val ord = StructOrdering.make(left.st, right.st.asInstanceOf[SBaseStruct], cb.emb.ecb, missingFieldsEqual = false) @@ -1985,14 +1985,19 @@ object EmitStream { } } - SStreamCode(SStream(producer.element.st, children.forall(_.pt.required)), producer) + SStreamCode(producer) } case x@StreamMultiMerge(as, key) => IEmitCode.multiMapEmitCodes(cb, as.map(a => EmitCode.fromI(mb)(cb => emit(a, cb)))) { children => val producers = children.map(_.asStream.producer) - val unifiedType = x.pType.elementType.asInstanceOf[PStruct] // FIXME unify + val unifiedType = VirtualTypeWithReq.union(as.map(a => typeWithReqx(a))).canonicalEmitType + .st + .asInstanceOf[SStream] + .elementEmitType + .canonicalPType + .asInstanceOf[PCanonicalStruct] val region = mb.genFieldThisRef[Region]("smm_region") val regionArray = mb.genFieldThisRef[Array[Region]]("smm_region_array") @@ -2053,8 +2058,8 @@ object EmitStream { * left when key fields are missing. */ def comp(cb: EmitCodeBuilder, li: Code[Int], lv: Code[Long], ri: Code[Int], rv: Code[Long]): Code[Boolean] = { - val l = unifiedType.loadCheapPCode(cb, lv).asBaseStruct.subset(key: _*).memoize(cb, "stream_merge_l") - val r = unifiedType.loadCheapPCode(cb, rv).asBaseStruct.subset(key: _*).memoize(cb, "stream_merge_r") + val l = unifiedType.loadCheapSCode(cb, lv).asBaseStruct.subset(key: _*).memoize(cb, "stream_merge_l") + val r = unifiedType.loadCheapSCode(cb, rv).asBaseStruct.subset(key: _*).memoize(cb, "stream_merge_r") val ord1 = StructOrdering.make(l.asBaseStruct.st, r.asBaseStruct.st, cb.emb.ecb, missingFieldsEqual = false) val ord2 = StructOrdering.make(r.asBaseStruct.st, l.asBaseStruct.st, cb.emb.ecb, missingFieldsEqual = false) val b = cb.newLocal[Boolean]("stream_merge_comp_result") @@ -2163,7 +2168,7 @@ object EmitStream { } } - override val element: EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, unifiedType.loadCheapPCode(cb, heads(winner)))) + override val element: EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, unifiedType.loadCheapSCode(cb, heads(winner)))) override def close(cb: EmitCodeBuilder): Unit = { producers.foreach { p => @@ -2175,7 +2180,7 @@ object EmitStream { cb.assign(heads, Code._null) } } - SStreamCode(SStream(producer.element.st, children.forall(_.pt.required)), producer) + SStreamCode(producer) } case ReadPartition(context, rowType, reader) => @@ -2185,15 +2190,13 @@ object EmitStream { case ShuffleRead(idIR, keyRangeIR) => val shuffleType = idIR.typ.asInstanceOf[TShuffle] val keyType = keyRangeIR.typ.asInstanceOf[TInterval].pointType - val keyPType = keyRangeIR.pType.asInstanceOf[PInterval].pointType assert(keyType == shuffleType.keyType) - assert(keyPType == shuffleType.keyDecodedPType) val region = mb.genFieldThisRef[Region]("shuffleread_region") val emitID = EmitCode.fromI(mb)(cb 
=> emit(idIR, cb)) - val shuffleField = cb.emb.newPField(emitID.pt).asInstanceOf[SCanonicalShufflePointerSettable] + val shuffleField = cb.emb.newPField(emitID.st).asInstanceOf[SCanonicalShufflePointerSettable] val shuffle = CompileTimeShuffleClient.create(cb, shuffleField) @@ -2220,7 +2223,7 @@ object EmitStream { cb.goto(LproduceElementDone) } - override val element: EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, shuffle.readValue(cb, region).asPCode)) + override val element: EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, shuffle.readValue(cb, region))) override def close(cb: EmitCodeBuilder): Unit = { shuffle.finishGet(cb) @@ -2228,13 +2231,13 @@ object EmitStream { } } - IEmitCode.present(cb, SStreamCode(SStream(producer.element.st, true), producer)) + IEmitCode.present(cb, SStreamCode(producer)) case ShufflePartitionBounds(idIR, nPartitionsIR) => val region = mb.genFieldThisRef[Region]("shuffle_partition_bounds_region") val emitID = EmitCode.fromI(mb)(cb => emit(idIR, cb)) - val shuffleField = cb.emb.newPField(emitID.pt).asInstanceOf[SCanonicalShufflePointerSettable] + val shuffleField = cb.emb.newPField(emitID.st).asInstanceOf[SCanonicalShufflePointerSettable] val shuffle = CompileTimeShuffleClient.create(cb, shuffleField) val currentAddr = mb.genFieldThisRef[Long]("shuffle_partition_bounds_addr") @@ -2256,14 +2259,14 @@ object EmitStream { cb.goto(LproduceElementDone) } override val element: EmitCode = EmitCode.fromI(mb)(cb => - IEmitCode.present(cb, shuffle.readPartitionBound(cb, elementRegion).asPCode)) + IEmitCode.present(cb, shuffle.readPartitionBound(cb, elementRegion))) override def close(cb: EmitCodeBuilder): Unit = { shuffle.partitionBoundsFinished(cb) shuffle.close(cb) } } - IEmitCode.present(cb, SStreamCode(SStream(producer.element.st, true), producer)) + IEmitCode.present(cb, SStreamCode(producer)) } } } diff --git a/hail/src/main/scala/is/hail/expr/ir/streams/StreamUtils.scala b/hail/src/main/scala/is/hail/expr/ir/streams/StreamUtils.scala index 784bcf1650e..86490a5bfae 100644 --- a/hail/src/main/scala/is/hail/expr/ir/streams/StreamUtils.scala +++ b/hail/src/main/scala/is/hail/expr/ir/streams/StreamUtils.scala @@ -3,7 +3,9 @@ package is.hail.expr.ir.streams import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir.{EmitCodeBuilder, IEmitCode, IR, NDArrayMap, NDArrayMap2, Ref, RunAggScan, StagedArrayBuilder, StreamFilter, StreamFlatMap, StreamFold, StreamFold2, StreamFor, StreamJoinRightDistinct, StreamMap, StreamScan, StreamZip, StreamZipJoin} -import is.hail.types.physical.{PCanonicalArray, PCode, PIndexableCode, SingleCodePCode} +import is.hail.types.physical.stypes.interfaces.SIndexableCode +import is.hail.types.physical.PCanonicalArray +import is.hail.types.physical.stypes.SingleCodeType trait StreamArgType { def apply(outerRegion: Region, eltRegion: Region): Iterator[java.lang.Long] @@ -15,25 +17,25 @@ object StreamUtils { cb: EmitCodeBuilder, stream: StreamProducer, destRegion: Value[Region] - ): PIndexableCode = { + ): SIndexableCode = { val mb = cb.emb val xLen = mb.newLocal[Int]("sta_len") - val aTyp = PCanonicalArray(stream.element.st.canonicalPType(), true) + val aTyp = PCanonicalArray(stream.element.emitType.canonicalPType, true) stream.length match { case None => - val vab = new StagedArrayBuilder(stream.element.st.canonicalPType(), mb, 0) + val vab = new StagedArrayBuilder(SingleCodeType.fromSType(stream.element.st), stream.element.required, mb, 0) writeToArrayBuilder(cb, stream, vab, destRegion) cb.assign(xLen, 
vab.size) aTyp.constructFromElements(cb, destRegion, xLen, deepCopy = false) { (cb, i) => - IEmitCode(cb, vab.isMissing(i), PCode(aTyp.elementType, vab(i))) + vab.loadFromIndex(cb, destRegion, i) } case Some(computeLen) => var pushElem: (EmitCodeBuilder, IEmitCode) => Unit = null - var finish: (EmitCodeBuilder) => PIndexableCode = null + var finish: (EmitCodeBuilder) => SIndexableCode = null stream.memoryManagedConsume(destRegion, cb, setup = { cb => cb.assign(xLen, computeLen(cb)) @@ -65,7 +67,7 @@ object StreamUtils { }) { cb => stream.element.toI(cb).consume(cb, cb += ab.addMissing(), - sc => cb += ab.add(SingleCodePCode.fromPCode(cb, sc, destRegion, deepCopy = stream.requiresMemoryManagementPerElement).code) + sc => cb += ab.add(ab.elt.coerceSCode(cb, sc, destRegion, deepCopy = stream.requiresMemoryManagementPerElement).code) ) } } diff --git a/hail/src/main/scala/is/hail/io/CodecSpec.scala b/hail/src/main/scala/is/hail/io/CodecSpec.scala index 000b098fa1c..73f538976a2 100644 --- a/hail/src/main/scala/is/hail/io/CodecSpec.scala +++ b/hail/src/main/scala/is/hail/io/CodecSpec.scala @@ -3,12 +3,11 @@ package is.hail.io import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream, OutputStream} import is.hail.annotations.{Region, RegionValue} -import is.hail.asm4s.{Code, TypeInfo, Value} -import is.hail.expr.ir.{EmitClassBuilder, EmitCodeBuilder, EmitFunctionBuilder, ExecuteContext} +import is.hail.asm4s.Code +import is.hail.expr.ir.ExecuteContext import is.hail.types.encoded.EType -import is.hail.types.physical.{PCode, PType, PValue, typeToTypeInfo} +import is.hail.types.physical.PType import is.hail.types.virtual.Type -import is.hail.rvd.RVDContext import is.hail.sparkextras.ContextRDD import is.hail.utils.using import org.apache.spark.rdd.RDD diff --git a/hail/src/main/scala/is/hail/io/TextMatrixReader.scala b/hail/src/main/scala/is/hail/io/TextMatrixReader.scala index 8d7a861f49a..95b8c246cb8 100644 --- a/hail/src/main/scala/is/hail/io/TextMatrixReader.scala +++ b/hail/src/main/scala/is/hail/io/TextMatrixReader.scala @@ -8,7 +8,8 @@ import is.hail.io.fs.FS import is.hail.rvd.RVDPartitioner import is.hail.types._ import is.hail.types.physical._ -import is.hail.types.physical.stypes.concrete.{SIndexablePointerCode, SStringPointer} +import is.hail.types.physical.stypes.SCode +import is.hail.types.physical.stypes.concrete.{SIndexablePointerCode, SStackStruct, SStringPointer} import is.hail.types.physical.stypes.interfaces._ import is.hail.types.virtual._ import is.hail.utils._ @@ -344,7 +345,7 @@ class TextMatrixReader( params.hasHeader) { (region: Region, context: Any) => - val (lc, partitionIdx: Int) = context + val Row(lc, partitionIdx: Int) = context compiledLineParser.apply(partitionIdx, region, linesBody(lc).filter { line => val l = line.toString @@ -361,8 +362,8 @@ class TextMatrixReader( val subset = tt.globalType.valueSubsetter(requestedGlobalsType) subset(globals).asInstanceOf[Row] }, - lines.contextType, - lines.contexts.zipWithIndex, + TTuple(lines.contextType, TInt32), + lines.contexts.zipWithIndex.map { case (x, i) => Row(x, i) }, bodyPType, body) @@ -380,6 +381,8 @@ class TextMatrixReader( decomposeWithName(params, "TextMatrixReader") } + override def renderShort(): String = defaultRender() + override def hashCode(): Int = params.hashCode() override def equals(that: Any): Boolean = that match { @@ -480,7 +483,7 @@ class CompiledLineParser( private[this] def parseOptionalValue( cb: EmitCodeBuilder, - parse: EmitCodeBuilder => PCode + parse: EmitCodeBuilder => 
SCode ): IEmitCode = { assert(missingValue.size > 0) val end = cb.newLocal[Int]("parse_optional_value_end", pos + missingValue.size) @@ -571,15 +574,15 @@ class CompiledLineParser( } private[this] def parseValueOfType(cb: EmitCodeBuilder, t: PType): IEmitCode = { - def parseDefinedValue(cb: EmitCodeBuilder): PCode = t match { + def parseDefinedValue(cb: EmitCodeBuilder): SCode = t match { case t: PInt32 => - PCode(t, cb.invokeCode[Int](parseIntMb, region)) + primitive(cb.invokeCode[Int](parseIntMb, region)) case t: PInt64 => - PCode(t, cb.invokeCode[Long](parseLongMb, region)) + primitive(cb.invokeCode[Long](parseLongMb, region)) case t: PFloat32 => - PCode(t, Code.invokeStatic1[java.lang.Float, String, Float]("parseFloat", cb.invokeCode(parseStringMb, region))) + primitive(Code.invokeStatic1[java.lang.Float, String, Float]("parseFloat", cb.invokeCode(parseStringMb, region))) case t: PFloat64 => - PCode(t, Code.invokeStatic1[java.lang.Double, String, Double]("parseDouble", cb.invokeCode(parseStringMb, region))) + primitive(Code.invokeStatic1[java.lang.Double, String, Double]("parseDouble", cb.invokeCode(parseStringMb, region))) case t: PString => val st = SStringPointer(t) st.constructFromString(cb, region, cb.invokeCode[String](parseStringMb, region)) @@ -638,16 +641,14 @@ class CompiledLineParser( private[this] def parseEntries(cb: EmitCodeBuilder, entriesType: PCanonicalArray): SIndexablePointerCode = { val entryType = entriesType.elementType.asInstanceOf[PCanonicalStruct] assert(entryType.fields.size == 1) - val (nextAddress, _, finish) = entriesType.constructFromNextAddress(cb, region, nCols) + val (push, finish) = entriesType.constructFromFunctions(cb, region, nCols, false) val i = cb.newLocal[Int]("i", 0) cb.whileLoop(i < nCols, { - val nextAddr = nextAddress(cb) - cb.ifx(pos >= line.length, parseError(cb, const("unexpected end of line while reading entry ").concat(i.toS))) val ec = EmitCode.fromI(cb.emb)(cb => parseValueOfType(cb, entryType.fields(0).typ)) - entryType.storeAtAddressFromFields(cb, nextAddr, region, FastIndexedSeq(ec), deepCopy = false) + push(cb, IEmitCode.present(cb, SStackStruct.constructFromArgs(cb, region, entryType.virtualType, ec))) cb.assign(pos, pos + 1) cb.assign(i, i + 1) }) diff --git a/hail/src/main/scala/is/hail/io/bgen/BgenRDDPartitions.scala b/hail/src/main/scala/is/hail/io/bgen/BgenRDDPartitions.scala index df6a7657fb2..fb75cdc0960 100644 --- a/hail/src/main/scala/is/hail/io/bgen/BgenRDDPartitions.scala +++ b/hail/src/main/scala/is/hail/io/bgen/BgenRDDPartitions.scala @@ -8,7 +8,7 @@ import is.hail.io.fs.FS import is.hail.io.index.IndexReaderBuilder import is.hail.io.{ByteArrayReader, HadoopFSDataBinaryReader} import is.hail.types._ -import is.hail.types.physical.stypes.concrete.{SCanonicalCallCode, SStringPointer} +import is.hail.types.physical.stypes.concrete.{SCanonicalCallCode, SStackStruct, SStringPointer} import is.hail.types.physical.stypes.interfaces._ import is.hail.types.physical.{PCanonicalArray, PCanonicalLocus, PCanonicalString, PCanonicalStruct, PStruct} import is.hail.types.virtual.{TInterval, Type} @@ -279,7 +279,7 @@ object CompileDecoder { t.constructFromPositionAndString(cb, region, contigRecoded, position) case t: PCanonicalStruct => val strT = t.field("contig").typ.asInstanceOf[PCanonicalString] - val contigPC = SStringPointer(strT).constructFromString(cb, region, contigRecoded) + val contigPC = strT.sType.constructFromString(cb, region, contigRecoded) t.constructFromFields(cb, region, FastIndexedSeq(EmitCode.present(cb.emb, 
contigPC), EmitCode.present(cb.emb, primitive(position))), deepCopy = false) @@ -310,9 +310,9 @@ object CompileDecoder { } if (settings.hasField("rsid")) - structFieldCodes += EmitCode.present(cb.emb, SStringPointer(PCanonicalString(true)).constructFromString(cb, region, rsid)) + structFieldCodes += EmitCode.present(cb.emb, SStringPointer(PCanonicalString(false)).constructFromString(cb, region, rsid)) if (settings.hasField("varid")) - structFieldCodes += EmitCode.present(cb.emb, SStringPointer(PCanonicalString(true)).constructFromString(cb, region, varid)) + structFieldCodes += EmitCode.present(cb.emb, SStringPointer(PCanonicalString(false)).constructFromString(cb, region, varid)) if (settings.hasField("offset")) structFieldCodes += EmitCode.present(cb.emb, primitive(offset)) if (settings.hasField("file_idx")) @@ -343,7 +343,7 @@ object CompileDecoder { val LnoOp = CodeLabel() cb.ifx(alreadyMemoized, cb.goto(LnoOp)) - val (nextAddr, _, finish) = memoTyp.constructFromNextAddress(cb, partRegion, 1 << 16) + val (push, finish) = memoTyp.constructFromFunctions(cb, partRegion, 1 << 16, false) val d0 = cb.newLocal[Int]("memoize_entries_d0", 0) cb.whileLoop(d0 < 256, { @@ -351,8 +351,6 @@ object CompileDecoder { cb.whileLoop(d1 < 256, { val d2 = cb.newLocal[Int]("memoize_entries_d2", const(255) - d0 - d1) - val structAddr = nextAddr(cb) - val entryFieldCodes = new BoxedArrayBuilder[EmitCode]() if (includeGT) @@ -388,7 +386,7 @@ object CompileDecoder { cb.goto(Lpresent) }))) - IEmitCode(Lmissing, Lpresent, new SCanonicalCallCode(false, value), false) + IEmitCode(Lmissing, Lpresent, new SCanonicalCallCode(value), false) } if (includeGP) @@ -412,7 +410,8 @@ object CompileDecoder { IEmitCode.present(cb, primitive((d1 + (d2 << 1)).toD / 255.0)) } - entryType.storeAtAddressFromFields(cb, structAddr, partRegion, entryFieldCodes.result(), deepCopy = false) + push(cb, IEmitCode.present(cb, + SStackStruct.constructFromArgs(cb, partRegion, entryType.virtualType, entryFieldCodes.result(): _*))) cb.assign(d1, d1 + 1) }) @@ -518,7 +517,7 @@ object CompileDecoder { val dataOffset = cb.newLocal[Int]("bgen_add_entries_offset", const(settings.nSamples + 10) + i * 2) val d0 = data(dataOffset) & 0xff val d1 = data(dataOffset + 1) & 0xff - val pc = entryType.loadCheapPCode(cb, memoTyp.loadElement(memoizedEntryData, settings.nSamples, (d0 << 8) | d1)) + val pc = entryType.loadCheapSCode(cb, memoTyp.loadElement(memoizedEntryData, settings.nSamples, (d0 << 8) | d1)) cb.goto(Lpresent) val iec = IEmitCode(Lmissing, Lpresent, pc, false) pushElement(cb, iec) diff --git a/hail/src/main/scala/is/hail/io/bgen/LoadBgen.scala b/hail/src/main/scala/is/hail/io/bgen/LoadBgen.scala index b1f11198c62..2a7e21297bc 100644 --- a/hail/src/main/scala/is/hail/io/bgen/LoadBgen.scala +++ b/hail/src/main/scala/is/hail/io/bgen/LoadBgen.scala @@ -488,6 +488,8 @@ class MatrixBGENReader( override def toJValue: JValue = params.toJValue + def renderShort(): String = defaultRender() + override def hashCode(): Int = params.hashCode() override def equals(that: Any): Boolean = that match { diff --git a/hail/src/main/scala/is/hail/io/fs/HadoopFS.scala b/hail/src/main/scala/is/hail/io/fs/HadoopFS.scala index f3eec87f90f..8012c9dafc8 100644 --- a/hail/src/main/scala/is/hail/io/fs/HadoopFS.scala +++ b/hail/src/main/scala/is/hail/io/fs/HadoopFS.scala @@ -175,4 +175,18 @@ class HadoopFS(val conf: SerializableHadoopConfiguration) extends FS { val pathFS = ppath.getFileSystem(conf.value) pathFS.deleteOnExit(ppath) } + + def supportsScheme(scheme: String): 
Boolean = { + if (scheme == "") { + true + } else { + try { + hadoop.fs.FileSystem.getFileSystemClass(scheme, conf.value) + true + } catch { + case e: hadoop.fs.UnsupportedFileSystemException => false + case e: Exception => throw e + } + } + } } diff --git a/hail/src/main/scala/is/hail/io/gen/ExportBGEN.scala b/hail/src/main/scala/is/hail/io/gen/ExportBGEN.scala index b0394149407..1d55bad4429 100644 --- a/hail/src/main/scala/is/hail/io/gen/ExportBGEN.scala +++ b/hail/src/main/scala/is/hail/io/gen/ExportBGEN.scala @@ -2,7 +2,7 @@ package is.hail.io.gen import is.hail.HailContext import is.hail.annotations.{RegionValue, UnsafeRow} -import is.hail.expr.ir.{ExecuteContext, MatrixValue} +import is.hail.expr.ir.{ByteArrayBuilder, ExecuteContext, MatrixValue} import is.hail.types.physical.PStruct import is.hail.io.fs.FS import is.hail.utils.BoxedArrayBuilder @@ -17,19 +17,19 @@ object BgenWriter { val phased: Byte = 0 val totalProb: Int = 255 - def shortToBytesLE(bb: BoxedArrayBuilder[Byte], i: Int) { + def shortToBytesLE(bb: ByteArrayBuilder, i: Int) { bb += (i & 0xff).toByte bb += ((i >>> 8) & 0xff).toByte } - def intToBytesLE(bb: BoxedArrayBuilder[Byte], i: Int) { + def intToBytesLE(bb: ByteArrayBuilder, i: Int) { bb += (i & 0xff).toByte bb += ((i >>> 8) & 0xff).toByte bb += ((i >>> 16) & 0xff).toByte bb += ((i >>> 24) & 0xff).toByte } - def stringToBytesWithShortLength(bb: BoxedArrayBuilder[Byte], s: String): Int = { + def stringToBytesWithShortLength(bb: ByteArrayBuilder, s: String): Int = { val bytes = s.getBytes val l = bytes.length shortToBytesLE(bb, l) @@ -37,7 +37,7 @@ object BgenWriter { 2 + l } - def stringToBytesWithIntLength(bb: BoxedArrayBuilder[Byte], s: String): Int = { + def stringToBytesWithIntLength(bb: ByteArrayBuilder, s: String): Int = { val bytes = s.getBytes val l = bytes.length intToBytesLE(bb, l) @@ -45,7 +45,7 @@ object BgenWriter { 4 + l } - def updateIntToBytesLE(bb: BoxedArrayBuilder[Byte], i: Int, pos: Int) { + def updateIntToBytesLE(bb: ByteArrayBuilder, i: Int, pos: Int) { bb(pos) = (i & 0xff).toByte bb(pos + 1) = ((i >>> 8) & 0xff).toByte bb(pos + 2) = ((i >>> 16) & 0xff).toByte @@ -53,7 +53,7 @@ object BgenWriter { } def headerBlock(sampleIds: IndexedSeq[String], nVariants: Long): Array[Byte] = { - val bb = new BoxedArrayBuilder[Byte] + val bb = new ByteArrayBuilder() val nSamples = sampleIds.length assert(nVariants < (1L << 32)) @@ -93,8 +93,8 @@ object BgenWriter { class BgenPartitionWriter(rowPType: PStruct, nSamples: Int) { import BgenWriter._ - val bb: BoxedArrayBuilder[Byte] = new BoxedArrayBuilder[Byte] - val uncompressedData: BoxedArrayBuilder[Byte] = new BoxedArrayBuilder[Byte] + val bb: ByteArrayBuilder = new ByteArrayBuilder() + val uncompressedData: ByteArrayBuilder = new ByteArrayBuilder() val gs = new ArrayGenotypeView(rowPType) val v = new RegionValueVariant(rowPType) val va = new GenAnnotationView(rowPType) @@ -125,13 +125,13 @@ class BgenPartitionWriter(rowPType: PStruct, nSamples: Int) { i += 1 } - val gtDataBlockStart = bb.length + val gtDataBlockStart = bb.size intToBytesLE(bb, 0) // placeholder for length of compressed data intToBytesLE(bb, 0) // placeholder for length of uncompressed data val dropped = emitGPData(chr, pos, alleles) - val uncompressedLength = uncompressedData.length + val uncompressedLength = uncompressedData.size val compressedLength = compress(bb, uncompressedData.result()) updateIntToBytesLE(bb, compressedLength + 4, gtDataBlockStart) @@ -191,7 +191,7 @@ class BgenPartitionWriter(rowPType: PStruct, nSamples: Int) { } def 
roundWithConstantSum(input: Array[Double], fractional: Array[Double], index: Array[Int], - indexInverse: Array[Int], output: BoxedArrayBuilder[Byte], expectedSize: Long) { + indexInverse: Array[Int], output: ByteArrayBuilder, expectedSize: Long) { val n = input.length assert(fractional.length == n && index.length == n && indexInverse.length == n) @@ -245,7 +245,7 @@ class BgenPartitionWriter(rowPType: PStruct, nSamples: Int) { val indexInverse = new Array[Int](nGenotypes) val fractional = new Array[Double](nGenotypes) - val samplePloidyStart = uncompressedData.length + val samplePloidyStart = uncompressedData.size var i = 0 while (i < nSamples) { uncompressedData += 0x82.toByte // placeholder for sample ploidy - default is missing diff --git a/hail/src/main/scala/is/hail/io/gen/LoadGen.scala b/hail/src/main/scala/is/hail/io/gen/LoadGen.scala index bb71316ff02..72fb99d65d3 100644 --- a/hail/src/main/scala/is/hail/io/gen/LoadGen.scala +++ b/hail/src/main/scala/is/hail/io/gen/LoadGen.scala @@ -21,6 +21,8 @@ import org.apache.spark.sql.Row import org.apache.spark.broadcast.Broadcast import org.json4s.{DefaultFormats, Extraction, Formats, JObject, JValue} +import scala.collection.mutable + case class GenResult(file: String, nSamples: Int, nVariants: Int, rdd: RDD[(Annotation, Iterable[Annotation])]) object LoadGen { @@ -81,7 +83,7 @@ object LoadGen { if (gp.length != (3 * nSamples)) fatal("Number of genotype probabilities does not match 3 * number of samples. If no chromosome column is included, use -c to input the chromosome.") - val gsb = new BoxedArrayBuilder[Annotation]() + val gsb = new mutable.ArrayBuffer[Annotation]() for (i <- gp.indices by 3) { val d0 = gp(i) @@ -102,7 +104,7 @@ object LoadGen { val annotations = Annotation(locus, alleles, rsid, varid) - Some(annotations -> gsb.result().toIterable) + Some(annotations -> gsb.result()) } } } @@ -265,6 +267,8 @@ class MatrixGENReader( decomposeWithName(params, "MatrixGENReader") } + def renderShort(): String = defaultRender() + override def hashCode(): Int = params.hashCode() override def equals(that: Any): Boolean = that match { diff --git a/hail/src/main/scala/is/hail/io/index/IndexWriter.scala b/hail/src/main/scala/is/hail/io/index/IndexWriter.scala index 018d8c9558e..ee4e18503ef 100644 --- a/hail/src/main/scala/is/hail/io/index/IndexWriter.scala +++ b/hail/src/main/scala/is/hail/io/index/IndexWriter.scala @@ -1,16 +1,17 @@ package is.hail.io.index import java.io.OutputStream - import is.hail.annotations.{Annotation, Region, RegionPool, RegionValueBuilder} import is.hail.asm4s._ -import is.hail.expr.ir.{CodeParam, EmitClassBuilder, EmitCodeBuilder, EmitFunctionBuilder, EmitMethodBuilder, ExecuteContext, IEmitCode, ParamType} +import is.hail.expr.ir.{CodeParam, EmitClassBuilder, EmitCodeBuilder, EmitFunctionBuilder, EmitMethodBuilder, ExecuteContext, IEmitCode, IntArrayBuilder, LongArrayBuilder, ParamType} import is.hail.io._ import is.hail.io.fs.FS import is.hail.rvd.AbstractRVDSpec import is.hail.types +import is.hail.types.physical.stypes.SCode import is.hail.types.physical.stypes.concrete.{SBaseStructPointer, SBaseStructPointerSettable} -import is.hail.types.physical.{PBaseStructValue, PCanonicalArray, PCanonicalStruct, PCode, PType} +import is.hail.types.physical.stypes.interfaces.SBaseStructValue +import is.hail.types.physical.{PCanonicalArray, PCanonicalStruct, PType} import is.hail.types.virtual.Type import is.hail.utils._ import is.hail.utils.richUtils.ByteTrackingOutputStream @@ -102,7 +103,7 @@ class 
IndexWriterArrayBuilder(name: String, maxSize: Int, sb: SettableBuilder, r private val aoff = sb.newSettable[Long](s"${name}_aoff") private val len = sb.newSettable[Int](s"${name}_len") - val eltType: PCanonicalStruct = types.coerce[PCanonicalStruct](arrayType.elementType) + val eltType: PCanonicalStruct = types.coerce[PCanonicalStruct](arrayType.elementType.setRequired((false))) private val elt = new SBaseStructPointerSettable(SBaseStructPointer(eltType), sb.newSettable[Long](s"${name}_elt_off")) def length: Code[Int] = len @@ -115,13 +116,13 @@ class IndexWriterArrayBuilder(name: String, maxSize: Int, sb: SettableBuilder, r def create(cb: EmitCodeBuilder, dest: Code[Long]): Unit = { cb.assign(aoff, arrayType.allocate(region, maxSize)) cb += arrayType.stagedInitialize(aoff, maxSize) - arrayType.storeAtAddress(cb, dest, region, arrayType.loadCheapPCode(cb, aoff), deepCopy = false) + arrayType.storeAtAddress(cb, dest, region, arrayType.loadCheapSCode(cb, aoff), deepCopy = false) cb.assign(len, 0) } def storeLength(cb: EmitCodeBuilder): Unit = cb += arrayType.storeLength(aoff, length) - def setFieldValue(cb: EmitCodeBuilder, name: String, field: PCode): Unit = { + def setFieldValue(cb: EmitCodeBuilder, name: String, field: SCode): Unit = { cb += eltType.setFieldPresent(elt.a, name) eltType.fieldType(name).storeAtAddress(cb, eltType.fieldOffset(elt.a, name), region, field, deepCopy = true) } @@ -135,10 +136,8 @@ class IndexWriterArrayBuilder(name: String, maxSize: Int, sb: SettableBuilder, r loadChild(cb, len) cb.assign(len, len + 1) } - def loadChild(cb: EmitCodeBuilder, idx: Code[Int]): Unit = elt.store(cb, PCode(eltType, arrayType.elementOffset(aoff, idx))) - def getLoadedChild: PBaseStructValue = elt - - def getChild(idx: Value[Int]): PCode = PCode(eltType, arrayType.elementOffset(aoff, idx)) + def loadChild(cb: EmitCodeBuilder, idx: Code[Int]): Unit = elt.store(cb, eltType.loadCheapSCode(cb, arrayType.loadElement(aoff, idx))) + def getLoadedChild: SBaseStructValue = elt } class StagedIndexWriterUtils(ib: Settable[IndexWriterUtils]) { @@ -189,8 +188,8 @@ class IndexWriterUtils(path: String, fs: FS, meta: StagedIndexMetadata) { } val rBuilder = new BoxedArrayBuilder[Region]() - val aBuilder = new BoxedArrayBuilder[Long]() - val lBuilder = new BoxedArrayBuilder[Int]() + val aBuilder = new LongArrayBuilder() + val lBuilder = new IntArrayBuilder() def size: Int = rBuilder.size @@ -245,9 +244,9 @@ object StagedIndexWriter { .voidWithBuilder(cb => siw.init(cb, cb.emb.getCodeParam[String](1))) fb.emb.voidWithBuilder { cb => siw.add(cb, - IEmitCode(cb, false, PCode(keyType, fb.getCodeParam[Long](1))), + IEmitCode(cb, false, keyType.loadCheapSCode(cb, fb.getCodeParam[Long](1))), fb.getCodeParam[Long](2), - IEmitCode(cb, false, PCode(annotationType, fb.getCodeParam[Long](3)))) + IEmitCode(cb, false, annotationType.loadCheapSCode(cb, fb.getCodeParam[Long](3)))) } cb.newEmitMethod("close", FastIndexedSeq[ParamType](), typeInfo[Unit]) .voidWithBuilder(siw.close) diff --git a/hail/src/main/scala/is/hail/io/index/InternalNodeBuilder.scala b/hail/src/main/scala/is/hail/io/index/InternalNodeBuilder.scala index 37efa8b3029..cbbe8ea338e 100644 --- a/hail/src/main/scala/is/hail/io/index/InternalNodeBuilder.scala +++ b/hail/src/main/scala/is/hail/io/index/InternalNodeBuilder.scala @@ -4,7 +4,7 @@ import is.hail.annotations.Region import is.hail.asm4s.{Code, SettableBuilder, Value} import is.hail.expr.ir.EmitCodeBuilder import is.hail.io.OutputBuffer -import is.hail.types +import 
is.hail.types.physical.stypes.interfaces._ import is.hail.types.encoded.EType import is.hail.types.physical._ import is.hail.types.physical.stypes.concrete.{SBaseStructPointer, SBaseStructPointerSettable, SIndexablePointerCode} @@ -62,7 +62,7 @@ class StagedInternalNodeBuilder(maxSize: Int, keyType: PType, annotationType: PT } def allocate(cb: EmitCodeBuilder): Unit = { - node.store(cb, PCode(pType, pType.allocate(region))) + node.store(cb, pType.loadCheapSCode(cb, pType.allocate(region))) ab.create(cb, pType.fieldOffset(node.a, "children")) } @@ -77,28 +77,26 @@ class StagedInternalNodeBuilder(maxSize: Int, keyType: PType, annotationType: PT enc(cb, node, ob) } - def nodeAddress: PBaseStructValue = node + def nodeAddress: SBaseStructValue = node - def add(cb: EmitCodeBuilder, indexFileOffset: Code[Long], firstIndex: Code[Long], firstChild: PBaseStructValue): Unit = { - val childtyp = types.coerce[PBaseStruct](firstChild.pt) + def add(cb: EmitCodeBuilder, indexFileOffset: Code[Long], firstIndex: Code[Long], firstChild: SBaseStructValue): Unit = { ab.addChild(cb) - ab.setFieldValue(cb, "index_file_offset", PCode(PInt64(), indexFileOffset)) - ab.setFieldValue(cb, "first_idx", PCode(PInt64(), firstIndex)) - ab.setField(cb, "first_key", firstChild.loadField(cb, childtyp.fieldIdx("key")).typecast[PCode]) - ab.setField(cb, "first_record_offset", firstChild.loadField(cb, childtyp.fieldIdx("offset")).typecast[PCode]) - ab.setField(cb, "first_annotation", firstChild.loadField(cb, childtyp.fieldIdx("annotation")).typecast[PCode]) + ab.setFieldValue(cb, "index_file_offset", primitive(indexFileOffset)) + ab.setFieldValue(cb, "first_idx", primitive(firstIndex)) + ab.setField(cb, "first_key", firstChild.loadField(cb, "key")) + ab.setField(cb, "first_record_offset", firstChild.loadField(cb, "offset")) + ab.setField(cb, "first_annotation", firstChild.loadField(cb, "annotation")) } - def add(cb: EmitCodeBuilder, indexFileOffset: Code[Long], firstChild: PBaseStructValue): Unit = { - val childtyp = types.coerce[PBaseStruct](firstChild.pt) + def add(cb: EmitCodeBuilder, indexFileOffset: Code[Long], firstChild: SBaseStructValue): Unit = { ab.addChild(cb) - ab.setFieldValue(cb, "index_file_offset", PCode(PInt64(), indexFileOffset)) - ab.setField(cb, "first_idx", firstChild.loadField(cb, childtyp.fieldIdx("first_idx")).typecast[PCode]) - ab.setField(cb, "first_key", firstChild.loadField(cb, childtyp.fieldIdx("first_key")).typecast[PCode]) - ab.setField(cb, "first_record_offset", firstChild.loadField(cb, childtyp.fieldIdx("first_record_offset")).typecast[PCode]) - ab.setField(cb, "first_annotation", firstChild.loadField(cb, childtyp.fieldIdx("first_annotation")).typecast[PCode]) + ab.setFieldValue(cb, "index_file_offset", primitive(indexFileOffset)) + ab.setField(cb, "first_idx", firstChild.loadField(cb, "first_idx")) + ab.setField(cb, "first_key", firstChild.loadField(cb, "first_key")) + ab.setField(cb, "first_record_offset", firstChild.loadField(cb, "first_record_offset")) + ab.setField(cb, "first_annotation", firstChild.loadField(cb, "first_annotation")) } def loadChild(cb: EmitCodeBuilder, idx: Code[Int]): Unit = ab.loadChild(cb, idx) - def getLoadedChild: PBaseStructValue = ab.getLoadedChild + def getLoadedChild: SBaseStructValue = ab.getLoadedChild } diff --git a/hail/src/main/scala/is/hail/io/index/LeafNodeBuilder.scala b/hail/src/main/scala/is/hail/io/index/LeafNodeBuilder.scala index 7957bfd4223..a691952a963 100644 --- a/hail/src/main/scala/is/hail/io/index/LeafNodeBuilder.scala +++ 
b/hail/src/main/scala/is/hail/io/index/LeafNodeBuilder.scala @@ -6,7 +6,9 @@ import is.hail.expr.ir.{EmitCodeBuilder, IEmitCode} import is.hail.io.OutputBuffer import is.hail.types.encoded.EType import is.hail.types.physical._ +import is.hail.types.physical.stypes.SCode import is.hail.types.physical.stypes.concrete.{SBaseStructPointer, SBaseStructPointerSettable} +import is.hail.types.physical.stypes.interfaces.{SBaseStructValue, primitive} import is.hail.types.virtual.{TStruct, Type} import is.hail.utils._ @@ -47,15 +49,15 @@ class StagedLeafNodeBuilder(maxSize: Int, keyType: PType, annotationType: PType, def reset(cb: EmitCodeBuilder, firstIdx: Code[Long]): Unit = { cb += region.invoke[Unit]("clear") - node.store(cb, pType.loadCheapPCode(cb, pType.allocate(region))) - idxType.storePrimitiveAtAddress(cb, pType.fieldOffset(node.a, "first_idx"), PCode(idxType, firstIdx)) + node.store(cb, pType.loadCheapSCode(cb, pType.allocate(region))) + idxType.storePrimitiveAtAddress(cb, pType.fieldOffset(node.a, "first_idx"), primitive(firstIdx)) ab.create(cb, pType.fieldOffset(node.a, "keys")) } def create(cb: EmitCodeBuilder, firstIdx: Code[Long]): Unit = { cb.assign(region, Region.stagedCreate(Region.REGULAR, cb.emb.ecb.pool())) - node.store(cb, pType.loadCheapPCode(cb, pType.allocate(region))) - idxType.storePrimitiveAtAddress(cb, pType.fieldOffset(node.a, "first_idx"), PCode(idxType, firstIdx)) + node.store(cb, pType.loadCheapSCode(cb, pType.allocate(region))) + idxType.storePrimitiveAtAddress(cb, pType.fieldOffset(node.a, "first_idx"), primitive(firstIdx)) ab.create(cb, pType.fieldOffset(node.a, "keys")) } @@ -65,16 +67,16 @@ class StagedLeafNodeBuilder(maxSize: Int, keyType: PType, annotationType: PType, enc(cb, node, ob) } - def nodeAddress: PBaseStructValue = node + def nodeAddress: SBaseStructValue = node def add(cb: EmitCodeBuilder, key: => IEmitCode, offset: Code[Long], annotation: => IEmitCode): Unit = { ab.addChild(cb) ab.setField(cb, "key", key) - ab.setFieldValue(cb, "offset", PCode(PInt64(), offset)) + ab.setFieldValue(cb, "offset", primitive(offset)) ab.setField(cb, "annotation", annotation) } def loadChild(cb: EmitCodeBuilder, idx: Code[Int]): Unit = ab.loadChild(cb, idx) - def getLoadedChild: PBaseStructValue = ab.getLoadedChild - def firstIdx(cb: EmitCodeBuilder): PCode = idxType.loadCheapPCode(cb, pType.fieldOffset(node.a, "first_idx")) + def getLoadedChild: SBaseStructValue = ab.getLoadedChild + def firstIdx(cb: EmitCodeBuilder): SCode = idxType.loadCheapSCode(cb, pType.fieldOffset(node.a, "first_idx")) } \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/io/plink/LoadPlink.scala b/hail/src/main/scala/is/hail/io/plink/LoadPlink.scala index 18cd44b02de..64fcc76cc91 100644 --- a/hail/src/main/scala/is/hail/io/plink/LoadPlink.scala +++ b/hail/src/main/scala/is/hail/io/plink/LoadPlink.scala @@ -217,7 +217,7 @@ object MatrixPLINKReader { val partSize = partition(nVariants, nPartitions) val partScan = partSize.scanLeft(0)(_ + _) - val cb = new BoxedArrayBuilder[Any]() + val cb = new BoxedArrayBuilder[Row]() val ib = new BoxedArrayBuilder[Interval]() var p = 0 @@ -247,7 +247,7 @@ object MatrixPLINKReader { } assert(prevEnd == nVariants) - val contexts = cb.result() + val contexts = cb.result().map(r => r: Any) val partitioner = new RVDPartitioner(locusAllelesType, ib.result(), 0) @@ -474,6 +474,8 @@ class MatrixPLINKReader( decomposeWithName(params, "MatrixPLINKReader") } + def renderShort(): String = defaultRender() + override def hashCode(): Int = params.hashCode() 
override def equals(that: Any): Boolean = that match { diff --git a/hail/src/main/scala/is/hail/io/tabix/TabixReader.scala b/hail/src/main/scala/is/hail/io/tabix/TabixReader.scala index 3a185c73214..791fad40960 100644 --- a/hail/src/main/scala/is/hail/io/tabix/TabixReader.scala +++ b/hail/src/main/scala/is/hail/io/tabix/TabixReader.scala @@ -1,13 +1,13 @@ package is.hail.io.tabix import java.io.InputStream - import htsjdk.samtools.util.FileExtensions import htsjdk.tribble.util.ParsingUtils import is.hail.io.compress.BGzipLineReader import is.hail.io.fs.FS import is.hail.utils._ import is.hail.backend.BroadcastValue +import is.hail.expr.ir.IntArrayBuilder import scala.collection.mutable import scala.language.implicitConversions @@ -256,7 +256,7 @@ class TabixReader(val filePath: String, fs: FS, idxFilePath: Option[String] = No new Array[Int](0) else { var end = _end - val bins = new BoxedArrayBuilder[Int](MaxBin) + val bins = new IntArrayBuilder(MaxBin) if (end >= (1 << 29)) { end = 1 << 29 } diff --git a/hail/src/main/scala/is/hail/io/vcf/LoadVCF.scala b/hail/src/main/scala/is/hail/io/vcf/LoadVCF.scala index aaa6115fe98..cf1df02705a 100644 --- a/hail/src/main/scala/is/hail/io/vcf/LoadVCF.scala +++ b/hail/src/main/scala/is/hail/io/vcf/LoadVCF.scala @@ -7,7 +7,7 @@ import is.hail.backend.BroadcastValue import is.hail.backend.spark.SparkBackend import is.hail.expr.JSONAnnotationImpex import is.hail.expr.ir.lowering.TableStage -import is.hail.expr.ir.{ExecuteContext, GenericLine, GenericLines, GenericTableValue, IRParser, LowerMatrixIR, LoweredTableReader, MatrixHybridReader, MatrixIR, MatrixLiteral, PruneDeadFields, TableRead, TableValue} +import is.hail.expr.ir.{ExecuteContext, GenericLine, GenericLines, GenericTableValue, IR, IRParser, Literal, LowerMatrixIR, LoweredTableReader, MatrixHybridReader, MatrixIR, MatrixLiteral, PruneDeadFields, TableRead, TableValue} import is.hail.types._ import is.hail.types.physical.{PBoolean, PCall, PCanonicalArray, PCanonicalCall, PCanonicalLocus, PCanonicalSet, PCanonicalString, PCanonicalStruct, PField, PFloat64, PInt32, PStruct, PType} import is.hail.types.virtual._ @@ -1208,7 +1208,11 @@ object LoadVCF { (line.getCount == 1 || (isFlag && line.getCount == 0))) ((id, baseType), (id, attrs), isFlag) - else if (baseType.isInstanceOf[PCall]) + else if (isFlag) { + warn(s"invalid VCF header: at INFO field '$id' of type 'Flag', expected 'Number=0', got 'Number=${headerNumberToString(line)}''" + + s"\n Interpreting as 'Number=0' regardless.") + ((id, baseType), (id, attrs), isFlag) + } else if (baseType.isInstanceOf[PCall]) fatal("fields in 'call_fields' must have 'Number' equal to 1.") else ((id, PCanonicalArray(baseType.setRequired(arrayElementsRequired))), (id, attrs), isFlag) @@ -1777,6 +1781,13 @@ class MatrixVCFReader( body) } + override def lowerGlobals(ctx: ExecuteContext, requestedGlobalsType: TStruct): IR = { + val globals = Row(sampleIDs.map(Row(_)).toFastIndexedSeq) + Literal.coerce(requestedGlobalsType, + fullType.globalType.valueSubsetter(requestedGlobalsType) + .apply(globals)) + } + override def lower(ctx: ExecuteContext, requestedType: TableType): TableStage = executeGeneric(ctx).toTableStage(ctx, requestedType) @@ -1788,6 +1799,8 @@ class MatrixVCFReader( decomposeWithName(params, "MatrixVCFReader") } + def renderShort(): String = defaultRender() + override def hashCode(): Int = params.hashCode() override def equals(that: Any): Boolean = that match { diff --git a/hail/src/main/scala/is/hail/linalg/BlockMatrix.scala 
b/hail/src/main/scala/is/hail/linalg/BlockMatrix.scala index a81d5a6e84a..ba0439a47b2 100644 --- a/hail/src/main/scala/is/hail/linalg/BlockMatrix.scala +++ b/hail/src/main/scala/is/hail/linalg/BlockMatrix.scala @@ -2,7 +2,6 @@ package is.hail.linalg import java.io._ import java.nio._ - import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, sum => breezeSum, _} import breeze.numerics.{abs => breezeAbs, log => breezeLog, pow => breezePow, sqrt => breezeSqrt} import breeze.stats.distributions.{RandBasis, ThreadLocalRandomGenerator} @@ -12,7 +11,7 @@ import is.hail.backend.{BroadcastValue, HailTaskContext} import is.hail.backend.spark.{SparkBackend, SparkTaskContext} import is.hail.utils._ import is.hail.expr.Parser -import is.hail.expr.ir.{CompileAndEvaluate, ExecuteContext, IR, TableValue} +import is.hail.expr.ir.{CompileAndEvaluate, ExecuteContext, IR, IntArrayBuilder, TableValue} import is.hail.types._ import is.hail.types.physical.{PArray, PCanonicalArray, PCanonicalStruct, PFloat64, PFloat64Optional, PFloat64Required, PInt64, PInt64Optional, PInt64Required, PStruct} import is.hail.types.virtual._ @@ -1368,8 +1367,8 @@ object BlockMatrixFilterRDD { val blockSize = gp.blockSize val ab = new BoxedArrayBuilder[(Int, Array[Int], Array[Int])]() - val startIndices = new BoxedArrayBuilder[Int]() - val endIndices = new BoxedArrayBuilder[Int]() + val startIndices = new IntArrayBuilder() + val endIndices = new IntArrayBuilder() keep .grouped(blockSize) diff --git a/hail/src/main/scala/is/hail/linalg/LinalgCodeUtils.scala b/hail/src/main/scala/is/hail/linalg/LinalgCodeUtils.scala index e56d3533400..dff7c72ba31 100644 --- a/hail/src/main/scala/is/hail/linalg/LinalgCodeUtils.scala +++ b/hail/src/main/scala/is/hail/linalg/LinalgCodeUtils.scala @@ -3,52 +3,67 @@ package is.hail.linalg import is.hail.annotations.Region import is.hail.asm4s.{Code, _} import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, IEmitCode} -import is.hail.types.physical.stypes.concrete.SNDArrayPointerSettable -import is.hail.types.physical.stypes.interfaces.SNDArray -import is.hail.types.physical.{PCanonicalNDArray, PNDArrayCode, PNDArrayValue} +import is.hail.types.physical.stypes.concrete.{SNDArrayPointer, SNDArrayPointerSettable} +import is.hail.types.physical.stypes.interfaces.{SNDArray, SNDArrayCode, SNDArrayValue} +import is.hail.utils.FastIndexedSeq object LinalgCodeUtils { - def checkColumnMajor(pndv: PNDArrayValue, cb: EmitCodeBuilder): Value[Boolean] = { + def checkColumnMajor(pndv: SNDArrayValue, cb: EmitCodeBuilder): Value[Boolean] = { val answer = cb.newField[Boolean]("checkColumnMajorResult") val shapes = pndv.shapes(cb) val strides = pndv.strides(cb) val runningProduct = cb.newLocal[Long]("check_column_major_running_product") - val elementType = pndv.pt.elementType - val nDims = pndv.pt.nDims + val pt = pndv.st.asInstanceOf[SNDArrayPointer].pType + val elementType = pt.elementType + val nDims = pndv.st.nDims cb.assign(answer, true) - cb.append(Code( - runningProduct := elementType.byteSize, - Code.foreach(0 until nDims){ index => - Code( - answer := answer & (strides(index) ceq runningProduct), - runningProduct := runningProduct * (shapes(index) > 0L).mux(shapes(index), 1L) - ) - } - )) + cb.assign(runningProduct, elementType.byteSize) + (0 until nDims).foreach{ index => + cb.assign(answer, answer & (strides(index) ceq runningProduct)) + cb.assign(runningProduct, runningProduct * (shapes(index) > 0L).mux(shapes(index), 1L)) + } + answer + } + + def checkRowMajor(pndv: SNDArrayValue, cb: 
EmitCodeBuilder): Value[Boolean] = { + val answer = cb.newField[Boolean]("checkColumnMajorResult") + val shapes = pndv.shapes(cb) + val strides = pndv.strides(cb) + val runningProduct = cb.newLocal[Long]("check_column_major_running_product") + + val pt = pndv.st.asInstanceOf[SNDArrayPointer].pType + val elementType = pt.elementType + val nDims = pt.nDims + + cb.assign(answer, true) + cb.assign(runningProduct, elementType.byteSize) + ((nDims - 1) to 0 by -1).foreach { index => + cb.assign(answer, answer & (strides(index) ceq runningProduct)) + cb.assign(runningProduct, runningProduct * (shapes(index) > 0L).mux(shapes(index), 1L)) + } answer } - def createColumnMajorCode(pndv: PNDArrayValue, cb: EmitCodeBuilder, region: Value[Region]): PNDArrayCode = { + def createColumnMajorCode(pndv: SNDArrayValue, cb: EmitCodeBuilder, region: Value[Region]): SNDArrayCode = { val shape = pndv.shapes(cb) - val pt = pndv.pt.asInstanceOf[PCanonicalNDArray] + val pt = pndv.st.asInstanceOf[SNDArrayPointer].pType val strides = pt.makeColumnMajorStrides(shape, region, cb) - val (dataFirstElementAddress, dataFinisher) = pndv.pt.constructDataFunction(shape, strides, cb, region) - - val curAddr = cb.newLocal[Long]("create_column_major_cur_addr", dataFirstElementAddress) + val (dataFirstElementAddress, dataFinisher) = pt.constructDataFunction(shape, strides, cb, region) + // construct an SNDArrayCode with undefined contents + val result = dataFinisher(cb).memoize(cb, "col_major_result") - SNDArray.forEachIndex(cb, shape, "nda_create_column_major") { case (cb, idxVars) => - pt.elementType.storeAtAddress(cb, curAddr, region, pndv.loadElement(idxVars, cb), true) - cb.assign(curAddr, curAddr + pt.elementType.byteSize) - } - dataFinisher(cb) + SNDArray.coiterate(cb, region, FastIndexedSeq((result.get, "result"), (pndv.get, "pndv")), { + case Seq(l, r) => cb.assign(l, r) + }) + result.get } - def checkColMajorAndCopyIfNeeded(aInput: PNDArrayValue, cb: EmitCodeBuilder, region: Value[Region]): PNDArrayValue = { + def checkColMajorAndCopyIfNeeded(aInput: SNDArrayValue, cb: EmitCodeBuilder, region: Value[Region]): SNDArrayValue = { val aIsColumnMajor = LinalgCodeUtils.checkColumnMajor(aInput, cb) - val aColMajor = cb.emb.newPField("ndarray_output_column_major", aInput.pt).asInstanceOf[SNDArrayPointerSettable] + val aColMajor = cb.emb.newPField("ndarray_output_column_major", aInput.st).asInstanceOf[SNDArrayPointerSettable] cb.ifx(aIsColumnMajor, {cb.assign(aColMajor, aInput)}, { cb.assign(aColMajor, LinalgCodeUtils.createColumnMajorCode(aInput, cb, region)) @@ -56,6 +71,19 @@ object LinalgCodeUtils { aColMajor } + def checkStandardStriding(aInput: SNDArrayValue, cb: EmitCodeBuilder, region: Value[Region]): (SNDArrayValue, Value[Boolean]) = { + val aIsColumnMajor = LinalgCodeUtils.checkColumnMajor(aInput, cb) + val a = cb.emb.newPField("ndarray_output_standardized", aInput.st).asInstanceOf[SNDArrayPointerSettable] + cb.ifx(aIsColumnMajor, {cb.assign(a, aInput)}, { + val isRowMajor = LinalgCodeUtils.checkRowMajor(aInput, cb) + cb.ifx(isRowMajor, {cb.assign(a, aInput)}, { + cb.assign(a, LinalgCodeUtils.createColumnMajorCode(aInput, cb, region)) + }) + }) + + (a, aIsColumnMajor) + } + def linearizeIndicesRowMajor(indices: IndexedSeq[Code[Long]], shapeArray: IndexedSeq[Value[Long]], mb: EmitMethodBuilder[_]): Code[Long] = { val index = mb.genFieldThisRef[Long]() val elementsInProcessedDimensions = mb.genFieldThisRef[Long]() diff --git a/hail/src/main/scala/is/hail/lir/PST.scala b/hail/src/main/scala/is/hail/lir/PST.scala index 
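// checkColumnMajor and the new checkRowMajor above emit staged code, but the arithmetic they
// stage is simple: a layout is column-major (resp. row-major) when each dimension's stride
// equals the running product of the element size and the preceding (resp. following) extents.
// A plain, unstaged sketch of the same check on ordinary arrays (illustrative only):
object StrideCheckSketch {
  def isColumnMajor(shape: Array[Long], strides: Array[Long], elemSize: Long): Boolean = {
    var running = elemSize
    var ok = true
    var i = 0
    while (i < shape.length) {
      ok = ok && strides(i) == running
      running *= math.max(shape(i), 1L) // treat zero-length dimensions as extent 1
      i += 1
    }
    ok
  }

  def isRowMajor(shape: Array[Long], strides: Array[Long], elemSize: Long): Boolean = {
    var running = elemSize
    var ok = true
    var i = shape.length - 1
    while (i >= 0) {
      ok = ok && strides(i) == running
      running *= math.max(shape(i), 1L)
      i -= 1
    }
    ok
  }

  def main(args: Array[String]): Unit = {
    val shape = Array(3L, 4L)
    println(isColumnMajor(shape, Array(8L, 24L), 8L)) // true
    println(isRowMajor(shape, Array(32L, 8L), 8L))    // true
  }
}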
5affedfbede..90e7fa17825 100644 --- a/hail/src/main/scala/is/hail/lir/PST.scala +++ b/hail/src/main/scala/is/hail/lir/PST.scala @@ -1,5 +1,6 @@ package is.hail.lir +import is.hail.expr.ir.{BooleanArrayBuilder, IntArrayBuilder} import is.hail.utils.BoxedArrayBuilder import scala.collection.mutable @@ -256,16 +257,16 @@ class PSTBuilder( private val regions: mutable.ArrayBuffer[PSTRegion] = mutable.ArrayBuffer[PSTRegion]() // regions with no parents - private val frontier = new BoxedArrayBuilder[Int]() + private val frontier = new IntArrayBuilder() private def addRegion(start: Int, end: Int): Int = { - var firstc = frontier.length + var firstc = frontier.size while ((firstc - 1) >= 0 && regions(frontier(firstc - 1)).start >= start) firstc -= 1 assert(firstc == 0 || regions(frontier(firstc - 1)).end <= start) val ri = regions.length - val n = frontier.length - firstc + val n = frontier.size - firstc val children = new Array[Int](n) var i = 0 while (i < n) { @@ -275,8 +276,8 @@ class PSTBuilder( children(i) = c i += 1 } - frontier.setSizeUninitialized(frontier.length - n) - if (frontier.nonEmpty && regions(frontier.last).end == start) + frontier.setSizeUninitialized(frontier.size - n) + if (frontier.size > 0 && regions(frontier(frontier.size - 1)).end == start) splitBlock.set(start) frontier += ri regions += new PSTRegion(start, end, children) @@ -284,7 +285,7 @@ class PSTBuilder( } private def addRoot(): Int = { - if (frontier.length == 1 && + if (frontier.size == 1 && regions(frontier(0)).start == 0 && regions(frontier(0)).end == nBlocks - 1) { frontier(0) @@ -292,7 +293,7 @@ class PSTBuilder( val c = regions.length val ri = regions.length - val n = frontier.length + val n = frontier.size val children = new Array[Int](n) var i = 0 while (i < n) { @@ -312,7 +313,7 @@ class PSTBuilder( // find regions in [start, end] // no edges from [0, start) target (start, end] private def findRegions(start: Int, end: Int): Unit = { - var regionStarts = new BoxedArrayBuilder[Int]() + var regionStarts = new IntArrayBuilder() regionStarts += start // find subregions of [start, end] @@ -343,7 +344,7 @@ class PSTBuilder( } } - f(regionStarts.length - 1) + f(regionStarts.size - 1) regionStarts += newStart } @@ -382,7 +383,7 @@ class PSTBuilder( val root = addRoot() val newBlocksB = new BoxedArrayBuilder[Block]() - val newSplitBlock = new BoxedArrayBuilder[Boolean]() + val newSplitBlock = new BooleanArrayBuilder() // split blocks, compute new blocks // in linearization order @@ -442,7 +443,7 @@ class PSTBuilder( child = regions(children(c)) } - val newChildren = new BoxedArrayBuilder[Int]() + val newChildren = new IntArrayBuilder() var j = r.start var jincluded = false @@ -491,7 +492,7 @@ class PSTBuilder( // but are not contained in region i def findLoopRegions(i: Int): Array[Int] = { val r = newRegions(i) - val backEdgeSourcesB = new BoxedArrayBuilder[Int]() + val backEdgeSourcesB = new IntArrayBuilder() if (r.children.nonEmpty) { var c = 0 while (c < r.children.length) { diff --git a/hail/src/main/scala/is/hail/lir/X.scala b/hail/src/main/scala/is/hail/lir/X.scala index aff8be6c7ee..c4fa0e716a5 100644 --- a/hail/src/main/scala/is/hail/lir/X.scala +++ b/hail/src/main/scala/is/hail/lir/X.scala @@ -831,6 +831,7 @@ class InsnX(val op: Int, _ti: TypeInfo[_], var lineNumber: Int = 0) extends Valu case L2I => IntInfo case F2I => IntInfo case D2I => IntInfo + case IALOAD => IntInfo // Long case LNEG => LongInfo case LADD => LongInfo @@ -847,6 +848,7 @@ class InsnX(val op: Int, _ti: TypeInfo[_], var lineNumber: Int = 
0) extends Valu case I2L => LongInfo case F2L => LongInfo case D2L => LongInfo + case LALOAD => LongInfo // Float case FNEG => FloatInfo case FADD => FloatInfo @@ -857,6 +859,8 @@ class InsnX(val op: Int, _ti: TypeInfo[_], var lineNumber: Int = 0) extends Valu case I2F => FloatInfo case L2F => FloatInfo case D2F => FloatInfo + case FALOAD => FloatInfo + // Double case DNEG => DoubleInfo case DADD => DoubleInfo @@ -867,8 +871,10 @@ class InsnX(val op: Int, _ti: TypeInfo[_], var lineNumber: Int = 0) extends Valu case I2D => DoubleInfo case L2D => DoubleInfo case F2D => DoubleInfo + case DALOAD => DoubleInfo // Boolean case I2B => BooleanInfo + case BALOAD => BooleanInfo } } } diff --git a/hail/src/main/scala/is/hail/lir/package.scala b/hail/src/main/scala/is/hail/lir/package.scala index 4ed17cf071d..65ccf0fd9b2 100644 --- a/hail/src/main/scala/is/hail/lir/package.scala +++ b/hail/src/main/scala/is/hail/lir/package.scala @@ -9,9 +9,11 @@ package object lir { def genName(tag: String, baseName: String): String = synchronized { counter += 1 - if (baseName != null) + if (baseName != null) { + if (baseName.contains(".")) + throw new RuntimeException(s"genName has invalid character(s): $baseName") s"__$tag$counter$baseName" - else + } else s"__$tag${ counter }null" } diff --git a/hail/src/main/scala/is/hail/methods/LinearRegression.scala b/hail/src/main/scala/is/hail/methods/LinearRegression.scala index 139b48584f6..59c1be9d103 100644 --- a/hail/src/main/scala/is/hail/methods/LinearRegression.scala +++ b/hail/src/main/scala/is/hail/methods/LinearRegression.scala @@ -5,7 +5,7 @@ import breeze.numerics.sqrt import is.hail.HailContext import is.hail.annotations._ import is.hail.expr.ir.functions.MatrixToTableFunction -import is.hail.expr.ir.{ExecuteContext, MatrixValue, TableValue} +import is.hail.expr.ir.{ExecuteContext, IntArrayBuilder, MatrixValue, TableValue} import is.hail.types._ import is.hail.types.physical.PStruct import is.hail.types.virtual.{TArray, TFloat64, TInt32, TStruct} @@ -86,7 +86,7 @@ case class LinearRegressionRowsSingle( val producerCtx = consumerCtx.freshContext val rvb = new RegionValueBuilder() - val missingCompleteCols = new BoxedArrayBuilder[Int] + val missingCompleteCols = new IntArrayBuilder() val data = new Array[Double](n * rowBlockSize) val blockWRVs = new Array[WritableRegionValue](rowBlockSize) @@ -244,7 +244,7 @@ case class LinearRegressionRowsChained( val rvb = new RegionValueBuilder() val inputData = bc.value - val builder = new BoxedArrayBuilder[Int] + val builder = new IntArrayBuilder() val data = inputData.map(cri => new Array[Double](cri.n * rowBlockSize)) val blockWRVs = new Array[WritableRegionValue](rowBlockSize) diff --git a/hail/src/main/scala/is/hail/methods/LogisticRegression.scala b/hail/src/main/scala/is/hail/methods/LogisticRegression.scala index 08e0df305ae..73a353c42e7 100644 --- a/hail/src/main/scala/is/hail/methods/LogisticRegression.scala +++ b/hail/src/main/scala/is/hail/methods/LogisticRegression.scala @@ -4,7 +4,7 @@ import breeze.linalg._ import is.hail.HailContext import is.hail.annotations._ import is.hail.expr.ir.functions.MatrixToTableFunction -import is.hail.expr.ir.{ExecuteContext, MatrixValue, TableValue} +import is.hail.expr.ir.{ExecuteContext, IntArrayBuilder, MatrixValue, TableValue} import is.hail.types.virtual.{TArray, TFloat64, TStruct} import is.hail.types.{MatrixType, TableType} import is.hail.rvd.RVDType @@ -94,7 +94,7 @@ case class LogisticRegression( val newRVD = mv.rvd.mapPartitions(newRVDType) { (ctx, it) => val rvb = 
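// The lir/package.scala hunk above makes genName reject base names containing '.', since dots
// are not legal inside generated JVM member names. A standalone sketch of that guard and the
// naming scheme it protects; the counter and formatting are illustrative, not Hail's exact code:
object GenNameSketch {
  private var counter: Long = 0L

  def genName(tag: String, baseName: String): String = synchronized {
    counter += 1
    if (baseName != null) {
      if (baseName.contains("."))
        throw new RuntimeException(s"genName has invalid character(s): $baseName")
      s"__$tag$counter$baseName"
    } else
      s"__$tag${counter}null"
  }

  def main(args: Array[String]): Unit = {
    println(genName("m", "foo")) // __m1foo
    println(genName("m", null))  // __m2null
    try genName("m", "bad.name")
    catch { case e: RuntimeException => println(e.getMessage) }
  }
}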
ctx.rvb - val missingCompleteCols = new BoxedArrayBuilder[Int]() + val missingCompleteCols = new IntArrayBuilder() val _nullFits = nullFitBc.value val _yVecs = yVecsBc.value val X = XBc.value.copy diff --git a/hail/src/main/scala/is/hail/methods/PoissonRegression.scala b/hail/src/main/scala/is/hail/methods/PoissonRegression.scala index b07f1e1ae27..a48b013cf26 100644 --- a/hail/src/main/scala/is/hail/methods/PoissonRegression.scala +++ b/hail/src/main/scala/is/hail/methods/PoissonRegression.scala @@ -4,7 +4,7 @@ import breeze.linalg._ import is.hail.HailContext import is.hail.annotations._ import is.hail.expr.ir.functions.MatrixToTableFunction -import is.hail.expr.ir.{ExecuteContext, MatrixValue, TableValue} +import is.hail.expr.ir.{ExecuteContext, IntArrayBuilder, MatrixValue, TableValue} import is.hail.types.virtual.{TFloat64, TStruct} import is.hail.types.{MatrixType, TableType} import is.hail.rvd.RVDType @@ -81,7 +81,7 @@ case class PoissonRegression( val newRVD = mv.rvd.mapPartitions(newRVDType) { (ctx, it) => val rvb = ctx.rvb - val missingCompleteCols = new BoxedArrayBuilder[Int]() + val missingCompleteCols = new IntArrayBuilder() val X = XBc.value.copy it.map { ptr => diff --git a/hail/src/main/scala/is/hail/methods/Skat.scala b/hail/src/main/scala/is/hail/methods/Skat.scala index 1b4bba56a5f..04982f50d55 100644 --- a/hail/src/main/scala/is/hail/methods/Skat.scala +++ b/hail/src/main/scala/is/hail/methods/Skat.scala @@ -11,7 +11,7 @@ import org.apache.spark.sql.Row import com.sun.jna.Native import com.sun.jna.ptr.IntByReference import is.hail.HailContext -import is.hail.expr.ir.{ExecuteContext, MatrixValue, TableValue} +import is.hail.expr.ir.{ExecuteContext, IntArrayBuilder, MatrixValue, TableValue} import is.hail.expr.ir.functions.MatrixToTableFunction import is.hail.types.virtual.{TFloat64, TInt32, TStruct, Type} import is.hail.rvd.RVDType @@ -356,7 +356,7 @@ case class Skat( val key = Annotation.copy(keyType.virtualType, UnsafeRow.read(keyType, ctx.r, fullRowType.loadField(ptr, keyIndex))) val data = new Array[Double](n) - RegressionUtils.setMeanImputedDoubles(data, 0, completeColIdxBc.value, new BoxedArrayBuilder[Int](), + RegressionUtils.setMeanImputedDoubles(data, 0, completeColIdxBc.value, new IntArrayBuilder(), ptr, fullRowType, entryArrayType, entryType, entryArrayIdx, fieldIdx) Some(key -> (BDV(data) -> weight)) } else None diff --git a/hail/src/main/scala/is/hail/rvd/AbstractRVDSpec.scala b/hail/src/main/scala/is/hail/rvd/AbstractRVDSpec.scala index 834e833c687..ce9e47fed83 100644 --- a/hail/src/main/scala/is/hail/rvd/AbstractRVDSpec.scala +++ b/hail/src/main/scala/is/hail/rvd/AbstractRVDSpec.scala @@ -8,6 +8,7 @@ import is.hail.expr.ir.{ExecuteContext, IR, PartitionZippedNativeReader} import is.hail.io._ import is.hail.io.fs.FS import is.hail.io.index.{InternalNodeBuilder, LeafNodeBuilder} +import is.hail.types.TableType import is.hail.types.encoded.ETypeSerializer import is.hail.types.physical.{PCanonicalStruct, PInt64Optional, PStruct, PType, PTypeSerializer} import is.hail.types.virtual.{TStructSerializer, _} @@ -164,7 +165,7 @@ object AbstractRVDSpec { contexts, body) extendedNewPartitioner match { - case Some(np) if filterIntervals => + case Some(np) if !filterIntervals => ts.repartitionNoShuffle(np) case _ => ts } @@ -260,24 +261,28 @@ abstract class AbstractRVDSpec { def readTableStage( ctx: ExecuteContext, path: String, - requestedType: TStruct, + requestedType: TableType, newPartitioner: Option[RVDPartitioner] = None, filterIntervals: Boolean = false ): IR => 
TableStage = newPartitioner match { case Some(_) => fatal("attempted to read unindexed data as indexed") case None => + if (!partitioner.kType.fieldNames.startsWith(requestedType.key)) + fatal(s"cannot generate whole-stage code for legacy table: " + + s"table key = [${ requestedType.key.mkString(", ") }], " + + s"key on disk: [${ partitioner.kType.fieldNames.mkString(", ") }]") val rSpec = typedCodecSpec val ctxType = TStruct("path" -> TString) val contexts = ir.ToStream(ir.Literal(TArray(ctxType), absolutePartPaths(path).map(x => Row(x)).toFastIndexedSeq)) - val body = (ctx: IR) => ir.ReadPartition(ir.GetField(ctx, "path"), requestedType, ir.PartitionNativeReader(rSpec)) + val body = (ctx: IR) => ir.ReadPartition(ir.GetField(ctx, "path"), requestedType.rowType, ir.PartitionNativeReader(rSpec)) (globals: IR) => TableStage( globals, - partitioner, + partitioner.coarsen(partitioner.kType.fieldNames.takeWhile(requestedType.rowType.hasField).length), TableStageDependency.none, contexts, body) @@ -487,7 +492,7 @@ case class IndexedRVDSpec2(_key: IndexedSeq[String], override def readTableStage( ctx: ExecuteContext, path: String, - requestedType: TStruct, + requestedType: TableType, newPartitioner: Option[RVDPartitioner] = None, filterIntervals: Boolean = false ): IR => TableStage = newPartitioner match { @@ -519,7 +524,7 @@ case class IndexedRVDSpec2(_key: IndexedSeq[String], val contexts = ir.ToStream(ir.Literal(TArray(reader.contextType), contextsValues)) - val body = (ctx: IR) => ir.ReadPartition(ctx, requestedType, reader) + val body = (ctx: IR) => ir.ReadPartition(ctx, requestedType.rowType, reader) { (globals: IR) => val ts = TableStage( diff --git a/hail/src/main/scala/is/hail/services/batch_client/BatchClient.scala b/hail/src/main/scala/is/hail/services/batch_client/BatchClient.scala index 4c31782e2a7..c6009f3e520 100644 --- a/hail/src/main/scala/is/hail/services/batch_client/BatchClient.scala +++ b/hail/src/main/scala/is/hail/services/batch_client/BatchClient.scala @@ -1,7 +1,8 @@ package is.hail.services.batch_client -import java.nio.charset.StandardCharsets +import is.hail.expr.ir.ByteArrayBuilder +import java.nio.charset.StandardCharsets import is.hail.utils._ import is.hail.services._ import is.hail.services.{DeployConfig, Tokens} @@ -97,7 +98,7 @@ class BatchClient( bunchb.clear() size = 0 - val b = new BoxedArrayBuilder[Byte]() + val b = new ByteArrayBuilder() i = 0 // reuse while (i < bunches.length) { diff --git a/hail/src/main/scala/is/hail/services/package.scala b/hail/src/main/scala/is/hail/services/package.scala index 7af90b676a0..ed994d2d17d 100644 --- a/hail/src/main/scala/is/hail/services/package.scala +++ b/hail/src/main/scala/is/hail/services/package.scala @@ -42,7 +42,9 @@ package object services { true case e: SocketException => e.getMessage != null && ( - e.getMessage.contains("Connection reset") || e.getMessage.contains("Broken pipe")) + e.getMessage.contains("Connection reset") || + e.getMessage.contains("Broken pipe") || + e.getMessage.contains("Connection refused")) case e: EOFException => e.getMessage != null && ( e.getMessage.contains("SSL peer shut down incorrectly")) diff --git a/hail/src/main/scala/is/hail/services/shuffler/package.scala b/hail/src/main/scala/is/hail/services/shuffler/package.scala index 781cecc9635..5c2f22ac3fa 100644 --- a/hail/src/main/scala/is/hail/services/shuffler/package.scala +++ b/hail/src/main/scala/is/hail/services/shuffler/package.scala @@ -4,14 +4,15 @@ import java.io._ import java.net.Socket import java.security.KeyStore import 
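// The readTableStage change above refuses to lower a legacy (unindexed) read when the requested
// key is not a prefix of the key the data was written with, and it coarsens the partitioner to
// however many of the on-disk key fields the requested row type still carries. A plain sketch of
// those two checks on simple field-name sequences (illustrative helpers, not Hail's API):
object KeyCompatibilitySketch {
  def requireKeyPrefix(diskKey: Seq[String], requestedKey: Seq[String]): Unit =
    if (!diskKey.startsWith(requestedKey))
      throw new IllegalArgumentException(
        s"requested key [${requestedKey.mkString(", ")}] is not a prefix of " +
          s"on-disk key [${diskKey.mkString(", ")}]")

  // number of leading on-disk key fields still present in the requested row type
  def coarsenedKeyLength(diskKey: Seq[String], requestedRowFields: Set[String]): Int =
    diskKey.takeWhile(requestedRowFields.contains).length

  def main(args: Array[String]): Unit = {
    val diskKey = Seq("locus", "alleles")
    requireKeyPrefix(diskKey, Seq("locus"))                    // ok: prefix
    println(coarsenedKeyLength(diskKey, Set("locus", "info"))) // 1
    try requireKeyPrefix(diskKey, Seq("alleles"))
    catch { case e: IllegalArgumentException => println(e.getMessage) }
  }
}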
java.util.Base64 - import is.hail.annotations._ import is.hail.asm4s._ +import is.hail.expr.ir.LongArrayBuilder import is.hail.types.physical._ import is.hail.io._ import is.hail.utils._ import org.apache.log4j.Logger -import javax.net.ssl._; + +import javax.net.ssl._ import scala.language.implicitConversions package object shuffler { @@ -38,7 +39,7 @@ package object shuffler { decoder: Decoder, sizeHint: Int = BoxedArrayBuilder.defaultInitialCapacity ): Array[Long] = { - val ab = new BoxedArrayBuilder[Long](sizeHint) + val ab = new LongArrayBuilder(sizeHint) var hasNext = decoder.readByte() while (hasNext == 1) { diff --git a/hail/src/main/scala/is/hail/stats/RegressionUtils.scala b/hail/src/main/scala/is/hail/stats/RegressionUtils.scala index 755c0d2b15c..2282cf94d01 100644 --- a/hail/src/main/scala/is/hail/stats/RegressionUtils.scala +++ b/hail/src/main/scala/is/hail/stats/RegressionUtils.scala @@ -2,7 +2,7 @@ package is.hail.stats import breeze.linalg._ import is.hail.annotations.{Region, RegionValue} -import is.hail.expr.ir.MatrixValue +import is.hail.expr.ir.{IntArrayBuilder, MatrixValue} import is.hail.types.physical.{PArray, PStruct} import is.hail.types.virtual.TFloat64 import is.hail.utils._ @@ -12,7 +12,7 @@ object RegressionUtils { def setMeanImputedDoubles(data: Array[Double], offset: Int, completeColIdx: Array[Int], - missingCompleteCols: BoxedArrayBuilder[Int], + missingCompleteCols: IntArrayBuilder, rv: Long, rvRowType: PStruct, entryArrayType: PArray, diff --git a/hail/src/main/scala/is/hail/types/TypeWithRequiredness.scala b/hail/src/main/scala/is/hail/types/TypeWithRequiredness.scala index 6d1b8532663..0be6b195d60 100644 --- a/hail/src/main/scala/is/hail/types/TypeWithRequiredness.scala +++ b/hail/src/main/scala/is/hail/types/TypeWithRequiredness.scala @@ -151,6 +151,15 @@ object VirtualTypeWithReq { assert(!twr.required) VirtualTypeWithReq(t, twr) } + + def union(vs: Seq[VirtualTypeWithReq]): VirtualTypeWithReq = { + val t = vs.head.t + assert(vs.tail.forall(_.t == t)) + + val tr = TypeWithRequiredness(t) + tr.unionFrom(vs.map(_.r)) + VirtualTypeWithReq(t, tr) + } } case class VirtualTypeWithReq(t: Type, r: TypeWithRequiredness) { diff --git a/hail/src/main/scala/is/hail/types/encoded/EArray.scala b/hail/src/main/scala/is/hail/types/encoded/EArray.scala index a8adb15ffb1..632ceae660f 100644 --- a/hail/src/main/scala/is/hail/types/encoded/EArray.scala +++ b/hail/src/main/scala/is/hail/types/encoded/EArray.scala @@ -7,7 +7,7 @@ import is.hail.types.BaseType import is.hail.types.physical._ import is.hail.types.virtual._ import is.hail.io.{InputBuffer, OutputBuffer} -import is.hail.types.physical.stypes.SType +import is.hail.types.physical.stypes.{SCode, SType, SValue} import is.hail.types.physical.stypes.concrete.{SIndexablePointer, SIndexablePointerCode, SIndexablePointerSettable} import is.hail.types.physical.stypes.interfaces.SIndexableValue import is.hail.utils._ @@ -17,12 +17,12 @@ final case class EArray(val elementType: EType, override val required: Boolean = val elementPType = elementType.decodedPType(requestedType.asInstanceOf[TContainer].elementType) requestedType match { case _: TSet => - SIndexablePointer(PCanonicalSet(elementPType, required)) + SIndexablePointer(PCanonicalSet(elementPType, false)) case _: TArray => - SIndexablePointer(PCanonicalArray(elementPType, required)) + SIndexablePointer(PCanonicalArray(elementPType, false)) case _: TDict => val et = elementPType.asInstanceOf[PStruct] - SIndexablePointer(PCanonicalDict(et.fieldType("key"), 
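// VirtualTypeWithReq.union above merges the requiredness of several values that share a virtual
// type. The usual lattice reading is that the result stays required only if every input is
// required. A tiny standalone sketch of that semantics; this is an assumed simplification, not
// Hail's TypeWithRequiredness machinery:
object RequirednessUnionSketch {
  final case class Req(required: Boolean)

  def union(rs: Seq[Req]): Req =
    Req(rs.forall(_.required)) // any optional input makes the union optional

  def main(args: Array[String]): Unit = {
    println(union(Seq(Req(true), Req(true))))  // Req(true)
    println(union(Seq(Req(true), Req(false)))) // Req(false)
  }
}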
et.fieldType("value"), required)) + SIndexablePointer(PCanonicalDict(et.fieldType("key"), et.fieldType("value"), false)) } } @@ -81,12 +81,12 @@ final case class EArray(val elementType: EType, override val required: Boolean = }) } - override def _buildEncoder(cb: EmitCodeBuilder, v: PValue, out: Value[OutputBuffer]): Unit = { + override def _buildEncoder(cb: EmitCodeBuilder, v: SValue, out: Value[OutputBuffer]): Unit = { val ind = v.asInstanceOf[SIndexableValue] buildPrefixEncoder(cb, ind, out, ind.loadLength()) } - override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): PCode = { + override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): SCode = { val st = decodedSType(t).asInstanceOf[SIndexablePointer] val arrayType: PCanonicalArray = st.pType match { diff --git a/hail/src/main/scala/is/hail/types/encoded/EBaseStruct.scala b/hail/src/main/scala/is/hail/types/encoded/EBaseStruct.scala index b9fb3c378a3..96c9a802491 100644 --- a/hail/src/main/scala/is/hail/types/encoded/EBaseStruct.scala +++ b/hail/src/main/scala/is/hail/types/encoded/EBaseStruct.scala @@ -6,7 +6,7 @@ import is.hail.expr.ir.EmitCodeBuilder import is.hail.io.{InputBuffer, OutputBuffer} import is.hail.types.BaseStruct import is.hail.types.physical._ -import is.hail.types.physical.stypes.SType +import is.hail.types.physical.stypes.{SCode, SType, SValue} import is.hail.types.physical.stypes.concrete._ import is.hail.types.physical.stypes.interfaces.SBaseStructValue import is.hail.types.virtual._ @@ -53,24 +53,24 @@ final case class EBaseStruct(fields: IndexedSeq[EField], override val required: case t: TInterval => val structPType = decodedPType(t.structRepresentation).asInstanceOf[PStruct] val pointType = structPType.field("start").typ - SIntervalPointer(PCanonicalInterval(pointType, required)) + SIntervalPointer(PCanonicalInterval(pointType, false)) case t: TLocus => - SCanonicalLocusPointer(PCanonicalLocus(t.rg, required)) + SCanonicalLocusPointer(PCanonicalLocus(t.rg, false)) case t: TStruct => val pFields = t.fields.map { case Field(name, typ, idx) => val pt = fieldType(name).decodedPType(typ) PField(name, pt, idx) } - SBaseStructPointer(PCanonicalStruct(pFields, required)) + SBaseStructPointer(PCanonicalStruct(pFields, false)) case t: TTuple => val pFields = t.fields.map { case Field(name, typ, idx) => val pt = fieldType(name).decodedPType(typ) PTupleField(t._types(idx).index, pt) } - SBaseStructPointer(PCanonicalTuple(pFields, required)) + SBaseStructPointer(PCanonicalTuple(pFields, false)) } - override def _buildEncoder(cb: EmitCodeBuilder, v: PValue, out: Value[OutputBuffer]): Unit = { + override def _buildEncoder(cb: EmitCodeBuilder, v: SValue, out: Value[OutputBuffer]): Unit = { val structValue = v.st match { case SIntervalPointer(t: PCanonicalInterval) => new SBaseStructPointerSettable( SBaseStructPointer(t.representation), @@ -79,20 +79,18 @@ final case class EBaseStruct(fields: IndexedSeq[EField], override val required: new SBaseStructPointerSettable( SBaseStructPointer(t.representation), v.asInstanceOf[SCanonicalLocusPointerSettable].a) - case SBaseStructPointer(t) => v.asInstanceOf[SBaseStructValue] + case _ => v.asInstanceOf[SBaseStructValue] } - val ft = structValue.pt - // write missing bytes structValue.st match { case SBaseStructPointer(st) if st.size == size && st.fieldRequired.sameElements(fields.map(_.typ.required)) => - val missingBytes = UnsafeUtils.packBitsToBytes(ft.nMissing) + val missingBytes = 
UnsafeUtils.packBitsToBytes(st.nMissing) val addr = structValue.asInstanceOf[SBaseStructPointerSettable].a if (nMissingBytes > 1) cb += out.writeBytes(addr, missingBytes - 1) if (nMissingBytes > 0) - cb += out.writeByte((Region.loadByte(addr + (missingBytes.toLong - 1)).toI & const(EType.lowBitMask(ft.nMissing & 0x7))).toB) + cb += out.writeByte((Region.loadByte(addr + (missingBytes.toLong - 1)).toI & const(EType.lowBitMask(st.nMissing & 0x7))).toB) case _ => var j = 0 @@ -123,19 +121,18 @@ final case class EBaseStruct(fields: IndexedSeq[EField], override val required: if (ef.typ.required) cb._fatal(s"required field ${ ef.name } saw missing value in encode") }, - { _pc => - val pc = _pc.asPCode + { pc => ef.typ.buildEncoder(pc.st, cb.emb.ecb) .apply(cb, pc, out) }) } } - override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): PCode = { + override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): SCode = { val pt = decodedPType(t) val addr = cb.newLocal[Long]("base_struct_dec_addr", region.allocate(pt.alignment, pt.byteSize)) _buildInplaceDecoder(cb, pt, region, addr, in) - pt.loadCheapPCode(cb, addr) + pt.loadCheapSCode(cb, addr) } override def _buildInplaceDecoder(cb: EmitCodeBuilder, pt: PType, region: Value[Region], addr: Value[Long], in: Value[InputBuffer]): Unit = { diff --git a/hail/src/main/scala/is/hail/types/encoded/EBinary.scala b/hail/src/main/scala/is/hail/types/encoded/EBinary.scala index d53b0bd0092..9185f7dcf12 100644 --- a/hail/src/main/scala/is/hail/types/encoded/EBinary.scala +++ b/hail/src/main/scala/is/hail/types/encoded/EBinary.scala @@ -7,7 +7,7 @@ import is.hail.types.BaseType import is.hail.types.physical._ import is.hail.types.virtual._ import is.hail.io.{InputBuffer, OutputBuffer} -import is.hail.types.physical.stypes.SType +import is.hail.types.physical.stypes.{SCode, SType, SValue} import is.hail.types.physical.stypes.concrete.{SBinaryPointer, SBinaryPointerCode, SBinaryPointerSettable, SStringPointer, SStringPointerCode, SStringPointerSettable} import is.hail.types.physical.stypes.interfaces.SBinaryValue import is.hail.utils._ @@ -17,7 +17,7 @@ case object EBinaryRequired extends EBinary(true) class EBinary(override val required: Boolean) extends EType { - override def _buildEncoder(cb: EmitCodeBuilder, v: PValue, out: Value[OutputBuffer]): Unit = { + override def _buildEncoder(cb: EmitCodeBuilder, v: SValue, out: Value[OutputBuffer]): Unit = { val bin = v.st match { case SBinaryPointer(t) => v.asInstanceOf[SBinaryValue] case SStringPointer(t) => new SBinaryPointerSettable(SBinaryPointer(t.binaryRepresentation), v.asInstanceOf[SStringPointerSettable].a) @@ -28,7 +28,7 @@ class EBinary(override val required: Boolean) extends EType { cb += out.writeBytes(bin.bytesAddress(), len) } - override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): PCode = { + override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): SCode = { val t1 = decodedSType(t) val pt = t1 match { case SStringPointer(t) => t.binaryRepresentation @@ -51,8 +51,8 @@ class EBinary(override val required: Boolean) extends EType { } def _decodedSType(requestedType: Type): SType = requestedType match { - case TBinary => SBinaryPointer(PCanonicalBinary(required)) - case TString => SStringPointer(PCanonicalString(required)) + case TBinary => SBinaryPointer(PCanonicalBinary(false)) + case TString => 
SStringPointer(PCanonicalString(false)) } def _asIdent = "binary" diff --git a/hail/src/main/scala/is/hail/types/encoded/EBlockMatrixNDArray.scala b/hail/src/main/scala/is/hail/types/encoded/EBlockMatrixNDArray.scala index e6c232973fb..9f01bb08443 100644 --- a/hail/src/main/scala/is/hail/types/encoded/EBlockMatrixNDArray.scala +++ b/hail/src/main/scala/is/hail/types/encoded/EBlockMatrixNDArray.scala @@ -5,8 +5,9 @@ import is.hail.asm4s._ import is.hail.expr.ir.EmitCodeBuilder import is.hail.io.{InputBuffer, OutputBuffer} import is.hail.types.physical._ -import is.hail.types.physical.stypes.SType +import is.hail.types.physical.stypes.{SCode, SType, SValue} import is.hail.types.physical.stypes.concrete.SNDArrayPointer +import is.hail.types.physical.stypes.interfaces.SNDArrayValue import is.hail.types.virtual._ import is.hail.utils._ @@ -17,11 +18,11 @@ final case class EBlockMatrixNDArray(elementType: EType, encodeRowMajor: Boolean def _decodedSType(requestedType: Type): SType = { val elementPType = elementType.decodedPType(requestedType.asInstanceOf[TNDArray].elementType) - SNDArrayPointer(PCanonicalNDArray(elementPType, 2, required)) + SNDArrayPointer(PCanonicalNDArray(elementPType, 2, false)) } - override def _buildEncoder(cb: EmitCodeBuilder, v: PValue, out: Value[OutputBuffer]): Unit = { - val ndarray = v.asInstanceOf[PNDArrayValue] + override def _buildEncoder(cb: EmitCodeBuilder, v: SValue, out: Value[OutputBuffer]): Unit = { + val ndarray = v.asInstanceOf[SNDArrayValue] val shapes = ndarray.shapes(cb) val r = cb.newLocal[Long]("r", shapes(0)) val c = cb.newLocal[Long]("c", shapes(1)) @@ -35,19 +36,19 @@ final case class EBlockMatrixNDArray(elementType: EType, encodeRowMajor: Boolean if (encodeRowMajor) { cb.forLoop(cb.assign(i, 0L), i < r, cb.assign(i, i + 1L), { cb.forLoop(cb.assign(j, 0L), j < c, cb.assign(j, j + 1L), { - writeElemF(cb, ndarray.loadElement(FastIndexedSeq(i, j), cb).asPCode, out) + writeElemF(cb, ndarray.loadElement(FastIndexedSeq(i, j), cb), out) }) }) } else { cb.forLoop(cb.assign(j, 0L), j < c, cb.assign(j, j + 1L), { cb.forLoop(cb.assign(i, 0L), i < r, cb.assign(i, i + 1L), { - writeElemF(cb, ndarray.loadElement(FastIndexedSeq(i, j), cb).asPCode, out) + writeElemF(cb, ndarray.loadElement(FastIndexedSeq(i, j), cb), out) }) }) } } - override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): PCode = { + override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): SCode = { val st = decodedSType(t).asInstanceOf[SNDArrayPointer] val pt = st.pType val readElemF = elementType.buildInplaceDecoder(pt.elementType, cb.emb.ecb) diff --git a/hail/src/main/scala/is/hail/types/encoded/EBoolean.scala b/hail/src/main/scala/is/hail/types/encoded/EBoolean.scala index 7b1f8aa4178..f4d47e7245d 100644 --- a/hail/src/main/scala/is/hail/types/encoded/EBoolean.scala +++ b/hail/src/main/scala/is/hail/types/encoded/EBoolean.scala @@ -5,7 +5,7 @@ import is.hail.asm4s._ import is.hail.expr.ir.EmitCodeBuilder import is.hail.io.{InputBuffer, OutputBuffer} import is.hail.types.physical._ -import is.hail.types.physical.stypes.SType +import is.hail.types.physical.stypes.{SCode, SType, SValue} import is.hail.types.physical.stypes.primitives.{SBoolean, SBooleanCode} import is.hail.types.virtual._ import is.hail.utils._ @@ -15,17 +15,17 @@ case object EBooleanOptional extends EBoolean(false) case object EBooleanRequired extends EBoolean(true) class EBoolean(override val required: Boolean) extends EType { - 
override def _buildEncoder(cb: EmitCodeBuilder, v: PValue, out: Value[OutputBuffer]): Unit = { + override def _buildEncoder(cb: EmitCodeBuilder, v: SValue, out: Value[OutputBuffer]): Unit = { cb += out.writeBoolean(v.asBoolean.boolCode(cb)) } - override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): PCode = { - new SBooleanCode(required, in.readBoolean()) + override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): SCode = { + new SBooleanCode(in.readBoolean()) } def _buildSkip(cb: EmitCodeBuilder, r: Value[Region], in: Value[InputBuffer]): Unit = cb += in.skipBoolean() - def _decodedSType(requestedType: Type): SType = SBoolean(required) + def _decodedSType(requestedType: Type): SType = SBoolean def _asIdent = "bool" diff --git a/hail/src/main/scala/is/hail/types/encoded/EFloat32.scala b/hail/src/main/scala/is/hail/types/encoded/EFloat32.scala index 2f3ab5a9b14..f28dd7b48d4 100644 --- a/hail/src/main/scala/is/hail/types/encoded/EFloat32.scala +++ b/hail/src/main/scala/is/hail/types/encoded/EFloat32.scala @@ -5,7 +5,7 @@ import is.hail.asm4s._ import is.hail.expr.ir.EmitCodeBuilder import is.hail.io.{InputBuffer, OutputBuffer} import is.hail.types.physical._ -import is.hail.types.physical.stypes.SType +import is.hail.types.physical.stypes.{SCode, SType, SValue} import is.hail.types.physical.stypes.primitives.{SFloat32, SFloat32Code} import is.hail.types.virtual._ import is.hail.utils._ @@ -15,17 +15,17 @@ case object EFloat32Optional extends EFloat32(false) case object EFloat32Required extends EFloat32(true) class EFloat32(override val required: Boolean) extends EType { - override def _buildEncoder(cb: EmitCodeBuilder, v: PValue, out: Value[OutputBuffer]): Unit = { + override def _buildEncoder(cb: EmitCodeBuilder, v: SValue, out: Value[OutputBuffer]): Unit = { cb += out.writeFloat(v.asFloat.floatCode(cb)) } - override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): PCode = { - new SFloat32Code(required, in.readFloat()) + override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): SCode = { + new SFloat32Code(in.readFloat()) } def _buildSkip(cb: EmitCodeBuilder, r: Value[Region], in: Value[InputBuffer]): Unit = cb += in.skipFloat() - def _decodedSType(requestedType: Type): SType = SFloat32(required) + def _decodedSType(requestedType: Type): SType = SFloat32 def _asIdent = "float32" diff --git a/hail/src/main/scala/is/hail/types/encoded/EFloat64.scala b/hail/src/main/scala/is/hail/types/encoded/EFloat64.scala index 408c4ca6f4f..cc478a63caf 100644 --- a/hail/src/main/scala/is/hail/types/encoded/EFloat64.scala +++ b/hail/src/main/scala/is/hail/types/encoded/EFloat64.scala @@ -5,7 +5,7 @@ import is.hail.asm4s._ import is.hail.expr.ir.EmitCodeBuilder import is.hail.io.{InputBuffer, OutputBuffer} import is.hail.types.physical._ -import is.hail.types.physical.stypes.SType +import is.hail.types.physical.stypes.{SCode, SType, SValue} import is.hail.types.physical.stypes.primitives.{SFloat64, SFloat64Code} import is.hail.types.virtual._ import is.hail.utils._ @@ -15,17 +15,17 @@ case object EFloat64Optional extends EFloat64(false) case object EFloat64Required extends EFloat64(true) class EFloat64(override val required: Boolean) extends EType { - override def _buildEncoder(cb: EmitCodeBuilder, v: PValue, out: Value[OutputBuffer]): Unit = { + override def _buildEncoder(cb: EmitCodeBuilder, v: SValue, out: 
Value[OutputBuffer]): Unit = { cb += out.writeDouble(v.asDouble.doubleCode(cb)) } - override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): PCode = { - new SFloat64Code(required, in.readDouble()) + override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): SCode = { + new SFloat64Code(in.readDouble()) } def _buildSkip(cb: EmitCodeBuilder, r: Value[Region], in: Value[InputBuffer]): Unit = cb += in.skipDouble() - def _decodedSType(requestedType: Type): SType = SFloat64(required) + def _decodedSType(requestedType: Type): SType = SFloat64 def _asIdent = "float64" diff --git a/hail/src/main/scala/is/hail/types/encoded/EInt32.scala b/hail/src/main/scala/is/hail/types/encoded/EInt32.scala index a19b7270752..1acac463b3b 100644 --- a/hail/src/main/scala/is/hail/types/encoded/EInt32.scala +++ b/hail/src/main/scala/is/hail/types/encoded/EInt32.scala @@ -5,7 +5,7 @@ import is.hail.asm4s._ import is.hail.expr.ir.EmitCodeBuilder import is.hail.io.{InputBuffer, OutputBuffer} import is.hail.types.physical._ -import is.hail.types.physical.stypes.SType +import is.hail.types.physical.stypes.{SCode, SType, SValue} import is.hail.types.physical.stypes.concrete.{SCanonicalCall, SCanonicalCallCode} import is.hail.types.physical.stypes.interfaces.SCallValue import is.hail.types.physical.stypes.primitives.{SInt32, SInt32Code} @@ -17,19 +17,19 @@ case object EInt32Optional extends EInt32(false) case object EInt32Required extends EInt32(true) class EInt32(override val required: Boolean) extends EType { - override def _buildEncoder(cb: EmitCodeBuilder, v: PValue, out: Value[OutputBuffer]): Unit = { + override def _buildEncoder(cb: EmitCodeBuilder, v: SValue, out: Value[OutputBuffer]): Unit = { val x = v.st match { - case t: SCanonicalCall => v.asInstanceOf[SCallValue].canonicalCall(cb) - case t: SInt32 => v.asInt32.intCode(cb) + case SCanonicalCall => v.asInstanceOf[SCallValue].canonicalCall(cb) + case SInt32 => v.asInt32.intCode(cb) } cb += out.writeInt(x) } - override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): PCode = { + override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): SCode = { val x = in.readInt() t match { - case TCall => new SCanonicalCallCode(required, x) - case TInt32 => new SInt32Code(required, x) + case TCall => new SCanonicalCallCode(x) + case TInt32 => new SInt32Code(x) } } @@ -44,8 +44,8 @@ class EInt32(override val required: Boolean) extends EType { def _decodedSType(requestedType: Type): SType = requestedType match { - case TCall => SCanonicalCall(required) - case _ => SInt32(required) + case TCall => SCanonicalCall + case _ => SInt32 } def _asIdent = "int32" diff --git a/hail/src/main/scala/is/hail/types/encoded/EInt64.scala b/hail/src/main/scala/is/hail/types/encoded/EInt64.scala index 3017ad2e403..90931039e05 100644 --- a/hail/src/main/scala/is/hail/types/encoded/EInt64.scala +++ b/hail/src/main/scala/is/hail/types/encoded/EInt64.scala @@ -5,7 +5,7 @@ import is.hail.asm4s._ import is.hail.expr.ir.EmitCodeBuilder import is.hail.io.{InputBuffer, OutputBuffer} import is.hail.types.physical._ -import is.hail.types.physical.stypes.SType +import is.hail.types.physical.stypes.{SCode, SType, SValue} import is.hail.types.physical.stypes.primitives.{SInt64, SInt64Code} import is.hail.types.virtual._ import is.hail.utils._ @@ -15,17 +15,17 @@ case object EInt64Optional extends EInt64(false) case object 
EInt64Required extends EInt64(true) class EInt64(override val required: Boolean) extends EType { - override def _buildEncoder(cb: EmitCodeBuilder, v: PValue, out: Value[OutputBuffer]): Unit = { + override def _buildEncoder(cb: EmitCodeBuilder, v: SValue, out: Value[OutputBuffer]): Unit = { cb += out.writeLong(v.asLong.longCode(cb)) } - override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): PCode = { - new SInt64Code(required, in.readLong()) + override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): SCode = { + new SInt64Code(in.readLong()) } def _buildSkip(cb: EmitCodeBuilder, r: Value[Region], in: Value[InputBuffer]): Unit = cb += in.skipLong() - def _decodedSType(requestedType: Type): SType = SInt64(required) + def _decodedSType(requestedType: Type): SType = SInt64 def _asIdent = "int64" diff --git a/hail/src/main/scala/is/hail/types/encoded/ENDArrayColumnMajor.scala b/hail/src/main/scala/is/hail/types/encoded/ENDArrayColumnMajor.scala index d54c54340b7..7d8a9171855 100644 --- a/hail/src/main/scala/is/hail/types/encoded/ENDArrayColumnMajor.scala +++ b/hail/src/main/scala/is/hail/types/encoded/ENDArrayColumnMajor.scala @@ -4,29 +4,29 @@ import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir.EmitCodeBuilder import is.hail.io.{InputBuffer, OutputBuffer} -import is.hail.types.physical.stypes.SType +import is.hail.types.physical.stypes.{SCode, SType, SValue} import is.hail.types.physical.stypes.concrete.SNDArrayPointer import is.hail.types.physical.stypes.interfaces.{SNDArray, SNDArrayValue} -import is.hail.types.physical.{PCanonicalNDArray, PCode, PValue} +import is.hail.types.physical.PCanonicalNDArray import is.hail.types.virtual.{TNDArray, Type} import is.hail.utils._ case class ENDArrayColumnMajor(elementType: EType, nDims: Int, required: Boolean = false) extends EContainer { - override def _buildEncoder(cb: EmitCodeBuilder, v: PValue, out: Value[OutputBuffer]): Unit = { + override def _buildEncoder(cb: EmitCodeBuilder, v: SValue, out: Value[OutputBuffer]): Unit = { val ndarray = v.asInstanceOf[SNDArrayValue] val shapes = ndarray.shapes(cb) shapes.foreach(s => cb += out.writeLong(s)) - SNDArray.forEachIndex(cb, shapes, "ndarray_encoder") { case (cb, idxVars) => - val elt = ndarray.loadElement(idxVars, cb) - elementType.buildEncoder(elt.st, cb.emb.ecb) - .apply(cb, elt, out) - } + SNDArray.coiterate(cb, null, FastIndexedSeq((ndarray.get, "A")), { + case Seq(elt) => + elementType.buildEncoder(elt.st, cb.emb.ecb) + .apply(cb, elt, out) + }) } - override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): PCode = { + override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): SCode = { val st = decodedSType(t).asInstanceOf[SNDArrayPointer] val pnd = st.pType val readElemF = elementType.buildInplaceDecoder(pnd.elementType, cb.emb.ecb) @@ -63,7 +63,7 @@ case class ENDArrayColumnMajor(elementType: EType, nDims: Int, required: Boolean def _decodedSType(requestedType: Type): SType = { val requestedTNDArray = requestedType.asInstanceOf[TNDArray] val elementPType = elementType.decodedPType(requestedTNDArray.elementType) - SNDArrayPointer(PCanonicalNDArray(elementPType, requestedTNDArray.nDims, required)) + SNDArrayPointer(PCanonicalNDArray(elementPType, requestedTNDArray.nDims, false)) } override def setRequired(required: Boolean): EType = ENDArrayColumnMajor(elementType, nDims, required) 
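// A theme running through the encoded-type hunks in this patch: _decodedSType now builds its
// SType over a non-required PType (or a parameterless singleton such as SInt32), and
// EType.decodedPType, in the EType.scala hunk below, reattaches the encoded type's requiredness
// via canonicalPType().setRequired(required). A small sketch of that split using simplified
// stand-in types; these names are illustrative, not Hail's SType/PType hierarchy:
object RequirednessSplitSketch {
  // value-level representation: carries no requiredness
  sealed trait SSketch
  case object SInt32Sketch extends SSketch

  // storage-level representation: carries requiredness
  final case class PInt32Sketch(required: Boolean)

  final case class EInt32Sketch(required: Boolean) {
    def decodedSType: SSketch = SInt32Sketch

    def canonicalPType(s: SSketch): PInt32Sketch = s match {
      case SInt32Sketch => PInt32Sketch(required = false)
    }

    // requiredness is restored only at the PType level, after the SType is chosen
    def decodedPType: PInt32Sketch =
      canonicalPType(decodedSType).copy(required = required)
  }

  def main(args: Array[String]): Unit = {
    println(EInt32Sketch(required = true).decodedPType)  // PInt32Sketch(true)
    println(EInt32Sketch(required = false).decodedPType) // PInt32Sketch(false)
  }
}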
diff --git a/hail/src/main/scala/is/hail/types/encoded/EShuffle.scala b/hail/src/main/scala/is/hail/types/encoded/EShuffle.scala index 93f97c26b3f..608ff09e283 100644 --- a/hail/src/main/scala/is/hail/types/encoded/EShuffle.scala +++ b/hail/src/main/scala/is/hail/types/encoded/EShuffle.scala @@ -5,7 +5,7 @@ import is.hail.asm4s._ import is.hail.expr.ir.EmitCodeBuilder import is.hail.io.{InputBuffer, OutputBuffer} import is.hail.types.physical._ -import is.hail.types.physical.stypes.SType +import is.hail.types.physical.stypes.{SCode, SType, SValue} import is.hail.types.physical.stypes.concrete.{SCanonicalShufflePointer, SCanonicalShufflePointerCode, SCanonicalShufflePointerSettable} import is.hail.types.virtual._ import is.hail.utils._ @@ -15,7 +15,7 @@ case object EShuffleOptional extends EShuffle(false) case object EShuffleRequired extends EShuffle(true) class EShuffle(override val required: Boolean) extends EType { - def _buildEncoder(cb: EmitCodeBuilder, pv: PValue, out: Value[OutputBuffer]): Unit = { + def _buildEncoder(cb: EmitCodeBuilder, pv: SValue, out: Value[OutputBuffer]): Unit = { pv.st match { case SCanonicalShufflePointer(t) => val v = pv.asInstanceOf[SCanonicalShufflePointerSettable] @@ -25,14 +25,14 @@ class EShuffle(override val required: Boolean) extends EType { } } - override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): PCode = { + override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): SCode = { val shuffleType = decodedPType(t).asInstanceOf[PCanonicalShuffle] val bT = shuffleType.representation val len = cb.newLocal[Int]("len", in.readInt()) val barray = cb.newLocal[Long]("barray", bT.allocate(region, len)) cb += bT.storeLength(barray, len) cb += in.readBytes(region, bT.bytesAddress(barray), len) - new SCanonicalShufflePointerCode(SCanonicalShufflePointer(shuffleType), bT.loadCheapPCode(cb, barray)) + new SCanonicalShufflePointerCode(SCanonicalShufflePointer(shuffleType), bT.loadCheapSCode(cb, barray)) } def _buildSkip(cb: EmitCodeBuilder, r: Value[Region], in: Value[InputBuffer]): Unit = { diff --git a/hail/src/main/scala/is/hail/types/encoded/EType.scala b/hail/src/main/scala/is/hail/types/encoded/EType.scala index 59c69ae1733..bb4c029e34d 100644 --- a/hail/src/main/scala/is/hail/types/encoded/EType.scala +++ b/hail/src/main/scala/is/hail/types/encoded/EType.scala @@ -1,7 +1,6 @@ package is.hail.types.encoded import java.util import java.util.Map.Entry - import is.hail.HailContext import is.hail.annotations.Region import is.hail.asm4s.{coerce => _, _} @@ -9,7 +8,7 @@ import is.hail.expr.ir.{EmitClassBuilder, EmitCodeBuilder, EmitFunctionBuilder, import is.hail.io._ import is.hail.types._ import is.hail.types.physical._ -import is.hail.types.physical.stypes.{SCode, SType} +import is.hail.types.physical.stypes.{SCode, SType, SValue} import is.hail.types.virtual._ import is.hail.utils._ import org.json4s.CustomSerializer @@ -25,7 +24,7 @@ class ETypeSerializer extends CustomSerializer[EType](format => ( { abstract class EType extends BaseType with Serializable with Requiredness { type StagedEncoder = (EmitCodeBuilder, SCode, Code[OutputBuffer]) => Unit - type StagedDecoder = (EmitCodeBuilder, Code[Region], Code[InputBuffer]) => PCode + type StagedDecoder = (EmitCodeBuilder, Code[Region], Code[InputBuffer]) => SCode type StagedInplaceDecoder = (EmitCodeBuilder, Code[Region], Code[Long], Code[InputBuffer]) => Unit final def buildEncoder(ctx: ExecuteContext, t: PType): 
(OutputBuffer) => Encoder = { @@ -45,7 +44,7 @@ abstract class EType extends BaseType with Serializable with Requiredness { final def buildEncoder(st: SType, kb: EmitClassBuilder[_]): StagedEncoder = { val mb = buildEncoderMethod(st, kb); - { (cb: EmitCodeBuilder, sc: SCode, ob: Code[OutputBuffer]) => cb.invokeVoid(mb, sc.asPCode, ob) } + { (cb: EmitCodeBuilder, sc: SCode, ob: Code[OutputBuffer]) => cb.invokeVoid(mb, sc, ob) } } final def buildEncoderMethod(st: SType, kb: EmitClassBuilder[_]): EmitMethodBuilder[_] = { @@ -55,7 +54,7 @@ abstract class EType extends BaseType with Serializable with Requiredness { UnitInfo) { mb => mb.voidWithBuilder { cb => - val arg = mb.getPCodeParam(1) + val arg = mb.getSCodeParam(1) .memoize(cb, "encoder_method_arg") val out = mb.getCodeParam[OutputBuffer](2) _buildEncoder(cb, arg, out) @@ -66,7 +65,7 @@ abstract class EType extends BaseType with Serializable with Requiredness { final def buildDecoder(t: Type, kb: EmitClassBuilder[_]): StagedDecoder = { val mb = buildDecoderMethod(t: Type, kb); { (cb: EmitCodeBuilder, r: Code[Region], ib: Code[InputBuffer]) => - cb.invokePCode(mb, r, ib) + cb.invokeSCode(mb, r, ib) } } @@ -77,7 +76,7 @@ abstract class EType extends BaseType with Serializable with Requiredness { FastIndexedSeq[ParamType](typeInfo[Region], classInfo[InputBuffer]), st.paramType) { mb => - mb.emitPCode { cb => + mb.emitSCode { cb => val region: Value[Region] = mb.getCodeParam[Region](1) val in: Value[InputBuffer] = mb.getCodeParam[InputBuffer](2) val sc = _buildDecoder(cb, t, region, in) @@ -123,9 +122,9 @@ abstract class EType extends BaseType with Serializable with Requiredness { }).invokeCode(_, _) } - def _buildEncoder(cb: EmitCodeBuilder, v: PValue, out: Value[OutputBuffer]): Unit + def _buildEncoder(cb: EmitCodeBuilder, v: SValue, out: Value[OutputBuffer]): Unit - def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): PCode + def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): SCode def _buildInplaceDecoder( cb: EmitCodeBuilder, @@ -162,7 +161,7 @@ abstract class EType extends BaseType with Serializable with Requiredness { } final def decodedPType(requestedType: Type): PType = { - decodedSType(requestedType).canonicalPType() + decodedSType(requestedType).canonicalPType().setRequired(required) } def _decodedSType(requestedType: Type): SType @@ -205,7 +204,7 @@ object EType { mb.voidWithBuilder { cb => val addr: Code[Long] = mb.getCodeParam[Long](1) val out: Code[OutputBuffer] = mb.getCodeParam[OutputBuffer](2) - val pc = pt.loadCheapPCode(cb, addr) + val pc = pt.loadCheapSCode(cb, addr) val f = et.buildEncoder(pc.st, mb.ecb) f(cb, pc, out) } diff --git a/hail/src/main/scala/is/hail/types/physical/PArrayBackedContainer.scala b/hail/src/main/scala/is/hail/types/physical/PArrayBackedContainer.scala index 9ae5a095a24..def4336dbbe 100644 --- a/hail/src/main/scala/is/hail/types/physical/PArrayBackedContainer.scala +++ b/hail/src/main/scala/is/hail/types/physical/PArrayBackedContainer.scala @@ -142,9 +142,9 @@ trait PArrayBackedContainer extends PContainer { def unstagedStoreAtAddress(addr: Long, region: Region, srcPType: PType, srcAddress: Long, deepCopy: Boolean): Unit = arrayRep.unstagedStoreAtAddress(addr, region, srcPType.asInstanceOf[PArrayBackedContainer].arrayRep, srcAddress, deepCopy) - def sType: SContainer = SIndexablePointer(this) + def sType: SIndexablePointer = SIndexablePointer(setRequired(false).asInstanceOf[PArrayBackedContainer]) - def 
loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PCode = new SIndexablePointerCode(SIndexablePointer(this), addr) + def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SCode = new SIndexablePointerCode(sType, addr) def store(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): Code[Long] = arrayRep.store(cb, region, value.asIndexable.castToArray(cb), deepCopy) diff --git a/hail/src/main/scala/is/hail/types/physical/PBaseStruct.scala b/hail/src/main/scala/is/hail/types/physical/PBaseStruct.scala index 6f3a6a36daa..574627c643c 100644 --- a/hail/src/main/scala/is/hail/types/physical/PBaseStruct.scala +++ b/hail/src/main/scala/is/hail/types/physical/PBaseStruct.scala @@ -178,18 +178,4 @@ abstract class PBaseStruct extends PType { } else Gen.uniformSequence(types.map(t => t.genValue)).map(a => Annotation(a: _*)) } -} - -abstract class PBaseStructValue extends PValue with SBaseStructValue { - def pt: PBaseStruct -} - -abstract class PBaseStructCode extends PCode with SBaseStructCode { - def pt: PBaseStruct - - def memoize(cb: EmitCodeBuilder, name: String): PBaseStructValue - - def memoizeField(cb: EmitCodeBuilder, name: String): PBaseStructValue -} - -trait PStructSettable extends PBaseStructValue with PSettable \ No newline at end of file +} \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/types/physical/PBinary.scala b/hail/src/main/scala/is/hail/types/physical/PBinary.scala index 9b0c679b41e..ff9f5325b5a 100644 --- a/hail/src/main/scala/is/hail/types/physical/PBinary.scala +++ b/hail/src/main/scala/is/hail/types/physical/PBinary.scala @@ -71,23 +71,3 @@ abstract class PBinary extends PType { def store(addr: Code[Long], bytes: Code[Array[Byte]]): Code[Unit] } - -abstract class PBinaryValue extends PValue with SBinaryValue { - def loadLength(): Code[Int] - - def loadBytes(): Code[Array[Byte]] - - def loadByte(i: Code[Int]): Code[Byte] -} - -abstract class PBinaryCode extends PCode with SBinaryCode { - def pt: PBinary - - def loadLength(): Code[Int] - - def loadBytes(): Code[Array[Byte]] - - def memoize(cb: EmitCodeBuilder, name: String): PBinaryValue - - def memoizeField(cb: EmitCodeBuilder, name: String): PBinaryValue -} diff --git a/hail/src/main/scala/is/hail/types/physical/PBoolean.scala b/hail/src/main/scala/is/hail/types/physical/PBoolean.scala index 48fcfb68f16..98fd06a064b 100644 --- a/hail/src/main/scala/is/hail/types/physical/PBoolean.scala +++ b/hail/src/main/scala/is/hail/types/physical/PBoolean.scala @@ -27,13 +27,13 @@ class PBoolean(override val required: Boolean) extends PType with PPrimitive { override def byteSize: Long = 1 - def sType: SBoolean = SBoolean(required) + def sType: SBoolean.type = SBoolean def storePrimitiveAtAddress(cb: EmitCodeBuilder, addr: Code[Long], value: SCode): Unit = { cb += Region.storeBoolean(addr, value.asBoolean.boolCode(cb)) } - override def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): SBooleanCode = new SBooleanCode(required, Region.loadBoolean(addr)) + override def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SBooleanCode = new SBooleanCode(Region.loadBoolean(addr)) override def unstagedStoreJavaObjectAtAddress(addr: Long, annotation: Annotation, region: Region): Unit = { Region.storeByte(addr, annotation.asInstanceOf[Boolean].toByte) diff --git a/hail/src/main/scala/is/hail/types/physical/PCall.scala b/hail/src/main/scala/is/hail/types/physical/PCall.scala index 324b74714b2..93f6cdc5662 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCall.scala +++ 
b/hail/src/main/scala/is/hail/types/physical/PCall.scala @@ -1,30 +1,7 @@ package is.hail.types.physical -import is.hail.asm4s._ import is.hail.types.virtual.TCall -import is.hail.expr.ir.EmitCodeBuilder -import is.hail.types.physical.stypes.interfaces.{SCallCode, SCallValue} abstract class PCall extends PType { lazy val virtualType: TCall.type = TCall -} - -abstract class PCallValue extends PValue with SCallValue { - def ploidy(): Code[Int] - - def isPhased(): Code[Boolean] - - def forEachAllele(cb: EmitCodeBuilder)(alleleCode: Value[Int] => Unit): Unit -} - -abstract class PCallCode extends PCode with SCallCode { - def pt: PCall - - def ploidy(): Code[Int] - - def isPhased(): Code[Boolean] - - def memoize(cb: EmitCodeBuilder, name: String): PCallValue - - def memoizeField(cb: EmitCodeBuilder, name: String): PCallValue -} +} \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalArray.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalArray.scala index 49d70d5bb97..86a4f357894 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalArray.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalArray.scala @@ -2,7 +2,7 @@ package is.hail.types.physical import is.hail.annotations.{Region, _} import is.hail.asm4s.{Code, _} -import is.hail.expr.ir.{EmitCode, EmitCodeBuilder, EmitMethodBuilder, IEmitCode, IEmitSCode} +import is.hail.expr.ir.{EmitCode, EmitCodeBuilder, EmitMethodBuilder, IEmitCode} import is.hail.types.physical.stypes.SCode import is.hail.types.physical.stypes.concrete.{SIndexablePointer, SIndexablePointerCode, SIndexablePointerSettable} import is.hail.types.physical.stypes.interfaces.{SContainer, SIndexableValue} @@ -323,7 +323,7 @@ final case class PCanonicalArray(elementType: PType, required: Boolean = false) cb.ifx(isElementDefined(dstAddress, currentIdx), { cb.assign(currentElementAddress, elementOffset(dstAddress, len, currentIdx)) - this.elementType.storeAtAddress(cb, currentElementAddress, region, this.elementType.loadCheapPCode(cb, this.elementType.loadFromNested(currentElementAddress)), true) + this.elementType.storeAtAddress(cb, currentElementAddress, region, this.elementType.loadCheapSCode(cb, this.elementType.loadFromNested(currentElementAddress)), true) })) } @@ -377,9 +377,9 @@ final case class PCanonicalArray(elementType: PType, required: Boolean = false) } } - def sType: SContainer = SIndexablePointer(this) + def sType: SIndexablePointer = SIndexablePointer(setRequired(false).asInstanceOf[PCanonicalArray]) - def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): SIndexablePointerCode = new SIndexablePointerCode(SIndexablePointer(this), addr) + def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SIndexablePointerCode = new SIndexablePointerCode(sType, addr) def storeContentsAtAddress(cb: EmitCodeBuilder, addr: Value[Long], region: Value[Region], indexable: SIndexableValue, deepCopy: Boolean): Unit = { val length = indexable.loadLength() @@ -387,7 +387,7 @@ final case class PCanonicalArray(elementType: PType, required: Boolean = false) case SIndexablePointer(PCanonicalArray(otherElementType, _)) if otherElementType == elementType => cb += Region.copyFrom(indexable.asInstanceOf[SIndexablePointerSettable].a, addr, contentsByteSize(length)) deepPointerCopy(cb, region, addr, length) - case SIndexablePointer(PCanonicalArray(otherElementType, _)) if otherElementType.equalModuloRequired(elementType) => + case SIndexablePointer(otherType@PCanonicalArray(otherElementType, _)) if 
otherElementType.equalModuloRequired(elementType) => // other is optional, constructing required if (elementType.required) { cb.ifx(indexable.hasMissingValues(cb), @@ -395,7 +395,6 @@ final case class PCanonicalArray(elementType: PType, required: Boolean = false) } cb += stagedInitialize(addr, indexable.loadLength(), setMissing = false) - val otherType = indexable.st.pType.asInstanceOf[PCanonicalArray] cb += Region.copyFrom(otherType.firstElementOffset(indexable.asInstanceOf[SIndexablePointerSettable].a), this.firstElementOffset(addr), length.toL * otherType.elementByteSize) if (deepCopy) deepPointerCopy(cb, region, addr, length) @@ -448,7 +447,7 @@ final case class PCanonicalArray(elementType: PType, required: Boolean = false) PCanonicalArray(this.elementType.deepRename(t.elementType), this.required) def constructFromElements(cb: EmitCodeBuilder, region: Value[Region], length: Value[Int], deepCopy: Boolean) - (f: (EmitCodeBuilder, Value[Int]) => IEmitSCode): SIndexablePointerCode = { + (f: (EmitCodeBuilder, Value[Int]) => IEmitCode): SIndexablePointerCode = { val addr = cb.newLocal[Long]("pcarray_construct1_addr", allocate(region, length)) cb += stagedInitialize(addr, length, setMissing = false) @@ -465,38 +464,7 @@ final case class PCanonicalArray(elementType: PType, required: Boolean = false) cb.assign(i, i + 1) }) - new SIndexablePointerCode(SIndexablePointer(this), addr) - } - - // unsafe StagedArrayBuilder-like interface that gives caller control over adding elements and finishing - // this won't need to exist when we have SStackStruct - def constructFromNextAddress(cb: EmitCodeBuilder, region: Value[Region], length: Value[Int]): - ((EmitCodeBuilder => Value[Long], (EmitCodeBuilder => Unit), (EmitCodeBuilder => SIndexablePointerCode))) = { - - val addr = cb.newLocal[Long]("pcarray_construct2_addr", allocate(region, length)) - cb += stagedInitialize(addr, length, setMissing = false) - val currentIndex = cb.newLocal[Int]("pcarray_construct2_i", -1) - - val currentElementAddress = cb.newLocal[Long]("pcarray_construct2_firstelementaddr", firstElementOffset(addr, length) - elementByteSize) - - def nextAddr(cb: EmitCodeBuilder): Value[Long] = { - cb.assign(currentIndex, currentIndex + 1) - cb.assign(currentElementAddress, currentElementAddress + elementByteSize) - currentElementAddress - } - - def setMissing(cb: EmitCodeBuilder): Unit = { - cb.assign(currentIndex, currentIndex + 1) - cb.assign(currentElementAddress, currentElementAddress + elementByteSize) - cb += this.setElementMissing(addr, currentIndex) - } - - def finish(cb: EmitCodeBuilder): SIndexablePointerCode = { - cb.ifx((currentIndex + 1).cne(length), cb._fatal("PCanonicalArray.constructFromNextAddress nextAddress was called the wrong number of times: len=", - length.toS, ", calls=", (currentIndex + 1).toS)) - new SIndexablePointerCode(SIndexablePointer(this), addr) - } - (nextAddr, setMissing, finish) + new SIndexablePointerCode(sType, addr) } // unsafe StagedArrayBuilder-like interface that gives caller control over pushing elements and finishing @@ -520,7 +488,7 @@ final case class PCanonicalArray(elementType: PType, required: Boolean = false) val finish: EmitCodeBuilder => SIndexablePointerCode = { (cb: EmitCodeBuilder) => cb.ifx(currentElementIndex.cne(length), cb._fatal("PCanonicalArray.constructFromFunctions push was called the wrong number of times: len=", length.toS, ", calls=", currentElementIndex.toS)) - new SIndexablePointerCode(SIndexablePointer(this), addr) + new SIndexablePointerCode(sType, addr) } (push, finish) 
} diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalBaseStruct.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalBaseStruct.scala index c7d994b3069..42ef32c8793 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalBaseStruct.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalBaseStruct.scala @@ -105,7 +105,7 @@ abstract class PCanonicalBaseStruct(val types: Array[PType]) extends PBaseStruct cb.ifx(isFieldDefined(dstAddr, f.index), { val fieldAddr = cb.newLocal[Long]("pcbs_dpcopy_field", fieldOffset(dstAddr, f.index)) - dstFieldType.storeAtAddress(cb, fieldAddr, region, dstFieldType.loadCheapPCode(cb, dstFieldType.loadFromNested(fieldAddr)), deepCopy = true) + dstFieldType.storeAtAddress(cb, fieldAddr, region, dstFieldType.loadCheapSCode(cb, dstFieldType.loadFromNested(fieldAddr)), deepCopy = true) }) } } @@ -154,9 +154,9 @@ abstract class PCanonicalBaseStruct(val types: Array[PType]) extends PBaseStruct } } - def sType: SBaseStruct = SBaseStructPointer(this) + def sType: SBaseStructPointer = SBaseStructPointer(setRequired(false).asInstanceOf[PCanonicalBaseStruct]) - def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): SBaseStructPointerCode = new SBaseStructPointerCode(SBaseStructPointer(this), addr) + def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SBaseStructPointerCode = new SBaseStructPointerCode(sType, addr) def store(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): Code[Long] = { value.st match { @@ -196,21 +196,6 @@ abstract class PCanonicalBaseStruct(val types: Array[PType]) extends PBaseStruct } } - // FIXME: this doesn't need to exist when we have StackStruct! - def storeAtAddressFromFields(cb: EmitCodeBuilder, addr: Value[Long], region: Value[Region], emitFields: IndexedSeq[EmitCode], deepCopy: Boolean): Unit = { - require(emitFields.length == size) - cb += stagedInitialize(addr, setMissing = false) - emitFields.zipWithIndex.foreach { case (ev, i) => - ev.toI(cb) - .consume(cb, - cb += setFieldMissing(addr, i), - { sc => - types(i).storeAtAddress(cb, fieldOffset(addr, i), region, sc, deepCopy = deepCopy) - } - ) - } - } - def constructFromFields(cb: EmitCodeBuilder, region: Value[Region], emitFields: IndexedSeq[EmitCode], deepCopy: Boolean): SBaseStructPointerCode = { require(emitFields.length == size) val addr = cb.newLocal[Long]("pcbs_construct_fields", allocate(region)) @@ -225,7 +210,7 @@ abstract class PCanonicalBaseStruct(val types: Array[PType]) extends PBaseStruct ) } - new SBaseStructPointerCode(SBaseStructPointer(this), addr) + new SBaseStructPointerCode(sType, addr) } override def unstagedStoreJavaObject(annotation: Annotation, region: Region): Long = { diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalBinary.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalBinary.scala index 5628efe70ef..52f669a8a74 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalBinary.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalBinary.scala @@ -129,9 +129,9 @@ class PCanonicalBinary(val required: Boolean) extends PBinary { } } - def sType: SBinary = SBinaryPointer(this) + def sType: SBinaryPointer = SBinaryPointer(setRequired(false).asInstanceOf[PCanonicalBinary]) - def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): SBinaryPointerCode = new SBinaryPointerCode(SBinaryPointer(this), addr) + def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SBinaryPointerCode = new SBinaryPointerCode(sType, addr) def store(cb: 
EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): Code[Long] = { value.st match { diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalCall.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalCall.scala index 264bdafb4ef..c124eed52a5 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalCall.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalCall.scala @@ -38,13 +38,13 @@ final case class PCanonicalCall(required: Boolean = false) extends PCall { } } - def sType: SCall = SCanonicalCall(required) + def sType: SCall = SCanonicalCall - def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PCode = new SCanonicalCallCode(required, Region.loadInt(addr)) + def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SCode = new SCanonicalCallCode(Region.loadInt(addr)) def store(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): Code[Long] = { value.st match { - case SCanonicalCall(r) => + case SCanonicalCall => val newAddr = cb.newLocal[Long]("pcanonicalcall_store_addr", region.allocate(representation.alignment, representation.byteSize)) storeAtAddress(cb, newAddr, region, value, deepCopy) newAddr diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalDict.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalDict.scala index 0b8b50e189d..3c2efd28c8e 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalDict.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalDict.scala @@ -3,8 +3,19 @@ package is.hail.types.physical import is.hail.annotations.{Annotation, Region} import is.hail.types.virtual.{TDict, Type} import is.hail.types.physical.stypes.concrete.{SIndexablePointer, SIndexablePointerCode} +import is.hail.types.physical.stypes.interfaces.{SBaseStruct, SIndexableCode} import org.apache.spark.sql.Row +object PCanonicalDict { + def coerceArrayCode(contents: SIndexableCode): SIndexableCode = { + contents.st match { + case SIndexablePointer(PCanonicalArray(ps: PBaseStruct, r)) => + PCanonicalDict(ps.types(0), ps.types(1), r) + .construct(contents) + } + } +} + final case class PCanonicalDict(keyType: PType, valueType: PType, required: Boolean = false) extends PDict with PArrayBackedContainer { val elementType = PCanonicalStruct(required = true, "key" -> keyType, "value" -> valueType) @@ -39,8 +50,12 @@ final case class PCanonicalDict(keyType: PType, valueType: PType, required: Bool this.arrayRep.unstagedStoreJavaObject(sortedArray, region) } - def construct(contents: PIndexableCode): PIndexableCode = { - assert(contents.pt.equalModuloRequired(arrayRep), s"\n contents: ${ contents.pt }\n arrayrep: ${ arrayRep }") + def construct(contents: SIndexableCode): SIndexableCode = { + contents.st match { + case SIndexablePointer(PCanonicalArray(pbs: PBaseStruct, _)) + if pbs.types.size == 2 && pbs.types(0) == keyType && pbs.types(1) == valueType => + case t => throw new RuntimeException(s"PCDict.construct: contents=${t}, arrayrep=${arrayRep}") + } new SIndexablePointerCode(SIndexablePointer(this), contents.asInstanceOf[SIndexablePointerCode].a) } } diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalInterval.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalInterval.scala index a6db53a7f49..488decf4e50 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalInterval.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalInterval.scala @@ -68,21 +68,21 @@ final case class PCanonicalInterval(pointType: PType, override val 
required: Boo def containsPointers: Boolean = representation.containsPointers - def sType: SIntervalPointer = SIntervalPointer(this) + def sType: SIntervalPointer = SIntervalPointer(setRequired(false).asInstanceOf[PCanonicalInterval]) - def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PCode = new SIntervalPointerCode(SIntervalPointer(this), addr) + def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SCode = new SIntervalPointerCode(sType, addr) def store(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): Code[Long] = { value.st match { case SIntervalPointer(t: PCanonicalInterval) => - representation.store(cb, region, t.representation.loadCheapPCode(cb, value.asInstanceOf[SIntervalPointerCode].a), deepCopy) + representation.store(cb, region, t.representation.loadCheapSCode(cb, value.asInstanceOf[SIntervalPointerCode].a), deepCopy) } } def storeAtAddress(cb: EmitCodeBuilder, addr: Code[Long], region: Value[Region], value: SCode, deepCopy: Boolean): Unit = { value.st match { case SIntervalPointer(t: PCanonicalInterval) => - representation.storeAtAddress(cb, addr, region, t.representation.loadCheapPCode(cb, value.asInstanceOf[SIntervalPointerCode].a), deepCopy) + representation.storeAtAddress(cb, addr, region, t.representation.loadCheapSCode(cb, value.asInstanceOf[SIntervalPointerCode].a), deepCopy) } } def unstagedStoreAtAddress(addr: Long, region: Region, srcPType: PType, srcAddress: Long, deepCopy: Boolean): Unit = { @@ -121,6 +121,6 @@ final case class PCanonicalInterval(pointType: PType, override val required: Boo def constructFromCodes(cb: EmitCodeBuilder, region: Value[Region], start: EmitCode, end: EmitCode, includesStart: EmitCode, includesEnd: EmitCode): SIntervalPointerCode = { val sc = representation.constructFromFields(cb, region, FastIndexedSeq(start, end, includesStart, includesEnd), deepCopy = false) - new SIntervalPointerCode(SIntervalPointer(this), sc.a) + new SIntervalPointerCode(sType, sc.a) } } diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalLocus.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalLocus.scala index 3104abb1559..e1c1997f345 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalLocus.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalLocus.scala @@ -91,21 +91,21 @@ final case class PCanonicalLocus(rgBc: BroadcastRG, required: Boolean = false) e } } - def sType: SCanonicalLocusPointer = SCanonicalLocusPointer(this) + def sType: SCanonicalLocusPointer = SCanonicalLocusPointer(setRequired(false).asInstanceOf[PCanonicalLocus]) - def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PCode = new SCanonicalLocusPointerCode(sType, addr) + def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SCode = new SCanonicalLocusPointerCode(sType, addr) def store(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): Code[Long] = { value.st match { case SCanonicalLocusPointer(pt) => - representation.store(cb, region, pt.representation.loadCheapPCode(cb, value.asInstanceOf[SCanonicalLocusPointerCode].a), deepCopy) + representation.store(cb, region, pt.representation.loadCheapSCode(cb, value.asInstanceOf[SCanonicalLocusPointerCode].a), deepCopy) } } def storeAtAddress(cb: EmitCodeBuilder, addr: Code[Long], region: Value[Region], value: SCode, deepCopy: Boolean): Unit = { value.st match { case SCanonicalLocusPointer(pt) => - representation.storeAtAddress(cb, addr, region, pt.representation.loadCheapPCode(cb, 
value.asInstanceOf[SCanonicalLocusPointerCode].a), deepCopy) + representation.storeAtAddress(cb, addr, region, pt.representation.loadCheapSCode(cb, value.asInstanceOf[SCanonicalLocusPointerCode].a), deepCopy) } } @@ -131,8 +131,8 @@ final case class PCanonicalLocus(rgBc: BroadcastRG, required: Boolean = false) e def constructFromPositionAndString(cb: EmitCodeBuilder, r: Value[Region], contig: Code[String], pos: Code[Int]): SCanonicalLocusPointerCode = { val contigType = representation.fieldType("contig").asInstanceOf[PCanonicalString] - val contigCode = SStringPointer(contigType).constructFromString(cb, r, contig) + val contigCode = contigType.sType.constructFromString(cb, r, contig) val repr = representation.constructFromFields(cb, r, FastIndexedSeq(EmitCode.present(cb.emb, contigCode), EmitCode.present(cb.emb, primitive(pos))), deepCopy = false) - new SCanonicalLocusPointerCode(SCanonicalLocusPointer(this), repr.a) + new SCanonicalLocusPointerCode(SCanonicalLocusPointer(setRequired(false).asInstanceOf[PCanonicalLocus]), repr.a) } } diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalNDArray.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalNDArray.scala index ac8bc9f167c..bd5d58a3b68 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalNDArray.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalNDArray.scala @@ -2,16 +2,17 @@ package is.hail.types.physical import is.hail.annotations.{Annotation, NDArray, Region, UnsafeOrdering} import is.hail.asm4s.{Code, _} -import is.hail.expr.ir.{CodeParam, CodeParamType, EmitCode, EmitCodeBuilder, PCodeParam, Param, ParamType} +import is.hail.expr.ir.{CodeParam, CodeParamType, EmitCode, EmitCodeBuilder, SCodeParam, Param, ParamType} import is.hail.types.physical.stypes.SCode import is.hail.types.physical.stypes.interfaces._ import is.hail.types.virtual.{TNDArray, Type} -import is.hail.types.physical.stypes.concrete.{SNDArrayPointer, SNDArrayPointerCode} +import is.hail.types.physical.stypes.concrete.{SNDArrayPointer, SNDArrayPointerCode, SStackStruct} import org.apache.spark.sql.Row import is.hail.utils._ final case class PCanonicalNDArray(elementType: PType, nDims: Int, required: Boolean = false) extends PNDArray { assert(elementType.required, "elementType must be required") + assert(!elementType.containsPointers, "ndarrays do not currently support elements which contain arrays, ndarrays, or strings") def _asIdent: String = s"ndarray_of_${elementType.asIdent}" @@ -38,8 +39,8 @@ final case class PCanonicalNDArray(elementType: PType, nDims: Int, required: Boo def loadShapes(cb: EmitCodeBuilder, addr: Value[Long], settables: IndexedSeq[Settable[Long]]): Unit = { - assert(settables.length == nDims) - val shapeTuple = shapeType.loadCheapPCode(cb, representation.loadField(addr, "shape")) + assert(settables.length == nDims, s"got ${ settables.length } settables, expect ${ nDims } dims") + val shapeTuple = shapeType.loadCheapSCode(cb, representation.loadField(addr, "shape")) .memoize(cb, "pcndarray_shapetuple") (0 until nDims).foreach { dimIdx => cb.assign(settables(dimIdx), shapeTuple.loadField(cb, dimIdx).get(cb).asLong.longCode(cb)) @@ -48,7 +49,7 @@ final case class PCanonicalNDArray(elementType: PType, nDims: Int, required: Boo def loadStrides(cb: EmitCodeBuilder, addr: Value[Long], settables: IndexedSeq[Settable[Long]]): Unit = { assert(settables.length == nDims) - val strideTuple = strideType.loadCheapPCode(cb, representation.loadField(addr, "strides")) + val strideTuple = 
strideType.loadCheapSCode(cb, representation.loadField(addr, "strides")) .memoize(cb, "pcndarray_stridetuple") (0 until nDims).foreach { dimIdx => cb.assign(settables(dimIdx), strideTuple.loadField(cb, dimIdx).get(cb).asLong.longCode(cb)) @@ -123,7 +124,7 @@ final case class PCanonicalNDArray(elementType: PType, nDims: Int, required: Boo } private def getElementAddress(cb: EmitCodeBuilder, indices: IndexedSeq[Value[Long]], nd: Value[Long]): Value[Long] = { - val ndarrayValue = PCode(this, nd).asNDArray.memoize(cb, "getElementAddressNDValue") + val ndarrayValue = loadCheapSCode(cb, nd).asNDArray.memoize(cb, "getElementAddressNDValue") val stridesTuple = ndarrayValue.strides(cb) val dataStore = cb.newLocal[Long]("nd_get_element_address_data_store", @@ -151,7 +152,7 @@ final case class PCanonicalNDArray(elementType: PType, nDims: Int, required: Boo def loadElement(cb: EmitCodeBuilder, indices: IndexedSeq[Value[Long]], ndAddress: Value[Long]): SCode = { val off = getElementAddress(cb, indices, ndAddress) - elementType.loadCheapPCode(cb, elementType.loadFromNested(off)) + elementType.loadCheapSCode(cb, elementType.loadFromNested(off)) } def loadElementFromDataAndStrides(cb: EmitCodeBuilder, indices: IndexedSeq[Value[Long]], ndDataAddress: Value[Long], strides: IndexedSeq[Value[Long]]): Code[Long] = { @@ -179,28 +180,30 @@ final case class PCanonicalNDArray(elementType: PType, nDims: Int, required: Boo dataCode: SIndexableCode, cb: EmitCodeBuilder, region: Value[Region] - ): PNDArrayCode = { + ): SNDArrayCode = { + assert(shape.length == nDims, s"nDims = ${ nDims }, nShapeElts=${ shape.length }") + assert(strides.length == nDims, s"nDims = ${ nDims }, nShapeElts=${ strides.length }") val cacheKey = ("constructByCopyingArray", this, dataCode.st) val mb = cb.emb.ecb.getOrGenEmitMethod("pcndarray_construct_by_copying_array", cacheKey, FastIndexedSeq[ParamType](classInfo[Region], dataCode.st.paramType) ++ (0 until 2 * nDims).map(_ => CodeParamType(LongInfo)), sType.paramType) { mb => - mb.emitPCode { cb => + mb.emitSCode { cb => val region = mb.getCodeParam[Region](1) - val dataValue = mb.getPCodeParam(2).asIndexable.memoize(cb, "pcndarray_construct_by_copying_array_datavalue") + val dataValue = mb.getSCodeParam(2).asIndexable.memoize(cb, "pcndarray_construct_by_copying_array_datavalue") val shape = (0 until nDims).map(i => mb.getCodeParam[Long](3 + i)) val strides = (0 until nDims).map(i => mb.getCodeParam[Long](3 + nDims + i)) val ndAddr = cb.newLocal[Long]("ndarray_construct_addr") cb.assign(ndAddr, this.allocate(shape, region)) - shapeType.storeAtAddressFromFields(cb, cb.newLocal[Long]("construct_shape", this.representation.fieldOffset(ndAddr, "shape")), + shapeType.storeAtAddress(cb, cb.newLocal[Long]("construct_shape", this.representation.fieldOffset(ndAddr, "shape")), region, - shape.map(s => EmitCode.present(cb.emb, primitive(s))), + SStackStruct.constructFromArgs(cb, region, shapeType.virtualType, shape.map(s => EmitCode.present(cb.emb, primitive(s))): _*), false) - strideType.storeAtAddressFromFields(cb, cb.newLocal[Long]("construct_strides", this.representation.fieldOffset(ndAddr, "strides")), + strideType.storeAtAddress(cb, cb.newLocal[Long]("construct_strides", this.representation.fieldOffset(ndAddr, "strides")), region, - strides.map(s => EmitCode.present(cb.emb, primitive(s))), + SStackStruct.constructFromArgs(cb, region, strideType.virtualType, strides.map(s => EmitCode.present(cb.emb, primitive(s))): _*), false) val newDataPointer = cb.newLocal("ndarray_construct_new_data_pointer", 
ndAddr + this.representation.byteSize) @@ -208,11 +211,11 @@ final case class PCanonicalNDArray(elementType: PType, nDims: Int, required: Boo cb.append(Region.storeLong(this.representation.fieldOffset(ndAddr, "data"), newDataPointer)) dataType.storeContentsAtAddress(cb, newDataPointer, region, dataValue, true) - new SNDArrayPointerCode(SNDArrayPointer(this), ndAddr) + new SNDArrayPointerCode(sType, ndAddr) } } - cb.invokePCode(mb, FastIndexedSeq[Param](region, PCodeParam(dataCode.asPCode)) ++ (shape.map(CodeParam(_)) ++ strides.map(CodeParam(_))): _*) + cb.invokeSCode(mb, FastIndexedSeq[Param](region, SCodeParam(dataCode)) ++ (shape.map(CodeParam(_)) ++ strides.map(CodeParam(_))): _*) .asNDArray } @@ -225,13 +228,13 @@ final case class PCanonicalNDArray(elementType: PType, nDims: Int, required: Boo val ndAddr = cb.newLocal[Long]("ndarray_construct_addr") cb.assign(ndAddr, this.allocate(shape, region)) - shapeType.storeAtAddressFromFields(cb, cb.newLocal[Long]("construct_shape", this.representation.fieldOffset(ndAddr, "shape")), + shapeType.storeAtAddress(cb, cb.newLocal[Long]("construct_shape", this.representation.fieldOffset(ndAddr, "shape")), region, - shape.map(s => EmitCode.present(cb.emb, primitive(s))), + SStackStruct.constructFromArgs(cb, region, shapeType.virtualType, shape.map(s => EmitCode.present(cb.emb, primitive(s))): _*), false) - strideType.storeAtAddressFromFields(cb, cb.newLocal[Long]("construct_strides", this.representation.fieldOffset(ndAddr, "strides")), + strideType.storeAtAddress(cb, cb.newLocal[Long]("construct_strides", this.representation.fieldOffset(ndAddr, "strides")), region, - strides.map(s => EmitCode.present(cb.emb, primitive(s))), + SStackStruct.constructFromArgs(cb, region, strideType.virtualType, strides.map(s => EmitCode.present(cb.emb, primitive(s))): _*), false) val newDataPointer = cb.newLocal("ndarray_construct_new_data_pointer", ndAddr + this.representation.byteSize) @@ -241,7 +244,7 @@ final case class PCanonicalNDArray(elementType: PType, nDims: Int, required: Boo cb.append(dataType.stagedInitialize(newDataPointer, this.numElements(shape).toI)) - (newFirstElementDataPointer, (cb: EmitCodeBuilder) => new SNDArrayPointerCode(SNDArrayPointer(this), ndAddr)) + (newFirstElementDataPointer, (cb: EmitCodeBuilder) => new SNDArrayPointerCode(sType, ndAddr)) } def unstagedConstructDataFunction( @@ -344,9 +347,9 @@ final case class PCanonicalNDArray(elementType: PType, nDims: Int, required: Boo Region.storeAddress(addr, copyFromAddress(region, srcND, srcAddress, deepCopy)) } - def sType: SNDArrayPointer = SNDArrayPointer(this) + def sType: SNDArrayPointer = SNDArrayPointer(setRequired(false).asInstanceOf[PCanonicalNDArray]) - def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PCode = new SNDArrayPointerCode(sType, addr) + def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SCode = new SNDArrayPointerCode(sType, addr) def store(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): Code[Long] = { value.st match { @@ -361,15 +364,14 @@ final case class PCanonicalNDArray(elementType: PType, nDims: Int, required: Boo val shape = oldND.shapes(cb) val newStrides = makeColumnMajorStrides(shape, region, cb) val (targetDataFirstElementAddr, finish) = this.constructDataFunction(shape, newStrides, cb, region) + val result = finish(cb) - val currentOffset = cb.newLocal[Long]("pcanonical_ndarray_store_offset", targetDataFirstElementAddr) - SNDArray.forEachIndex(cb, shape, "PCanonicalNDArray_store") { (cb, currentIndices) => - val 
oldElement = oldND.loadElement(currentIndices, cb) - elementType.storeAtAddress(cb, currentOffset, region, oldElement, true) - cb.assign(currentOffset, currentOffset + elementType.byteSize) - } + SNDArray.coiterate(cb, region, FastIndexedSeq((result, "result"), (oldND.get, "oldND")), { + case Seq(dest, elt) => + cb.assign(dest, elt) + }, deepCopy = true) - finish(cb).a + result.a } } diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalSet.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalSet.scala index 900a12d040e..49db88abc0b 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalSet.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalSet.scala @@ -2,9 +2,19 @@ package is.hail.types.physical import is.hail.annotations.{Annotation, Region} import is.hail.types.physical.stypes.concrete.{SIndexablePointer, SIndexablePointerCode} +import is.hail.types.physical.stypes.interfaces.SIndexableCode import is.hail.types.virtual.{TSet, Type} import is.hail.utils._ +object PCanonicalSet { + def coerceArrayCode(contents: SIndexableCode): SIndexableCode = { + contents.st match { + case SIndexablePointer(PCanonicalArray(elt, r)) => + PCanonicalSet(elt, r).construct(contents) + } + } +} + final case class PCanonicalSet(elementType: PType, required: Boolean = false) extends PSet with PArrayBackedContainer { val arrayRep = PCanonicalArray(elementType, required) @@ -30,7 +40,8 @@ final case class PCanonicalSet(elementType: PType, required: Boolean = false) e arrayRep.unstagedStoreJavaObject(s, region) } - def construct(contents: PIndexableCode): PIndexableCode = { + def construct(_contents: SIndexableCode): SIndexableCode = { + val contents = _contents.asInstanceOf[SIndexablePointerCode] assert(contents.pt.equalModuloRequired(arrayRep), s"\n contents: ${ contents.pt }\n arrayrep: ${ arrayRep }") new SIndexablePointerCode(SIndexablePointer(this), contents.asInstanceOf[SIndexablePointerCode].a) } diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalShuffle.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalShuffle.scala index b9b19fe1c2b..3f02d7e41f6 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalShuffle.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalShuffle.scala @@ -44,11 +44,11 @@ final case class PCanonicalShuffle( override def unstagedStoreJavaObject(annotation: Annotation, region: Region): Long = this.representation.unstagedStoreJavaObject(annotation, region) - def loadBinary(cb: EmitCodeBuilder, addr: Code[Long]): SBinaryPointerCode = representation.loadCheapPCode(cb, addr).asInstanceOf[SBinaryPointerCode] + def loadBinary(cb: EmitCodeBuilder, addr: Code[Long]): SBinaryPointerCode = representation.loadCheapSCode(cb, addr).asInstanceOf[SBinaryPointerCode] - def sType: SCanonicalShufflePointer = SCanonicalShufflePointer(this) + def sType: SCanonicalShufflePointer = SCanonicalShufflePointer(setRequired(false).asInstanceOf[PCanonicalShuffle]) - def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PCode = new SCanonicalShufflePointerCode(sType, representation.loadCheapPCode(cb, addr)) + def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SCode = new SCanonicalShufflePointerCode(sType, representation.loadCheapSCode(cb, addr)) def store(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): Code[Long] = { value.st match { diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalStream.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalStream.scala 
index 219d02b6da8..3625cda0f28 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalStream.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalStream.scala @@ -3,7 +3,7 @@ package is.hail.types.physical import is.hail.annotations.UnsafeOrdering import is.hail.asm4s.Code import is.hail.expr.ir.EmitMethodBuilder -import is.hail.types.physical.stypes.interfaces +import is.hail.types.physical.stypes.{EmitType, interfaces} import is.hail.types.physical.stypes.interfaces.{SStream, SStreamCode} import is.hail.types.virtual.{TStream, Type} @@ -16,8 +16,6 @@ final case class PCanonicalStream(elementType: PType, required: Boolean = false) sb.append("]") } - override def defaultValue(mb: EmitMethodBuilder[_]): SStreamCode = throw new UnsupportedOperationException - override def deepRename(t: Type) = deepRenameStream(t.asInstanceOf[TStream]) private def deepRenameStream(t: TStream): PStream = @@ -25,7 +23,7 @@ final case class PCanonicalStream(elementType: PType, required: Boolean = false) def setRequired(required: Boolean): PCanonicalStream = if (required == this.required) this else this.copy(required = required) - override def sType: SStream = interfaces.SStream(elementType.sType, required) + override def sType: SStream = interfaces.SStream(EmitType(elementType.sType, elementType.required)) def loadFromNested(addr: Code[Long]): Code[Long] = throw new NotImplementedError() diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalString.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalString.scala index e61fc4bf980..30dee6b5807 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalString.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalString.scala @@ -58,9 +58,9 @@ class PCanonicalString(val required: Boolean) extends PString { def setRequired(required: Boolean) = if (required == this.required) this else PCanonicalString(required) - def sType: SStringPointer = SStringPointer(this) + def sType: SStringPointer = SStringPointer(setRequired(false).asInstanceOf[PCanonicalString]) - def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PCode = new SStringPointerCode(SStringPointer(this), addr) + def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SCode = new SStringPointerCode(sType, addr) def store(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): Code[Long] = { value.st match { diff --git a/hail/src/main/scala/is/hail/types/physical/PCode.scala b/hail/src/main/scala/is/hail/types/physical/PCode.scala deleted file mode 100644 index bb122c89f41..00000000000 --- a/hail/src/main/scala/is/hail/types/physical/PCode.scala +++ /dev/null @@ -1,318 +0,0 @@ -package is.hail.types.physical - -import is.hail.annotations.Region -import is.hail.asm4s._ -import is.hail.expr.ir._ -import is.hail.expr.ir.streams.{StreamArgType, StreamProducer} -import is.hail.types.physical.stypes._ -import is.hail.types.physical.stypes.concrete._ -import is.hail.types.physical.stypes.interfaces.{PVoidCode, SStream, SStreamCode} -import is.hail.types.physical.stypes.primitives._ -import is.hail.types.virtual._ -import is.hail.utils._ - -trait PValue extends SValue { pValueSelf => - def pt: PType - - def get: PCode - - def value: Value[_] = { - new Value[Any] { - override def get: Code[Any] = pValueSelf.get.code - } - } -} - -trait PSettable extends PValue with SSettable { - - def store(cb: EmitCodeBuilder, v: SCode): Unit = store(cb, v.asInstanceOf[PCode]) - - def store(cb: EmitCodeBuilder, v: PCode): Unit - - def 
settableTuple(): IndexedSeq[Settable[_]] - - override def load(): PCode = get -} - -object SingleCodeType { - def typeInfoFromType(t: Type): TypeInfo[_] = t match { - case TInt32 => IntInfo - case TInt64 => LongInfo - case TFloat32 => FloatInfo - case TFloat64 => DoubleInfo - case TBoolean => BooleanInfo - case TVoid => UnitInfo - case _ => LongInfo // all others passed as ptype references - } - - def fromSType(t: SType): SingleCodeType = t.virtualType match { - case TInt32 => Int32SingleCodeType - case TInt64 => Int64SingleCodeType - case TFloat32 => Float32SingleCodeType - case TFloat64 => Float64SingleCodeType - case TBoolean => BooleanSingleCodeType - case _ => PTypeReferenceSingleCodeType(t.canonicalPType()) - - } -} - -sealed trait SingleCodeType { - def ti: TypeInfo[_] - - def loadToPCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): PCode - - def virtualType: Type - - def coercePCode(cb: EmitCodeBuilder, pc: PCode, region: Value[Region], deepCopy: Boolean): SingleCodePCode -} - -case object Int32SingleCodeType extends SingleCodeType { - def ti: TypeInfo[_] = IntInfo - - def loadToPCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): PCode = new SInt32Code(true, coerce[Int](c)) - - def virtualType: Type = TInt32 - - def coercePCode(cb: EmitCodeBuilder, pc: PCode, region: Value[Region], deepCopy: Boolean): SingleCodePCode = SingleCodePCode(this, pc.asInt.intCode(cb)) -} - -case object Int64SingleCodeType extends SingleCodeType { - def ti: TypeInfo[_] = LongInfo - - def loadToPCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): PCode = new SInt64Code(true, coerce[Long](c)) - - def virtualType: Type = TInt64 - - def coercePCode(cb: EmitCodeBuilder, pc: PCode, region: Value[Region], deepCopy: Boolean): SingleCodePCode = SingleCodePCode(this, pc.asLong.longCode(cb)) -} - -case object Float32SingleCodeType extends SingleCodeType { - def ti: TypeInfo[_] = FloatInfo - - def loadToPCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): PCode = new SFloat32Code(true, coerce[Float](c)) - - def virtualType: Type = TFloat32 - - def coercePCode(cb: EmitCodeBuilder, pc: PCode, region: Value[Region], deepCopy: Boolean): SingleCodePCode = SingleCodePCode(this, pc.asFloat.floatCode(cb)) -} - -case object Float64SingleCodeType extends SingleCodeType { - def ti: TypeInfo[_] = DoubleInfo - - def loadToPCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): PCode = new SFloat64Code(true, coerce[Double](c)) - - def virtualType: Type = TFloat64 - - def coercePCode(cb: EmitCodeBuilder, pc: PCode, region: Value[Region], deepCopy: Boolean): SingleCodePCode = SingleCodePCode(this, pc.asDouble.doubleCode(cb)) -} - -case object BooleanSingleCodeType extends SingleCodeType { - def ti: TypeInfo[_] = BooleanInfo - - def loadToPCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): PCode = new SBooleanCode(true, coerce[Boolean](c)) - - def virtualType: Type = TBoolean - - def coercePCode(cb: EmitCodeBuilder, pc: PCode, region: Value[Region], deepCopy: Boolean): SingleCodePCode = SingleCodePCode(this, pc.asBoolean.boolCode(cb)) -} - -case class StreamSingleCodeType(requiresMemoryManagementPerElement: Boolean, eltType: PType) extends SingleCodeType { self => - - def virtualType: Type = TStream(eltType.virtualType) - - def ti: TypeInfo[_] = classInfo[StreamArgType] - - def loadToPCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): PCode = { - val mb = cb.emb - val xIter = mb.genFieldThisRef[Iterator[java.lang.Long]]("streamInIterator") - - // this, Region, ... 
- val mkIter = coerce[StreamArgType](c) - val eltRegion = mb.genFieldThisRef[Region]("stream_input_element_region") - val rvAddr = mb.genFieldThisRef[Long]("stream_input_addr") - - val producer = new StreamProducer { - override val length: Option[EmitCodeBuilder => Code[Int]] = None - - override def initialize(cb: EmitCodeBuilder): Unit = { - cb.assign(xIter, mkIter.invoke[Region, Region, Iterator[java.lang.Long]]("apply", r, eltRegion)) - } - - override val elementRegion: Settable[Region] = eltRegion - override val requiresMemoryManagementPerElement: Boolean = self.requiresMemoryManagementPerElement - override val LproduceElement: CodeLabel = mb.defineAndImplementLabel { cb => - val hasNext = cb.newLocal[Boolean]("stream_in_hasnext", xIter.load().hasNext) - cb.ifx(!hasNext, cb.goto(LendOfStream)) - cb.assign(rvAddr, xIter.load().next().invoke[Long]("longValue")) - cb.goto(LproduceElementDone) - } - - override val element: EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, eltType.loadCheapPCode(cb, rvAddr))) - - override def close(cb: EmitCodeBuilder): Unit = {} - } - SStreamCode(SStream(eltType.sType, true), producer) - } - - def coercePCode(cb: EmitCodeBuilder, pc: PCode, region: Value[Region], deepCopy: Boolean): SingleCodePCode = throw new UnsupportedOperationException -} - -case class PTypeReferenceSingleCodeType(pt: PType) extends SingleCodeType { - def ti: TypeInfo[_] = LongInfo - - def loadToPCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): PCode = pt.loadCheapPCode(cb, coerce[Long](c)) - - def virtualType: Type = pt.virtualType - - def coercePCode(cb: EmitCodeBuilder, pc: PCode, region: Value[Region], deepCopy: Boolean): SingleCodePCode = { - SingleCodePCode(this, pt.store(cb, region, pc, deepCopy = deepCopy)) - } -} - -object SingleCodePCode { - def fromPCode(cb: EmitCodeBuilder, pc: PCode, region: Value[Region], deepCopy: Boolean = false): SingleCodePCode = { - SingleCodeType.fromSType(pc.st).coercePCode(cb, pc, region, deepCopy) - } -} - -case class SingleCodePCode(typ: SingleCodeType, code: Code[_]) - -abstract class PCode extends SCode { self => - - def st: SType - - def pt: PType - - def code: Code[_] - - def codeTuple(): IndexedSeq[Code[_]] - - def typeInfo: TypeInfo[_] = typeToTypeInfo(pt) - - override def asBoolean: SBooleanCode = asInstanceOf[SBooleanCode] - override def asInt: SInt32Code = asInstanceOf[SInt32Code] - override def asInt32: SInt32Code = asInstanceOf[SInt32Code] - override def asLong: SInt64Code = asInstanceOf[SInt64Code] - override def asInt64: SInt64Code = asInstanceOf[SInt64Code] - override def asFloat: SFloat32Code = asInstanceOf[SFloat32Code] - override def asFloat32: SFloat32Code = asInstanceOf[SFloat32Code] - override def asFloat64: SFloat64Code = asInstanceOf[SFloat64Code] - override def asDouble: SFloat64Code = asInstanceOf[SFloat64Code] - override def asBinary: PBinaryCode = asInstanceOf[PBinaryCode] - override def asIndexable: PIndexableCode = asInstanceOf[PIndexableCode] - override def asBaseStruct: PBaseStructCode = asInstanceOf[PBaseStructCode] - override def asString: PStringCode = asInstanceOf[PStringCode] - override def asInterval: PIntervalCode = asInstanceOf[PIntervalCode] - override def asNDArray: PNDArrayCode = asInstanceOf[PNDArrayCode] - override def asLocus: PLocusCode = asInstanceOf[PLocusCode] - - override def asCall: PCallCode = asInstanceOf[PCallCode] - - override def castTo(cb: EmitCodeBuilder, region: Value[Region], destType: PType): PCode = - castTo(cb, region, destType, false) - - override def castTo(cb: 
EmitCodeBuilder, region: Value[Region], destType: PType, deepCopy: Boolean): PCode = { - super.castTo(cb, region, destType, deepCopy).asPCode - } - - override def copyToRegion(cb: EmitCodeBuilder, region: Value[Region]): PCode = copyToRegion(cb, region, pt) - - override def copyToRegion(cb: EmitCodeBuilder, region: Value[Region], destType: PType): PCode = - super.copyToRegion(cb, region, destType).asPCode - - def memoize(cb: EmitCodeBuilder, name: String): PValue - - def memoizeField(cb: EmitCodeBuilder, name: String): PValue - - final def toPCode(cb: EmitCodeBuilder, region: Value[Region]): PCode = this -} - -object PCode { - def apply(pt: PType, code: Code[_]): PCode = pt match { - case pt: PCanonicalArray => - new SIndexablePointerCode(SIndexablePointer(pt), coerce[Long](code)) - case pt: PCanonicalSet => - new SIndexablePointerCode(SIndexablePointer(pt), coerce[Long](code)) - case pt: PCanonicalDict => - new SIndexablePointerCode(SIndexablePointer(pt), coerce[Long](code)) - case pt: PSubsetStruct => - val ss = pt.sType - new SSubsetStructCode(ss, PCode(ss.pType.ps, code).asBaseStruct) - case pt: PCanonicalBaseStruct => - new SBaseStructPointerCode(SBaseStructPointer(pt), coerce[Long](code)) - case pt: PCanonicalBinary => - new SBinaryPointerCode(SBinaryPointer(pt), coerce[Long](code)) - case pt: PCanonicalShuffle => - new SCanonicalShufflePointerCode(SCanonicalShufflePointer(pt), - new SBinaryPointerCode(SBinaryPointer(pt.representation), coerce[Long](code))) - case pt: PCanonicalString => - new SStringPointerCode(SStringPointer(pt), coerce[Long](code)) - case pt: PCanonicalInterval => - new SIntervalPointerCode(SIntervalPointer(pt), coerce[Long](code)) - case pt: PCanonicalLocus => - new SCanonicalLocusPointerCode(SCanonicalLocusPointer(pt), coerce[Long](code)) - case pt: PCanonicalCall => - new SCanonicalCallCode(pt.required, coerce[Int](code)) - case pt: PCanonicalNDArray => - new SNDArrayPointerCode(SNDArrayPointer(pt), coerce[Long](code)) - case pt: PCanonicalStream => - throw new UnsupportedOperationException(s"Can't PCode.apply unrealizable PType: $pt") - case PVoid => - throw new UnsupportedOperationException(s"Can't PCode.apply unrealizable PType: $pt") - case PBoolean(r) => - new SBooleanCode(r, coerce[Boolean](code)) - case PInt32(r) => - new SInt32Code(r, coerce[Int](code)) - case PInt64(r) => - new SInt64Code(r, coerce[Long](code)) - case PFloat32(r) => - new SFloat32Code(r, coerce[Float](code)) - case PFloat64(r) => - new SFloat64Code(r, coerce[Double](code)) - } - - def _empty: PCode = PVoidCode -} - -object PSettable { - def apply(sb: SettableBuilder, _pt: PType, name: String): PSettable = _pt match { - case pt: PCanonicalArray => - SIndexablePointerSettable(sb, SIndexablePointer(pt), name) - case pt: PCanonicalSet => - SIndexablePointerSettable(sb, SIndexablePointer(pt), name) - case pt: PCanonicalDict => - SIndexablePointerSettable(sb, SIndexablePointer(pt), name) - case pt: PSubsetStruct => - new SSubsetStructSettable(pt.sType, PSettable(sb, pt.ps, name).asInstanceOf[PStructSettable]) - case pt: PCanonicalBaseStruct => - SBaseStructPointerSettable(sb, SBaseStructPointer(pt), name) - case pt: PCanonicalBinary => - SBinaryPointerSettable(sb, SBinaryPointer(pt), name) - case pt: PCanonicalString => - SStringPointerSettable(sb, SStringPointer(pt), name) - case pt: PCanonicalInterval => - SIntervalPointerSettable(sb, SIntervalPointer(pt), name) - case pt: PCanonicalLocus => - SCanonicalLocusPointerSettable(sb, SCanonicalLocusPointer(pt), name) - case pt: PCanonicalCall => 
- SCanonicalCallSettable(sb, name, pt.required) - case pt: PCanonicalNDArray => - SNDArrayPointerSettable(sb, SNDArrayPointer(pt), name) - case pt: PCanonicalShuffle => - SCanonicalShufflePointerSettable(sb, SCanonicalShufflePointer(pt), name) - case pt: PCanonicalStream => - throw new UnsupportedOperationException(s"Can't PCode.apply unrealizable PType: $pt") - case PVoid => - throw new UnsupportedOperationException(s"Can't PCode.apply unrealizable PType: PVoid") - case PBoolean(r) => - SBooleanSettable(sb, name, r) - case PInt32(r) => - SInt32Settable(sb, name, r) - case PInt64(r) => - SInt64Settable(sb, name, r) - case PFloat32(r) => - SFloat32Settable(sb, name, r) - case PFloat64(r) => - SFloat64Settable(sb, name, r) - } -} diff --git a/hail/src/main/scala/is/hail/types/physical/PContainer.scala b/hail/src/main/scala/is/hail/types/physical/PContainer.scala index 2f842a3c1bd..668acd9dddd 100644 --- a/hail/src/main/scala/is/hail/types/physical/PContainer.scala +++ b/hail/src/main/scala/is/hail/types/physical/PContainer.scala @@ -87,14 +87,4 @@ abstract class PContainer extends PIterable { def nextElementAddress(currentOffset: Long): Long def nextElementAddress(currentOffset: Code[Long]): Code[Long] -} - -abstract class PIndexableValue extends PValue with SIndexableValue - -abstract class PIndexableCode extends PCode with SIndexableCode { - def pt: PContainer - - def memoize(cb: EmitCodeBuilder, name: String): PIndexableValue - - def memoizeField(cb: EmitCodeBuilder, name: String): PIndexableValue -} +} \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/types/physical/PDict.scala b/hail/src/main/scala/is/hail/types/physical/PDict.scala index 8ec8b0688b2..508a909039c 100644 --- a/hail/src/main/scala/is/hail/types/physical/PDict.scala +++ b/hail/src/main/scala/is/hail/types/physical/PDict.scala @@ -2,6 +2,7 @@ package is.hail.types.physical import is.hail.annotations._ import is.hail.check.Gen +import is.hail.types.physical.stypes.interfaces.SContainer import is.hail.types.virtual.TDict abstract class PDict extends PContainer { @@ -10,6 +11,8 @@ abstract class PDict extends PContainer { val keyType: PType val valueType: PType + def sType: SContainer + def elementType: PStruct override def genNonmissingValue: Gen[Annotation] = diff --git a/hail/src/main/scala/is/hail/types/physical/PFloat32.scala b/hail/src/main/scala/is/hail/types/physical/PFloat32.scala index d6119e2c335..3544e9bbd68 100644 --- a/hail/src/main/scala/is/hail/types/physical/PFloat32.scala +++ b/hail/src/main/scala/is/hail/types/physical/PFloat32.scala @@ -38,12 +38,12 @@ class PFloat32(override val required: Boolean) extends PNumeric with PPrimitive coerce[PFloat32](coerce[Float](a) * coerce[Float](b)) } - override def sType: SType = SFloat32(required) + override def sType: SType = SFloat32 def storePrimitiveAtAddress(cb: EmitCodeBuilder, addr: Code[Long], value: SCode): Unit = cb.append(Region.storeFloat(addr, value.asFloat.floatCode(cb))) - override def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PCode = new SFloat32Code(required, Region.loadFloat(addr)) + override def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SCode = new SFloat32Code(Region.loadFloat(addr)) override def unstagedStoreJavaObjectAtAddress(addr: Long, annotation: Annotation, region: Region): Unit = { Region.storeFloat(addr, annotation.asInstanceOf[Float]) diff --git a/hail/src/main/scala/is/hail/types/physical/PFloat64.scala b/hail/src/main/scala/is/hail/types/physical/PFloat64.scala index 1136bc03e13..b61b314fa54 
100644 --- a/hail/src/main/scala/is/hail/types/physical/PFloat64.scala +++ b/hail/src/main/scala/is/hail/types/physical/PFloat64.scala @@ -39,12 +39,12 @@ class PFloat64(override val required: Boolean) extends PNumeric with PPrimitive coerce[PFloat64](coerce[Double](a) * coerce[Double](b)) } - override def sType: SType = SFloat64(required) + override def sType: SType = SFloat64 def storePrimitiveAtAddress(cb: EmitCodeBuilder, addr: Code[Long], value: SCode): Unit = cb.append(Region.storeDouble(addr, value.asDouble.doubleCode(cb))) - override def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PCode = new SFloat64Code(required, Region.loadDouble(addr)) + override def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SCode = new SFloat64Code(Region.loadDouble(addr)) override def unstagedStoreJavaObjectAtAddress(addr: Long, annotation: Annotation, region: Region): Unit = { Region.storeDouble(addr, annotation.asInstanceOf[Double]) diff --git a/hail/src/main/scala/is/hail/types/physical/PInt32.scala b/hail/src/main/scala/is/hail/types/physical/PInt32.scala index 9485774e7af..d5f8154d319 100644 --- a/hail/src/main/scala/is/hail/types/physical/PInt32.scala +++ b/hail/src/main/scala/is/hail/types/physical/PInt32.scala @@ -35,16 +35,18 @@ class PInt32(override val required: Boolean) extends PNumeric with PPrimitive { coerce[PInt32](coerce[Int](a) * coerce[Int](b)) } - override def sType: SType = SInt32(required) + override def sType: SType = SInt32 def storePrimitiveAtAddress(cb: EmitCodeBuilder, addr: Code[Long], value: SCode): Unit = cb.append(Region.storeInt(addr, value.asInt.intCode(cb))) - override def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PCode = new SInt32Code(required, Region.loadInt(addr)) + override def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SCode = new SInt32Code(Region.loadInt(addr)) override def unstagedStoreJavaObjectAtAddress(addr: Long, annotation: Annotation, region: Region): Unit = { Region.storeInt(addr, annotation.asInstanceOf[Int]) } + + def unstagedLoadFromAddress(addr: Long): Int = Region.loadInt(addr) } object PInt32 { diff --git a/hail/src/main/scala/is/hail/types/physical/PInt64.scala b/hail/src/main/scala/is/hail/types/physical/PInt64.scala index 0c474e4ad03..6b2bfee159c 100644 --- a/hail/src/main/scala/is/hail/types/physical/PInt64.scala +++ b/hail/src/main/scala/is/hail/types/physical/PInt64.scala @@ -36,12 +36,12 @@ class PInt64(override val required: Boolean) extends PNumeric with PPrimitive { coerce[PInt64](coerce[Long](a) * coerce[Long](b)) } - override def sType: SType = SInt64(required) + override def sType: SType = SInt64 def storePrimitiveAtAddress(cb: EmitCodeBuilder, addr: Code[Long], value: SCode): Unit = cb.append(Region.storeLong(addr, value.asLong.longCode(cb))) - override def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PCode = new SInt64Code(required, Region.loadLong(addr)) + override def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SCode = new SInt64Code(Region.loadLong(addr)) override def unstagedStoreJavaObjectAtAddress(addr: Long, annotation: Annotation, region: Region): Unit = { Region.storeLong(addr, annotation.asInstanceOf[Long]) diff --git a/hail/src/main/scala/is/hail/types/physical/PInterval.scala b/hail/src/main/scala/is/hail/types/physical/PInterval.scala index 9d159988e46..de2c5be720d 100644 --- a/hail/src/main/scala/is/hail/types/physical/PInterval.scala +++ b/hail/src/main/scala/is/hail/types/physical/PInterval.scala @@ -99,16 +99,4 @@ abstract class PInterval extends PType { 
def includesEnd(off: Code[Long]): Code[Boolean] override def genNonmissingValue: Gen[Annotation] = Interval.gen(pointType.virtualType.ordering, pointType.genValue) -} - -abstract class PIntervalValue extends PValue with SIntervalValue { - def pt: PInterval -} - -abstract class PIntervalCode extends PCode with SIntervalCode { - def pt: PInterval - - def memoize(cb: EmitCodeBuilder, name: String): PIntervalValue - - def memoizeField(cb: EmitCodeBuilder, name: String): PIntervalValue -} +} \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/types/physical/PLocus.scala b/hail/src/main/scala/is/hail/types/physical/PLocus.scala index 6f752afbf62..87e511ce2c7 100644 --- a/hail/src/main/scala/is/hail/types/physical/PLocus.scala +++ b/hail/src/main/scala/is/hail/types/physical/PLocus.scala @@ -25,14 +25,4 @@ abstract class PLocus extends PType { def positionType: PInt32 def unstagedStoreLocus(addr: Long, contig: String, position: Int, region: Region): Unit -} - -abstract class PLocusValue extends PValue with SLocusValue - -abstract class PLocusCode extends PCode with SLocusCode { - def pt: PLocus - - def memoize(cb: EmitCodeBuilder, name: String): PLocusValue - - def memoizeField(cb: EmitCodeBuilder, name: String): PLocusValue -} +} \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/types/physical/PNDArray.scala b/hail/src/main/scala/is/hail/types/physical/PNDArray.scala index 1300f25fbdc..691844ac1cf 100644 --- a/hail/src/main/scala/is/hail/types/physical/PNDArray.scala +++ b/hail/src/main/scala/is/hail/types/physical/PNDArray.scala @@ -57,7 +57,7 @@ abstract class PNDArray extends PType { data: SIndexableCode, cb: EmitCodeBuilder, region: Value[Region] - ): PNDArrayCode + ): SNDArrayCode def constructDataFunction( shape: IndexedSeq[Value[Long]], @@ -65,14 +65,4 @@ abstract class PNDArray extends PType { cb: EmitCodeBuilder, region: Value[Region] ): (Value[Long], EmitCodeBuilder => SNDArrayPointerCode) -} - -abstract class PNDArrayValue extends PValue with SNDArrayValue { - def pt: PNDArray -} - -abstract class PNDArrayCode extends PCode with SNDArrayCode { - def pt: PNDArray - - def memoize(cb: EmitCodeBuilder, name: String): PNDArrayValue -} +} \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/types/physical/PPrimitive.scala b/hail/src/main/scala/is/hail/types/physical/PPrimitive.scala index 82238b65e86..6c242f2e396 100644 --- a/hail/src/main/scala/is/hail/types/physical/PPrimitive.scala +++ b/hail/src/main/scala/is/hail/types/physical/PPrimitive.scala @@ -9,7 +9,7 @@ import is.hail.utils._ trait PPrimitive extends PType { def byteSize: Long - def _construct(mb: EmitMethodBuilder[_], region: Value[Region], pc: PCode): PCode = pc + def _construct(mb: EmitMethodBuilder[_], region: Value[Region], pc: SCode): SCode = pc override def containsPointers: Boolean = false diff --git a/hail/src/main/scala/is/hail/types/physical/PShuffle.scala b/hail/src/main/scala/is/hail/types/physical/PShuffle.scala index 7916e00629c..58d9f725bfa 100644 --- a/hail/src/main/scala/is/hail/types/physical/PShuffle.scala +++ b/hail/src/main/scala/is/hail/types/physical/PShuffle.scala @@ -26,18 +26,4 @@ abstract class PShuffle extends PType { def allocate(region: Region, length: Int): Long def allocate(region: Code[Region], length: Code[Int]): Code[Long] -} - -abstract class PShuffleValue extends PValue with SShuffleValue { - def loadLength(): Code[Int] - - def loadBytes(): Code[Array[Byte]] -} - -abstract class PShuffleCode extends PCode with SShuffleCode { - def pt: 
PShuffle - - def memoize(cb: EmitCodeBuilder, name: String): PShuffleValue - - def memoizeField(cb: EmitCodeBuilder, name: String): PShuffleValue -} +} \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/types/physical/PStream.scala b/hail/src/main/scala/is/hail/types/physical/PStream.scala index 6a4fb44d05a..fed4bd2bfb6 100644 --- a/hail/src/main/scala/is/hail/types/physical/PStream.scala +++ b/hail/src/main/scala/is/hail/types/physical/PStream.scala @@ -6,8 +6,4 @@ abstract class PStream extends PIterable with PUnrealizable { lazy val virtualType: TStream = TStream(elementType.virtualType) def _asIdent = s"stream_of_${elementType.asIdent}" -} - -abstract class PStreamCode extends PCode with PUnrealizableCode { - def pt: PStream -} +} \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/types/physical/PString.scala b/hail/src/main/scala/is/hail/types/physical/PString.scala index 0f5316226ee..84340c93204 100644 --- a/hail/src/main/scala/is/hail/types/physical/PString.scala +++ b/hail/src/main/scala/is/hail/types/physical/PString.scala @@ -25,16 +25,4 @@ abstract class PString extends PType { def allocateAndStoreString(region: Region, str: String): Long def allocateAndStoreString(mb: EmitMethodBuilder[_], region: Value[Region], str: Code[String]): Code[Long] -} - -abstract class PStringCode extends PCode with SStringCode { - def pt: PString - - def asBytes(): PBinaryCode -} - -abstract class PStringValue extends PValue with SStringValue { - def pt: PString - - def get: PStringCode -} +} \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/types/physical/PStruct.scala b/hail/src/main/scala/is/hail/types/physical/PStruct.scala index 77457713b35..0cbf5328cff 100644 --- a/hail/src/main/scala/is/hail/types/physical/PStruct.scala +++ b/hail/src/main/scala/is/hail/types/physical/PStruct.scala @@ -54,5 +54,5 @@ trait PStruct extends PBaseStruct { def insertFields(fieldsToInsert: TraversableOnce[(String, PType)]): PStruct - def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PBaseStructCode + def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SBaseStructCode } diff --git a/hail/src/main/scala/is/hail/types/physical/PSubsetStruct.scala b/hail/src/main/scala/is/hail/types/physical/PSubsetStruct.scala index 80531fe8ee1..4530e20475f 100644 --- a/hail/src/main/scala/is/hail/types/physical/PSubsetStruct.scala +++ b/hail/src/main/scala/is/hail/types/physical/PSubsetStruct.scala @@ -4,7 +4,7 @@ import is.hail.annotations.{Annotation, Region, UnsafeUtils} import is.hail.asm4s.{Code, Settable, SettableBuilder, Value, coerce, const} import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, IEmitCode} import is.hail.types.BaseStruct -import is.hail.types.physical.stypes.interfaces.SBaseStruct +import is.hail.types.physical.stypes.interfaces.{SBaseStruct, SBaseStructCode} import is.hail.types.physical.stypes.{SCode, SType} import is.hail.types.physical.stypes.concrete.SSubsetStruct import is.hail.types.virtual.TStruct @@ -123,7 +123,7 @@ final case class PSubsetStruct(ps: PStruct, _fieldNames: IndexedSeq[String]) ext throw new UnsupportedOperationException } - def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PBaseStructCode = throw new UnsupportedOperationException + def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SBaseStructCode = throw new UnsupportedOperationException def unstagedStoreAtAddress(addr: Long, region: Region, srcPType: PType, srcAddress: Long, deepCopy: Boolean): Unit = { throw new UnsupportedOperationException 
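  // Illustrative sketch only (hypothetical names, not part of this patch): the
  // recurring pattern across the files above is that a canonical PType's sType is
  // built from its non-required variant, so requiredness never reaches the SType
  // layer, and loadCheapSCode reuses that single sType instance. With PCanonicalFoo,
  // SFooPointer and SFooPointerCode standing in for any of the concrete classes:
  //
  //   def sType: SFooPointer =
  //     SFooPointer(setRequired(false).asInstanceOf[PCanonicalFoo])
  //
  //   def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SCode =
  //     new SFooPointerCode(sType, addr)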
diff --git a/hail/src/main/scala/is/hail/types/physical/PType.scala b/hail/src/main/scala/is/hail/types/physical/PType.scala index eadf7eb63bf..0e264225a9d 100644 --- a/hail/src/main/scala/is/hail/types/physical/PType.scala +++ b/hail/src/main/scala/is/hail/types/physical/PType.scala @@ -162,9 +162,9 @@ object PType { } def literalPType(t: Type, a: Annotation): PType = { - val rb = new BoxedArrayBuilder[Boolean]() - val crib = new BoxedArrayBuilder[Int]() - val cib = new BoxedArrayBuilder[Int]() + val rb = new BooleanArrayBuilder() + val crib = new IntArrayBuilder() + val cib = new IntArrayBuilder() def indexTypes(t: Type): Unit = { val ci = crib.size @@ -194,6 +194,7 @@ object PType { crib.setSizeUninitialized(ci + n) cib.setSizeUninitialized(ci + n) + cib.setSize(ci + n) var j = 0 while (j < n) { @@ -402,23 +403,6 @@ abstract class PType extends Serializable with Requiredness { } } - def deepInnerRequired(required: Boolean): PType = - this match { - case t: PArray => PCanonicalArray(t.elementType.deepInnerRequired(true), required) - case t: PSet => PCanonicalSet(t.elementType.deepInnerRequired(true), required) - case t: PDict => PCanonicalDict(t.keyType.deepInnerRequired(true), t.valueType.deepInnerRequired(true), required) - case t: PStruct => - PCanonicalStruct(t.fields.map(f => PField(f.name, f.typ.deepInnerRequired(true), f.index)), required) - case t: PCanonicalTuple => - PCanonicalTuple(t._types.map { f => f.copy(typ = f.typ.deepInnerRequired(true)) }, required) - case t: PInterval => - PCanonicalInterval(t.pointType.deepInnerRequired(true), required) - case t: PStream => - PCanonicalStream(t.elementType.deepInnerRequired(true), required = required) - case t => - t.setRequired(required) - } - protected[physical] def _copyFromAddress(region: Region, srcPType: PType, srcAddress: Long, deepCopy: Boolean): Long def copyFromAddress(region: Region, srcPType: PType, srcAddress: Long, deepCopy: Boolean): Long = { @@ -432,8 +416,8 @@ abstract class PType extends Serializable with Requiredness { _copyFromAddress(region, srcPType, srcAddress, deepCopy) } - // return a PCode that can cheaply operate on the region representation. Generally a pointer type, but not necessarily (e.g. primitives). - def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PCode + // return a SCode that can cheaply operate on the region representation. Generally a pointer type, but not necessarily (e.g. primitives). + def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SCode // stores a stack value as a region value of this type def store(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): Code[Long] @@ -445,21 +429,6 @@ abstract class PType extends Serializable with Requiredness { def deepRename(t: Type): PType = this - def defaultValue(mb: EmitMethodBuilder[_]): PCode = PCode(this, is.hail.types.physical.defaultValue(this)) - - def ti: TypeInfo[_] = typeToTypeInfo(this) - - def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(ti) - - def asParam: PCodeParamType = PCodeParamType(this) - - def nCodes: Int = 1 - - def fromCodeTuple(ct: IndexedSeq[Code[_]]): PCode = { - assert(ct.length == 1) - PCode(this, ct(0)) - } - // called to load a region value's start address from a nested representation. // Usually a no-op, but may need to dereference a pointer. 
def loadFromNested(addr: Code[Long]): Code[Long] diff --git a/hail/src/main/scala/is/hail/types/physical/PUnrealizable.scala b/hail/src/main/scala/is/hail/types/physical/PUnrealizable.scala index 4e6a0de557c..188375fd268 100644 --- a/hail/src/main/scala/is/hail/types/physical/PUnrealizable.scala +++ b/hail/src/main/scala/is/hail/types/physical/PUnrealizable.scala @@ -32,7 +32,7 @@ trait PUnrealizable extends PType { override def unstagedStoreJavaObjectAtAddress(addr: Long, annotation: Annotation, region: Region): Unit = unsupported - override def loadCheapPCode(cb: EmitCodeBuilder, addr: Code[Long]): PCode = unsupported + override def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SCode = unsupported override def store(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): Code[Long] = unsupported @@ -42,16 +42,3 @@ trait PUnrealizable extends PType { throw new UnsupportedOperationException("containsPointers not supported on PUnrealizable") } } - -trait PUnrealizableCode extends PCode { - private def unsupported: Nothing = - throw new UnsupportedOperationException(s"$pt is not realizable") - - def code: Code[_] = unsupported - - def codeTuple(): IndexedSeq[Code[_]] = unsupported - - override def typeInfo: TypeInfo[_] = unsupported - - def memoizeField(cb: EmitCodeBuilder, name: String): PValue = unsupported -} diff --git a/hail/src/main/scala/is/hail/types/physical/package.scala b/hail/src/main/scala/is/hail/types/physical/package.scala index 15d23d1834c..96c554f3398 100644 --- a/hail/src/main/scala/is/hail/types/physical/package.scala +++ b/hail/src/main/scala/is/hail/types/physical/package.scala @@ -2,11 +2,12 @@ package is.hail.types import is.hail.asm4s._ import is.hail.expr.ir.streams.StreamArgType +import is.hail.types.physical.stypes.{SCode, SValue} import scala.language.implicitConversions package object physical { - implicit def pvalueToPCode(pv: PValue): PCode = pv.get + implicit def sValueToSCode(sv: SValue): SCode = sv.get def typeToTypeInfo(t: PType): TypeInfo[_] = t match { case _: PInt32 => typeInfo[Int] diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/SCode.scala b/hail/src/main/scala/is/hail/types/physical/stypes/SCode.scala index a0dc45aa3b9..cc585bb2288 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/SCode.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/SCode.scala @@ -5,51 +5,54 @@ import is.hail.asm4s._ import is.hail.expr.ir.EmitCodeBuilder import is.hail.types.physical.stypes.interfaces._ import is.hail.types.physical.stypes.primitives._ -import is.hail.types.physical.{PCode, PIntervalCode, PNDArrayCode, PShuffleCode, PType, PValue} object SCode { def add(cb: EmitCodeBuilder, left: SCode, right: SCode, required: Boolean): SCode = { (left.st, right.st) match { - case (_: SInt32, _: SInt32) => new SInt32Code(required, left.asInt.intCode(cb) + right.asInt.intCode(cb)) - case (_: SFloat32, _: SFloat32) => new SFloat32Code(required, left.asFloat.floatCode(cb) + right.asFloat.floatCode(cb)) - case (_: SInt64, _: SInt64) => new SInt64Code(required, left.asLong.longCode(cb) + right.asLong.longCode(cb)) - case (_: SFloat64, _: SFloat64) => new SFloat64Code(required, left.asDouble.doubleCode(cb) + right.asDouble.doubleCode(cb)) + case (SInt32, SInt32) => new SInt32Code(left.asInt.intCode(cb) + right.asInt.intCode(cb)) + case (SFloat32, SFloat32) => new SFloat32Code(left.asFloat.floatCode(cb) + right.asFloat.floatCode(cb)) + case (SInt64, SInt64) => new SInt64Code(left.asLong.longCode(cb) + 
right.asLong.longCode(cb)) + case (SFloat64, SFloat64) => new SFloat64Code(left.asDouble.doubleCode(cb) + right.asDouble.doubleCode(cb)) } } def multiply(cb: EmitCodeBuilder, left: SCode, right: SCode, required: Boolean): SCode = { (left.st, right.st) match { - case (_: SInt32, _: SInt32) => new SInt32Code(required, left.asInt.intCode(cb) * right.asInt.intCode(cb)) - case (_: SFloat32, _: SFloat32) => new SFloat32Code(required, left.asFloat.floatCode(cb) * right.asFloat.floatCode(cb)) - case (_: SInt64, _: SInt64) => new SInt64Code(required, left.asLong.longCode(cb) * right.asLong.longCode(cb)) - case (_: SFloat64, _: SFloat64) => new SFloat64Code(required, left.asDouble.doubleCode(cb) * right.asDouble.doubleCode(cb)) + case (SInt32, SInt32) => new SInt32Code(left.asInt.intCode(cb) * right.asInt.intCode(cb)) + case (SFloat32, SFloat32) => new SFloat32Code(left.asFloat.floatCode(cb) * right.asFloat.floatCode(cb)) + case (SInt64, SInt64) => new SInt64Code(left.asLong.longCode(cb) * right.asLong.longCode(cb)) + case (SFloat64, SFloat64) => new SFloat64Code(left.asDouble.doubleCode(cb) * right.asDouble.doubleCode(cb)) } } def subtract(cb: EmitCodeBuilder, left: SCode, right: SCode, required: Boolean): SCode = { (left.st, right.st) match { - case (_: SInt32, _: SInt32) => new SInt32Code(required, left.asInt.intCode(cb) - right.asInt.intCode(cb)) - case (_: SFloat32, _: SFloat32) => new SFloat32Code(required, left.asFloat.floatCode(cb) - right.asFloat.floatCode(cb)) - case (_: SInt64, _: SInt64) => new SInt64Code(required, left.asLong.longCode(cb) - right.asLong.longCode(cb)) - case (_: SFloat64, _: SFloat64) => new SFloat64Code(required, left.asDouble.doubleCode(cb) - right.asDouble.doubleCode(cb)) + case (SInt32, SInt32) => new SInt32Code(left.asInt.intCode(cb) - right.asInt.intCode(cb)) + case (SFloat32, SFloat32) => new SFloat32Code(left.asFloat.floatCode(cb) - right.asFloat.floatCode(cb)) + case (SInt64, SInt64) => new SInt64Code(left.asLong.longCode(cb) - right.asLong.longCode(cb)) + case (SFloat64, SFloat64) => new SFloat64Code(left.asDouble.doubleCode(cb) - right.asDouble.doubleCode(cb)) } } def divide(cb: EmitCodeBuilder, left: SCode, right: SCode, required: Boolean): SCode = { (left.st, right.st) match { - case (_: SInt32, _: SInt32) => new SInt32Code(required, left.asInt.intCode(cb) / right.asInt.intCode(cb)) - case (_: SFloat32, _: SFloat32) => new SFloat32Code(required, left.asFloat.floatCode(cb) / right.asFloat.floatCode(cb)) - case (_: SInt64, _: SInt64) => new SInt64Code(required, left.asLong.longCode(cb) / right.asLong.longCode(cb)) - case (_: SFloat64, _: SFloat64) => new SFloat64Code(required, left.asDouble.doubleCode(cb) / right.asDouble.doubleCode(cb)) + case (SInt32, SInt32) => new SInt32Code(left.asInt.intCode(cb) / right.asInt.intCode(cb)) + case (SFloat32, SFloat32) => new SFloat32Code(left.asFloat.floatCode(cb) / right.asFloat.floatCode(cb)) + case (SInt64, SInt64) => new SInt64Code(left.asLong.longCode(cb) / right.asLong.longCode(cb)) + case (SFloat64, SFloat64) => new SFloat64Code(left.asDouble.doubleCode(cb) / right.asDouble.doubleCode(cb)) } } + + def _empty: SCode = SVoidCode } abstract class SCode { def st: SType - def codeTuple(): IndexedSeq[Code[_]] + // requires a code builder because forming a code tuple may require appending + // straight-line code, e.g. 
if a SCode contains nested EmitCodes + def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] def asBoolean: SBooleanCode = asInstanceOf[SBooleanCode] @@ -79,9 +82,9 @@ abstract class SCode { def asString: SStringCode = asInstanceOf[SStringCode] - def asInterval: PIntervalCode = asInstanceOf[PIntervalCode] + def asInterval: SIntervalCode = asInstanceOf[SIntervalCode] - def asNDArray: PNDArrayCode = asInstanceOf[PNDArrayCode] + def asNDArray: SNDArrayCode = asInstanceOf[SNDArrayCode] def asLocus: SLocusCode = asInstanceOf[SLocusCode] @@ -89,38 +92,26 @@ abstract class SCode { def asStream: SStreamCode = asInstanceOf[SStreamCode] - def asShuffle: PShuffleCode = asInstanceOf[PShuffleCode] + def asShuffle: SShuffleCode = asInstanceOf[SShuffleCode] - def castTo(cb: EmitCodeBuilder, region: Value[Region], destType: PType): SCode = + def castTo(cb: EmitCodeBuilder, region: Value[Region], destType: SType): SCode = castTo(cb, region, destType, false) - - def castTo(cb: EmitCodeBuilder, region: Value[Region], destType: PType, deepCopy: Boolean): SCode = { - destType.sType.coerceOrCopy(cb, region, this, deepCopy) + def castTo(cb: EmitCodeBuilder, region: Value[Region], destType: SType, deepCopy: Boolean): SCode = { + destType.coerceOrCopy(cb, region, this, deepCopy) } - - def copyToRegion(cb: EmitCodeBuilder, region: Value[Region]): SCode = - copyToRegion(cb, region, st.pType) - - def copyToRegion(cb: EmitCodeBuilder, region: Value[Region], destType: PType): SCode = - destType.sType.coerceOrCopy(cb, region, this, deepCopy = true) + def copyToRegion(cb: EmitCodeBuilder, region: Value[Region], destType: SType): SCode = + destType.coerceOrCopy(cb, region, this, deepCopy = true) def memoize(cb: EmitCodeBuilder, name: String): SValue def memoizeField(cb: EmitCodeBuilder, name: String): SValue - - def toPCode(cb: EmitCodeBuilder, region: Value[Region]): PCode - - // This method is a very temporary patch. Clients should use `toPCode`. 
- def asPCode: PCode = asInstanceOf[PCode] } trait SValue { def st: SType def get: SCode - - def asPValue: PValue = asInstanceOf[PValue] } @@ -131,3 +122,22 @@ trait SSettable extends SValue { def load(): SCode = get } + +object SSettable { + def apply(sb: SettableBuilder, st: SType, name: String): SSettable = { + st.fromSettables(st.settableTupleTypes().zipWithIndex.map { case (ti, i) => + sb.newSettable(s"${ name }_${ st.getClass.getSimpleName }_$i")(ti) + }) + } +} + +trait SUnrealizableCode extends SCode { + private def unsupported: Nothing = + throw new UnsupportedOperationException(s"$this is not realizable") + + def code: Code[_] = unsupported + + def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = unsupported + + def memoizeField(cb: EmitCodeBuilder, name: String): SValue = unsupported +} \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/SType.scala b/hail/src/main/scala/is/hail/types/physical/stypes/SType.scala index 08be3b658d9..0c0491d7878 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/SType.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/SType.scala @@ -1,46 +1,116 @@ package is.hail.types.physical.stypes import is.hail.annotations.Region -import is.hail.asm4s.{Code, Settable, TypeInfo, Value} -import is.hail.expr.ir.orderings.CodeOrdering -import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, EmitParamType, InferPType, PCodeEmitParamType, PCodeParamType, SortOrder} -import is.hail.types.TypeWithRequiredness -import is.hail.types.physical.{PCode, PType} -import is.hail.types.virtual.Type +import is.hail.asm4s._ +import is.hail.expr.ir.{EmitCode, EmitCodeBuilder, EmitSettable, SCodeEmitParamType, SCodeParamType} +import is.hail.types.VirtualTypeWithReq +import is.hail.types.physical.PType +import is.hail.types.physical.stypes.interfaces.SStream +import is.hail.types.physical.stypes.primitives._ +import is.hail.types.virtual._ object SType { - def chooseCompatibleType(req: TypeWithRequiredness, stypes: SType*): SType = { - InferPType.getCompatiblePType(stypes.map(_.pType), req).sType + def chooseCompatibleType(req: VirtualTypeWithReq, stypes: SType*): SType = { + if (stypes.toSet.size == 1) + stypes.head + else + req.canonicalEmitType.st } -} -trait SType { - def virtualType: Type = pType.virtualType + def canonical(virt: Type): SType = { + PType.canonical(virt).sType + } - def pType: PType + def extractPrimCode(cb: EmitCodeBuilder, x: SCode): Code[_] = x.st.virtualType match { + case TInt32 => x.asInt.intCode(cb) + case TInt64 => x.asLong.longCode(cb) + case TFloat32 => x.asFloat.floatCode(cb) + case TFloat64 => x.asDouble.doubleCode(cb) + case TBoolean => x.asBoolean.boolCode(cb) + } - def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode + def canonical(st: SType): SType = st.canonicalPType().sType +} + +trait SType { + def virtualType: Type def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode def codeTupleTypes(): IndexedSeq[TypeInfo[_]] + def settableTupleTypes(): IndexedSeq[TypeInfo[_]] = codeTupleTypes() + + lazy val nCodes: Int = codeTupleTypes().length + + lazy val nSettables: Int = settableTupleTypes().length + def fromSettables(settables: IndexedSeq[Settable[_]]): SSettable def fromCodes(codes: IndexedSeq[Code[_]]): SCode def canonicalPType(): PType - def paramType: PCodeParamType = PCodeParamType(pType) + def paramType: SCodeParamType = SCodeParamType(this) + + def asIdent: String = 
canonicalPType().asIdent + + def defaultValue: SCode = { + fromCodes(codeTupleTypes().map(ti => ti.uninitializedValue)) + } + + def isPrimitive: Boolean = this match { + case SInt32 | SInt64 | SFloat32 | SFloat64 | SBoolean => true + case _ => false + } - def asIdent: String = pType.asIdent + def isRealizable: Boolean = !this.isInstanceOf[SStream] - def equalsExceptTopLevelRequiredness(that: SType): Boolean = pType.equalModuloRequired(that.pType) + def castRename(t: Type): SType } case class EmitType(st: SType, required: Boolean) { def virtualType: Type = st.virtualType - def paramType: EmitParamType = PCodeEmitParamType(st.pType.setRequired(required)) + + def paramType: SCodeEmitParamType = SCodeEmitParamType(this) + def canonicalPType: PType = st.canonicalPType().setRequired(required) + + def equalModuloRequired(that: EmitType): Boolean = st == that.st + + lazy val codeTupleTypes: IndexedSeq[TypeInfo[_]] = { + val tc = st.codeTupleTypes() + if (required) + tc + else + tc :+ BooleanInfo + } + + lazy val settableTupleTypes: IndexedSeq[TypeInfo[_]] = { + val tc = st.settableTupleTypes() + if (required) + tc + else + tc :+ BooleanInfo + } + + def fromCodes(codes: IndexedSeq[Code[_]]): EmitCode = { + val scode = st.fromCodes(codes.take(st.nCodes)) + val m: Code[Boolean] = if (required) const(false) else coerce[Boolean](codes.last) + val ec = EmitCode(Code._empty, m, scode) + if (ec.required && !this.required) + ec.setOptional + else + ec + } + + def fromSettables(settables: IndexedSeq[Settable[_]]): EmitSettable = new EmitSettable( + if (required) None else Some(coerce[Boolean](settables.last)), + st.fromSettables(settables.take(st.nSettables)) + ) + + def nCodes: Int = codeTupleTypes.length + + def nSettables: Int = settableTupleTypes.length } \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/SingleCodeSCode.scala b/hail/src/main/scala/is/hail/types/physical/stypes/SingleCodeSCode.scala new file mode 100644 index 00000000000..c85184ea79e --- /dev/null +++ b/hail/src/main/scala/is/hail/types/physical/stypes/SingleCodeSCode.scala @@ -0,0 +1,173 @@ +package is.hail.types.physical.stypes + +import is.hail.annotations.Region +import is.hail.asm4s._ +import is.hail.expr.ir._ +import is.hail.expr.ir.streams.{StreamArgType, StreamProducer} +import is.hail.types.physical.PType +import is.hail.types.physical.stypes.interfaces.{SStream, SStreamCode} +import is.hail.types.physical.stypes.primitives._ +import is.hail.types.virtual._ +import is.hail.utils._ + +object SingleCodeType { + def typeInfoFromType(t: Type): TypeInfo[_] = t match { + case TInt32 => IntInfo + case TInt64 => LongInfo + case TFloat32 => FloatInfo + case TFloat64 => DoubleInfo + case TBoolean => BooleanInfo + case TVoid => UnitInfo + case _ => LongInfo // all others passed as ptype references + } + + def fromSType(t: SType): SingleCodeType = t.virtualType match { + case TInt32 => Int32SingleCodeType + case TInt64 => Int64SingleCodeType + case TFloat32 => Float32SingleCodeType + case TFloat64 => Float64SingleCodeType + case TBoolean => BooleanSingleCodeType + case _ => PTypeReferenceSingleCodeType(t.canonicalPType().setRequired(true)) + + } +} + +sealed trait SingleCodeType { + def ti: TypeInfo[_] + + def loadToSCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): SCode + + def virtualType: Type + + def coerceSCode(cb: EmitCodeBuilder, pc: SCode, region: Value[Region], deepCopy: Boolean): SingleCodeSCode + + def loadedSType: SType +} + +case object Int32SingleCodeType extends SingleCodeType 
{ + def ti: TypeInfo[_] = IntInfo + + override def loadedSType: SType = SInt32 + + def loadToSCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): SCode = new SInt32Code(coerce[Int](c)) + + def virtualType: Type = TInt32 + + def coerceSCode(cb: EmitCodeBuilder, pc: SCode, region: Value[Region], deepCopy: Boolean): SingleCodeSCode = SingleCodeSCode(this, pc.asInt.intCode(cb)) +} + +case object Int64SingleCodeType extends SingleCodeType { + def ti: TypeInfo[_] = LongInfo + + override def loadedSType: SType = SInt64 + + def loadToSCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): SCode = new SInt64Code(coerce[Long](c)) + + def virtualType: Type = TInt64 + + def coerceSCode(cb: EmitCodeBuilder, pc: SCode, region: Value[Region], deepCopy: Boolean): SingleCodeSCode = SingleCodeSCode(this, pc.asLong.longCode(cb)) +} + +case object Float32SingleCodeType extends SingleCodeType { + def ti: TypeInfo[_] = FloatInfo + + override def loadedSType: SType = SFloat32 + + def loadToSCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): SCode = new SFloat32Code(coerce[Float](c)) + + def virtualType: Type = TFloat32 + + def coerceSCode(cb: EmitCodeBuilder, pc: SCode, region: Value[Region], deepCopy: Boolean): SingleCodeSCode = SingleCodeSCode(this, pc.asFloat.floatCode(cb)) +} + +case object Float64SingleCodeType extends SingleCodeType { + def ti: TypeInfo[_] = DoubleInfo + + override def loadedSType: SType = SFloat64 + + def loadToSCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): SCode = new SFloat64Code(coerce[Double](c)) + + def virtualType: Type = TFloat64 + + def coerceSCode(cb: EmitCodeBuilder, pc: SCode, region: Value[Region], deepCopy: Boolean): SingleCodeSCode = SingleCodeSCode(this, pc.asDouble.doubleCode(cb)) +} + +case object BooleanSingleCodeType extends SingleCodeType { + def ti: TypeInfo[_] = BooleanInfo + + override def loadedSType: SType = SBoolean + + def loadToSCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): SCode = new SBooleanCode(coerce[Boolean](c)) + + def virtualType: Type = TBoolean + + def coerceSCode(cb: EmitCodeBuilder, pc: SCode, region: Value[Region], deepCopy: Boolean): SingleCodeSCode = SingleCodeSCode(this, pc.asBoolean.boolCode(cb)) +} + +case class StreamSingleCodeType(requiresMemoryManagementPerElement: Boolean, eltType: PType) extends SingleCodeType { + self => + + override def loadedSType: SType = SStream(EmitType(eltType.sType, true)) + + def virtualType: Type = TStream(eltType.virtualType) + + def ti: TypeInfo[_] = classInfo[StreamArgType] + + def loadToSCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): SCode = { + val mb = cb.emb + val xIter = mb.genFieldThisRef[Iterator[java.lang.Long]]("streamInIterator") + + // this, Region, ... + val mkIter = coerce[StreamArgType](c) + val eltRegion = mb.genFieldThisRef[Region]("stream_input_element_region") + val rvAddr = mb.genFieldThisRef[Long]("stream_input_addr") + + val producer = new StreamProducer { + override val length: Option[EmitCodeBuilder => Code[Int]] = None + + override def initialize(cb: EmitCodeBuilder): Unit = { + cb.assign(xIter, mkIter.invoke[Region, Region, Iterator[java.lang.Long]]("apply", r, eltRegion)) + } + + override val elementRegion: Settable[Region] = eltRegion + override val requiresMemoryManagementPerElement: Boolean = self.requiresMemoryManagementPerElement + override val LproduceElement: CodeLabel = mb.defineAndImplementLabel { cb => + // NB: locals should not be used in this implementation. 
The way In() nodes are + // stored in fields at the beginning of code generation leads to the method builder + // here being different from the method the stream will eventually be consumed in + cb.ifx(!xIter.load().hasNext, cb.goto(LendOfStream)) + cb.assign(rvAddr, xIter.load().next().invoke[Long]("longValue")) + cb.goto(LproduceElementDone) + } + + override val element: EmitCode = EmitCode.fromI(mb)(cb => IEmitCode.present(cb, eltType.loadCheapSCode(cb, rvAddr))) + + override def close(cb: EmitCodeBuilder): Unit = {} + } + SStreamCode(SStream(EmitType(eltType.sType, true)), producer) + } + + def coerceSCode(cb: EmitCodeBuilder, pc: SCode, region: Value[Region], deepCopy: Boolean): SingleCodeSCode = throw new UnsupportedOperationException +} + +case class PTypeReferenceSingleCodeType(pt: PType) extends SingleCodeType { + def ti: TypeInfo[_] = LongInfo + + override def loadedSType: SType = pt.sType + + def loadToSCode(cb: EmitCodeBuilder, r: Value[Region], c: Code[_]): SCode = pt.loadCheapSCode(cb, coerce[Long](c)) + + def virtualType: Type = pt.virtualType + + def coerceSCode(cb: EmitCodeBuilder, pc: SCode, region: Value[Region], deepCopy: Boolean): SingleCodeSCode = { + SingleCodeSCode(this, pt.store(cb, region, pc, deepCopy = deepCopy)) + } +} + +object SingleCodeSCode { + def fromSCode(cb: EmitCodeBuilder, pc: SCode, region: Value[Region], deepCopy: Boolean = false): SingleCodeSCode = { + SingleCodeType.fromSType(pc.st).coerceSCode(cb, pc, region, deepCopy) + } +} + +case class SingleCodeSCode(typ: SingleCodeType, code: Code[_]) \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBaseStructPointer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBaseStructPointer.scala index f849530b5ef..5da7ec32fd1 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBaseStructPointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBaseStructPointer.scala @@ -4,28 +4,29 @@ import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, IEmitCode, SortOrder} -import is.hail.types.physical.stypes.interfaces.{SBaseStruct, SStructSettable} -import is.hail.types.physical.stypes.{SCode, SSettable, SType} -import is.hail.types.physical.{PBaseStruct, PBaseStructCode, PBaseStructValue, PCode, PStructSettable, PType} +import is.hail.types.physical.stypes.interfaces.{SBaseStruct, SBaseStructCode, SBaseStructValue, SStructSettable} +import is.hail.types.physical.stypes.{EmitType, SCode, SSettable, SType} +import is.hail.types.physical.{PBaseStruct, PType} +import is.hail.types.virtual.{TBaseStruct, Type} import is.hail.utils.FastIndexedSeq case class SBaseStructPointer(pType: PBaseStruct) extends SBaseStruct { + require(!pType.required) def size: Int = pType.size + lazy val virtualType: TBaseStruct = pType.virtualType.asInstanceOf[TBaseStruct] + + override def castRename(t: Type): SType = SBaseStructPointer(pType.deepRename(t).asInstanceOf[PBaseStruct]) + + override def fieldIdx(fieldName: String): Int = pType.fieldIdx(fieldName) + def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { new SBaseStructPointerCode(this, pType.store(cb, region, value, deepCopy)) } def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(LongInfo) - def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = { - if (pt == this.pType) - 
new SBaseStructPointerCode(this, addr) - else - coerceOrCopy(cb, region, pt.loadCheapPCode(cb, addr), deepCopy = false) - } - def fromSettables(settables: IndexedSeq[Settable[_]]): SBaseStructPointerSettable = { val IndexedSeq(a: Settable[Long@unchecked]) = settables assert(a.ti == LongInfo) @@ -40,7 +41,8 @@ case class SBaseStructPointer(pType: PBaseStruct) extends SBaseStruct { def canonicalPType(): PType = pType - override val fieldTypes: Array[SType] = pType.types.map(_.sType) + override val fieldTypes: IndexedSeq[SType] = pType.types.map(_.sType) + override val fieldEmitTypes: IndexedSeq[EmitType] = pType.types.map(t => EmitType(t.sType, t.required)) } @@ -53,20 +55,20 @@ object SBaseStructPointerSettable { class SBaseStructPointerSettable( val st: SBaseStructPointer, val a: Settable[Long] -) extends PStructSettable { +) extends SStructSettable { val pt: PBaseStruct = st.pType - def get: PBaseStructCode = new SBaseStructPointerCode(st, a) + def get: SBaseStructCode = new SBaseStructPointerCode(st, a) def settableTuple(): IndexedSeq[Settable[_]] = FastIndexedSeq(a) def loadField(cb: EmitCodeBuilder, fieldIdx: Int): IEmitCode = { IEmitCode(cb, pt.isFieldMissing(a, fieldIdx), - pt.fields(fieldIdx).typ.loadCheapPCode(cb, pt.loadField(a, fieldIdx))) + pt.fields(fieldIdx).typ.loadCheapSCode(cb, pt.loadField(a, fieldIdx))) } - def store(cb: EmitCodeBuilder, pv: PCode): Unit = { + def store(cb: EmitCodeBuilder, pv: SCode): Unit = { cb.assign(a, pv.asInstanceOf[SBaseStructPointerCode].a) } @@ -75,20 +77,20 @@ class SBaseStructPointerSettable( } } -class SBaseStructPointerCode(val st: SBaseStructPointer, val a: Code[Long]) extends PBaseStructCode { +class SBaseStructPointerCode(val st: SBaseStructPointer, val a: Code[Long]) extends SBaseStructCode { val pt: PBaseStruct = st.pType def code: Code[_] = a - def codeTuple(): IndexedSeq[Code[_]] = FastIndexedSeq(a) + def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = FastIndexedSeq(a) - def memoize(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): PBaseStructValue = { + def memoize(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): SBaseStructValue = { val s = SBaseStructPointerSettable(sb, st, name) cb.assign(s, this) s } - def memoize(cb: EmitCodeBuilder, name: String): PBaseStructValue = memoize(cb, name, cb.localBuilder) + def memoize(cb: EmitCodeBuilder, name: String): SBaseStructValue = memoize(cb, name, cb.localBuilder) - def memoizeField(cb: EmitCodeBuilder, name: String): PBaseStructValue = memoize(cb, name, cb.fieldBuilder) + def memoizeField(cb: EmitCodeBuilder, name: String): SBaseStructValue = memoize(cb, name, cb.fieldBuilder) } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBinaryPointer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBinaryPointer.scala index 7a6505d7de5..18fea2daba7 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBinaryPointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBinaryPointer.scala @@ -4,13 +4,17 @@ import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} -import is.hail.types.physical.stypes.interfaces.SBinary -import is.hail.types.physical.stypes.{SCode, SType} -import is.hail.types.physical.{PBinary, PBinaryCode, PBinaryValue, PCode, PSettable, PType} +import is.hail.types.physical.stypes.interfaces.{SBinary, SBinaryCode, SBinaryValue} +import 
is.hail.types.physical.stypes.{SCode, SSettable, SType} +import is.hail.types.physical.{PBinary, PType} +import is.hail.types.virtual.Type import is.hail.utils._ case class SBinaryPointer(pType: PBinary) extends SBinary { + require(!pType.required) + + lazy val virtualType: Type = pType.virtualType def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { new SBinaryPointerCode(this, pType.store(cb, region, value, deepCopy)) } @@ -21,7 +25,7 @@ case class SBinaryPointer(pType: PBinary) extends SBinary { if (pt == this.pType) new SBinaryPointerCode(this, addr) else - coerceOrCopy(cb, region, pt.loadCheapPCode(cb, addr), deepCopy = false) + coerceOrCopy(cb, region, pt.loadCheapSCode(cb, addr), deepCopy = false) } def fromSettables(settables: IndexedSeq[Settable[_]]): SBinaryPointerSettable = { @@ -37,6 +41,8 @@ case class SBinaryPointer(pType: PBinary) extends SBinary { } def canonicalPType(): PType = pType + + override def castRename(t: Type): SType = this } object SBinaryPointerSettable { @@ -44,8 +50,8 @@ object SBinaryPointerSettable { new SBinaryPointerSettable(st, sb.newSettable[Long](name)) } -class SBinaryPointerSettable(val st: SBinaryPointer, val a: Settable[Long]) extends PBinaryValue with PSettable { - val pt: PBinary = st.pType +class SBinaryPointerSettable(val st: SBinaryPointer, val a: Settable[Long]) extends SBinaryValue with SSettable { + private val pt: PBinary = st.pType override def bytesAddress(): Code[Long] = st.pType.bytesAddress(a) @@ -59,15 +65,15 @@ class SBinaryPointerSettable(val st: SBinaryPointer, val a: Settable[Long]) exte def loadByte(i: Code[Int]): Code[Byte] = Region.loadByte(pt.bytesAddress(a) + i.toL) - def store(cb: EmitCodeBuilder, pc: PCode): Unit = cb.assign(a, pc.asInstanceOf[SBinaryPointerCode].a) + def store(cb: EmitCodeBuilder, pc: SCode): Unit = cb.assign(a, pc.asInstanceOf[SBinaryPointerCode].a) } -class SBinaryPointerCode(val st: SBinaryPointer, val a: Code[Long]) extends PBinaryCode { - val pt: PBinary = st.pType +class SBinaryPointerCode(val st: SBinaryPointer, val a: Code[Long]) extends SBinaryCode { + private val pt: PBinary = st.pType def code: Code[_] = a - def codeTuple(): IndexedSeq[Code[_]] = FastIndexedSeq(a) + def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = FastIndexedSeq(a) def loadLength(): Code[Int] = pt.loadLength(a) diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalCall.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalCall.scala index 7dd297e08b9..3f0391ea887 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalCall.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalCall.scala @@ -4,67 +4,64 @@ import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} -import is.hail.types.physical.stypes.interfaces.SCall -import is.hail.types.physical.stypes.{SCode, SType} -import is.hail.types.physical.{PCall, PCallCode, PCallValue, PCanonicalCall, PCode, PSettable, PType} +import is.hail.types.physical.stypes.interfaces.{SCall, SCallCode, SCallValue} +import is.hail.types.physical.stypes.{SCode, SSettable, SType} +import is.hail.types.physical.{PCall, PCanonicalCall, PType} +import is.hail.types.virtual.{TCall, Type} import is.hail.utils._ import is.hail.variant.Genotype -case class SCanonicalCall(required: Boolean) extends SCall { - override def 
pType: PCall = PCanonicalCall(required) - +case object SCanonicalCall extends SCall { def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { value.st match { - case SCanonicalCall(_) => - new SCanonicalCallCode(required, value.asInstanceOf[SCanonicalCallCode].call) + case SCanonicalCall => value } } - def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(IntInfo) + lazy val virtualType: Type = TCall - def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = { - pt match { - case PCanonicalCall(_) => - new SCanonicalCallCode(required, Region.loadInt(addr)) - } - } + override def castRename(t: Type): SType = this + + def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(IntInfo) def fromSettables(settables: IndexedSeq[Settable[_]]): SCanonicalCallSettable = { val IndexedSeq(call: Settable[Int@unchecked]) = settables assert(call.ti == IntInfo) - new SCanonicalCallSettable(required, call) + new SCanonicalCallSettable(call) } def fromCodes(codes: IndexedSeq[Code[_]]): SCanonicalCallCode = { val IndexedSeq(call: Code[Int@unchecked]) = codes assert(call.ti == IntInfo) - new SCanonicalCallCode(required, call) + new SCanonicalCallCode(call) } - def canonicalPType(): PType = pType + def canonicalPType(): PType = PCanonicalCall(false) + + def constructFromIntRepr(c: Code[Int]): SCanonicalCallCode = new SCanonicalCallCode(c) } object SCanonicalCallSettable { - def apply(sb: SettableBuilder, name: String, required: Boolean): SCanonicalCallSettable = - new SCanonicalCallSettable(required, sb.newSettable[Int](s"${ name }_call")) + def apply(sb: SettableBuilder, name: String): SCanonicalCallSettable = + new SCanonicalCallSettable(sb.newSettable[Int](s"${ name }_call")) } -class SCanonicalCallSettable(required: Boolean, val call: Settable[Int]) extends PCallValue with PSettable { +class SCanonicalCallSettable(val call: Settable[Int]) extends SCallValue with SSettable { - val pt: PCall = PCanonicalCall(required) + val pt: PCall = PCanonicalCall(false) override def canonicalCall(cb: EmitCodeBuilder): Code[Int] = call - override def store(cb: EmitCodeBuilder, v: PCode): Unit = cb.assign(call, v.asInstanceOf[SCanonicalCallCode].call) + override def store(cb: EmitCodeBuilder, v: SCode): Unit = cb.assign(call, v.asInstanceOf[SCanonicalCallCode].call) - val st: SCanonicalCall = SCanonicalCall(required) + val st: SCanonicalCall.type = SCanonicalCall - def get: PCallCode = new SCanonicalCallCode(required, call) + def get: SCallCode = new SCanonicalCallCode(call) def settableTuple(): IndexedSeq[Settable[_]] = FastIndexedSeq(call) - def store(pc: PCode): Code[Unit] = call.store(pc.asInstanceOf[SCanonicalCallCode].call) + def store(pc: SCode): Code[Unit] = call.store(pc.asInstanceOf[SCanonicalCallCode].call) def ploidy(): Code[Int] = get.ploidy() @@ -96,29 +93,29 @@ class SCanonicalCallSettable(required: Boolean, val call: Settable[Int]) extends } } -class SCanonicalCallCode(required: Boolean, val call: Code[Int]) extends PCallCode { +class SCanonicalCallCode(val call: Code[Int]) extends SCallCode { - val pt: PCall = PCanonicalCall(required) + val pt: PCall = PCanonicalCall(false) - val st: SCanonicalCall = SCanonicalCall(required) + val st: SCanonicalCall.type = SCanonicalCall def code: Code[_] = call - def codeTuple(): IndexedSeq[Code[_]] = FastIndexedSeq(call) + def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = FastIndexedSeq(call) def ploidy(): Code[Int] = (call >>> 1) & 0x3 def isPhased(): 
Code[Boolean] = (call & 0x1).ceq(1) - def memoize(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): PCallValue = { - val s = SCanonicalCallSettable(sb, name, required) + def memoize(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): SCallValue = { + val s = SCanonicalCallSettable(sb, name) cb.assign(s, this) s } - def memoize(cb: EmitCodeBuilder, name: String): PCallValue = memoize(cb, name, cb.localBuilder) + def memoize(cb: EmitCodeBuilder, name: String): SCallValue = memoize(cb, name, cb.localBuilder) - def memoizeField(cb: EmitCodeBuilder, name: String): PCallValue = memoize(cb, name, cb.fieldBuilder) + def memoizeField(cb: EmitCodeBuilder, name: String): SCallValue = memoize(cb, name, cb.fieldBuilder) def store(mb: EmitMethodBuilder[_], r: Value[Region], dst: Code[Long]): Code[Unit] = Region.storeInt(dst, call) diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalLocusPointer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalLocusPointer.scala index ff601cb89c1..2064cbab6a9 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalLocusPointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalLocusPointer.scala @@ -4,14 +4,23 @@ import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} -import is.hail.types.physical.stypes.interfaces.SLocus -import is.hail.types.physical.stypes.{SCode, SType} -import is.hail.types.physical.{PCanonicalLocus, PCode, PLocusCode, PLocusValue, PSettable, PStringCode, PType} +import is.hail.types.physical.stypes.interfaces.{SLocus, SLocusCode, SLocusValue, SString, SStringCode} +import is.hail.types.physical.stypes.{SCode, SSettable, SType} +import is.hail.types.physical.{PCanonicalLocus, PType} +import is.hail.types.virtual.Type import is.hail.utils.FastIndexedSeq import is.hail.variant.{Locus, ReferenceGenome} case class SCanonicalLocusPointer(pType: PCanonicalLocus) extends SLocus { + require(!pType.required) + + override def contigType: SString = pType.contigType.sType + + lazy val virtualType: Type = pType.virtualType + + override def castRename(t: Type): SType = this + override def rg: ReferenceGenome = pType.rg def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { @@ -20,12 +29,7 @@ case class SCanonicalLocusPointer(pType: PCanonicalLocus) extends SLocus { def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(LongInfo) - def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = { - pt match { - case PCanonicalLocus(_, _) => - new SCanonicalLocusPointerCode(this, addr) - } - } + override def settableTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(LongInfo, LongInfo, IntInfo) def fromSettables(settables: IndexedSeq[Settable[_]]): SCanonicalLocusPointerSettable = { val IndexedSeq(a: Settable[Long@unchecked], contig: Settable[Long@unchecked], position: Settable[Int@unchecked]) = settables @@ -59,34 +63,34 @@ class SCanonicalLocusPointerSettable( val a: Settable[Long], _contig: Settable[Long], val _position: Settable[Int] -) extends PLocusValue with PSettable { +) extends SLocusValue with SSettable { val pt: PCanonicalLocus = st.pType def get = new SCanonicalLocusPointerCode(st, a) def settableTuple(): IndexedSeq[Settable[_]] = FastIndexedSeq(a, _contig, _position) - def store(cb: EmitCodeBuilder, pc: PCode): Unit = { 
+ def store(cb: EmitCodeBuilder, pc: SCode): Unit = { cb.assign(a, pc.asInstanceOf[SCanonicalLocusPointerCode].a) cb.assign(_contig, pt.contigAddr(a)) cb.assign(_position, pt.position(a)) } - def contig(cb: EmitCodeBuilder): PStringCode = { - pt.contigType.loadCheapPCode(cb, _contig).asString + def contig(cb: EmitCodeBuilder): SStringCode = { + pt.contigType.loadCheapSCode(cb, _contig).asString } def position(cb: EmitCodeBuilder): Code[Int] = _position } -class SCanonicalLocusPointerCode(val st: SCanonicalLocusPointer, val a: Code[Long]) extends PLocusCode { +class SCanonicalLocusPointerCode(val st: SCanonicalLocusPointer, val a: Code[Long]) extends SLocusCode { val pt: PCanonicalLocus = st.pType def code: Code[_] = a - def codeTuple(): IndexedSeq[Code[_]] = FastIndexedSeq(a) + def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = FastIndexedSeq(a) - def contig(cb: EmitCodeBuilder): PStringCode = pt.contigType.loadCheapPCode(cb, pt.contigAddr(a)).asString + def contig(cb: EmitCodeBuilder): SStringCode = pt.contigType.loadCheapSCode(cb, pt.contigAddr(a)).asString def position(cb: EmitCodeBuilder): Code[Int] = pt.position(a) diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalShufflePointer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalShufflePointer.scala index d44bc4294f2..60952a94d96 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalShufflePointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalShufflePointer.scala @@ -5,29 +5,27 @@ import is.hail.asm4s.{Code, IntInfo, LongInfo, Settable, SettableBuilder, TypeIn import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} import is.hail.services.shuffler.Wire -import is.hail.types.physical.stypes.interfaces.SShuffle -import is.hail.types.physical.stypes.{SCode, SType} -import is.hail.types.physical.{PCanonicalShuffle, PCode, PSettable, PShuffle, PShuffleCode, PShuffleValue, PType} +import is.hail.types.physical.stypes.interfaces.{SShuffle, SShuffleCode, SShuffleValue} +import is.hail.types.physical.stypes.{SCode, SSettable, SType} +import is.hail.types.physical.{PCanonicalShuffle, PShuffle, PType} +import is.hail.types.virtual.Type import is.hail.utils.FastIndexedSeq case class SCanonicalShufflePointer(pType: PCanonicalShuffle) extends SShuffle { + require(!pType.required) + + lazy val virtualType: Type = pType.virtualType + + override def castRename(t: Type): SType = this lazy val binarySType = SBinaryPointer(pType.representation) def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { - new SCanonicalShufflePointerCode(this, pType.representation.loadCheapPCode(cb, pType.store(cb, region, value, deepCopy))) + new SCanonicalShufflePointerCode(this, pType.representation.loadCheapSCode(cb, pType.store(cb, region, value, deepCopy))) } def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(LongInfo) - def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = { - pt match { - case t: PCanonicalShuffle => - assert(t.equalModuloRequired(this.pType)) - new SCanonicalShufflePointerCode(this, t.loadBinary(cb, addr)) - } - } - def fromSettables(settables: IndexedSeq[Settable[_]]): SCanonicalShufflePointerSettable = { new SCanonicalShufflePointerSettable(this, binarySType.fromSettables(settables)) } @@ -48,14 +46,14 @@ object SCanonicalShufflePointerSettable { 
"PCanonicalShuffleSettableOff", pt.representation.allocate(region, Wire.ID_SIZE)) cb.append(pt.representation.store(off, bytes)) - SCanonicalShufflePointer(pt).loadFrom(cb, region, pt, off).memoize(cb, "scanonicalshuffle_fromarraybytes").asInstanceOf[SCanonicalShufflePointerSettable] + pt.loadCheapSCode(cb, off).memoize(cb, "scanonicalshuffle_fromarraybytes").asInstanceOf[SCanonicalShufflePointerSettable] } } -class SCanonicalShufflePointerSettable(val st: SCanonicalShufflePointer, val shuffle: SBinaryPointerSettable) extends PShuffleValue with PSettable { +class SCanonicalShufflePointerSettable(val st: SCanonicalShufflePointer, val shuffle: SBinaryPointerSettable) extends SShuffleValue with SSettable { val pt: PCanonicalShuffle = st.pType - def get: PShuffleCode = new SCanonicalShufflePointerCode(st, shuffle.get) + def get: SShuffleCode = new SCanonicalShufflePointerCode(st, shuffle.get) def settableTuple(): IndexedSeq[Settable[_]] = shuffle.settableTuple() @@ -63,21 +61,21 @@ class SCanonicalShufflePointerSettable(val st: SCanonicalShufflePointer, val shu def loadBytes(): Code[Array[Byte]] = shuffle.loadBytes() - def store(cb: EmitCodeBuilder, pc: PCode): Unit = shuffle.store(cb, pc.asInstanceOf[SCanonicalShufflePointerCode].shuffle) + def store(cb: EmitCodeBuilder, pc: SCode): Unit = shuffle.store(cb, pc.asInstanceOf[SCanonicalShufflePointerCode].shuffle) def storeFromBytes(cb: EmitCodeBuilder, region: Value[Region], bytes: Value[Array[Byte]]): Unit = { val addr = cb.newLocal[Long]("bytesAddr", st.pType.representation.allocate(region, bytes.length())) cb += st.pType.representation.store(addr, bytes) - shuffle.store(cb, st.pType.representation.loadCheapPCode(cb, addr)) + shuffle.store(cb, st.pType.representation.loadCheapSCode(cb, addr)) } } -class SCanonicalShufflePointerCode(val st: SCanonicalShufflePointer, val shuffle: SBinaryPointerCode) extends PShuffleCode { +class SCanonicalShufflePointerCode(val st: SCanonicalShufflePointer, val shuffle: SBinaryPointerCode) extends SShuffleCode { val pt: PShuffle = st.pType def code: Code[_] = shuffle.code - def codeTuple(): IndexedSeq[Code[_]] = shuffle.codeTuple() + def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = shuffle.makeCodeTuple(cb) def memoize(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): SCanonicalShufflePointerSettable = { val s = SCanonicalShufflePointerSettable(sb, st, name) diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIndexablePointer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIndexablePointer.scala index 42187e4bc18..be362393fac 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIndexablePointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIndexablePointer.scala @@ -3,13 +3,20 @@ package is.hail.types.physical.stypes.concrete import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir.{EmitCodeBuilder, IEmitCode} -import is.hail.types.physical.stypes.interfaces.SContainer -import is.hail.types.physical.stypes.{EmitType, SCode, SType} -import is.hail.types.physical.{PArray, PCanonicalArray, PCanonicalDict, PCanonicalSet, PCode, PContainer, PIndexableCode, PIndexableValue, PSettable, PType} +import is.hail.types.physical.stypes.interfaces.{SContainer, SIndexableCode, SIndexableValue} +import is.hail.types.physical.stypes.{EmitType, SCode, SSettable, SType} +import is.hail.types.physical.{PArray, PCanonicalArray, PCanonicalDict, PCanonicalSet, PContainer, PType} +import 
is.hail.types.virtual.Type import is.hail.utils.FastIndexedSeq case class SIndexablePointer(pType: PContainer) extends SContainer { + require(!pType.required) + + lazy val virtualType: Type = pType.virtualType + + override def castRename(t: Type): SType = SIndexablePointer(pType.deepRename(t).asInstanceOf[PContainer]) + override def elementType: SType = pType.elementType.sType def elementEmitType: EmitType = EmitType(elementType, pType.elementType.required) @@ -20,12 +27,7 @@ case class SIndexablePointer(pType: PContainer) extends SContainer { def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(LongInfo) - def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = { - if (pt == this.pType) - new SIndexablePointerCode(this, addr) - else - coerceOrCopy(cb, region, pt.loadCheapPCode(cb, addr), deepCopy = false) - } + override def settableTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(LongInfo, IntInfo, LongInfo) def fromSettables(settables: IndexedSeq[Settable[_]]): SIndexablePointerSettable = { val IndexedSeq(a: Settable[Long@unchecked], length: Settable[Int@unchecked], elementsAddress: Settable[Long@unchecked]) = settables @@ -45,26 +47,26 @@ case class SIndexablePointer(pType: PContainer) extends SContainer { } -class SIndexablePointerCode(val st: SIndexablePointer, val a: Code[Long]) extends PIndexableCode { +class SIndexablePointerCode(val st: SIndexablePointer, val a: Code[Long]) extends SIndexableCode { val pt: PContainer = st.pType def code: Code[_] = a - def codeTuple(): IndexedSeq[Code[_]] = FastIndexedSeq(a) + def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = FastIndexedSeq(a) def loadLength(): Code[Int] = pt.loadLength(a) - def memoize(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): PIndexableValue = { + def memoize(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): SIndexableValue = { val s = SIndexablePointerSettable(sb, st, name) cb.assign(s, this) s } - def memoize(cb: EmitCodeBuilder, name: String): PIndexableValue = memoize(cb, name, cb.localBuilder) + def memoize(cb: EmitCodeBuilder, name: String): SIndexableValue = memoize(cb, name, cb.localBuilder) - def memoizeField(cb: EmitCodeBuilder, name: String): PIndexableValue = memoize(cb, name, cb.fieldBuilder) + def memoizeField(cb: EmitCodeBuilder, name: String): SIndexableValue = memoize(cb, name, cb.fieldBuilder) - def castToArray(cb: EmitCodeBuilder): PIndexableCode = { + def castToArray(cb: EmitCodeBuilder): SIndexableCode = { pt match { case t: PArray => this case t: PCanonicalDict => new SIndexablePointerCode(SIndexablePointer(t.arrayRep), a) @@ -87,10 +89,10 @@ class SIndexablePointerSettable( val a: Settable[Long], val length: Settable[Int], val elementsAddress: Settable[Long] -) extends PIndexableValue with PSettable { +) extends SIndexableValue with SSettable { val pt: PContainer = st.pType - def get: PIndexableCode = new SIndexablePointerCode(st, a) + def get: SIndexableCode = new SIndexablePointerCode(st, a) def settableTuple(): IndexedSeq[Settable[_]] = FastIndexedSeq(a, length, elementsAddress) @@ -100,12 +102,12 @@ class SIndexablePointerSettable( val iv = cb.newLocal("pcindval_i", i) IEmitCode(cb, isElementMissing(iv), - pt.elementType.loadCheapPCode(cb, pt.loadElement(a, length, iv))) // FIXME loadElement should take elementsAddress + pt.elementType.loadCheapSCode(cb, pt.loadElement(a, length, iv))) // FIXME loadElement should take elementsAddress } def isElementMissing(i: Code[Int]): Code[Boolean] = pt.isElementMissing(a, i) - 
def store(cb: EmitCodeBuilder, pc: PCode): Unit = { + def store(cb: EmitCodeBuilder, pc: SCode): Unit = { cb.assign(a, pc.asInstanceOf[SIndexablePointerCode].a) cb.assign(length, pt.loadLength(a)) cb.assign(elementsAddress, pt.firstElementOffset(a, length)) @@ -123,7 +125,7 @@ class SIndexablePointerSettable( cb.ifx(isElementMissing(idx), {}, // do nothing, { - val elt = et.loadCheapPCode(cb, et.loadFromNested(elementPtr)) + val elt = et.loadCheapSCode(cb, et.loadFromNested(elementPtr)) f(cb, idx, elt) }) cb.assign(idx, idx + 1) diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SInsertFieldsStruct.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SInsertFieldsStruct.scala new file mode 100644 index 00000000000..bcfd8ee050d --- /dev/null +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SInsertFieldsStruct.scala @@ -0,0 +1,164 @@ +package is.hail.types.physical.stypes.concrete + +import is.hail.annotations.Region +import is.hail.asm4s.{Code, Settable, TypeInfo, Value} +import is.hail.expr.ir.{EmitCode, EmitCodeBuilder, EmitSettable, IEmitCode} +import is.hail.types.physical.stypes.interfaces.{SBaseStruct, SBaseStructCode, SStructSettable} +import is.hail.types.physical.stypes.{EmitType, SCode, SType} +import is.hail.types.physical.{PCanonicalStruct, PType} +import is.hail.types.virtual.{TStruct, Type} +import is.hail.utils.BoxedArrayBuilder + +case class SInsertFieldsStruct(virtualType: TStruct, parent: SBaseStruct, insertedFields: IndexedSeq[(String, EmitType)]) extends SBaseStruct { + override def size: Int = virtualType.size + + // Maps index in result struct to index in insertedFields. + // Indices that refer to parent fields are not present. + lazy val insertedFieldIndices: Map[Int, Int] = insertedFields.zipWithIndex + .map { case ((name, _), idx) => virtualType.fieldIdx(name) -> idx } + .toMap + + def getFieldIndexInNewOrParent(idx: Int): Either[Int, Int] = { + insertedFieldIndices.get(idx) match { + case Some(idx) => Right(idx) + case None => Left(parent.fieldIdx(virtualType.fieldNames(idx))) + } + } + + val fieldEmitTypes: IndexedSeq[EmitType] = virtualType.fieldNames.zipWithIndex.map { case (f, idx) => + insertedFieldIndices.get(idx) match { + case Some(idx) => insertedFields(idx)._2 + case None => parent.fieldEmitTypes(parent.fieldIdx(f)) + } + } + + private lazy val insertedFieldCodeStarts = insertedFields.map(_._2.nCodes).scanLeft(0)(_ + _).init + private lazy val insertedFieldSettableStarts = insertedFields.map(_._2.nSettables).scanLeft(0)(_ + _).init + + override lazy val fieldTypes: IndexedSeq[SType] = fieldEmitTypes.map(_.st) + + override def fieldIdx(fieldName: String): Int = virtualType.fieldIdx(fieldName) + + override def canonicalPType(): PType = PCanonicalStruct(false, virtualType.fieldNames.zip(fieldEmitTypes).map { case (f, et) => (f, et.canonicalPType) }: _*) + + lazy val codeTupleTypes: IndexedSeq[TypeInfo[_]] = parent.codeTupleTypes ++ insertedFields.flatMap(_._2.codeTupleTypes) + + override lazy val settableTupleTypes: IndexedSeq[TypeInfo[_]] = parent.settableTupleTypes() ++ insertedFields.flatMap(_._2.settableTupleTypes) + + override def fromCodes(codes: IndexedSeq[Code[_]]): SInsertFieldsStructCode = { + assert(codes.map(_.ti) == codeTupleTypes) + new SInsertFieldsStructCode(this, parent.fromCodes(codes.take(parent.nCodes)).asInstanceOf[SBaseStructCode], insertedFields.indices.map { i => + val et = insertedFields(i)._2 + val start = insertedFieldCodeStarts(i) + parent.nCodes + et.fromCodes(codes.slice(start, 
start + et.nCodes)) + }) + } + + override def fromSettables(settables: IndexedSeq[Settable[_]]): SInsertFieldsStructSettable = { + assert(settables.map(_.ti) == settableTupleTypes) + new SInsertFieldsStructSettable(this, parent.fromSettables(settables.take(parent.nSettables)).asInstanceOf[SStructSettable], insertedFields.indices.map { i => + val et = insertedFields(i)._2 + val start = insertedFieldSettableStarts(i) + parent.nSettables + et.fromSettables(settables.slice(start, start + et.nSettables)) + }) + } + + override def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { + value match { + case ss: SInsertFieldsStructCode if ss.st == this => value + case _ => throw new RuntimeException(s"copy insertfields struct") + } + } + + override def castRename(t: Type): SType = { + val ts = t.asInstanceOf[TStruct] + + val parentType = parent.virtualType.asInstanceOf[TStruct] + + val renamedInsertedFields = Array.fill[(String, EmitType)](insertedFields.size)(null) + val parentPassThroughFieldBuilder = new BoxedArrayBuilder[(String, (String, Type))]() + + (0 until ts.size).foreach { i => + val newField = ts.fields(i) + val newName = newField.name + val oldName = virtualType.fieldNames(i) + insertedFieldIndices.get(i) match { + case Some(idx) => + val et = insertedFields(idx)._2 + renamedInsertedFields(idx) = ((newName, et.copy(st = et.st.castRename(newField.typ)))) + case None => parentPassThroughFieldBuilder += ((oldName, (newName, newField.typ))) + } + } + + val parentPassThroughMap = parentPassThroughFieldBuilder.result().toMap + val parentCastType = TStruct(parentType.fieldNames.map(f => parentPassThroughMap.getOrElse(f, (f, parentType.fieldType(f)))): _*) + val renamedParentType = parent.castRename(parentCastType) + SInsertFieldsStruct(ts, + renamedParentType.asInstanceOf[SBaseStruct], + renamedInsertedFields + ) + } +} + +class SInsertFieldsStructSettable(val st: SInsertFieldsStruct, parent: SStructSettable, newFields: IndexedSeq[EmitSettable]) extends SStructSettable { + def get: SInsertFieldsStructCode = new SInsertFieldsStructCode(st, parent.load().asBaseStruct, newFields.map(_.load)) + + def settableTuple(): IndexedSeq[Settable[_]] = parent.settableTuple() ++ newFields.flatMap(_.settableTuple()) + + def loadField(cb: EmitCodeBuilder, fieldIdx: Int): IEmitCode = { + st.getFieldIndexInNewOrParent(fieldIdx) match { + case Left(parentIdx) => parent.loadField(cb, parentIdx) + case Right(newFieldsIdx) => newFields(newFieldsIdx).toI(cb) + } + } + + def isFieldMissing(fieldIdx: Int): Code[Boolean] = + st.getFieldIndexInNewOrParent(fieldIdx) match { + case Left(parentIdx) => parent.isFieldMissing(parentIdx) + case Right(newFieldsIdx) => newFields(newFieldsIdx).m + } + + def store(cb: EmitCodeBuilder, pv: SCode): Unit = { + val sifc = pv.asInstanceOf[SInsertFieldsStructCode] + parent.store(cb, sifc.parent) + newFields.zip(sifc.newFields).foreach { case (settable, code) => cb.assign(settable, code) } + } +} + +class SInsertFieldsStructCode(val st: SInsertFieldsStruct, val parent: SBaseStructCode, val newFields: IndexedSeq[EmitCode]) extends SBaseStructCode { + override def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = parent.makeCodeTuple(cb) ++ newFields.flatMap(_.makeCodeTuple(cb)) + + override def memoize(cb: EmitCodeBuilder, name: String): SInsertFieldsStructSettable = { + new SInsertFieldsStructSettable(st, parent.memoize(cb, name + "_parent").asInstanceOf[SStructSettable], newFields.indices.map { i => + val code = newFields(i) + val 
es = cb.emb.newEmitLocal(s"${ name }_nf_$i", code.emitType) + es.store(cb, code) + es + }) + } + + override def memoizeField(cb: EmitCodeBuilder, name: String): SInsertFieldsStructSettable = { + new SInsertFieldsStructSettable(st, parent.memoizeField(cb, name + "_parent").asInstanceOf[SStructSettable], newFields.indices.map { i => + val code = newFields(i) + val es = cb.emb.newEmitField(s"${ name }_nf_$i", code.emitType) + es.store(cb, code) + es + }) + } + + override def _insert(newType: TStruct, fields: (String, EmitCode)*): SBaseStructCode = { + val newFieldSet = fields.map(_._1).toSet + val filteredNewFields = st.insertedFields.map(_._1) + .zipWithIndex + .filter { case (name, idx) => !newFieldSet.contains(name) } + .map { case (name, idx) => (name, newFields(idx)) } + parent._insert(newType, filteredNewFields ++ fields: _*) + } + + override def loadSingleField(cb: EmitCodeBuilder, fieldIdx: Int): IEmitCode = { + st.getFieldIndexInNewOrParent(fieldIdx) match { + case Left(parentIdx) => parent.loadSingleField(cb, parentIdx) + case Right(newIdx) => newFields(newIdx).toI(cb) + } + } +} \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIntervalPointer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIntervalPointer.scala index d890a9473b1..395c6dbb011 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIntervalPointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIntervalPointer.scala @@ -4,27 +4,27 @@ import is.hail.annotations.Region import is.hail.asm4s.{BooleanInfo, Code, IntInfo, LongInfo, Settable, SettableBuilder, TypeInfo, Value} import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, IEmitCode, SortOrder} -import is.hail.types.physical.stypes.interfaces.SInterval -import is.hail.types.physical.stypes.{SCode, SType} -import is.hail.types.physical.{PCanonicalInterval, PCode, PInterval, PIntervalCode, PIntervalValue, PSettable, PType} +import is.hail.types.physical.stypes.interfaces.{SInterval, SIntervalCode, SIntervalValue} +import is.hail.types.physical.stypes.{EmitType, SCode, SSettable, SType} +import is.hail.types.physical.{PCanonicalInterval, PInterval, PType} +import is.hail.types.virtual.Type import is.hail.utils.FastIndexedSeq case class SIntervalPointer(pType: PInterval) extends SInterval { + require(!pType.required) + def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { new SIntervalPointerCode(this, pType.store(cb, region, value, deepCopy)) } - def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(LongInfo, IntInfo, IntInfo) + override def castRename(t: Type): SType = SIntervalPointer(pType.deepRename(t).asInstanceOf[PInterval]) - def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = { - pt match { - case t: PCanonicalInterval if t.equalModuloRequired(this.pType) => - new SIntervalPointerCode(this, addr) - case _ => - new SIntervalPointerCode(this, pType.store(cb, region, pt.loadCheapPCode(cb, addr), false)) - } - } + lazy val virtualType: Type = pType.virtualType + + def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(LongInfo) + + override def settableTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(LongInfo, BooleanInfo, BooleanInfo) def fromSettables(settables: IndexedSeq[Settable[_]]): SIntervalPointerSettable = { val IndexedSeq(a: Settable[Long@unchecked], includesStart: 
Settable[Boolean@unchecked], includesEnd: Settable[Boolean@unchecked]) = settables @@ -41,6 +41,7 @@ case class SIntervalPointer(pType: PInterval) extends SInterval { } override def pointType: SType = pType.pointType.sType + override def pointEmitType: EmitType = EmitType(pType.pointType.sType, pType.pointType.required) def canonicalPType(): PType = pType } @@ -60,8 +61,8 @@ class SIntervalPointerSettable( val a: Settable[Long], val includesStart: Settable[Boolean], val includesEnd: Settable[Boolean] -) extends PIntervalValue with PSettable { - def get: PIntervalCode = new SIntervalPointerCode(st, a) +) extends SIntervalValue with SSettable { + def get: SIntervalCode = new SIntervalPointerCode(st, a) val pt: PInterval = st.pType @@ -70,24 +71,23 @@ class SIntervalPointerSettable( def loadStart(cb: EmitCodeBuilder): IEmitCode = IEmitCode(cb, !(pt.startDefined(a)), - pt.pointType.loadCheapPCode(cb, pt.loadStart(a))) + pt.pointType.loadCheapSCode(cb, pt.loadStart(a))) def startDefined(cb: EmitCodeBuilder): Code[Boolean] = pt.startDefined(a) def loadEnd(cb: EmitCodeBuilder): IEmitCode = IEmitCode(cb, !(pt.endDefined(a)), - pt.pointType.loadCheapPCode(cb, pt.loadEnd(a))) + pt.pointType.loadCheapSCode(cb, pt.loadEnd(a))) def endDefined(cb: EmitCodeBuilder): Code[Boolean] = pt.endDefined(a) - def store(cb: EmitCodeBuilder, pc: PCode): Unit = { + def store(cb: EmitCodeBuilder, pc: SCode): Unit = { cb.assign(a, pc.asInstanceOf[SIntervalPointerCode].a) cb.assign(includesStart, pt.includesStart(a.load())) cb.assign(includesEnd, pt.includesEnd(a.load())) } - // FIXME orderings should take emitcodes/iemitcodes def isEmpty(cb: EmitCodeBuilder): Code[Boolean] = { val gt = cb.emb.ecb.getOrderingFunction(st.pointType, CodeOrdering.Gt()) val gteq = cb.emb.ecb.getOrderingFunction(st.pointType, CodeOrdering.Gteq()) @@ -104,24 +104,24 @@ class SIntervalPointerSettable( } -class SIntervalPointerCode(val st: SIntervalPointer, val a: Code[Long]) extends PIntervalCode { - override def pt: PInterval = st.pType +class SIntervalPointerCode(val st: SIntervalPointer, val a: Code[Long]) extends SIntervalCode { + val pt = st.pType def code: Code[_] = a - def codeTuple(): IndexedSeq[Code[_]] = FastIndexedSeq(a) + def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = FastIndexedSeq(a) def includesStart(): Code[Boolean] = pt.includesStart(a) def includesEnd(): Code[Boolean] = pt.includesEnd(a) - def memoize(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): PIntervalValue = { + def memoize(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): SIntervalValue = { val s = SIntervalPointerSettable(sb, st, name) cb.assign(s, this) s } - def memoize(cb: EmitCodeBuilder, name: String): PIntervalValue = memoize(cb, name, cb.localBuilder) + def memoize(cb: EmitCodeBuilder, name: String): SIntervalValue = memoize(cb, name, cb.localBuilder) - def memoizeField(cb: EmitCodeBuilder, name: String): PIntervalValue = memoize(cb, name, cb.fieldBuilder) + def memoizeField(cb: EmitCodeBuilder, name: String): SIntervalValue = memoize(cb, name, cb.fieldBuilder) } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SNDArrayPointer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SNDArrayPointer.scala index 82b1d8a150d..4c0b4171fb1 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SNDArrayPointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SNDArrayPointer.scala @@ -4,28 +4,32 @@ import is.hail.annotations.Region import is.hail.asm4s.{Code, 
IntInfo, LongInfo, Settable, SettableBuilder, TypeInfo, Value, const} import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} -import is.hail.types.physical.stypes.interfaces.{SNDArray, SNDArrayValue} -import is.hail.types.physical.stypes.{SCode, SType} -import is.hail.types.physical.{PBaseStructCode, PCanonicalNDArray, PCode, PNDArray, PNDArrayCode, PNDArrayValue, PSettable, PType, PValue} +import is.hail.types.physical.stypes.interfaces.{SBaseStructCode, SNDArray, SNDArrayCode, SNDArrayValue} +import is.hail.types.physical.stypes.{SCode, SSettable, SType, SValue} +import is.hail.types.physical.{PCanonicalNDArray, PType} +import is.hail.types.virtual.Type import is.hail.utils.FastIndexedSeq case class SNDArrayPointer(pType: PCanonicalNDArray) extends SNDArray { + require(!pType.required) + def nDims: Int = pType.nDims override def elementType: SType = pType.elementType.sType + override def elementPType: PType = pType.elementType + + lazy val virtualType: Type = pType.virtualType + + override def castRename(t: Type): SType = SNDArrayPointer(pType.deepRename(t)) + def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { new SNDArrayPointerCode(this, pType.store(cb, region, value, deepCopy)) } def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(LongInfo) - def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = { - if (pt == this.pType) - new SNDArrayPointerCode(this, addr) - else - coerceOrCopy(cb, region, pt.loadCheapPCode(cb, addr), deepCopy = false) - } + override def settableTupleTypes(): IndexedSeq[TypeInfo[_]] = Array.fill(2 + nDims * 2)(LongInfo) def fromSettables(settables: IndexedSeq[Settable[_]]): SNDArrayPointerSettable = { val a = settables(0).asInstanceOf[Settable[Long@unchecked]] @@ -61,24 +65,24 @@ class SNDArrayPointerSettable( val shape: IndexedSeq[Settable[Long]], val strides: IndexedSeq[Settable[Long]], val dataFirstElement: Settable[Long] - ) extends PNDArrayValue with PSettable { + ) extends SNDArrayValue with SSettable { val pt: PCanonicalNDArray = st.pType - def loadElement(indices: IndexedSeq[Value[Long]], cb: EmitCodeBuilder): PCode = { + def loadElement(indices: IndexedSeq[Value[Long]], cb: EmitCodeBuilder): SCode = { assert(indices.size == pt.nDims) - pt.elementType.loadCheapPCode(cb, pt.loadElementFromDataAndStrides(cb, indices, dataFirstElement, strides)) + pt.elementType.loadCheapSCode(cb, pt.loadElementFromDataAndStrides(cb, indices, dataFirstElement, strides)) } def settableTuple(): IndexedSeq[Settable[_]] = FastIndexedSeq(a) ++ shape ++ strides ++ FastIndexedSeq(dataFirstElement) - def store(cb: EmitCodeBuilder, v: PCode): Unit = { + def store(cb: EmitCodeBuilder, v: SCode): Unit = { cb.assign(a, v.asInstanceOf[SNDArrayPointerCode].a) pt.loadShapes(cb, a, shape) pt.loadStrides(cb, a, strides) cb.assign(dataFirstElement, pt.dataFirstElementPointer(a)) } - override def get: PCode = new SNDArrayPointerCode(st, a) + override def get: SNDArrayPointerCode = new SNDArrayPointerCode(st, a) override def outOfBounds(indices: IndexedSeq[Value[Long]], cb: EmitCodeBuilder): Code[Boolean] = { val shape = this.shapes(cb) @@ -117,22 +121,20 @@ class SNDArrayPointerSettable( def firstDataAddress(cb: EmitCodeBuilder): Value[Long] = dataFirstElement } -class SNDArrayPointerCode(val st: SNDArrayPointer, val a: Code[Long]) extends PNDArrayCode { +class SNDArrayPointerCode(val st: SNDArrayPointer, val a: Code[Long]) extends 
SNDArrayCode { val pt: PCanonicalNDArray = st.pType - override def code: Code[_] = a - - override def codeTuple(): IndexedSeq[Code[_]] = FastIndexedSeq(a) + override def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = FastIndexedSeq(a) - def memoize(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): PNDArrayValue = { + def memoize(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): SNDArrayValue = { val s = SNDArrayPointerSettable(sb, st, name) cb.assign(s, this) s } - override def memoize(cb: EmitCodeBuilder, name: String): PNDArrayValue = memoize(cb, name, cb.localBuilder) + override def memoize(cb: EmitCodeBuilder, name: String): SNDArrayValue = memoize(cb, name, cb.localBuilder) - override def memoizeField(cb: EmitCodeBuilder, name: String): PValue = memoize(cb, name, cb.fieldBuilder) + override def memoizeField(cb: EmitCodeBuilder, name: String): SValue = memoize(cb, name, cb.fieldBuilder) - override def shape(cb: EmitCodeBuilder): PBaseStructCode = pt.shapeType.loadCheapPCode(cb, pt.representation.loadField(a, "shape")) + override def shape(cb: EmitCodeBuilder): SBaseStructCode = pt.shapeType.loadCheapSCode(cb, pt.representation.loadField(a, "shape")) } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SStackStruct.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SStackStruct.scala new file mode 100644 index 00000000000..d233b204fb4 --- /dev/null +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SStackStruct.scala @@ -0,0 +1,161 @@ +package is.hail.types.physical.stypes.concrete + +import is.hail.annotations.Region +import is.hail.asm4s.{Code, Settable, TypeInfo, Value} +import is.hail.expr.ir.{EmitCode, EmitCodeBuilder, EmitSettable, IEmitCode} +import is.hail.types.physical.stypes.interfaces.{SBaseStruct, SBaseStructCode, SStructSettable} +import is.hail.types.physical.stypes.{EmitType, SCode, SType} +import is.hail.types.physical.{PCanonicalBaseStruct, PCanonicalStruct, PCanonicalTuple, PTupleField, PType} +import is.hail.types.virtual.{TBaseStruct, TStruct, TTuple, Type} + +object SStackStruct { + val MAX_FIELDS_FOR_CONSTRUCT: Int = 64 + + def constructFromArgs(cb: EmitCodeBuilder, region: Value[Region], t: TBaseStruct, args: EmitCode*): SBaseStructCode = { + val as = args.toArray + assert(t.size == args.size) + if (as.length > MAX_FIELDS_FOR_CONSTRUCT) { + val structType: PCanonicalBaseStruct = t match { + case ts: TStruct => + PCanonicalStruct(false, ts.fieldNames.zip(as.map(_.emitType)).map { case (f, et) => (f, et.canonicalPType) }: _*) + case tt: TTuple => + PCanonicalTuple(tt._types.zip(as.map(_.emitType)).map { case (tf, et) => PTupleField(tf.index, et.canonicalPType) }, false) + } + structType.constructFromFields(cb, region, as, false) + } else { + val st = SStackStruct(t, as.map(_.emitType)) + new SStackStructCode(st, as) + } + } +} + +case class SStackStruct(virtualType: TBaseStruct, fieldEmitTypes: IndexedSeq[EmitType]) extends SBaseStruct { + override def size: Int = virtualType.size + + private lazy val codeStarts = fieldEmitTypes.map(_.nCodes).scanLeft(0)(_ + _).init + private lazy val settableStarts = fieldEmitTypes.map(_.nSettables).scanLeft(0)(_ + _).init + + override lazy val fieldTypes: IndexedSeq[SType] = fieldEmitTypes.map(_.st) + + def fieldIdx(fieldName: String): Int = virtualType.asInstanceOf[TStruct].fieldIdx(fieldName) + + override def canonicalPType(): PType = virtualType match { + case ts: TStruct => + PCanonicalStruct(false, ts.fieldNames.zip(fieldEmitTypes).map { case (f, et) 
=> (f, et.canonicalPType) }: _*) + case tt: TTuple => + PCanonicalTuple(tt._types.zip(fieldEmitTypes).map { case (tf, et) => PTupleField(tf.index, et.canonicalPType) }, false) + + } + + lazy val codeTupleTypes: IndexedSeq[TypeInfo[_]] = fieldEmitTypes.flatMap(_.codeTupleTypes) + + override lazy val settableTupleTypes: IndexedSeq[TypeInfo[_]] = fieldEmitTypes.flatMap(_.settableTupleTypes) + + override def fromCodes(codes: IndexedSeq[Code[_]]): SBaseStructCode = { + new SStackStructCode(this, fieldEmitTypes.indices.map { i => + val et = fieldEmitTypes(i) + val start = codeStarts(i) + et.fromCodes(codes.slice(start, start + et.nCodes)) + }) + } + + override def fromSettables(settables: IndexedSeq[Settable[_]]): SStackStructSettable = { + assert(settables.length == fieldEmitTypes.map(_.nSettables).sum, s"mismatch: ${ settables.length } settables, expect ${ fieldEmitTypes.map(_.nSettables).sum }\n ${ settables.map(_.ti).mkString(",") }\n ${ fieldEmitTypes.map(_.settableTupleTypes).mkString(" | ") }") + new SStackStructSettable(this, fieldEmitTypes.indices.map { i => + val et = fieldEmitTypes(i) + val start = settableStarts(i) + et.fromSettables(settables.slice(start, start + et.nSettables)) + }) + } + + override def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { + value match { + case ss: SStackStructCode => + if (ss.st == this && !deepCopy) + ss + else + new SStackStructCode(this, fieldEmitTypes.zip(ss.codes).map { case (newType, ec) => + EmitCode.fromI(cb.emb) { cb => + val iec = ec.toI(cb).map(cb) { field => newType.st.coerceOrCopy(cb, region, field, deepCopy) } + (newType.required, iec.required) match { + case (true, false) => IEmitCode.present(cb, iec.get(cb)) + case (false, true) => iec.setOptional + case _ => iec + } + } + }) + case _ => + val sv = value.asBaseStruct.memoize(cb, "stackstruct_coerce_value") + new SStackStructCode(this, Array.tabulate[EmitCode](size) { i => + EmitCode.fromI(cb.emb) { cb => + val newType = fieldEmitTypes(i) + val iec = sv.loadField(cb, i).map(cb) { field => newType.st.coerceOrCopy(cb, region, field, deepCopy) } + (newType.required, iec.required) match { + case (true, false) => IEmitCode.present(cb, iec.get(cb)) + case (false, true) => iec.setOptional + case _ => iec + } + } + }) + } + } + + override def castRename(t: Type): SType = { + val ts = t.asInstanceOf[TBaseStruct] + SStackStruct( + ts, + ts.types.zip(fieldEmitTypes).map { case (v, e) => e.copy(st = e.st.castRename(v)) } + ) + } +} + +class SStackStructSettable(val st: SStackStruct, settables: IndexedSeq[EmitSettable]) extends SStructSettable { + def get: SStackStructCode = new SStackStructCode(st, settables.map(_.load)) + + def settableTuple(): IndexedSeq[Settable[_]] = settables.flatMap(_.settableTuple()) + + def loadField(cb: EmitCodeBuilder, fieldIdx: Int): IEmitCode = { + settables(fieldIdx).toI(cb) + } + + def isFieldMissing(fieldIdx: Int): Code[Boolean] = + settables(fieldIdx).m + + def store(cb: EmitCodeBuilder, pv: SCode): Unit = { + val ssc = pv.asInstanceOf[SStackStructCode] + settables.zip(ssc.codes).foreach { case (s, c) => s.store(cb, c) } + } +} + +class SStackStructCode(val st: SStackStruct, val codes: IndexedSeq[EmitCode]) extends SBaseStructCode { + override def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = codes.flatMap(_.makeCodeTuple(cb)) + + override def memoize(cb: EmitCodeBuilder, name: String): SStackStructSettable = { + new SStackStructSettable(st, codes.indices.map { i => + val code = codes(i) + val es = 
cb.emb.newEmitLocal(s"${ name }_$i", code.emitType) + es.store(cb, code) + es + }) + } + + override def memoizeField(cb: EmitCodeBuilder, name: String): SStackStructSettable = { + new SStackStructSettable(st, codes.indices.map { i => + val code = codes(i) + val es = cb.emb.newEmitField(s"${ name }_$i", code.emitType) + es.store(cb, code) + es + }) + } + + override def loadSingleField(cb: EmitCodeBuilder, fieldIdx: Int): IEmitCode = { + codes(fieldIdx).toI(cb) + } + + override def subset(fieldNames: String*): SStackStructCode = { + val newToOld = fieldNames.map(st.fieldIdx).toArray + val oldVType = st.virtualType.asInstanceOf[TStruct] + val newVirtualType = TStruct(newToOld.map(i => (oldVType.fieldNames(i), oldVType.types(i))): _*) + new SStackStructCode(SStackStruct(newVirtualType, newToOld.map(st.fieldEmitTypes)), newToOld.map(codes)) + } +} \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SStringPointer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SStringPointer.scala index 2098960f60f..30612a0ea43 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SStringPointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SStringPointer.scala @@ -4,26 +4,26 @@ import is.hail.annotations.Region import is.hail.asm4s.{Code, LongInfo, Settable, SettableBuilder, TypeInfo, Value} import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} -import is.hail.types.physical.stypes.interfaces.{SString, SStringCode} -import is.hail.types.physical.stypes.{SCode, SType} -import is.hail.types.physical.{PBinaryCode, PCanonicalString, PCode, PSettable, PString, PStringCode, PStringValue, PType, PValue} +import is.hail.types.physical.stypes.interfaces.{SBinaryCode, SString, SStringCode, SStringValue} +import is.hail.types.physical.stypes.{SCode, SSettable, SType, SValue} +import is.hail.types.physical.{PCanonicalString, PString, PType} +import is.hail.types.virtual.Type import is.hail.utils.FastIndexedSeq case class SStringPointer(pType: PString) extends SString { + require(!pType.required) + + lazy val virtualType: Type = pType.virtualType + + override def castRename(t: Type): SType = this + def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { new SStringPointerCode(this, pType.store(cb, region, value, deepCopy)) } def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(LongInfo) - def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = { - pt match { - case _: PCanonicalString => - new SStringPointerCode(this, addr) - } - } - def fromSettables(settables: IndexedSeq[Settable[_]]): SStringPointerSettable = { val IndexedSeq(a: Settable[Long@unchecked]) = settables assert(a.ti == LongInfo) @@ -44,28 +44,28 @@ case class SStringPointer(pType: PString) extends SString { } -class SStringPointerCode(val st: SStringPointer, val a: Code[Long]) extends PStringCode { - override def pt: PString = st.pType +class SStringPointerCode(val st: SStringPointer, val a: Code[Long]) extends SStringCode { + val pt: PString = st.pType def code: Code[_] = a - def codeTuple(): IndexedSeq[Code[_]] = FastIndexedSeq(a) + def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = FastIndexedSeq(a) def loadLength(): Code[Int] = pt.loadLength(a) def loadString(): Code[String] = pt.loadString(a) - def asBytes(): PBinaryCode = new 
SBinaryPointerCode(SBinaryPointer(pt.binaryRepresentation), a) + def asBytes(): SBinaryCode = new SBinaryPointerCode(SBinaryPointer(pt.binaryRepresentation), a) - private[this] def memoizeWithBuilder(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): PValue = { + private[this] def memoizeWithBuilder(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): SValue = { val s = new SStringPointerSettable(st, sb.newSettable[Long]("sstringpointer_memoize")) s.store(cb, this) s } - def memoize(cb: EmitCodeBuilder, name: String): PValue = memoizeWithBuilder(cb, name, cb.localBuilder) + def memoize(cb: EmitCodeBuilder, name: String): SValue = memoizeWithBuilder(cb, name, cb.localBuilder) - def memoizeField(cb: EmitCodeBuilder, name: String): PValue = memoizeWithBuilder(cb, name, cb.fieldBuilder) + def memoizeField(cb: EmitCodeBuilder, name: String): SValue = memoizeWithBuilder(cb, name, cb.fieldBuilder) def binaryRepr: SBinaryPointerCode = new SBinaryPointerCode(SBinaryPointer(st.pType.binaryRepresentation), a) } @@ -77,14 +77,14 @@ object SStringPointerSettable { } } -class SStringPointerSettable(val st: SStringPointer, val a: Settable[Long]) extends PStringValue with PSettable { +class SStringPointerSettable(val st: SStringPointer, val a: Settable[Long]) extends SStringValue with SSettable { val pt: PString = st.pType def settableTuple(): IndexedSeq[Settable[_]] = FastIndexedSeq(a) def get: SStringPointerCode = new SStringPointerCode(st, a.load()) - def store(cb: EmitCodeBuilder, v: PCode): Unit = { + def store(cb: EmitCodeBuilder, v: SCode): Unit = { cb.assign(a, v.asInstanceOf[SStringPointerCode].a) } } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SSubsetStruct.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SSubsetStruct.scala index aae7fcb11ae..877e03f1503 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SSubsetStruct.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SSubsetStruct.scala @@ -3,24 +3,45 @@ package is.hail.types.physical.stypes.concrete import is.hail.annotations.Region import is.hail.asm4s.{Code, LongInfo, Settable, TypeInfo, Value} import is.hail.expr.ir.orderings.CodeOrdering -import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, IEmitCode, IEmitSCode, SortOrder} -import is.hail.types.physical.stypes.{SCode, SType} -import is.hail.types.physical.stypes.interfaces.{SBaseStruct, SStructSettable} -import is.hail.types.physical.{PBaseStruct, PBaseStructCode, PBaseStructValue, PCode, PStruct, PStructSettable, PSubsetStruct, PType} -import is.hail.types.virtual.TStruct +import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, IEmitCode, SortOrder} +import is.hail.types.physical.stypes.{EmitType, SCode, SType} +import is.hail.types.physical.stypes.interfaces.{SBaseStruct, SBaseStructCode, SBaseStructValue, SStructSettable} +import is.hail.types.physical.{PCanonicalStruct, PType} +import is.hail.types.virtual.{TStruct, Type} case class SSubsetStruct(parent: SBaseStruct, fieldNames: IndexedSeq[String]) extends SBaseStruct { val size: Int = fieldNames.size - val fieldIdx: Map[String, Int] = fieldNames.zipWithIndex.toMap - val newToOldFieldMapping: Map[Int, Int] = fieldIdx - .map { case (f, i) => (i, parent.pType.virtualType.asInstanceOf[TStruct].fieldIdx(f)) } + val _fieldIdx: Map[String, Int] = fieldNames.zipWithIndex.toMap + val newToOldFieldMapping: Map[Int, Int] = _fieldIdx + .map { case (f, i) => (i, parent.virtualType.asInstanceOf[TStruct].fieldIdx(f)) } - val fieldTypes: 
Array[SType] = Array.tabulate(size)(i => parent.fieldTypes(newToOldFieldMapping(i))) + val fieldTypes: IndexedSeq[SType] = Array.tabulate(size)(i => parent.fieldTypes(newToOldFieldMapping(i))) + val fieldEmitTypes: IndexedSeq[EmitType] = Array.tabulate(size)(i => parent.fieldEmitTypes(newToOldFieldMapping(i))) - val pType: PSubsetStruct = PSubsetStruct(parent.pType.asInstanceOf[PStruct], fieldNames.toArray - ) + lazy val virtualType: TStruct = { + val vparent = parent.virtualType.asInstanceOf[TStruct] + TStruct(fieldNames.map(f => (f, vparent.field(f).typ)): _*) + } + + override def fieldIdx(fieldName: String): Int = _fieldIdx(fieldName) + + override def castRename(t: Type): SType = { + val renamedVType = t.asInstanceOf[TStruct] + val newNames = renamedVType.fieldNames + val subsetPrevVirtualType = virtualType + val vparent = parent.virtualType.asInstanceOf[TStruct] + val newParent = TStruct(vparent.fieldNames.map(f => subsetPrevVirtualType.fieldIdx.get(f) match { + case Some(idxInSelectedFields) => + val renamed = renamedVType.fields(idxInSelectedFields) + (renamed.name, renamed.typ) + case None => (f, vparent.fieldType(f)) + }): _*) + val newType = SSubsetStruct(parent.castRename(newParent).asInstanceOf[SBaseStruct], newNames) + assert(newType.virtualType == t) + newType + } def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { if (deepCopy) @@ -33,52 +54,46 @@ case class SSubsetStruct(parent: SBaseStruct, fieldNames: IndexedSeq[String]) ex def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = parent.codeTupleTypes() - def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = { - throw new UnsupportedOperationException - } + override def settableTupleTypes(): IndexedSeq[TypeInfo[_]] = parent.settableTupleTypes() def fromSettables(settables: IndexedSeq[Settable[_]]): SSubsetStructSettable = { - new SSubsetStructSettable(this, parent.fromSettables(settables).asInstanceOf[PStructSettable]) + new SSubsetStructSettable(this, parent.fromSettables(settables).asInstanceOf[SStructSettable]) } def fromCodes(codes: IndexedSeq[Code[_]]): SSubsetStructCode = { - new SSubsetStructCode(this, parent.fromCodes(codes).asInstanceOf[PBaseStructCode]) + new SSubsetStructCode(this, parent.fromCodes(codes)) } - def canonicalPType(): PType = pType + def canonicalPType(): PType = { + PCanonicalStruct(fieldNames.zipWithIndex.map { case (f, i) => + (f, parent.fieldEmitTypes(newToOldFieldMapping(i)).canonicalPType) + }: _*) + } } -// FIXME: prev should be SStructSettable, not PStructSettable -class SSubsetStructSettable(val st: SSubsetStruct, prev: PStructSettable) extends PStructSettable { - def pt: PBaseStruct = st.pType.asInstanceOf[PBaseStruct] - +class SSubsetStructSettable(val st: SSubsetStruct, prev: SStructSettable) extends SStructSettable { def get: SSubsetStructCode = new SSubsetStructCode(st, prev.load().asBaseStruct) def settableTuple(): IndexedSeq[Settable[_]] = prev.settableTuple() - def loadField(cb: EmitCodeBuilder, fieldIdx: Int): IEmitSCode = { + def loadField(cb: EmitCodeBuilder, fieldIdx: Int): IEmitCode = { prev.loadField(cb, st.newToOldFieldMapping(fieldIdx)) } def isFieldMissing(fieldIdx: Int): Code[Boolean] = prev.isFieldMissing(st.newToOldFieldMapping(fieldIdx)) - def store(cb: EmitCodeBuilder, pv: PCode): Unit = prev.store(cb, pv.asInstanceOf[SSubsetStructCode].prev) + def store(cb: EmitCodeBuilder, pv: SCode): Unit = prev.store(cb, pv.asInstanceOf[SSubsetStructCode].prev) } -class SSubsetStructCode(val st: 
SSubsetStruct, val prev: PBaseStructCode) extends PBaseStructCode { - - val pt: PBaseStruct = st.pType - - def code: Code[_] = prev.code - - def codeTuple(): IndexedSeq[Code[_]] = prev.codeTuple() +class SSubsetStructCode(val st: SSubsetStruct, val prev: SBaseStructCode) extends SBaseStructCode { + def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = prev.makeCodeTuple(cb) - def memoize(cb: EmitCodeBuilder, name: String): PBaseStructValue = { - new SSubsetStructSettable(st, prev.memoize(cb, name).asInstanceOf[PStructSettable]) + def memoize(cb: EmitCodeBuilder, name: String): SBaseStructValue = { + new SSubsetStructSettable(st, prev.memoize(cb, name).asInstanceOf[SStructSettable]) } - def memoizeField(cb: EmitCodeBuilder, name: String): PBaseStructValue = { - new SSubsetStructSettable(st, prev.memoizeField(cb, name).asInstanceOf[PStructSettable]) + def memoizeField(cb: EmitCodeBuilder, name: String): SBaseStructValue = { + new SSubsetStructSettable(st, prev.memoizeField(cb, name).asInstanceOf[SStructSettable]) } } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SBaseStruct.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SBaseStruct.scala index 227c256b63a..fa65a126e8d 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SBaseStruct.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SBaseStruct.scala @@ -1,42 +1,79 @@ package is.hail.types.physical.stypes.interfaces -import is.hail.asm4s.Code -import is.hail.expr.ir.{EmitCodeBuilder, IEmitSCode} -import is.hail.types.physical.PBaseStruct -import is.hail.types.physical.stypes.concrete.{SSubsetStruct, SSubsetStructCode} -import is.hail.types.physical.stypes.{SCode, SSettable, SType, SValue} +import is.hail.annotations.Region +import is.hail.asm4s.{Code, Value} +import is.hail.expr.ir.{EmitCode, EmitCodeBuilder, IEmitCode} +import is.hail.types.physical.{PCanonicalBaseStruct, PCanonicalStruct} +import is.hail.types.physical.stypes._ +import is.hail.types.physical.stypes.concrete.{SInsertFieldsStruct, SInsertFieldsStructCode, SSubsetStruct, SSubsetStructCode} +import is.hail.types.virtual.{TBaseStruct, TStruct} +import is.hail.utils._ trait SBaseStruct extends SType { + def virtualType: TBaseStruct + override def fromCodes(codes: IndexedSeq[Code[_]]): SBaseStructCode def size: Int - val fieldTypes: Array[SType] + val fieldTypes: IndexedSeq[SType] + val fieldEmitTypes: IndexedSeq[EmitType] + + def fieldIdx(fieldName: String): Int } trait SStructSettable extends SBaseStructValue with SSettable trait SBaseStructValue extends SValue { - def pt: PBaseStruct + def st: SBaseStruct def isFieldMissing(fieldIdx: Int): Code[Boolean] - def isFieldMissing(fieldName: String): Code[Boolean] = isFieldMissing(pt.fieldIdx(fieldName)) + def isFieldMissing(fieldName: String): Code[Boolean] = isFieldMissing(st.fieldIdx(fieldName)) - def loadField(cb: EmitCodeBuilder, fieldIdx: Int): IEmitSCode + def loadField(cb: EmitCodeBuilder, fieldIdx: Int): IEmitCode - def loadField(cb: EmitCodeBuilder, fieldName: String): IEmitSCode = loadField(cb, pt.fieldIdx(fieldName)) + def loadField(cb: EmitCodeBuilder, fieldName: String): IEmitCode = loadField(cb, st.fieldIdx(fieldName)) } -trait SBaseStructCode extends SCode { self => +trait SBaseStructCode extends SCode { + self => def st: SBaseStruct def memoize(cb: EmitCodeBuilder, name: String): SBaseStructValue def memoizeField(cb: EmitCodeBuilder, name: String): SBaseStructValue - def subset(fieldNames: String*): SSubsetStructCode = 
{ + final def loadSingleField(cb: EmitCodeBuilder, fieldName: String): IEmitCode = loadSingleField(cb, st.fieldIdx(fieldName)) + + def loadSingleField(cb: EmitCodeBuilder, fieldIdx: Int): IEmitCode = { + memoize(cb, "structcode_loadsinglefield") + .loadField(cb, fieldIdx) + } + + def subset(fieldNames: String*): SBaseStructCode = { val st = SSubsetStruct(self.st, fieldNames.toIndexedSeq) - new SSubsetStructCode(st, self.asPCode.asBaseStruct) // FIXME, should be sufficient to just use self here + new SSubsetStructCode(st, self) + } + + protected[stypes] def _insert(newType: TStruct, fields: (String, EmitCode)*): SBaseStructCode = { + new SInsertFieldsStructCode( + SInsertFieldsStruct(newType, st, fields.map { case (name, ec) => (name, ec.emitType) }.toFastIndexedSeq), + this, + fields.map(_._2).toFastIndexedSeq + ) + } + + final def insert(cb: EmitCodeBuilder, region: Value[Region], newType: TStruct, fields: (String, EmitCode)*): SBaseStructCode = { + if (newType.size < 64 || fields.length < 16) + return _insert(newType, fields: _*) + + val newFieldMap = fields.toMap + val oldPV = memoize(cb, "insert_fields_old") + val allFields = newType.fieldNames.map { f => + (f, newFieldMap.getOrElse(f, EmitCode.fromI(cb.emb)(cb => oldPV.loadField(cb, f)))) } + + val pcs = PCanonicalStruct(allFields.map { case (f, ec) => (f, ec.emitType.canonicalPType) }: _*) + pcs.constructFromFields(cb, region, allFields.map(_._2), false) } } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SBinary.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SBinary.scala index 6bfcd4a95e3..5534c179df0 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SBinary.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SBinary.scala @@ -2,7 +2,6 @@ package is.hail.types.physical.stypes.interfaces import is.hail.asm4s.Code import is.hail.expr.ir.EmitCodeBuilder -import is.hail.types.physical.PValue import is.hail.types.physical.stypes.{SCode, SType, SValue} trait SBinary extends SType diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SCall.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SCall.scala index b9da86c29ba..9a0f4fd7617 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SCall.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SCall.scala @@ -3,7 +3,6 @@ package is.hail.types.physical.stypes.interfaces import is.hail.asm4s.{Code, Value} import is.hail.expr.ir.EmitCodeBuilder import is.hail.types.physical.stypes.{SCode, SType, SValue} -import is.hail.types.physical.{PCode, PValue} trait SCall extends SType diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SContainer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SContainer.scala index a5f0382b040..83f9318f79a 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SContainer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SContainer.scala @@ -1,7 +1,7 @@ package is.hail.types.physical.stypes.interfaces import is.hail.asm4s._ -import is.hail.expr.ir.{EmitCodeBuilder, IEmitSCode} +import is.hail.expr.ir.{EmitCodeBuilder, IEmitCode} import is.hail.types.physical.stypes.{EmitType, SCode, SType, SValue} trait SContainer extends SType { @@ -18,7 +18,7 @@ trait SIndexableValue extends SValue { def isElementDefined(i: Code[Int]): Code[Boolean] = !isElementMissing(i) - def loadElement(cb: EmitCodeBuilder, i: Code[Int]): 
IEmitSCode + def loadElement(cb: EmitCodeBuilder, i: Code[Int]): IEmitCode def hasMissingValues(cb: EmitCodeBuilder): Code[Boolean] diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SInterval.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SInterval.scala index 6d9b2224109..cb7f533e318 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SInterval.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SInterval.scala @@ -1,12 +1,13 @@ package is.hail.types.physical.stypes.interfaces import is.hail.asm4s.{Code, Value} -import is.hail.expr.ir.{EmitCodeBuilder, IEmitSCode} +import is.hail.expr.ir.{EmitCodeBuilder, IEmitCode} import is.hail.types.physical.PInterval -import is.hail.types.physical.stypes.{SCode, SType, SValue} +import is.hail.types.physical.stypes.{EmitType, SCode, SType, SValue} trait SInterval extends SType { def pointType: SType + def pointEmitType: EmitType } trait SIntervalValue extends SValue { @@ -16,11 +17,11 @@ trait SIntervalValue extends SValue { def includesEnd(): Value[Boolean] - def loadStart(cb: EmitCodeBuilder): IEmitSCode + def loadStart(cb: EmitCodeBuilder): IEmitCode def startDefined(cb: EmitCodeBuilder): Code[Boolean] - def loadEnd(cb: EmitCodeBuilder): IEmitSCode + def loadEnd(cb: EmitCodeBuilder): IEmitCode def endDefined(cb: EmitCodeBuilder): Code[Boolean] diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SLocus.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SLocus.scala index a58a934c132..629799a6a98 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SLocus.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SLocus.scala @@ -7,6 +7,7 @@ import is.hail.variant.{Locus, ReferenceGenome} trait SLocus extends SType { def rg: ReferenceGenome + def contigType: SString } trait SLocusValue extends SValue { @@ -19,6 +20,8 @@ trait SLocusValue extends SValue { } trait SLocusCode extends SCode { + def st: SLocus + def contig(cb: EmitCodeBuilder): SStringCode def position(cb: EmitCodeBuilder): Code[Int] diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SNDArray.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SNDArray.scala index 8085436dfe0..a47530a0758 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SNDArray.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SNDArray.scala @@ -1,13 +1,125 @@ package is.hail.types.physical.stypes.interfaces +import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir.EmitCodeBuilder -import is.hail.types.physical.stypes.{SCode, SType, SValue} +import is.hail.types.physical.{PNDArray, PType} +import is.hail.types.physical.stypes.{SCode, SSettable, SType, SValue} +import is.hail.utils.{FastIndexedSeq, toRichIterable} object SNDArray { + def numElements(shape: IndexedSeq[Value[Long]]): Code[Long] = { + shape.foldLeft(1L: Code[Long])(_ * _) + } + // Column major order - def forEachIndex(cb: EmitCodeBuilder, shape: IndexedSeq[Value[Long]], context: String) - (f: (EmitCodeBuilder, IndexedSeq[Value[Long]]) => Unit): Unit = { + def forEachIndexColMajor(cb: EmitCodeBuilder, shape: IndexedSeq[Value[Long]], context: String) + (f: (EmitCodeBuilder, IndexedSeq[Value[Long]]) => Unit): Unit = { + forEachIndexWithInitAndIncColMajor(cb, shape, shape.map(_ => (cb: EmitCodeBuilder) => ()), shape.map(_ => (cb: EmitCodeBuilder) => ()), context)(f) + } + + def coiterate(cb: 
EmitCodeBuilder, region: Value[Region], arrays: IndexedSeq[(SNDArrayCode, String)], body: IndexedSeq[SSettable] => Unit): Unit = + coiterate(cb, region, arrays, body, deepCopy=false) + + def coiterate(cb: EmitCodeBuilder, region: Value[Region], arrays: IndexedSeq[(SNDArrayCode, String)], body: IndexedSeq[SSettable] => Unit, deepCopy: Boolean): Unit = { + if (arrays.isEmpty) return + val indexVars = Array.tabulate(arrays(0)._1.st.nDims)(i => s"i$i").toFastIndexedSeq + val indices = Array.range(0, arrays(0)._1.st.nDims).toFastIndexedSeq + coiterate(cb, region, indexVars, arrays.map { case (array, name) => (array, indices, name) }, body, deepCopy) + } + + def coiterate(cb: EmitCodeBuilder, region: Value[Region], indexVars: IndexedSeq[String], arrays: IndexedSeq[(SNDArrayCode, IndexedSeq[Int], String)], body: IndexedSeq[SSettable] => Unit): Unit = + coiterate(cb, region, indexVars, arrays, body, deepCopy=false) + + // Note: to iterate through an array in column major order, make sure the indices are in ascending order. E.g. + // coiterate(cb, region, IndexedSeq("i", "j"), IndexedSeq((A, IndexedSeq(0, 1), "A"), (B, IndexedSeq(0, 1), "B")), { + // case Seq(a, b) => cb.assign(a, SCode.add(cb, a, b)) + // }) + // computes A += B. + def coiterate(cb: EmitCodeBuilder, region: Value[Region], indexVars: IndexedSeq[String], arrays: IndexedSeq[(SNDArrayCode, IndexedSeq[Int], String)], body: IndexedSeq[SSettable] => Unit, deepCopy: Boolean): Unit = { + + val indexSizes = new Array[Settable[Int]](indexVars.length) + val indexCoords = Array.tabulate(indexVars.length) { i => cb.newLocal[Int](indexVars(i)) } + + case class ArrayInfo( + array: SNDArrayValue, + strides: IndexedSeq[Value[Long]], + pos: IndexedSeq[Settable[Long]], + elt: SSettable, + indexToDim: Map[Int, Int], + name: String) + + val info = arrays.map { case (_array, indices, name) => + for (idx <- indices) assert(idx < indexVars.length && idx >= 0) + // FIXME: relax this assumption to handle transposing, non-column major + for (i <- 0 until indices.length - 1) assert(indices(i) < indices(i+1)) + assert(indices.length == _array.st.nDims) + + val array = _array.memoize(cb, s"${name}_copy") + val shape = array.shapes(cb) + for (i <- indices.indices) { + val idx = indices(i) + if (indexSizes(idx) == null) { + indexSizes(idx) = cb.newLocal[Int](s"${indexVars(idx)}_max") + cb.assign(indexSizes(idx), shape(i).toI) + } else { + cb.ifx(indexSizes(idx).cne(shape(i).toI), s"${indexVars(idx)} indexes incompatible dimensions") + } + } + val strides = array.strides(cb) + val pos = Array.tabulate(array.st.nDims + 1) { i => cb.newLocal[Long](s"$name$i") } + val elt = new SSettable { + def st: SType = array.st.elementType + val pt: PType = array.st.pType.elementType + + // FIXME: need to use `pos` of smallest index var + def get: SCode = pt.loadCheapSCode(cb, pt.loadFromNested(pos(0))) + def store(cb: EmitCodeBuilder, v: SCode): Unit = pt.storeAtAddress(cb, pos(0), region, v, deepCopy) + def settableTuple(): IndexedSeq[Settable[_]] = FastIndexedSeq(pos.last) + } + val indexToDim = indices.zipWithIndex.toMap + ArrayInfo(array, strides, pos, elt, indexToDim, name) + } + + def recurLoopBuilder(idx: Int): Unit = { + if (idx < 0) { + body(info.map(_.elt)) + } else { + val coord = indexCoords(idx) + def init(): Unit = { + cb.assign(coord, 0) + for (n <- arrays.indices) { + if (info(n).indexToDim.contains(idx)) { + val i = info(n).indexToDim(idx) + // FIXME: assumes array's indices in ascending order + cb.assign(info(n).pos(i), info(n).pos(i+1)) + } + } + } + def 
increment(): Unit = { + cb.assign(coord, coord + 1) + for (n <- arrays.indices) { + if (info(n).indexToDim.contains(idx)) { + val i = info(n).indexToDim(idx) + cb.assign(info(n).pos(i), info(n).pos(i) + info(n).strides(i)) + } + } + } + + cb.forLoop(init(), coord < indexSizes(idx), increment(), recurLoopBuilder(idx - 1)) + } + } + + for (n <- arrays.indices) { + cb.assign(info(n).pos(info(n).array.st.nDims), info(n).array.firstDataAddress(cb)) + } + recurLoopBuilder(indexVars.length - 1) + } + + // Column major order + def forEachIndexWithInitAndIncColMajor(cb: EmitCodeBuilder, shape: IndexedSeq[Value[Long]], inits: IndexedSeq[EmitCodeBuilder => Unit], + incrementers: IndexedSeq[EmitCodeBuilder => Unit], context: String) + (f: (EmitCodeBuilder, IndexedSeq[Value[Long]]) => Unit): Unit = { val indices = Array.tabulate(shape.length) { dimIdx => cb.newLocal[Long](s"${ context }_foreach_dim_$dimIdx", 0L) } @@ -21,8 +133,10 @@ object SNDArray { recurLoopBuilder(dimIdx + 1, () => { cb.forLoop({ + inits(dimIdx)(cb) cb.assign(dimVar, 0L) }, dimVar < shape(dimIdx), { + incrementers(dimIdx)(cb) cb.assign(dimVar, dimVar + 1L) }, innerLambda() @@ -37,6 +151,47 @@ object SNDArray { recurLoopBuilder(0, body) } + // Row major order + def forEachIndexRowMajor(cb: EmitCodeBuilder, shape: IndexedSeq[Value[Long]], context: String) + (f: (EmitCodeBuilder, IndexedSeq[Value[Long]]) => Unit): Unit = { + forEachIndexWithInitAndIncRowMajor(cb, shape, shape.map(_ => (cb: EmitCodeBuilder) => ()), shape.map(_ => (cb: EmitCodeBuilder) => ()), context)(f) + } + + // Row major order + def forEachIndexWithInitAndIncRowMajor(cb: EmitCodeBuilder, shape: IndexedSeq[Value[Long]], inits: IndexedSeq[EmitCodeBuilder => Unit], + incrementers: IndexedSeq[EmitCodeBuilder => Unit], context: String) + (f: (EmitCodeBuilder, IndexedSeq[Value[Long]]) => Unit): Unit = { + + val indices = Array.tabulate(shape.length) { dimIdx => cb.newLocal[Long](s"${ context }_foreach_dim_$dimIdx", 0L) } + + def recurLoopBuilder(dimIdx: Int, innerLambda: () => Unit): Unit = { + if (dimIdx == -1) { + innerLambda() + } + else { + val dimVar = indices(dimIdx) + + recurLoopBuilder(dimIdx - 1, + () => { + cb.forLoop({ + inits(dimIdx)(cb) + cb.assign(dimVar, 0L) + }, dimVar < shape(dimIdx), { + incrementers(dimIdx)(cb) + cb.assign(dimVar, dimVar + 1L) + }, + innerLambda() + ) + } + ) + } + } + + val body = () => f(cb, indices) + + recurLoopBuilder(shape.length - 1, body) + } + // Column major order def unstagedForEachIndex(shape: IndexedSeq[Long]) (f: IndexedSeq[Long] => Unit): Unit = { @@ -68,14 +223,19 @@ object SNDArray { trait SNDArray extends SType { + def pType: PNDArray + def nDims: Int def elementType: SType + def elementPType: PType } trait SNDArrayValue extends SValue { def st: SNDArray + override def get: SNDArrayCode + def loadElement(indices: IndexedSeq[Value[Long]], cb: EmitCodeBuilder): SCode def shapes(cb: EmitCodeBuilder): IndexedSeq[Value[Long]] diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SStream.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SStream.scala index ac7a3947f94..027e000c5a5 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SStream.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SStream.scala @@ -4,11 +4,12 @@ import is.hail.annotations.Region import is.hail.asm4s.{Code, Settable, TypeInfo, Value} import is.hail.expr.ir.EmitCodeBuilder import is.hail.expr.ir.streams.StreamProducer -import is.hail.types.physical.stypes.{SCode, 
SSettable, SType} -import is.hail.types.physical.{PCanonicalStream, PCode, PStream, PStreamCode, PType, PValue} +import is.hail.types.physical.stypes.{EmitType, SCode, SSettable, SType, SUnrealizableCode, SValue} +import is.hail.types.physical.{PCanonicalStream, PStream, PType} +import is.hail.types.virtual.{TStream, Type} -case class SStream(elementType: SType, required: Boolean) extends SType { - def pType: PStream = PCanonicalStream(elementType.pType, required) +case class SStream(elementEmitType: EmitType) extends SType { + def elementType: SType = elementEmitType.st def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { if (deepCopy) throw new UnsupportedOperationException @@ -19,27 +20,30 @@ case class SStream(elementType: SType, required: Boolean) extends SType { def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = throw new UnsupportedOperationException - def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = throw new UnsupportedOperationException - def fromCodes(codes: IndexedSeq[Code[_]]): SCode = throw new UnsupportedOperationException def fromSettables(settables: IndexedSeq[Settable[_]]): SSettable = throw new UnsupportedOperationException - def canonicalPType(): PType = pType + def canonicalPType(): PType = PCanonicalStream(elementEmitType.canonicalPType) + + override def virtualType: Type = TStream(elementType.virtualType) + + override def castRename(t: Type): SType = ??? } -final case class SStreamCode(st: SStream, producer: StreamProducer) extends PStreamCode { - self => - override def pt: PStream = st.pType +object SStreamCode{ + def apply(producer: StreamProducer): SStreamCode = SStreamCode(SStream(producer.element.emitType), producer) +} - def memoize(cb: EmitCodeBuilder, name: String): PValue = new PValue { - def pt: PStream = PCanonicalStream(st.pType) +final case class SStreamCode(st: SStream, producer: StreamProducer) extends SCode with SUnrealizableCode { + self => + def memoize(cb: EmitCodeBuilder, name: String): SValue = new SValue { override def st: SType = self.st var used: Boolean = false - def get: PCode = { + def get: SCode = { assert(!used) used = true self diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SVoid.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SVoid.scala index 5cb7b78308d..c7e91e77ee8 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SVoid.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SVoid.scala @@ -4,41 +4,38 @@ import is.hail.annotations.Region import is.hail.asm4s.{Code, Settable, TypeInfo, UnitInfo, Value} import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} -import is.hail.types.physical.stypes.{SCode, SSettable, SType} -import is.hail.types.physical.{PCode, PType, PUnrealizableCode, PValue, PVoid} +import is.hail.types.physical.stypes.{SCode, SSettable, SType, SUnrealizableCode, SValue} +import is.hail.types.physical.{PType, PVoid} +import is.hail.types.virtual.{TVoid, Type} case object SVoid extends SType { - def pType: PType = PVoid + override def virtualType: Type = TVoid + + override def castRename(t: Type): SType = this def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = value def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = IndexedSeq() - def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = throw 
new UnsupportedOperationException - def fromCodes(codes: IndexedSeq[Code[_]]): SCode = throw new UnsupportedOperationException def fromSettables(settables: IndexedSeq[Settable[_]]): SSettable = throw new UnsupportedOperationException - def canonicalPType(): PType = pType + def canonicalPType(): PType = throw new UnsupportedOperationException } -case object PVoidCode extends PCode with PUnrealizableCode { +case object SVoidCode extends SCode with SUnrealizableCode { self => - override def pt: PType = PVoid - override def st: SType = SVoid - override def typeInfo: TypeInfo[_] = UnitInfo - override def code: Code[_] = Code._empty - def memoize(cb: EmitCodeBuilder, name: String): PValue = new PValue { + def memoize(cb: EmitCodeBuilder, name: String): SValue = new SValue { val pt: PType = PVoid val st: SType = SVoid - def get: PCode = self + def get: SCode = self } } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/package.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/package.scala index ece62fdeb37..4ab4a359248 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/package.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/package.scala @@ -1,13 +1,22 @@ package is.hail.types.physical.stypes -import is.hail.asm4s.Code -import is.hail.types.physical.stypes.primitives.{SBooleanCode, SFloat32Code, SFloat64Code, SInt32Code, SInt64Code} +import is.hail.asm4s._ +import is.hail.types.physical.stypes.primitives._ +import is.hail.types.virtual._ package object interfaces { - def primitive(x: Code[Long]): SInt64Code = new SInt64Code(true, x) - def primitive(x: Code[Int]): SInt32Code = new SInt32Code(true, x) - def primitive(x: Code[Double]): SFloat64Code = new SFloat64Code(true, x) - def primitive(x: Code[Float]): SFloat32Code = new SFloat32Code(true, x) - def primitive(x: Code[Boolean]): SBooleanCode = new SBooleanCode(true, x) + def primitive(x: Code[Long]): SInt64Code = new SInt64Code(x) + def primitive(x: Code[Int]): SInt32Code = new SInt32Code(x) + def primitive(x: Code[Double]): SFloat64Code = new SFloat64Code(x) + def primitive(x: Code[Float]): SFloat32Code = new SFloat32Code(x) + def primitive(x: Code[Boolean]): SBooleanCode = new SBooleanCode(x) + + def primitive(t: Type, x: Code[_]): SCode = t match { + case TInt32 => primitive(coerce[Int](x)) + case TInt64 => primitive(coerce[Long](x)) + case TFloat32 => primitive(coerce[Float](x)) + case TFloat64 => primitive(coerce[Double](x)) + case TBoolean => primitive(coerce[Boolean](x)) + } } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SBoolean.scala b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SBoolean.scala index 6a1105387f0..2607f9f318e 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SBoolean.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SBoolean.scala @@ -2,60 +2,53 @@ package is.hail.types.physical.stypes.primitives import is.hail.annotations.Region import is.hail.asm4s.{BooleanInfo, Code, Settable, SettableBuilder, TypeInfo, Value} -import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} -import is.hail.types.physical.stypes.{SCode, SType} -import is.hail.types.physical.{PBoolean, PCode, PSettable, PType, PValue} +import is.hail.types.physical.stypes.{SCode, SSettable, SType, SValue} +import is.hail.types.physical.{PBoolean, PType} +import is.hail.types.virtual.{TBoolean, Type} import 
is.hail.utils.FastIndexedSeq -case class SBoolean(required: Boolean) extends SPrimitive { +case object SBoolean extends SPrimitive { def ti: TypeInfo[_] = BooleanInfo - override def pType: PBoolean = PBoolean(required) + lazy val virtualType: Type = TBoolean + + override def castRename(t: Type): SType = this def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { value.st match { - case SBoolean(_) => - value.asInstanceOf[SBooleanCode] + case SBoolean => + value } } def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(BooleanInfo) - def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = { - pt match { - case PBoolean(_) => - new SBooleanCode(required: Boolean, Region.loadBoolean(addr)) - } - } - def fromSettables(settables: IndexedSeq[Settable[_]]): SBooleanSettable = { val IndexedSeq(x: Settable[Boolean@unchecked]) = settables assert(x.ti == BooleanInfo) - new SBooleanSettable(required, x) + new SBooleanSettable( x) } def fromCodes(codes: IndexedSeq[Code[_]]): SBooleanCode = { val IndexedSeq(x: Code[Boolean@unchecked]) = codes assert(x.ti == BooleanInfo) - new SBooleanCode(required, x) + new SBooleanCode(x) } - def canonicalPType(): PType = pType + def canonicalPType(): PType = PBoolean() } -class SBooleanCode(required: Boolean, val code: Code[Boolean]) extends PCode with SPrimitiveCode { +class SBooleanCode(val code: Code[Boolean]) extends SCode with SPrimitiveCode { override def _primitiveCode: Code[_] = code - val pt: PBoolean = PBoolean(required) - - def st: SBoolean = SBoolean(required) + def st: SBoolean.type = SBoolean - def codeTuple(): IndexedSeq[Code[_]] = FastIndexedSeq(code) + def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = FastIndexedSeq(code) private[this] def memoizeWithBuilder(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): SBooleanSettable = { - val s = new SBooleanSettable(required, sb.newSettable[Boolean]("sboolean_memoize")) + val s = new SBooleanSettable(sb.newSettable[Boolean]("sboolean_memoize")) s.store(cb, this) s } @@ -68,21 +61,21 @@ class SBooleanCode(required: Boolean, val code: Code[Boolean]) extends PCode wit } object SBooleanSettable { - def apply(sb: SettableBuilder, name: String, required: Boolean): SBooleanSettable = { - new SBooleanSettable(required, sb.newSettable[Boolean](name)) + def apply(sb: SettableBuilder, name: String): SBooleanSettable = { + new SBooleanSettable( sb.newSettable[Boolean](name)) } } -class SBooleanSettable(required: Boolean, x: Settable[Boolean]) extends PValue with PSettable { - val pt: PBoolean = PBoolean(required) +class SBooleanSettable(x: Settable[Boolean]) extends SValue with SSettable { + val pt: PBoolean = PBoolean() - def st: SBoolean = SBoolean(required) + def st: SBoolean.type = SBoolean - def store(cb: EmitCodeBuilder, v: PCode): Unit = cb.assign(x, v.asBoolean.boolCode(cb)) + def store(cb: EmitCodeBuilder, v: SCode): Unit = cb.assign(x, v.asBoolean.boolCode(cb)) def settableTuple(): IndexedSeq[Settable[_]] = FastIndexedSeq(x) - def get: PCode = new SBooleanCode(required, x) + def get: SCode = new SBooleanCode(x) def boolCode(cb: EmitCodeBuilder): Code[Boolean] = x } \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat32.scala b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat32.scala index 2e788af86c2..43f5a2db83e 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat32.scala +++ 
b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat32.scala @@ -4,92 +4,84 @@ import is.hail.annotations.Region import is.hail.asm4s.{Code, FloatInfo, Settable, SettableBuilder, TypeInfo, Value} import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} -import is.hail.types.physical.stypes.{SCode, SType} -import is.hail.types.physical.{PCode, PFloat32, PSettable, PType, PValue} +import is.hail.types.physical.stypes.{SCode, SSettable, SType, SValue} +import is.hail.types.physical.{PFloat32, PType} +import is.hail.types.virtual.{TFloat32, Type} import is.hail.utils.FastIndexedSeq -case class SFloat32(required: Boolean) extends SPrimitive { +case object SFloat32 extends SPrimitive { def ti: TypeInfo[_] = FloatInfo - override def pType: PFloat32 = PFloat32(required) + lazy val virtualType: Type = TFloat32 + + override def castRename(t: Type): SType = this def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { value.st match { - case SFloat32(r) => - if (r == required) - value - else - new SFloat32Code(required, value.asInstanceOf[SFloat32Code].code) + case SFloat32 => value } } def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(FloatInfo) - def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = { - pt match { - case _: PFloat32 => - new SFloat32Code(required, Region.loadFloat(addr)) - } - } - def fromSettables(settables: IndexedSeq[Settable[_]]): SFloat32Settable = { val IndexedSeq(x: Settable[Float@unchecked]) = settables assert(x.ti == FloatInfo) - new SFloat32Settable(required, x) + new SFloat32Settable(x) } def fromCodes(codes: IndexedSeq[Code[_]]): SFloat32Code = { val IndexedSeq(x: Code[Float@unchecked]) = codes assert(x.ti == FloatInfo) - new SFloat32Code(required, x) + new SFloat32Code(x) } - def canonicalPType(): PType = pType + def canonicalPType(): PType = PFloat32() } -trait PFloat32Value extends PValue { +trait SFloat32Value extends SValue { def floatCode(cb: EmitCodeBuilder): Code[Float] } -class SFloat32Code(required: Boolean, val code: Code[Float]) extends PCode with SPrimitiveCode { +class SFloat32Code(val code: Code[Float]) extends SCode with SPrimitiveCode { override def _primitiveCode: Code[_] = code - val pt: PFloat32 = PFloat32(required) + val pt: PFloat32 = PFloat32(false) - def st: SFloat32 = SFloat32(required) + def st: SFloat32.type = SFloat32 - def codeTuple(): IndexedSeq[Code[_]] = FastIndexedSeq(code) + def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = FastIndexedSeq(code) - private[this] def memoizeWithBuilder(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): PFloat32Value = { - val s = new SFloat32Settable(required, sb.newSettable[Float]("sint64_memoize")) + private[this] def memoizeWithBuilder(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): SFloat32Value = { + val s = new SFloat32Settable(sb.newSettable[Float]("sint64_memoize")) s.store(cb, this) s } - def memoize(cb: EmitCodeBuilder, name: String): PFloat32Value = memoizeWithBuilder(cb, name, cb.localBuilder) + def memoize(cb: EmitCodeBuilder, name: String): SFloat32Value = memoizeWithBuilder(cb, name, cb.localBuilder) - def memoizeField(cb: EmitCodeBuilder, name: String): PFloat32Value = memoizeWithBuilder(cb, name, cb.fieldBuilder) + def memoizeField(cb: EmitCodeBuilder, name: String): SFloat32Value = memoizeWithBuilder(cb, name, cb.fieldBuilder) def floatCode(cb: EmitCodeBuilder): Code[Float] = code } 
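  // A sketch of the pattern this refactor assumes (illustrative only): primitive stypes
  // such as SFloat32 become case objects with no requiredness of their own, so
  // missingness/requiredness is tracked by the enclosing EmitType/EmitCode rather
  // than by the SCode value itself. E.g.
  //   val et = EmitType(SFloat32, required = true)   // requiredness lives on EmitType
  //   val c: SCode = new SFloat32Code(const(1.5f))   // no `required` flag on the code
  //   val ec = EmitCode.present(mb, c)               // presence handled at the EmitCode level
  // `EmitType`, `EmitCode.present` and `const` are names taken from this patch series;
  // exact signatures may differ.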
object SFloat32Settable { - def apply(sb: SettableBuilder, name: String, required: Boolean): SFloat32Settable = { - new SFloat32Settable(required, sb.newSettable[Float](name)) + def apply(sb: SettableBuilder, name: String): SFloat32Settable = { + new SFloat32Settable(sb.newSettable[Float](name)) } } -class SFloat32Settable(required: Boolean, x: Settable[Float]) extends PFloat32Value with PSettable { - val pt: PFloat32 = PFloat32(required) +class SFloat32Settable(x: Settable[Float]) extends SFloat32Value with SSettable { + val pt: PFloat32 = PFloat32() - def st: SFloat32 = SFloat32(required) + def st: SFloat32.type = SFloat32 - def store(cb: EmitCodeBuilder, v: PCode): Unit = cb.assign(x, v.asFloat.floatCode(cb)) + def store(cb: EmitCodeBuilder, v: SCode): Unit = cb.assign(x, v.asFloat.floatCode(cb)) def settableTuple(): IndexedSeq[Settable[_]] = FastIndexedSeq(x) - def get: PCode = new SFloat32Code(required, x) + def get: SCode = new SFloat32Code(x) def floatCode(cb: EmitCodeBuilder): Code[Float] = x } \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat64.scala b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat64.scala index d72346bc173..753ac6e9e21 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat64.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat64.scala @@ -4,22 +4,21 @@ import is.hail.annotations.Region import is.hail.asm4s.{Code, DoubleInfo, Settable, SettableBuilder, TypeInfo, Value} import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} -import is.hail.types.physical.stypes.{SCode, SType} -import is.hail.types.physical.{PCode, PFloat64, PSettable, PType, PValue} +import is.hail.types.physical.stypes.{SCode, SSettable, SType, SValue} +import is.hail.types.physical.{PFloat64, PType} +import is.hail.types.virtual.{TFloat64, Type} import is.hail.utils.FastIndexedSeq -case class SFloat64(required: Boolean) extends SPrimitive { +case object SFloat64 extends SPrimitive { def ti: TypeInfo[_] = DoubleInfo - override def pType: PFloat64 = PFloat64(required) + lazy val virtualType: Type = TFloat64 + + override def castRename(t: Type): SType = this def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { value.st match { - case SFloat64(r) => - if (r == required) - value - else - new SFloat64Code(required, value.asInstanceOf[SFloat64Code].code) + case SFloat64 => value } } @@ -28,72 +27,70 @@ case class SFloat64(required: Boolean) extends SPrimitive { def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = { pt match { case _: PFloat64 => - new SFloat64Code(required, Region.loadDouble(addr)) + new SFloat64Code(Region.loadDouble(addr)) } } def fromSettables(settables: IndexedSeq[Settable[_]]): SFloat64Settable = { val IndexedSeq(x: Settable[Double@unchecked]) = settables assert(x.ti == DoubleInfo) - new SFloat64Settable(required, x) + new SFloat64Settable(x) } def fromCodes(codes: IndexedSeq[Code[_]]): SFloat64Code = { val IndexedSeq(x: Code[Double@unchecked]) = codes assert(x.ti == DoubleInfo) - new SFloat64Code(required, x) + new SFloat64Code(x) } - def canonicalPType(): PType = pType + def canonicalPType(): PType = PFloat64() } -trait PFloat64Value extends PValue { +trait SFloat64Value extends SValue { def doubleCode(cb: EmitCodeBuilder): Code[Double] } object SFloat64Code { - def apply(code: Code[Double], 
required: Boolean = true): SFloat64Code = new SFloat64Code(required, code) + def apply(code: Code[Double]): SFloat64Code = new SFloat64Code(code) } -class SFloat64Code(required: Boolean, val code: Code[Double]) extends PCode with SPrimitiveCode { +class SFloat64Code(val code: Code[Double]) extends SCode with SPrimitiveCode { override def _primitiveCode: Code[_] = code - val pt: PFloat64 = PFloat64(required) - - def st: SFloat64 = SFloat64(required) + def st: SFloat64.type = SFloat64 - def codeTuple(): IndexedSeq[Code[_]] = FastIndexedSeq(code) + def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = FastIndexedSeq(code) - private[this] def memoizeWithBuilder(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): PFloat64Value = { - val s = new SFloat64Settable(required, sb.newSettable[Double]("sint64_memoize")) + private[this] def memoizeWithBuilder(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): SFloat64Value = { + val s = new SFloat64Settable(sb.newSettable[Double]("sint64_memoize")) s.store(cb, this) s } - def memoize(cb: EmitCodeBuilder, name: String): PFloat64Value = memoizeWithBuilder(cb, name, cb.localBuilder) + def memoize(cb: EmitCodeBuilder, name: String): SFloat64Value = memoizeWithBuilder(cb, name, cb.localBuilder) - def memoizeField(cb: EmitCodeBuilder, name: String): PFloat64Value = memoizeWithBuilder(cb, name, cb.fieldBuilder) + def memoizeField(cb: EmitCodeBuilder, name: String): SFloat64Value = memoizeWithBuilder(cb, name, cb.fieldBuilder) def doubleCode(cb: EmitCodeBuilder): Code[Double] = code } object SFloat64Settable { - def apply(sb: SettableBuilder, name: String, required: Boolean): SFloat64Settable = { - new SFloat64Settable(required, sb.newSettable[Double](name)) + def apply(sb: SettableBuilder, name: String): SFloat64Settable = { + new SFloat64Settable(sb.newSettable[Double](name)) } } -class SFloat64Settable(required: Boolean, x: Settable[Double]) extends PFloat64Value with PSettable { - val pt: PFloat64 = PFloat64(required) +class SFloat64Settable(x: Settable[Double]) extends SFloat64Value with SSettable { + val pt: PFloat64 = PFloat64(false) - def st: SFloat64 = SFloat64(required) + def st: SFloat64.type = SFloat64 - def store(cb: EmitCodeBuilder, v: PCode): Unit = cb.assign(x, v.asDouble.doubleCode(cb)) + def store(cb: EmitCodeBuilder, v: SCode): Unit = cb.assign(x, v.asDouble.doubleCode(cb)) def settableTuple(): IndexedSeq[Settable[_]] = FastIndexedSeq(x) - def get: PCode = new SFloat64Code(required, x) + def get: SCode = new SFloat64Code(x) def doubleCode(cb: EmitCodeBuilder): Code[Double] = x } \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt32.scala b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt32.scala index 9350bd95524..fd9f0cfd6d9 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt32.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt32.scala @@ -4,91 +4,81 @@ import is.hail.annotations.Region import is.hail.asm4s.{Code, IntInfo, Settable, SettableBuilder, TypeInfo, Value} import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} -import is.hail.types.physical.stypes.{SCode, SType} -import is.hail.types.physical.{PCode, PInt32, PSettable, PType, PValue} +import is.hail.types.physical.stypes.{SCode, SSettable, SType, SValue} +import is.hail.types.physical.{PInt32, PType} +import is.hail.types.virtual.{TInt32, Type} import 
is.hail.utils.FastIndexedSeq -case class SInt32(required: Boolean) extends SPrimitive { +case object SInt32 extends SPrimitive { def ti: TypeInfo[_] = IntInfo - override def pType: PInt32 = PInt32(required) + lazy val virtualType: Type = TInt32 + + override def castRename(t: Type): SType = this def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { value.st match { - case SInt32(r) => - if (r == required) - value - else - new SInt32Code(required, value.asInstanceOf[SInt32Code].code) + case SInt32 => value } } def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(IntInfo) - def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = { - pt match { - case _: PInt32 => - new SInt32Code(required, Region.loadInt(addr)) - } - } - def fromSettables(settables: IndexedSeq[Settable[_]]): SInt32Settable = { val IndexedSeq(x: Settable[Int@unchecked]) = settables assert(x.ti == IntInfo) - new SInt32Settable(required, x) + new SInt32Settable(x) } def fromCodes(codes: IndexedSeq[Code[_]]): SInt32Code = { val IndexedSeq(x: Code[Int@unchecked]) = codes assert(x.ti == IntInfo) - new SInt32Code(required, x) + new SInt32Code(x) } - def canonicalPType(): PType = pType + def canonicalPType(): PType = PInt32() } -trait PInt32Value extends PValue { +trait SInt32Value extends SValue { def intCode(cb: EmitCodeBuilder): Code[Int] } -class SInt32Code(required: Boolean, val code: Code[Int]) extends PCode with SPrimitiveCode { +class SInt32Code(val code: Code[Int]) extends SCode with SPrimitiveCode { override def _primitiveCode: Code[_] = code - val pt: PInt32 = PInt32(required) - - def st: SInt32 = SInt32(required) + def st: SInt32.type = SInt32 - def codeTuple(): IndexedSeq[Code[_]] = FastIndexedSeq(code) + def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = FastIndexedSeq(code) - private[this] def memoizeWithBuilder(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): PInt32Value = { - val s = new SInt32Settable(required, sb.newSettable[Int]("sInt32_memoize")) + private[this] def memoizeWithBuilder(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): SInt32Value = { + val s = new SInt32Settable(sb.newSettable[Int]("sInt32_memoize")) s.store(cb, this) s } - def memoize(cb: EmitCodeBuilder, name: String): PInt32Value = memoizeWithBuilder(cb, name, cb.localBuilder) + def memoize(cb: EmitCodeBuilder, name: String): SInt32Value = memoizeWithBuilder(cb, name, cb.localBuilder) - def memoizeField(cb: EmitCodeBuilder, name: String): PInt32Value = memoizeWithBuilder(cb, name, cb.fieldBuilder) + def memoizeField(cb: EmitCodeBuilder, name: String): SInt32Value = memoizeWithBuilder(cb, name, cb.fieldBuilder) def intCode(cb: EmitCodeBuilder): Code[Int] = code } object SInt32Settable { - def apply(sb: SettableBuilder, name: String, required: Boolean): SInt32Settable = { - new SInt32Settable(required, sb.newSettable[Int](name)) + def apply(sb: SettableBuilder, name: String): SInt32Settable = { + new SInt32Settable(sb.newSettable[Int](name)) } } -class SInt32Settable(required: Boolean, x: Settable[Int]) extends PInt32Value with PSettable { - val pt: PInt32 = PInt32(required) +class SInt32Settable(x: Settable[Int]) extends SInt32Value with SSettable { + val pt: PInt32 = PInt32(false) - def st: SInt32 = SInt32(required) + def st: SInt32.type = SInt32 - def store(cb: EmitCodeBuilder, v: PCode): Unit = cb.assign(x, v.asInt.intCode(cb)) + def store(cb: EmitCodeBuilder, v: SCode): Unit = cb.assign(x, v.asInt.intCode(cb)) def 
settableTuple(): IndexedSeq[Settable[_]] = FastIndexedSeq(x) - def get: PCode = new SInt32Code(required, x) + def get: SCode = new SInt32Code(x) def intCode(cb: EmitCodeBuilder): Code[Int] = x } \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt64.scala b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt64.scala index 7ebc3e5c3bf..2a2c0ba9672 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt64.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt64.scala @@ -3,92 +3,81 @@ package is.hail.types.physical.stypes.primitives import is.hail.annotations.Region import is.hail.asm4s.{Code, LongInfo, Settable, SettableBuilder, TypeInfo, Value} import is.hail.expr.ir.EmitCodeBuilder -import is.hail.types.physical.stypes.SCode -import is.hail.types.physical.{PCode, PInt64, PSettable, PType, PValue} +import is.hail.types.physical.stypes.{SCode, SSettable, SType, SValue} +import is.hail.types.physical.{PInt64, PType} +import is.hail.types.virtual.{TInt64, Type} import is.hail.utils.FastIndexedSeq -case class SInt64(required: Boolean) extends SPrimitive { +case object SInt64 extends SPrimitive { def ti: TypeInfo[_] = LongInfo - override def pType: PInt64 = PInt64(required) + lazy val virtualType: Type = TInt64 + + override def castRename(t: Type): SType = this def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { value.st match { - case SInt64(r) => - if (r == required) - value - else - new SInt64Code(required, value.asInstanceOf[SInt64Code].code) + case SInt64 => value } } def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq(LongInfo) - def loadFrom(cb: EmitCodeBuilder, region: Value[Region], pt: PType, addr: Code[Long]): SCode = { - pt match { - case _: PInt64 => - new SInt64Code(required, Region.loadLong(addr)) - } - } - def fromSettables(settables: IndexedSeq[Settable[_]]): SInt64Settable = { val IndexedSeq(x: Settable[Long@unchecked]) = settables assert(x.ti == LongInfo) - new SInt64Settable(required, x) + new SInt64Settable(x) } def fromCodes(codes: IndexedSeq[Code[_]]): SInt64Code = { val IndexedSeq(x: Code[Long@unchecked]) = codes assert(x.ti == LongInfo) - new SInt64Code(required, x) + new SInt64Code(x) } - def canonicalPType(): PType = pType + def canonicalPType(): PType = PInt64() } -trait PInt64Value extends PValue { +trait SInt64Value extends SValue { def longCode(cb: EmitCodeBuilder): Code[Long] - } -class SInt64Code(required: Boolean, val code: Code[Long]) extends PCode with SPrimitiveCode { +class SInt64Code(val code: Code[Long]) extends SCode with SPrimitiveCode { override def _primitiveCode: Code[_] = code - val pt: PInt64 = PInt64(required) - - def st: SInt64 = SInt64(required) + def st: SInt64.type = SInt64 - def codeTuple(): IndexedSeq[Code[_]] = FastIndexedSeq(code) + def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = FastIndexedSeq(code) - private[this] def memoizeWithBuilder(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): PInt64Value = { - val s = new SInt64Settable(required, sb.newSettable[Long]("sint64_memoize")) + private[this] def memoizeWithBuilder(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): SInt64Value = { + val s = new SInt64Settable(sb.newSettable[Long]("sint64_memoize")) s.store(cb, this) s } - def memoize(cb: EmitCodeBuilder, name: String): PInt64Value = memoizeWithBuilder(cb, name, cb.localBuilder) + def memoize(cb: EmitCodeBuilder, name: String): 
SInt64Value = memoizeWithBuilder(cb, name, cb.localBuilder) - def memoizeField(cb: EmitCodeBuilder, name: String): PInt64Value = memoizeWithBuilder(cb, name, cb.fieldBuilder) + def memoizeField(cb: EmitCodeBuilder, name: String): SInt64Value = memoizeWithBuilder(cb, name, cb.fieldBuilder) def longCode(cb: EmitCodeBuilder): Code[Long] = code } object SInt64Settable { - def apply(sb: SettableBuilder, name: String, required: Boolean): SInt64Settable = { - new SInt64Settable(required, sb.newSettable[Long](name)) + def apply(sb: SettableBuilder, name: String): SInt64Settable = { + new SInt64Settable(sb.newSettable[Long](name)) } } -class SInt64Settable(required: Boolean, x: Settable[Long]) extends PInt64Value with PSettable { - val pt: PInt64 = PInt64(required) +class SInt64Settable(x: Settable[Long]) extends SInt64Value with SSettable { + val pt: PInt64 = PInt64(false) - def st: SInt64 = SInt64(required) + def st: SInt64.type = SInt64 - def store(cb: EmitCodeBuilder, v: PCode): Unit = cb.assign(x, v.asLong.longCode(cb)) + def store(cb: EmitCodeBuilder, v: SCode): Unit = cb.assign(x, v.asLong.longCode(cb)) def settableTuple(): IndexedSeq[Settable[_]] = FastIndexedSeq(x) - def get: PCode = new SInt64Code(required, x) + def get: SCode = new SInt64Code(x) def longCode(cb: EmitCodeBuilder): Code[Long] = x } \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/types/virtual/TStruct.scala b/hail/src/main/scala/is/hail/types/virtual/TStruct.scala index f96fdb7a9a4..d068c0fbe57 100644 --- a/hail/src/main/scala/is/hail/types/virtual/TStruct.scala +++ b/hail/src/main/scala/is/hail/types/virtual/TStruct.scala @@ -1,7 +1,7 @@ package is.hail.types.virtual import is.hail.annotations.{Annotation, AnnotationPathException, _} -import is.hail.expr.ir.{Env, IRParser} +import is.hail.expr.ir.{Env, IRParser, IntArrayBuilder} import is.hail.types.physical.{PField, PStruct} import is.hail.utils._ import org.apache.spark.sql.Row @@ -185,7 +185,7 @@ final case class TStruct(fields: IndexedSeq[Field]) extends TBaseStruct { def annotate(other: TStruct): (TStruct, Merger) = { val newFieldsBuilder = new BoxedArrayBuilder[(String, Type)]() - val fieldIdxBuilder = new BoxedArrayBuilder[Int]() + val fieldIdxBuilder = new IntArrayBuilder() // In fieldIdxBuilder, positive integers are field indices from the left. // Negative integers are the complement of field indices from the right. 
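[Editor's aside on the fieldIdx encoding described in the TStruct.annotate comment above: the sketch below is a minimal, self-contained illustration in plain Scala, not Hail's actual Merger. It assumes only the convention stated in that comment (non-negative values index the left row; negative values are bitwise complements, ~j, of right-row indices); the object and method names are hypothetical.]

object FieldIdxEncodingDemo {
  // Merge two rows according to an index array using the encoding above:
  // i >= 0 takes left(i); i < 0 takes right(~i), since ~(~j) == j.
  def merge(fieldIdx: IndexedSeq[Int], left: IndexedSeq[Any], right: IndexedSeq[Any]): IndexedSeq[Any] =
    fieldIdx.map { i =>
      if (i >= 0) left(i)   // field index taken from the left struct
      else right(~i)        // complement recovers the right-struct index
    }

  def main(args: Array[String]): Unit = {
    // left = {a, b, c}, right = {b, d}: keep right's b, append d.
    val fieldIdx = IndexedSeq(0, ~0, 2, ~1)
    println(merge(fieldIdx, IndexedSeq("a", "b_left", "c"), IndexedSeq("b_right", "d")))
    // prints: Vector(a, b_right, c, d)
  }
}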
diff --git a/hail/src/main/scala/is/hail/utils/ArrayStack.scala b/hail/src/main/scala/is/hail/utils/ArrayStack.scala index 4e197ae8823..6c388cae087 100644 --- a/hail/src/main/scala/is/hail/utils/ArrayStack.scala +++ b/hail/src/main/scala/is/hail/utils/ArrayStack.scala @@ -2,7 +2,7 @@ package is.hail.utils import scala.reflect.ClassTag -final class ObjectArrayStack[T](hintSize: Int = 16)(implicit tct: ClassTag[T]) { +final class ObjectArrayStack[T <: AnyRef](hintSize: Int = 16)(implicit tct: ClassTag[T]) { private var a = new Array[T](hintSize) private[this] var size_ = 0 diff --git a/hail/src/main/scala/is/hail/utils/BoxedArrayBuilder.scala b/hail/src/main/scala/is/hail/utils/BoxedArrayBuilder.scala index 7472f83479e..8fe07fab815 100644 --- a/hail/src/main/scala/is/hail/utils/BoxedArrayBuilder.scala +++ b/hail/src/main/scala/is/hail/utils/BoxedArrayBuilder.scala @@ -6,7 +6,7 @@ object BoxedArrayBuilder { final val defaultInitialCapacity: Int = 16 } -final class BoxedArrayBuilder[T](initialCapacity: Int)(implicit tct: ClassTag[T]) extends Serializable { +final class BoxedArrayBuilder[T <: AnyRef](initialCapacity: Int)(implicit tct: ClassTag[T]) extends Serializable { private[utils] var b: Array[T] = new Array[T](initialCapacity) private[utils] var size_ : Int = 0 diff --git a/hail/src/main/scala/is/hail/utils/Graph.scala b/hail/src/main/scala/is/hail/utils/Graph.scala index 7e82d704011..b420705a8af 100644 --- a/hail/src/main/scala/is/hail/utils/Graph.scala +++ b/hail/src/main/scala/is/hail/utils/Graph.scala @@ -2,8 +2,9 @@ package is.hail.utils import is.hail.annotations.{Region, RegionValueBuilder} import is.hail.asm4s._ -import is.hail.types.physical.{PCanonicalTuple, PTuple, PType, PTypeReferenceSingleCodeType} +import is.hail.types.physical.{PCanonicalTuple, PTuple, PType, stypes} import is.hail.expr.ir.{Compile, ExecuteContext, IR, IRParser, IRParserEnvironment, Interpret, Literal, MakeTuple, SingleCodeEmitParamType} +import is.hail.types.physical.stypes.PTypeReferenceSingleCodeType import is.hail.types.virtual._ import org.apache.spark.sql.Row diff --git a/hail/src/main/scala/is/hail/utils/HailIterator.scala b/hail/src/main/scala/is/hail/utils/HailIterator.scala index a13ad5b72e1..afdd0d9393e 100644 --- a/hail/src/main/scala/is/hail/utils/HailIterator.scala +++ b/hail/src/main/scala/is/hail/utils/HailIterator.scala @@ -1,5 +1,6 @@ package is.hail.utils +import scala.collection.mutable import scala.reflect.ClassTag abstract class HailIterator[@specialized T] { @@ -8,10 +9,10 @@ abstract class HailIterator[@specialized T] { def hasNext: Boolean def toArray(implicit tct: ClassTag[T]): Array[T] = { - val b = new BoxedArrayBuilder[T]() + val b = new mutable.ArrayBuffer[T]() while (hasNext) b += next() - b.result() + b.toArray } def countNonNegative()(implicit ev: Numeric[T]): Int = { diff --git a/hail/src/main/scala/is/hail/utils/MissingAnnotationArrayBuilder.scala b/hail/src/main/scala/is/hail/utils/MissingAnnotationArrayBuilder.scala deleted file mode 100644 index b5f1eb6bd82..00000000000 --- a/hail/src/main/scala/is/hail/utils/MissingAnnotationArrayBuilder.scala +++ /dev/null @@ -1,68 +0,0 @@ -package is.hail.utils - -import is.hail.annotations.{Annotation, RegionValueBuilder} -import is.hail.types.virtual.Type - -import scala.collection.mutable - -class MissingAnnotationArrayBuilder extends Serializable { - private var len = 0 - private var elements = new BoxedArrayBuilder[Annotation]() - private var isMissing = new mutable.BitSet() - - def addMissing() { - isMissing.add(len) - len += 
1 - } - - def add(x: Annotation) { - elements += x - len += 1 - } - - def length(): Int = len - - def foreach(whenMissing: (Int) => Unit)(whenPresent: (Int, Annotation) => Unit) { - var i = 0 - var j = 0 - while (i < len) { - if (isMissing(i)) - whenMissing(i) - else { - whenPresent(i, elements(j)) - j += 1 - } - i += 1 - } - } - - def write(rvb: RegionValueBuilder, t: Type) { - rvb.startArray(len) - var i = 0 - var j = 0 - while (i < len) { - if (isMissing(i)) - rvb.setMissing() - else { - rvb.addAnnotation(t, elements(j)) - j += 1 - } - i += 1 - } - rvb.endArray() - } - - def clear() { - len = 0 - elements.clear() - isMissing.clear() - } - - override def clone(): MissingAnnotationArrayBuilder = { - val ab = new MissingAnnotationArrayBuilder() - ab.len = len - ab.elements = elements.clone() - ab.isMissing = isMissing.clone() - ab - } -} diff --git a/hail/src/main/scala/is/hail/utils/MissingBooleanArrayBuilder.scala b/hail/src/main/scala/is/hail/utils/MissingBooleanArrayBuilder.scala deleted file mode 100644 index 31506c4d155..00000000000 --- a/hail/src/main/scala/is/hail/utils/MissingBooleanArrayBuilder.scala +++ /dev/null @@ -1,65 +0,0 @@ -package is.hail.utils - -import is.hail.annotations._ -import is.hail.types.virtual.{TArray, TBoolean} - -import scala.collection.mutable - -class MissingBooleanArrayBuilder extends Serializable { - private var len = 0 - private var elements = new mutable.BitSet() - private var isMissing = new mutable.BitSet() - - def addMissing() { - isMissing.add(len) - len += 1 - } - - def add(x: Boolean) { - if (x) - elements.add(len) - len += 1 - } - - def length(): Int = len - - def foreach(whenMissing: (Int) => Unit)(whenPresent: (Int, Boolean) => Unit) { - var i = 0 - while (i < len) { - if (isMissing(i)) - whenMissing(i) - else - whenPresent(i, elements(i)) - i += 1 - } - } - - val typ = TArray(TBoolean) - - def write(rvb: RegionValueBuilder) { - rvb.startArray(len) - var i = 0 - while (i < len) { - if (isMissing(i)) - rvb.setMissing() - else - rvb.addBoolean(elements(i)) - i += 1 - } - rvb.endArray() - } - - def clear() { - len = 0 - elements.clear() - isMissing.clear() - } - - override def clone(): MissingBooleanArrayBuilder = { - val ab = new MissingBooleanArrayBuilder() - ab.len = len - ab.elements = elements.clone() - ab.isMissing = isMissing.clone() - ab - } -} diff --git a/hail/src/main/scala/is/hail/utils/MissingDoubleArrayBuilder.scala b/hail/src/main/scala/is/hail/utils/MissingDoubleArrayBuilder.scala deleted file mode 100644 index a300f915d51..00000000000 --- a/hail/src/main/scala/is/hail/utils/MissingDoubleArrayBuilder.scala +++ /dev/null @@ -1,70 +0,0 @@ -package is.hail.utils - -import is.hail.annotations._ -import is.hail.types.virtual.{TArray, TFloat64} - -import scala.collection.mutable - -class MissingDoubleArrayBuilder extends Serializable { - private var len = 0 - private var elements = new BoxedArrayBuilder[Double]() - private var isMissing = new mutable.BitSet() - - def addMissing() { - isMissing.add(len) - len += 1 - } - - def add(x: Double) { - elements += x - len += 1 - } - - def length(): Int = len - - def foreach(whenMissing: (Int) => Unit)(whenPresent: (Int, Double) => Unit) { - var i = 0 - var j = 0 - while (i < len) { - if (isMissing(i)) - whenMissing(i) - else { - whenPresent(i, elements(j)) - j += 1 - } - i += 1 - } - } - - val typ = TArray(TFloat64) - - def write(rvb: RegionValueBuilder) { - rvb.startArray(len) - var i = 0 - var j = 0 - while (i < len) { - if (isMissing(i)) - rvb.setMissing() - else { - 
rvb.addDouble(elements(j)) - j += 1 - } - i += 1 - } - rvb.endArray() - } - - def clear() { - len = 0 - elements.clear() - isMissing.clear() - } - - override def clone(): MissingDoubleArrayBuilder = { - val ab = new MissingDoubleArrayBuilder() - ab.len = len - ab.elements = elements.clone() - ab.isMissing = isMissing.clone() - ab - } -} diff --git a/hail/src/main/scala/is/hail/utils/MissingFloatArrayBuilder.scala b/hail/src/main/scala/is/hail/utils/MissingFloatArrayBuilder.scala deleted file mode 100644 index e8dca65c818..00000000000 --- a/hail/src/main/scala/is/hail/utils/MissingFloatArrayBuilder.scala +++ /dev/null @@ -1,70 +0,0 @@ -package is.hail.utils - -import is.hail.annotations._ -import is.hail.types.virtual.{TArray, TFloat32} - -import scala.collection.mutable - -class MissingFloatArrayBuilder extends Serializable { - private var len = 0 - private var elements = new BoxedArrayBuilder[Float]() - private var isMissing = new mutable.BitSet() - - def addMissing() { - isMissing.add(len) - len += 1 - } - - def add(x: Float) { - elements += x - len += 1 - } - - def length(): Int = len - - def foreach(whenMissing: (Int) => Unit)(whenPresent: (Int, Float) => Unit) { - var i = 0 - var j = 0 - while (i < len) { - if (isMissing(i)) - whenMissing(i) - else { - whenPresent(i, elements(j)) - j += 1 - } - i += 1 - } - } - - val typ = TArray(TFloat32) - - def write(rvb: RegionValueBuilder) { - rvb.startArray(len) - var i = 0 - var j = 0 - while (i < len) { - if (isMissing(i)) - rvb.setMissing() - else { - rvb.addFloat(elements(j)) - j += 1 - } - i += 1 - } - rvb.endArray() - } - - def clear() { - len = 0 - elements.clear() - isMissing.clear() - } - - override def clone(): MissingFloatArrayBuilder = { - val ab = new MissingFloatArrayBuilder() - ab.len = len - ab.elements = elements.clone() - ab.isMissing = isMissing.clone() - ab - } -} diff --git a/hail/src/main/scala/is/hail/utils/MissingIntArrayBuilder.scala b/hail/src/main/scala/is/hail/utils/MissingIntArrayBuilder.scala deleted file mode 100644 index 4557cc01c95..00000000000 --- a/hail/src/main/scala/is/hail/utils/MissingIntArrayBuilder.scala +++ /dev/null @@ -1,70 +0,0 @@ -package is.hail.utils - -import is.hail.annotations._ -import is.hail.types.virtual.{TArray, TInt32} - -import scala.collection.mutable - -class MissingIntArrayBuilder extends Serializable { - private var len = 0 - private var elements = new BoxedArrayBuilder[Int]() - private var isMissing = new mutable.BitSet() - - def addMissing() { - isMissing.add(len) - len += 1 - } - - def add(x: Int) { - elements += x - len += 1 - } - - def length(): Int = len - - def foreach(whenMissing: (Int) => Unit)(whenPresent: (Int, Int) => Unit) { - var i = 0 - var j = 0 - while (i < len) { - if (isMissing(i)) - whenMissing(i) - else { - whenPresent(i, elements(j)) - j += 1 - } - i += 1 - } - } - - val typ = TArray(TInt32) - - def write(rvb: RegionValueBuilder) { - rvb.startArray(len) - var i = 0 - var j = 0 - while (i < len) { - if (isMissing(i)) - rvb.setMissing() - else { - rvb.addInt(elements(j)) - j += 1 - } - i += 1 - } - rvb.endArray() - } - - def clear() { - len = 0 - elements.clear() - isMissing.clear() - } - - override def clone(): MissingIntArrayBuilder = { - val ab = new MissingIntArrayBuilder() - ab.len = len - ab.elements = elements.clone() - ab.isMissing = isMissing.clone() - ab - } -} diff --git a/hail/src/main/scala/is/hail/utils/MissingLongArrayBuilder.scala b/hail/src/main/scala/is/hail/utils/MissingLongArrayBuilder.scala deleted file mode 100644 index 
522da7c67e4..00000000000 --- a/hail/src/main/scala/is/hail/utils/MissingLongArrayBuilder.scala +++ /dev/null @@ -1,70 +0,0 @@ -package is.hail.utils - -import is.hail.annotations._ -import is.hail.types.virtual.{TArray, TInt64} - -import scala.collection.mutable - -class MissingLongArrayBuilder extends Serializable { - private var len = 0 - private var elements = new BoxedArrayBuilder[Long]() - private var isMissing = new mutable.BitSet() - - def addMissing() { - isMissing.add(len) - len += 1 - } - - def add(x: Long) { - elements += x - len += 1 - } - - def length(): Int = len - - def foreach(whenMissing: (Int) => Unit)(whenPresent: (Int, Long) => Unit) { - var i = 0 - var j = 0 - while (i < len) { - if (isMissing(i)) - whenMissing(i) - else { - whenPresent(i, elements(j)) - j += 1 - } - i += 1 - } - } - - val typ = TArray(TInt64) - - def write(rvb: RegionValueBuilder) { - rvb.startArray(len) - var i = 0 - var j = 0 - while (i < len) { - if (isMissing(i)) - rvb.setMissing() - else { - rvb.addLong(elements(j)) - j += 1 - } - i += 1 - } - rvb.endArray() - } - - def clear() { - len = 0 - elements.clear() - isMissing.clear() - } - - override def clone(): MissingLongArrayBuilder = { - val ab = new MissingLongArrayBuilder() - ab.len = len - ab.elements = elements.clone() - ab.isMissing = isMissing.clone() - ab - } -} diff --git a/hail/src/main/scala/is/hail/utils/TextTableReader.scala b/hail/src/main/scala/is/hail/utils/TextTableReader.scala index 9dc05818379..ec6267505b5 100644 --- a/hail/src/main/scala/is/hail/utils/TextTableReader.scala +++ b/hail/src/main/scala/is/hail/utils/TextTableReader.scala @@ -333,6 +333,8 @@ class TextTableReader( PCanonicalStruct.empty(required = true) } + def renderShort(): String = defaultRender() + def executeGeneric(ctx: ExecuteContext): GenericTableValue = { val fs = ctx.fs diff --git a/hail/src/main/scala/is/hail/utils/package.scala b/hail/src/main/scala/is/hail/utils/package.scala index 8719cb9459a..dd3267e07c2 100644 --- a/hail/src/main/scala/is/hail/utils/package.scala +++ b/hail/src/main/scala/is/hail/utils/package.scala @@ -7,9 +7,9 @@ import java.security.SecureRandom import java.text.SimpleDateFormat import java.util.{Base64, Date} import java.util.zip.{Deflater, Inflater} - import is.hail.annotations.ExtendedOrdering import is.hail.check.Gen +import is.hail.expr.ir.ByteArrayBuilder import org.apache.commons.io.output.TeeOutputStream import org.apache.commons.lang3.StringUtils import org.apache.hadoop.fs.PathIOException @@ -801,7 +801,7 @@ package object utils extends Logging } } - def compress(bb: BoxedArrayBuilder[Byte], input: Array[Byte]): Int = { + def compress(bb: ByteArrayBuilder, input: Array[Byte]): Int = { val compressor = new Deflater() compressor.setInput(input) compressor.finish() diff --git a/hail/src/main/scala/is/hail/utils/richUtils/RichCodeInputBuffer.scala b/hail/src/main/scala/is/hail/utils/richUtils/RichCodeInputBuffer.scala index 3455d14a56d..55c53767cf4 100644 --- a/hail/src/main/scala/is/hail/utils/richUtils/RichCodeInputBuffer.scala +++ b/hail/src/main/scala/is/hail/utils/richUtils/RichCodeInputBuffer.scala @@ -6,6 +6,9 @@ import is.hail.io.InputBuffer import is.hail.utils._ import is.hail.asm4s._ import is.hail.types.physical._ +import is.hail.types.physical.stypes.SCode +import is.hail.types.physical.stypes.interfaces._ +import is.hail.types.virtual._ class RichCodeInputBuffer( val ib: Value[InputBuffer] @@ -87,11 +90,11 @@ class RichCodeInputBuffer( ib.invoke[Region, Long, Int, Unit]("readBytes", toRegion, toOff, n) } - def 
readPrimitive(typ: PType): Code[_] = typ match { - case _: PBoolean => readBoolean() - case _: PInt32 => readInt() - case _: PInt64 => readLong() - case _: PFloat32 => readFloat() - case _: PFloat64 => readDouble() + def readPrimitive(t: Type): SCode = t match { + case TBoolean => primitive(readBoolean()) + case TInt32 => primitive(readInt()) + case TInt64 => primitive(readLong()) + case TFloat32 => primitive(readFloat()) + case TFloat64 => primitive(readDouble()) } } diff --git a/hail/src/main/scala/is/hail/utils/richUtils/RichCodeOutputBuffer.scala b/hail/src/main/scala/is/hail/utils/richUtils/RichCodeOutputBuffer.scala index 01179cccd9b..be5c2f5c114 100644 --- a/hail/src/main/scala/is/hail/utils/richUtils/RichCodeOutputBuffer.scala +++ b/hail/src/main/scala/is/hail/utils/richUtils/RichCodeOutputBuffer.scala @@ -2,8 +2,11 @@ package is.hail.utils.richUtils import is.hail.annotations.Region import is.hail.asm4s._ +import is.hail.expr.ir.EmitCodeBuilder import is.hail.types.physical._ import is.hail.io.OutputBuffer +import is.hail.types.physical.stypes.SCode +import is.hail.types.virtual._ class RichCodeOutputBuffer( val ob: Value[OutputBuffer] @@ -56,11 +59,11 @@ class RichCodeOutputBuffer( def writeUTF(s: Code[String]): Code[Unit] = ob.invoke[String, Unit]("writeUTF", s) - def writePrimitive(typ: PType): Code[_] => Code[Unit] = typ match { - case _: PBoolean => v => writeBoolean(coerce[Boolean](v)) - case _: PInt32 => v => writeInt(coerce[Int](v)) - case _: PInt64 => v => writeLong(coerce[Long](v)) - case _: PFloat32 => v => writeFloat(coerce[Float](v)) - case _: PFloat64 => v => writeDouble(coerce[Double](v)) + def writePrimitive(cb: EmitCodeBuilder, pc: SCode): Unit = pc.st.virtualType match { + case TBoolean => cb += writeBoolean(pc.asBoolean.boolCode(cb)) + case TInt32 => cb += writeInt(pc.asInt.intCode(cb)) + case TInt64 => cb += writeLong(pc.asLong.longCode(cb)) + case TFloat32 => cb += writeFloat(pc.asFloat.floatCode(cb)) + case TFloat64 => cb += writeDouble(pc.asDouble.doubleCode(cb)) } } diff --git a/hail/src/main/scala/is/hail/utils/richUtils/RichRow.scala b/hail/src/main/scala/is/hail/utils/richUtils/RichRow.scala index 52899f482af..94bb63fe33e 100644 --- a/hail/src/main/scala/is/hail/utils/richUtils/RichRow.scala +++ b/hail/src/main/scala/is/hail/utils/richUtils/RichRow.scala @@ -3,6 +3,8 @@ package is.hail.utils.richUtils import is.hail.utils.BoxedArrayBuilder import org.apache.spark.sql.Row +import scala.collection.mutable + class RichRow(r: Row) { def update(i: Int, a: Any): Row = { @@ -19,18 +21,18 @@ class RichRow(r: Row) { } def append(a: Any): Row = { - val ab = new BoxedArrayBuilder[Any]() + val ab = new mutable.ArrayBuffer[Any]() ab ++= r.toSeq ab += a - Row.fromSeq(ab.result()) + Row.fromSeq(ab) } def insertBefore(i: Int, a: Any): Row = { - val ab = new BoxedArrayBuilder[Any]() + val ab = new mutable.ArrayBuffer[Any]() (0 until i).foreach(ab += r.get(_)) ab += a (i until r.size).foreach(ab += r.get(_)) - Row.fromSeq(ab.result()) + Row.fromSeq(ab) } def truncate(newSize: Int): Row = { diff --git a/hail/src/main/scala/is/hail/utils/richUtils/RichString.scala b/hail/src/main/scala/is/hail/utils/richUtils/RichString.scala index ff57815dfea..e871170e64f 100644 --- a/hail/src/main/scala/is/hail/utils/richUtils/RichString.scala +++ b/hail/src/main/scala/is/hail/utils/richUtils/RichString.scala @@ -9,13 +9,14 @@ class RichString(val str: String) extends AnyVal { def strings: (String, String) = (truncate, str) } - def equalsCI(other: String): Boolean = + def 
equalsCaseInsensitive(other: String): Boolean = if (str.length == other.length) { - for (i <- 0 until str.length) + var i = 0 + while (i < str.length) { if ((str charAt i).toLower != (other charAt i).toLower) return false + i += 1 + } true - } - else - false + } else false } diff --git a/hail/src/main/scala/is/hail/variant/RegionValueVariant.scala b/hail/src/main/scala/is/hail/variant/RegionValueVariant.scala index 0584d17edf8..958b997241a 100644 --- a/hail/src/main/scala/is/hail/variant/RegionValueVariant.scala +++ b/hail/src/main/scala/is/hail/variant/RegionValueVariant.scala @@ -1,12 +1,12 @@ package is.hail.variant import is.hail.annotations._ -import is.hail.types.physical.{PArray, PLocus, PString, PStruct} +import is.hail.types.physical.{PArray, PInt32, PLocus, PString, PStruct} import is.hail.utils._ class RegionValueVariant(rowType: PStruct) extends View { private val locusField = rowType.fieldByName("locus") - private val locusPType = locusField.typ.asInstanceOf[PLocus] + private val locusPType = locusField.typ private val allelesField = rowType.fieldByName("alleles") private val locusIdx = locusField.index private val allelesIdx = allelesField.index @@ -31,12 +31,23 @@ class RegionValueVariant(rowType: PStruct) extends View { } def contig(): String = { - if (cachedContig == null) - cachedContig = locusPType.contig(locusAddress) + if (cachedContig == null) { + locusPType match { + case pl: PLocus => + cachedContig = pl.contig(locusAddress) + case s: PStruct => + cachedContig = s.types(0).asInstanceOf[PString].loadString(s.loadField(locusAddress, 0)) + } + } cachedContig } - def position(): Int = locusPType.position(locusAddress) + def position(): Int = locusPType match { + case pl: PLocus => + pl.position(locusAddress) + case s: PStruct => + s.types(1).asInstanceOf[PInt32].unstagedLoadFromAddress(s.loadField(locusAddress, 1)) + } def alleles(): Array[String] = { if (cachedAlleles == null) { diff --git a/hail/src/test/resources/bad_flag_number.vcf b/hail/src/test/resources/bad_flag_number.vcf new file mode 100644 index 00000000000..0ce2984ae83 --- /dev/null +++ b/hail/src/test/resources/bad_flag_number.vcf @@ -0,0 +1,91 @@ +##fileformat=VCFv4.2 +##hailversion=0.2.67-40d373134612 +##INFO= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO +1 1 . A C . . BAD_FLAG +1 2 . A C . . BAD_FLAG +1 3 . A C . . 
BAD_FLAG diff --git a/hail/src/test/scala/is/hail/TestUtils.scala b/hail/src/test/scala/is/hail/TestUtils.scala index d448da1a4c3..5929df29285 100644 --- a/hail/src/test/scala/is/hail/TestUtils.scala +++ b/hail/src/test/scala/is/hail/TestUtils.scala @@ -1,7 +1,6 @@ package is.hail import java.io.{File, PrintWriter} - import breeze.linalg.{DenseMatrix, Matrix, Vector} import is.hail.ExecStrategy.ExecStrategy import is.hail.annotations.{Region, RegionValueBuilder, SafeRow} @@ -10,14 +9,17 @@ import is.hail.backend.spark.SparkBackend import is.hail.expr.ir._ import is.hail.expr.ir.{BindingEnv, MakeTuple, Subst} import is.hail.expr.ir.lowering.LowererUnsupportedOperation -import is.hail.types.physical.{PBaseStruct, PCanonicalArray, PType, PTypeReferenceSingleCodeType} +import is.hail.types.physical.{PBaseStruct, PCanonicalArray, PType, stypes} import is.hail.types.virtual._ import is.hail.io.vcf.MatrixVCFReader +import is.hail.types.physical.stypes.PTypeReferenceSingleCodeType import is.hail.utils._ import is.hail.variant._ import org.apache.spark.SparkException import org.apache.spark.sql.Row +import scala.collection.mutable + object ExecStrategy extends Enumeration { type ExecStrategy = Value val Interpret, InterpretUnoptimized, JvmCompile, LoweredJVMCompile, JvmCompileUnoptimized = Value @@ -175,7 +177,7 @@ object TestUtils { ctx: ExecuteContext ): Any = { val inputTypesB = new BoxedArrayBuilder[Type]() - val inputsB = new BoxedArrayBuilder[Any]() + val inputsB = new mutable.ArrayBuffer[Any]() args.foreach { case (v, t) => inputsB += v @@ -346,7 +348,6 @@ object TestUtils { } filteredExecStrats.foreach { strat => - InferPType.clearPTypes(x) try { val res = strat match { case ExecStrategy.Interpret => diff --git a/hail/src/test/scala/is/hail/annotations/StagedConstructorSuite.scala b/hail/src/test/scala/is/hail/annotations/StagedConstructorSuite.scala index 69e85051979..14578eabbd8 100644 --- a/hail/src/test/scala/is/hail/annotations/StagedConstructorSuite.scala +++ b/hail/src/test/scala/is/hail/annotations/StagedConstructorSuite.scala @@ -3,7 +3,7 @@ package is.hail.annotations import is.hail.HailSuite import is.hail.asm4s._ import is.hail.check.{Gen, Prop} -import is.hail.expr.ir.{EmitCode, EmitFunctionBuilder, IEmitCode} +import is.hail.expr.ir.{EmitCode, EmitFunctionBuilder, IEmitCode, RequirednessSuite} import is.hail.types.physical._ import is.hail.types.physical.stypes.concrete.SStringPointer import is.hail.types.physical.stypes.interfaces._ @@ -362,7 +362,7 @@ class StagedConstructorSuite extends HailSuite { fb.emitWithBuilder { cb => val region = fb.emb.getCodeParam[Region](1) rt.constructFromElements(cb, region, const(2), deepCopy = false) { (cb, idx) => - IEmitCode(cb, idx > 0, new SInt32Code(false, fb.getCodeParam[Int](2))) + IEmitCode(cb, idx > 0, new SInt32Code(fb.getCodeParam[Int](2))) }.a } @@ -436,7 +436,7 @@ class StagedConstructorSuite extends HailSuite { val fb = EmitFunctionBuilder[Region, Long, Long](ctx, "deep_copy") fb.emitWithBuilder[Long](cb => t.store(cb, fb.apply_method.getCodeParam[Region](1), - t.loadCheapPCode(cb, fb.apply_method.getCodeParam[Long](2)), + t.loadCheapSCode(cb, fb.apply_method.getCodeParam[Long](2)), deepCopy = true)) val copyF = fb.resultWithIndex()(ctx.fs, 0, region) val newOff = copyF(region, src) @@ -462,7 +462,7 @@ class StagedConstructorSuite extends HailSuite { "x3" -> PCanonicalArray(PInt32(true), required = true), "x4" -> PCanonicalSet(PCanonicalStruct(true, "y" -> PCanonicalString(true)), required = false) ), required = false) - val t2 
= t1.deepInnerRequired(false) + val t2 = RequirednessSuite.deepInnerRequired(t1, false) val value = IndexedSeq( Row(1, IndexedSeq(1,2,3), IndexedSeq(0, -1), Set(Row("asdasdasd"), Row(""))), @@ -490,7 +490,7 @@ class StagedConstructorSuite extends HailSuite { "x3" -> PCanonicalArray(PInt32(true), required = true), "x4" -> PCanonicalSet(PCanonicalStruct(true, "y" -> PCanonicalString(true)), required = false) ), required = false)) - val t2 = t1.deepInnerRequired(false).asInstanceOf[PCanonicalStruct] + val t2 = RequirednessSuite.deepInnerRequired(t1, false).asInstanceOf[PCanonicalStruct] val value = IndexedSeq( Row(1, IndexedSeq(1,2,3), IndexedSeq(0, -1), Set(Row("asdasdasd"), Row(""))), @@ -505,7 +505,7 @@ class StagedConstructorSuite extends HailSuite { val f1 = EmitFunctionBuilder[Long](ctx, "stagedCopy1") f1.emitWithBuilder { cb => val region = f1.partitionRegion - t2.constructFromFields(cb, region, FastIndexedSeq(EmitCode.present(cb.emb, t2.types(0).loadCheapPCode(cb, v1))), deepCopy = false).a + t2.constructFromFields(cb, region, FastIndexedSeq(EmitCode.present(cb.emb, t2.types(0).loadCheapSCode(cb, v1))), deepCopy = false).a } val cp1 = f1.resultWithIndex()(ctx.fs, 0, r)() assert(SafeRow.read(t2, cp1) == Row(value)) @@ -513,7 +513,7 @@ class StagedConstructorSuite extends HailSuite { val f2 = EmitFunctionBuilder[Long](ctx, "stagedCopy2") f2.emitWithBuilder { cb => val region = f2.partitionRegion - t1.constructFromFields(cb, region, FastIndexedSeq(EmitCode.present(cb.emb, t2.types(0).loadCheapPCode(cb, v1))), deepCopy = false).a + t1.constructFromFields(cb, region, FastIndexedSeq(EmitCode.present(cb.emb, t2.types(0).loadCheapSCode(cb, v1))), deepCopy = false).a } val cp2 = f2.resultWithIndex()(ctx.fs, 0, r)() assert(SafeRow.read(t1, cp2) == Row(value)) diff --git a/hail/src/test/scala/is/hail/asm4s/ASM4SSuite.scala b/hail/src/test/scala/is/hail/asm4s/ASM4SSuite.scala index 6e901204adf..5c67dd79ad5 100644 --- a/hail/src/test/scala/is/hail/asm4s/ASM4SSuite.scala +++ b/hail/src/test/scala/is/hail/asm4s/ASM4SSuite.scala @@ -345,7 +345,7 @@ class ASM4SSuite extends TestNGSuite { intField.store(fb.getArg[Int](1)), longField.store(fb.getArg[Long](2)), booleanField.store(fb.getArg[Boolean](3))) - + typeInfo[T] match { case IntInfo => fb.emit(Code(c, intField.load())) case LongInfo => fb.emit(Code(c, longField.load())) @@ -401,48 +401,6 @@ class ASM4SSuite extends TestNGSuite { assert(fb.result()()() == 1) } - @Test def fbFunctionsCanBeNested(): Unit = { - val fb = FunctionBuilder[Boolean]("F") - val fb2 = fb.cb.genDependentFunction[Int, Boolean]("DepF") - val localF = fb.genFieldThisRef[AsmFunction1[Int, Boolean]]() - - val wrappedInt = Code.invokeStatic1[java.lang.Integer, Int, java.lang.Integer]("valueOf", 0) - val rawOut = localF.load().invoke[java.lang.Object, java.lang.Object]("apply", wrappedInt) - - fb2.emit(true) - fb.emit(Code( - localF := fb2.newInstance(fb.apply_method), - checkcast[java.lang.Boolean](rawOut).invoke[Boolean]("booleanValue") - )) - - val f = fb.result()() - assert(f()) - } - - @Test def dependentFunctionsCanUseParentsFields(): Unit = { - val fb = FunctionBuilder[Int, Int, Int]("F") - val fb2 = fb.cb.genDependentFunction[Int, Int]("DepF") - - val localF = fb.genFieldThisRef[AsmFunction1[Int, Int]]() - - val field1 = fb.genFieldThisRef[Int]() - val field2 = fb2.newDepField[Int](field1.load()) - - def wrappedCall(c: Code[Int]) = - localF.load().invoke[java.lang.Object, java.lang.Object]("apply", - Code.invokeStatic1[java.lang.Integer, Int, java.lang.Integer]("valueOf", 
c)) - - fb2.emit(field2 + fb2.getArg[Int](1)) - fb.emit(Code( - field1 := fb.getArg[Int](1), - localF := fb2.newInstance(fb.apply_method), - checkcast[java.lang.Integer](wrappedCall(fb.getArg[Int](2))).invoke[Int]("intValue") - )) - - val f = fb.result()() - assert(f(1, 2) == 3) - } - @Test def testInitialize(): Unit = { val fb = FunctionBuilder[Boolean, Int]("F") val l = fb.newLocal[Int]() diff --git a/hail/src/test/scala/is/hail/expr/ir/Aggregators2Suite.scala b/hail/src/test/scala/is/hail/expr/ir/Aggregators2Suite.scala index 33e0da58d9d..10c9e558feb 100644 --- a/hail/src/test/scala/is/hail/expr/ir/Aggregators2Suite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/Aggregators2Suite.scala @@ -6,9 +6,10 @@ import is.hail.annotations._ import is.hail.asm4s._ import is.hail.expr.ir.agg._ import is.hail.types.{MatrixType, RPrimitive, TypeWithRequiredness, VirtualTypeWithReq} -import is.hail.types.physical._ +import is.hail.types.physical.{stypes, _} import is.hail.types.virtual._ import is.hail.io.BufferSpec +import is.hail.types.physical.stypes.PTypeReferenceSingleCodeType import is.hail.utils._ import is.hail.variant.{Call0, Call1, Call2} import is.hail.{ExecStrategy, HailSuite} diff --git a/hail/src/test/scala/is/hail/expr/ir/ArrayFunctionsSuite.scala b/hail/src/test/scala/is/hail/expr/ir/ArrayFunctionsSuite.scala index a00ad9d4c64..b1c34a16341 100644 --- a/hail/src/test/scala/is/hail/expr/ir/ArrayFunctionsSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/ArrayFunctionsSuite.scala @@ -186,9 +186,9 @@ class ArrayFunctionsSuite extends HailSuite { @Test(dataProvider = "arrayOpsData") def arrayOpsFPDiv(a: IndexedSeq[Integer], b: IndexedSeq[Integer]) { - assertEvalsTo(invoke("div", TArray(TFloat32), toIRArray(a), toIRArray(b)), + assertEvalsTo(invoke("div", TArray(TFloat64), toIRArray(a), toIRArray(b)), Option(a).zip(Option(b)).headOption.map { case (a0, b0) => - a0.zip(b0).map { case (i, j) => Option(i).zip(Option(j)).headOption.map[java.lang.Float] { case (m, n) => m.toFloat / n }.orNull } + a0.zip(b0).map { case (i, j) => Option(i).zip(Option(j)).headOption.map[java.lang.Double] { case (m, n) => m.toDouble / n }.orNull } }.orNull ) } diff --git a/hail/src/test/scala/is/hail/expr/ir/ETypeSuite.scala b/hail/src/test/scala/is/hail/expr/ir/ETypeSuite.scala index 6fca2c9bef3..3490d95f16d 100644 --- a/hail/src/test/scala/is/hail/expr/ir/ETypeSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/ETypeSuite.scala @@ -67,7 +67,7 @@ class ETypeSuite extends HailSuite { val ibArg = fb2.apply_method.getCodeParam[InputBuffer](2) val dec = eType.buildDecoderMethod(outPType.virtualType, fb2.apply_method.ecb) fb2.emitWithBuilder[Long] { cb => - val decoded = cb.invokePCode(dec, regArg, ibArg) + val decoded = cb.invokeSCode(dec, regArg, ibArg) outPType.store(cb, regArg, decoded, deepCopy = false) } diff --git a/hail/src/test/scala/is/hail/expr/ir/EmitStreamSuite.scala b/hail/src/test/scala/is/hail/expr/ir/EmitStreamSuite.scala index 294878b4c5c..9eb9c440d45 100644 --- a/hail/src/test/scala/is/hail/expr/ir/EmitStreamSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/EmitStreamSuite.scala @@ -2,7 +2,7 @@ package is.hail.expr.ir import is.hail.annotations.{Region, RegionValue, RegionValueBuilder, SafeRow, ScalaToRegionValue} import is.hail.asm4s._ -import is.hail.types.physical._ +import is.hail.types.physical.{stypes, _} import is.hail.types.virtual._ import is.hail.utils._ import is.hail.variant.Call2 @@ -12,6 +12,7 @@ import is.hail.expr.ir.streams.{EmitStream, StreamArgType, StreamUtils} import 
is.hail.types.physical.stypes.interfaces.SStreamCode import org.apache.spark.sql.Row import is.hail.TestUtils._ +import is.hail.types.physical.stypes.{PTypeReferenceSingleCodeType, SingleCodeSCode, StreamSingleCodeType} import org.testng.annotations.Test class EmitStreamSuite extends HailSuite { @@ -52,11 +53,8 @@ class EmitStreamSuite extends HailSuite { val fb = EmitFunctionBuilder[F](ctx, "F", (classInfo[Region]: ParamType) +: inputTypes.map(pt => pt: ParamType), LongInfo) val mb = fb.apply_method val ir = streamIR.deepCopy() - val usesAndDefs = ComputeUsesAndDefs(ir, errorIfFreeVariables = false) - val requiredness = Requiredness.apply(ir, usesAndDefs, null, Env.empty) // Value IR inference doesn't need context - InferPType(ir, Env.empty, requiredness, usesAndDefs) - val emitContext = new EmitContext(ctx, requiredness) + val emitContext = EmitContext.analyze(ctx, ir) var arrayType: PType = null mb.emit(EmitCodeBuilder.scopedCode(mb) { cb => @@ -66,10 +64,10 @@ class EmitStreamSuite extends HailSuite { case s => s } TypeCheck(s) - EmitStream.produce(new Emit(emitContext, fb.ecb), s, cb, region, Env.empty, None) + EmitStream.produce(new Emit(emitContext, fb.ecb), s, cb, region, EmitEnv(Env.empty, inputTypes.indices.map(i => mb.storeEmitParam(i + 2, cb))), None) .consumeCode[Long](cb, 0L, { s => val arr = StreamUtils.toArray(cb, s.asStream.producer, region) - val scp = SingleCodePCode.fromPCode(cb, arr, region, false) + val scp = SingleCodeSCode.fromSCode(cb, arr, region, false) arrayType = scp.typ.asInstanceOf[PTypeReferenceSingleCodeType].pt coerce[Long](scp.code) @@ -126,18 +124,14 @@ class EmitStreamSuite extends HailSuite { val mb = fb.apply_method val region = mb.getCodeParam[Region](1) val ir = streamIR.deepCopy() - val usesAndDefs = ComputeUsesAndDefs(ir, errorIfFreeVariables = false) - val requiredness = Requiredness.apply(ir, usesAndDefs, null, Env.empty) // Value IR inference doesn't need context - InferPType(ir, Env.empty, requiredness, usesAndDefs) - - val emitContext = new EmitContext(ctx, requiredness) + val emitContext = EmitContext.analyze(ctx, ir) fb.emitWithBuilder { cb => TypeCheck(ir) val len = cb.newLocal[Int]("len", 0) val len2 = cb.newLocal[Int]("len2", -1) - EmitStream.produce(new Emit(emitContext, fb.ecb), ir, cb, region, Env.empty, None) + EmitStream.produce(new Emit(emitContext, fb.ecb), ir, cb, region, EmitEnv(Env.empty, FastIndexedSeq()), None) .consume(cb, {}, { case stream: SStreamCode => diff --git a/hail/src/test/scala/is/hail/expr/ir/FunctionSuite.scala b/hail/src/test/scala/is/hail/expr/ir/FunctionSuite.scala index 6e388feb936..1aef3d03d39 100644 --- a/hail/src/test/scala/is/hail/expr/ir/FunctionSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/FunctionSuite.scala @@ -7,6 +7,7 @@ import is.hail.annotations._ import is.hail.asm4s._ import is.hail.expr.ir.functions.{IRFunctionRegistry, RegistryFunctions} import is.hail.types.virtual._ +import is.hail.types.physical.stypes.interfaces._ import is.hail.utils.{FastIndexedSeq, FastSeq} import is.hail.variant.Call2 import org.testng.annotations.Test @@ -31,10 +32,10 @@ object TestRegisterFunctions extends RegistryFunctions { registerJavaStaticFunction("compare", Array(TInt32, TInt32), TInt32, null)(classOf[java.lang.Integer], "compare") registerScalaFunction("foobar1", Array(), TInt32, null)(ScalaTestObject.getClass, "testFunction") registerScalaFunction("foobar2", Array(), TInt32, null)(ScalaTestCompanion.getClass, "testFunction") - registerCode2[Int, Int]("testCodeUnification", tnum("x"), tv("x", 
"int32"), tv("x"), null) { - case (_, rt, (aT, a: Code[Int]), (bT, b: Code[Int])) => a + b + registerSCode2("testCodeUnification", tnum("x"), tv("x", "int32"), tv("x"), null) { + case (_, cb, rt, a, b) => primitive(a.asInt.intCode(cb) + b.asInt.intCode(cb)) } - registerCode1("testCodeUnification2", tv("x"), tv("x"), null) { case (_, rt, (aT, a: Code[Long])) => a } + registerSCode1("testCodeUnification2", tv("x"), tv("x"), null) { case (_, cb, rt, a) => a } } } diff --git a/hail/src/test/scala/is/hail/expr/ir/IRSuite.scala b/hail/src/test/scala/is/hail/expr/ir/IRSuite.scala index 4c59ce80950..9be2b14f0f9 100644 --- a/hail/src/test/scala/is/hail/expr/ir/IRSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/IRSuite.scala @@ -19,6 +19,7 @@ import is.hail.io.{BufferSpec, TypedCodecSpec} import is.hail.linalg.BlockMatrix import is.hail.methods._ import is.hail.rvd.{RVD, RVDPartitioner, RVDSpecMaker} +import is.hail.types.physical.stypes.{EmitType, Float32SingleCodeType, Float64SingleCodeType, Int32SingleCodeType, Int64SingleCodeType, PTypeReferenceSingleCodeType, SType, SingleCodeType} import is.hail.utils.{FastIndexedSeq, _} import is.hail.variant.{Call2, Locus} import is.hail.{ExecStrategy, HailContext, HailSuite, utils} @@ -42,40 +43,40 @@ object IRSuite { name: String, valueParameterTypes: Array[Type], returnType: Type, - calculateReturnType: (Type, Seq[PType]) => PType + calculateReturnType: (Type, Seq[EmitType]) => EmitType )( - impl: (EmitCodeBuilder, Value[Region], PType, Long, Array[EmitCode]) => IEmitCode + impl: (EmitCodeBuilder, Value[Region], SType, Long, Array[EmitCode]) => IEmitCode ) { IRFunctionRegistry.addJVMFunction( new SeededMissingnessAwareJVMFunction(name, valueParameterTypes, returnType, calculateReturnType) { val isDeterministic: Boolean = false - def applySeededI(seed: Long, cb: EmitCodeBuilder, r: Value[Region], returnPType: PType, args: (PType, EmitCode)*): IEmitCode = { - assert(unify(FastSeq(), args.map(_._1.virtualType), returnPType.virtualType)) - impl(cb, r, returnPType, seed, args.map(a => a._2).toArray) + def applySeededI(seed: Long, cb: EmitCodeBuilder, r: Value[Region], returnPType: SType, args: EmitCode*): IEmitCode = { + assert(unify(FastSeq(), args.map(_.st.virtualType), returnPType.virtualType)) + impl(cb, r, returnPType, seed, args.toArray) } } ) } - def registerSeededWithMissingness( + def registerSeededWithMissingness1( name: String, valueParameterType: Type, returnType: Type, - calculateReturnType: (Type, PType) => PType + calculateReturnType: (Type, EmitType) => EmitType )( - impl: (EmitCodeBuilder, Value[Region], PType, Long, EmitCode) => IEmitCode + impl: (EmitCodeBuilder, Value[Region], SType, Long, EmitCode) => IEmitCode ): Unit = registerSeededWithMissingness(name, Array(valueParameterType), returnType, unwrappedApply(calculateReturnType)) { case (cb, r, rt, seed, Array(a1)) => impl(cb, r, rt, seed, a1) } def registerAll() { - registerSeededWithMissingness("incr_s", TBoolean, TBoolean, { (ret: Type, pt: PType) => pt }) { case (cb, mb, rt, _, l) => + registerSeededWithMissingness1("incr_s", TBoolean, TBoolean, { (ret: Type, pt: EmitType) => pt }) { case (cb, r, _, _, l) => cb += Code.invokeScalaObject0[Unit](outer.getClass, "incr") l.toI(cb) } - registerSeededWithMissingness("incr_v", TBoolean, TBoolean, { (ret: Type, pt: PType) => pt }) { case (cb, mb, rt, _, l) => + registerSeededWithMissingness1("incr_v", TBoolean, TBoolean, { (ret: Type, pt: EmitType) => pt }) { case (cb, _, _, _, l) => l.toI(cb).map(cb) { pc => cb += 
Code.invokeScalaObject0[Unit](outer.getClass, "incr") pc @@ -89,11 +90,6 @@ object IRSuite { class IRSuite extends HailSuite { implicit val execStrats = ExecStrategy.nonLowering - def assertPType(node: IR, expected: PType) { - InferPType(node) - assert(node.pType == expected) - } - @Test def testI32() { assertEvalsTo(I32(5), 5) } @@ -122,17 +118,6 @@ class IRSuite extends HailSuite { @Test def testFalse() { assertEvalsTo(False(), false) } - - @Test def testScalarInferPType() { - assertPType(I32(5), PInt32(true)) - assertPType(I64(5), PInt64(true)) - assertPType(F32(3.1415f), PFloat32(true)) - assertPType(F64(3.1415926589793238462643383), PFloat64(true)) - assertPType(Str("HELLO WORLD"), PCanonicalString(true)) - assertPType(True(), PBoolean(true)) - assertPType(False(), PBoolean(true)) - } - // FIXME Void() doesn't work because we can't handle a void type in a tuple @Test def testCast() { @@ -158,31 +143,6 @@ class IRSuite extends HailSuite { (Cast(F64(3.14), TFloat64), 3.14)) } - @Test def testCastInferPType() { - assertPType(Cast(I32(5), TInt32), PInt32(true)) - assertPType(Cast(I32(5), TInt64), PInt64(true)) - assertPType(Cast(I32(5), TFloat32), PFloat32(true)) - assertPType(Cast(I32(5), TFloat64), PFloat64(true)) - - assertPType(Cast(I64(5), TInt32), PInt32(true)) - assertPType(Cast(I64(0xf29fb5c9af12107dL), TInt32), PInt32(true)) // truncate - assertPType(Cast(I64(5), TInt64), PInt64(true)) - assertPType(Cast(I64(5), TFloat32), PFloat32(true)) - assertPType(Cast(I64(5), TFloat64), PFloat64(true)) - - assertPType(Cast(F32(3.14f), TInt32), PInt32(true)) - assertPType(Cast(F32(3.99f), TInt32), PInt32(true)) // truncate - assertPType(Cast(F32(3.14f), TInt64), PInt64(true)) - assertPType(Cast(F32(3.14f), TFloat32), PFloat32(true)) - assertPType(Cast(F32(3.14f), TFloat64), PFloat64(true)) - - assertPType(Cast(F64(3.14), TInt32), PInt32(true)) - assertPType(Cast(F64(3.99), TInt32), PInt32(true)) // truncate - assertPType(Cast(F64(3.14), TInt64), PInt64(true)) - assertPType(Cast(F64(3.14), TFloat32), PFloat32(true)) - assertPType(Cast(F64(3.14), TFloat64), PFloat64(true)) - } - @Test def testCastRename() { assertEvalsTo(CastRename(MakeStruct(FastSeq(("x", I32(1)))), TStruct("foo" -> TInt32)), Row(1)) assertEvalsTo(CastRename(MakeArray(FastSeq(MakeStruct(FastSeq(("x", I32(1))))), @@ -190,64 +150,10 @@ class IRSuite extends HailSuite { FastIndexedSeq(Row(1))) } - @Test def testCastRenameIR() { - var expectedPType: PType = PCanonicalStruct(true, "foo" -> PInt32(true)) - var childPType: PType = PCanonicalStruct(true, "x" -> PInt32(true)) - var targetType: Type = TStruct("foo" -> TInt32) - assertPType(CastRename(In(0, SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(childPType))), targetType), expectedPType) - - expectedPType = PCanonicalArray(PCanonicalStruct(true, "foo" -> PInt64(true))) - childPType = PCanonicalArray(PCanonicalStruct(true, "c" -> PInt64(true))) - targetType = TArray(TStruct("foo" -> TInt64)) - assertPType(CastRename(In(0, SingleCodeEmitParamType(false, PTypeReferenceSingleCodeType(childPType))), targetType), expectedPType) - - expectedPType = PCanonicalArray(PCanonicalStruct("foo" -> PCanonicalString(true))) - childPType = PCanonicalArray(PCanonicalStruct("q" -> PCanonicalString(true))) - targetType = TArray(TStruct("foo" -> TString)) - assertPType(CastRename(In(0, SingleCodeEmitParamType(false, PTypeReferenceSingleCodeType(childPType))), targetType), expectedPType) - - expectedPType = PCanonicalArray(PCanonicalStruct(true, "foo" -> PCanonicalStruct("baz" -> 
PBoolean(true)))) - childPType = PCanonicalArray(PCanonicalStruct(true, "b" -> PCanonicalStruct("a" -> PBoolean(true)))) - targetType = TArray(TStruct("foo" -> TStruct("baz" -> TBoolean))) - assertPType(CastRename(In(0, SingleCodeEmitParamType(false, PTypeReferenceSingleCodeType(childPType))), targetType), expectedPType) - - expectedPType = PCanonicalArray(PCanonicalStruct("foo" -> PCanonicalArray(PFloat64(true), true), "bar" -> PCanonicalBinary())) - childPType = PCanonicalArray(PCanonicalStruct("x" -> PCanonicalArray(PFloat64(true), true), "y" -> PCanonicalBinary())) - targetType = TArray(TStruct("foo" -> TArray(TFloat64), "bar" -> TBinary)) - assertPType(CastRename(In(0, SingleCodeEmitParamType(false, PTypeReferenceSingleCodeType(childPType))), targetType), expectedPType) - - expectedPType = PCanonicalTuple(true, PCanonicalStruct(true, "foo" -> PCanonicalInterval(PFloat32())), PCanonicalStruct(false, "bar" -> PFloat64(true))) - childPType = PCanonicalTuple(true, PCanonicalStruct(true, "v" -> PCanonicalInterval(PFloat32())), PCanonicalStruct(false, "q" -> PFloat64(true))) - targetType = TTuple(TStruct("foo" -> TInterval(TFloat32)), TStruct("bar" -> TFloat64)) - assertPType(CastRename(In(0, SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(childPType))), targetType), expectedPType) - - expectedPType = PCanonicalDict(PCanonicalString(), PCanonicalTuple(false, - PCanonicalStruct("foo" -> PCanonicalStruct("bar" -> PCanonicalNDArray(PInt32(true), 3, true))), - PCanonicalStruct(false, "bar" -> PCanonicalBinary(true)))) - childPType = PCanonicalDict(PCanonicalString(), PCanonicalTuple(false, - PCanonicalStruct("xxxxxx" -> PCanonicalStruct("qqq" -> PCanonicalNDArray(PInt32(true), 3, true))), - PCanonicalStruct(false, "ddd" -> PCanonicalBinary(true)))) - targetType = TDict(TString, TTuple(TStruct("foo" -> TStruct("bar" -> TNDArray(TInt32, Nat(3)))), - TStruct("bar" -> TBinary))) - assertPType(CastRename(In(0, SingleCodeEmitParamType(false, PTypeReferenceSingleCodeType(childPType))), targetType), expectedPType) - - expectedPType = PCanonicalStream(PCanonicalStruct("foo2a" -> PCanonicalArray(PFloat64(true), true), "bar2a" -> PCanonicalBinary())) - childPType = PCanonicalStream(PCanonicalStruct("q" -> PCanonicalArray(PFloat64(true), true), "yxxx" -> PCanonicalBinary())) - targetType = TStream(TStruct("foo2a" -> TArray(TFloat64), "bar2a" -> TBinary)) - assertPType(CastRename(In(0, SingleCodeEmitParamType(false, PTypeReferenceSingleCodeType(childPType))), targetType), expectedPType) - } - @Test def testNA() { assertEvalsTo(NA(TInt32), null) } - @Test def testNAIsNAInferPType() { - assertPType(NA(TInt32), PInt32(false)) - - assertPType(IsNA(NA(TInt32)), PBoolean(true)) - assertPType(IsNA(I32(5)), PBoolean(true)) - } - @Test def testCoalesce() { assertEvalsTo(Coalesce(FastSeq(In(0, TInt32))), FastIndexedSeq((null, TInt32)), null) assertEvalsTo(Coalesce(FastSeq(In(0, TInt32))), FastIndexedSeq((1, TInt32)), 1) @@ -266,22 +172,6 @@ class IRSuite extends HailSuite { assertEvalsTo(Coalesce(FastSeq(t1, t2)), FastIndexedSeq((value, TArray(TInt32))), value) } - @Test def testCoalesceInferPType() { - assertPType(Coalesce(FastSeq(In(0, SingleCodeEmitParamType(false, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32()))))), - In(0, SingleCodeEmitParamType(false, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32(true)))))))), PCanonicalArray(PCanonicalArray(PInt32()))) - assertPType(Coalesce(FastSeq(In(0, SingleCodeEmitParamType(false, 
PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32()))))), - In(0, SingleCodeEmitParamType(false, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32(true), true))))))), PCanonicalArray(PCanonicalArray(PInt32()))) - assertPType(Coalesce(FastSeq(In(0, SingleCodeEmitParamType(false, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32()))))), - In(0, SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32(true), true), true)))))), PCanonicalArray(PCanonicalArray(PInt32()), true)) - assertPType(Coalesce(FastSeq(In(0, SingleCodeEmitParamType(false, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32()))))), - In(0, SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32(true), true), true)))))), PCanonicalArray(PCanonicalArray(PInt32()), true)) - assertPType(Coalesce(FastSeq( - In(0, SingleCodeEmitParamType(false, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32()))))), - In(0, SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32(), true))))), - In(0, SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32(true)), true)))) - )), PCanonicalArray(PCanonicalArray(PInt32()), true)) - } - val i32na = NA(TInt32) val i64na = NA(TInt64) val f32na = NA(TFloat32) @@ -318,84 +208,6 @@ class IRSuite extends HailSuite { ) } - @Test def testApplyUnaryPrimOpInferPType() { - val i32na = NA(TInt32) - def i64na = NA(TInt64) - def f32na = NA(TFloat32) - def f64na = NA(TFloat64) - def bna = NA(TBoolean) - - var node = ApplyUnaryPrimOp(Negate(), I32(5)) - assertPType(node, PInt32(true)) - node = ApplyUnaryPrimOp(Negate(), i32na) - assertPType(node, PInt32(false)) - - // should not be able to infer physical type twice on one IR (i32na) - node = ApplyUnaryPrimOp(Negate(), i32na) - intercept[RuntimeException](InferPType(node)) - - node = ApplyUnaryPrimOp(Negate(), I64(5)) - assertPType(node, PInt64(true)) - - node = ApplyUnaryPrimOp(Negate(), i64na) - assertPType(node, PInt64(false)) - - node = ApplyUnaryPrimOp(Negate(), F32(5)) - assertPType(node, PFloat32(true)) - - node = ApplyUnaryPrimOp(Negate(), f32na) - assertPType(node, PFloat32(false)) - - node = ApplyUnaryPrimOp(Negate(), F64(5)) - assertPType(node, PFloat64(true)) - - node = ApplyUnaryPrimOp(Negate(), f64na) - assertPType(node, PFloat64(false)) - - node = ApplyUnaryPrimOp(Bang(), False()) - assertPType(node, PBoolean(true)) - - node = ApplyUnaryPrimOp(Bang(), True()) - assertPType(node, PBoolean(true)) - - node = ApplyUnaryPrimOp(Bang(), bna) - assertPType(node, PBoolean(false)) - - node = ApplyUnaryPrimOp(BitNot(), I32(0xdeadbeef)) - assertPType(node, PInt32(true)) - - node = ApplyUnaryPrimOp(BitNot(), I64(0xdeadbeef12345678L)) - assertPType(node, PInt64(true)) - - node = ApplyUnaryPrimOp(BitNot(), I64(-0xdeadbeef12345678L)) - assertPType(node, PInt64(true)) - - node = ApplyUnaryPrimOp(BitNot(), i64na) - assertPType(node, PInt64(false)) - } - - @Test def testComplexInferPType() { - // InferPType expects array->stream lowered ir - val ir = ToArray(StreamMap( - Let( - "q", - I32(2), - StreamMap( - Let( - "v", - Ref("q", TInt32) + I32(3), - StreamRange(0, Ref("v", TInt32), 1) - ), - "x", - Ref("x", TInt32) + Ref("q", TInt32) - ) - ), - "y", - Ref("y", TInt32) + I32(3))) - - assertPType(ir, PCanonicalArray(PInt32(true), true)) - } - @Test def testApplyBinaryPrimOpAdd() { def assertSumsTo(t: Type, x: Any, y: 
Any, sum: Any) { assertEvalsTo(ApplyBinaryPrimOp(Add(), In(0, t), In(1, t)), FastIndexedSeq(x -> t, y -> t), sum) @@ -478,12 +290,12 @@ class IRSuite extends HailSuite { assertEvalsTo(ApplyBinaryPrimOp(FloatingPointDivide(), In(0, t), In(1, t)), FastIndexedSeq(x -> t, y -> t), expected) } - assertExpected(TInt32, 5, 2, 2.5f) + assertExpected(TInt32, 5, 2, 2.5) assertExpected(TInt32, 5, null, null) assertExpected(TInt32, null, 2, null) assertExpected(TInt32, null, null, null) - assertExpected(TInt64, 5L, 2L, 2.5f) + assertExpected(TInt64, 5L, 2L, 2.5) assertExpected(TInt64, 5L, null, null) assertExpected(TInt64, null, 2L, null) assertExpected(TInt64, null, null, null) @@ -791,30 +603,6 @@ class IRSuite extends HailSuite { assertEvalsTo(If(True(), NA(TInt32), I32(7)), null) } - @Test def testIfInferPType() { - assertPType(If(True(), In(0, SingleCodeEmitParamType(true, Int32SingleCodeType)), In(1, SingleCodeEmitParamType(true, Int32SingleCodeType))), PInt32(true)) - assertPType(If(True(), In(0, SingleCodeEmitParamType(false, Int32SingleCodeType)), In(1, SingleCodeEmitParamType(true, Int32SingleCodeType))), PInt32(false)) - assertPType(If(NA(TBoolean), In(0, SingleCodeEmitParamType(true, Int32SingleCodeType)), In(1, SingleCodeEmitParamType(true, Int32SingleCodeType))), PInt32(false)) - - var cnsqBranch = In(0, SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32(true), true), true)))) - var altrBranch = In(1, SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32(true), true), true)))) - - var ir = If(True(), cnsqBranch, altrBranch) - assertPType(ir, PCanonicalArray(PCanonicalArray(PInt32(true), true), true)) - - cnsqBranch = In(0, SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32(true), true), true)))) - altrBranch = In(1, SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32(false), true), true)))) - - ir = If(True(), cnsqBranch, altrBranch) - assertPType(ir, PCanonicalArray(PCanonicalArray(PInt32(false), true), true)) - - cnsqBranch = In(0, SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32(true), false), true)))) - altrBranch = In(1, SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(PCanonicalArray(PCanonicalArray(PInt32(false), true), true)))) - - ir = If(True(), cnsqBranch, altrBranch) - assertPType(ir, PCanonicalArray(PCanonicalArray(PInt32(false), false), true)) - } - @Test def testLet() { assertEvalsTo(Let("v", I32(5), Ref("v", TInt32)), 5) assertEvalsTo(Let("v", NA(TInt32), Ref("v", TInt32)), null) @@ -846,43 +634,6 @@ class IRSuite extends HailSuite { assertEvalsTo(MakeArray(FastSeq(), TArray(TInt32)), FastIndexedSeq()) } - @Test def testMakeArrayInferPTypeFromNestedRef() { - var ir = MakeArray(FastSeq(), TArray(TInt32)) - assertPType(ir, PCanonicalArray(PInt32(true), true)) - - val eltType = TStruct("a" -> TArray(TArray(TInt32)), "b" -> TInt32, "c" -> TDict(TInt32, TString)) - - val pTypes = Array[PType]( - PCanonicalStruct(true, - "a" -> PCanonicalArray(PCanonicalArray(PInt32(false), true), false), - "b" -> PInt32(true), - "c" -> PCanonicalDict(PInt32(false), PCanonicalString(false), false)), - PCanonicalStruct(true, - "a" -> PCanonicalArray(PCanonicalArray(PInt32(true), true), true), - "b" -> PInt32(true), - "c" -> PCanonicalDict(PInt32(true), PCanonicalString(true), true))) - - val unified = PCanonicalStruct(true, - "a" -> 
PCanonicalArray(PCanonicalArray(PInt32(false), true), false), - "b" -> PInt32(true), - "c" -> PCanonicalDict(PInt32(false), PCanonicalString(false), false)) - - assertPType(MakeArray(Array(In(0, SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(pTypes(0))))), TArray(eltType)), PCanonicalArray(pTypes(0), true)) - assertPType(MakeArray(Array(In(0, SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(pTypes(0)))), In(1, SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(pTypes(1))))), TArray(eltType)), PCanonicalArray(pTypes(0), true)) - } - - @Test def testMakeArrayInferPType() { - var ir = MakeArray(FastSeq(I32(5), NA(TInt32), I32(-3)), TArray(TInt32)) - - assertPType(ir, PCanonicalArray(PInt32(false), true)) - - ir = MakeArray(FastSeq(I32(5), I32(1), I32(-3)), TArray(TInt32)) - - assertPType(ir, PCanonicalArray(PInt32(true), true)) - - ir = MakeArray(FastSeq(I32(5), I32(1), I32(-3)), TArray(TInt32)) - } - @Test def testGetNestedElementPTypesI32() { var types = Seq(PInt32(true)) var res = InferPType.getCompatiblePType(types) @@ -1453,29 +1204,6 @@ class IRSuite extends HailSuite { assert(res == PCanonicalInterval(PCanonicalInterval(PInt32(false), false), true)) } - @Test def testToDictInferPtype() { - val allRequired = ToDict(MakeStream(FastIndexedSeq( - MakeTuple.ordered(FastIndexedSeq(I32(5), Str("a"))), - MakeTuple.ordered(FastIndexedSeq(I32(10), Str("b"))) - ), TStream(TTuple(TInt32, TString)))) - - assertPType(allRequired, PCanonicalDict(PInt32(true), PCanonicalString(true), true)) - - var notAllRequired = ToDict(MakeStream(FastIndexedSeq( - MakeTuple.ordered(FastIndexedSeq(NA(TInt32), Str("a"))), - MakeTuple.ordered(FastIndexedSeq(I32(10), Str("b"))) - ), TStream(TTuple(TInt32, TString)))) - - assertPType(notAllRequired, PCanonicalDict(PInt32(false), PCanonicalString(true), true)) - - notAllRequired = ToDict(MakeStream(FastIndexedSeq( - MakeTuple.ordered(FastIndexedSeq(NA(TInt32), Str("a"))), - MakeTuple.ordered(FastIndexedSeq(I32(10), NA(TString)) - )), TStream(TTuple(TInt32, TString)))) - - assertPType(notAllRequired, PCanonicalDict(PInt32(false), PCanonicalString(false), true)) - } - @Test def testMakeStruct() { assertEvalsTo(MakeStruct(FastSeq()), Row()) assertEvalsTo(MakeStruct(FastSeq("a" -> NA(TInt32), "b" -> 4, "c" -> 0.5)), Row(null, 4, 0.5)) @@ -1483,17 +1211,6 @@ class IRSuite extends HailSuite { assertEvalsTo(GetField(MakeStruct((0 until 20000).map(i => s"foo$i" -> I32(1))), "foo1"), 1) } - @Test def testMakeStructInferPType() { - var ir = MakeStruct(FastSeq()) - assertPType(ir, PCanonicalStruct(true)) - - ir = MakeStruct(FastSeq("a" -> NA(TInt32), "b" -> 4, "c" -> 0.5)) - assertPType(ir, PCanonicalStruct(true, "a" -> PInt32(false), "b" -> PInt32(true), "c" -> PFloat64(true))) - - val ir2 = GetField(MakeStruct((0 until 20000).map(i => s"foo$i" -> I32(1))), "foo1") - assertPType(ir2, PInt32(true)) - } - @Test def testMakeArrayWithDifferentRequiredness(): Unit = { val pt1 = PCanonicalArray(PCanonicalStruct("a" -> PInt32(), "b" -> PCanonicalArray(PInt32()))) val pt2 = PCanonicalArray(PCanonicalStruct(true, "a" -> PInt32(true), "b" -> PCanonicalArray(PInt32(), true))) @@ -2703,11 +2420,6 @@ class IRSuite extends HailSuite { assertFatal(Die(NA(TString), TFloat64, -1), "message missing") } - @Test def testDieInferPType() { - assertPType(Die("mumblefoo", TFloat64), PFloat64(true)) - assertPType(Die("mumblefoo", TArray(TFloat64)), PCanonicalArray(PFloat64(true), true)) - } - @Test def testStreamRange() { def assertEquals(start: Integer, stop: Integer, step: 
Integer, expected: IndexedSeq[Int]) { assertEvalsTo(ToArray(StreamRange(In(0, TInt32), In(1, TInt32), In(2, TInt32))), @@ -3159,6 +2871,7 @@ class IRSuite extends HailSuite { MakeTuple(FastIndexedSeq(2 -> i, 4 -> b)), GetTupleElement(t, 1), Die("mumblefoo", TFloat64), + Trap(Die("mumblefoo", TFloat64)), invoke("land", TBoolean, b, c), // ApplySpecial invoke("toFloat64", TFloat64, i), // Apply Literal(TStruct("x" -> TInt32), Row(1)), diff --git a/hail/src/test/scala/is/hail/expr/ir/OrderingSuite.scala b/hail/src/test/scala/is/hail/expr/ir/OrderingSuite.scala index b8481bd1ba9..0734f8d545f 100644 --- a/hail/src/test/scala/is/hail/expr/ir/OrderingSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/OrderingSuite.scala @@ -9,6 +9,7 @@ import is.hail.TestUtils._ import is.hail.expr.ir.orderings.CodeOrdering import is.hail.rvd.RVDType import is.hail.types.physical._ +import is.hail.types.physical.stypes.EmitType import is.hail.types.virtual._ import is.hail.utils._ import org.apache.spark.sql.Row @@ -38,8 +39,8 @@ class OrderingSuite extends HailSuite { implicit val x = op.rtti val fb = EmitFunctionBuilder[Region, Long, Long, op.ReturnType](ctx, "lifted") fb.emitWithBuilder { cb => - val cv1 = t.loadCheapPCode(cb, fb.getCodeParam[Long](2)) - val cv2 = t.loadCheapPCode(cb, fb.getCodeParam[Long](3)) + val cv1 = t.loadCheapSCode(cb, fb.getCodeParam[Long](2)) + val cv2 = t.loadCheapSCode(cb, fb.getCodeParam[Long](3)) fb.ecb.getOrderingFunction(cv1.st, cv2.st, op) .apply(cb, EmitCode.present(cb.emb, cv1), EmitCode.present(cb.emb, cv2)) } @@ -57,9 +58,9 @@ class OrderingSuite extends HailSuite { val fb = EmitFunctionBuilder[Region, Boolean, Long, Boolean, Long, op.ReturnType](ctx, "lifted") fb.emitWithBuilder { cb => val m1 = fb.getCodeParam[Boolean](2) - val cv1 = t.loadCheapPCode(cb, fb.getCodeParam[Long](3)) + val cv1 = t.loadCheapSCode(cb, fb.getCodeParam[Long](3)) val m2 = fb.getCodeParam[Boolean](4) - val cv2 = t.loadCheapPCode(cb, fb.getCodeParam[Long](5)) + val cv2 = t.loadCheapSCode(cb, fb.getCodeParam[Long](5)) val ev1 = EmitCode(Code._empty, m1, cv1) val ev2 = EmitCode(Code._empty, m2, cv2) fb.ecb.getOrderingFunction(ev1.st, ev2.st, op) @@ -457,8 +458,10 @@ class OrderingSuite extends HailSuite { val cset = fb.getCodeParam[Long](2) val cetuple = fb.getCodeParam[Long](3) - val bs = new BinarySearch(fb.apply_method, pset, pset.elementType, keyOnly = false) - fb.emitWithBuilder(cb => bs.getClosestIndex(cset, false, pt.loadCheapPCode(cb, pTuple.loadField(cetuple, 0)).code)) + val bs = new BinarySearch(fb.apply_method, pset.sType, EmitType(pset.elementType.sType, true), keyOnly = false) + fb.emitWithBuilder(cb => + bs.getClosestIndex(cb, pset.loadCheapSCode(cb, cset), + EmitCode.fromI(fb.apply_method)(cb => IEmitCode.present(cb, pt.loadCheapSCode(cb, pTuple.loadField(cetuple, 0)))))) val asArray = SafeIndexedSeq(pArray, soff) @@ -493,9 +496,12 @@ class OrderingSuite extends HailSuite { val cdict = fb.getCodeParam[Long](2) val cktuple = fb.getCodeParam[Long](3) - val bs = new BinarySearch(fb.apply_method, pDict, pDict.keyType, keyOnly = true) + val bs = new BinarySearch(fb.apply_method, pDict.sType, EmitType(pDict.keyType.sType, false), keyOnly = true) + val m = ptuple.isFieldMissing(cktuple, 0) - fb.emitWithBuilder(cb => bs.getClosestIndex(cdict, m, pDict.keyType.loadCheapPCode(cb, ptuple.loadField(cktuple, 0)).code)) + fb.emitWithBuilder(cb => + bs.getClosestIndex(cb, pDict.loadCheapSCode(cb, cdict), + EmitCode.fromI(fb.apply_method)(cb => IEmitCode.present(cb, pDict.keyType.loadCheapSCode(cb, 
ptuple.loadField(cktuple, 0)))))) val asArray = SafeIndexedSeq(PCanonicalArray(pDict.elementType), soff) diff --git a/hail/src/test/scala/is/hail/expr/ir/PruneSuite.scala b/hail/src/test/scala/is/hail/expr/ir/PruneSuite.scala index 9df2846eb8d..8e9fca074d8 100644 --- a/hail/src/test/scala/is/hail/expr/ir/PruneSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/PruneSuite.scala @@ -101,6 +101,7 @@ class PruneSuite extends HailSuite { false).execute(ctx)) lazy val tr = TableRead(tab.typ, false, new TableReader { + override def renderShort(): String = ??? def pathsUsed: Seq[String] = FastSeq() @@ -138,6 +139,8 @@ class PruneSuite extends HailSuite { def lower(mr: MatrixRead): TableIR = ??? def toJValue: JValue = ??? + + override def renderShort(): String = ??? }) lazy val emptyTableDep = TableType(TStruct.empty, FastIndexedSeq(), TStruct.empty) diff --git a/hail/src/test/scala/is/hail/expr/ir/RandomFunctionsSuite.scala b/hail/src/test/scala/is/hail/expr/ir/RandomFunctionsSuite.scala index 5af28d6b42e..6764550ff16 100644 --- a/hail/src/test/scala/is/hail/expr/ir/RandomFunctionsSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/RandomFunctionsSuite.scala @@ -4,10 +4,11 @@ import is.hail.TestUtils._ import is.hail.expr.ir.TestUtils._ import is.hail.asm4s.Code import is.hail.expr.ir.functions.{IRRandomness, RegistryFunctions} -import is.hail.types.physical.{PCode, PInt32, PInt64} +import is.hail.types.physical.stypes.interfaces._ +import is.hail.types.physical.stypes.primitives.{SInt32, SInt64} import is.hail.types.virtual.{TArray, TFloat64, TInt32, TInt64, TStream} import is.hail.utils._ -import is.hail.{ExecStrategy, HailContext, HailSuite} +import is.hail.{ExecStrategy, HailSuite} import org.apache.spark.sql.Row import org.testng.annotations.{BeforeClass, Test} @@ -35,16 +36,16 @@ object TestRandomFunctions extends RegistryFunctions { } def registerAll() { - registerSeeded0("counter_seeded", TInt32, PInt32(true)) { case (cb, r, rt, seed) => - PCode(rt, getTestRNG(cb.emb, seed).invoke[Int]("counter")) + registerSeeded0("counter_seeded", TInt32, SInt32) { case (cb, r, rt, seed) => + primitive(getTestRNG(cb.emb, seed).invoke[Int]("counter")) } - registerSeeded0("seed_seeded", TInt64, PInt64(true)) { case (cb, r, rt, seed) => - PCode(rt, getTestRNG(cb.emb, seed).invoke[Long]("seed")) + registerSeeded0("seed_seeded", TInt64, SInt64) { case (cb, r, rt, seed) => + primitive(getTestRNG(cb.emb, seed).invoke[Long]("seed")) } - registerSeeded0("pi_seeded", TInt32, PInt32(true)) { case (cb, r, rt, seed) => - PCode(rt, getTestRNG(cb.emb, seed).invoke[Int]("partitionIndex")) + registerSeeded0("pi_seeded", TInt32, SInt32) { case (cb, r, rt, seed) => + primitive(getTestRNG(cb.emb, seed).invoke[Int]("partitionIndex")) } } } diff --git a/hail/src/test/scala/is/hail/expr/ir/RequirednessSuite.scala b/hail/src/test/scala/is/hail/expr/ir/RequirednessSuite.scala index 4ced01d7fad..c1c68862b64 100644 --- a/hail/src/test/scala/is/hail/expr/ir/RequirednessSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/RequirednessSuite.scala @@ -51,12 +51,13 @@ class RequirednessSuite extends HailSuite { def nd(r: Boolean): IR = if (r) MakeNDArray.fill(int(optional), FastIndexedSeq(1, 2), True()) else NA(tnd) - def nestednd(r: Boolean, aelt: Boolean): IR = { - if (r) - MakeNDArray.fill(array(optional, aelt), FastIndexedSeq(1, 2), True()) - else - NDArrayMap(NA(tnestednd), genUID(), array(optional, aelt)) - } +// FIXME: Currently ndarrays don't support elements that have pointers. 
+// def nestednd(r: Boolean, aelt: Boolean): IR = { +// if (r) +// MakeNDArray.fill(array(optional, aelt), FastIndexedSeq(1, 2), True()) +// else +// NDArrayMap(NA(tnestednd), genUID(), array(optional, aelt)) +// } def nestedstream(r: Boolean, a: Boolean, aelt: Boolean): IR = { if (r) @@ -105,7 +106,7 @@ class RequirednessSuite extends HailSuite { MakeTuple.ordered(FastIndexedSeq(I32(5), Str("bar")))) allRequired.foreach { n => - nodes += Array(n, PType.canonical(n.typ, required).deepInnerRequired(required)) + nodes += Array(n, RequirednessSuite.deepInnerRequired(PType.canonical(n.typ, required), required)) } val bools = Array(true, false) @@ -114,7 +115,6 @@ class RequirednessSuite extends HailSuite { nodes += Array(nd(r1), pnd(r1)) for (r2 <- bools) { nodes += Array(array(r2, r1), parray(r2, r1)) - nodes += Array(nestednd(r2, r1), pnestednd(r2, r1)) for (r3 <- bools) { nodes += Array(nestedarray(r3, r2, r1), pnestedarray(r3, r2, r1)) for (r4 <- bools) { @@ -519,3 +519,22 @@ class RequirednessSuite extends HailSuite { assert(actual == expected) } } + +object RequirednessSuite { + def deepInnerRequired(t: PType, required: Boolean): PType = + t match { + case t: PCanonicalArray => PCanonicalArray(deepInnerRequired(t.elementType, true), required) + case t: PCanonicalSet => PCanonicalSet(deepInnerRequired(t.elementType, true), required) + case t: PCanonicalDict => PCanonicalDict(deepInnerRequired(t.keyType, true), deepInnerRequired(t.valueType, true), required) + case t: PCanonicalStruct => + PCanonicalStruct(t.fields.map(f => PField(f.name, deepInnerRequired(f.typ, true), f.index)), required) + case t: PCanonicalTuple => + PCanonicalTuple(t._types.map { f => f.copy(typ = deepInnerRequired(f.typ, true)) }, required) + case t: PCanonicalInterval => + PCanonicalInterval(deepInnerRequired(t.pointType, true), required) + case t: PCanonicalStream => + PCanonicalStream(deepInnerRequired(t.elementType, true), required = required) + case t => + t.setRequired(required) + } +} diff --git a/hail/src/test/scala/is/hail/expr/ir/StagedBTreeSuite.scala b/hail/src/test/scala/is/hail/expr/ir/StagedBTreeSuite.scala index 20825569446..329eb261908 100644 --- a/hail/src/test/scala/is/hail/expr/ir/StagedBTreeSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/StagedBTreeSuite.scala @@ -1,7 +1,6 @@ package is.hail.expr.ir import java.io.{ByteArrayInputStream, ByteArrayOutputStream} - import is.hail.HailSuite import is.hail.annotations.Region import is.hail.asm4s._ @@ -10,13 +9,15 @@ import is.hail.expr.ir.agg._ import is.hail.expr.ir.orderings.CodeOrdering import is.hail.types.physical._ import is.hail.io.{InputBuffer, OutputBuffer, StreamBufferSpec} +import is.hail.types.physical.stypes.Int64SingleCodeType +import is.hail.types.physical.stypes.interfaces.primitive import is.hail.types.physical.stypes.primitives.SInt64 import is.hail.utils._ import org.testng.annotations.Test import scala.collection.mutable class TestBTreeKey(mb: EmitMethodBuilder[_]) extends BTreeKey { - private val comp = mb.ecb.getOrderingFunction(SInt64(false), SInt64(false), CodeOrdering.Compare()) + private val comp = mb.ecb.getOrderingFunction(SInt64, SInt64, CodeOrdering.Compare()) def storageType: PTuple = PCanonicalTuple(required = true, PInt64(), PCanonicalTuple(false)) def compType: PType = PInt64() def isEmpty(cb: EmitCodeBuilder, off: Code[Long]): Code[Boolean] = @@ -41,7 +42,7 @@ class TestBTreeKey(mb: EmitMethodBuilder[_]) extends BTreeKey { def compKeys(cb: EmitCodeBuilder, k1: EmitCode, k2: EmitCode): Code[Int] = comp(cb, k1, 
k2) def loadCompKey(cb: EmitCodeBuilder, off: Value[Long]): EmitCode = - EmitCode(Code._empty, storageType.isFieldMissing(off, 0), PCode(compType, Region.loadLong(storageType.fieldOffset(off, 0)))) + EmitCode(Code._empty, storageType.isFieldMissing(off, 0), primitive(Region.loadLong(storageType.fieldOffset(off, 0)))) } object BTreeBackedSet { @@ -110,7 +111,7 @@ class BTreeBackedSet(ctx: ExecuteContext, region: Region, n: Int) { val btree = new AppendOnlyBTree(cb, key, r, root, maxElements = n) fb.emitWithBuilder { cb => - val ec = EmitCode(Code._empty, m, PCode(PInt64Optional, v)) + val ec = EmitCode(Code._empty, m, primitive(v)) cb.assign(r, fb.getCodeParam[Region](1)) cb.assign(root, fb.getCodeParam[Long](2)) cb.assign(elt, btree.getOrElseInitialize(cb, ec)) @@ -131,7 +132,7 @@ class BTreeBackedSet(ctx: ExecuteContext, region: Region, n: Int) { val key = new TestBTreeKey(fb.apply_method) val btree = new AppendOnlyBTree(cb, key, r, root, maxElements = n) - val sab = new StagedArrayBuilder(PInt64(), fb.apply_method, 16) + val sab = new StagedArrayBuilder(Int64SingleCodeType, true, fb.apply_method, 16) val idx = fb.newLocal[Int]() val returnArray = fb.newLocal[Array[java.lang.Long]]() @@ -143,7 +144,7 @@ class BTreeBackedSet(ctx: ExecuteContext, region: Region, n: Int) { cb += Code.memoize(koff, "koff") { koff => val ec = key.loadCompKey(cb, koff) ec.m.mux(sab.addMissing(), - sab.add(ec.v)) + sab.add(ec.pv.asInt64.longCode(cb))) } } cb += (returnArray := Code.newArray[java.lang.Long](sab.size)) diff --git a/hail/src/test/scala/is/hail/expr/ir/TableIRSuite.scala b/hail/src/test/scala/is/hail/expr/ir/TableIRSuite.scala index 798af683148..e501a4c6487 100644 --- a/hail/src/test/scala/is/hail/expr/ir/TableIRSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/TableIRSuite.scala @@ -761,6 +761,8 @@ class TableIRSuite extends HailSuite { @Test def testPartitionCountsWithDropRows() { val tr = new TableReader { + override def renderShort(): String = ??? + def pathsUsed: Seq[String] = FastSeq() override def apply(tr: TableRead, ctx: ExecuteContext): TableValue = ??? 
diff --git a/hail/src/test/scala/is/hail/expr/ir/TakeByAggregatorSuite.scala b/hail/src/test/scala/is/hail/expr/ir/TakeByAggregatorSuite.scala index 6b51186d5c7..dd9070d6577 100644 --- a/hail/src/test/scala/is/hail/expr/ir/TakeByAggregatorSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/TakeByAggregatorSuite.scala @@ -104,7 +104,7 @@ class TakeByAggregatorSuite extends HailSuite { cb.whileLoop(i < n, { cb += (random := rng.invoke[Double, Double, Double]("runif", -10000d, 10000d).toI) tba.seqOp(cb, false, random, false, random) - ab.append(cb, new SInt32Code(true, random)) + ab.append(cb, new SInt32Code(random)) cb += (i := i + 1) }) cb += ab.size.cne(n).orEmpty(Code._fatal[Unit]("bad size!")) diff --git a/hail/src/test/scala/is/hail/expr/ir/TestUtils.scala b/hail/src/test/scala/is/hail/expr/ir/TestUtils.scala index 93b08c1a7b5..0527df4874e 100644 --- a/hail/src/test/scala/is/hail/expr/ir/TestUtils.scala +++ b/hail/src/test/scala/is/hail/expr/ir/TestUtils.scala @@ -88,7 +88,7 @@ object TestUtils { def IRSet(a: Integer*): IR = toIRSet(a) - def IRCall(c: Call): IR = Cast(I32(c), TCall) + def IRCall(c: Call): IR = invoke("callFromRepr", TCall, I32(c)) def IRAggCount: IR = { val aggSig = AggSignature(Count(), FastSeq.empty, FastSeq.empty) diff --git a/hail/src/test/scala/is/hail/expr/ir/TrapNodeSuite.scala b/hail/src/test/scala/is/hail/expr/ir/TrapNodeSuite.scala new file mode 100644 index 00000000000..9e4970a4953 --- /dev/null +++ b/hail/src/test/scala/is/hail/expr/ir/TrapNodeSuite.scala @@ -0,0 +1,37 @@ +package is.hail.expr.ir + +import is.hail.TestUtils._ +import is.hail.types.virtual._ +import is.hail.utils._ +import is.hail.{ExecStrategy, HailSuite} +import org.apache.spark.sql.Row +import org.testng.annotations.Test + +class TrapNodeSuite extends HailSuite { + implicit val execStrats = ExecStrategy.javaOnly + + @Test def testTrapNode() { + assertEvalsTo(Trap(ArrayRef(Literal(TArray(TInt32), FastIndexedSeq(0, 1, 2)), I32(1))), Row(null, 1)) + val res = eval(Trap(ArrayRef(Literal(TArray(TInt32), FastIndexedSeq(0, 1, 2)), I32(-1)))) + res match { + case Row(Row(msg: String, id: Int), null) => + assert(id == -1) + assert(msg.contains("array index out of bounds")) + } + + assertEvalsTo(Trap(Die(Str("foo bar"), TInt32, 5)), Row(Row("foo bar", 5), null)) + } + + @Test def testTrapNodeInLargerContext() { + def resultByIdx(idx: Int): IR = bindIR(Trap(ArrayRef(Literal(TArray(TInt32), FastIndexedSeq(100, 200, 300)), I32(idx)))) { value => + If(IsNA(GetTupleElement(value, 0)), + GetTupleElement(value, 1), + I32(-1) + ) + } + + assertEvalsTo(resultByIdx(-100), -1) + assertEvalsTo(resultByIdx(2), 300) + assertEvalsTo(resultByIdx(4), -1) + } +} diff --git a/hail/src/test/scala/is/hail/expr/ir/agg/DownsampleSuite.scala b/hail/src/test/scala/is/hail/expr/ir/agg/DownsampleSuite.scala index 196b093a600..8d81fc26dca 100644 --- a/hail/src/test/scala/is/hail/expr/ir/agg/DownsampleSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/agg/DownsampleSuite.scala @@ -6,7 +6,7 @@ import is.hail.asm4s._ import is.hail.expr.ir.{EmitCode, EmitFunctionBuilder} import is.hail.types.VirtualTypeWithReq import is.hail.types.physical.stypes.primitives.{SFloat64Code, SInt32Code} -import is.hail.types.physical.{PCanonicalArray, PCanonicalString, PCode} +import is.hail.types.physical.{PCanonicalArray, PCanonicalString} import is.hail.utils.FastIndexedSeq import org.testng.annotations.Test @@ -40,9 +40,9 @@ class DownsampleSuite extends HailSuite { cb.assign(y, rng.invoke[Double, Double, Double]("runif", 0d, 1d)) 
ds1.insert(cb, - EmitCode.present(cb.emb, new SFloat64Code(true, x)), - EmitCode.present(cb.emb, new SFloat64Code(true, y)), - EmitCode.missing(cb.emb, PCanonicalArray(PCanonicalString()))) + EmitCode.present(cb.emb, new SFloat64Code(x)), + EmitCode.present(cb.emb, new SFloat64Code(y)), + EmitCode.missing(cb.emb, PCanonicalArray(PCanonicalString()).sType)) cb.assign(i, i + const(1)) }) ds1.merge(cb, ds2) diff --git a/hail/src/test/scala/is/hail/expr/ir/agg/StagedBlockLinkedListSuite.scala b/hail/src/test/scala/is/hail/expr/ir/agg/StagedBlockLinkedListSuite.scala index 39e5b11d85e..fabf026ebd8 100644 --- a/hail/src/test/scala/is/hail/expr/ir/agg/StagedBlockLinkedListSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/agg/StagedBlockLinkedListSuite.scala @@ -47,7 +47,7 @@ class StagedBlockLinkedListSuite extends HailSuite { sbll.load(cb, ptr) sbll.push(cb, r, EmitCode(Code._empty, eltOff.get.ceq(0L), - elemPType.loadCheapPCode(cb, eltOff))) + elemPType.loadCheapSCode(cb, eltOff))) sbll.store(cb, ptr) Code._empty } diff --git a/hail/src/test/scala/is/hail/methods/SkatSuite.scala b/hail/src/test/scala/is/hail/methods/SkatSuite.scala index acd1a4bbc8d..067b295060b 100644 --- a/hail/src/test/scala/is/hail/methods/SkatSuite.scala +++ b/hail/src/test/scala/is/hail/methods/SkatSuite.scala @@ -3,9 +3,10 @@ package is.hail.methods import is.hail.{HailSuite, TestUtils} import is.hail.utils._ import breeze.linalg._ +import is.hail.expr.ir.DoubleArrayBuilder import org.testng.annotations.Test -case class SkatAggForR(xs: BoxedArrayBuilder[DenseVector[Double]], weights: BoxedArrayBuilder[Double]) +case class SkatAggForR(xs: BoxedArrayBuilder[DenseVector[Double]], weights: DoubleArrayBuilder) class SkatSuite extends HailSuite { diff --git a/hail/src/test/scala/is/hail/services/shuffler/ShuffleSuite.scala b/hail/src/test/scala/is/hail/services/shuffler/ShuffleSuite.scala index 51b7293079e..cac61e3c4ae 100644 --- a/hail/src/test/scala/is/hail/services/shuffler/ShuffleSuite.scala +++ b/hail/src/test/scala/is/hail/services/shuffler/ShuffleSuite.scala @@ -14,6 +14,7 @@ import is.hail.services.shuffler.ShufflerTestUtils._ import is.hail.io._ import is.hail.utils._ import is.hail._ +import is.hail.types.physical.stypes.PTypeReferenceSingleCodeType import org.scalatest.testng.TestNGSuite import org.testng.annotations.Test @@ -40,7 +41,7 @@ class ShuffleSuite extends HailSuite { using(new ShuffleClient(shuffleType, rowPType, keyPType)) { c => val rowDecodedPType = c.codecs.rowDecodedPType - val values = new BoxedArrayBuilder[Long]() + val values = new LongArrayBuilder() pool.scopedRegion { region => val rvb = new RegionValueBuilder(region) val nElements = 1000000 @@ -76,7 +77,7 @@ class ShuffleSuite extends HailSuite { c.get(region, left, true, right, false)) i = 0 - val ab = new BoxedArrayBuilder[Long]() + val ab = new LongArrayBuilder() while (i < nPartitions) { ab ++= c.get(region, partitionBounds(i).offset, true, diff --git a/hail/src/test/scala/is/hail/types/physical/PNDArraySuite.scala b/hail/src/test/scala/is/hail/types/physical/PNDArraySuite.scala index d8986268eab..549bb23711b 100644 --- a/hail/src/test/scala/is/hail/types/physical/PNDArraySuite.scala +++ b/hail/src/test/scala/is/hail/types/physical/PNDArraySuite.scala @@ -110,16 +110,17 @@ class PNDArraySuite extends PhysicalTestUtils { assert(PNDArray.getReferenceCount(addr1) == 1) // Deep copy with elements that contain pointers, so have to actually do a full copy - val pNDOfArrays = PCanonicalNDArray(PCanonicalArray(PInt32Required, true), 1) - val 
annotationNDOfArrays = new SafeNDArray(IndexedSeq(3L), (0 until 3).map(idx => (0 to idx).toArray.toIndexedSeq)) - val addr3 = pNDOfArrays.unstagedStoreJavaObject(annotationNDOfArrays, region=region1) - val unsafe3 = UnsafeRow.read(pNDOfArrays, region1, addr3) - val addr4 = pNDOfArrays.copyFromAddress(region2, pNDOfArrays, addr3, true) - val unsafe4 = UnsafeRow.read(pNDOfArrays, region2, addr4) - assert(addr3 != addr4) - assert(unsafe3 == unsafe4) - assert(PNDArray.getReferenceCount(addr3) == 1L) - assert(PNDArray.getReferenceCount(addr4) == 1L) + // FIXME: Currently ndarrays do not support this, reference counting needs to account for this. +// val pNDOfArrays = PCanonicalNDArray(PCanonicalArray(PInt32Required, true), 1) +// val annotationNDOfArrays = new SafeNDArray(IndexedSeq(3L), (0 until 3).map(idx => (0 to idx).toArray.toIndexedSeq)) +// val addr3 = pNDOfArrays.unstagedStoreJavaObject(annotationNDOfArrays, region=region1) +// val unsafe3 = UnsafeRow.read(pNDOfArrays, region1, addr3) +// val addr4 = pNDOfArrays.copyFromAddress(region2, pNDOfArrays, addr3, true) +// val unsafe4 = UnsafeRow.read(pNDOfArrays, region2, addr4) +// assert(addr3 != addr4) +// assert(unsafe3 == unsafe4) +// assert(PNDArray.getReferenceCount(addr3) == 1L) +// assert(PNDArray.getReferenceCount(addr4) == 1L) // Deep copy with PTypes with different requirements val pNDOfStructs1 = PCanonicalNDArray(PCanonicalStruct(true, ("x", PInt32Required), ("y", PInt32())), 1) diff --git a/hail/src/test/scala/is/hail/types/physical/PhysicalTestUtils.scala b/hail/src/test/scala/is/hail/types/physical/PhysicalTestUtils.scala index 132ddb8d691..e3302279ad4 100644 --- a/hail/src/test/scala/is/hail/types/physical/PhysicalTestUtils.scala +++ b/hail/src/test/scala/is/hail/types/physical/PhysicalTestUtils.scala @@ -54,7 +54,7 @@ abstract class PhysicalTestUtils extends HailSuite { val value = fb.getCodeParam[Long](2) try { - fb.emitWithBuilder(cb => destType.store(cb, codeRegion, sourceType.loadCheapPCode(cb, value), deepCopy = deepCopy)) + fb.emitWithBuilder(cb => destType.store(cb, codeRegion, sourceType.loadCheapSCode(cb, value), deepCopy = deepCopy)) compileSuccess = true } catch { case e: Throwable => diff --git a/hail/src/test/scala/is/hail/utils/ArrayBuilderSuite.scala b/hail/src/test/scala/is/hail/utils/ArrayBuilderSuite.scala index e957fa7b88b..f33c4ce89e9 100644 --- a/hail/src/test/scala/is/hail/utils/ArrayBuilderSuite.scala +++ b/hail/src/test/scala/is/hail/utils/ArrayBuilderSuite.scala @@ -1,11 +1,12 @@ package is.hail.utils +import is.hail.expr.ir.IntArrayBuilder import org.scalatest.testng.TestNGSuite import org.testng.annotations.Test class ArrayBuilderSuite extends TestNGSuite { @Test def addOneElement() { - val ab = new BoxedArrayBuilder[Int](0) + val ab = new IntArrayBuilder(0) ab += 3 val a = ab.result() assert(a.length == 1) @@ -13,13 +14,13 @@ class ArrayBuilderSuite extends TestNGSuite { } @Test def addArray() { - val ab = new BoxedArrayBuilder[Int](0) + val ab = new IntArrayBuilder(0) ab ++= Array.fill[Int](5)(2) val a = ab.result() assert(a.length == 5) assert(a.forall(_ == 2)) - val ab2 = new BoxedArrayBuilder[Int](0) + val ab2 = new IntArrayBuilder(0) ab2 ++= (Array.fill[Int](4)(3), 2) val a2 = ab2.result() assert(a2.length == 2) @@ -29,7 +30,7 @@ class ArrayBuilderSuite extends TestNGSuite { val ab2Update = ab2.result() assert(ab2Update sameElements Array(5, 3)) - val ab3 = new BoxedArrayBuilder[Int] + val ab3 = new IntArrayBuilder ab3 += 1 ab3 += 5 ab3 ++= Array.fill[Int](2)(3) diff --git 
a/internal-gateway/Makefile b/internal-gateway/Makefile index bd9d510742c..af77f678db8 100644 --- a/internal-gateway/Makefile +++ b/internal-gateway/Makefile @@ -2,22 +2,16 @@ include ../config.mk .PHONY: build push deploy -INTERNAL_GATEWAY_LATEST = $(DOCKER_PREFIX)/internal-gateway:latest -INTERNAL_GATEWAY_IMAGE = $(DOCKER_PREFIX)/internal-gateway:$(shell docker images -q --no-trunc internal-gateway | sed -e 's,[^:]*:,,') +TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) + +INTERNAL_GATEWAY_IMAGE := $(DOCKER_PREFIX)/internal-gateway:$(TOKEN) build: $(MAKE) -C ../docker hail-ubuntu - -docker pull $(INTERNAL_GATEWAY_LATEST) - python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"hail-ubuntu"}}' Dockerfile Dockerfile.out - docker build -t internal-gateway -f Dockerfile.out --cache-from internal-gateway,$(INTERNAL_GATEWAY_LATEST),hail-ubuntu . - -push: build - docker tag internal-gateway $(INTERNAL_GATEWAY_LATEST) - docker push $(INTERNAL_GATEWAY_LATEST) - docker tag internal-gateway $(INTERNAL_GATEWAY_IMAGE) - docker push $(INTERNAL_GATEWAY_IMAGE) + python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"'$$(cat ../docker/hail-ubuntu-image-ref)'"}}' Dockerfile Dockerfile.out + ../docker-build.sh . Dockerfile.out $(INTERNAL_GATEWAY_IMAGE) -deploy: push +deploy: build python3 ../ci/jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"global":{"internal_ip":"$(INTERNAL_IP)"}}' service.yaml service.yaml.out kubectl -n default apply -f service.yaml.out python3 ../ci/jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"internal_gateway_image":{"image":"$(INTERNAL_GATEWAY_IMAGE)"}}' deployment.yaml deployment.yaml.out diff --git a/js_common/.gitignore b/js_common/.gitignore new file mode 100644 index 00000000000..d451ff16c10 --- /dev/null +++ b/js_common/.gitignore @@ -0,0 +1,5 @@ +node_modules +.DS_Store +dist +dist-ssr +*.local diff --git a/js_common/batch-client.ts b/js_common/batch-client.ts new file mode 100644 index 00000000000..f9e2bfd334a --- /dev/null +++ b/js_common/batch-client.ts @@ -0,0 +1,7 @@ +import type { Job, Batch } from './types' + +export type GetJobsResult = { jobs: Job[] } +export type GetBatchesResult = { + batches: Batch[], + last_batch_id: number, +} diff --git a/js_common/hail.css b/js_common/hail.css new file mode 100644 index 00000000000..c468b4288a4 --- /dev/null +++ b/js_common/hail.css @@ -0,0 +1,26 @@ +.App { + min-height: 100vh; + display: flex; + flex-direction: column; + align-items: center; + font-size: calc(10px + 2vmin); + color: #2b2d2f; +} + +.List { + margin-left: 50px; +} + +body { + margin: 0; + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen', + 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue', + sans-serif; + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; +} + +code { + font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New', + monospace; +} diff --git a/js_common/package-lock.json b/js_common/package-lock.json new file mode 100644 index 00000000000..7814c678a7c --- /dev/null +++ b/js_common/package-lock.json @@ -0,0 +1,199 @@ +{ + "name": "@hail/common", + "version": "0.0.0", + "lockfileVersion": 2, + "requires": true, + "packages": { + "": { + "name": "@hail/common", + "version": "0.0.0", + "license": "ISC", + "dependencies": { + "axios": "^0.21.1", + "react": "^17.0.2" + }, + "devDependencies": { + "@types/react": "^17.0.5", + "svelte": 
"^3.38.2" + } + }, + "node_modules/@types/prop-types": { + "version": "15.7.3", + "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.3.tgz", + "integrity": "sha512-KfRL3PuHmqQLOG+2tGpRO26Ctg+Cq1E01D2DMriKEATHgWLfeNDmq9e29Q9WIky0dQ3NPkd1mzYH8Lm936Z9qw==", + "dev": true + }, + "node_modules/@types/react": { + "version": "17.0.5", + "resolved": "https://registry.npmjs.org/@types/react/-/react-17.0.5.tgz", + "integrity": "sha512-bj4biDB9ZJmGAYTWSKJly6bMr4BLUiBrx9ujiJEoP9XIDY9CTaPGxE5QWN/1WjpPLzYF7/jRNnV2nNxNe970sw==", + "dev": true, + "dependencies": { + "@types/prop-types": "*", + "@types/scheduler": "*", + "csstype": "^3.0.2" + } + }, + "node_modules/@types/scheduler": { + "version": "0.16.1", + "resolved": "https://registry.npmjs.org/@types/scheduler/-/scheduler-0.16.1.tgz", + "integrity": "sha512-EaCxbanVeyxDRTQBkdLb3Bvl/HK7PBK6UJjsSixB0iHKoWxE5uu2Q/DgtpOhPIojN0Zl1whvOd7PoHs2P0s5eA==", + "dev": true + }, + "node_modules/axios": { + "version": "0.21.1", + "resolved": "https://registry.npmjs.org/axios/-/axios-0.21.1.tgz", + "integrity": "sha512-dKQiRHxGD9PPRIUNIWvZhPTPpl1rf/OxTYKsqKUDjBwYylTvV7SjSHJb9ratfyzM6wCdLCOYLzs73qpg5c4iGA==", + "dependencies": { + "follow-redirects": "^1.10.0" + } + }, + "node_modules/csstype": { + "version": "3.0.8", + "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.0.8.tgz", + "integrity": "sha512-jXKhWqXPmlUeoQnF/EhTtTl4C9SnrxSH/jZUih3jmO6lBKr99rP3/+FmrMj4EFpOXzMtXHAZkd3x0E6h6Fgflw==", + "dev": true + }, + "node_modules/follow-redirects": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.14.1.tgz", + "integrity": "sha512-HWqDgT7ZEkqRzBvc2s64vSZ/hfOceEol3ac/7tKwzuvEyWx3/4UegXh5oBOIotkGsObyk3xznnSRVADBgWSQVg==", + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, + "node_modules/js-tokens": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", + "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==" + }, + "node_modules/loose-envify": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", + "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==", + "dependencies": { + "js-tokens": "^3.0.0 || ^4.0.0" + }, + "bin": { + "loose-envify": "cli.js" + } + }, + "node_modules/object-assign": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", + "integrity": "sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM=", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/react": { + "version": "17.0.2", + "resolved": "https://registry.npmjs.org/react/-/react-17.0.2.tgz", + "integrity": "sha512-gnhPt75i/dq/z3/6q/0asP78D0u592D5L1pd7M8P+dck6Fu/jJeL6iVVK23fptSUZj8Vjf++7wXA8UNclGQcbA==", + "dependencies": { + "loose-envify": "^1.1.0", + "object-assign": "^4.1.1" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/svelte": { + "version": "3.38.2", + "resolved": "https://registry.npmjs.org/svelte/-/svelte-3.38.2.tgz", + "integrity": "sha512-q5Dq0/QHh4BLJyEVWGe7Cej5NWs040LWjMbicBGZ+3qpFWJ1YObRmUDZKbbovddLC9WW7THTj3kYbTOFmU9fbg==", + "dev": true, + "engines": { + "node": ">= 8" + } + } + }, + "dependencies": { + "@types/prop-types": { + "version": "15.7.3", + "resolved": 
"https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.3.tgz", + "integrity": "sha512-KfRL3PuHmqQLOG+2tGpRO26Ctg+Cq1E01D2DMriKEATHgWLfeNDmq9e29Q9WIky0dQ3NPkd1mzYH8Lm936Z9qw==", + "dev": true + }, + "@types/react": { + "version": "17.0.5", + "resolved": "https://registry.npmjs.org/@types/react/-/react-17.0.5.tgz", + "integrity": "sha512-bj4biDB9ZJmGAYTWSKJly6bMr4BLUiBrx9ujiJEoP9XIDY9CTaPGxE5QWN/1WjpPLzYF7/jRNnV2nNxNe970sw==", + "dev": true, + "requires": { + "@types/prop-types": "*", + "@types/scheduler": "*", + "csstype": "^3.0.2" + } + }, + "@types/scheduler": { + "version": "0.16.1", + "resolved": "https://registry.npmjs.org/@types/scheduler/-/scheduler-0.16.1.tgz", + "integrity": "sha512-EaCxbanVeyxDRTQBkdLb3Bvl/HK7PBK6UJjsSixB0iHKoWxE5uu2Q/DgtpOhPIojN0Zl1whvOd7PoHs2P0s5eA==", + "dev": true + }, + "axios": { + "version": "0.21.1", + "resolved": "https://registry.npmjs.org/axios/-/axios-0.21.1.tgz", + "integrity": "sha512-dKQiRHxGD9PPRIUNIWvZhPTPpl1rf/OxTYKsqKUDjBwYylTvV7SjSHJb9ratfyzM6wCdLCOYLzs73qpg5c4iGA==", + "requires": { + "follow-redirects": "^1.10.0" + } + }, + "csstype": { + "version": "3.0.8", + "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.0.8.tgz", + "integrity": "sha512-jXKhWqXPmlUeoQnF/EhTtTl4C9SnrxSH/jZUih3jmO6lBKr99rP3/+FmrMj4EFpOXzMtXHAZkd3x0E6h6Fgflw==", + "dev": true + }, + "follow-redirects": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.14.1.tgz", + "integrity": "sha512-HWqDgT7ZEkqRzBvc2s64vSZ/hfOceEol3ac/7tKwzuvEyWx3/4UegXh5oBOIotkGsObyk3xznnSRVADBgWSQVg==" + }, + "js-tokens": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", + "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==" + }, + "loose-envify": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", + "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==", + "requires": { + "js-tokens": "^3.0.0 || ^4.0.0" + } + }, + "object-assign": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", + "integrity": "sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM=" + }, + "react": { + "version": "17.0.2", + "resolved": "https://registry.npmjs.org/react/-/react-17.0.2.tgz", + "integrity": "sha512-gnhPt75i/dq/z3/6q/0asP78D0u592D5L1pd7M8P+dck6Fu/jJeL6iVVK23fptSUZj8Vjf++7wXA8UNclGQcbA==", + "requires": { + "loose-envify": "^1.1.0", + "object-assign": "^4.1.1" + } + }, + "svelte": { + "version": "3.38.2", + "resolved": "https://registry.npmjs.org/svelte/-/svelte-3.38.2.tgz", + "integrity": "sha512-q5Dq0/QHh4BLJyEVWGe7Cej5NWs040LWjMbicBGZ+3qpFWJ1YObRmUDZKbbovddLC9WW7THTj3kYbTOFmU9fbg==", + "dev": true + } + } +} diff --git a/js_common/package.json b/js_common/package.json new file mode 100644 index 00000000000..88f1ebf210e --- /dev/null +++ b/js_common/package.json @@ -0,0 +1,17 @@ +{ + "name": "@hail/common", + "version": "0.0.0", + "description": "", + "main": "index.js", + "scripts": {}, + "author": "", + "license": "ISC", + "dependencies": { + "axios": "^0.21.1", + "react": "^17.0.2" + }, + "devDependencies": { + "@types/react": "^17.0.5", + "svelte": "^3.38.2" + } +} diff --git a/js_common/react/batch-client.ts b/js_common/react/batch-client.ts new file mode 100644 index 00000000000..5b68b1ab632 --- /dev/null +++ b/js_common/react/batch-client.ts @@ -0,0 +1,11 @@ +import { usePollingApi } from 
'./hooks' +import { Maybe } from '../types' +import { GetJobsResult, GetBatchesResult } from '../batch-client' + +export function useJobs(id: number): Maybe<GetJobsResult> { + return usePollingApi(`/api/v1alpha/batches/${id}/jobs`) +} + +export function useBatches(): Maybe<GetBatchesResult> { + return usePollingApi('/api/v1alpha/batches') +} diff --git a/js_common/react/hooks.ts b/js_common/react/hooks.ts new file mode 100644 index 00000000000..5d06fd3f510 --- /dev/null +++ b/js_common/react/hooks.ts @@ -0,0 +1,32 @@ +import { useState, useEffect } from 'react' +import { Maybe } from '../types' +import axios from 'axios' + +const POLL_INTERVAL_MILLIS = 1000 + +export function usePollingApi<T>(apiPath: string): Maybe<T> { + const [value, setValue] = useState<Maybe<T>>(undefined) + + const fetchData = () => axios.get(apiPath).then(res => setValue(res.data)) + useEffect(() => { + fetchData() + const pollInterval = setInterval(fetchData, POLL_INTERVAL_MILLIS) + + return () => clearInterval(pollInterval) + }, []) + + return value +} + +export function useStreamingApi<T>(apiPath: string): Maybe<T> { + const [value, setValue] = useState<Maybe<T>>(undefined) + + useEffect(() => { + const ws = new WebSocket(`ws://localhost:5050${apiPath}`) + ws.onmessage = ev => setValue(JSON.parse(ev.data)) + + return () => ws.close() + }, []) + + return value +} diff --git a/js_common/svelte/batch-client.ts b/js_common/svelte/batch-client.ts new file mode 100644 index 00000000000..b5c197526b6 --- /dev/null +++ b/js_common/svelte/batch-client.ts @@ -0,0 +1,11 @@ +import { pollingApiStore } from './store' +import type { StoreApiResult } from './store' +import { GetJobsResult, GetBatchesResult } from '../batch-client' + +export function getJobsStore(id: number): StoreApiResult<GetJobsResult> { + return pollingApiStore(`/api/v1alpha/batches/${id}/jobs`) +} + +export function getBatchesStore(): StoreApiResult<GetBatchesResult> { + return pollingApiStore('/api/v1alpha/batches') +} diff --git a/js_common/svelte/store.ts b/js_common/svelte/store.ts new file mode 100644 index 00000000000..f5b7711d24a --- /dev/null +++ b/js_common/svelte/store.ts @@ -0,0 +1,27 @@ +import { writable, Writable } from 'svelte/store' +import type { Maybe } from '../types' +import axios from 'axios' + +const POLL_INTERVAL_MILLIS = 1000 + +export type StoreApiResult<T> = { + store: Writable<Maybe<T>>, + destroy: () => void, +} + +export function pollingApiStore<T>(apiPath: string): StoreApiResult<T> { + const store = writable(undefined) + const fetchData = () => axios.get(apiPath).then(res => store.set(res.data)) + fetchData() + const interval = setInterval(fetchData, POLL_INTERVAL_MILLIS) + + return { store, destroy: () => clearInterval(interval) } +} + +export function streamingApiStore<T>(apiPath: string): StoreApiResult<T> { + const store = writable(undefined) + const ws = new WebSocket(`ws://localhost:5050${apiPath}`) + ws.onmessage = ev => store.set(JSON.parse(ev.data)) + + return { store, destroy: () => ws.close() } +} diff --git a/js_common/types.ts b/js_common/types.ts new file mode 100644 index 00000000000..0b101b4ab99 --- /dev/null +++ b/js_common/types.ts @@ -0,0 +1,36 @@ +export type Maybe<T> = T | undefined; + +export type Batch = { + id: number, + user: string, + billing_project: string, + token: string, + state: string, + complete: boolean, + closed: boolean, + n_jobs: number, + n_completed: number, + n_succeeded: number, + n_failed: number, + n_cancelled: number, + time_created: string, + time_closed: string, + time_completed: string, + duration: string, + attributes: any, + msec_mcpu: number, + cost: string, +} + +export type Job = { + batch_id: 
number, + billing_project: string, + cost: number, + duration: number, + exit_code: Maybe<number>, + job_id: number, + msec_mcpu: number, + name: Maybe<string>, + state: string, + user: string, +} diff --git a/letsencrypt/Makefile b/letsencrypt/Makefile index 51ced96d199..9446ddb034c 100644 --- a/letsencrypt/Makefile +++ b/letsencrypt/Makefile @@ -1,25 +1,19 @@ include ../config.mk -LETSENCRYPT_LATEST = $(DOCKER_PREFIX)/letsencrypt:latest -LETSENCRYPT_IMAGE = $(DOCKER_PREFIX)/letsencrypt:$(shell docker images -q --no-trunc letsencrypt | sed -e 's,[^:]*:,,') +TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) -.PHONY: build push start-service run clean +LETSENCRYPT_IMAGE := $(DOCKER_PREFIX)/letsencrypt:$(TOKEN) -build: - docker build -f Dockerfile -t letsencrypt --cache-from letsencrypt,$(LETSENCRYPT_LATEST) . +.PHONY: build start-service run clean -.PHONY: push -push: build - docker tag letsencrypt $(LETSENCRYPT_LATEST) - docker push $(LETSENCRYPT_LATEST) - docker tag letsencrypt $(LETSENCRYPT_IMAGE) - docker push $(LETSENCRYPT_IMAGE) +build: + ../docker-build.sh . Dockerfile $(LETSENCRYPT_IMAGE) start-service: kubectl -n default apply -f service.yaml DRY_RUN ?= false -run: push +run: build echo $(DOMAIN) > domains.txt.out echo internal.$(DOMAIN) >> domains.txt.out sed 's/$$/.$(DOMAIN)/g' subdomains.txt >> domains.txt.out diff --git a/memory/Makefile b/memory/Makefile index 1744f5a6111..6f6342ce62b 100644 --- a/memory/Makefile +++ b/memory/Makefile @@ -3,8 +3,9 @@ include ../config.mk PYTHONPATH := $${PYTHONPATH:+$${PYTHONPATH}:}../hail/python:../gear PYTHON := PYTHONPATH=$(PYTHONPATH) python3 -MEMORY_LATEST = $(DOCKER_PREFIX)/memory:latest -MEMORY_IMAGE = $(DOCKER_PREFIX)/memory:$(shell docker images -q --no-trunc memory | sed -e 's,[^:]*:,,') +TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) + +MEMORY_IMAGE := $(DOCKER_PREFIX)/memory:$(TOKEN) .PHONY: check check: @@ -13,21 +14,12 @@ check: .PHONY: build build: - $(MAKE) -C ../docker build - -docker pull $(MEMORY_LATEST) - python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"service-base"}}' Dockerfile Dockerfile.out - docker build -t memory -f Dockerfile.out --cache-from memory,$(MEMORY_LATEST),service-base .. -.PHONY: push -push: build - docker tag memory $(MEMORY_LATEST) - docker push $(MEMORY_LATEST) - docker tag memory $(MEMORY_IMAGE) - docker push $(MEMORY_IMAGE) + $(MAKE) -C ../docker service-base + python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"'$$(cat ../docker/service-base-image-ref)'"}}' Dockerfile Dockerfile.out + ../docker-build.sh .. memory/Dockerfile.out $(MEMORY_IMAGE) .PHONY: deploy - -deploy: push +deploy: build ! 
[ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default kubectl -n $(NAMESPACE) apply -f service-account.yaml python3 ../ci/jinja2_render.py '{"default_ns":{"name":"$(NAMESPACE)"}}' service-account-batch-pods.yaml service-account-batch-pods.yaml.out diff --git a/monitoring/Makefile b/monitoring/Makefile index a740693de7f..e3b5dcf1eec 100644 --- a/monitoring/Makefile +++ b/monitoring/Makefile @@ -1,7 +1,8 @@ include ../config.mk -MONITORING_LATEST = $(DOCKER_PREFIX)/monitoring:latest -MONITORING_IMAGE = $(DOCKER_PREFIX)/monitoring:$(shell docker images -q --no-trunc monitoring:latest | sed -e 's,[^:]*:,,') +TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) + +MONITORING_IMAGE := $(DOCKER_PREFIX)/monitoring:$(TOKEN) PYTHONPATH := $${PYTHONPATH:+$${PYTHONPATH}:} PYTHON := PYTHONPATH=$(PYTHONPATH)../hail/python:../gear:../web_common python3 @@ -15,20 +16,12 @@ check: .PHONY: build build: - $(MAKE) -C ../docker build - -docker pull $(MONITORING_LATEST) - python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"service-base"}}' Dockerfile Dockerfile.out - docker build -f Dockerfile.out -t monitoring --cache-from monitoring,$(MONITORING_LATEST),service-base .. - -.PHONY: push -push: build - docker tag monitoring $(MONITORING_LATEST) - docker push $(MONITORING_LATEST) - docker tag monitoring $(MONITORING_IMAGE) - docker push $(MONITORING_IMAGE) + $(MAKE) -C ../docker service-base + python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"'$$(cat ../docker/service-base-image-ref)'"}}' Dockerfile Dockerfile.out + ../docker-build.sh .. monitoring/Dockerfile.out $(MONITORING_IMAGE) .PHONY: deploy -deploy: push +deploy: build ! [ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default python3 ../ci/jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"default_ns":{"name":"$(NAMESPACE)"},"monitoring_image":{"image":"$(MONITORING_IMAGE)"},"monitoring_database":{"user_secret_name":"sql-monitoring-user-config"},"global":{"project":"$(PROJECT)","zone":"$(ZONE)","domain":"$(DOMAIN)"}}' deployment.yaml deployment.yaml.out kubectl -n $(NAMESPACE) apply -f deployment.yaml.out diff --git a/notebook/Makefile b/notebook/Makefile index c3081759e7a..c3653fe0d17 100644 --- a/notebook/Makefile +++ b/notebook/Makefile @@ -1,9 +1,9 @@ include ../config.mk -NOTEBOOK_LATEST = $(DOCKER_PREFIX)/notebook:latest -NOTEBOOK_IMAGE = $(DOCKER_PREFIX)/notebook:$(shell docker images -q --no-trunc notebook | sed -e 's,[^:]*:,,') -NOTEBOOK_NGINX_LATEST = $(DOCKER_PREFIX)/notebook_nginx:latest -NOTEBOOK_NGINX_IMAGE = $(DOCKER_PREFIX)/notebook_nginx:$(shell docker images -q --no-trunc notebook_nginx | sed -e 's,[^:]*:,,') +TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) + +NOTEBOOK_IMAGE := $(DOCKER_PREFIX)/notebook:$(TOKEN) +NOTEBOOK_NGINX_IMAGE := $(DOCKER_PREFIX)/notebook_nginx:$(TOKEN) EXTRA_PYTHONPATH := ../hail/python:../gear:../web_common PYTHON := PYTHONPATH=$${PYTHONPATH:+$${PYTHONPATH}:}$(EXTRA_PYTHONPATH) python3 @@ -17,35 +17,24 @@ check: .PHONY: build-notebook build-notebook: - $(MAKE) -C ../docker build - -docker pull $(NOTEBOOK_LATEST) - python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"service-base"}}' Dockerfile Dockerfile.out - docker build -t notebook -f Dockerfile.out --cache-from notebook,$(NOTEBOOK_LATEST),service-base .. 
+ $(MAKE) -C ../docker service-base + python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"'$$(cat ../docker/service-base-image-ref)'"}}' Dockerfile Dockerfile.out + ../docker-build.sh .. notebook/Dockerfile.out $(NOTEBOOK_IMAGE) .PHONY: build-nginx build-nginx: $(MAKE) -C ../docker hail-ubuntu - -docker pull $(NOTEBOOK_NGINX_LATEST) - python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"hail-ubuntu"}}' Dockerfile.nginx Dockerfile.nginx.out + python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"'$$(cat ../docker/hail-ubuntu-image-ref)'"}}' Dockerfile.nginx Dockerfile.nginx.out python3 ../ci/jinja2_render.py '{"deploy": $(DEPLOY), "default_ns": {"name": "$(NAMESPACE)"}}' nginx.conf nginx.conf.out - docker build -t notebook_nginx -f Dockerfile.nginx.out --cache-from notebook_nginx,$(NOTEBOOK_NGINX_LATEST),hail-ubuntu . - -.PHONY: push -push: build-notebook build-nginx - docker tag notebook $(NOTEBOOK_LATEST) - docker push $(NOTEBOOK_LATEST) - docker tag notebook $(NOTEBOOK_IMAGE) - docker push $(NOTEBOOK_IMAGE) - docker tag notebook_nginx $(NOTEBOOK_NGINX_LATEST) - docker push $(NOTEBOOK_NGINX_LATEST) - docker tag notebook_nginx $(NOTEBOOK_NGINX_IMAGE) - docker push $(NOTEBOOK_NGINX_IMAGE) + ../docker-build.sh . Dockerfile.nginx.out $(NOTEBOOK_NGINX_IMAGE) +.PHONY: build +build: build-notebook build-nginx JINJA_ENVIRONMENT = '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"notebook_image":{"image":"$(NOTEBOOK_IMAGE)"},"notebook_nginx_image":{"image":"$(NOTEBOOK_NGINX_IMAGE)"},"default_ns":{"name":"$(NAMESPACE)"},"notebook_database":{"user_secret_name":"sql-notebook-user-config"},"global":{"project":"$(PROJECT)","domain":"$(DOMAIN)","k8s_server_url":"$(KUBERNETES_SERVER_URL)"},"scope":"$(SCOPE)"}' .PHONY: deploy -deploy: push +deploy: build ! [ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default E=$(JINJA_ENVIRONMENT) && \ python3 ../ci/jinja2_render.py $$E deployment.yaml deployment.yaml.out && \ diff --git a/notebook/notebook/templates/workshop/resources.html b/notebook/notebook/templates/workshop/resources.html index fd2f4729388..2b22921e7c4 100644 --- a/notebook/notebook/templates/workshop/resources.html +++ b/notebook/notebook/templates/workshop/resources.html @@ -3,6 +3,10 @@ {% block content %}

 Workshop Resources
+Institute for Behavioral Genetics Statistical Genetics Workshop 2021
+Dates: June 16th, 2021
+Hail version: 0.2.69
+Notebooks, slides, and data
 BroadE Workshop 2021
 Dates: April 8th, 2021
 Hail version: 0.2.64
diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 00000000000..3025fbfca27 --- /dev/null +++ b/package-lock.json @@ -0,0 +1,216 @@ +{ + "name": "hail", + "lockfileVersion": 2, + "requires": true, + "packages": { + "": { + "workspaces": [ + "js_common" + ] + }, + "js_common": { + "name": "@hail/common", + "version": "0.0.0", + "license": "ISC", + "dependencies": { + "axios": "^0.21.1", + "react": "^17.0.2" + }, + "devDependencies": { + "@types/react": "^17.0.5", + "svelte": "^3.38.2" + } + }, + "node_modules/@hail/common": { + "resolved": "js_common", + "link": true + }, + "node_modules/@types/prop-types": { + "version": "15.7.3", + "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.3.tgz", + "integrity": "sha512-KfRL3PuHmqQLOG+2tGpRO26Ctg+Cq1E01D2DMriKEATHgWLfeNDmq9e29Q9WIky0dQ3NPkd1mzYH8Lm936Z9qw==", + "dev": true + }, + "node_modules/@types/react": { + "version": "17.0.6", + "resolved": "https://registry.npmjs.org/@types/react/-/react-17.0.6.tgz", + "integrity": "sha512-u/TtPoF/hrvb63LdukET6ncaplYsvCvmkceasx8oG84/ZCsoLxz9Z/raPBP4lTAiWW1Jb889Y9svHmv8R26dWw==", + "dev": true, + "dependencies": { + "@types/prop-types": "*", + "@types/scheduler": "*", + "csstype": "^3.0.2" + } + }, + "node_modules/@types/scheduler": { + "version": "0.16.1", + "resolved": "https://registry.npmjs.org/@types/scheduler/-/scheduler-0.16.1.tgz", + "integrity": "sha512-EaCxbanVeyxDRTQBkdLb3Bvl/HK7PBK6UJjsSixB0iHKoWxE5uu2Q/DgtpOhPIojN0Zl1whvOd7PoHs2P0s5eA==", + "dev": true + }, + "node_modules/axios": { + "version": "0.21.1", + "resolved": "https://registry.npmjs.org/axios/-/axios-0.21.1.tgz", + "integrity": "sha512-dKQiRHxGD9PPRIUNIWvZhPTPpl1rf/OxTYKsqKUDjBwYylTvV7SjSHJb9ratfyzM6wCdLCOYLzs73qpg5c4iGA==", + "dependencies": { + "follow-redirects": "^1.10.0" + } + }, + "node_modules/csstype": { + "version": "3.0.8", + "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.0.8.tgz", + "integrity": "sha512-jXKhWqXPmlUeoQnF/EhTtTl4C9SnrxSH/jZUih3jmO6lBKr99rP3/+FmrMj4EFpOXzMtXHAZkd3x0E6h6Fgflw==", + "dev": true + }, + "node_modules/follow-redirects": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.14.1.tgz", + "integrity": "sha512-HWqDgT7ZEkqRzBvc2s64vSZ/hfOceEol3ac/7tKwzuvEyWx3/4UegXh5oBOIotkGsObyk3xznnSRVADBgWSQVg==", + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, + "node_modules/js-tokens": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", + "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==" + }, + "node_modules/loose-envify": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", + "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==", + "dependencies": { + "js-tokens": "^3.0.0 || ^4.0.0" + }, + "bin": { + "loose-envify": "cli.js" + } + }, + "node_modules/object-assign": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", + "integrity": "sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM=", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/react": { + "version": "17.0.2", + "resolved": "https://registry.npmjs.org/react/-/react-17.0.2.tgz", + "integrity": 
"sha512-gnhPt75i/dq/z3/6q/0asP78D0u592D5L1pd7M8P+dck6Fu/jJeL6iVVK23fptSUZj8Vjf++7wXA8UNclGQcbA==", + "dependencies": { + "loose-envify": "^1.1.0", + "object-assign": "^4.1.1" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/svelte": { + "version": "3.38.2", + "resolved": "https://registry.npmjs.org/svelte/-/svelte-3.38.2.tgz", + "integrity": "sha512-q5Dq0/QHh4BLJyEVWGe7Cej5NWs040LWjMbicBGZ+3qpFWJ1YObRmUDZKbbovddLC9WW7THTj3kYbTOFmU9fbg==", + "dev": true, + "engines": { + "node": ">= 8" + } + } + }, + "dependencies": { + "@hail/common": { + "version": "file:js_common", + "requires": { + "@types/react": "^17.0.5", + "axios": "^0.21.1", + "react": "^17.0.2", + "svelte": "^3.38.2" + } + }, + "@types/prop-types": { + "version": "15.7.3", + "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.3.tgz", + "integrity": "sha512-KfRL3PuHmqQLOG+2tGpRO26Ctg+Cq1E01D2DMriKEATHgWLfeNDmq9e29Q9WIky0dQ3NPkd1mzYH8Lm936Z9qw==", + "dev": true + }, + "@types/react": { + "version": "17.0.6", + "resolved": "https://registry.npmjs.org/@types/react/-/react-17.0.6.tgz", + "integrity": "sha512-u/TtPoF/hrvb63LdukET6ncaplYsvCvmkceasx8oG84/ZCsoLxz9Z/raPBP4lTAiWW1Jb889Y9svHmv8R26dWw==", + "dev": true, + "requires": { + "@types/prop-types": "*", + "@types/scheduler": "*", + "csstype": "^3.0.2" + } + }, + "@types/scheduler": { + "version": "0.16.1", + "resolved": "https://registry.npmjs.org/@types/scheduler/-/scheduler-0.16.1.tgz", + "integrity": "sha512-EaCxbanVeyxDRTQBkdLb3Bvl/HK7PBK6UJjsSixB0iHKoWxE5uu2Q/DgtpOhPIojN0Zl1whvOd7PoHs2P0s5eA==", + "dev": true + }, + "axios": { + "version": "0.21.1", + "resolved": "https://registry.npmjs.org/axios/-/axios-0.21.1.tgz", + "integrity": "sha512-dKQiRHxGD9PPRIUNIWvZhPTPpl1rf/OxTYKsqKUDjBwYylTvV7SjSHJb9ratfyzM6wCdLCOYLzs73qpg5c4iGA==", + "requires": { + "follow-redirects": "^1.10.0" + } + }, + "csstype": { + "version": "3.0.8", + "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.0.8.tgz", + "integrity": "sha512-jXKhWqXPmlUeoQnF/EhTtTl4C9SnrxSH/jZUih3jmO6lBKr99rP3/+FmrMj4EFpOXzMtXHAZkd3x0E6h6Fgflw==", + "dev": true + }, + "follow-redirects": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.14.1.tgz", + "integrity": "sha512-HWqDgT7ZEkqRzBvc2s64vSZ/hfOceEol3ac/7tKwzuvEyWx3/4UegXh5oBOIotkGsObyk3xznnSRVADBgWSQVg==" + }, + "js-tokens": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", + "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==" + }, + "loose-envify": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", + "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==", + "requires": { + "js-tokens": "^3.0.0 || ^4.0.0" + } + }, + "object-assign": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", + "integrity": "sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM=" + }, + "react": { + "version": "17.0.2", + "resolved": "https://registry.npmjs.org/react/-/react-17.0.2.tgz", + "integrity": "sha512-gnhPt75i/dq/z3/6q/0asP78D0u592D5L1pd7M8P+dck6Fu/jJeL6iVVK23fptSUZj8Vjf++7wXA8UNclGQcbA==", + "requires": { + "loose-envify": "^1.1.0", + "object-assign": "^4.1.1" + } + }, + "svelte": { + "version": "3.38.2", + "resolved": "https://registry.npmjs.org/svelte/-/svelte-3.38.2.tgz", + "integrity": 
"sha512-q5Dq0/QHh4BLJyEVWGe7Cej5NWs040LWjMbicBGZ+3qpFWJ1YObRmUDZKbbovddLC9WW7THTj3kYbTOFmU9fbg==", + "dev": true + } + } +} diff --git a/package.json b/package.json new file mode 100644 index 00000000000..363635989b8 --- /dev/null +++ b/package.json @@ -0,0 +1,5 @@ +{ + "workspaces": [ + "js_common" + ] +} diff --git a/prometheus/Makefile b/prometheus/Makefile index 8d972b49c93..09dcc5cd820 100644 --- a/prometheus/Makefile +++ b/prometheus/Makefile @@ -2,23 +2,17 @@ include ../config.mk .PHONY: build push deploy -PROM_NGINX_LATEST = $(DOCKER_PREFIX)/prom_nginx:latest -PROM_NGINX_IMAGE = $(DOCKER_PREFIX)/prom_nginx:$(shell docker images -q --no-trunc prom_nginx | sed -e 's,[^:]*:,,') +TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) + +PROM_NGINX_IMAGE := $(DOCKER_PREFIX)/prom_nginx:$(TOKEN) build: $(MAKE) -C ../docker hail-ubuntu - -docker pull $(PROM_NGINX_LATEST) - python3 ../ci/jinja2_render.py '{"hail_ubuntu_image": {"image": "hail-ubuntu"}}' Dockerfile.nginx Dockerfile.nginx.out + python3 ../ci/jinja2_render.py '{"hail_ubuntu_image": {"image": "'$$(cat ../docker/hail-ubuntu-image-ref)'"}}' Dockerfile.nginx Dockerfile.nginx.out python3 ../ci/jinja2_render.py '{"deploy": $(DEPLOY), "default_ns": {"name": "$(NAMESPACE)"}}' nginx.conf nginx.conf.out - docker build -t prom_nginx -f Dockerfile.nginx.out --cache-from prom_nginx,$(PROM_NGINX_LATEST),hail-ubuntu . - -push: build - docker tag prom_nginx $(PROM_NGINX_LATEST) - docker push $(PROM_NGINX_LATEST) - docker tag prom_nginx $(PROM_NGINX_IMAGE) - docker push $(PROM_NGINX_IMAGE) + ../docker-build.sh . Dockerfile.nginx.out $(PROM_NGINX_IMAGE) -deploy: push +deploy: build ! [ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default python3 ../ci/jinja2_render.py '{"deploy":$(DEPLOY),"default_ns":{"name":"$(NAMESPACE)"}, "prom_nginx_image": {"image": "$(PROM_NGINX_IMAGE)"}}' prometheus.yaml prometheus.yaml.out kubectl -n $(NAMESPACE) apply -f prometheus.yaml.out diff --git a/pylintrc b/pylintrc index f7c3092e140..f6ffe559d8a 100644 --- a/pylintrc +++ b/pylintrc @@ -11,7 +11,7 @@ # C1801 Do not use len(SEQUENCE) as condition value # W0221 Parameters differ from overridden method -disable=C0111,W1203,W1202,C0111,R0913,W0622,W0212,W0621,R0914,W0603,R0902,R0801,C1801,W0221,line-too-long,too-few-public-methods,fixme,too-many-function-args,too-many-branches,too-many-lines,too-many-boolean-expressions,too-many-statements,too-many-nested-blocks,wrong-import-order,logging-not-lazy,unnecessary-lambda,too-many-public-methods,broad-except,too-many-return-statements,bare-except +disable=C0111,W1203,W1202,C0111,R0913,W0622,W0212,W0621,R0914,W0603,R0902,R0801,C1801,W0221,line-too-long,too-few-public-methods,fixme,too-many-function-args,too-many-branches,too-many-lines,too-many-boolean-expressions,too-many-statements,too-many-nested-blocks,wrong-import-order,logging-not-lazy,unnecessary-lambda,too-many-public-methods,broad-except,too-many-return-statements,bare-except,invalid-name,unsubscriptable-object [FORMAT] diff --git a/query/Makefile b/query/Makefile index 3f01a55f6b2..4740927c5be 100644 --- a/query/Makefile +++ b/query/Makefile @@ -3,8 +3,9 @@ include ../config.mk EXTRA_PYTHONPATH := ../hail/python:../gear PYTHON := PYTHONPATH=$${PYTHONPATH:+$${PYTHONPATH}:}$(EXTRA_PYTHONPATH) python3 -QUERY_LATEST = $(DOCKER_PREFIX)/query:latest -QUERY_IMAGE = $(DOCKER_PREFIX)/query:$(shell docker images -q --no-trunc query | sed -e 's,[^:]*:,,') +TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) + 
+QUERY_IMAGE := $(DOCKER_PREFIX)/query:$(TOKEN) .PHONY: check check: @@ -17,22 +18,14 @@ build: $(MAKE) -C ../hail shadowJar # janky cp ../hail/build/libs/hail-all-spark.jar ./hail.jar - -docker pull $(QUERY_LATEST) - python3 ../ci/jinja2_render.py '{"service_java_run_base_image":{"image":"service-java-run-base"}}' Dockerfile Dockerfile.out - docker build -t query -f Dockerfile.out --cache-from query,$(QUERY_LATEST),service-base . - -.PHONY: push -push: build - docker tag query $(QUERY_LATEST) - docker push $(QUERY_LATEST) - docker tag query $(QUERY_IMAGE) - docker push $(QUERY_IMAGE) + python3 ../ci/jinja2_render.py '{"service_java_run_base_image":{"image":"'$$(cat ../docker/service-java-run-base-image-ref)'"}}' Dockerfile Dockerfile.out + ../docker-build.sh . Dockerfile.out $(QUERY_IMAGE) UPLOAD_QUERY_JAR_TOKEN := $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) HAIL_REVISION := $(shell git rev-parse HEAD) .PHONY: deploy -deploy: push +deploy: build ! [ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default kubectl -n $(NAMESPACE) apply -f service-account.yaml gsutil cp ./hail.jar gs://hail-test-dmk9z/$(UPLOAD_QUERY_JAR_TOKEN)/jars/$(HAIL_REVISION).jar diff --git a/shuffler/Makefile b/shuffler/Makefile index ac29f3d32b7..82d60a3e7bf 100644 --- a/shuffler/Makefile +++ b/shuffler/Makefile @@ -1,29 +1,22 @@ include ../config.mk -SHUFFLER_LATEST = $(DOCKER_PREFIX)/shuffler:latest -SHUFFLER_IMAGE = $(DOCKER_PREFIX)/shuffler:$(shell docker images -q --no-trunc shuffler | sed -e 's,[^:]*:,,') +TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) + +SHUFFLER_IMAGE := $(DOCKER_PREFIX)/shuffler:$(TOKEN) PYTHONPATH := $${PYTHONPATH:+$${PYTHONPATH}:}../hail/python:../gear:../web_common PYTHON := PYTHONPATH=$(PYTHONPATH) python3 .PHONY: build build: - $(MAKE) -C ../docker build - -docker pull $(SHUFFLER_LATEST) - python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"service-base"}}' Dockerfile Dockerfile.out + $(MAKE) -C ../docker service-base + python3 ../ci/jinja2_render.py '{"service_base_image":{"image":"'$$(cat ../docker/service-base-image-ref)'"}}' Dockerfile Dockerfile.out $(MAKE) -C ../hail shadowJar cp ../hail/build/libs/hail-all-spark.jar hail.jar - docker build -t shuffler -f Dockerfile.out --cache-from shuffler,$(SHUFFLER_LATEST),service-base . - -.PHONY: push -push: build - docker tag shuffler $(SHUFFLER_LATEST) - docker push $(SHUFFLER_LATEST) - docker tag shuffler $(SHUFFLER_IMAGE) - docker push $(SHUFFLER_IMAGE) + ../docker-build.sh . Dockerfile.out $(SHUFFLER_IMAGE) .PHONY: deploy -deploy: push +deploy: build ! [ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default python3 ../ci/jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"shuffler_image":{"image":"$(SHUFFLER_IMAGE)"},"default_ns":{"name":"$(NAMESPACE)"},"global":{"project":"$(PROJECT)","domain":"$(DOMAIN)","k8s_server_url":"$(KUBERNETES_SERVER_URL)"}}' deployment.yaml deployment.yaml.out kubectl -n $(NAMESPACE) apply -f deployment.yaml.out diff --git a/site/Makefile b/site/Makefile index 4fc3bf9cfde..49e83ad2a3e 100644 --- a/site/Makefile +++ b/site/Makefile @@ -49,7 +49,7 @@ push: build deploy: push ! 
[ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default $(MAKE) -C ../docker hail-ubuntu - python3 ../ci/jinja2_render.py '{"global": {"domain":"$(DOMAIN)"},"default_ns":{"name":"$(NAMESPACE)"},"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"site_image":{"image":"$(IMAGE)"},"hail_ubuntu_image":{"image":"hail-ubuntu"}' deployment.yaml deployment.yaml.out + python3 ../ci/jinja2_render.py '{"global": {"domain":"$(DOMAIN)"},"default_ns":{"name":"$(NAMESPACE)"},"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"site_image":{"image":"$(IMAGE)"},"hail_ubuntu_image":{"image":"'$$(cat ../docker/hail-ubuntu-image-ref)'"}' deployment.yaml deployment.yaml.out kubectl -n $(NAMESPACE) apply -f deployment.yaml.out clean: diff --git a/tls/Dockerfile b/tls/Dockerfile index 8971c02836f..a116655a430 100644 --- a/tls/Dockerfile +++ b/tls/Dockerfile @@ -1,8 +1,17 @@ -FROM {{ service_base_image.image }} +FROM {{ hail_ubuntu_image.image }} +# source: https://cloud.google.com/storage/docs/gsutil_install#linux # re: RANDFILE, https://github.com/openssl/openssl/issues/7754#issuecomment-444063355 -RUN hail-apt-get-install openssl && \ - sed -i 's/^RANDFILE/#RANDFILE/' /etc/ssl/openssl.cnf +# jdk not strictly necessary, but we want keytool +RUN curl -sSLO https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-334.0.0-linux-x86_64.tar.gz && \ + tar -xf google-cloud-sdk-334.0.0-linux-x86_64.tar.gz && \ + curl -sSLO https://dl.k8s.io/release/v1.19.7/bin/linux/amd64/kubectl && \ + install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl && \ + hail-apt-get-install openssl openjdk-8-jdk-headless && \ + sed -i 's/^RANDFILE/#RANDFILE/' /etc/ssl/openssl.cnf && \ + hail-pip-install pyyaml + +ENV PATH $PATH:/google-cloud-sdk/bin COPY config.yaml . COPY create_certs.py . 
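The next hunk drops the hailtop import from create_certs.py, since gear, hailtop, and web_common are not installed in this image; the script instead shells out with plain subprocess calls to the openssl and keytool binaries that the Dockerfile above provides. A minimal sketch of that pattern (not part of the patch), assuming hypothetical file names (key.pem, cert.pem, truststore.jks) and a placeholder subject/alias rather than the real certificate layout defined in config.yaml:

    import subprocess as sp

    def echo_check_call(cmd):
        # print the command before running it, roughly the behaviour that
        # sync_check_shell(..., echo=True) used to provide
        print(cmd)
        sp.run(cmd, check=True)

    # generate a self-signed key/cert pair with openssl
    echo_check_call([
        'openssl', 'req', '-x509', '-newkey', 'rsa:4096', '-nodes',
        '-keyout', 'key.pem', '-out', 'cert.pem', '-days', '365',
        '-subj', '/CN=example',
    ])

    # import the certificate into a Java truststore; keytool is why the
    # image installs openjdk-8-jdk-headless
    echo_check_call([
        'keytool', '-importcert', '-noprompt', '-alias', 'example',
        '-file', 'cert.pem', '-keystore', 'truststore.jks',
        '-storepass', 'changeit',
    ])
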
diff --git a/tls/create_certs.py b/tls/create_certs.py index c362fb7d215..4fcde6e1382 100644 --- a/tls/create_certs.py +++ b/tls/create_certs.py @@ -5,7 +5,7 @@ import subprocess as sp import tempfile -from hailtop.utils import sync_check_shell +# gear, hailtop, and web_common are not available in the create_certs image parser = argparse.ArgumentParser(prog='create_certs.py', description='create hail certs') parser.add_argument('namespace', type=str, help='kubernetes namespace') @@ -22,7 +22,8 @@ def echo_check_call(cmd): - sync_check_shell(' '.join(cmd), echo=True) + print(cmd) + sp.run(cmd, check=True) def create_key_and_cert(p): diff --git a/ukbb-rg/Makefile b/ukbb-rg/Makefile index ab8b4f7d2bf..e0a9183967e 100644 --- a/ukbb-rg/Makefile +++ b/ukbb-rg/Makefile @@ -1,36 +1,22 @@ include ../config.mk -UKBB_RG_STATIC_LATEST = $(DOCKER_PREFIX)/ukbb-rg-static:latest -UKBB_RG_STATIC_IMAGE = $(DOCKER_PREFIX)/ukbb-rg-static:$(shell docker images -q --no-trunc ukbb-rg-static | sed -e 's,[^:]*:,,') +TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) -UKBB_RG_BROWSER_LATEST = $(DOCKER_PREFIX)/ukbb-rg-browser:latest -UKBB_RG_BROWSER_IMAGE = $(DOCKER_PREFIX)/ukbb-rg-browser:$(shell docker images -q --no-trunc ukbb-rg-browser | sed -e 's,[^:]*:,,') +UKBB_RG_STATIC_IMAGE := $(DOCKER_PREFIX)/ukbb-rg-static:$(TOKEN) +UKBB_RG_BROWSER_IMAGE := $(DOCKER_PREFIX)/ukbb-rg-browser:$(TOKEN) .PHONY: build build: ls app/app.R app/www/rainbowvis.js # read the README - docker pull $(DOCKER_PREFIX)/ubuntu:18.04 - -docker pull $(UKBB_RG_STATIC_LATEST) + python3 ../ci/jinja2_render.py '{"docker_prefix":"$(DOCKER_PREFIX)"}' Dockerfile.static Dockerfile.static.out - docker build -t ukbb-rg-static -f Dockerfile.static.out --cache-from ukbb-rg-static,$(UKBB_RG_STATIC_LATEST),$(DOCKER_PREFIX)/ubuntu:18.04 . - docker pull $(DOCKER_PREFIX)/ubuntu:19.04 - -docker pull $(UKBB_RG_BROWSER_LATEST) + ../docker-build.sh . Dockerfile.static.out $(UKBB_RG_STATIC_IMAGE) + python3 ../ci/jinja2_render.py '{"docker_prefix":"$(DOCKER_PREFIX)"}' Dockerfile.browser Dockerfile.browser.out - docker build -t ukbb-rg-browser -f Dockerfile.browser.out --cache-from ukbb-rg-browser,$(UKBB_RG_BROWSER_LATEST),$(DOCKER_PREFIX)/ubuntu:19.04 . - -.PHONY: push -push: build - docker tag ukbb-rg-static $(UKBB_RG_STATIC_LATEST) - docker push $(UKBB_RG_STATIC_LATEST) - docker tag ukbb-rg-static $(UKBB_RG_STATIC_IMAGE) - docker push $(UKBB_RG_STATIC_IMAGE) - docker tag ukbb-rg-browser $(UKBB_RG_BROWSER_LATEST) - docker push $(UKBB_RG_BROWSER_LATEST) - docker tag ukbb-rg-browser $(UKBB_RG_BROWSER_IMAGE) - docker push $(UKBB_RG_BROWSER_IMAGE) + ../docker-build.sh . 
Dockerfile.browser.out $(UKBB_RG_BROWSER_IMAGE) .PHONY: deploy -deploy: push +deploy: build python3 ../ci/jinja2_render.py '{"docker_prefix":"$(DOCKER_PREFIX)"}' deployment.yaml deployment.yaml.out kubectl apply -f deployment.yaml.out diff --git a/web_common/web_common/styles/main.scss b/web_common/web_common/styles/main.scss index f6faf33a59a..44cbb8c2360 100644 --- a/web_common/web_common/styles/main.scss +++ b/web_common/web_common/styles/main.scss @@ -243,9 +243,25 @@ a { } tr { - &:nth-of-type(even) { - background-color: #f2f2f2; + &:nth-of-type(7n+1) { + background-color: #fff4f4; + } + &:nth-of-type(7n+2) { + background-color: #fff9f1; + } + &:nth-of-type(7n+3) { + background-color: #fdfdf1; } + &:nth-of-type(7n+4) { + background-color: #f4fff4; + } + &:nth-of-type(7n+5) { + background-color: #f2f4ff; + } + &:nth-of-type(7n+6) { + background-color: #fff6ff; + } + td.data-table-bad { color: red; border-color: red; diff --git a/website/Makefile b/website/Makefile index 84039d6e705..2cbafcae65c 100644 --- a/website/Makefile +++ b/website/Makefile @@ -1,8 +1,10 @@ include ../config.mk -.PHONY: docs build run run-docker push deploy clean +.PHONY: docs build run run-docker deploy clean -IMAGE = $(DOCKER_PREFIX)/website:$(shell docker images -q --no-trunc website | sed -e 's,[^:]*:,,') +TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) + +WEBSITE_IMAGE := $(DOCKER_PREFIX)/website:$(TOKEN) check: curlylint . @@ -13,8 +15,8 @@ docs: build: docs $(MAKE) -C ../docker service-base - python3 ../ci/jinja2_render.py '{"service_base_image": {"image":"service-base"}}' Dockerfile Dockerfile.out - docker build -f Dockerfile.out -t website . + python3 ../ci/jinja2_render.py '{"service_base_image": {"image":"'$$(cat ../docker/service-base-image-ref)'"}}' Dockerfile Dockerfile.out + ../docker-build.sh . Dockerfile.out $(WEBSITE_IMAGE) run: docs cd website && tar -xvzf ../docs.tar.gz @@ -23,14 +25,10 @@ run: docs run-docker: build docker run -e HAIL_DOMAIN=localhost:5000 -p 5000:5000 website python3 -m website local -push: build - docker tag website $(IMAGE) - docker push $(IMAGE) - -deploy: push +deploy: build ! 
[ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default $(MAKE) -C ../docker hail-ubuntu - python3 ../ci/jinja2_render.py '{"global": {"domain":"$(DOMAIN)"},"default_ns":{"name":"$(NAMESPACE)"},"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"website_image":{"image":"$(IMAGE)"}}' deployment.yaml deployment.yaml.out + python3 ../ci/jinja2_render.py '{"global": {"domain":"$(DOMAIN)"},"default_ns":{"name":"$(NAMESPACE)"},"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"website_image":{"image":"$(WEBSITE_IMAGE)"}}' deployment.yaml deployment.yaml.out kubectl -n $(NAMESPACE) apply -f deployment.yaml.out clean: From c18302332fa4e87914fcd53b28329fe2036c4216 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Mon, 5 Jul 2021 15:20:59 +1000 Subject: [PATCH 265/501] Remove test-aws-key --- build.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/build.yaml b/build.yaml index 8c7c9b0cf84..5bf04a25af8 100644 --- a/build.yaml +++ b/build.yaml @@ -46,7 +46,6 @@ steps: - gcr-pull-key - gcr-push-service-account-key - test-gsa-key - - test-aws-key - auth-oauth2-client-secret - benchmark-gsa-key - kind: buildImage2 From 3c848799ce7a95b7397660bbcc2356c8221913a6 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Mon, 5 Jul 2021 15:35:23 +1000 Subject: [PATCH 266/501] Fix print(version()) --- docker/Dockerfile.service-base | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/Dockerfile.service-base b/docker/Dockerfile.service-base index 4c53181ed61..14917525ea0 100644 --- a/docker/Dockerfile.service-base +++ b/docker/Dockerfile.service-base @@ -10,10 +10,11 @@ RUN hail-pip-install /gear && rm -rf /gear COPY web_common/setup.py web_common/MANIFEST.in /web_common/ COPY web_common/web_common /web_common/web_common/ RUN hail-pip-install /web_common && rm -rf /web_common -RUN echo 'from hailtop import version; print(version());' | python3 COPY hail/python/setup-hailtop.py /hailtop/setup.py COPY hail/python/hailtop /hailtop/hailtop/ COPY /hail_version /hailtop/hailtop/hail_version COPY hail/python/MANIFEST.in /hailtop/MANIFEST.in RUN hail-pip-install /hailtop && rm -rf /hailtop + +RUN echo 'from hailtop import version; print(version());' | python3 From f2d58e116fbb43584042204791242dd7757dc4ef Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Thu, 8 Jul 2021 14:03:15 +1000 Subject: [PATCH 267/501] Add ability to fetch access tokens from instance metadata server (#122) --- .../hailtop/aiogoogle/auth/credentials.py | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/hail/python/hailtop/aiogoogle/auth/credentials.py b/hail/python/hailtop/aiogoogle/auth/credentials.py index 60792b1927c..c3d15694569 100644 --- a/hail/python/hailtop/aiogoogle/auth/credentials.py +++ b/hail/python/hailtop/aiogoogle/auth/credentials.py @@ -34,12 +34,15 @@ def default_credentials(): if os.path.exists(application_default_credentials_file): credentials_file = application_default_credentials_file - if credentials_file is None: - raise ValueError('unable to locate Google Cloud credentials') + if credentials_file: + log.info(f'using credentials file {credentials_file}') + return Credentials.from_file(credentials_file) - log.info(f'using credentials file {credentials_file}') + log.warning('unable to locate Google Cloud credentials file, will attempt to ' + 'use instance metadata server instead') + + return InstanceMetadataCredentials() - return Credentials.from_file(credentials_file) async def get_access_token(self, 
session): pass @@ -97,3 +100,12 @@ async def get_access_token(self, session): 'assertion': encoded_assertion })) as resp: return await resp.json() + +# https://cloud.google.com/compute/docs/access/create-enable-service-accounts-for-instances#applications +class InstanceMetadataCredentials(): + async def get_access_token(self, session): + async with await request_retry_transient_errors( + session, 'GET', + 'http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token', + headers={'Metadata-Flavor': 'Google'}) as resp: + return await resp.json() From 88b8f0b63809a432c8f2b65bc04d61516ed24b9c Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Fri, 23 Jul 2021 17:01:14 +1000 Subject: [PATCH 268/501] Add deploy_memory step to prod_deploy workflow --- .github/workflows/prod_deploy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/prod_deploy.yaml b/.github/workflows/prod_deploy.yaml index 478b59931c9..4fbbdd98773 100644 --- a/.github/workflows/prod_deploy.yaml +++ b/.github/workflows/prod_deploy.yaml @@ -12,7 +12,7 @@ jobs: DEPLOY_BATCH_URL=$(curl --fail --silent --show-error -X POST \ -H "Authorization: Bearer ${{ secrets.CI_TOKEN }}" \ -H "Content-Type:application/json" \ - -d '{"steps": ["deploy_auth", "deploy_batch", "deploy_ci", "deploy_notebook", "deploy_query"], "sha": "${{ github.sha }}"}' \ + -d '{"steps": ["deploy_auth", "deploy_batch", "deploy_ci", "deploy_memory", "deploy_notebook", "deploy_query"], "sha": "${{ github.sha }}"}' \ https://ci.hail.populationgenomics.org.au/api/v1alpha/prod_deploy) echo DEPLOY_BATCH_URL=$DEPLOY_BATCH_URL >> $GITHUB_ENV From 12256aac5f7a517666cc7aad401c21faaed0294d Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Fri, 23 Jul 2021 17:07:56 +1000 Subject: [PATCH 269/501] Fix memory service resources --- memory/deployment.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/memory/deployment.yaml b/memory/deployment.yaml index e688e790b22..fa8ed524218 100644 --- a/memory/deployment.yaml +++ b/memory/deployment.yaml @@ -89,11 +89,11 @@ spec: readOnly: true resources: requests: - memory: 187Mi - cpu: 50m + memory: "1.25G" + cpu: "400m" limits: - memory: 1875i - cpu: 500m + memory: "3.75G" + cpu: "1" readinessProbe: tcpSocket: port: 5000 From fa5701a055d76055988e223c5877e05e5200d2a7 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Tue, 10 Aug 2021 21:18:30 +1000 Subject: [PATCH 270/501] Revert "Merge upstream (includes 0.2.73)" --- atgu/atgu/atgu.py | 12 +- auth/auth/auth.py | 25 +- batch/Dockerfile.worker | 23 +- batch/Makefile | 4 +- batch/batch/driver/create_instance.py | 61 +- batch/batch/driver/gce.py | 9 +- batch/batch/driver/job_private.py | 8 - batch/batch/driver/main.py | 102 +- batch/batch/driver/pool.py | 18 +- batch/batch/driver/templates/index.html | 18 - batch/batch/front_end/front_end.py | 111 +- batch/batch/front_end/templates/billing.html | 17 +- batch/batch/globals.py | 2 +- batch/batch/inst_coll_config.py | 1 + batch/batch/utils.py | 9 +- batch/batch/worker/disk.py | 22 +- batch/batch/worker/worker.py | 989 ++++++------------ batch/sql/add-frozen-mode.sql | 1 - batch/sql/estimated-current.sql | 3 +- batch/test/test_accounts.py | 15 +- batch/test/test_batch.py | 67 +- .../run/matrix_table_benchmarks.py | 8 +- build.yaml | 55 - ci/ci/build.py | 3 +- ci/ci/ci.py | 9 +- ci/test/resources/build.yaml | 8 - datasets/notebooks/GTEx_MatrixTables.ipynb | 383 ------- ...{GTEx_Tables.ipynb => gtex_datasets.ipynb} | 0 
dev-docs/batch-design.md | 61 -- docker-build.sh | 2 - docker/.gitignore | 3 - docker/requirements.txt | 6 +- gear/gear/__init__.py | 4 +- gear/gear/metrics.py | 26 +- hail/Dockerfile.hail-pip-installed-python36 | 4 +- hail/Dockerfile.hail-pip-installed-python37 | 4 +- hail/Makefile | 2 +- .../cluster-tests/cluster-liftover-rg.py | 14 - hail/python/dev-requirements.txt | 2 - hail/python/hail/docs/change_log.md | 52 +- .../GTEx_eQTL_all_snp_gene_associations.rst | 44 - hail/python/hail/docs/functions/index.rst | 2 - hail/python/hail/docs/functions/numeric.rst | 4 - hail/python/hail/docs/install/macosx.rst | 10 +- hail/python/hail/docs/nd/index.rst | 6 - .../01-genome-wide-association-study.ipynb | 2 - hail/python/hail/experimental/datasets.json | 19 - hail/python/hail/expr/__init__.py | 4 +- .../hail/expr/expressions/base_expression.py | 11 - .../expr/expressions/typed_expressions.py | 112 +- hail/python/hail/expr/functions.py | 108 +- hail/python/hail/ir/__init__.py | 3 +- hail/python/hail/ir/ir.py | 169 +-- hail/python/hail/matrixtable.py | 15 - hail/python/hail/methods/impex.py | 2 +- hail/python/hail/methods/statgen.py | 106 +- hail/python/hail/nd/__init__.py | 8 +- hail/python/hail/nd/nd.py | 146 +-- hail/python/hail/utils/misc.py | 4 +- .../hailtop/aiogoogle/auth/credentials.py | 4 +- .../aiogoogle/client/compute_client.py | 76 +- hail/python/hailtop/batch/backend.py | 11 +- hail/python/hailtop/batch/docs/change_log.rst | 8 - hail/python/hailtop/batch/job.py | 32 +- hail/python/hailtop/batch_client/aioclient.py | 20 +- hail/python/hailtop/utils/__init__.py | 10 +- hail/python/hailtop/utils/process.py | 14 - hail/python/hailtop/utils/time.py | 7 - hail/python/hailtop/utils/utils.py | 16 +- hail/python/test/hail/expr/test_expr.py | 13 +- hail/python/test/hail/expr/test_math.py | 15 - hail/python/test/hail/expr/test_ndarrays.py | 110 +- .../hail/matrixtable/test_file_formats.py | 4 +- .../hail/matrixtable/test_matrix_table.py | 7 - hail/python/test/hail/methods/test_impex.py | 28 - hail/python/test/hail/methods/test_statgen.py | 146 +-- hail/python/test/hail/table/test_table.py | 8 - hail/python/test/hail/test_ir.py | 6 +- hail/python/test/hail/utils/test_utils.py | 5 +- hail/python/test/hailtop/batch/test_batch.py | 11 - hail/python/test/hailtop/batch/utils.py | 2 +- hail/src/main/scala/is/hail/HailContext.scala | 4 +- .../is/hail/annotations/ChunkCache.scala | 156 --- .../scala/is/hail/annotations/Region.scala | 15 +- .../is/hail/annotations/RegionMemory.scala | 45 +- .../is/hail/annotations/RegionPool.scala | 27 +- hail/src/main/scala/is/hail/asm4s/Code.scala | 24 +- .../scala/is/hail/asm4s/CodeBuilder.scala | 4 - .../is/hail/backend/HailTaskContext.scala | 3 +- .../main/scala/is/hail/expr/ir/Binds.scala | 4 +- .../scala/is/hail/expr/ir/BlockMatrixIR.scala | 14 +- .../main/scala/is/hail/expr/ir/Children.scala | 26 +- .../src/main/scala/is/hail/expr/ir/Copy.scala | 52 +- .../src/main/scala/is/hail/expr/ir/Emit.scala | 210 ++-- .../is/hail/expr/ir/EmitClassBuilder.scala | 55 +- .../hail/expr/ir/ExtractIntervalFilters.scala | 17 +- hail/src/main/scala/is/hail/expr/ir/IR.scala | 37 +- .../scala/is/hail/expr/ir/InferType.scala | 14 +- .../scala/is/hail/expr/ir/Interpret.scala | 14 +- .../scala/is/hail/expr/ir/MatrixValue.scala | 11 +- .../scala/is/hail/expr/ir/NestingDepth.scala | 4 +- .../is/hail/expr/ir/NormalizeNames.scala | 8 +- .../main/scala/is/hail/expr/ir/Parser.scala | 44 +- .../main/scala/is/hail/expr/ir/Pretty.scala | 22 +- .../is/hail/expr/ir/PruneDeadFields.scala | 12 +- 
.../scala/is/hail/expr/ir/Requiredness.scala | 20 +- .../main/scala/is/hail/expr/ir/Simplify.scala | 24 +- .../main/scala/is/hail/expr/ir/TableIR.scala | 14 +- .../scala/is/hail/expr/ir/TypeCheck.scala | 21 +- .../expr/ir/agg/NDArraySumAggregator.scala | 21 +- .../expr/ir/functions/ArrayFunctions.scala | 129 ++- .../expr/ir/functions/CallFunctions.scala | 22 +- .../expr/ir/functions/DictFunctions.scala | 30 +- .../is/hail/expr/ir/functions/Functions.scala | 143 ++- .../expr/ir/functions/GenotypeFunctions.scala | 8 +- .../expr/ir/functions/IntervalFunctions.scala | 24 +- .../expr/ir/functions/LocusFunctions.scala | 32 +- .../expr/ir/functions/MathFunctions.scala | 28 +- .../expr/ir/functions/NDArrayFunctions.scala | 74 +- .../functions/ReferenceGenomeFunctions.scala | 14 +- .../hail/expr/ir/functions/SetFunctions.scala | 22 +- .../expr/ir/functions/StringFunctions.scala | 30 +- .../expr/ir/functions/UtilFunctions.scala | 36 +- .../expr/ir/lowering/LowerBlockMatrixIR.scala | 10 +- .../hail/expr/ir/lowering/LowerTableIR.scala | 137 +-- .../hail/expr/ir/ndarrays/EmitNDArray.scala | 18 +- .../expr/ir/orderings/StringOrdering.scala | 11 - .../main/scala/is/hail/expr/ir/package.scala | 13 +- .../is/hail/expr/ir/streams/EmitStream.scala | 18 +- .../is/hail/expr/ir/streams/StreamUtils.scala | 4 +- .../scala/is/hail/io/plink/LoadPlink.scala | 12 +- .../is/hail/io/reference/FASTAReader.scala | 10 +- .../scala/is/hail/io/reference/LiftOver.scala | 23 +- hail/src/main/scala/is/hail/linalg/BLAS.scala | 13 - .../main/scala/is/hail/linalg/LAPACK.scala | 13 +- .../is/hail/linalg/LinalgCodeUtils.scala | 32 +- .../scala/is/hail/rvd/AbstractRVDSpec.scala | 36 +- .../scala/is/hail/types/encoded/EArray.scala | 2 +- .../is/hail/types/encoded/EBaseStruct.scala | 7 +- .../scala/is/hail/types/encoded/EBinary.scala | 5 +- .../scala/is/hail/types/encoded/EInt32.scala | 4 +- .../types/encoded/ENDArrayColumnMajor.scala | 4 +- .../hail/types/physical/PCanonicalArray.scala | 6 +- .../types/physical/PCanonicalBaseStruct.scala | 3 +- .../types/physical/PCanonicalBinary.scala | 19 +- .../hail/types/physical/PCanonicalCall.scala | 2 +- .../types/physical/PCanonicalInterval.scala | 1 - .../hail/types/physical/PCanonicalLocus.scala | 12 +- .../types/physical/PCanonicalNDArray.scala | 314 +++--- .../is/hail/types/physical/PNDArray.scala | 9 + .../is/hail/types/physical/PStruct.scala | 2 +- .../scala/is/hail/types/physical/PType.scala | 22 +- .../is/hail/types/physical/stypes/SType.scala | 21 +- .../stypes/concrete/SBaseStructPointer.scala | 2 +- .../stypes/concrete/SBinaryPointer.scala | 2 +- .../stypes/concrete/SCanonicalCall.scala | 2 +- .../concrete/SCanonicalLocusPointer.scala | 9 +- .../concrete/SCanonicalShufflePointer.scala | 2 +- .../stypes/concrete/SIndexablePointer.scala | 4 +- .../stypes/concrete/SInsertFieldsStruct.scala | 2 +- .../stypes/concrete/SIntervalPointer.scala | 6 +- .../stypes/concrete/SNDArrayPointer.scala | 36 +- .../stypes/concrete/SStackStruct.scala | 4 +- .../stypes/concrete/SStringPointer.scala | 2 +- .../stypes/concrete/SSubsetStruct.scala | 2 +- .../stypes/concrete/SUnreachable.scala | 309 ------ .../stypes/interfaces/SBaseStruct.scala | 2 +- .../stypes/interfaces/SContainer.scala | 2 +- .../stypes/interfaces/SInterval.scala | 4 +- .../physical/stypes/interfaces/SLocus.scala | 2 - .../physical/stypes/interfaces/SNDArray.scala | 86 +- .../physical/stypes/interfaces/SStream.scala | 2 +- .../physical/stypes/interfaces/SVoid.scala | 2 +- .../physical/stypes/primitives/SBoolean.scala | 2 +- 
.../physical/stypes/primitives/SFloat32.scala | 2 +- .../physical/stypes/primitives/SFloat64.scala | 2 +- .../physical/stypes/primitives/SInt32.scala | 2 +- .../physical/stypes/primitives/SInt64.scala | 2 +- .../scala/is/hail/types/virtual/TArray.scala | 2 - .../is/hail/types/virtual/TBaseStruct.scala | 2 - .../is/hail/types/virtual/TContainer.scala | 2 - .../scala/is/hail/types/virtual/TDict.scala | 2 - .../scala/is/hail/types/virtual/TSet.scala | 2 - .../scala/is/hail/types/virtual/TStruct.scala | 2 + .../hail/utils/richUtils/RichCodeRegion.scala | 9 +- .../is/hail/variant/ReferenceGenome.scala | 106 +- .../1.0.0/table/0.ht/_SUCCESS | 0 .../1.0.0/table/0.ht/globals/metadata.json.gz | Bin 0 -> 311 bytes .../1.0.0/table/0.ht/globals/parts/part-0 | Bin 0 -> 157 bytes .../1.0.0/table/0.ht/metadata.json.gz | Bin 0 -> 408 bytes .../1.0.0/table/0.ht/rows/metadata.json.gz | Bin 0 -> 307 bytes .../1.0.0/table/0.ht/rows/parts/part-0 | Bin 0 -> 162 bytes .../1.0.0/table/0.ht/rows/parts/part-1 | Bin 0 -> 319 bytes .../1.0.0/table/0.ht/rows/parts/part-2 | Bin 0 -> 319 bytes .../1.0.0/table/1.ht/_SUCCESS | 0 .../1.0.0/table/1.ht/globals/metadata.json.gz | Bin 0 -> 320 bytes .../1.0.0/table/1.ht/globals/parts/part-0 | Bin 0 -> 104 bytes .../1.0.0/table/1.ht/metadata.json.gz | Bin 0 -> 408 bytes .../1.0.0/table/1.ht/rows/metadata.json.gz | Bin 0 -> 316 bytes .../1.0.0/table/1.ht/rows/parts/part-0 | Bin 0 -> 106 bytes .../1.0.0/table/1.ht/rows/parts/part-1 | Bin 0 -> 207 bytes .../1.0.0/table/1.ht/rows/parts/part-2 | Bin 0 -> 207 bytes .../1.0.0/table/2.ht/_SUCCESS | 0 .../1.0.0/table/2.ht/globals/metadata.json.gz | Bin 0 -> 319 bytes .../1.0.0/table/2.ht/globals/parts/part-0 | Bin 0 -> 132 bytes .../1.0.0/table/2.ht/metadata.json.gz | Bin 0 -> 408 bytes .../1.0.0/table/2.ht/rows/metadata.json.gz | Bin 0 -> 315 bytes .../1.0.0/table/2.ht/rows/parts/part-0 | Bin 0 -> 137 bytes .../1.0.0/table/2.ht/rows/parts/part-1 | Bin 0 -> 144 bytes .../1.0.0/table/2.ht/rows/parts/part-2 | Bin 0 -> 144 bytes .../1.0.0/table/3.ht/_SUCCESS | 0 .../1.0.0/table/3.ht/globals/metadata.json.gz | Bin 0 -> 327 bytes .../1.0.0/table/3.ht/globals/parts/part-0 | Bin 0 -> 100 bytes .../1.0.0/table/3.ht/metadata.json.gz | Bin 0 -> 408 bytes .../1.0.0/table/3.ht/rows/metadata.json.gz | Bin 0 -> 322 bytes .../1.0.0/table/3.ht/rows/parts/part-0 | Bin 0 -> 102 bytes .../1.0.0/table/3.ht/rows/parts/part-1 | Bin 0 -> 113 bytes .../1.0.0/table/3.ht/rows/parts/part-2 | Bin 0 -> 113 bytes .../test/resources/regressionLinear.weights | 10 - .../is/hail/annotations/RegionSuite.scala | 44 - .../is/hail/expr/ir/ArrayFunctionsSuite.scala | 18 +- .../is/hail/expr/ir/BlockMatrixIRSuite.scala | 6 +- .../scala/is/hail/expr/ir/ETypeSuite.scala | 10 - .../is/hail/expr/ir/ForwardLetsSuite.scala | 2 +- .../scala/is/hail/expr/ir/FunctionSuite.scala | 8 +- .../test/scala/is/hail/expr/ir/IRSuite.scala | 40 +- .../scala/is/hail/expr/ir/IntervalSuite.scala | 4 +- .../is/hail/expr/ir/LocusFunctionsSuite.scala | 2 +- .../is/hail/expr/ir/MathFunctionsSuite.scala | 8 +- .../scala/is/hail/expr/ir/OrderingSuite.scala | 2 +- .../scala/is/hail/expr/ir/PruneSuite.scala | 10 +- .../hail/types/physical/PNDArraySuite.scala | 36 +- .../hail/variant/ReferenceGenomeSuite.scala | 10 +- memory/memory/memory.py | 6 +- monitoring/Makefile | 2 +- monitoring/deployment.yaml | 15 +- monitoring/monitoring/monitoring.py | 93 +- notebook/notebook/notebook.py | 19 +- query/query/query.py | 14 +- website/website/website.py | 5 +- 240 files changed, 1924 insertions(+), 4847 deletions(-) 
delete mode 100644 batch/sql/add-frozen-mode.sql delete mode 100644 datasets/notebooks/GTEx_MatrixTables.ipynb rename datasets/notebooks/{GTEx_Tables.ipynb => gtex_datasets.ipynb} (100%) delete mode 100644 dev-docs/batch-design.md delete mode 100644 hail/python/cluster-tests/cluster-liftover-rg.py delete mode 100644 hail/python/hail/docs/datasets/schemas/GTEx_eQTL_all_snp_gene_associations.rst delete mode 100644 hail/python/test/hail/expr/test_math.py delete mode 100644 hail/src/main/scala/is/hail/annotations/ChunkCache.scala delete mode 100644 hail/src/main/scala/is/hail/types/physical/stypes/concrete/SUnreachable.scala create mode 100644 hail/src/test/resources/backward_compatability/1.0.0/table/0.ht/_SUCCESS create mode 100644 hail/src/test/resources/backward_compatability/1.0.0/table/0.ht/globals/metadata.json.gz create mode 100644 hail/src/test/resources/backward_compatability/1.0.0/table/0.ht/globals/parts/part-0 create mode 100644 hail/src/test/resources/backward_compatability/1.0.0/table/0.ht/metadata.json.gz create mode 100644 hail/src/test/resources/backward_compatability/1.0.0/table/0.ht/rows/metadata.json.gz create mode 100644 hail/src/test/resources/backward_compatability/1.0.0/table/0.ht/rows/parts/part-0 create mode 100644 hail/src/test/resources/backward_compatability/1.0.0/table/0.ht/rows/parts/part-1 create mode 100644 hail/src/test/resources/backward_compatability/1.0.0/table/0.ht/rows/parts/part-2 create mode 100644 hail/src/test/resources/backward_compatability/1.0.0/table/1.ht/_SUCCESS create mode 100644 hail/src/test/resources/backward_compatability/1.0.0/table/1.ht/globals/metadata.json.gz create mode 100644 hail/src/test/resources/backward_compatability/1.0.0/table/1.ht/globals/parts/part-0 create mode 100644 hail/src/test/resources/backward_compatability/1.0.0/table/1.ht/metadata.json.gz create mode 100644 hail/src/test/resources/backward_compatability/1.0.0/table/1.ht/rows/metadata.json.gz create mode 100644 hail/src/test/resources/backward_compatability/1.0.0/table/1.ht/rows/parts/part-0 create mode 100644 hail/src/test/resources/backward_compatability/1.0.0/table/1.ht/rows/parts/part-1 create mode 100644 hail/src/test/resources/backward_compatability/1.0.0/table/1.ht/rows/parts/part-2 create mode 100644 hail/src/test/resources/backward_compatability/1.0.0/table/2.ht/_SUCCESS create mode 100644 hail/src/test/resources/backward_compatability/1.0.0/table/2.ht/globals/metadata.json.gz create mode 100644 hail/src/test/resources/backward_compatability/1.0.0/table/2.ht/globals/parts/part-0 create mode 100644 hail/src/test/resources/backward_compatability/1.0.0/table/2.ht/metadata.json.gz create mode 100644 hail/src/test/resources/backward_compatability/1.0.0/table/2.ht/rows/metadata.json.gz create mode 100644 hail/src/test/resources/backward_compatability/1.0.0/table/2.ht/rows/parts/part-0 create mode 100644 hail/src/test/resources/backward_compatability/1.0.0/table/2.ht/rows/parts/part-1 create mode 100644 hail/src/test/resources/backward_compatability/1.0.0/table/2.ht/rows/parts/part-2 create mode 100644 hail/src/test/resources/backward_compatability/1.0.0/table/3.ht/_SUCCESS create mode 100644 hail/src/test/resources/backward_compatability/1.0.0/table/3.ht/globals/metadata.json.gz create mode 100644 hail/src/test/resources/backward_compatability/1.0.0/table/3.ht/globals/parts/part-0 create mode 100644 hail/src/test/resources/backward_compatability/1.0.0/table/3.ht/metadata.json.gz create mode 100644 
hail/src/test/resources/backward_compatability/1.0.0/table/3.ht/rows/metadata.json.gz create mode 100644 hail/src/test/resources/backward_compatability/1.0.0/table/3.ht/rows/parts/part-0 create mode 100644 hail/src/test/resources/backward_compatability/1.0.0/table/3.ht/rows/parts/part-1 create mode 100644 hail/src/test/resources/backward_compatability/1.0.0/table/3.ht/rows/parts/part-2 delete mode 100644 hail/src/test/resources/regressionLinear.weights diff --git a/atgu/atgu/atgu.py b/atgu/atgu/atgu.py index 539738673e7..40050a5d073 100644 --- a/atgu/atgu/atgu.py +++ b/atgu/atgu/atgu.py @@ -19,7 +19,7 @@ web_authenticated_developers_only, check_csrf_token, new_csrf_token, - monitor_endpoints_middleware, + monitor_endpoint, ) @@ -71,6 +71,7 @@ def resource_record_to_dict(record): @routes.get('') @routes.get('/') @routes.get('/resources') +@monitor_endpoint @web_authenticated_developers_only() @render_template('resources.html') async def get_resources(request, userdata): # pylint: disable=unused-argument @@ -88,6 +89,7 @@ async def get_resources(request, userdata): # pylint: disable=unused-argument @routes.get('/resources/create') +@monitor_endpoint @web_authenticated_developers_only() @render_template('create_resource.html') async def get_create_resource(request, userdata): # pylint: disable=unused-argument @@ -97,6 +99,7 @@ async def get_create_resource(request, userdata): # pylint: disable=unused-argu @routes.post('/resources/create') # this method has special content handling, can't call `request.post()` # @check_csrf_token +@monitor_endpoint @web_authenticated_developers_only(redirect=False) async def post_create_resource(request, userdata): # pylint: disable=unused-argument db = request.app['db'] @@ -153,6 +156,7 @@ async def post_create_resource(request, userdata): # pylint: disable=unused-arg @routes.get('/resources/{id}') +@monitor_endpoint @web_authenticated_developers_only() @render_template('resource.html') async def get_resource(request, userdata): # pylint: disable=unused-argument @@ -171,6 +175,7 @@ async def get_resource(request, userdata): # pylint: disable=unused-argument @routes.get('/resources/{id}/edit') +@monitor_endpoint @web_authenticated_developers_only() @render_template('edit_resource.html') async def get_edit_resource(request, userdata): # pylint: disable=unused-argument @@ -191,6 +196,7 @@ async def get_edit_resource(request, userdata): # pylint: disable=unused-argume @routes.post('/resources/{id}/edit') # this method has special content handling, can't call `request.post()` # @check_csrf_token +@monitor_endpoint @web_authenticated_developers_only(redirect=False) async def post_edit_resource(request, userdata): # pylint: disable=unused-argument db = request.app['db'] @@ -268,6 +274,7 @@ async def post_edit_resource(request, userdata): # pylint: disable=unused-argum @routes.post('/resources/{id}/delete') @check_csrf_token +@monitor_endpoint @web_authenticated_developers_only(redirect=False) async def post_delete_resource(request, userdata): # pylint: disable=unused-argument db = request.app['db'] @@ -305,6 +312,7 @@ async def post_delete_resource(request, userdata): # pylint: disable=unused-arg @routes.get('/resources/{resource_id}/attachments/{attachment_id}') +@monitor_endpoint @web_authenticated_developers_only() async def get_attachment(request, userdata): # pylint: disable=unused-argument db = request.app['db'] @@ -362,7 +370,7 @@ async def on_cleanup(app): def run(): - app = web.Application(middlewares=[monitor_endpoints_middleware]) + app = 
web.Application() setup_aiohttp_session(app) diff --git a/auth/auth/auth.py b/auth/auth/auth.py index 90ac4ed2b3f..afc1f8a6b82 100644 --- a/auth/auth/auth.py +++ b/auth/auth/auth.py @@ -24,7 +24,7 @@ transaction, Database, maybe_parse_bearer_header, - monitor_endpoints_middleware, + monitor_endpoint, ) from web_common import setup_aiohttp_jinja2, setup_common_static_routes, set_message, render_template @@ -80,11 +80,13 @@ async def get_healthcheck(request): # pylint: disable=W0613 @routes.get('') @routes.get('/') +@monitor_endpoint async def get_index(request): # pylint: disable=unused-argument return aiohttp.web.HTTPFound(deploy_config.external_url('auth', '/login')) @routes.get('/creating') +@monitor_endpoint @web_maybe_authenticated_user async def creating_account(request, userdata): db = request.app['db'] @@ -123,6 +125,7 @@ async def creating_account(request, userdata): @routes.get('/creating/wait') +@monitor_endpoint async def creating_account_wait(request): session = await aiohttp_session.get_session(request) if 'pending' not in session: @@ -167,6 +170,7 @@ async def _wait_websocket(request, email): @routes.get('/signup') +@monitor_endpoint async def signup(request): next_page = request.query.get('next', deploy_config.external_url('notebook', '')) @@ -184,6 +188,7 @@ async def signup(request): @routes.get('/login') +@monitor_endpoint async def login(request): next_page = request.query.get('next', deploy_config.external_url('notebook', '')) @@ -202,6 +207,7 @@ async def login(request): @routes.get('/oauth2callback') +@monitor_endpoint async def callback(request): session = await aiohttp_session.get_session(request) if 'state' not in session: @@ -279,6 +285,7 @@ async def callback(request): @routes.get('/user') +@monitor_endpoint @web_authenticated_users_only() async def user_page(request, userdata): return await render_template('auth', request, userdata, 'user.html', {}) @@ -295,6 +302,7 @@ async def create_copy_paste_token(db, session_id, max_age_secs=300): @routes.post('/copy-paste-token') @check_csrf_token +@monitor_endpoint @web_authenticated_users_only() async def get_copy_paste_token(request, userdata): session = await aiohttp_session.get_session(request) @@ -306,6 +314,7 @@ async def get_copy_paste_token(request, userdata): @routes.post('/api/v1alpha/copy-paste-token') +@monitor_endpoint @rest_authenticated_users_only async def get_copy_paste_token_api(request, userdata): session_id = userdata['session_id'] @@ -316,6 +325,7 @@ async def get_copy_paste_token_api(request, userdata): @routes.post('/logout') @check_csrf_token +@monitor_endpoint @web_maybe_authenticated_user async def logout(request, userdata): if not userdata: @@ -332,6 +342,7 @@ async def logout(request, userdata): @routes.get('/api/v1alpha/login') +@monitor_endpoint async def rest_login(request): callback_port = request.query['callback_port'] @@ -342,6 +353,7 @@ async def rest_login(request): @routes.get('/roles') +@monitor_endpoint @web_authenticated_developers_only() async def get_roles(request, userdata): db = request.app['db'] @@ -352,6 +364,7 @@ async def get_roles(request, userdata): @routes.post('/roles') @check_csrf_token +@monitor_endpoint @web_authenticated_developers_only() async def post_create_role(request, userdata): # pylint: disable=unused-argument session = await aiohttp_session.get_session(request) @@ -373,6 +386,7 @@ async def post_create_role(request, userdata): # pylint: disable=unused-argumen @routes.get('/users') +@monitor_endpoint @web_authenticated_developers_only() async def 
get_users(request, userdata): db = request.app['db'] @@ -383,6 +397,7 @@ async def get_users(request, userdata): @routes.post('/users') @check_csrf_token +@monitor_endpoint @web_authenticated_developers_only() async def post_create_user(request, userdata): # pylint: disable=unused-argument session = await aiohttp_session.get_session(request) @@ -418,6 +433,7 @@ async def post_create_user(request, userdata): # pylint: disable=unused-argumen @routes.post('/users/delete') @check_csrf_token +@monitor_endpoint @web_authenticated_developers_only() async def delete_user(request, userdata): # pylint: disable=unused-argument session = await aiohttp_session.get_session(request) @@ -444,6 +460,7 @@ async def delete_user(request, userdata): # pylint: disable=unused-argument @routes.get('/api/v1alpha/oauth2callback') +@monitor_endpoint async def rest_callback(request): state = request.query['state'] code = request.query['code'] @@ -475,6 +492,7 @@ async def rest_callback(request): @routes.post('/api/v1alpha/copy-paste-login') +@monitor_endpoint async def rest_copy_paste_login(request): copy_paste_token = request.query['copy_paste_token'] db = request.app['db'] @@ -501,6 +519,7 @@ async def maybe_pop_token(tx): @routes.post('/api/v1alpha/logout') +@monitor_endpoint @rest_authenticated_users_only async def rest_logout(request, userdata): session_id = userdata['session_id'] @@ -536,6 +555,7 @@ async def get_userinfo(request, session_id): @routes.get('/api/v1alpha/userinfo') +@monitor_endpoint async def userinfo(request): if 'Authorization' not in request.headers: log.info('Authorization not in request.headers') @@ -562,6 +582,7 @@ async def get_session_id(request): @routes.get('/api/v1alpha/verify_dev_credentials') +@monitor_endpoint async def verify_dev_credentials(request): session_id = await get_session_id(request) if not session_id: @@ -584,7 +605,7 @@ async def on_cleanup(app): def run(): - app = web.Application(middlewares=[monitor_endpoints_middleware]) + app = web.Application() setup_aiohttp_jinja2(app, 'auth') setup_aiohttp_session(app) diff --git a/batch/Dockerfile.worker b/batch/Dockerfile.worker index bf957e28f81..5c6fffa74bf 100644 --- a/batch/Dockerfile.worker +++ b/batch/Dockerfile.worker @@ -1,4 +1,4 @@ -FROM {{ hail_ubuntu_image.image }} AS base +FROM {{ global.docker_prefix }}/python:3.7-slim-stretch COPY docker/hail-ubuntu/retry /bin/retry COPY docker/hail-ubuntu/hail-apt-get-install /bin/hail-apt-get-install @@ -6,8 +6,6 @@ RUN chmod 755 /bin/retry && \ chmod 755 /bin/hail-apt-get-install && \ mkdir -p /usr/share/man/man1 /usr/share/man/man2 RUN hail-apt-get-install \ - iproute2 \ - iptables \ openjdk-8-jre-headless \ liblapack3 @@ -17,7 +15,7 @@ COPY docker/requirements.txt . 
RUN chmod 755 /bin/hail-pip-install && \ hail-pip-install -r requirements.txt pyspark==3.1.1 -ENV SPARK_HOME /usr/local/lib/python3.7/dist-packages/pyspark +ENV SPARK_HOME /usr/local/lib/python3.7/site-packages/pyspark ENV PATH "$PATH:$SPARK_HOME/sbin:$SPARK_HOME/bin" ENV PYSPARK_PYTHON python3 @@ -32,23 +30,6 @@ RUN echo "APT::Acquire::Retries \"5\";" > /etc/apt/apt.conf.d/80-retries && \ curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \ hail-apt-get-install fuse gcsfuse -# Install crun runtime dependencies -RUN hail-apt-get-install libyajl-dev - -# Build crun in separate build step -FROM base AS crun_builder -RUN hail-apt-get-install make git gcc build-essential pkgconf libtool \ - libsystemd-dev libcap-dev libseccomp-dev \ - go-md2man libtool autoconf automake -RUN git clone --depth 1 --branch 0.19.1 https://github.com/containers/crun.git && \ - cd crun && \ - ./autogen.sh && \ - ./configure && \ - make && \ - make install - -FROM base -COPY --from=crun_builder /usr/local/bin/crun /usr/local/bin/crun RUN python3 -m pip install --upgrade --no-cache-dir pip COPY hail/python/setup-hailtop.py /hailtop/setup.py diff --git a/batch/Makefile b/batch/Makefile index 375a0190b84..b928a2f346d 100644 --- a/batch/Makefile +++ b/batch/Makefile @@ -22,8 +22,8 @@ build-batch: ../docker-build.sh . Dockerfile.out $(BATCH_IMAGE) .PHONY: build-worker -build-worker: - python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"'$$(cat ../docker/hail-ubuntu-image-ref)'"}}' Dockerfile.worker Dockerfile.worker.out +build-worker: src/main/java/is/hail/JVMEntryway.class jars/junixsocket-selftest-2.3.3-jar-with-dependencies.jar + python3 ../ci/jinja2_render.py '{"global":{"docker_prefix":"$(DOCKER_PREFIX)"}}' Dockerfile.worker Dockerfile.worker.out ../docker-build.sh .. batch/Dockerfile.worker.out $(BATCH_WORKER_IMAGE) .PHONY: build diff --git a/batch/batch/driver/create_instance.py b/batch/batch/driver/create_instance.py index 6f6fd3f0e05..2f22eba6a40 100644 --- a/batch/batch/driver/create_instance.py +++ b/batch/batch/driver/create_instance.py @@ -2,7 +2,6 @@ import logging import base64 import json -import uuid from hailtop import aiogoogle @@ -101,20 +100,6 @@ async def create_instance( #!/bin/bash set -x -NAME=$(curl -s http://metadata.google.internal/computeMetadata/v1/instance/name -H 'Metadata-Flavor: Google') -ZONE=$(curl -s http://metadata.google.internal/computeMetadata/v1/instance/zone -H 'Metadata-Flavor: Google') - -if [ -f "/started" ]; then - echo "instance $NAME has previously been started" - while true; do - gcloud -q compute instances delete $NAME --zone=$ZONE - sleep 1 - done - exit -else - touch /started -fi - curl -s -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/instance/attributes/run_script" >./run.sh nohup /bin/bash run.sh >run.log 2>&1 & @@ -126,6 +111,30 @@ async def create_instance( #!/bin/bash set -x +# set up docker networks +docker network create public --opt com.docker.network.bridge.name=public +docker network create private --opt com.docker.network.bridge.name=private +docker network create batch-worker --opt com.docker.network.bridge.name=batch-worker + +# wait for docker to create the DOCKER-USER chain +while ! 
iptables -L DOCKER-USER ; do sleep 1; done + +# [all docker] ban metadata server +iptables -I DOCKER-USER -d 169.254.169.254 -j DROP + +# [all docker] override: allow udp/53 (dns) to metadata server +iptables -I DOCKER-USER -d 169.254.169.254 -p udp -m udp --destination-port 53 -j ACCEPT + +# [public docker] ban inter-container communication +iptables -I DOCKER-USER -i public -d 172.16.0.0/12 -j DROP + +# [public docker] ban unused ip address range +iptables -I DOCKER-USER -i public -d 192.168.0.0/16 -j DROP + +# [batch worker] override: allow metadata server for batch worker +iptables -I DOCKER-USER -i batch-worker -d 169.254.169.254 -j ACCEPT + + WORKER_DATA_DISK_NAME="{worker_data_disk_name}" UNRESERVED_WORKER_DATA_DISK_SIZE_GB="{unreserved_disk_storage_gb}" @@ -152,15 +161,7 @@ async def create_instance( sudo mkdir -p /mnt/disks/$WORKER_DATA_DISK_NAME/gcsfuse/ sudo ln -s /mnt/disks/$WORKER_DATA_DISK_NAME/gcsfuse /gcsfuse -sudo mkdir -p /etc/netns - -# private job network = 10.0.0.0/16 -# public job network = 10.1.0.0/16 -# [all networks] Rewrite traffic coming from containers to masquerade as the host -iptables --table nat --append POSTROUTING --source 10.0.0.0/15 --jump MASQUERADE - -# [public] Block public traffic to the metadata server -iptables --insert FORWARD --source 10.1.0.0/16 --destination 169.254.169.254 --jump DROP +export HOME=/root CORES=$(nproc) NAMESPACE=$(curl -s -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/instance/attributes/namespace") @@ -244,8 +245,6 @@ async def create_instance( docker pull $BATCH_WORKER_IMAGE || \ (echo 'pull failed, retrying' && sleep 15 && docker pull $BATCH_WORKER_IMAGE) -BATCH_WORKER_IMAGE_ID=$(docker inspect $BATCH_WORKER_IMAGE --format='{{{{.Id}}}}' | cut -d':' -f2) - # So here I go it's my shot. 
docker run \ -e CORES=$CORES \ @@ -263,17 +262,13 @@ async def create_instance( -e MAX_IDLE_TIME_MSECS=$MAX_IDLE_TIME_MSECS \ -e WORKER_DATA_DISK_MOUNT=/mnt/disks/$WORKER_DATA_DISK_NAME \ -e BATCH_WORKER_IMAGE=$BATCH_WORKER_IMAGE \ --e BATCH_WORKER_IMAGE_ID=$BATCH_WORKER_IMAGE_ID \ -e UNRESERVED_WORKER_DATA_DISK_SIZE_GB=$UNRESERVED_WORKER_DATA_DISK_SIZE_GB \ -v /var/run/docker.sock:/var/run/docker.sock \ --v /var/run/netns:/var/run/netns:shared \ -v /usr/bin/docker:/usr/bin/docker \ -v /usr/sbin/xfs_quota:/usr/sbin/xfs_quota \ -v /batch:/batch:shared \ -v /logs:/logs \ -v /gcsfuse:/gcsfuse:shared \ --v /etc/netns:/etc/netns \ --v /sys/fs/cgroup:/sys/fs/cgroup \ --mount type=bind,source=/mnt/disks/$WORKER_DATA_DISK_NAME,target=/host \ --mount type=bind,source=/dev,target=/dev,bind-propagation=rshared \ -p 5000:5000 \ @@ -283,7 +278,7 @@ async def create_instance( --privileged \ --cap-add SYS_ADMIN \ --security-opt apparmor:unconfined \ ---network host \ +--network batch-worker \ $BATCH_WORKER_IMAGE \ python3 -u -m batch.worker.worker >worker.log 2>&1 @@ -325,9 +320,7 @@ async def create_instance( {'key': 'worker_config', 'value': base64.b64encode(json.dumps(worker_config.config).encode()).decode()} ) - params = {'requestId': str(uuid.uuid4())} - - await compute_client.post(f'/zones/{zone}/instances', params=params, json=config) + await compute_client.post(f'/zones/{zone}/instances', json=config) log.info(f'created machine {machine_name}') diff --git a/batch/batch/driver/gce.py b/batch/batch/driver/gce.py index 5e1b2151dab..c78686c189c 100644 --- a/batch/batch/driver/gce.py +++ b/batch/batch/driver/gce.py @@ -6,11 +6,12 @@ from gear import Database from hailtop import aiotools, aiogoogle -from hailtop.utils import periodically_call, time_msecs, parse_timestamp_msecs +from hailtop.utils import periodically_call, time_msecs from ..batch_configuration import PROJECT, DEFAULT_NAMESPACE from .zone_monitor import ZoneMonitor from .instance_collection_manager import InstanceCollectionManager +from ..utils import parse_timestamp_msecs log = logging.getLogger('gce_event_monitor') @@ -167,8 +168,8 @@ async def delete_orphaned_disks(self): if instance is None: log.exception(f'deleting disk {disk_name} from instance that no longer exists') elif (last_attach_timestamp_msecs is None - and now_msecs - creation_timestamp_msecs > 60 * 60 * 1000): - log.exception(f'deleting disk {disk_name} that has not attached within 60 minutes') + and now_msecs - creation_timestamp_msecs > 10 * 60 * 1000): + log.exception(f'deleting disk {disk_name} that has not attached within 10 minutes') elif (last_detach_timestamp_msecs is not None and now_msecs - last_detach_timestamp_msecs > 5 * 60 * 1000): log.exception(f'deleting detached disk {disk_name} that has not been cleaned up within 5 minutes') @@ -183,4 +184,4 @@ async def delete_orphaned_disks(self): log.exception(f'error while deleting orphaned disk {disk_name}') async def delete_orphaned_disks_loop(self): - await periodically_call(60, self.delete_orphaned_disks) + await periodically_call(15, self.delete_orphaned_disks) diff --git a/batch/batch/driver/job_private.py b/batch/batch/driver/job_private.py index e1d5867995d..039288f3196 100644 --- a/batch/batch/driver/job_private.py +++ b/batch/batch/driver/job_private.py @@ -97,10 +97,6 @@ async def bump_scheduler(self): self.scheduler_state_changed.set() async def schedule_jobs_loop_body(self): - if self.app['frozen']: - log.info(f'not scheduling any jobs for {self}; batch is frozen') - return True - log.info(f'starting 
scheduling jobs for {self}') waitable_pool = WaitableSharedPool(self.async_worker_pool) @@ -281,10 +277,6 @@ async def create_instance(self, batch_id, job_id, machine_spec): return (instance, resources) async def create_instances_loop_body(self): - if self.app['frozen']: - log.info(f'not creating instances for {self}; batch is frozen') - return True - log.info(f'create_instances for {self}: starting') start = time_msecs() n_instances_created = 0 diff --git a/batch/batch/driver/main.py b/batch/batch/driver/main.py index c5084e4d02d..cd9d538c1ed 100644 --- a/batch/batch/driver/main.py +++ b/batch/batch/driver/main.py @@ -18,10 +18,11 @@ web_authenticated_developers_only, check_csrf_token, transaction, - monitor_endpoints_middleware, + monitor_endpoint, ) from hailtop.hail_logging import AccessLogger from hailtop.config import get_deploy_config +from hailtop.httpx import client_session from hailtop.utils import ( time_msecs, RateLimit, @@ -29,6 +30,7 @@ Notice, periodically_call, AsyncWorkerPool, + request_retry_transient_errors, dump_all_stacktraces, ) from hailtop.tls import internal_server_ssl_context @@ -158,6 +160,7 @@ async def get_healthcheck(request): # pylint: disable=W0613 @routes.get('/check_invariants') +@monitor_endpoint @rest_authenticated_developers_only async def get_check_invariants(request, userdata): # pylint: disable=unused-argument app = request.app @@ -169,6 +172,7 @@ async def get_check_invariants(request, userdata): # pylint: disable=unused-arg @routes.patch('/api/v1alpha/batches/{user}/{batch_id}/close') +@monitor_endpoint @batch_only async def close_batch(request): db = request.app['db'] @@ -197,6 +201,7 @@ def set_cancel_state_changed(app): @routes.post('/api/v1alpha/batches/cancel') +@monitor_endpoint @batch_only async def cancel_batch(request): set_cancel_state_changed(request.app) @@ -204,6 +209,7 @@ async def cancel_batch(request): @routes.post('/api/v1alpha/batches/delete') +@monitor_endpoint @batch_only async def delete_batch(request): set_cancel_state_changed(request.app) @@ -230,12 +236,14 @@ async def activate_instance_1(request, instance): @routes.get('/api/v1alpha/instances/gsa_key') +@monitor_endpoint @activating_instances_only async def get_gsa_key(request, instance): # pylint: disable=unused-argument return await asyncio.shield(get_gsa_key_1(instance)) @routes.post('/api/v1alpha/instances/activate') +@monitor_endpoint @activating_instances_only async def activate_instance(request, instance): return await asyncio.shield(activate_instance_1(request, instance)) @@ -249,6 +257,7 @@ async def deactivate_instance_1(instance): @routes.post('/api/v1alpha/instances/deactivate') +@monitor_endpoint @active_instances_only async def deactivate_instance(request, instance): # pylint: disable=unused-argument return await asyncio.shield(deactivate_instance_1(instance)) @@ -296,6 +305,7 @@ async def job_complete_1(request, instance): @routes.post('/api/v1alpha/instances/job_complete') +@monitor_endpoint @active_instances_only async def job_complete(request, instance): return await asyncio.shield(job_complete_1(request, instance)) @@ -319,6 +329,7 @@ async def job_started_1(request, instance): @routes.post('/api/v1alpha/instances/job_started') +@monitor_endpoint @active_instances_only async def job_started(request, instance): return await asyncio.shield(job_started_1(request, instance)) @@ -326,6 +337,7 @@ async def job_started(request, instance): @routes.get('/') @routes.get('') +@monitor_endpoint @web_authenticated_developers_only() async def get_index(request, 
userdata): app = request.app @@ -349,7 +361,6 @@ async def get_index(request, userdata): 'ready_cores_mcpu': ready_cores_mcpu, 'live_total_cores_mcpu': inst_coll_manager.global_live_total_cores_mcpu, 'live_free_cores_mcpu': inst_coll_manager.global_live_free_cores_mcpu, - 'frozen': app['frozen'], } return await render_template('batch-driver', request, userdata, 'index.html', page_context) @@ -370,8 +381,19 @@ def validate_int(session, url_path, name, value, predicate, description): return validate(session, url_path, name, i, predicate, description) +async def refresh_inst_colls_on_front_end(app): + async with client_session() as session: + await request_retry_transient_errors( + session, + 'PATCH', + deploy_config.url('batch', '/api/v1alpha/inst_colls/refresh'), + headers=app['batch_headers'], + ) + + @routes.post('/config-update/pool/{pool}') @check_csrf_token +@monitor_endpoint @web_authenticated_developers_only() async def pool_config_update(request, userdata): # pylint: disable=unused-argument app = request.app @@ -471,6 +493,8 @@ async def pool_config_update(request, userdata): # pylint: disable=unused-argum max_live_instances, ) + await refresh_inst_colls_on_front_end(app) + set_message(session, f'Updated configuration for {pool}.', 'info') return web.HTTPFound(deploy_config.external_url('batch-driver', pool_url_path)) @@ -478,6 +502,7 @@ async def pool_config_update(request, userdata): # pylint: disable=unused-argum @routes.post('/config-update/jpim') @check_csrf_token +@monitor_endpoint @web_authenticated_developers_only() async def job_private_config_update(request, userdata): # pylint: disable=unused-argument app = request.app @@ -509,12 +534,15 @@ async def job_private_config_update(request, userdata): # pylint: disable=unuse await job_private_inst_manager.configure(boot_disk_size_gb, max_instances, max_live_instances) + await refresh_inst_colls_on_front_end(app) + set_message(session, f'Updated configuration for {job_private_inst_manager}.', 'info') return web.HTTPFound(deploy_config.external_url('batch-driver', url_path)) @routes.get('/inst_coll/pool/{pool}') +@monitor_endpoint @web_authenticated_developers_only() async def get_pool(request, userdata): app = request.app @@ -549,6 +577,7 @@ async def get_pool(request, userdata): @routes.get('/inst_coll/jpim') +@monitor_endpoint @web_authenticated_developers_only() async def get_job_private_inst_manager(request, userdata): app = request.app @@ -579,55 +608,8 @@ async def get_job_private_inst_manager(request, userdata): return await render_template('batch-driver', request, userdata, 'job_private.html', page_context) -@routes.post('/freeze') -@check_csrf_token -@web_authenticated_developers_only() -async def freeze_batch(request, userdata): # pylint: disable=unused-argument - app = request.app - db: Database = app['db'] - session = await aiohttp_session.get_session(request) - - if app['frozen']: - set_message(session, 'Batch is already frozen.', 'info') - return web.HTTPFound(deploy_config.external_url('batch-driver', '/')) - - await db.execute_update( - ''' -UPDATE globals SET frozen = 1; -''') - - app['frozen'] = True - - set_message(session, 'Froze all instance collections and batch submissions.', 'info') - - return web.HTTPFound(deploy_config.external_url('batch-driver', '/')) - - -@routes.post('/unfreeze') -@check_csrf_token -@web_authenticated_developers_only() -async def unfreeze_batch(request, userdata): # pylint: disable=unused-argument - app = request.app - db: Database = app['db'] - session = await 
aiohttp_session.get_session(request) - - if not app['frozen']: - set_message(session, 'Batch is already unfrozen.', 'info') - return web.HTTPFound(deploy_config.external_url('batch-driver', '/')) - - await db.execute_update( - ''' -UPDATE globals SET frozen = 0; -''') - - app['frozen'] = False - - set_message(session, 'Unfroze all instance collections and batch submissions.', 'info') - - return web.HTTPFound(deploy_config.external_url('batch-driver', '/')) - - @routes.get('/user_resources') +@monitor_endpoint @web_authenticated_developers_only() async def get_user_resources(request, userdata): app = request.app @@ -935,7 +917,7 @@ async def on_startup(app): row = await db.select_and_fetchone( ''' -SELECT instance_id, internal_token, frozen FROM globals; +SELECT instance_id, internal_token FROM globals; ''' ) @@ -947,8 +929,6 @@ async def on_startup(app): app['batch_headers'] = {'Authorization': f'Bearer {row["internal_token"]}'} - app['frozen'] = row['frozen'] - resources = db.select_and_fetchall('SELECT resource FROM resources;') app['resources'] = [record['resource'] async for record in resources] @@ -1044,16 +1024,10 @@ async def on_cleanup(app): try: app['task_manager'].shutdown() finally: - try: - await app['logging_client'].close() - finally: - try: - await app['compute_client'].close() - finally: - del app['k8s_cache'].client - await asyncio.gather( - *(t for t in asyncio.all_tasks() if t is not asyncio.current_task()) - ) + del app['k8s_cache'].client + await asyncio.gather( + *(t for t in asyncio.all_tasks() if t is not asyncio.current_task()) + ) def run(): @@ -1068,7 +1042,7 @@ def run(): verbose=3, ) - app = web.Application(client_max_size=HTTP_CLIENT_MAX_SIZE, middlewares=[monitor_endpoints_middleware]) + app = web.Application(client_max_size=HTTP_CLIENT_MAX_SIZE) setup_aiohttp_session(app) setup_aiohttp_jinja2(app, 'batch.driver') diff --git a/batch/batch/driver/pool.py b/batch/batch/driver/pool.py index b4d8a54ee97..e9118246d76 100644 --- a/batch/batch/driver/pool.py +++ b/batch/batch/driver/pool.py @@ -213,12 +213,7 @@ async def create_instance(self, cores=None, max_idle_time_msecs=None, zone=None) async def create_instances_from_ready_cores(self, ready_cores_mcpu, zone=None): n_live_instances = self.n_instances_by_state['pending'] + self.n_instances_by_state['active'] - if zone is None: - live_free_cores_mcpu = self.live_free_cores_mcpu - else: - live_free_cores_mcpu = self.live_free_cores_mcpu_by_zone[zone] - - instances_needed = (ready_cores_mcpu - live_free_cores_mcpu + (self.worker_cores * 1000) - 1) // ( + instances_needed = (ready_cores_mcpu - self.live_free_cores_mcpu + (self.worker_cores * 1000) - 1) // ( self.worker_cores * 1000 ) instances_needed = min( @@ -237,10 +232,6 @@ async def create_instances_from_ready_cores(self, ready_cores_mcpu, zone=None): await asyncio.gather(*[self.create_instance(zone=zone) for _ in range(instances_needed)]) async def create_instances(self): - if self.app['frozen']: - log.info(f'not creating instances for {self}; batch is frozen') - return - ready_cores_mcpu_per_user = self.db.select_and_fetchall( ''' SELECT user, @@ -387,10 +378,6 @@ def allocate_cores(user, mark): return result async def schedule_loop_body(self): - if self.app['frozen']: - log.info(f'not scheduling any jobs for {self.pool}; batch is frozen') - return True - log.info(f'schedule {self.pool}: starting') start = time_msecs() n_scheduled = 0 @@ -456,8 +443,7 @@ def get_instance(user, cores_mcpu): while i < len(self.pool.healthy_instances_by_free_cores): instance = 
self.pool.healthy_instances_by_free_cores[i] assert cores_mcpu <= instance.free_cores_mcpu - if user != 'ci' or (user == 'ci' and instance.zone == GCP_ZONE): - return instance + return instance i += 1 histogram = collections.defaultdict(int) for instance in self.pool.healthy_instances_by_free_cores: diff --git a/batch/batch/driver/templates/index.html b/batch/batch/driver/templates/index.html index 227dadf7ffa..e32c75ee557 100644 --- a/batch/batch/driver/templates/index.html +++ b/batch/batch/driver/templates/index.html @@ -7,24 +7,6 @@

Globals

ready cores: {{ ready_cores_mcpu / 1000 }}
-    <div>
-      {% if not frozen %}
-      <form action="{{ base_path }}/freeze" method="POST">
-        <input type="hidden" name="_csrf" value="{{ csrf_token }}">
-        <button>Freeze</button>
-      </form>
-      {% else %}
-      <form action="{{ base_path }}/unfreeze" method="POST">
-        <input type="hidden" name="_csrf" value="{{ csrf_token }}">
-        <button>Unfreeze</button>
-      </form>
-      {% endif %}
-    </div>
-

Instance Collections

Pools

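Note for reviewers: the @monitor_endpoint decorator applied to the driver routes above and the front-end routes below comes from hailtop's gear module, which is not part of this patch. As a rough, hypothetical sketch only (the metric name and the prometheus_client dependency are assumptions, and gear's real implementation may differ), a per-endpoint monitor for aiohttp handlers can look like this:

import functools
from aiohttp import web
from prometheus_client import Counter  # assumed dependency; illustrative only

# Hypothetical metric; gear's real metric names and labels may differ.
REQUEST_COUNT = Counter('http_requests', 'Total HTTP requests handled', ['endpoint', 'verb', 'status'])

def monitor_endpoint(handler):
    # Wrap an aiohttp handler and record one counter increment per response,
    # labelled by request path, HTTP method and response status.
    @functools.wraps(handler)
    async def wrapped(request, *args, **kwargs):
        status = 500
        try:
            response = await handler(request, *args, **kwargs)
            status = response.status
            return response
        except web.HTTPException as e:
            status = e.status
            raise
        finally:
            REQUEST_COUNT.labels(endpoint=request.path, verb=request.method, status=str(status)).inc()
    return wrapped

Unlike the monitor_endpoints_middleware it replaces, a decorator is opt-in per route, which is why each handler in this patch gains an explicit @monitor_endpoint line.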
diff --git a/batch/batch/front_end/front_end.py b/batch/batch/front_end/front_end.py index ca6ef1b94c4..6a0e4ee63a6 100644 --- a/batch/batch/front_end/front_end.py +++ b/batch/batch/front_end/front_end.py @@ -28,7 +28,6 @@ LoggingTimer, cost_str, dump_all_stacktraces, - periodically_call, ) from hailtop.batch_client.parse import parse_cpu_in_mcpu, parse_memory_in_bytes, parse_storage_in_bytes from hailtop.config import get_deploy_config @@ -44,7 +43,7 @@ web_authenticated_developers_only, check_csrf_token, transaction, - monitor_endpoints_middleware, + monitor_endpoint, ) from web_common import setup_aiohttp_jinja2, setup_common_static_routes, render_template, set_message @@ -56,6 +55,7 @@ is_valid_cores_mcpu, cost_from_msec_mcpu, cores_mcpu_to_memory_bytes, + batch_only, ) from ..batch import batch_record_to_dict, job_record_to_dict, cancel_batch_in_db from ..exceptions import ( @@ -112,7 +112,6 @@ async def wrapped(request, userdata, *args, **kwargs): log.exception('error while populating ui page') raise web.HTTPInternalServerError(text=traceback.format_exc()) from e raise - return wrapped @@ -297,6 +296,7 @@ async def _query_batch_jobs(request, batch_id): @routes.get('/api/v1alpha/batches/{batch_id}/jobs') +@monitor_endpoint @rest_billing_project_users_only async def get_jobs(request, userdata, batch_id): # pylint: disable=unused-argument db = request.app['db'] @@ -468,6 +468,7 @@ async def _get_full_job_status(app, record): @routes.get('/api/v1alpha/batches/{batch_id}/jobs/{job_id}/log') +@monitor_endpoint @rest_billing_project_users_only async def get_job_log(request, userdata, batch_id): # pylint: disable=unused-argument job_id = int(request.match_info['job_id']) @@ -585,6 +586,7 @@ async def _query_batches(request, user, q): @routes.get('/api/v1alpha/batches') +@monitor_endpoint @rest_authenticated_users_only async def get_batches(request, userdata): # pylint: disable=unused-argument user = userdata['username'] @@ -609,16 +611,13 @@ def check_service_account_permissions(user, sa): @routes.post('/api/v1alpha/batches/{batch_id}/jobs/create') +@monitor_endpoint @rest_authenticated_users_only async def create_jobs(request, userdata): app = request.app db: Database = app['db'] log_store: LogStore = app['log_store'] - if app['frozen']: - log.info('ignoring batch create request; batch is frozen') - raise web.HTTPServiceUnavailable() - batch_id = int(request.match_info['batch_id']) user = userdata['username'] @@ -994,15 +993,12 @@ async def insert(tx): @routes.post('/api/v1alpha/batches/create') +@monitor_endpoint @rest_authenticated_users_only async def create_batch(request, userdata): app = request.app db: Database = app['db'] - if app['frozen']: - log.info('ignoring batch create jobs request; batch is frozen') - raise web.HTTPServiceUnavailable() - batch_spec = await request.json() try: @@ -1138,12 +1134,14 @@ async def _delete_batch(app, batch_id): @routes.get('/api/v1alpha/batches/{batch_id}') +@monitor_endpoint @rest_billing_project_users_only async def get_batch(request, userdata, batch_id): # pylint: disable=unused-argument return web.json_response(await _get_batch(request.app, batch_id)) @routes.patch('/api/v1alpha/batches/{batch_id}/cancel') +@monitor_endpoint @rest_billing_project_users_only async def cancel_batch(request, userdata, batch_id): # pylint: disable=unused-argument await _handle_api_error(_cancel_batch, request.app, batch_id) @@ -1151,6 +1149,7 @@ async def cancel_batch(request, userdata, batch_id): # pylint: disable=unused-a 
@routes.patch('/api/v1alpha/batches/{batch_id}/close') +@monitor_endpoint @rest_authenticated_users_only async def close_batch(request, userdata): batch_id = int(request.match_info['batch_id']) @@ -1159,10 +1158,6 @@ async def close_batch(request, userdata): app = request.app db: Database = app['db'] - if app['frozen']: - log.info('ignoring batch close request; batch is frozen') - raise web.HTTPServiceUnavailable() - record = await db.select_and_fetchone( ''' SELECT 1 FROM batches @@ -1196,6 +1191,7 @@ async def close_batch(request, userdata): @routes.delete('/api/v1alpha/batches/{batch_id}') +@monitor_endpoint @rest_billing_project_users_only async def delete_batch(request, userdata, batch_id): # pylint: disable=unused-argument await _delete_batch(request.app, batch_id) @@ -1203,6 +1199,7 @@ async def delete_batch(request, userdata, batch_id): # pylint: disable=unused-a @routes.get('/batches/{batch_id}') +@monitor_endpoint @web_billing_project_users_only() @catch_ui_error_in_dev async def ui_batch(request, userdata, batch_id): @@ -1222,6 +1219,7 @@ async def ui_batch(request, userdata, batch_id): @routes.post('/batches/{batch_id}/cancel') +@monitor_endpoint @check_csrf_token @web_billing_project_users_only(redirect=False) @catch_ui_error_in_dev @@ -1240,6 +1238,7 @@ async def ui_cancel_batch(request, userdata, batch_id): # pylint: disable=unuse @routes.post('/batches/{batch_id}/delete') +@monitor_endpoint @check_csrf_token @web_billing_project_users_only(redirect=False) @catch_ui_error_in_dev @@ -1257,6 +1256,7 @@ async def ui_delete_batch(request, userdata, batch_id): # pylint: disable=unuse @routes.get('/batches', name='batches') +@monitor_endpoint @web_authenticated_users_only() @catch_ui_error_in_dev async def ui_batches(request, userdata): @@ -1353,6 +1353,7 @@ async def _get_attempts(app, batch_id, job_id): @routes.get('/api/v1alpha/batches/{batch_id}/jobs/{job_id}/attempts') +@monitor_endpoint @rest_billing_project_users_only async def get_attempts(request, userdata, batch_id): # pylint: disable=unused-argument job_id = int(request.match_info['job_id']) @@ -1361,6 +1362,7 @@ async def get_attempts(request, userdata, batch_id): # pylint: disable=unused-a @routes.get('/api/v1alpha/batches/{batch_id}/jobs/{job_id}') +@monitor_endpoint @rest_billing_project_users_only async def get_job(request, userdata, batch_id): # pylint: disable=unused-argument job_id = int(request.match_info['job_id']) @@ -1369,6 +1371,7 @@ async def get_job(request, userdata, batch_id): # pylint: disable=unused-argume @routes.get('/batches/{batch_id}/jobs/{job_id}') +@monitor_endpoint @web_billing_project_users_only() @catch_ui_error_in_dev async def ui_get_job(request, userdata, batch_id): @@ -1446,12 +1449,13 @@ async def ui_get_job(request, userdata, batch_id): 'job_specification': job_specification, 'job_status_str': json.dumps(job, indent=2), 'step_errors': step_errors, - 'error': job_status.get('error'), + 'error': job_status.get('error') } return await render_template('batch', request, userdata, 'job.html', page_context) @routes.get('/billing_limits') +@monitor_endpoint @web_authenticated_users_only() @catch_ui_error_in_dev async def ui_get_billing_limits(request, userdata): @@ -1513,6 +1517,7 @@ async def insert(tx): @routes.post('/api/v1alpha/billing_limits/{billing_project}/edit') +@monitor_endpoint @rest_authenticated_developers_or_auth_only async def post_edit_billing_limits(request, userdata): # pylint: disable=unused-argument db: Database = request.app['db'] @@ -1524,6 +1529,7 @@ async def 
post_edit_billing_limits(request, userdata): # pylint: disable=unused @routes.post('/billing_limits/{billing_project}/edit') +@monitor_endpoint @check_csrf_token @web_authenticated_developers_only(redirect=False) @catch_ui_error_in_dev @@ -1547,7 +1553,8 @@ async def _query_billing(request): default_start = datetime.datetime.now().replace(day=1) default_start = datetime.datetime.strftime(default_start, date_format) - default_end = None + default_end = datetime.datetime.now() + default_end = datetime.datetime.strftime(default_end, date_format) async def parse_error(msg): session = await aiohttp_session.get_session(request) @@ -1563,32 +1570,15 @@ async def parse_error(msg): end_query = request.query.get('end', default_end) try: - if end_query is not None and end_query != '': - end = datetime.datetime.strptime(end_query, date_format) - end = (end + datetime.timedelta(days=1)).timestamp() * 1000 - else: - end = None + end = datetime.datetime.strptime(end_query, date_format) + end = (end + datetime.timedelta(days=1)).timestamp() * 1000 except ValueError: return await parse_error(f"Invalid value for end '{end_query}'; must be in the format of MM/DD/YYYY.") - if end is not None and start > end: + if start > end: return await parse_error('Invalid search; start must be earlier than end.') - where_conditions = ["billing_projects.`status` != 'deleted'"] - where_args = [] - - if end is not None: - where_conditions.append("`time_completed` IS NOT NULL") - where_conditions.append("`time_completed` >= %s") - where_args.append(start) - where_conditions.append("`time_completed` <= %s") - where_args.append(end) - else: - where_conditions.append("(`time_completed` IS NOT NULL AND `time_completed` >= %s) OR " - "(`time_closed` IS NOT NULL AND `time_completed` IS NULL)") - where_args.append(start) - - sql = f''' + sql = ''' SELECT billing_project, `user`, @@ -1601,11 +1591,13 @@ async def parse_error(msg): ON resources.resource = aggregated_batch_resources.resource LEFT JOIN billing_projects ON billing_projects.name = batches.billing_project -WHERE {' AND '.join(where_conditions)} +WHERE `time_completed` >= %s AND + `time_completed` <= %s AND + billing_projects.`status` != 'deleted' GROUP BY billing_project, `user`; ''' - sql_args = where_args + sql_args = (start, end) def billing_record_to_dict(record): cost_msec_mcpu = cost_from_msec_mcpu(record['msec_mcpu']) @@ -1620,6 +1612,7 @@ def billing_record_to_dict(record): @routes.get('/billing') +@monitor_endpoint @web_authenticated_developers_only() @catch_ui_error_in_dev async def ui_get_billing(request, userdata): @@ -1660,6 +1653,7 @@ async def ui_get_billing(request, userdata): @routes.get('/billing_projects') +@monitor_endpoint @web_authenticated_developers_only() @catch_ui_error_in_dev async def ui_get_billing_projects(request, userdata): @@ -1673,6 +1667,7 @@ async def ui_get_billing_projects(request, userdata): @routes.get('/api/v1alpha/billing_projects') +@monitor_endpoint @rest_authenticated_users_only async def get_billing_projects(request, userdata): db: Database = request.app['db'] @@ -1688,6 +1683,7 @@ async def get_billing_projects(request, userdata): @routes.get('/api/v1alpha/billing_projects/{billing_project}') +@monitor_endpoint @rest_authenticated_users_only async def get_billing_project(request, userdata): db: Database = request.app['db'] @@ -1748,6 +1744,7 @@ async def delete(tx): @routes.post('/billing_projects/{billing_project}/users/{user}/remove') +@monitor_endpoint @check_csrf_token @web_authenticated_developers_only(redirect=False) 
@catch_ui_error_in_dev @@ -1764,6 +1761,7 @@ async def post_billing_projects_remove_user(request, userdata): # pylint: disab @routes.post('/api/v1alpha/billing_projects/{billing_project}/users/{user}/remove') +@monitor_endpoint @rest_authenticated_developers_or_auth_only async def api_get_billing_projects_remove_user(request, userdata): # pylint: disable=unused-argument db: Database = request.app['db'] @@ -1811,6 +1809,7 @@ async def insert(tx): @routes.post('/billing_projects/{billing_project}/users/add') +@monitor_endpoint @check_csrf_token @web_authenticated_developers_only(redirect=False) @catch_ui_error_in_dev @@ -1829,6 +1828,7 @@ async def post_billing_projects_add_user(request, userdata): # pylint: disable= @routes.post('/api/v1alpha/billing_projects/{billing_project}/users/{user}/add') +@monitor_endpoint @rest_authenticated_developers_or_auth_only async def api_billing_projects_add_user(request, userdata): # pylint: disable=unused-argument db: Database = request.app['db'] @@ -1865,6 +1865,7 @@ async def insert(tx): @routes.post('/billing_projects/create') +@monitor_endpoint @check_csrf_token @web_authenticated_developers_only(redirect=False) @catch_ui_error_in_dev @@ -1882,6 +1883,7 @@ async def post_create_billing_projects(request, userdata): # pylint: disable=un @routes.post('/api/v1alpha/billing_projects/{billing_project}/create') +@monitor_endpoint @rest_authenticated_developers_or_auth_only async def api_get_create_billing_projects(request, userdata): # pylint: disable=unused-argument db: Database = request.app['db'] @@ -1924,6 +1926,7 @@ async def close_project(tx): @routes.post('/billing_projects/{billing_project}/close') +@monitor_endpoint @check_csrf_token @web_authenticated_developers_only(redirect=False) @catch_ui_error_in_dev @@ -1939,6 +1942,7 @@ async def post_close_billing_projects(request, userdata): # pylint: disable=unu @routes.post('/api/v1alpha/billing_projects/{billing_project}/close') +@monitor_endpoint @rest_authenticated_developers_or_auth_only async def api_close_billing_projects(request, userdata): # pylint: disable=unused-argument db: Database = request.app['db'] @@ -1968,6 +1972,7 @@ async def open_project(tx): @routes.post('/billing_projects/{billing_project}/reopen') +@monitor_endpoint @check_csrf_token @web_authenticated_developers_only(redirect=False) @catch_ui_error_in_dev @@ -1983,6 +1988,7 @@ async def post_reopen_billing_projects(request, userdata): # pylint: disable=un @routes.post('/api/v1alpha/billing_projects/{billing_project}/reopen') +@monitor_endpoint @rest_authenticated_developers_or_auth_only async def api_reopen_billing_projects(request, userdata): # pylint: disable=unused-argument db: Database = request.app['db'] @@ -2011,6 +2017,7 @@ async def delete_project(tx): @routes.post('/api/v1alpha/billing_projects/{billing_project}/delete') +@monitor_endpoint @rest_authenticated_developers_or_auth_only async def api_delete_billing_projects(request, userdata): # pylint: disable=unused-argument db: Database = request.app['db'] @@ -2020,16 +2027,12 @@ async def api_delete_billing_projects(request, userdata): # pylint: disable=unu return web.json_response(billing_project) -async def _refresh(app): - db: Database = app['db'] - inst_coll_configs: InstanceCollectionConfigs = app['inst_coll_configs'] +@routes.patch('/api/v1alpha/inst_colls/refresh') +@batch_only +async def refresh_inst_colls(request): + inst_coll_configs: InstanceCollectionConfigs = request.app['inst_coll_configs'] await inst_coll_configs.refresh() - row = await 
db.select_and_fetchone( - ''' -SELECT frozen FROM globals; -''' - ) - app['frozen'] = row['frozen'] + return web.Response() @routes.get('') @@ -2078,7 +2081,7 @@ async def on_startup(app): row = await db.select_and_fetchone( ''' -SELECT instance_id, internal_token, n_tokens, frozen FROM globals; +SELECT instance_id, internal_token, n_tokens FROM globals; ''' ) @@ -2092,8 +2095,6 @@ async def on_startup(app): app['batch_headers'] = {'Authorization': f'Bearer {row["internal_token"]}'} - app['frozen'] = row['frozen'] - credentials = google.oauth2.service_account.Credentials.from_service_account_file('/gsa-key/key.json') app['log_store'] = LogStore(BATCH_BUCKET_NAME, instance_id, pool, credentials=credentials) @@ -2115,10 +2116,6 @@ async def on_startup(app): retry_long_running('delete_batch_loop', run_if_changed, delete_batch_state_changed, delete_batch_loop_body, app) ) - app['task_manager'].ensure_future( - periodically_call(5, _refresh, app) - ) - async def on_cleanup(app): try: @@ -2128,7 +2125,7 @@ async def on_cleanup(app): def run(): - app = web.Application(client_max_size=HTTP_CLIENT_MAX_SIZE, middlewares=[monitor_endpoints_middleware]) + app = web.Application(client_max_size=HTTP_CLIENT_MAX_SIZE) setup_aiohttp_session(app) setup_aiohttp_jinja2(app, 'batch.front_end') diff --git a/batch/batch/front_end/templates/billing.html b/batch/batch/front_end/templates/billing.html index 83db193f5a3..16aa09489e8 100644 --- a/batch/batch/front_end/templates/billing.html +++ b/batch/batch/front_end/templates/billing.html @@ -7,31 +7,22 @@

Billing


-
-
- - -
-
 
-        Start must be a date in the format MM/DD/YYYY. End is an optional date in the format
-        MM/DD/YYYY. Leave End empty to include currently running batches. If End is not empty,
-        then no currently running batches are included. All dates search for batches that have
-        completed within that time interval (inclusive).

-

Cost by Billing Project

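The billing change above makes End a required date (defaulting to today) rather than an optional one. A minimal, standalone sketch of the window arithmetic now used in _query_billing (MM/DD/YYYY parsing, an inclusive end bound obtained by adding one day, both bounds in milliseconds since the epoch, the unit used for time_completed elsewhere in batch) follows; billing_window is just a name for this sketch, the patch keeps the logic inline:

import datetime

date_format = '%m/%d/%Y'

def billing_window(start_query, end_query):
    # Parse both bounds; make the end inclusive by extending it to the start
    # of the following day, then convert to milliseconds since the epoch.
    start = datetime.datetime.strptime(start_query, date_format).timestamp() * 1000
    end = datetime.datetime.strptime(end_query, date_format)
    end = (end + datetime.timedelta(days=1)).timestamp() * 1000
    if start > end:
        raise ValueError('start must be earlier than end')
    return start, end

# Example: all batches completed in March 2021, including those finishing on 03/31.
start_msecs, end_msecs = billing_window('03/01/2021', '03/31/2021')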
diff --git a/batch/batch/globals.py b/batch/batch/globals.py index d4db4fb7e61..4bf4adb6de8 100644 --- a/batch/batch/globals.py +++ b/batch/batch/globals.py @@ -39,7 +39,7 @@ BATCH_FORMAT_VERSION = 6 STATUS_FORMAT_VERSION = 5 -INSTANCE_VERSION = 19 +INSTANCE_VERSION = 18 WORKER_CONFIG_VERSION = 3 MAX_PERSISTENT_SSD_SIZE_GIB = 64 * 1024 diff --git a/batch/batch/inst_coll_config.py b/batch/batch/inst_coll_config.py index 9da3896b699..15b25e2721d 100644 --- a/batch/batch/inst_coll_config.py +++ b/batch/batch/inst_coll_config.py @@ -141,6 +141,7 @@ async def async_init(self): await self.refresh() async def refresh(self): + log.info('loading inst coll configs and resource rates from db') records = self.db.execute_and_fetchall( ''' SELECT inst_colls.*, pools.* diff --git a/batch/batch/utils.py b/batch/batch/utils.py index 55fd2949730..13f7dcd793d 100644 --- a/batch/batch/utils.py +++ b/batch/batch/utils.py @@ -2,6 +2,7 @@ import math import json import secrets +import dateutil.parser from aiohttp import web from functools import wraps from collections import deque @@ -160,7 +161,7 @@ def adjust_cores_for_storage_request( def unreserved_worker_data_disk_size_gib(worker_local_ssd_data_disk, worker_pd_ssd_data_disk_size_gib, worker_cores): - reserved_image_size = 30 + reserved_image_size = 20 reserved_container_size = RESERVED_STORAGE_GB_PER_CORE * worker_cores if worker_local_ssd_data_disk: # local ssd is 375Gi @@ -195,6 +196,12 @@ def is_valid_cores_mcpu(cores_mcpu: int): return quarter_cores & (quarter_cores - 1) == 0 +def parse_timestamp_msecs(ts): + if ts is None: + return ts + return dateutil.parser.isoparse(ts).timestamp() * 1000 + + class Box: def __init__(self, value): self.value = value diff --git a/batch/batch/worker/disk.py b/batch/batch/worker/disk.py index 14400253fd0..ebef846a02e 100644 --- a/batch/batch/worker/disk.py +++ b/batch/batch/worker/disk.py @@ -21,9 +21,6 @@ def __init__(self, compute_client, name, zone, project, instance_name, size_in_g self.size_in_gb = size_in_gb self.mount_path = mount_path - self._created = False - self._attached = False - self.disk_path = f'/dev/disk/by-id/google-{self.name}' async def __aenter__(self, labels=None): @@ -48,10 +45,9 @@ async def delete(self): await self._delete() async def _unmount(self): - if self._attached: - await retry_all_errors_n_times(max_errors=10, msg=f'error while unmounting disk {self.name}', error_logging_interval=3)( - check_shell_output, f'umount -v {self.disk_path} {self.mount_path}' - ) + await retry_all_errors_n_times(max_errors=10, msg=f'error while unmounting disk {self.name}', error_logging_interval=3)( + check_shell_output, f'umount -v {self.disk_path} {self.mount_path}' + ) async def _format(self): async def format_disk(): @@ -75,7 +71,6 @@ async def _create(self, labels=None): } await self.compute_client.create_disk(f'/zones/{self.zone}/disks', json=config) - self._created = True async def _attach(self): async with LoggingTimer(f'attaching disk {self.name} to {self.instance_name}'): @@ -88,19 +83,16 @@ async def _attach(self): await self.compute_client.attach_disk( f'/zones/{self.zone}/instances/{self.instance_name}/attachDisk', json=config ) - self._attached = True async def _detach(self): async with LoggingTimer(f'detaching disk {self.name} from {self.instance_name}'): - if self._attached: - await self.compute_client.detach_disk( - f'/zones/{self.zone}/instances/{self.instance_name}/detachDisk', params={'deviceName': self.name} - ) + await self.compute_client.detach_disk( + 
f'/zones/{self.zone}/instances/{self.instance_name}/detachDisk', params={'deviceName': self.name} + ) async def _delete(self): async with LoggingTimer(f'deleting disk {self.name}'): - if self._created: - await self.compute_client.delete_disk(f'/zones/{self.zone}/disks/{self.name}') + await self.compute_client.delete_disk(f'/zones/{self.zone}/disks/{self.name}') def __str__(self): return self.name diff --git a/batch/batch/worker/worker.py b/batch/batch/worker/worker.py index 6e7d1cf818d..f21c227cc03 100644 --- a/batch/batch/worker/worker.py +++ b/batch/batch/worker/worker.py @@ -1,4 +1,4 @@ -from typing import Optional, Dict, Callable, Tuple, Awaitable, Any +from typing import Optional, Dict, Callable, Tuple import os import json import sys @@ -16,12 +16,10 @@ from aiohttp import web import async_timeout import concurrent -import aiodocker # type: ignore -import aiorwlock -from collections import defaultdict, Counter -import psutil -from aiodocker.exceptions import DockerError # type: ignore -import google.oauth2.service_account # type: ignore +import aiodocker +from collections import defaultdict +from aiodocker.exceptions import DockerError +import google.oauth2.service_account from hailtop.utils import ( time_msecs, request_retry_transient_errors, @@ -29,14 +27,12 @@ retry_all_errors, check_shell, CalledProcessError, - check_exec_output, check_shell_output, is_google_registry_domain, find_spark_home, dump_all_stacktraces, parse_docker_image_reference, blocking_to_async, - periodically_call, ) from hailtop.httpx import client_session from hailtop.batch_client.parse import parse_cpu_in_mcpu, parse_memory_in_bytes, parse_storage_in_bytes @@ -82,8 +78,6 @@ MAX_DOCKER_WAIT_SECS = 5 * 60 MAX_DOCKER_OTHER_OPERATION_SECS = 1 * 60 -IPTABLES_WAIT_TIMEOUT_SECS = 60 - CORES = int(os.environ['CORES']) NAME = os.environ['NAME'] NAMESPACE = os.environ['NAMESPACE'] @@ -99,7 +93,6 @@ MAX_IDLE_TIME_MSECS = int(os.environ['MAX_IDLE_TIME_MSECS']) WORKER_DATA_DISK_MOUNT = os.environ['WORKER_DATA_DISK_MOUNT'] BATCH_WORKER_IMAGE = os.environ['BATCH_WORKER_IMAGE'] -BATCH_WORKER_IMAGE_ID = os.environ['BATCH_WORKER_IMAGE_ID'] UNRESERVED_WORKER_DATA_DISK_SIZE_GB = int(os.environ['UNRESERVED_WORKER_DATA_DISK_SIZE_GB']) assert UNRESERVED_WORKER_DATA_DISK_SIZE_GB >= 0 @@ -126,14 +119,9 @@ docker: Optional[aiodocker.Docker] = None port_allocator: Optional['PortAllocator'] = None -network_allocator: Optional['NetworkAllocator'] = None worker: Optional['Worker'] = None -image_configs: Dict[str, Dict[str, Any]] = dict() - -image_lock = aiorwlock.RWLock() - class PortAllocator: def __init__(self): @@ -149,138 +137,6 @@ def free(self, port): self.ports.put_nowait(port) -class NetworkNamespace: - def __init__(self, subnet_index: int, private: bool, internet_interface: str): - assert subnet_index <= 255 - self.subnet_index = subnet_index - self.private = private - self.internet_interface = internet_interface - self.network_ns_name = uuid.uuid4().hex[:5] - self.hostname = 'hostname-' + uuid.uuid4().hex[:10] - self.veth_host = self.network_ns_name + '-host' - self.veth_job = self.network_ns_name + '-job' - - if private: - self.host_ip = f'10.0.{subnet_index}.10' - self.job_ip = f'10.0.{subnet_index}.11' - else: - self.host_ip = f'10.1.{subnet_index}.10' - self.job_ip = f'10.1.{subnet_index}.11' - - self.port = None - self.host_port = None - - async def init(self): - await self.create_netns() - await self.enable_iptables_forwarding() - - os.makedirs(f'/etc/netns/{self.network_ns_name}') - with 
open(f'/etc/netns/{self.network_ns_name}/hosts', 'w') as hosts: - hosts.write('127.0.0.1 localhost\n') - hosts.write(f'{self.job_ip} {self.hostname}\n') - - # Jobs on the private network should have access to the metadata server - # and our vdc. The public network should not so we use google's public - # resolver. - with open(f'/etc/netns/{self.network_ns_name}/resolv.conf', 'w') as resolv: - if self.private: - resolv.write('nameserver 169.254.169.254\n') - resolv.write('search c.hail-vdc.internal google.internal\n') - else: - resolv.write('nameserver 8.8.8.8\n') - - async def create_netns(self): - await check_shell( - f''' -ip netns add {self.network_ns_name} && \ -ip link add name {self.veth_host} type veth peer name {self.veth_job} && \ -ip link set dev {self.veth_host} up && \ -ip link set {self.veth_job} netns {self.network_ns_name} && \ -ip address add {self.host_ip}/24 dev {self.veth_host} -ip -n {self.network_ns_name} link set dev {self.veth_job} up && \ -ip -n {self.network_ns_name} link set dev lo up && \ -ip -n {self.network_ns_name} address add {self.job_ip}/24 dev {self.veth_job} && \ -ip -n {self.network_ns_name} route add default via {self.host_ip}''' - ) - - async def enable_iptables_forwarding(self): - await check_shell( - f''' -iptables -w {IPTABLES_WAIT_TIMEOUT_SECS} --append FORWARD --in-interface {self.veth_host} --out-interface {self.internet_interface} --jump ACCEPT && \ -iptables -w {IPTABLES_WAIT_TIMEOUT_SECS} --append FORWARD --out-interface {self.veth_host} --in-interface {self.internet_interface} --jump ACCEPT && \ -iptables -w {IPTABLES_WAIT_TIMEOUT_SECS} --append FORWARD --out-interface {self.veth_host} --in-interface {self.veth_host} --jump ACCEPT''' - ) - - async def expose_port(self, port, host_port): - self.port = port - self.host_port = host_port - await self.expose_port_rule(action='append') - - async def expose_port_rule(self, action: str): - # Appending to PREROUTING means this is only exposed to external traffic. - # To expose for locally created packets, we would append instead to the OUTPUT chain. 
- await check_shell( - f'iptables -w {IPTABLES_WAIT_TIMEOUT_SECS} --table nat --{action} PREROUTING ' - f'--match addrtype --dst-type LOCAL ' - f'--protocol tcp ' - f'--match tcp --dport {self.host_port} ' - f'--jump DNAT --to-destination {self.job_ip}:{self.port}' - ) - - async def cleanup(self): - if self.host_port: - assert self.port - await self.expose_port_rule(action='delete') - self.host_port = None - self.port = None - await check_shell( - f''' -ip link delete {self.veth_host} && \ -ip netns delete {self.network_ns_name}''' - ) - await self.create_netns() - - -class NetworkAllocator: - def __init__(self): - self.private_networks = asyncio.Queue() - self.public_networks = asyncio.Queue() - - for nic in psutil.net_if_addrs().keys(): - if nic.startswith('ens'): - self.internet_interface = nic - break - else: - raise Exception('No ens interface detected') - - async def reserve(self, netns_pool_min_size: int = 64): - for subnet_index in range(netns_pool_min_size): - public = NetworkNamespace(subnet_index, private=False, internet_interface=self.internet_interface) - await public.init() - self.public_networks.put_nowait(public) - - private = NetworkNamespace(subnet_index, private=True, internet_interface=self.internet_interface) - - await private.init() - self.private_networks.put_nowait(private) - - async def allocate_private(self) -> NetworkNamespace: - return await self.private_networks.get() - - async def allocate_public(self) -> NetworkNamespace: - return await self.public_networks.get() - - def free(self, netns: NetworkNamespace): - asyncio.ensure_future(self._free(netns)) - - async def _free(self, netns: NetworkNamespace): - await netns.cleanup() - if netns.private: - self.private_networks.put_nowait(netns) - else: - self.public_networks.put_nowait(netns) - - def docker_call_retry(timeout, name): async def wrapper(f, *args, **kwargs): delay = 0.1 @@ -309,6 +165,73 @@ async def wrapper(f, *args, **kwargs): return wrapper +async def create_container(config, name): + delay = 0.1 + error = 0 + + async def handle_error(e): + nonlocal error, delay + error += 1 + if error < 10: + delay = await sleep_and_backoff(delay) + return + raise ValueError('encountered {error} failures in create_container; aborting') from e + + while True: + try: + return await docker.containers.create(config, name=name) + except DockerError as e: + # 409 container with name already exists + if e.status == 409: + try: + delay = await sleep_and_backoff(delay) + return await docker.containers.get(name) + except DockerError as eget: + # 404 No such container + if eget.status == 404: + await handle_error(eget) + continue + # No such image: {DOCKER_PREFIX}/... 
+ if e.status == 404 and 'No such image' in e.message: + await handle_error(e) + continue + raise + + +async def start_container(container): + try: + return await container.start() + except DockerError as e: + # 304 container has already started + if e.status == 304: + return + if e.status == 500 and e.message == 'OCI runtime start failed: container process is already dead: unknown': + log.info(f'restarting container {container}') + return await container.restart() + raise + + +async def stop_container(container): + try: + return await container.stop() + except DockerError as e: + # 304 container has already stopped + if e.status == 304: + return + raise + + +async def delete_container(container, *args, **kwargs): + try: + return await container.delete(*args, **kwargs) + except DockerError as e: + # 404 container does not exist + # 409 removal of container is already in progress + if e.status in (404, 409): + return + raise + + class JobDeletedError(Exception): pass @@ -357,16 +280,10 @@ def user_error(e): if isinstance(e, DockerError): if e.status == 404 and 'pull access denied' in e.message: return True - if e.status == 404 and ('not found: manifest unknown' in e.message or 'no such image' in e.message): + if e.status == 404 and 'not found: manifest unknown' in e.message: return True if e.status == 400 and 'executable file not found' in e.message: return True - if isinstance(e, CalledProcessError): - # Opening GCS connection...\n', b'daemonize.Run: readFromProcess: sub-process: mountWithArgs: mountWithConn: - # fs.NewServer: create file system: SetUpBucket: OpenBucket: Bad credentials for bucket "BUCKET". Check the - # bucket name and your credentials.\n') - if 'Bad credentials for bucket' in e.outerr: - return True return False @@ -375,9 +292,9 @@ def __init__(self, job, name, spec): self.job = job self.name = name self.spec = spec - self.deleted_event = asyncio.Event() image_ref = parse_docker_image_reference(self.spec['image']) + if image_ref.tag is None and image_ref.digest is None: log.info(f'adding latest tag to image {self.spec["image"]} for {self}') image_ref.tag = 'latest' @@ -392,115 +309,65 @@ def __init__(self, job, name, spec): self.image_ref = image_ref self.image_ref_str = str(image_ref) - self.image_id = None self.port = self.spec.get('port') self.host_port = None self.timeout = self.spec.get('timeout') + self.container = None self.state = 'pending' - self.error = None self.short_error = None - self.container_status = None - self.started_at = None - self.finished_at = None - + self.error = None self.timings = Timings(self.is_job_deleted) - - self.logbuffer = bytearray() + self.container_status = None + self.log = None self.overlay_path = None - self.image_config = None - self.rootfs_path = None - scratch = self.spec['scratch'] - self.container_scratch = f'{scratch}/{self.name}' - self.container_overlay_path = f'{self.container_scratch}/rootfs_overlay' - self.config_path = f'{self.container_scratch}/config' - - self.container_name = f'batch-{self.job.batch_id}-job-{self.job.job_id}-{self.name}' - - self.netns: Optional[NetworkNamespace] = None - self.process = None - - async def run(self, worker: 'Worker'): - try: - - async def localize_rootfs(): - async with image_lock.reader_lock: - # FIXME Authentication is entangled with pulling images. We need a way to test - # that a user has access to a cached image without pulling. 
- await self.pull_image() - self.image_config = image_configs[self.image_ref_str] - self.image_id = self.image_config['Id'].split(":")[1] - worker.image_ref_count[self.image_id] += 1 - - self.rootfs_path = f'/host/rootfs/{self.image_id}' - async with worker.rootfs_locks[self.image_id]: - if not os.path.exists(self.rootfs_path): - await self.extract_rootfs() - log.info(f'Added expanded image to cache: {self.image_ref_str}, ID: {self.image_id}') - - with self.step('pulling'): - await self.run_until_done_or_deleted(localize_rootfs) + def container_config(self): + weight = worker_fraction_in_1024ths(self.spec['cpu']) + host_config = {'CpuShares': weight, 'Memory': self.spec['memory'], 'BlkioWeight': min(weight, 1000)} - with self.step('setting up overlay'): - await self.run_until_done_or_deleted(self.setup_overlay) + config = { + "AttachStdin": False, + "AttachStdout": False, + "AttachStderr": False, + "Tty": False, + 'OpenStdin': False, + 'Cmd': self.spec['command'], + 'Image': self.image_ref_str, + 'Entrypoint': '', + } - with self.step('setting up network'): - await self.run_until_done_or_deleted(self.setup_network_namespace) + env = self.spec.get('env', []) - with self.step('running'): - timed_out = await self.run_until_done_or_deleted(self.run_container) + if self.port is not None: + assert self.host_port is not None + config['ExposedPorts'] = {f'{self.port}/tcp': {}} + host_config['PortBindings'] = {f'{self.port}/tcp': [{'HostIp': '', 'HostPort': str(self.host_port)}]} + env = list(env) + env.append(f'HAIL_BATCH_WORKER_PORT={self.host_port}') + env.append(f'HAIL_BATCH_WORKER_IP={IP_ADDRESS}') - self.container_status = await self.get_container_status() + volume_mounts = self.spec.get('volume_mounts') + if volume_mounts: + host_config['Binds'] = volume_mounts - with self.step('uploading_log'): - await self.upload_log() + if env: + config['Env'] = env - if timed_out: - self.short_error = 'timed out' - raise JobTimeoutError(f'timed out after {self.timeout}s') + network = self.spec.get('network') + if network is None: + network = 'public' + host_config['NetworkMode'] = network # not documented, I used strace to inspect the packets - if self.container_status['exit_code'] == 0: - self.state = 'succeeded' - else: - if self.container_status['out_of_memory']: - self.short_error = 'out of memory' - self.state = 'failed' - except asyncio.CancelledError: - raise - except Exception as e: - if not isinstance(e, (JobDeletedError, JobTimeoutError)) and not user_error(e): - log.exception(f'while running {self}') + unconfined = self.spec.get('unconfined') + if unconfined: + host_config['SecurityOpt'] = ["seccomp:unconfined", "apparmor:unconfined"] - self.state = 'error' - self.error = traceback.format_exc() - finally: - try: - await self.delete_container() - finally: - if self.image_id: - worker.image_ref_count[self.image_id] -= 1 - assert worker.image_ref_count[self.image_id] >= 0 + config['HostConfig'] = host_config - async def run_until_done_or_deleted(self, f: Callable[[], Awaitable[Any]]): - step = asyncio.ensure_future(f()) - deleted = asyncio.ensure_future(self.deleted_event.wait()) - try: - await asyncio.wait([deleted, step], return_when=asyncio.FIRST_COMPLETED) - if deleted.done(): - raise JobDeletedError() - assert step.done() - return step.result() - finally: - for t in (step, deleted): - if t.done(): - e = t.exception() - if e and not user_error(e): - log.exception(e) - else: - t.cancel() + return config def is_job_deleted(self) -> bool: return self.job.deleted @@ -508,34 +375,31 @@ def 
is_job_deleted(self) -> bool: def step(self, name: str): return self.timings.step(name) - async def pull_image(self): - is_google_image = is_google_registry_domain(self.image_ref.domain) - is_public_image = self.image_ref.name() in PUBLIC_IMAGES + async def get_container_status(self): + if not self.container: + return None try: - if not is_google_image: - await self.ensure_image_is_pulled() - elif is_public_image: - auth = await self.batch_worker_access_token() - await self.ensure_image_is_pulled(auth=auth) - else: - # Pull to verify this user has access to this - # image. - # FIXME improve the performance of this with a - # per-user image cache. - auth = self.current_user_access_token() - await docker_call_retry(MAX_DOCKER_IMAGE_PULL_SECS, f'{self}')( - docker.images.pull, self.image_ref_str, auth=auth - ) + c = await docker_call_retry(MAX_DOCKER_OTHER_OPERATION_SECS, f'{self}')(self.container.show) except DockerError as e: - if e.status == 404 and 'pull access denied' in e.message: - self.short_error = 'image cannot be pulled' - elif 'not found: manifest unknown' in e.message: - self.short_error = 'image not found' + if e.status == 404: + return None raise - image_config, _ = await check_exec_output('docker', 'inspect', self.image_ref_str) - image_configs[self.image_ref_str] = json.loads(image_config)[0] + cstate = c['State'] + status = { + 'state': cstate['Status'], + 'started_at': cstate['StartedAt'], + 'finished_at': cstate['FinishedAt'], + 'out_of_memory': cstate['OOMKilled'], + } + cerror = cstate['Error'] + if cerror: + status['error'] = cerror + else: + status['exit_code'] = cstate['ExitCode'] + + return status async def ensure_image_is_pulled(self, auth=None): try: @@ -546,6 +410,10 @@ async def ensure_image_is_pulled(self, auth=None): docker.images.pull, self.image_ref_str, auth=auth ) + def current_user_access_token(self): + key = base64.b64decode(self.job.gsa_key['key.json']).decode() + return {'username': '_json_key', 'password': key} + async def batch_worker_access_token(self): async with aiohttp.ClientSession(raise_for_status=True, timeout=aiohttp.ClientTimeout(total=60)) as session: async with await request_retry_transient_errors( @@ -557,308 +425,140 @@ async def batch_worker_access_token(self): access_token = (await resp.json())['access_token'] return {'username': 'oauth2accesstoken', 'password': access_token} - def current_user_access_token(self): - key = base64.b64decode(self.job.gsa_key['key.json']).decode() - return {'username': '_json_key', 'password': key} + async def run(self, worker): + try: + with self.step('pulling'): + is_google_image = is_google_registry_domain(self.image_ref.domain) + is_public_image = self.image_ref.name() in PUBLIC_IMAGES - async def extract_rootfs(self): - assert self.rootfs_path - os.makedirs(self.rootfs_path) - await check_shell( - f'id=$(docker create {self.image_id}) && docker export $id | tar -C {self.rootfs_path} -xf - && docker rm $id' - ) - log.info(f'Extracted rootfs for image {self.image_ref_str}') - - async def setup_overlay(self): - lower_dir = self.rootfs_path - upper_dir = f'{self.container_overlay_path}/upper' - work_dir = f'{self.container_overlay_path}/work' - merged_dir = f'{self.container_overlay_path}/merged' - for d in (upper_dir, work_dir, merged_dir): - os.makedirs(d) - await check_shell( - f'mount -t overlay overlay -o lowerdir={lower_dir},upperdir={upper_dir},workdir={work_dir} {merged_dir}' - ) + try: + if not is_google_image: + await self.ensure_image_is_pulled() + elif is_public_image: + auth = await 
self.batch_worker_access_token() + await self.ensure_image_is_pulled(auth=auth) + else: + # Pull to verify this user has access to this + # image. + # FIXME improve the performance of this with a + # per-user image cache. + auth = self.current_user_access_token() + await docker_call_retry(MAX_DOCKER_IMAGE_PULL_SECS, f'{self}')( + docker.images.pull, self.image_ref_str, auth=auth + ) + except DockerError as e: + if e.status == 404: + if 'pull access denied' in e.message: + self.short_error = 'image cannot be pulled' + elif 'not found: manifest unknown' in e.message: + self.short_error = 'image not found' + raise - async def setup_network_namespace(self): - network = self.spec.get('network') - if network is None or network is True: - self.netns = await network_allocator.allocate_public() - else: - assert network == 'private' - self.netns = await network_allocator.allocate_private() - if self.port is not None: - self.host_port = await port_allocator.allocate() - await self.netns.expose_port(self.port, self.host_port) + if self.port is not None: + with self.step('allocating_port'): + self.host_port = await port_allocator.allocate() - async def run_container(self) -> bool: - self.started_at = time_msecs() - try: - await self.write_container_config() - async with async_timeout.timeout(self.timeout): - log.info('Creating the crun run process') - self.process = await asyncio.create_subprocess_exec( - 'crun', - 'run', - '--bundle', - f'{self.container_overlay_path}/merged', - '--config', - f'{self.config_path}/config.json', - self.container_name, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, + with self.step('creating'): + config = self.container_config() + log.info(f'starting {self}') + self.container = await docker_call_retry(MAX_DOCKER_OTHER_OPERATION_SECS, f'{self}')( + create_container, config, name=f'batch-{self.job.batch_id}-job-{self.job.job_id}-{self.name}' ) - await asyncio.gather(self.pipe_to_log(self.process.stdout), self.pipe_to_log(self.process.stderr)) - await self.process.wait() - log.info('crun process completed') - except asyncio.TimeoutError: - return True - finally: - self.finished_at = time_msecs() - return False + c = await docker_call_retry(MAX_DOCKER_OTHER_OPERATION_SECS, f'{self}')(self.container.show) - async def write_container_config(self): - os.makedirs(self.config_path) - with open(f'{self.config_path}/config.json', 'w') as f: - f.write(json.dumps(await self.container_config())) + merged_overlay_path = c['GraphDriver']['Data']['MergedDir'] + assert merged_overlay_path.endswith('/merged') + self.overlay_path = merged_overlay_path[:-7].replace(WORKER_DATA_DISK_MOUNT, '/host') + os.makedirs(f'{self.overlay_path}/', exist_ok=True) - # https://github.com/opencontainers/runtime-spec/blob/master/config.md - async def container_config(self): - uid, gid = await self._get_in_container_user() - weight = worker_fraction_in_1024ths(self.spec['cpu']) - workdir = self.image_config['Config']['WorkingDir'] - default_docker_capabilities = [ - 'CAP_CHOWN', - 'CAP_DAC_OVERRIDE', - 'CAP_FSETID', - 'CAP_FOWNER', - 'CAP_MKNOD', - 'CAP_NET_RAW', - 'CAP_SETGID', - 'CAP_SETUID', - 'CAP_SETFCAP', - 'CAP_SETPCAP', - 'CAP_NET_BIND_SERVICE', - 'CAP_SYS_CHROOT', - 'CAP_KILL', - 'CAP_AUDIT_WRITE', - ] - config = { - 'ociVersion': '1.0.1', - 'root': { - 'path': '.', - 'readonly': False, - }, - 'hostname': self.netns.hostname, - 'mounts': self._mounts(uid, gid), - 'process': { - 'user': { # uid/gid *inside the container* - 'uid': uid, - 'gid': gid, - }, - 'args': 
self.spec['command'], - 'env': self._env(), - 'cwd': workdir if workdir != "" else "/", - 'capabilities': { - 'bounding': default_docker_capabilities, - 'effective': default_docker_capabilities, - 'inheritable': default_docker_capabilities, - 'permitted': default_docker_capabilities, - }, - }, - 'linux': { - 'namespaces': [ - {'type': 'pid'}, - { - 'type': 'network', - 'path': f'/var/run/netns/{self.netns.network_ns_name}', - }, - {'type': 'mount'}, - {'type': 'ipc'}, - {'type': 'uts'}, - {'type': 'cgroup'}, - ], - 'uidMappings': [], - 'gidMappings': [], - 'resources': { - 'cpu': {'shares': weight}, - 'memory': { - 'limit': self.spec['memory'], - 'reservation': self.spec['memory'], - }, - # 'blockIO': {'weight': min(weight, 1000)}, FIXME blkio.weight not supported - }, - 'maskedPaths': [ - '/proc/asound', - '/proc/acpi', - '/proc/kcore', - '/proc/keys', - '/proc/latency_stats', - '/proc/timer_list', - '/proc/timer_stats', - '/proc/sched_debug', - '/proc/scsi', - '/sys/firmware', - ], - 'readonlyPaths': [ - '/proc/bus', - '/proc/fs', - '/proc/irq', - '/proc/sys', - '/proc/sysrq-trigger', - ], - }, - } + await check_shell_output( + f'xfs_quota -x -c "project -s -p {self.overlay_path} {self.job.project_id}" /host/' + ) - if self.spec.get('unconfined'): - config['linux']['maskedPaths'] = [] - config['linux']['readonlyPaths'] = [] - config['process']['apparmorProfile'] = 'unconfined' - config['linux']['seccomp'] = {'defaultAction': "SCMP_ACT_ALLOW"} + with self.step('starting'): + await docker_call_retry(MAX_DOCKER_OTHER_OPERATION_SECS, f'{self}')(start_container, self.container) - return config + timed_out = False + with self.step('running'): + try: + async with async_timeout.timeout(self.timeout): + await docker_call_retry(MAX_DOCKER_WAIT_SECS, f'{self}')(self.container.wait) + except asyncio.TimeoutError: + timed_out = True - async def _get_in_container_user(self): - user = self.image_config['Config']['User'] - if not user: - uid, gid = 0, 0 - elif ":" in user: - uid, gid = user.split(":") - else: - uid, gid = await self._read_user_from_rootfs(user) - return int(uid), int(gid) - - async def _read_user_from_rootfs(self, user) -> Tuple[str, str]: - with open(f'{self.rootfs_path}/etc/passwd', 'r') as passwd: - for record in passwd: - if record.startswith(user): - _, _, uid, gid, _, _, _ = record.split(":") - return uid, gid - raise ValueError("Container user not found in image's /etc/passwd") - - def _mounts(self, uid, gid): - # Only supports empty volumes - external_volumes = [] - volumes = self.image_config['Config']['Volumes'] - if volumes: - for v_container_path in volumes: - if not v_container_path.startswith('/'): - v_container_path = '/' + v_container_path - v_host_path = f'{self.container_scratch}/volumes{v_container_path}' - os.makedirs(v_host_path) - if uid != 0 or gid != 0: - os.chown(v_host_path, uid, gid) - external_volumes.append( - { - 'source': v_host_path, - 'destination': v_container_path, - 'type': 'none', - 'options': ['rbind', 'rw', 'shared'], - } + self.container_status = await self.get_container_status() + + with self.step('uploading_log'): + await worker.log_store.write_log_file( + self.job.format_version, + self.job.batch_id, + self.job.job_id, + self.job.attempt_id, + self.name, + await self.get_container_log(), ) - return ( - self.spec.get('volume_mounts') - + external_volumes - + [ - # Recommended filesystems: - # https://github.com/opencontainers/runtime-spec/blob/master/config-linux.md#default-filesystems - { - 'source': 'proc', - 'destination': '/proc', - 'type': 
'proc', - 'options': ['nosuid', 'noexec', 'nodev'], - }, - { - 'source': 'tmpfs', - 'destination': '/dev', - 'type': 'tmpfs', - 'options': ['nosuid', 'strictatime', 'mode=755', 'size=65536k'], - }, - { - 'source': 'sysfs', - 'destination': '/sys', - 'type': 'sysfs', - 'options': ['nosuid', 'noexec', 'nodev', 'ro'], - }, - { - 'source': 'cgroup', - 'destination': '/sys/fs/cgroup', - 'type': 'cgroup', - 'options': ['nosuid', 'noexec', 'nodev', 'ro'], - }, - { - 'source': 'devpts', - 'destination': '/dev/pts', - 'type': 'devpts', - 'options': ['nosuid', 'noexec', 'nodev'], - }, - { - 'source': 'mqueue', - 'destination': '/dev/mqueue', - 'type': 'mqueue', - 'options': ['nosuid', 'noexec', 'nodev'], - }, - { - 'source': 'shm', - 'destination': '/dev/shm', - 'type': 'tmpfs', - 'options': ['nosuid', 'noexec', 'nodev', 'mode=1777', 'size=67108864'], - }, - { - 'source': f'/etc/netns/{self.netns.network_ns_name}/resolv.conf', - 'destination': '/etc/resolv.conf', - 'type': 'none', - 'options': ['rbind', 'ro'], - }, - { - 'source': f'/etc/netns/{self.netns.network_ns_name}/hosts', - 'destination': '/etc/hosts', - 'type': 'none', - 'options': ['rbind', 'ro'], - }, - ] + with self.step('deleting'): + await self.delete_container() + + if timed_out: + self.short_error = 'timed out' + raise JobTimeoutError(f'timed out after {self.timeout}s') + + if self.container_status['out_of_memory']: + self.short_error = 'out of memory' + + if 'error' in self.container_status: + self.state = 'error' + elif self.container_status['exit_code'] == 0: + self.state = 'succeeded' + else: + self.state = 'failed' + except asyncio.CancelledError: + raise + except Exception as e: + if not isinstance(e, (JobDeletedError, JobTimeoutError)): + log.exception(f'while running {self}') + + self.state = 'error' + self.error = traceback.format_exc() + finally: + await self.delete_container() + + async def get_container_log(self): + logs = await docker_call_retry(MAX_DOCKER_OTHER_OPERATION_SECS, f'{self}')( + self.container.log, stderr=True, stdout=True ) + self.log = "".join(logs) + return self.log - def _env(self): - env = self.image_config['Config']['Env'] + self.spec.get('env', []) - if self.port is not None: - assert self.host_port is not None - env.append(f'HAIL_BATCH_WORKER_PORT={self.host_port}') - env.append(f'HAIL_BATCH_WORKER_IP={IP_ADDRESS}') - return env + async def get_log(self): + if self.container: + return await self.get_container_log() + return self.log async def delete_container(self): - if self.container_is_running(): + if self.container: try: - log.info(f'{self} container is still running, killing crun process') - self.process.terminate() - self.process = None - await check_exec_output('crun', 'kill', '--all', self.container_name, 'SIGTERM') - except asyncio.CancelledError: - raise - except Exception: - log.exception('while deleting container', exc_info=True) - - try: - await check_shell(f'umount -l {self.container_overlay_path}/merged') + log.info(f'{self}: deleting container') + await docker_call_retry(MAX_DOCKER_OTHER_OPERATION_SECS, f'{self}')(stop_container, self.container) + # v=True deletes anonymous volumes created by the container + await docker_call_retry(MAX_DOCKER_OTHER_OPERATION_SECS, f'{self}')( + delete_container, self.container, v=True + ) + self.container = None except asyncio.CancelledError: raise except Exception: - log.exception('while unmounting overlay', exc_info=True) + log.warning('while deleting container, ignoring', exc_info=True) if self.host_port is not None: 
port_allocator.free(self.host_port) self.host_port = None - if self.netns: - network_allocator.free(self.netns) - self.netns = None - async def delete(self): log.info(f'deleting {self}') - self.deleted_event.set() await self.delete_container() # { @@ -867,11 +567,12 @@ async def delete(self): # timing: dict(str, float), # error: str, (optional) # short_error: str, (optional) - # container_status: { + # container_status: { (from docker container state) # state: str, # started_at: str, (date) # finished_at: str, (date) - # out_of_memory: bool, + # out_of_memory: bool + # error: str, (one of error, exit_code will be present) # exit_code: int # } # } @@ -885,52 +586,11 @@ async def status(self, state=None): status['short_error'] = self.short_error if self.container_status: status['container_status'] = self.container_status - elif self.container_is_running(): + elif self.container: status['container_status'] = await self.get_container_status() - return status - - async def get_container_status(self): - if not self.process: - return None - - status = { - 'started_at': self.started_at, - 'finished_at': self.finished_at, - } - if self.container_is_running(): - status['state'] = 'running' - status['out_of_memory'] = False - else: - status['state'] = 'finished' - status['exit_code'] = self.process.returncode - status['out_of_memory'] = self.process.returncode == 137 return status - def container_is_running(self): - return self.process is not None and self.process.returncode is None - - def container_finished(self): - return self.process is not None and self.process.returncode is not None - - async def upload_log(self): - await worker.log_store.write_log_file( - self.job.format_version, - self.job.batch_id, - self.job.job_id, - self.job.attempt_id, - self.name, - await self.get_log(), - ) - - async def get_log(self): - return self.logbuffer.decode() - - async def pipe_to_log(self, strm: Optional[asyncio.StreamReader]): - if strm is not None: - while not strm.at_eof() and not strm.exception(): - self.logbuffer.extend(await strm.readline()) - def __str__(self): return f'container {self.job.id}/{self.name}' @@ -973,13 +633,13 @@ async def add_gcsfuse_bucket(mount_path, bucket, key_file, read_only): delay = await sleep_and_backoff(delay) -def copy_container(job, name, files, volume_mounts, cpu, memory, scratch, requester_pays_project): +def copy_container(job, name, files, volume_mounts, cpu, memory, requester_pays_project): assert files copy_spec = { 'image': BATCH_WORKER_IMAGE, 'name': name, 'command': [ - '/usr/bin/python3', + '/usr/local/bin/python3', '-m', 'batch.copy', json.dumps(requester_pays_project), @@ -988,7 +648,6 @@ def copy_container(job, name, files, volume_mounts, cpu, memory, scratch, reques 'env': ['GOOGLE_APPLICATION_CREDENTIALS=/gsa-key/key.json'], 'cpu': cpu, 'memory': memory, - 'scratch': scratch, 'volume_mounts': volume_mounts, } return Container(job, name, copy_spec) @@ -1103,12 +762,7 @@ def __init__( self.main_volume_mounts = [] self.output_volume_mounts = [] - io_volume_mount = { - 'source': self.io_host_path(), - 'destination': '/io', - 'type': 'none', - 'options': ['rbind', 'rw'], - } + io_volume_mount = f'{self.io_host_path()}:/io' self.input_volume_mounts.append(io_volume_mount) self.main_volume_mounts.append(io_volume_mount) self.output_volume_mounts.append(io_volume_mount) @@ -1117,15 +771,7 @@ def __init__( self.gcsfuse = gcsfuse if gcsfuse: for b in gcsfuse: - b['mounted'] = False - self.main_volume_mounts.append( - { - 'source': self.gcsfuse_path(b["bucket"]), - 
'destination': b['mount_path'], - 'type': 'none', - 'options': ['rbind', 'rw', 'shared'], - } - ) + self.main_volume_mounts.append(f'{self.gcsfuse_path(b["bucket"])}:{b["mount_path"]}:shared') secrets = job_spec.get('secrets') self.secrets = secrets @@ -1214,16 +860,12 @@ def __init__( requester_pays_project = job_spec.get('requester_pays_project') - self.timings = Timings(lambda: False) + if job_spec['process'].get('mount_docker_socket'): + self.main_volume_mounts.append('/var/run/docker.sock:/var/run/docker.sock') if self.secrets: for secret in self.secrets: - volume_mount = { - 'source': self.secret_host_path(secret), - 'destination': secret["mount_path"], - 'type': 'none', - 'options': ['rbind', 'rw'], - } + volume_mount = f'{self.secret_host_path(secret)}:{secret["mount_path"]}' self.main_volume_mounts.append(volume_mount) # this will be the user gsa-key if secret.get('mount_in_copy', False): @@ -1241,7 +883,6 @@ def __init__( self.input_volume_mounts, self.cpu_in_mcpu, self.memory_in_bytes, - self.scratch, requester_pays_project, ) @@ -1268,7 +909,6 @@ def __init__( unconfined = job_spec.get('unconfined') if unconfined: main_spec['unconfined'] = unconfined - main_spec['scratch'] = self.scratch containers['main'] = Container(self, 'main', main_spec) if output_files: @@ -1279,15 +919,11 @@ def __init__( self.output_volume_mounts, self.cpu_in_mcpu, self.memory_in_bytes, - self.scratch, requester_pays_project, ) self.containers = containers - def step(self, name: str): - return self.timings.step(name) - async def setup_io(self): if not worker_config.job_private: if worker.data_disk_space_remaining.value < self.external_storage_in_gib: @@ -1333,8 +969,7 @@ async def run(self, worker): os.makedirs(f'{self.scratch}/') - with self.step('setup_io'): - await self.setup_io() + await self.setup_io() if not self.disk: data_disk_storage_in_bytes = storage_gib_to_bytes( @@ -1343,30 +978,25 @@ async def run(self, worker): else: data_disk_storage_in_bytes = storage_gib_to_bytes(self.data_disk_storage_in_gib) - with self.step('configuring xfsquota'): - # Quota will not be applied to `/io` if the job has an attached disk mounted there - await check_shell_output(f'xfs_quota -x -c "project -s -p {self.scratch} {self.project_id}" /host/') - await check_shell_output( - f'xfs_quota -x -c "limit -p bsoft={data_disk_storage_in_bytes} bhard={data_disk_storage_in_bytes} {self.project_id}" /host/' - ) + await check_shell_output(f'xfs_quota -x -c "project -s -p {self.scratch} {self.project_id}" /host/') + await check_shell_output( + f'xfs_quota -x -c "limit -p bsoft={data_disk_storage_in_bytes} bhard={data_disk_storage_in_bytes} {self.project_id}" /host/' + ) - with self.step('populating secrets'): - if self.secrets: - for secret in self.secrets: - populate_secret_host_path(self.secret_host_path(secret), secret['data']) - - with self.step('adding gcsfuse bucket'): - if self.gcsfuse: - populate_secret_host_path(self.gsa_key_file_path(), self.gsa_key) - for b in self.gcsfuse: - bucket = b['bucket'] - await add_gcsfuse_bucket( - mount_path=self.gcsfuse_path(bucket), - bucket=bucket, - key_file=f'{self.gsa_key_file_path()}/key.json', - read_only=b['read_only'], - ) - b['mounted'] = True + if self.secrets: + for secret in self.secrets: + populate_secret_host_path(self.secret_host_path(secret), secret['data']) + + if self.gcsfuse: + populate_secret_host_path(self.gsa_key_file_path(), self.gsa_key) + for b in self.gcsfuse: + bucket = b['bucket'] + await add_gcsfuse_bucket( + mount_path=self.gcsfuse_path(bucket), + 
bucket=bucket, + key_file=f'{self.gsa_key_file_path()}/key.json', + read_only=b['read_only'], + ) self.state = 'running' @@ -1407,17 +1037,16 @@ async def run(self, worker): self.state = 'error' self.error = traceback.format_exc() finally: - with self.step('post-job finally block'): - if self.disk: - try: - await self.disk.delete() - log.info(f'deleted disk {self.disk.name} for {self.id}') - except Exception: - log.exception(f'while detaching and deleting disk {self.disk.name} for {self.id}') - else: - worker.data_disk_space_remaining.value += self.external_storage_in_gib + if self.disk: + try: + await self.disk.delete() + log.info(f'deleted disk {self.disk.name} for {self.id}') + except Exception: + log.exception(f'while detaching and deleting disk {self.disk.name} for {self.id}') + else: + worker.data_disk_space_remaining.value += self.external_storage_in_gib - await self.cleanup() + await self.cleanup() async def cleanup(self): self.end_time = time_msecs() @@ -1430,12 +1059,10 @@ async def cleanup(self): try: if self.gcsfuse: for b in self.gcsfuse: - if b['mounted']: - bucket = b['bucket'] - mount_path = self.gcsfuse_path(bucket) - await check_shell(f'fusermount -u {mount_path}') - log.info(f'unmounted gcsfuse bucket {bucket} from {mount_path}') - b['mounted'] = False + bucket = b['bucket'] + mount_path = self.gcsfuse_path(bucket) + await check_shell(f'fusermount -u {mount_path}') + log.info(f'unmounted gcsfuse bucket {bucket} from {mount_path}') await check_shell(f'xfs_quota -x -c "limit -p bsoft=0 bhard=0 {self.project_id}" /host') @@ -1450,13 +1077,13 @@ async def get_log(self): async def delete(self): await super().delete() - await asyncio.wait([c.delete() for c in self.containers.values()]) + for c in self.containers.values(): + await c.delete() async def status(self): status = await super().status() cstatuses = {name: await c.status() for name, c in self.containers.items()} status['container_statuses'] = cstatuses - status['timing'] = self.timings.to_dict() return status @@ -1691,27 +1318,21 @@ def __init__(self): self.task_manager = aiotools.BackgroundTaskManager() self.jar_download_locks = defaultdict(asyncio.Lock) - self.rootfs_locks = defaultdict(asyncio.Lock) - self.image_ref_count = Counter({BATCH_WORKER_IMAGE_ID: 1}) - # filled in during activation self.log_store = None self.headers = None self.compute_client = None - async def shutdown(self): + def shutdown(self): self.task_manager.shutdown() - if self.compute_client: - await self.compute_client.close() async def run_job(self, job): try: await job.run(self) except asyncio.CancelledError: raise - except Exception as e: - if not user_error(e): - log.exception(f'while running {job}, ignoring') + except Exception: + log.exception(f'while running {job}, ignoring') async def create_job_1(self, request): body = await request.json() @@ -1833,7 +1454,6 @@ async def run(self): site = web.TCPSite(app_runner, '0.0.0.0', 5000) await site.start() - self.task_manager.ensure_future(periodically_call(60, self.cleanup_old_images)) try: while True: try: @@ -2009,40 +1629,19 @@ async def activate(self): self.headers = {'X-Hail-Instance-Name': NAME, 'Authorization': f'Bearer {resp_json["token"]}'} self.active = True - async def cleanup_old_images(self): - try: - async with image_lock.writer_lock: - log.info(f"Obtained writer lock. 
The image ref counts are: {self.image_ref_count}") - for image_id in list(self.image_ref_count.keys()): - if self.image_ref_count[image_id] == 0: - assert image_id != BATCH_WORKER_IMAGE_ID - log.info(f'Found an unused image with ID {image_id}') - await check_shell(f'docker rmi -f {image_id}') - image_path = f'/host/rootfs/{image_id}' - await blocking_to_async(self.pool, shutil.rmtree, image_path) - del self.image_ref_count[image_id] - log.info(f'Deleted image from cache with ID {image_id}') - except asyncio.CancelledError: - raise - except Exception as e: - log.exception(f'Error while deleting unused image: {e}') - async def async_main(): - global port_allocator, network_allocator, worker, docker + global port_allocator, worker, docker docker = aiodocker.Docker() port_allocator = PortAllocator() - network_allocator = NetworkAllocator() - await network_allocator.reserve() - worker = Worker() try: await worker.run() finally: try: - await worker.shutdown() + worker.shutdown() log.info('worker shutdown') finally: await docker.close() diff --git a/batch/sql/add-frozen-mode.sql b/batch/sql/add-frozen-mode.sql deleted file mode 100644 index 675e055b17e..00000000000 --- a/batch/sql/add-frozen-mode.sql +++ /dev/null @@ -1 +0,0 @@ -ALTER TABLE globals ADD COLUMN `frozen` BOOLEAN NOT NULL DEFAULT FALSE; diff --git a/batch/sql/estimated-current.sql b/batch/sql/estimated-current.sql index 326e82fba13..10363073720 100644 --- a/batch/sql/estimated-current.sql +++ b/batch/sql/estimated-current.sql @@ -1,8 +1,7 @@ CREATE TABLE IF NOT EXISTS `globals` ( `instance_id` VARCHAR(100) NOT NULL, `internal_token` VARCHAR(100) NOT NULL, - `n_tokens` INT NOT NULL, - `frozen` BOOLEAN NOT NULL DEFAULT FALSE + `n_tokens` INT NOT NULL ) ENGINE = InnoDB; CREATE TABLE IF NOT EXISTS `resources` ( diff --git a/batch/test/test_accounts.py b/batch/test/test_accounts.py index 18d4a214882..b5c5c6af5a3 100644 --- a/batch/test/test_accounts.py +++ b/batch/test/test_accounts.py @@ -28,9 +28,8 @@ async def factory(project): @pytest.fixture async def dev_client() -> AsyncGenerator[BatchClient, Any]: - bc = BatchClient( - 'billing-project-not-needed-but-required-by-BatchClient', token_file=os.environ['HAIL_TEST_DEV_TOKEN_FILE'] - ) + bc = BatchClient('billing-project-not-needed-but-required-by-BatchClient', + token_file=os.environ['HAIL_TEST_DEV_TOKEN_FILE']) yield bc await bc.close() @@ -102,7 +101,7 @@ async def test_get_billing_project(make_client): c = await make_client('billing-project-not-needed-but-required-by-BatchClient') r = await c.get_billing_project('test') assert r['billing_project'] == 'test', r - assert {'test', 'test-dev'}.issubset(set(r['users'])), r + assert set(r['users']) == {'test', 'test-dev'}, r assert r['status'] == 'open', r @@ -113,7 +112,7 @@ async def test_list_billing_projects(make_client): assert len(test_bps) == 1, r bp = test_bps[0] assert bp['billing_project'] == 'test', bp - assert {'test', 'test-dev'}.issubset(set(bp['users'])), bp + assert set(bp['users']) == {'test', 'test-dev'}, bp assert bp['status'] == 'open', bp @@ -599,9 +598,9 @@ async def test_batch_cannot_be_accessed_by_users_outside_the_billing_project( async def test_deleted_open_batches_do_not_prevent_billing_project_closure( - dev_client: BatchClient, - make_client: Callable[[str], Awaitable[BatchClient]], - get_billing_project_name: Callable[[], str], + dev_client: BatchClient, + make_client: Callable[[str], Awaitable[BatchClient]], + get_billing_project_name: Callable[[], str] ): try: project = await 
dev_client.create_billing_project(get_billing_project_name()) diff --git a/batch/test/test_batch.py b/batch/test/test_batch.py index 6f2396b3888..021561b78f3 100644 --- a/batch/test/test_batch.py +++ b/batch/test/test_batch.py @@ -75,7 +75,9 @@ def test_bad_command(client): j = builder.create_job(DOCKER_ROOT_IMAGE, ['sleep 5']) builder.submit() status = j.wait() - assert status['state'] == 'Failed', str(status) + assert j._get_exit_codes(status) == {'main': None}, status + assert j._get_error(status, 'main') is not None + assert status['state'] == 'Error', str(status) def test_invalid_resource_requests(client): @@ -132,46 +134,6 @@ def test_out_of_storage(client): assert "fallocate failed: No space left on device" in j.log()['main'] -def test_quota_applies_to_volume(client): - builder = client.create_batch() - resources = {'cpu': '0.25', 'memory': '10M', 'storage': '5Gi'} - j = builder.create_job( - os.environ['HAIL_VOLUME_IMAGE'], ['/bin/sh', '-c', 'fallocate -l 100GiB /data/foo'], resources=resources - ) - builder.submit() - status = j.wait() - assert status['state'] == 'Failed', str(status) - assert "fallocate failed: No space left on device" in j.log()['main'] - - -def test_quota_shared_by_io_and_rootfs(client): - builder = client.create_batch() - resources = {'cpu': '0.25', 'memory': '10M', 'storage': '10Gi'} - j = builder.create_job(DOCKER_ROOT_IMAGE, ['/bin/sh', '-c', 'fallocate -l 7GiB /foo'], resources=resources) - builder.submit() - status = j.wait() - assert status['state'] == 'Success', str(status) - - builder = client.create_batch() - resources = {'cpu': '0.25', 'memory': '10M', 'storage': '10Gi'} - j = builder.create_job(DOCKER_ROOT_IMAGE, ['/bin/sh', '-c', 'fallocate -l 7GiB /io/foo'], resources=resources) - builder.submit() - status = j.wait() - assert status['state'] == 'Success', str(status) - - builder = client.create_batch() - resources = {'cpu': '0.25', 'memory': '10M', 'storage': '10Gi'} - j = builder.create_job( - DOCKER_ROOT_IMAGE, - ['/bin/sh', '-c', 'fallocate -l 7GiB /foo; fallocate -l 7GiB /io/foo'], - resources=resources, - ) - builder.submit() - status = j.wait() - assert status['state'] == 'Failed', str(status) - assert "fallocate failed: No space left on device" in j.log()['main'] - - def test_nonzero_storage(client): builder = client.create_batch() resources = {'cpu': '0.25', 'memory': '10M', 'storage': '20Gi'} @@ -190,15 +152,6 @@ def test_attached_disk(client): assert status['state'] == 'Success', str((status, j.log())) -def test_cwd_from_image_workdir(client): - builder = client.create_batch() - j = builder.create_job(os.environ['HAIL_WORKDIR_IMAGE'], ['/bin/sh', '-c', 'pwd']) - builder.submit() - status = j.wait() - assert status['state'] == 'Success', str(status) - assert "/work" in j.log()['main'] - - def test_unsubmitted_state(client): builder = client.create_batch() j = builder.create_job(DOCKER_ROOT_IMAGE, ['echo', 'test']) @@ -274,9 +227,8 @@ def test_list_jobs(client): j_error.wait() def assert_job_ids(expected, q=None): - jobs = b.jobs(q=q) - actual = set([j['job_id'] for j in jobs]) - assert actual == expected, f'Expected {expected} job IDs, but got jobs: {jobs}' + actual = set([j['job_id'] for j in b.jobs(q=q)]) + assert actual == expected assert_job_ids({j_success.job_id}, 'success') assert_job_ids({j_success.job_id, j_failure.job_id, j_error.job_id}, 'done') @@ -656,14 +608,7 @@ def test_verify_no_access_to_metadata_server(client): builder.submit() status = j.wait() assert status['state'] == 'Failed', str(status) - assert "Could not resolve 
host" in j.log()['main'], (str(j.log()['main']), status) - - builder = client.create_batch() - j = builder.create_job(os.environ['HAIL_CURL_IMAGE'], ['curl', '-fsSL', '169.254.169.254', '--max-time', '10']) - builder.submit() - status = j.wait() - assert status['state'] == 'Failed', str(status) - assert "Connection timed out" in j.log()['main'], (str(j.log()['main']), status) + assert "Connection timed out" in j.log()['main'], str(j.log()['main'], status) def test_can_use_google_credentials(client): diff --git a/benchmark/python/benchmark_hail/run/matrix_table_benchmarks.py b/benchmark/python/benchmark_hail/run/matrix_table_benchmarks.py index a1d101b4271..4f2dc3d4d21 100644 --- a/benchmark/python/benchmark_hail/run/matrix_table_benchmarks.py +++ b/benchmark/python/benchmark_hail/run/matrix_table_benchmarks.py @@ -352,16 +352,16 @@ def kyle_sex_specific_qc(mt_path): @benchmark() -def matrix_table_scan_count_rows_2(): +def matrix_table_scan_count_rows(): mt = hl.utils.range_matrix_table(n_rows=200_000_000, n_cols=10, n_partitions=16) - mt = mt.annotate_rows(x=hl.scan.count()) + mt.annotate_rows(x=hl.scan.count()) mt._force_count_rows() @benchmark() -def matrix_table_scan_count_cols_2(): +def matrix_table_scan_count_cols(): mt = hl.utils.range_matrix_table(n_cols=10_000_000, n_rows=10) - mt = mt.annotate_cols(x=hl.scan.count()) + mt.annotate_cols(x=hl.scan.count()) mt._force_count_rows() diff --git a/build.yaml b/build.yaml index b090895c715..5bf04a25af8 100644 --- a/build.yaml +++ b/build.yaml @@ -53,10 +53,6 @@ steps: dockerFile: /io/echo/Dockerfile contextPath: /io/echo publishAs: echo - resources: - storage: 10Gi - cpu: "2" - memory: 7.5Gi inputs: - from: /repo/echo to: /io/echo @@ -126,10 +122,6 @@ steps: dockerFile: /io/repo/docker/Dockerfile.service-base contextPath: /io/repo publishAs: service-base - resources: - storage: 10Gi - cpu: "2" - memory: 7.5Gi dependsOn: - base_image - merge_code @@ -487,10 +479,6 @@ steps: dockerFile: /io/hail/Dockerfile.hail-run contextPath: /io/hail publishAs: hail-run - resources: - storage: 10Gi - cpu: "2" - memory: 7.5Gi inputs: - from: /repo/hail to: /io/hail @@ -798,7 +786,6 @@ steps: to: /io/repo/hail_version dependsOn: - merge_code - - hail_ubuntu_image - kind: buildImage2 name: service_java_run_base_image dockerFile: /io/repo/docker/Dockerfile.service-java-run-base @@ -1112,10 +1099,6 @@ steps: dockerFile: /io/repo/hail/Dockerfile.hail-base contextPath: /io/repo publishAs: hail-base - resources: - storage: 10Gi - cpu: "2" - memory: 7.5Gi inputs: - from: /repo to: /io/repo @@ -2296,8 +2279,6 @@ steps: script: /io/sql/do-not-lock-entire-batch.sql - name: add-fail-fast script: /io/sql/add-fail-fast.sql - - name: add-frozen-mode - script: /io/sql/add-frozen-mode.sql inputs: - from: /repo/batch/sql to: /io/sql @@ -2574,22 +2555,6 @@ steps: RUN hail-apt-get-install netcat dependsOn: - hail_ubuntu_image - - kind: buildImage2 - name: volume_image - dockerFile: - inline: | - FROM {{ hail_ubuntu_image.image }} - VOLUME ["/data"] - dependsOn: - - hail_ubuntu_image - - kind: buildImage2 - name: workdir_image - dockerFile: - inline: | - FROM {{ hail_ubuntu_image.image }} - WORKDIR ["/work"] - dependsOn: - - hail_ubuntu_image - kind: buildImage2 name: curl_image dockerFile: @@ -2652,8 +2617,6 @@ steps: export HAIL_CURL_IMAGE={{ curl_image.image }} export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} export HAIL_NETCAT_UBUNTU_IMAGE={{ netcat_ubuntu_image.image }} - export HAIL_VOLUME_IMAGE={{ volume_image.image }} - export HAIL_WORKDIR_IMAGE={{ 
workdir_image.image }} export HAIL_HAIL_BASE_IMAGE={{ hail_base_image.image }} export DOCKER_PREFIX="{{ global.docker_prefix }}" export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" @@ -2709,8 +2672,6 @@ steps: - ci_utils_image - deploy_batch - netcat_ubuntu_image - - volume_image - - workdir_image - curl_image - kind: runImage name: test_batch_1 @@ -2726,8 +2687,6 @@ steps: export HAIL_CURL_IMAGE={{ curl_image.image }} export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} export HAIL_NETCAT_UBUNTU_IMAGE={{ netcat_ubuntu_image.image }} - export HAIL_VOLUME_IMAGE={{ volume_image.image }} - export HAIL_WORKDIR_IMAGE={{ workdir_image.image }} export HAIL_HAIL_BASE_IMAGE={{ hail_base_image.image }} export DOCKER_PREFIX="{{ global.docker_prefix }}" export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" @@ -2783,8 +2742,6 @@ steps: - ci_utils_image - deploy_batch - netcat_ubuntu_image - - volume_image - - workdir_image - curl_image - kind: runImage name: test_batch_2 @@ -2800,8 +2757,6 @@ steps: export HAIL_CURL_IMAGE={{ curl_image.image }} export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} export HAIL_NETCAT_UBUNTU_IMAGE={{ netcat_ubuntu_image.image }} - export HAIL_VOLUME_IMAGE={{ volume_image.image }} - export HAIL_WORKDIR_IMAGE={{ workdir_image.image }} export HAIL_HAIL_BASE_IMAGE={{ hail_base_image.image }} export DOCKER_PREFIX="{{ global.docker_prefix }}" export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" @@ -2857,8 +2812,6 @@ steps: - ci_utils_image - deploy_batch - netcat_ubuntu_image - - volume_image - - workdir_image - curl_image - kind: runImage name: test_batch_3 @@ -2874,8 +2827,6 @@ steps: export HAIL_CURL_IMAGE={{ curl_image.image }} export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} export HAIL_NETCAT_UBUNTU_IMAGE={{ netcat_ubuntu_image.image }} - export HAIL_VOLUME_IMAGE={{ volume_image.image }} - export HAIL_WORKDIR_IMAGE={{ workdir_image.image }} export HAIL_HAIL_BASE_IMAGE={{ hail_base_image.image }} export DOCKER_PREFIX="{{ global.docker_prefix }}" export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" @@ -2931,8 +2882,6 @@ steps: - ci_utils_image - deploy_batch - netcat_ubuntu_image - - volume_image - - workdir_image - curl_image - kind: runImage name: test_batch_4 @@ -2948,8 +2897,6 @@ steps: export HAIL_CURL_IMAGE={{ curl_image.image }} export HAIL_DEFAULT_NAMESPACE={{ default_ns.name }} export HAIL_NETCAT_UBUNTU_IMAGE={{ netcat_ubuntu_image.image }} - export HAIL_VOLUME_IMAGE={{ volume_image.image }} - export HAIL_WORKDIR_IMAGE={{ workdir_image.image }} export HAIL_HAIL_BASE_IMAGE={{ hail_base_image.image }} export DOCKER_PREFIX="{{ global.docker_prefix }}" export DOCKER_ROOT_IMAGE="{{ global.docker_root_image }}" @@ -3005,8 +2952,6 @@ steps: - ci_utils_image - deploy_batch - netcat_ubuntu_image - - volume_image - - workdir_image - curl_image - kind: runImage name: delete_test_billing_projects diff --git a/ci/ci/build.py b/ci/ci/build.py index f138c5b33e9..e07dcc6526f 100644 --- a/ci/ci/build.py +++ b/ci/ci/build.py @@ -319,7 +319,7 @@ def build(self, batch, code, scope): /bin/sh /home/user/convert-google-application-credentials-to-docker-auth-config set -x -export BUILDKITD_FLAGS='--oci-worker-no-process-sandbox --oci-worker-snapshotter=overlayfs' +export BUILDKITD_FLAGS=--oci-worker-no-process-sandbox export BUILDCTL_CONNECT_RETRIES_MAX=100 # https://github.com/moby/buildkit/issues/1423 buildctl-daemonless.sh \ build \ @@ -354,7 +354,6 @@ def build(self, batch, code, scope): resources=self.resources, input_files=input_files, 
parents=self.deps_parents(), - network='private', unconfined=True, ) diff --git a/ci/ci/ci.py b/ci/ci/ci.py index b8b4f9b3ee0..fdd2e53479d 100644 --- a/ci/ci/ci.py +++ b/ci/ci/ci.py @@ -15,7 +15,7 @@ rest_authenticated_developers_only, rest_authenticated_users_only, web_authenticated_developers_only, - monitor_endpoints_middleware, + monitor_endpoint, ) from web_common import setup_aiohttp_jinja2, setup_common_static_routes, render_template @@ -32,6 +32,7 @@ @routes.get('') @routes.get('/') +@monitor_endpoint @web_authenticated_developers_only() async def index(request, userdata): # pylint: disable=unused-argument # Redirect to /batches. @@ -39,6 +40,7 @@ async def index(request, userdata): # pylint: disable=unused-argument @routes.get('/batches') +@monitor_endpoint @web_authenticated_developers_only() async def get_batches(request, userdata): batch_client = request.app['batch_client'] @@ -49,6 +51,7 @@ async def get_batches(request, userdata): @routes.get('/batches/{batch_id}') +@monitor_endpoint @web_authenticated_developers_only() async def get_batch(request, userdata): batch_id = int(request.match_info['batch_id']) @@ -63,6 +66,7 @@ async def get_batch(request, userdata): @routes.get('/batches/{batch_id}/jobs/{job_id}') +@monitor_endpoint @web_authenticated_developers_only() async def get_job(request, userdata): batch_id = int(request.match_info['batch_id']) @@ -80,6 +84,7 @@ async def get_job(request, userdata): @routes.post('/api/v1alpha/dev_deploy_branch') +@monitor_endpoint @rest_authenticated_developers_only async def dev_deploy_branch(request, userdata): app = request.app @@ -184,7 +189,7 @@ async def on_cleanup(app): def run(): - app = web.Application(middlewares=[monitor_endpoints_middleware]) + app = web.Application() setup_aiohttp_jinja2(app, 'ci') setup_aiohttp_session(app) diff --git a/ci/test/resources/build.yaml b/ci/test/resources/build.yaml index 808ba570dde..bf8f1b377f9 100644 --- a/ci/test/resources/build.yaml +++ b/ci/test/resources/build.yaml @@ -94,10 +94,6 @@ steps: dockerFile: /io/repo/docker/Dockerfile.service-base contextPath: /io/repo/ publishAs: service-base - resources: - storage: 10Gi - cpu: "2" - memory: 7.5Gi dependsOn: - base_image - merge_code @@ -156,10 +152,6 @@ steps: dockerFile: /io/repo/ci/test/resources/Dockerfile contextPath: /io/repo publishAs: ci-hello - resources: - storage: 10Gi - cpu: "2" - memory: 7.5Gi inputs: - from: /repo to: /io/repo diff --git a/datasets/notebooks/GTEx_MatrixTables.ipynb b/datasets/notebooks/GTEx_MatrixTables.ipynb deleted file mode 100644 index 357dc5c0e8f..00000000000 --- a/datasets/notebooks/GTEx_MatrixTables.ipynb +++ /dev/null @@ -1,383 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## GTEx MatrixTables" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To create MatrixTables containing all variant-gene associations tested in each tissue (including non-significant associations) for [GTEx](https://gtexportal.org/home/datasets) v8.\n", - "\n", - "There are two MatrixTables, one is for the eQTL tissue-specific all SNP gene associations data and the other is for the sQTL tissue-specific all SNP gene associations data. \n", - "\n", - "Hail Tables for each tissue were already created previously from the data [here](https://console.cloud.google.com/storage/browser/hail-datasets-tmp/GTEx/GTEx_Analysis_v8_QTLs). For eQTL each table is ~7 GiB, and for sQTL each table is ~40 GiB or so. 
A README describing the fields in the GTEx QTL datasets is available [here](https://storage.googleapis.com/gtex_analysis_v8/single_tissue_qtl_data/README_eQTL_v8.txt).\n", - "\n", - "Each MatrixTable has rows keyed by `[\"locus\", \"alleles\"]`, and columns keyed by `[\"tissue\"]`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import subprocess\n", - "import hail as hl\n", - "hl.init()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "First we can grab a list of the GTEx tissue names:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "list_tissues = subprocess.run([\"gsutil\", \"-u\", \"broad-ctsa\", \"ls\", \n", - " \"gs://hail-datasets-tmp/GTEx/GTEx_Analysis_v8_QTLs/GTEx_Analysis_v8_eQTL_all_associations\"], \n", - " stdout=subprocess.PIPE)\n", - "tissue_files = list_tissues.stdout.decode(\"utf-8\").split()\n", - "tissue_names = [x.split(\"/\")[-1].split(\".\")[0] for x in tissue_files]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Take a peek at the tissue names we get to make sure they're what we expect:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "tissue_names[0:5]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can start with the process for the eQTL tables since they are smaller and a bit easier to work with. There are pretty much three steps here\n", - " - Generate individual MatrixTables from the existing Hail Tables for each tissue type, there are 49 tissue types in total.\n", - " - Perform a multi-way union cols (MWUC) on these 49 MatrixTables to create a single MatrixTable where there is a column for each tissue.\n", - " - After the MWUC the resulting MatrixTable has pretty imbalanced partitions (some are KiBs, others are GiBs) so we have to repartition the unioned MatrixTable." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### eQTL tissue-specific all SNP gene associations" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Generate individual MatrixTables from the existing Hail Tables for each tissue type (49 total).\n", - "\n", - "Write output to `gs://hail-datasets-tmp/GTEx/eQTL_MatrixTables/`." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for tissue_name in tissue_names:\n", - " print(f\"eQTL: {tissue_name}\")\n", - " ht = hl.read_table(f\"gs://hail-datasets-us/GTEx_eQTL_allpairs_{tissue_name}_v8_GRCh38.ht\", _n_partitions=64)\n", - "\n", - " ht = ht.annotate(_gene_id = ht.gene_id, _tss_distance = ht.tss_distance)\n", - " ht = ht.drop(\"variant_id\", \"metadata\")\n", - " ht = ht.key_by(\"locus\", \"alleles\", \"_gene_id\", \"_tss_distance\")\n", - " ht = ht.annotate(**{tissue_name: ht.row_value.drop(\"gene_id\", \"tss_distance\")})\n", - " ht = ht.select(tissue_name)\n", - "\n", - " mt = ht.to_matrix_table_row_major(columns=[tissue_name], col_field_name=\"tissue\")\n", - " mt = mt.checkpoint(\n", - " f\"gs://hail-datasets-tmp/GTEx/eQTL_MatrixTables/GTEx_eQTL_all_snp_gene_associations_{tissue_name}_v8_GRCh38.mt\", \n", - " overwrite=False,\n", - " _read_if_exists=True\n", - " )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To ensure that everything is joined correctly later on, we add both the `_gene_id` and `tss_distance` fields to the table keys here. \n", - "\n", - "After the unioned MatrixTable is created we will re-key the rows to just be `[\"locus\", \"alleles\"]`, and rename the fields above back to `gene_id` and `tss_distance` (they will now be row fields)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Perform multi-way union cols (MWUC) on MatrixTables generated above" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The function below was used to take a list of MatrixTables and a list with the column key fields and output a single MatrixTable with the columns unioned." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from typing import List\n", - "def multi_way_union_cols(mts: List[hl.MatrixTable], column_keys: List[str]) -> hl.MatrixTable:\n", - " missing_struct = \"struct{ma_samples: int32, ma_count: int32, maf: float64, pval_nominal: float64, slope: float64, slope_se: float64}\"\n", - " \n", - " mts = [mt._localize_entries(\"_mt_entries\", \"_mt_cols\") for mt in mts]\n", - " \n", - " joined = hl.Table.multi_way_zip_join(mts, \"_t_entries\", \"_t_cols\")\n", - " joined = joined.annotate(_t_entries_missing = joined._t_entries.map(lambda x: hl.is_missing(x)))\n", - " \n", - " rows = [(r, joined._t_entries.map(lambda x: x[r])[0])\n", - " for r in joined._t_entries.dtype.element_type.fields \n", - " if r != \"_mt_entries\"]\n", - " \"\"\"\n", - " Need to provide a dummy array for if tissues are not present to make sure missing elements not\n", - " dropped from flattened array. 
\n", - " \n", - " Otherwise we will get a HailException: length mismatch between entry array and column array in \n", - " 'to_matrix_table_row_major'.\n", - " \"\"\"\n", - " entries = [(\"_t_entries_flatten\", \n", - " hl.flatten(\n", - " joined._t_entries.map(\n", - " lambda x: hl.if_else(\n", - " hl.is_defined(x), \n", - " x._mt_entries,\n", - " hl.array([\n", - " hl.struct(\n", - " ma_samples = hl.missing(hl.tint32), \n", - " ma_count = hl.missing(hl.tint32), \n", - " maf = hl.missing(hl.tfloat64), \n", - " pval_nominal = hl.missing(hl.tfloat64), \n", - " slope = hl.missing(hl.tfloat64), \n", - " slope_se = hl.missing(hl.tfloat64)\n", - " )\n", - " ])\n", - " )\n", - " )\n", - " )\n", - " )]\n", - " joined = joined.annotate(**dict(rows + entries))\n", - " \"\"\"\n", - " Also want to make sure that if entry is missing, it is replaced with a missing struct of the same form\n", - " at the same index in the array.\n", - " \"\"\"\n", - " joined = joined.annotate(_t_entries_new = hl.zip(joined._t_entries_missing, \n", - " joined._t_entries_flatten, \n", - " fill_missing=False))\n", - " joined = joined.annotate(\n", - " _t_entries_new = joined._t_entries_new.map(\n", - " lambda x: hl.if_else(x[0] == True, hl.missing(missing_struct), x[1])\n", - " )\n", - " ) \n", - " joined = joined.annotate_globals(_t_cols = hl.flatten(joined._t_cols.map(lambda x: x._mt_cols)))\n", - " joined = joined.drop(\"_t_entries\", \"_t_entries_missing\", \"_t_entries_flatten\")\n", - " mt = joined._unlocalize_entries(\"_t_entries_new\", \"_t_cols\", [\"tissue\"])\n", - " return mt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now we can read in each individual MatrixTable and add it to the list we will pass to `multi_way_union_cols`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Get list of file paths for individual eQTL MatrixTables\n", - "list_eqtl_mts = subprocess.run([\"gsutil\", \"-u\", \"broad-ctsa\", \"ls\", \"gs://hail-datasets-tmp/GTEx/eQTL_MatrixTables\"], \n", - " stdout=subprocess.PIPE)\n", - "eqtl_mts = list_eqtl_mts.stdout.decode(\"utf-8\").split()\n", - "\n", - "# Load MatrixTables for each tissue type to store in list for MWUC\n", - "mts_list = []\n", - "for eqtl_mt in eqtl_mts:\n", - " tissue_name = eqtl_mt.replace(\"gs://hail-datasets-tmp/GTEx/eQTL_MatrixTables/GTEx_eQTL_all_snp_gene_associations_\", \"\")\n", - " tissue_name = tissue_name.replace(\"_v8_GRCh38.mt/\", \"\")\n", - " print(tissue_name)\n", - " \n", - " mt = hl.read_matrix_table(eqtl_mt)\n", - " mts_list.append(mt)\n", - "\n", - "full_mt = multi_way_union_cols(mts_list, [\"tissue\"])\n", - "full_mt = full_mt.checkpoint(\"gs://hail-datasets-tmp/GTEx/checkpoints/GTEx_eQTL_all_snp_gene_associations_cols_unioned.mt\", \n", - " overwrite=False,\n", - " _read_if_exists=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Repartition unioned MatrixTable" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "After the MWUC the resulting MatrixTable has pretty imbalanced partitions (some are KiBs, others are GiBs) so we want to repartition the unioned MatrixTable. 
\n", - "\n", - "First we can re-key the rows of our MatrixTable:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Re-key rows and repartition\n", - "full_mt = hl.read_matrix_table(\"gs://hail-datasets-tmp/GTEx/checkpoints/GTEx_eQTL_all_snp_gene_associations_cols_unioned.mt\", \n", - " _n_partitions=1000)\n", - "full_mt = full_mt.key_rows_by(\"locus\", \"alleles\")\n", - "full_mt = full_mt.checkpoint(\"gs://hail-datasets-tmp/GTEx/GTEx_eQTL_all_snp_gene_associations.mt\", \n", - " overwrite=False, \n", - " _read_if_exists=True)\n", - "full_mt.describe()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "I tried reading in the MatrixTable with `_n_partitions=1000` to see how our partitions would look, but we still had a few that were much larger than the rest. So after this I ended up doing using `repartition` with a full shuffle, and it balanced things out." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Add metadata to globals and write final MatrixTable to hail-datasets-us\n", - "full_mt = hl.read_matrix_table(\"gs://hail-datasets-tmp/GTEx/GTEx_eQTL_all_snp_gene_associations.mt\")\n", - "full_mt = full_mt.repartition(1000, shuffle=True)\n", - "\n", - "n_rows, n_cols = full_mt.count()\n", - "n_partitions = full_mt.n_partitions()\n", - "\n", - "full_mt = full_mt.rename({\"_gene_id\": \"gene_id\", \"_tss_distance\": \"tss_distance\"})\n", - "full_mt = full_mt.annotate_globals(\n", - " metadata = hl.struct(name = \"GTEx_eQTL_all_snp_gene_associations\",\n", - " reference_genome = \"GRCh38\",\n", - " n_rows = n_rows,\n", - " n_cols = n_cols,\n", - " n_partitions = n_partitions)\n", - ")\n", - "# Final eQTL MatrixTable is ~224 GiB w/ 1000 partitions\n", - "full_mt.write(\"gs://hail-datasets-us/GTEx_eQTL_all_snp_gene_associations_v8_GRCh38.mt\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "And now we have a single MatrixTable for the GTEx eQTL data." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "hl.read_matrix_table(\"gs://hail-datasets-us/GTEx_eQTL_all_snp_gene_associations_v8_GRCh38.mt\").describe()" - ] - }, - { - "cell_type": "raw", - "metadata": { - "pycharm": { - "name": "#%% raw\n" - } - }, - "source": [ - "----------------------------------------\n", - "Global fields:\n", - " 'metadata': struct {\n", - " name: str, \n", - " reference_genome: str, \n", - " n_rows: int32, \n", - " n_cols: int32, \n", - " n_partitions: int32\n", - " }\n", - "----------------------------------------\n", - "Column fields:\n", - " 'tissue': str\n", - "----------------------------------------\n", - "Row fields:\n", - " 'locus': locus\n", - " 'alleles': array\n", - " 'gene_id': str\n", - " 'tss_distance': int32\n", - "----------------------------------------\n", - "Entry fields:\n", - " 'ma_samples': int32\n", - " 'ma_count': int32\n", - " 'maf': float64\n", - " 'pval_nominal': float64\n", - " 'slope': float64\n", - " 'slope_se': float64\n", - "----------------------------------------\n", - "Column key: ['tissue']\n", - "Row key: ['locus', 'alleles']\n", - "----------------------------------------\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.4" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} \ No newline at end of file diff --git a/datasets/notebooks/GTEx_Tables.ipynb b/datasets/notebooks/gtex_datasets.ipynb similarity index 100% rename from datasets/notebooks/GTEx_Tables.ipynb rename to datasets/notebooks/gtex_datasets.ipynb diff --git a/dev-docs/batch-design.md b/dev-docs/batch-design.md deleted file mode 100644 index ee320db8c43..00000000000 --- a/dev-docs/batch-design.md +++ /dev/null @@ -1,61 +0,0 @@ -# Container runtime - -Containers in batch are run using the [crun](https://github.com/containers/crun) container runtime. -`crun` is a low-level container runtime like `runc` (what Docker uses) which implements the -Open Container Initiative (OCI) specification for running containers given an image's filesystem and a -[runtime configuration](https://github.com/opencontainers/runtime-spec/blob/master/config.md). The JSON -configuration specifies, among other things, the linux namespaces and cgroups under which to run the container -and the user command to run. - -All images run on a worker are preprocessed by extracting their root filesystem into `/host/rootfs/` and -storing any additional image configuration like environment variables and users in memory in the worker -process. These root filesystems are immutable and job containers cannot write to them. All directories and -files relating to a user's job except for the underlying rootfs are stored under the job's scratch directory. -The scratch directory contains directories for each container in the job (input, main, output) and an `io` -directory that is mounted into each container. Each container directory contains -- The upper, merged, and work directories for the overlay filesystem used in the container. For - a great explanation of how overlayfs works, see - [here](https://jvns.ca/blog/2019/11/18/how-containers-work--overlayfs/). 
-- Any volumes specified in the user's image that are mounted into the container -- The container's `config.json` that the worker creates and passes to `crun`. - -Batch uses [xfs_quota](https://man7.org/linux/man-pages/man8/xfs_quota.8.html) to enforce storage -limits for jobs. Each job receives its own XFS project rooted at the scratch directory. Any writes from -the main, input and output containers into their root filesystems contribute to the overall job storage quota. -Storage in `/io` is subject to the user's quota *unless* `/io` is mounted from an external disk. - -Below is the layout of job's scratch directory on the worker. NOTE: Since the underlying image/root filesystem -is not stored per-job, it does not contribute toward a job's storage quota. - -``` -scratch/ -├─ io/ (potentially mounted from an external disk) -├─ input/ -│ ├─ rootfs_overlay/ -│ │ ├─ upperdir/ (writeable layer) -│ │ ├─ merged/ (what the container sees as its root) -│ │ │ ├─ bin/ (from the overlay's lowerdir [the image's rootfs]) -│ │ │ ├─ etc/ (from the overlay's lowerdir [the image's rootfs]) -│ │ │ ├─ ... -│ │ │ ├─ io/ (bind mount) -│ │ ├─ workdir/ -│ ├─ volumes/ -│ ├─ config/ -│ │ ├─ config.json -├─ main/ -│ ├─ rootfs_overlay/ -│ │ ├─ upperdir/ (writeable layer) -│ │ ├─ merged/ (what crun/the container sees as its root) -│ │ │ ├─ bin/ (from the overlay's lowerdir [the image's rootfs]) -│ │ │ ├─ etc/ (from the overlay's lowerdir [the image's rootfs]) -│ │ │ ├─ ... -│ │ │ ├─ io/ (bind mount) -│ │ │ ├─ image/specified/volume/ (bind mount from volumes/) -│ │ ├─ workdir/ -│ ├─ volumes/ -│ │ ├─ image/specified/volume/ -│ ├─ config/ -│ │ ├─ config.json -├─ output/ -│ ├─ ... -``` diff --git a/docker-build.sh b/docker-build.sh index aeeeb7a33a9..73367a15d9f 100755 --- a/docker-build.sh +++ b/docker-build.sh @@ -1,7 +1,5 @@ #!/bin/bash -set -ex - CONTEXT="$(cd $1 && pwd)" DOCKERFILE="$CONTEXT/$2" REMOTE_IMAGE_NAME=$3 diff --git a/docker/.gitignore b/docker/.gitignore index a5c2226e9d2..c715301c89a 100644 --- a/docker/.gitignore +++ b/docker/.gitignore @@ -1,4 +1 @@ /Dockerfile.service-base.out -base-image-ref -hail-ubuntu-image-ref -service-base-image-ref diff --git a/docker/requirements.txt b/docker/requirements.txt index 309238d491d..f9ce6903019 100644 --- a/docker/requirements.txt +++ b/docker/requirements.txt @@ -5,7 +5,6 @@ aiohttp-session==2.7.0 aiohttp==3.7.4 aiomysql==0.0.20 aioredis==1.3.1 -aiorwlock==1.0.0 async-timeout==3.0.1 asyncinit==0.2.4 Authlib==0.11 @@ -30,8 +29,6 @@ google-cloud-logging==1.12.1 google-cloud-storage==1.25.0 humanize==1.0.0 hurry.filesize==0.9 -# importlib-metadata<4: in dev-requirements, jupyter depends on (an unpinned) ipykernel which needs importlib-metadata<4 -importlib-metadata<4 janus==0.6.1 Jinja2==2.11.3 # keyrings.alt>3.1: https://bugs.launchpad.net/usd-importer/+bug/1794041/comments/6 @@ -42,8 +39,7 @@ mypy==0.780 nest_asyncio==1.0.0 parsimonious==0.8.1 prometheus_async==19.2.0 -prometheus_client==0.11.0 -psutil==5.8.0 +prometheus_client==0.7.1 pyjwt==1.7.1 pylint==2.6.0 astroid<2.5 # https://github.com/PyCQA/pylint/issues/4131 diff --git a/gear/gear/__init__.py b/gear/gear/__init__.py index 85dd6bed010..315661f83fc 100644 --- a/gear/gear/__init__.py +++ b/gear/gear/__init__.py @@ -12,7 +12,7 @@ ) from .csrf import new_csrf_token, check_csrf_token from .auth_utils import insert_user, create_session -from .metrics import monitor_endpoints_middleware +from .metrics import monitor_endpoint __all__ = [ 'create_database_pool', @@ -31,5 +31,5 @@ 'create_session', 'transaction', 
'maybe_parse_bearer_header', - 'monitor_endpoints_middleware', + 'monitor_endpoint', ] diff --git a/gear/gear/metrics.py b/gear/gear/metrics.py index 8cba7171e81..de794adc885 100644 --- a/gear/gear/metrics.py +++ b/gear/gear/metrics.py @@ -1,27 +1,21 @@ -from aiohttp import web +from functools import wraps import prometheus_client as pc # type: ignore from prometheus_async.aio import time as prom_async_time # type: ignore REQUEST_TIME = pc.Summary('http_request_latency_seconds', 'Endpoint latency in seconds', ['endpoint', 'verb']) REQUEST_COUNT = pc.Counter('http_request_count', 'Number of HTTP requests', ['endpoint', 'verb', 'status']) -CONCURRENT_REQUESTS = pc.Gauge('http_concurrent_requests', 'Number of in progress HTTP requests', ['endpoint', 'verb']) -@web.middleware -async def monitor_endpoints_middleware(request, handler): - if request.match_info.route.resource: +def monitor_endpoint(handler): + @wraps(handler) + async def wrapped(request, *args, **kwargs): # Use the path template given to @route., not the fully resolved one endpoint = request.match_info.route.resource.canonical - else: - endpoint = '' - verb = request.method - CONCURRENT_REQUESTS.labels(endpoint=endpoint, verb=verb).inc() - try: - response = await prom_async_time(REQUEST_TIME.labels(endpoint=endpoint, verb=verb), handler(request)) + verb = request.method + response = await prom_async_time( + REQUEST_TIME.labels(endpoint=endpoint, verb=verb), handler(request, *args, **kwargs) + ) REQUEST_COUNT.labels(endpoint=endpoint, verb=verb, status=response.status).inc() return response - except web.HTTPException as e: - REQUEST_COUNT.labels(endpoint=endpoint, verb=verb, status=e.status).inc() - raise e - finally: - CONCURRENT_REQUESTS.labels(endpoint=endpoint, verb=verb).dec() + + return wrapped diff --git a/hail/Dockerfile.hail-pip-installed-python36 b/hail/Dockerfile.hail-pip-installed-python36 index dd394a2f9e9..de1e8758a4f 100644 --- a/hail/Dockerfile.hail-pip-installed-python36 +++ b/hail/Dockerfile.hail-pip-installed-python36 @@ -8,9 +8,7 @@ RUN hail-apt-get-install \ COPY hail/python/requirements.txt requirements.txt COPY hail/python/dev-requirements.txt dev-requirements.txt -RUN file=$(mktemp) && \ - cat requirements.txt dev-requirements.txt > $file && \ - hail-pip-install -r $file +RUN hail-pip-install -r requirements.txt -r dev-requirements.txt COPY wheel-container.tar wheel-container.tar RUN tar -xf wheel-container.tar && \ diff --git a/hail/Dockerfile.hail-pip-installed-python37 b/hail/Dockerfile.hail-pip-installed-python37 index 6d20b3e05b5..796b430f251 100644 --- a/hail/Dockerfile.hail-pip-installed-python37 +++ b/hail/Dockerfile.hail-pip-installed-python37 @@ -6,9 +6,7 @@ RUN hail-apt-get-install openjdk-8-jdk-headless COPY hail/python/requirements.txt requirements.txt COPY hail/python/dev-requirements.txt dev-requirements.txt -RUN file=$(mktemp) && \ - cat requirements.txt dev-requirements.txt > $file && \ - hail-pip-install -r $file +RUN hail-pip-install -r requirements.txt -r dev-requirements.txt COPY wheel-container.tar wheel-container.tar RUN tar -xf wheel-container.tar && \ diff --git a/hail/Makefile b/hail/Makefile index 13c8e041dd8..d1da1b43436 100644 --- a/hail/Makefile +++ b/hail/Makefile @@ -14,7 +14,7 @@ BRANCH := $(shell git rev-parse --abbrev-ref HEAD) SCALA_VERSION ?= 2.12.13 SPARK_VERSION ?= 3.1.1 HAIL_MAJOR_MINOR_VERSION := 0.2 -HAIL_PATCH_VERSION := 73 +HAIL_PATCH_VERSION := 70 HAIL_PIP_VERSION := $(HAIL_MAJOR_MINOR_VERSION).$(HAIL_PATCH_VERSION) HAIL_VERSION := 
$(HAIL_PIP_VERSION)-$(SHORT_REVISION) ELASTIC_MAJOR_VERSION ?= 7 diff --git a/hail/python/cluster-tests/cluster-liftover-rg.py b/hail/python/cluster-tests/cluster-liftover-rg.py deleted file mode 100644 index 67981148806..00000000000 --- a/hail/python/cluster-tests/cluster-liftover-rg.py +++ /dev/null @@ -1,14 +0,0 @@ -import hail as hl -hl.init() - -mt = hl.balding_nichols_model(3, 10, 10) - -mt._force_count_rows() - -rg = mt.locus.dtype.reference_genome -rg.add_liftover('gs://hail-common/references/grch37_to_grch38.over.chain.gz', 'GRCh38') -mt = mt.annotate_rows(locus2=hl.liftover(mt.locus, 'GRCh38')) - -mt._force_count_rows() - -hl.locus(contig='20', pos=1, reference_genome='GRCh37').show() diff --git a/hail/python/dev-requirements.txt b/hail/python/dev-requirements.txt index 56cf18af3bb..53b8e4bb81b 100644 --- a/hail/python/dev-requirements.txt +++ b/hail/python/dev-requirements.txt @@ -14,7 +14,5 @@ sphinx-autodoc-typehints==1.11.1 nbsphinx==0.8.3 sphinx_rtd_theme==0.4.2 jupyter==1.0.0 -# importlib-metadata<4: in dev-requirements, jupyter depends on (an unpinned) ipykernel which needs importlib-metadata<4 -importlib-metadata<4 sphinxcontrib.katex==0.5.1 fswatch==0.1.1 diff --git a/hail/python/hail/docs/change_log.md b/hail/python/hail/docs/change_log.md index 55f40e30ae9..223e52edb02 100644 --- a/hail/python/hail/docs/change_log.md +++ b/hail/python/hail/docs/change_log.md @@ -24,56 +24,6 @@ an earlier version of Hail to read files written in a later version. --- -## Version 0.2.73 - -Released 2021-07-22 - -### Bug fixes - -- (hail#10684) Fixed a rare bug reading arrays from disk where short arrays would have their first elements corrupted and long arrays would cause segfaults. -- (hail#10523) Fixed bug where liftover would fail with "Could not initialize class" errors. - ---- - -## Version 0.2.72 - -Released 2021-07-19 - -### New Features - -- (hail#10655) Revamped many hail error messages to give useful python stack traces. -- (hail#10663) Added `DictExpression.items()` to mirror python's `dict.items()`. -- (hail#10657) `hl.map` now supports mapping over multiple lists like Python's built-in `map`. - -### Bug fixes - -- (hail#10662) Fixed partitioning logic in `hl.import_plink`. -- (hail#10669) `NDArrayNumericExpression.sum()` now works correctly on ndarrays of booleans. - ---- - -## Version 0.2.71 - -Released 2021-07-08 - -### New Features - -- (hail#10632) Added support for weighted linear regression to `hl.linear_regression_rows`. -- (hail#10635) Added `hl.nd.maximum` and `hl.nd.minimum`. -- (hail#10602) Added `hl.starmap`. - -### Bug fixes - -- (hail#10038) Fixed crashes when writing/reading matrix tables with 0 partitions. -- (hail#10624) Fixed out of bounds bug with `_quantile_from_cdf`. - - -### hailctl dataproc - -- (hail#10633) Added `--scopes` parameter to `hailctl dataproc start`. - ---- - ## Version 0.2.70 Released 2021-06-21 @@ -97,7 +47,7 @@ Released 2021-06-14 ### hailctl dataproc -- (hail#10574) Hail logs will now be stored in `/home/hail` by default. +- (hail#10574) Hail logs will now be stored in `/home/hail` by default. --- diff --git a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_all_snp_gene_associations.rst b/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_all_snp_gene_associations.rst deleted file mode 100644 index acea8335c67..00000000000 --- a/hail/python/hail/docs/datasets/schemas/GTEx_eQTL_all_snp_gene_associations.rst +++ /dev/null @@ -1,44 +0,0 @@ -.. 
_GTEx_eQTL_all_snp_gene_associations: - -GTEx_eQTL_all_snp_gene_associations -=================================== - -* **Versions:** v8 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.MatrixTable` - -Schema (v8, GRCh38) -~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'metadata': struct { - name: str, - reference_genome: str, - n_rows: int32, - n_cols: int32, - n_partitions: int32 - } - ---------------------------------------- - Column fields: - 'tissue': str - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'gene_id': str - 'tss_distance': int32 - ---------------------------------------- - Entry fields: - 'ma_samples': int32 - 'ma_count': int32 - 'maf': float64 - 'pval_nominal': float64 - 'slope': float64 - 'slope_se': float64 - ---------------------------------------- - Column key: ['tissue'] - Row key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/functions/index.rst b/hail/python/hail/docs/functions/index.rst index b85a7bb25f4..75d8704757d 100644 --- a/hail/python/hail/docs/functions/index.rst +++ b/hail/python/hail/docs/functions/index.rst @@ -99,13 +99,11 @@ These functions are exposed at the top level of the module, e.g. ``hl.case``. bit_rshift bit_not exp - expit is_nan is_finite is_infinite log log10 - logit sign sqrt int diff --git a/hail/python/hail/docs/functions/numeric.rst b/hail/python/hail/docs/functions/numeric.rst index cad65a776e9..34e3667bc8f 100644 --- a/hail/python/hail/docs/functions/numeric.rst +++ b/hail/python/hail/docs/functions/numeric.rst @@ -15,13 +15,11 @@ Numeric functions bit_rshift bit_not exp - expit is_nan is_finite is_infinite log log10 - logit sign sqrt int @@ -61,13 +59,11 @@ Numeric functions .. autofunction:: bit_rshift .. autofunction:: bit_not .. autofunction:: exp -.. autofunction:: expit .. autofunction:: is_nan .. autofunction:: is_finite .. autofunction:: is_infinite .. autofunction:: log .. autofunction:: log10 -.. autofunction:: logit .. autofunction:: floor .. autofunction:: ceil .. autofunction:: sqrt diff --git a/hail/python/hail/docs/install/macosx.rst b/hail/python/hail/docs/install/macosx.rst index f0d64890018..a826a4520d4 100644 --- a/hail/python/hail/docs/install/macosx.rst +++ b/hail/python/hail/docs/install/macosx.rst @@ -2,15 +2,7 @@ Install Hail on Mac OS X ======================== -- Install Java 8. We recommend using a - `packaged installation from Azul `__ - (make sure the OS version and architecture match your system) or using `Homebrew `__: - - .. code-block:: - - brew cask install adoptopenjdk8 - brew install --cask adoptopenjdk8 - +- Install `Java 8 `__. - Install Python 3.6+. We recommend `Miniconda `__. - Open Terminal.app and execute ``pip install hail``. - `Run your first Hail query! `__ diff --git a/hail/python/hail/docs/nd/index.rst b/hail/python/hail/docs/nd/index.rst index 0a8267413ff..3352e8665c2 100644 --- a/hail/python/hail/docs/nd/index.rst +++ b/hail/python/hail/docs/nd/index.rst @@ -23,7 +23,6 @@ As much as possible, we try to mimic the numpy array interface. ones diagonal solve - solve_triangular qr svd inv @@ -32,8 +31,6 @@ As much as possible, we try to mimic the numpy array interface. vstack eye identity - maximum - minimum .. autofunction:: array .. autofunction:: arange @@ -42,7 +39,6 @@ As much as possible, we try to mimic the numpy array interface. .. autofunction:: ones .. autofunction:: diagonal .. autofunction:: solve -.. 
autofunction:: solve_triangular .. autofunction:: qr .. autofunction:: svd .. autofunction:: inv @@ -51,6 +47,4 @@ As much as possible, we try to mimic the numpy array interface. .. autofunction:: vstack .. autofunction:: eye .. autofunction:: identity -.. autofunction:: maximum -.. autofunction:: minimum diff --git a/hail/python/hail/docs/tutorials/01-genome-wide-association-study.ipynb b/hail/python/hail/docs/tutorials/01-genome-wide-association-study.ipynb index 9a67a1f2ee9..39128f8f317 100644 --- a/hail/python/hail/docs/tutorials/01-genome-wide-association-study.ipynb +++ b/hail/python/hail/docs/tutorials/01-genome-wide-association-study.ipynb @@ -879,8 +879,6 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We use the [MatrixTable.entries](https://hail.is/docs/0.2/hail.MatrixTable.html#hail.MatrixTable.entries) method to convert our matrix table to a table (with one row for each sample for each variant). In this representation, it is easy to aggregate over any fields we like, which is often the first step of rare variant analysis.\n", - "\n", "What if we want to group by minor allele frequency bin and hair color, and calculate the mean GQ?" ] }, diff --git a/hail/python/hail/experimental/datasets.json b/hail/python/hail/experimental/datasets.json index 4817828e77d..89477f742d1 100644 --- a/hail/python/hail/experimental/datasets.json +++ b/hail/python/hail/experimental/datasets.json @@ -1524,25 +1524,6 @@ } ] }, - "GTEx_eQTL_all_snp_gene_associations": { - "description": "GTEx: eQTL tissue-specific all SNP gene associations MatrixTable. All variant-gene cis-eQTL associations tested in each tissue (including non-significant associations). Contains all available tissues, with columns keyed by tissue.", - "url": "https://gtexportal.org/home/datasets", - "versions": [ - { - "reference_genome": "GRCh38", - "url": { - "aws": { - "us": "s3://hail-datasets-us-east-1/GTEx/v8/eQTL/GRCh38/all_snp_gene_associations.mt" - }, - "gcp": { - "eu": "gs://hail-datasets-eu/GTEx/v8/eQTL/GRCh38/all_snp_gene_associations.mt", - "us": "gs://hail-datasets-us/GTEx/v8/eQTL/GRCh38/all_snp_gene_associations.mt" - } - }, - "version": "v8" - } - ] - }, "GTEx_sQTL_Adipose_Subcutaneous_all_snp_gene_associations": { "annotation_db": { "key_properties": [] diff --git a/hail/python/hail/expr/__init__.py b/hail/python/hail/expr/__init__.py index 1a476fa2c9f..9f168591b0b 100644 --- a/hail/python/hail/expr/__init__.py +++ b/hail/python/hail/expr/__init__.py @@ -44,7 +44,7 @@ parse_int32, parse_int64, bool, get_sequence, reverse_complement, \ is_valid_contig, is_valid_locus, contig_length, liftover, min_rep, \ uniroot, format, approx_equal, reversed, bit_and, bit_or, bit_xor, \ - bit_lshift, bit_rshift, bit_not, binary_search, logit, expit, \ + bit_lshift, bit_rshift, bit_not, binary_search, \ _values_similar, _showstr, _sort_by, _compare, _locus_windows_per_contig, \ shuffle @@ -126,8 +126,6 @@ 'parse_json', 'log', 'log10', - 'logit', - 'expit', 'null', 'missing', 'or_else', diff --git a/hail/python/hail/expr/expressions/base_expression.py b/hail/python/hail/expr/expressions/base_expression.py index c49dbf0470e..7e9f6c13c38 100644 --- a/hail/python/hail/expr/expressions/base_expression.py +++ b/hail/python/hail/expr/expressions/base_expression.py @@ -628,17 +628,6 @@ def _ir_lambda_method(self, irf, f, input_type, ret_type_f, *args): x = irf(self._ir, new_id, lambda_result._ir, *args) return expressions.construct_expr(x, ret_type_f(lambda_result._type), indices, aggregations) - def _ir_lambda_method2(self, other, 
irf, f, input_type1, input_type2, ret_type_f, *args): - args = (to_expr(arg)._ir for arg in args) - new_id1 = Env.get_uid() - new_id2 = Env.get_uid() - lambda_result = to_expr( - f(expressions.construct_variable(new_id1, input_type1, self._indices, self._aggregations), - expressions.construct_variable(new_id2, input_type2, other._indices, other._aggregations))) - indices, aggregations = unify_all(self, other, lambda_result) - x = irf(self._ir, other._ir, new_id1, new_id2, lambda_result._ir, *args) - return expressions.construct_expr(x, ret_type_f(lambda_result._type), indices, aggregations) - @property def dtype(self) -> HailType: """The data type of the expression. diff --git a/hail/python/hail/expr/expressions/typed_expressions.py b/hail/python/hail/expr/expressions/typed_expressions.py index 6652608a49d..47d8a071920 100644 --- a/hail/python/hail/expr/expressions/typed_expressions.py +++ b/hail/python/hail/expr/expressions/typed_expressions.py @@ -8,8 +8,7 @@ unify_all, unify_types from .expression_typecheck import coercer_from_dtype, \ expr_any, expr_array, expr_set, expr_bool, expr_numeric, expr_int32, \ - expr_int64, expr_str, expr_dict, expr_interval, expr_tuple, expr_oneof, \ - expr_ndarray + expr_int64, expr_str, expr_dict, expr_interval, expr_tuple, expr_oneof from hail.expr.types import HailType, tint32, tint64, tfloat32, \ tfloat64, tbool, tcall, tset, tarray, tstruct, tdict, ttuple, tstr, \ tndarray, tlocus, tinterval, is_numeric @@ -478,7 +477,31 @@ def __getitem__(self, item): raise TypeError("array expects key to be type 'slice' or expression of type 'int32', " "found expression of type '{}'".format(item._type)) else: - return self._method("indexArray", self.dtype.element_type, item) + import traceback + stack = traceback.format_stack() + i = len(stack) + while i > 0: + candidate = stack[i - 1] + if 'IPython' in candidate: + break + i -= 1 + filt_stack = [] + + forbidden_phrases = [ + '_ir_lambda_method', + 'decorator.py', + 'typecheck/check', + 'interactiveshell.py', + 'expressions.construct_variable', + 'traceback.format_stack()' + ] + while i < len(stack): + candidate = stack[i] + i += 1 + if any(phrase in candidate for phrase in forbidden_phrases): + continue + filt_stack.append(candidate) + return self._method("indexArray", self.dtype.element_type, item, '\n'.join(filt_stack)) @typecheck_method(item=expr_any) def contains(self, item): @@ -1505,22 +1528,6 @@ def values(self): """ return self._method("values", tarray(self.dtype.value_type)) - def items(self): - """Returns an array of tuples containing key/value pairs in the dictionary. - - Examples - -------- - - >>> hl.eval(d.items()) # doctest: +SKIP_OUTPUT_CHECK - [('Alice', 430), ('Bob', 330), ('Charles', 440)] - - Returns - ------- - :class:`.ArrayExpression` - All key/value pairs in the dictionary. - """ - return hl.array(self) - def _extra_summary_fields(self, agg_result): return { 'Min Size': agg_result[0], @@ -3920,7 +3927,7 @@ def reshape(self, *shape): @typecheck_method(f=func_spec(1, expr_any)) def map(self, f): - """Applies an element-wise operation on an NDArray. + """Transform each element of an NDArray. Parameters ---------- @@ -3939,37 +3946,6 @@ def map(self, f): assert isinstance(self._type, tndarray) return ndarray_map - @typecheck_method(other=oneof(expr_ndarray(), list), f=func_spec(2, expr_any)) - def map2(self, other, f): - """Applies an element-wise binary operation on two NDArrays. 
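A short sketch (editor-added, hypothetical values) of the NDArrayExpression.map2 method as defined in this removed hunk: it applies a two-argument function element-wise to two broadcast-compatible NDArrays, and also accepts a list or numpy array for `other`.

```
import hail as hl

a = hl.nd.array([1, 2, 3])
b = hl.nd.array([10, 20, 30])

# Element-wise sum via map2, as implemented in the method body shown here.
hl.eval(a.map2(b, lambda x, y: x + y))  # array([11, 22, 33])
```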
- - Parameters - ---------- - other : class:`.NDArrayExpression`, :class:`.ArrayExpression`, numpy NDarray, - or nested python list/tuples. Both NDArrays must be the same shape or - broadcastable into common shape. - f : function ((arg1, arg2)-> :class:`.Expression`) - Function to be applied to each element of both NDArrays. - - Returns - ------- - :class:`.NDArrayExpression`. - Element-wise result of applying `f` to each index in NDArrays. - """ - - if isinstance(other, list) or isinstance(other, np.ndarray): - other = hl.nd.array(other) - - self_broadcast, other_broadcast = self._broadcast_to_same_ndim(other) - - element_type1 = self_broadcast._type.element_type - element_type2 = other_broadcast._type.element_type - ndarray_map2 = self_broadcast._ir_lambda_method2(other_broadcast, ir.NDArrayMap2, f, element_type1, - element_type2, lambda t: tndarray(t, self_broadcast.ndim)) - - assert isinstance(self._type, tndarray) - return ndarray_map2 - def _broadcast_to_same_ndim(self, other): if isinstance(other, NDArrayExpression): if self.ndim < other.ndim: @@ -4187,29 +4163,25 @@ def sum(self, axis=None): if axis is None: axis = tuple(range(self.ndim)) - if self._type.element_type is hl.tbool: - return self.map(lambda x: hl.int(x)).sum(axis) + axis = wrap_to_tuple(axis) + res_ir = ir.NDArrayAgg(self._ir, axis) - else: - axis = wrap_to_tuple(axis) - res_ir = ir.NDArrayAgg(self._ir, axis) - - axes_set = set(axis) - if len(axes_set) < len(axis): - raise ValueError("duplicate value in 'axis'") - for element in axes_set: - if element < 0 or element >= self.ndim: - raise ValueError(f"axis {element} is out of bounds for ndarray of dimension {self.ndim}") + axes_set = set(axis) + if len(axes_set) < len(axis): + raise ValueError("duplicate value in 'axis'") + for element in axes_set: + if element < 0 or element >= self.ndim: + raise ValueError(f"axis {element} is out of bounds for ndarray of dimension {self.ndim}") - num_axes_deleted = len(axes_set) + num_axes_deleted = len(axes_set) - result_ndim = self.ndim - num_axes_deleted - result = construct_expr(res_ir, tndarray(self._type.element_type, result_ndim), self._indices, self._aggregations) + result_ndim = self.ndim - num_axes_deleted + result = construct_expr(res_ir, tndarray(self._type.element_type, result_ndim), self._indices, self._aggregations) - if result_ndim == 0: - return result[()] - else: - return result + if result_ndim == 0: + return result[()] + else: + return result scalars = {tbool: BooleanExpression, diff --git a/hail/python/hail/expr/functions.py b/hail/python/hail/expr/functions.py index fc3ee896126..e44db3b3631 100644 --- a/hail/python/hail/expr/functions.py +++ b/hail/python/hail/expr/functions.py @@ -59,7 +59,10 @@ def _quantile_from_cdf(cdf, q): def compute(cdf): n = cdf.ranks[cdf.ranks.length() - 1] pos = hl.int64(q * n) + 1 - idx = hl.max(0, hl.min(cdf.values.length() - 1, _lower_bound(cdf.ranks, pos) - 1)) + idx = (hl.switch(q) + .when(0.0, 0) + .when(1.0, cdf.values.length() - 1) + .default(_lower_bound(cdf.ranks, pos) - 1)) res = hl.if_else(n == 0, hl.missing(cdf.values.dtype.element_type), cdf.values[idx]) @@ -1695,51 +1698,6 @@ def log10(x) -> Float64Expression: return _func("log10", tfloat64, x) -@typecheck(x=expr_float64) -def logit(x) -> Float64Expression: - """The logistic function. 
- - Examples - -------- - >>> hl.eval(hl.logit(.01)) - -4.59511985013459 - >>> hl.eval(hl.logit(.5)) - 0.0 - - Parameters - ---------- - x : float or :class:`.Expression` of type :py:data:`.tfloat64` - - Returns - ------- - :class:`.Expression` of type :py:data:`.tfloat64` - """ - return hl.log(x / (1 - x)) - - -@typecheck(x=expr_float64) -def expit(x) -> Float64Expression: - """The logistic sigmoid function. - - Examples - -------- - >>> hl.eval(hl.expit(.01)) - 0.5024999791668749 - >>> hl.eval(hl.expit(0.0)) - 0.5 - - - Parameters - ---------- - x : float or :class:`.Expression` of type :py:data:`.tfloat64` - - Returns - ------- - :class:`.Expression` of type :py:data:`.tfloat64` - """ - return hl.if_else(x >= 0, 1 / (1 + hl.exp(-x)), hl.rbind(hl.exp(x), lambda exped: exped / (exped + 1))) - - @typecheck(args=expr_any) def coalesce(*args): """Returns the first non-missing value of `args`. @@ -3628,40 +3586,32 @@ def zip_with_index(a, index_first=True): return enumerate(a, index_first=index_first) -@typecheck(f=anyfunc, - collections=expr_oneof(expr_set(), expr_array(), expr_ndarray())) -def map(f: Callable, *collections): - r"""Transform each element of a collection. +@typecheck(f=func_spec(1, expr_any), + collection=expr_oneof(expr_set(), expr_array(), expr_ndarray())) +def map(f: Callable, collection): + """Transform each element of a collection. Examples -------- >>> a = ['The', 'quick', 'brown', 'fox'] - >>> b = [2, 4, 6, 8] >>> hl.eval(hl.map(lambda x: hl.len(x), a)) [3, 5, 5, 3] - >>> hl.eval(hl.map(lambda s, n: hl.len(s) + n, a, b)) - [5, 9, 11, 11] - Parameters ---------- - f : function ( (\*arg) -> :class:`.Expression`) + f : function ( (arg) -> :class:`.Expression`) Function to transform each element of the collection. - \*collections : :class:`.ArrayExpression` or :class:`.SetExpression` - A single collection expression or multiple array expressions. + collection : :class:`.ArrayExpression` or :class:`.SetExpression` + Collection expression. Returns ------- :class:`.ArrayExpression` or :class:`.SetExpression`. Collection where each element has been transformed by `f`. 
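The conditional in the `expit` body above (the same idea reappears in `sigmoid` later in this patch) keeps the argument of `exp` non-positive so neither branch can overflow; a small pure-Python sketch of that trick:

    import math

    def stable_expit(x):
        if x >= 0:
            return 1.0 / (1.0 + math.exp(-x))  # exp(-x) <= 1 here
        e = math.exp(x)                         # x < 0, so exp(x) <= 1
        return e / (e + 1.0)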
""" - - if builtins.len(collections) == 1: - return collections[0].map(f) - else: - return hl.zip(*collections).starmap(f) + return collection.map(f) @typecheck(f=anyfunc, @@ -5565,35 +5515,35 @@ def uniroot(recur, a, b, c, fa, fb, fc, prev, iterations_remaining): t1 = fb / fc t2 = fb / fa q1 = fa / fc # = t1 / t2 - pq = if_else( + pq = cond( a == c, (cb * t1) / (t1 - 1.0), # linear -t2 * (cb * q1 * (q1 - t1) - (b - a) * (t1 - 1.0)) / ((q1 - 1.0) * (t1 - 1.0) * (t2 - 1.0))) # quadratic - interpolated = if_else((sign(pq) == sign(cb)) - & (.75 * abs(cb) > abs(pq) + tol / 2) # b + pq within [b, c] - & (abs(pq) < abs(prev / 2)), # pq not too large - pq, cb / 2) + interpolated = cond((sign(pq) == sign(cb)) + & (.75 * abs(cb) > abs(pq) + tol / 2) # b + pq within [b, c] + & (abs(pq) < abs(prev / 2)), # pq not too large + pq, cb / 2) - new_step = if_else( + new_step = cond( (abs(prev) >= tol) & (abs(fa) > abs(fb)), # try interpolation interpolated, cb / 2) - new_b = b + if_else(new_step < 0, hl.min(new_step, -tol), hl.max(new_step, tol)) + new_b = b + cond(new_step < 0, hl.min(new_step, -tol), hl.max(new_step, tol)) new_fb = wrapped_f(new_b) - return if_else( + return cond( iterations_remaining == 0, - missing('float'), - if_else(abs(fc) < abs(fb), - recur(b, c, b, fb, fc, fb, prev, iterations_remaining), - if_else((abs(cb / 2) <= tol) | (fb == 0), - b, # acceptable approximation found - if_else(sign(new_fb) == sign(fc), # use c = b for next iteration if signs match - recur(b, new_b, b, fb, new_fb, fb, new_step, iterations_remaining - 1), - recur(b, new_b, c, fb, new_fb, fc, new_step, iterations_remaining - 1) - )))) + null('float'), + cond(abs(fc) < abs(fb), + recur(b, c, b, fb, fc, fb, prev, iterations_remaining), + cond((abs(cb / 2) <= tol) | (fb == 0), + b, # acceptable approximation found + cond(sign(new_fb) == sign(fc), # use c = b for next iteration if signs match + recur(b, new_b, b, fb, new_fb, fb, new_step, iterations_remaining - 1), + recur(b, new_b, c, fb, new_fb, fc, new_step, iterations_remaining - 1) + )))) fmin = wrapped_f(min) fmax = wrapped_f(max) diff --git a/hail/python/hail/ir/__init__.py b/hail/python/hail/ir/__init__.py index 44887e2d7ec..ef5999428e8 100644 --- a/hail/python/hail/ir/__init__.py +++ b/hail/python/hail/ir/__init__.py @@ -8,7 +8,7 @@ Void, Cast, NA, IsNA, If, Coalesce, Let, AggLet, Ref, TopLevelReference, \ TailLoop, Recur, ApplyBinaryPrimOp, ApplyUnaryPrimOp, ApplyComparisonOp, \ MakeArray, ArrayRef, ArrayLen, ArrayZeros, StreamRange, StreamGrouped, MakeNDArray, \ - NDArrayShape, NDArrayReshape, NDArrayMap, NDArrayMap2, NDArrayRef, NDArraySlice, NDArraySVD, \ + NDArrayShape, NDArrayReshape, NDArrayMap, NDArrayRef, NDArraySlice, NDArraySVD, \ NDArrayReindex, NDArrayAgg, NDArrayMatMul, NDArrayQR, NDArrayInv, NDArrayConcat, NDArrayWrite, \ ArraySort, ToSet, ToDict, ToArray, CastToArray, ToStream, \ LowerBoundOnOrderedCollection, GroupByKey, StreamMap, StreamZip, \ @@ -150,7 +150,6 @@ 'NDArrayShape', 'NDArrayReshape', 'NDArrayMap', - 'NDArrayMap2', 'NDArrayRef', 'NDArraySlice', 'NDArrayReindex', diff --git a/hail/python/hail/ir/ir.py b/hail/python/hail/ir/ir.py index 2fcdf599dc4..22224c2c2d0 100644 --- a/hail/python/hail/ir/ir.py +++ b/hail/python/hail/ir/ir.py @@ -558,26 +558,21 @@ def _compute_type(self, env, agg_env): class ArrayRef(IR): - @typecheck_method(a=IR, i=IR, error_id=nullable(int), stack_trace=nullable(str)) - def __init__(self, a, i, error_id=None, stack_trace=None): - super().__init__(a, i) + @typecheck_method(a=IR, i=IR, s=IR) + def __init__(self, 
a, i, s): + super().__init__(a, i, s) self.a = a self.i = i - self._error_id = error_id - self._stack_trace = stack_trace - if error_id is None or stack_trace is None: - self.save_error_info() + self.s = s - @typecheck_method(a=IR, i=IR) - def copy(self, a, i): - return ArrayRef(a, i, self._error_id, self._stack_trace) - - def head_str(self): - return str(self._error_id) + @typecheck_method(a=IR, i=IR, s=IR) + def copy(self, a, i, s): + return ArrayRef(a, i, s) def _compute_type(self, env, agg_env): self.a._compute_type(env, agg_env) self.i._compute_type(env, agg_env) + self.s._compute_type(env, agg_env) self._type = self.a.typ.element_type @@ -612,26 +607,20 @@ def _compute_type(self, env, agg_env): class StreamRange(IR): - @typecheck_method(start=IR, stop=IR, step=IR, requires_memory_management_per_element=bool, - error_id=nullable(int), stack_trace=nullable(str)) - def __init__(self, start, stop, step, requires_memory_management_per_element=False, - error_id=None, stack_trace=None): + @typecheck_method(start=IR, stop=IR, step=IR, requires_memory_management_per_element=bool) + def __init__(self, start, stop, step, requires_memory_management_per_element=False): super().__init__(start, stop, step) self.start = start self.stop = stop self.step = step self.requires_memory_management_per_element = requires_memory_management_per_element - self._error_id = error_id - self._stack_trace = stack_trace - if error_id is None or stack_trace is None: - self.save_error_info() @typecheck_method(start=IR, stop=IR, step=IR) def copy(self, start, stop, step): - return StreamRange(start, stop, step, error_id=self._error_id, stack_trace=self._stack_trace) + return StreamRange(start, stop, step) def head_str(self): - return f'{self._error_id} {self.requires_memory_management_per_element}' + return self.requires_memory_management_per_element def _compute_type(self, env, agg_env): self.start._compute_type(env, agg_env) @@ -699,21 +688,14 @@ def _compute_type(self, env, agg_env): class NDArrayReshape(IR): - @typecheck_method(nd=IR, shape=IR, error_id=nullable(int), stack_trace=nullable(str)) - def __init__(self, nd, shape, error_id=None, stack_trace=None): + @typecheck_method(nd=IR, shape=IR) + def __init__(self, nd, shape): super().__init__(nd, shape) self.nd = nd self.shape = shape - self._error_id = error_id - self._stack_trace = stack_trace - if error_id is None or stack_trace is None: - self.save_error_info() def copy(self, nd, shape): - return NDArrayReshape(nd, shape, self._error_id, self._stack_trace) - - def head_str(self): - return str(self._error_id) + return NDArrayReshape(nd, shape) def _compute_type(self, env, agg_env): self.nd._compute_type(env, agg_env) @@ -759,51 +741,6 @@ def renderable_bindings(self, i, default_value=None): return {} -class NDArrayMap2(IR): - @typecheck_method(left=IR, right=IR, lname=str, rname=str, body=IR, error_id=nullable(int), stack_trace=nullable(str)) - def __init__(self, left, right, lname, rname, body, error_id=None, stack_trace=None): - super().__init__(left, right, body) - self.right = right - self.left = left - self.lname = lname - self.rname = rname - self.body = body - self._error_id = error_id - self._stack_trace = stack_trace - if error_id is None or stack_trace is None: - self.save_error_info() - - @typecheck_method(l=IR, r=IR, body=IR) - def copy(self, left, right, body): - return NDArrayMap2(left, right, self.lname, self.rname, body, self._error_id, self._stack_trace) - - def head_str(self): - return f'{self._error_id} {escape_id(self.lname)} 
{escape_id(self.rname)}' - - def _eq(self, other): - return self.lname == other.lname and \ - self.rname == other.rname - - @property - def bound_variables(self): - return {self.lname, self.rname} | super().bound_variables - - def _compute_type(self, env, agg_env): - self.left._compute_type(env, agg_env) - self.right._compute_type(env, agg_env) - self.body._compute_type(_env_bind(env, self.bindings(2)), agg_env) - self._type = tndarray(self.body.typ, self.left.typ.ndim) - - def renderable_bindings(self, i, default_value=None): - if i == 2: - if default_value is None: - return {self.lname: self.left.typ.element_type, self.rname: self.right.typ.element_type} - else: - return {self.lname: default_value, self.rname: default_value} - else: - return {} - - class NDArrayRef(IR): @typecheck_method(nd=IR, idxs=sequenceof(IR), error_id=nullable(int), stack_trace=nullable(str)) def __init__(self, nd, idxs, error_id=None, stack_trace=None): @@ -893,22 +830,15 @@ def _compute_type(self, env, agg_env): class NDArrayMatMul(IR): - @typecheck_method(left=IR, right=IR, error_id=nullable(int), stack_trace=nullable(str)) - def __init__(self, left, right, error_id=None, stack_trace=None): + @typecheck_method(left=IR, right=IR) + def __init__(self, left, right): super().__init__(left, right) self.left = left self.right = right - self._error_id = error_id - self._stack_trace = stack_trace - if error_id is None or stack_trace is None: - self.save_error_info() @typecheck_method(left=IR, right=IR) def copy(self, left, right): - return NDArrayMatMul(left, right, self._error_id, self._stack_trace) - - def head_str(self): - return str(self._error_id) + return NDArrayMatMul(left, right) def _compute_type(self, env, agg_env): self.left._compute_type(env, agg_env) @@ -921,21 +851,17 @@ def _compute_type(self, env, agg_env): class NDArrayQR(IR): - @typecheck_method(nd=IR, mode=str, error_id=nullable(int), stack_trace=nullable(str)) - def __init__(self, nd, mode, error_id=None, stack_trace=None): + @typecheck_method(nd=IR, mode=str) + def __init__(self, nd, mode): super().__init__(nd) self.nd = nd self.mode = mode - self._error_id = error_id - self._stack_trace = stack_trace - if error_id is None or stack_trace is None: - self.save_error_info() def copy(self): - return NDArrayQR(self.nd, self.mode, self._error_id, self._stack_trace) + return NDArrayQR(self.nd, self.mode) def head_str(self): - return f'{self._error_id} "{self.mode}"' + return f'"{self.mode}"' def _compute_type(self, env, agg_env): self.nd._compute_type(env, agg_env) @@ -951,22 +877,18 @@ def _compute_type(self, env, agg_env): class NDArraySVD(IR): - @typecheck_method(nd=IR, full_matrices=bool, compute_uv=bool, error_id=nullable(int), stack_trace=nullable(str)) - def __init__(self, nd, full_matrices, compute_uv, error_id=None, stack_trace=None): + @typecheck_method(nd=IR, full_matrices=bool, compute_uv=bool) + def __init__(self, nd, full_matrices, compute_uv): super().__init__(nd) self.nd = nd self.full_matrices = full_matrices self.compute_uv = compute_uv - self._error_id = error_id - self._stack_trace = stack_trace - if error_id is None or stack_trace is None: - self.save_error_info() def copy(self): - return NDArraySVD(self.nd, self.full_matrices, self.compute_uv, self._error_id, self._stack_trace) + return NDArraySVD(self.nd, self.full_matrices, self.compute_uv) def head_str(self): - return f'{self._error_id} {self.full_matrices} {self.compute_uv}' + return f'{self.full_matrices} {self.compute_uv}' def _compute_type(self, env, agg_env): 
self.nd._compute_type(env, agg_env) @@ -977,20 +899,13 @@ def _compute_type(self, env, agg_env): class NDArrayInv(IR): - @typecheck_method(nd=IR, error_id=nullable(int), stack_trace=nullable(str)) - def __init__(self, nd, error_id=None, stack_trace=None): + @typecheck_method(nd=IR) + def __init__(self, nd): super().__init__(nd) self.nd = nd - self._error_id = error_id - self._stack_trace = stack_trace - if error_id is None or stack_trace is None: - self.save_error_info() def copy(self): - return NDArrayInv(self.nd, self._error_id, self._stack_trace) - - def head_str(self): - return str(self._error_id) + return NDArrayInv(self.nd) def _compute_type(self, env, agg_env): self.nd._compute_type(env, agg_env) @@ -1234,25 +1149,20 @@ def renderable_bindings(self, i, default_value=None): class StreamZip(IR): - @typecheck_method(streams=sequenceof(IR), names=sequenceof(str), body=IR, behavior=str, - error_id=nullable(int), stack_trace=nullable(str)) - def __init__(self, streams, names, body, behavior, error_id=None, stack_trace=None): + @typecheck_method(streams=sequenceof(IR), names=sequenceof(str), body=IR, behavior=str) + def __init__(self, streams, names, body, behavior): super().__init__(*streams, body) self.streams = streams self.names = names self.body = body self.behavior = behavior - self._error_id = error_id - self._stack_trace = stack_trace - if error_id is None or stack_trace is None: - self.save_error_info() @typecheck_method(children=IR) def copy(self, *children): - return StreamZip(children[:-1], self.names, children[-1], self.behavior, self._error_id, self._stack_trace) + return StreamZip(children[:-1], self.names, children[-1], self.behavior) def head_str(self): - return f'{self._error_id} {escape_id(self.behavior)} ({" ".join(map(escape_id, self.names))})' + return f'{escape_id(self.behavior)} ({" ".join(map(escape_id, self.names))})' def _eq(self, other): return self.names == other.names and self.behavior == other.behavior @@ -2124,25 +2034,20 @@ def wrapper(__original_func, *args, **kwargs): class Apply(IR): - @typecheck_method(function=str, return_type=hail_type, args=IR, - error_id=nullable(int), stack_trace=nullable(str), type_args=tupleof(hail_type)) - def __init__(self, function, return_type, *args, type_args=(), error_id=None, stack_trace=None,): + @typecheck_method(function=str, return_type=hail_type, args=IR, type_args=tupleof(hail_type)) + def __init__(self, function, return_type, *args, type_args=()): super().__init__(*args) self.function = function self.return_type = return_type self.type_args = type_args self.args = args - self._error_id = error_id - self._stack_trace = stack_trace - if error_id is None or stack_trace is None: - self.save_error_info() def copy(self, *args): - return Apply(self.function, self.return_type, *args, type_args=self.type_args, error_id=self._error_id, stack_trace=self._stack_trace,) + return Apply(self.function, self.return_type, *args, type_args=self.type_args) def head_str(self): type_args = "(" + " ".join([a._parsable_string() for a in self.type_args]) + ")" - return f'{self._error_id} {escape_id(self.function)} {type_args} {self.return_type._parsable_string()}' + return f'{escape_id(self.function)} {type_args} {self.return_type._parsable_string()}' def _eq(self, other): return other.function == self.function and \ diff --git a/hail/python/hail/matrixtable.py b/hail/python/hail/matrixtable.py index f3e40397e0b..76fbdc2c17b 100644 --- a/hail/python/hail/matrixtable.py +++ b/hail/python/hail/matrixtable.py @@ -2703,21 +2703,6 @@ def 
entries(self) -> Table: >>> entries_table = dataset.entries() - Notes - ----- - The coordinate table representation of the source matrix table contains - one row for each **non-filtered** entry of the matrix -- if a matrix table - has no filtered entries and contains N rows and M columns, the table will contain - ``M * N`` rows, which can be **a very large number**. - - This representation can be useful for aggregating over both axes of a matrix table - at the same time -- it is not possible to aggregate over a matrix table using - :meth:`group_rows_by` and :meth:`group_cols_by` at the same time (aggregating - by population and chromosome from a variant-by-sample genetics representation, - for instance). After moving to the coordinate representation with :meth:`entries`, - it is possible to group and aggregate the resulting table much more flexibly, - albeit with potentially poorer computational performance. - Warning ------- The table returned by this method should be used for aggregation or queries, diff --git a/hail/python/hail/methods/impex.py b/hail/python/hail/methods/impex.py index 79e224adf57..37e26ebd336 100644 --- a/hail/python/hail/methods/impex.py +++ b/hail/python/hail/methods/impex.py @@ -488,7 +488,7 @@ def export_vcf(dataset, output, append_to_header=None, parallel=None, metadata=N ``'separate_header'``, return a separate VCF header file and a set of VCF files (one per partition) without the header. If ``None``, concatenate the header and all partitions into one VCF file. - metadata : :obj:`dict` [:obj:`str`, :obj:`dict` [:obj:`str`, :obj:`dict` [:obj:`str`, :obj:`str`]]], optional + metadata : :obj:`dict` [:obj:`str`, :obj:`dict` [:obj:`str`, :obj:`dict` [obj:`str`, obj:`str`]]]`, optional Dictionary with information to fill in the VCF header. See :func:`get_vcf_metadata` for how this dictionary should be structured. diff --git a/hail/python/hail/methods/statgen.py b/hail/python/hail/methods/statgen.py index d45702b4bcc..1fda56a868a 100644 --- a/hail/python/hail/methods/statgen.py +++ b/hail/python/hail/methods/statgen.py @@ -201,9 +201,8 @@ def _get_regression_row_fields(mt, pass_through, method) -> Dict[str, str]: x=expr_float64, covariates=sequenceof(expr_float64), block_size=int, - pass_through=sequenceof(oneof(str, Expression)), - weights=nullable(oneof(expr_float64, sequenceof(expr_float64)))) -def linear_regression_rows(y, x, covariates, block_size=16, pass_through=(), *, weights=None) -> hail.Table: + pass_through=sequenceof(oneof(str, Expression))) +def linear_regression_rows(y, x, covariates, block_size=16, pass_through=()) -> hail.Table: r"""For each row, test an input variable for association with response variables using linear regression. @@ -304,16 +303,13 @@ def linear_regression_rows(y, x, covariates, block_size=16, pass_through=(), *, require more memory but may improve performance. pass_through : :obj:`list` of :class:`str` or :class:`.Expression` Additional row fields to include in the resulting table. - weights : :class:`.Float64Expression` or :obj:`list` of :class:`.Float64Expression` - Optional column-indexed weighting for doing weighted least squares regression. Specify a single weight if a - single y or list of ys is specified. If a list of lists of ys is specified, specify one weight per inner list. 
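A minimal sketch of the call that remains after removing the `weights` argument, using hypothetical matrix table fields (`mt.pheno.height`, `mt.pheno.age`) purely for illustration:

    >>> result_ht = hl.linear_regression_rows(   # doctest: +SKIP
    ...     y=mt.pheno.height,                   # hypothetical phenotype field
    ...     x=mt.GT.n_alt_alleles(),
    ...     covariates=[1.0, mt.pheno.age],      # include 1.0 for the intercept
    ...     block_size=16)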
Returns ------- :class:`.Table` """ - if not isinstance(Env.backend(), SparkBackend) or weights is not None: - return _linear_regression_rows_nd(y, x, covariates, block_size, weights, pass_through) + if not isinstance(Env.backend(), SparkBackend): + return _linear_regression_rows_nd(y, x, covariates, block_size, pass_through) mt = matrix_table_source('linear_regression_rows/x', x) check_entry_indexed('linear_regression_rows/x', x) @@ -378,9 +374,8 @@ def linear_regression_rows(y, x, covariates, block_size=16, pass_through=(), *, x=expr_float64, covariates=sequenceof(expr_float64), block_size=int, - weights=nullable(oneof(expr_float64, sequenceof(expr_float64))), pass_through=sequenceof(oneof(str, Expression))) -def _linear_regression_rows_nd(y, x, covariates, block_size=16, weights=None, pass_through=()) -> hail.Table: +def _linear_regression_rows_nd(y, x, covariates, block_size=16, pass_through=()) -> hail.Table: mt = matrix_table_source('linear_regression_rows_nd/x', x) check_entry_indexed('linear_regression_rows_nd/x', x) @@ -394,16 +389,6 @@ def _linear_regression_rows_nd(y, x, covariates, block_size=16, weights=None, pa y = wrap_to_list(y) - if weights is not None: - if y_is_list and is_chained and not isinstance(weights, list): - raise ValueError("When y is a list of lists, weights should be a list.") - elif y_is_list and not is_chained and isinstance(weights, list): - raise ValueError("When y is a single list, weights should be a single expression.") - elif not y_is_list and isinstance(weights, list): - raise ValueError("When y is a single expression, weights should be a single expression.") - - weights = wrap_to_list(weights) if weights is not None else None - for e in (itertools.chain.from_iterable(y) if is_chained else y): analyze('linear_regression_rows_nd/y', e, mt._col_indices) @@ -416,25 +401,19 @@ def _linear_regression_rows_nd(y, x, covariates, block_size=16, weights=None, pa if is_chained: y_field_name_groups = [[f'__y_{i}_{j}' for j in range(len(y[i]))] for i in range(len(y))] y_dict = dict(zip(itertools.chain.from_iterable(y_field_name_groups), itertools.chain.from_iterable(y))) - if weights is not None and len(weights) != len(y): - raise ValueError("Must specify same number of weights as groups of phenotypes") + else: y_field_name_groups = list(f'__y_{i}' for i in range(len(y))) y_dict = dict(zip(y_field_name_groups, y)) # Wrapping in a list since the code is written for the more general chained case. 
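Concretely, the wrapping mentioned in the comment above turns the single-list case into the same shape as the chained (list-of-lists) case, so later code always iterates over groups of the synthetic `__y_{i}` fields:

    # y was a single list of two phenotypes
    y_field_name_groups = ['__y_0', '__y_1']
    # after wrapping: one group, matching the chained case
    y_field_name_groups = [y_field_name_groups]   # [['__y_0', '__y_1']]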
y_field_name_groups = [y_field_name_groups] - if weights is not None and len(weights) != 1: - raise ValueError("Must specify same number of weights as groups of phenotypes") cov_field_names = list(f'__cov{i}' for i in range(len(covariates))) - weight_field_names = list(f'__weight_for_group_{i}' for i in range(len(weights))) if weights is not None else None - weight_dict = dict(zip(weight_field_names, weights)) if weights is not None else {} row_field_names = _get_regression_row_fields(mt, pass_through, 'linear_regression_rows_nd') # FIXME: selecting an existing entry field should be emitted as a SelectFields mt = mt._select_all(col_exprs=dict(**y_dict, - **weight_dict, **dict(zip(cov_field_names, covariates))), row_exprs=row_field_names, col_key=[], @@ -471,57 +450,37 @@ def setup_globals(ht): else: ht = ht.annotate_globals(cov_arrays=ht[sample_field_name].map(lambda sample_struct: hl.empty_array(hl.tfloat64))) - y_arrays_per_group = [ht[sample_field_name].map(lambda sample_struct: [sample_struct[y_name] for y_name in one_y_field_name_set]) for one_y_field_name_set in y_field_name_groups] - - if weight_field_names: - weight_arrays = ht[sample_field_name].map(lambda sample_struct: [sample_struct[weight_name] for weight_name in weight_field_names]) - else: - weight_arrays = ht[sample_field_name].map(lambda sample_struct: hl.empty_array(hl.tfloat64)) - ht = ht.annotate_globals( - y_arrays_per_group=y_arrays_per_group, - weight_arrays=weight_arrays + y_arrays_per_group=[ht[sample_field_name].map(lambda sample_struct: [sample_struct[y_name] for y_name in one_y_field_name_set]) for one_y_field_name_set in y_field_name_groups] ) - ht = ht.annotate_globals(all_covs_defined=ht.cov_arrays.map(lambda sample_covs: no_missing(sample_covs))) + all_covs_defined = ht.cov_arrays.map(lambda sample_covs: no_missing(sample_covs)) - def get_kept_samples(group_idx, sample_ys): + def get_kept_samples(sample_ys): # sample_ys is an array of samples, with each element being an array of the y_values return hl.enumerate(sample_ys).filter( - lambda idx_and_y_values: ht.all_covs_defined[idx_and_y_values[0]] & no_missing(idx_and_y_values[1]) & (hl.is_defined(ht.weight_arrays[idx_and_y_values[0]][group_idx]) if weights else True) + lambda idx_and_y_values: all_covs_defined[idx_and_y_values[0]] & no_missing(idx_and_y_values[1]) ).map(lambda idx_and_y_values: idx_and_y_values[0]) - ht = ht.annotate_globals(kept_samples=hl.enumerate(ht.y_arrays_per_group).starmap(get_kept_samples)) - ht = ht.annotate_globals(y_nds=hl.zip(ht.kept_samples, ht.y_arrays_per_group).starmap( - lambda sample_indices, y_arrays: hl.nd.array(sample_indices.map(lambda idx: y_arrays[idx])))) - ht = ht.annotate_globals(cov_nds=ht.kept_samples.map(lambda group: hl.nd.array(group.map(lambda idx: ht.cov_arrays[idx])))) - - if weights is None: - ht = ht.annotate_globals(sqrt_weights=hl.missing(hl.tarray(hl.tndarray(hl.tfloat64, 2)))) - ht = ht.annotate_globals(scaled_y_nds=ht.y_nds) - ht = ht.annotate_globals(scaled_cov_nds=ht.cov_nds) - else: - ht = ht.annotate_globals(weight_nds=hl.enumerate(ht.kept_samples).starmap( - lambda group_idx, group_sample_indices: hl.nd.array(group_sample_indices.map(lambda group_sample_idx: ht.weight_arrays[group_sample_idx][group_idx])))) - ht = ht.annotate_globals(sqrt_weights=ht.weight_nds.map(lambda weight_nd: weight_nd.map(lambda e: hl.sqrt(e)))) - ht = ht.annotate_globals(scaled_y_nds=hl.zip(ht.y_nds, ht.sqrt_weights).starmap(lambda y, sqrt_weight: y * sqrt_weight.reshape(-1, 1))) - ht = 
ht.annotate_globals(scaled_cov_nds=hl.zip(ht.cov_nds, ht.sqrt_weights).starmap(lambda cov, sqrt_weight: cov * sqrt_weight.reshape(-1, 1))) + kept_samples = ht.y_arrays_per_group.map(get_kept_samples) + y_nds = hl.zip(kept_samples, ht.y_arrays_per_group).map(lambda sample_indices_and_y_arrays: + hl.nd.array(sample_indices_and_y_arrays[0].map(lambda idx: + sample_indices_and_y_arrays[1][idx]))) + cov_nds = kept_samples.map(lambda group: hl.nd.array(group.map(lambda idx: ht.cov_arrays[idx]))) k = builtins.len(covariates) - ht = ht.annotate_globals(ns=ht.kept_samples.map(lambda one_sample_set: hl.len(one_sample_set))) - ht = ht.annotate_globals(cov_Qts=hl.if_else(k > 0, - ht.scaled_cov_nds.map(lambda one_cov_nd: hl.nd.qr(one_cov_nd)[0].T), - ht.ns.map(lambda n: hl.nd.zeros((0, n))))) - ht = ht.annotate_globals(Qtys=hl.zip(ht.cov_Qts, ht.scaled_y_nds).starmap(lambda cov_qt, y: cov_qt @ y)) - - return ht.select_globals( - kept_samples=ht.kept_samples, - __scaled_y_nds=ht.scaled_y_nds, - __sqrt_weight_nds=ht.sqrt_weights, - ns=ht.ns, - ds=ht.ns.map(lambda n: n - k - 1), - __cov_Qts=ht.cov_Qts, - __Qtys=ht.Qtys, - __yyps=hl.range(num_y_lists).map(lambda i: dot_rows_with_themselves(ht.scaled_y_nds[i].T) - dot_rows_with_themselves(ht.Qtys[i].T))) + ns = kept_samples.map(lambda one_sample_set: hl.len(one_sample_set)) + cov_Qts = hl.if_else(k > 0, + cov_nds.map(lambda one_cov_nd: hl.nd.qr(one_cov_nd)[0].T), + ns.map(lambda n: hl.nd.zeros((0, n)))) + Qtys = hl.zip(cov_Qts, y_nds).map(lambda cov_qt_and_y: cov_qt_and_y[0] @ cov_qt_and_y[1]) + return ht.annotate_globals( + kept_samples=kept_samples, + __y_nds=y_nds, + ns=ns, + ds=ns.map(lambda n: n - k - 1), + __cov_Qts=cov_Qts, + __Qtys=Qtys, + __yyps=hl.range(num_y_lists).map(lambda i: dot_rows_with_themselves(y_nds[i].T) - dot_rows_with_themselves(Qtys[i].T))) ht = setup_globals(ht) @@ -530,14 +489,11 @@ def process_block(block): # Processes one block group based on given idx. Returns a single struct. 
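The globals assembled above apply the usual trick of removing the covariate component of each phenotype through a QR decomposition; a rough NumPy analogue, for illustration only and assuming no missing data:

    import numpy as np

    C = np.random.rand(100, 3)      # covariates for the kept samples (cov_nds)
    Y = np.random.rand(100, 2)      # phenotypes for one y-group (y_nds)
    Q, _ = np.linalg.qr(C)          # cov_Qts stores Q.T
    Qty = Q.T @ Y                   # __Qtys
    # __yyps: y'y with the covariate component projected out, one value per phenotype
    yyp = (Y * Y).sum(axis=0) - (Qty * Qty).sum(axis=0)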
def process_y_group(idx): - if weights is not None: - X = (hl.nd.array(block[entries_field_name].map(lambda row: mean_impute(select_array_indices(row, ht.kept_samples[idx])))) * ht.__sqrt_weight_nds[idx]).T - else: - X = hl.nd.array(block[entries_field_name].map(lambda row: mean_impute(select_array_indices(row, ht.kept_samples[idx])))).T + X = hl.nd.array(block[entries_field_name].map(lambda row: mean_impute(select_array_indices(row, ht.kept_samples[idx])))).T n = ht.ns[idx] sum_x = X.sum(0) Qtx = ht.__cov_Qts[idx] @ X - ytx = ht.__scaled_y_nds[idx].T @ X + ytx = ht.__y_nds[idx].T @ X xyp = ytx - (ht.__Qtys[idx].T @ Qtx) xxpRec = (dot_rows_with_themselves(X.T) - dot_rows_with_themselves(Qtx.T)).map(lambda entry: 1 / entry) b = xyp * xxpRec @@ -866,7 +822,7 @@ def mean_impute(hl_array): def sigmoid(hl_nd): - return hl_nd.map(lambda x: hl.expit(x)) + return hl_nd.map(lambda x: hl.if_else(x > 0, hl.rbind(hl.exp(x), lambda exped: exped / (exped + 1)), 1 / (1 + hl.exp(-x)))) def nd_max(hl_nd): diff --git a/hail/python/hail/nd/__init__.py b/hail/python/hail/nd/__init__.py index eea4d022ce9..711ce6da441 100644 --- a/hail/python/hail/nd/__init__.py +++ b/hail/python/hail/nd/__init__.py @@ -1,9 +1,9 @@ -from .nd import array, from_column_major, arange, full, zeros, ones, svd, qr, solve, solve_triangular, diagonal, inv, concatenate, \ - eye, identity, vstack, hstack, maximum, minimum +from .nd import array, from_column_major, arange, full, zeros, ones, svd, qr, solve, diagonal, inv, concatenate, \ + eye, identity, vstack, hstack newaxis = None __all__ = [ - 'array', 'from_column_major', 'arange', 'full', 'zeros', 'ones', 'qr', 'solve', 'solve_triangular', 'svd', 'diagonal', 'inv', - 'concatenate', 'eye', 'identity', 'vstack', 'hstack', 'newaxis', 'maximum', 'minimum' + 'array', 'from_column_major', 'arange', 'full', 'zeros', 'ones', 'qr', 'solve', 'svd', 'diagonal', 'inv', + 'concatenate', 'eye', 'identity', 'vstack', 'hstack', 'newaxis' ] diff --git a/hail/python/hail/nd/nd.py b/hail/python/hail/nd/nd.py index b3dd65713a5..1d20486b505 100644 --- a/hail/python/hail/nd/nd.py +++ b/hail/python/hail/nd/nd.py @@ -3,15 +3,14 @@ import hail as hl from hail.expr.functions import _ndarray from hail.expr.functions import array as aarray -from hail.expr.types import HailType, tfloat64, tfloat32, ttuple, tndarray +from hail.expr.types import HailType, tfloat64, ttuple, tndarray from hail.typecheck import typecheck, nullable, oneof, tupleof, sequenceof from hail.expr.expressions import ( expr_int32, expr_int64, expr_tuple, expr_any, expr_array, expr_ndarray, - expr_numeric, Int64Expression, cast_expr, construct_expr, expr_bool) + expr_numeric, Int64Expression, cast_expr, construct_expr) from hail.expr.expressions.typed_expressions import NDArrayNumericExpression from hail.ir import NDArrayQR, NDArrayInv, NDArrayConcat, NDArraySVD, Apply - tsequenceof_nd = oneof(sequenceof(expr_ndarray()), expr_array(expr_ndarray())) shape_type = oneof(expr_int64, tupleof(expr_int64), expr_tuple()) @@ -239,8 +238,19 @@ def solve(a, b, no_crash=False): Solution to the system Ax = B. Shape is same as shape of B. 
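A small usage example consistent with the description above; the expected solution can be checked by hand (x = [-1, 1]):

    >>> a = hl.nd.array([[1.0, 2.0], [3.0, 5.0]])
    >>> b = hl.nd.array([1.0, 2.0])
    >>> hl.eval(hl.nd.solve(a, b))  # doctest: +SKIP_OUTPUT_CHECK
    array([-1.,  1.])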
""" + assert a.ndim == 2 + assert b.ndim == 1 or b.ndim == 2 + b_ndim_orig = b.ndim - a, b = solve_helper(a, b, b_ndim_orig) + + if b_ndim_orig == 1: + b = b.reshape((-1, 1)) + + if a.dtype.element_type != hl.tfloat64: + a = a.map(lambda e: hl.float64(e)) + if b.dtype.element_type != hl.tfloat64: + b = b.map(lambda e: hl.float64(e)) + if no_crash: name = "linear_solve_no_crash" return_type = hl.tstruct(solution=hl.tndarray(hl.tfloat64, 2), failed=hl.tbool) @@ -259,50 +269,6 @@ def solve(a, b, no_crash=False): return result -@typecheck(nd_coef=expr_ndarray(), nd_dep=expr_ndarray(), lower=expr_bool) -def solve_triangular(nd_coef, nd_dep, lower=False): - """Solve a triangular linear system. - - Parameters - ---------- - nd_coef : :class:`.NDArrayNumericExpression`, (N, N) - Triangular coefficient matrix. - nd_dep : :class:`.NDArrayNumericExpression`, (N,) or (N, K) - Dependent variables. - lower : `bool`: - If true, nd_coef is interpreted as a lower triangular matrix - If false, nd_coef is interpreted as a upper triangular matrix - - Returns - ------- - :class:`.NDArrayNumericExpression`, (N,) or (N, K) - Solution to the triangular system Ax = B. Shape is same as shape of B. - - """ - nd_dep_ndim_orig = nd_dep.ndim - nd_coef, nd_dep = solve_helper(nd_coef, nd_dep, nd_dep_ndim_orig) - return_type = hl.tndarray(hl.tfloat64, 2) - ir = Apply("linear_triangular_solve", return_type, nd_coef._ir, nd_dep._ir, lower._ir) - result = construct_expr(ir, return_type, nd_coef._indices, nd_coef._aggregations) - if nd_dep_ndim_orig == 1: - result = result.reshape((-1)) - return result - - -def solve_helper(nd_coef, nd_dep, nd_dep_ndim_orig): - assert nd_coef.ndim == 2 - assert nd_dep_ndim_orig == 1 or nd_dep_ndim_orig == 2 - - if nd_dep_ndim_orig == 1: - nd_dep = nd_dep.reshape((-1, 1)) - - if nd_coef.dtype.element_type != hl.tfloat64: - nd_coef = nd_coef.map(lambda e: hl.float64(e)) - if nd_dep.dtype.element_type != hl.tfloat64: - nd_dep = nd_dep.map(lambda e: hl.float64(e)) - return nd_coef, nd_dep - - @typecheck(nd=expr_ndarray(), mode=str) def qr(nd, mode="reduced"): """Performs a QR decomposition. @@ -590,7 +556,7 @@ def hstack(arrs): Examples -------- >>> a = hl.nd.array([1,2,3]) - >>> b = hl.nd.array([2, 3, 4]) + >>> b = hl.nd.array([2,3,4]) >>> hl.eval(hl.nd.hstack((a,b))) array([1, 2, 3, 2, 3, 4], dtype=int32) >>> a = hl.nd.array([[1],[2],[3]]) @@ -608,85 +574,3 @@ def hstack(arrs): axis = 1 return concatenate(arrs, axis) - - -@typecheck(nd1=expr_ndarray(), nd2=oneof(expr_ndarray(), list)) -def maximum(nd1, nd2): - """ - Compares elements at corresponding indexes in arrays - and returns an array of the maximum element found - at each compared index. - - If an array element being compared has the value NaN, - the maximum for that index will be NaN. - - Parameters - ---------- - nd1 : :class:`.NDArrayExpression` - nd2 : class:`.NDArrayExpression`, `.ArrayExpression`, numpy ndarray, or nested python lists/tuples. - Nd1 and nd2 must be the same shape or broadcastable into common shape. Nd1 and nd2 must - have elements of comparable types - - Returns - ------- - max_array : :class:`.NDArrayExpression` Element-wise maximums of nd1 and nd2. If nd1 has the - same shape as nd2, the resulting array will be of that shape. 
If nd1 and nd2 were broadcasted - into a common shape, the resulting array will be of that shape - - Examples - -------- - >>> a = hl.nd.array([1, 5, 3]) - >>> b = hl.nd.array([2, 3, 4]) - >>> hl.eval(hl.nd.maximum(a, b)) - array([2, 5, 4], dtype=int32) - >>> a = hl.nd.array([hl.float64(float("NaN")), 5.0, 3.0]) - >>> b = hl.nd.array([2.0, 3.0, hl.float64(float("NaN"))]) - >>> hl.eval(hl.nd.maximum(a, b)) - array([nan, 5., nan]) - """ - - if (nd1.dtype.element_type or nd2.dtype.element_type) == (tfloat64 or tfloat32): - return nd1.map2(nd2, lambda a, b: hl.if_else(hl.is_nan(a) | hl.is_nan(b), - hl.float64(float("NaN")), hl.if_else(a > b, a, b))) - return nd1.map2(nd2, lambda a, b: hl.if_else(a > b, a, b)) - - -@typecheck(nd1=expr_ndarray(), nd2=oneof(expr_ndarray(), list)) -def minimum(nd1, nd2): - """ - Compares elements at corresponding indexes in arrays - and returns an array of the minimum element found - at each compared index. - - If an array element being compared has the value NaN, - the minimum for that index will be NaN. - - Parameters - ---------- - nd1 : :class:`.NDArrayExpression` - nd2 : class:`.NDArrayExpression`, `.ArrayExpression`, numpy ndarray, or nested python lists/tuples. - nd1 and nd2 must be the same shape or broadcastable into common shape. Nd1 and nd2 must - have elements of comparable types - - Returns - ------- - min_array : :class:`.NDArrayExpression` Element-wise minimums of nd1 and nd2. If nd1 has the - same shape as nd2, the resulting array will be of that shape. If nd1 and nd2 were broadcasted - into a common shape, resulting array will be of that shape - - Examples - -------- - >>> a = hl.nd.array([1, 5, 3]) - >>> b = hl.nd.array([2, 3, 4]) - >>> hl.eval(hl.nd.minimum(a, b)) - array([1, 3, 3], dtype=int32) - >>> a = hl.nd.array([hl.float64(float("NaN")), 5.0, 3.0]) - >>> b = hl.nd.array([2.0, 3.0, hl.float64(float("NaN"))]) - >>> hl.eval(hl.nd.minimum(a, b)) - array([nan, 3., nan]) - """ - - if (nd1.dtype.element_type or nd2.dtype.element_type) == (tfloat64 or tfloat32): - return nd1.map2(nd2, lambda a, b: hl.if_else(hl.is_nan(a) | hl.is_nan(b), - hl.float64(float("NaN")), hl.if_else(a < b, a, b))) - return nd1.map2(nd2, lambda a, b: hl.if_else(a < b, a, b)) diff --git a/hail/python/hail/utils/misc.py b/hail/python/hail/utils/misc.py index 4c4cf2f6cb2..21a9088e2e8 100644 --- a/hail/python/hail/utils/misc.py +++ b/hail/python/hail/utils/misc.py @@ -489,10 +489,10 @@ def cleanup(table): def divide_null(num, denom): from hail.expr.expressions.base_expression import unify_types_limited - from hail.expr import missing, if_else + from hail.expr import null, cond typ = unify_types_limited(num.dtype, denom.dtype) assert typ is not None - return if_else(denom != 0, num / denom, missing(typ)) + return cond(denom != 0, num / denom, null(typ)) class HailSeedGenerator(object): diff --git a/hail/python/hailtop/aiogoogle/auth/credentials.py b/hail/python/hailtop/aiogoogle/auth/credentials.py index 9bd4336689d..c3d15694569 100644 --- a/hail/python/hailtop/aiogoogle/auth/credentials.py +++ b/hail/python/hailtop/aiogoogle/auth/credentials.py @@ -40,9 +40,10 @@ def default_credentials(): log.warning('unable to locate Google Cloud credentials file, will attempt to ' 'use instance metadata server instead') - + return InstanceMetadataCredentials() + async def get_access_token(self, session): pass @@ -100,7 +101,6 @@ async def get_access_token(self, session): })) as resp: return await resp.json() - # 
https://cloud.google.com/compute/docs/access/create-enable-service-accounts-for-instances#applications class InstanceMetadataCredentials(): async def get_access_token(self, session): diff --git a/hail/python/hailtop/aiogoogle/client/compute_client.py b/hail/python/hailtop/aiogoogle/client/compute_client.py index 2566500daeb..d0b8b25cfa3 100644 --- a/hail/python/hailtop/aiogoogle/client/compute_client.py +++ b/hail/python/hailtop/aiogoogle/client/compute_client.py @@ -1,25 +1,29 @@ import uuid -from typing import Mapping, Any, Optional, MutableMapping, List, Dict +from typing import Mapping, Any, Optional, MutableMapping import logging -import aiohttp from .base_client import BaseClient -from hailtop.utils import retry_transient_errors, sleep_and_backoff +from hailtop.utils import sleep_and_backoff log = logging.getLogger('compute_client') -class GCPOperationError(Exception): - def __init__(self, status: int, message: str, error_codes: Optional[List[str]], error_messages: Optional[List[str]], response: Dict[str, Any]): - super().__init__(message) - self.status = status - self.message = message - self.error_codes = error_codes - self.error_messages = error_messages - self.response = response +async def request_with_wait_for_done(request_f, path, params: MutableMapping[str, Any] = None, **kwargs): + assert 'params' not in kwargs - def __str__(self): - return f'GCPOperationError: {self.status}:{self.message} {self.error_codes} {self.error_messages}; {self.response}' + if params is None: + params = {} + + request_uuid = str(uuid.uuid4()) + if 'requestId' not in params: + params['requestId'] = request_uuid + + delay = 0.2 + while True: + resp = await request_f(path, params=params, **kwargs) + if resp['status'] == 'DONE': + return resp + delay = await sleep_and_backoff(delay) class PagedIterator: @@ -63,7 +67,6 @@ def __init__(self, project, **kwargs): super().__init__(f'https://compute.googleapis.com/compute/v1/projects/{project}', **kwargs) # docs: - # https://cloud.google.com/compute/docs/api/how-tos/api-requests-responses#handling_api_responses # https://cloud.google.com/compute/docs/reference/rest/v1 # https://cloud.google.com/compute/docs/reference/rest/v1/instances/insert # https://cloud.google.com/compute/docs/reference/rest/v1/instances/get @@ -74,50 +77,13 @@ async def list(self, path: str, *, params: MutableMapping[str, Any] = None, **kw return PagedIterator(self, path, params, kwargs) async def create_disk(self, path: str, *, params: MutableMapping[str, Any] = None, **kwargs): - return await self._request_with_zonal_operations_response(self.post, path, params, **kwargs) + return await request_with_wait_for_done(self.post, path, params, **kwargs) async def attach_disk(self, path: str, *, params: MutableMapping[str, Any] = None, **kwargs): - return await self._request_with_zonal_operations_response(self.post, path, params, **kwargs) + return await request_with_wait_for_done(self.post, path, params, **kwargs) async def detach_disk(self, path: str, *, params: MutableMapping[str, Any] = None, **kwargs): - return await self._request_with_zonal_operations_response(self.post, path, params, **kwargs) + return await request_with_wait_for_done(self.post, path, params, **kwargs) async def delete_disk(self, path: str, *, params: MutableMapping[str, Any] = None, **kwargs): - return await self.delete(path, params=params, **kwargs) - - async def _request_with_zonal_operations_response(self, request_f, path, params: MutableMapping[str, Any] = None, **kwargs): - params = params or dict() - assert 
'requestId' not in params - - async def request_and_wait(): - params['requestId'] = str(uuid.uuid4()) - - resp = await request_f(path, params=params, **kwargs) - - operation_id = resp['id'] - zone = resp['zone'].rsplit('/', 1)[1] - - delay = 2 - while True: - result = await self.post(f'/zones/{zone}/operations/{operation_id}/wait', - timeout=aiohttp.ClientTimeout(total=150)) - if result['status'] == 'DONE': - error = result.get('error') - if error: - assert result.get('httpErrorStatusCode') is not None - assert result.get('httpErrorMessage') is not None - - error_codes = [e['code'] for e in error['errors']] - error_messages = [e['message'] for e in error['errors']] - - raise GCPOperationError(result['httpErrorStatusCode'], - result['httpErrorMessage'], - error_codes, - error_messages, - result) - - return result - - delay = await sleep_and_backoff(delay, max_delay=15) - - await retry_transient_errors(request_and_wait) + return await request_with_wait_for_done(self.delete, path, params, **kwargs) diff --git a/hail/python/hailtop/batch/backend.py b/hail/python/hailtop/batch/backend.py index 9c5d1587b7c..5d7bbffd709 100644 --- a/hail/python/hailtop/batch/backend.py +++ b/hail/python/hailtop/batch/backend.py @@ -571,12 +571,11 @@ def _cp(src, dst): f"You must specify 'image' for Python jobs if you are using a Python version other than 3.6, 3.7, or 3.8 (you are using {version})") job._image = f'hailgenetics/python-dill:{version.major}.{version.minor}-slim' - if len(pyjobs) > 0: - with tqdm(total=len(pyjobs), desc='upload python functions', disable=disable_progress_bar) as pbar: - async def compile_job(job): - await job._compile(local_tmpdir, batch_remote_tmpdir) - pbar.update(1) - await bounded_gather(*[functools.partial(compile_job, j) for j in pyjobs], parallelism=150) + with tqdm(total=len(pyjobs), desc='upload python functions', disable=disable_progress_bar) as pbar: + async def compile_job(job): + await job._compile(local_tmpdir, batch_remote_tmpdir) + pbar.update(1) + await bounded_gather(*[functools.partial(compile_job, j) for j in pyjobs], parallelism=150) for job in tqdm(batch._jobs, desc='create job objects', disable=disable_progress_bar): inputs = [x for r in job._inputs for x in copy_input(r)] diff --git a/hail/python/hailtop/batch/docs/change_log.rst b/hail/python/hailtop/batch/docs/change_log.rst index 8d335c74a58..1b8e258b35e 100644 --- a/hail/python/hailtop/batch/docs/change_log.rst +++ b/hail/python/hailtop/batch/docs/change_log.rst @@ -3,14 +3,6 @@ Change Log ========== -**Version 0.2.72** - -- Made failed Python Jobs have non-zero exit codes. - -**Version 0.2.71** - -- Added the ability to set values for ``Job.cpu``, ``Job.memory``, ``Job.storage``, and ``Job.timeout`` to `None` - **Version 0.2.70** - Made submitting ``PythonJob`` faster when using the ``ServiceBackend`` diff --git a/hail/python/hailtop/batch/job.py b/hail/python/hailtop/batch/job.py index 0fa6673de16..f2d6ce87767 100644 --- a/hail/python/hailtop/batch/job.py +++ b/hail/python/hailtop/batch/job.py @@ -26,12 +26,6 @@ def _add_resource_to_set(resource_set, resource, include_rg=True): resource_set.add(resource_file) -def opt_str(x): - if x is None: - return x - return str(x) - - class Job: """ Object representing a single job to execute. 
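For reference, the disk-operation helper introduced earlier in this patch (`request_with_wait_for_done`) is a plain poll-until-DONE loop with exponential backoff; a hedged sketch of that pattern, reusing `sleep_and_backoff` from `hailtop.utils` (the helper name below is illustrative):

    import uuid
    from hailtop.utils import sleep_and_backoff

    async def wait_until_done(request_f, path, params=None, **kwargs):
        params = dict(params or {})
        # a fixed requestId lets the Compute API treat retries as the same request
        params.setdefault('requestId', str(uuid.uuid4()))
        delay = 0.2
        while True:
            resp = await request_f(path, params=params, **kwargs)
            if resp['status'] == 'DONE':
                return resp
            delay = await sleep_and_backoff(delay)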
@@ -150,7 +144,7 @@ def depends_on(self, *jobs: 'Job') -> 'Job': def env(self, variable: str, value: str): self._env[variable] = value - def storage(self, storage: Optional[Union[str, int]]) -> 'Job': + def storage(self, storage: Union[str, int]) -> 'Job': """ Set the job's storage size. @@ -188,18 +182,17 @@ def storage(self, storage: Optional[Union[str, int]]) -> 'Job': Parameters ---------- storage: - Units are in bytes if `storage` is an :obj:`int`. If `None`, use the - default storage size for the :class:`.ServiceBackend` (0 Gi). + Units are in bytes if `storage` is an :obj:`int`. Returns ------- Same job object with storage set. """ - self._storage = opt_str(storage) + self._storage = str(storage) return self - def memory(self, memory: Optional[Union[str, int]]) -> 'Job': + def memory(self, memory: Union[str, int]) -> 'Job': """ Set the job's memory requirements. @@ -231,18 +224,17 @@ def memory(self, memory: Optional[Union[str, int]]) -> 'Job': Parameters ---------- memory: - Units are in bytes if `memory` is an :obj:`int`. If `None`, - use the default value for the :class:`.ServiceBackend` ('standard'). + Units are in bytes if `memory` is an :obj:`int`. Returns ------- Same job object with memory requirements set. """ - self._memory = opt_str(memory) + self._memory = str(memory) return self - def cpu(self, cores: Optional[Union[str, int, float]]) -> 'Job': + def cpu(self, cores: Union[str, int, float]) -> 'Job': """ Set the job's CPU requirements. @@ -270,16 +262,14 @@ def cpu(self, cores: Optional[Union[str, int, float]]) -> 'Job': Parameters ---------- cores: - Units are in cpu if `cores` is numeric. If `None`, - use the default value for the :class:`.ServiceBackend` - (1 cpu). + Units are in cpu if `cores` is numeric. Returns ------- Same job object with CPU requirements set. """ - self._cpu = opt_str(cores) + self._cpu = str(cores) return self def always_run(self, always_run: bool = True) -> 'Job': @@ -318,7 +308,7 @@ def always_run(self, always_run: bool = True) -> 'Job': self._always_run = always_run return self - def timeout(self, timeout: Optional[Union[float, int]]) -> 'Job': + def timeout(self, timeout: Union[float, int]) -> 'Job': """ Set the maximum amount of time this job can run for. @@ -338,7 +328,6 @@ def timeout(self, timeout: Optional[Union[float, int]]) -> 'Job': ---------- timeout: Maximum amount of time for a job to run before being killed. - If `None`, there is no timeout. 
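Putting the resource setters described above together, a hedged usage sketch (assumes the usual `hailtop.batch` entry points; values follow the unit conventions in these docstrings):

    import hailtop.batch as hb

    b = hb.Batch(name='example')
    j = b.new_job(name='step1')
    # each setter returns the same Job, so calls can be chained
    j.storage('10Gi').memory('4Gi').cpu(2).timeout(600)
    j.command('echo hello')
    b.run()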
Returns ------- @@ -976,5 +965,4 @@ def wrapped(*args, **kwargs): except Exception as e: traceback.print_exc() dill.dump((e, traceback.format_exception(type(e), e, e.__traceback__)), dill_out, recurse=True) - raise e "''') diff --git a/hail/python/hailtop/batch_client/aioclient.py b/hail/python/hailtop/batch_client/aioclient.py index ea7d714e40f..54274fe43e1 100644 --- a/hail/python/hailtop/batch_client/aioclient.py +++ b/hail/python/hailtop/batch_client/aioclient.py @@ -35,7 +35,15 @@ def _get_error(job_status, task): if not container_status: return None - return container_status.get('error') + error = container_status.get('error') + if error: + return error + + docker_container_status = container_status.get('container_status') + if not docker_container_status: + return None + + return docker_container_status.get('error') @staticmethod def _get_out_of_memory(job_status, task): @@ -47,15 +55,15 @@ def _get_out_of_memory(job_status, task): if not container_statuses: return None - task_status = container_statuses.get(task) - if not task_status: + container_status = container_statuses.get(task) + if not container_status: return None - container_status = task_status.get('container_status') - if not container_status: + docker_container_status = container_status.get('container_status') + if not docker_container_status: return None - return container_status['out_of_memory'] + return docker_container_status['out_of_memory'] @staticmethod def _get_container_status_exit_code(container_status): diff --git a/hail/python/hailtop/utils/__init__.py b/hail/python/hailtop/utils/__init__.py index 7928b97bf29..dd94f1f81bd 100644 --- a/hail/python/hailtop/utils/__init__.py +++ b/hail/python/hailtop/utils/__init__.py @@ -1,4 +1,4 @@ -from .time import time_msecs, time_msecs_str, humanize_timedelta_msecs, parse_timestamp_msecs +from .time import time_msecs, time_msecs_str, humanize_timedelta_msecs from .utils import ( unzip, async_to_blocking, blocking_to_async, AsyncWorkerPool, bounded_gather, grouped, sync_sleep_and_backoff, sleep_and_backoff, is_transient_error, @@ -12,8 +12,8 @@ url_scheme, Notice, periodically_call, dump_all_stacktraces, find_spark_home, TransientError, bounded_gather2, OnlineBoundedGather2, unpack_comma_delimited_inputs, retry_all_errors_n_times) from .process import ( - CalledProcessError, check_shell, check_shell_output, check_exec_output, - sync_check_shell, sync_check_shell_output) + CalledProcessError, check_shell, check_shell_output, sync_check_shell, + sync_check_shell_output) from .tqdm import tqdm, TQDM_DEFAULT_DISABLE from .rates import ( rate_cpu_hour_to_mcpu_msec, rate_gib_hour_to_mib_msec, rate_gib_month_to_mib_msec, @@ -34,7 +34,6 @@ 'CalledProcessError', 'check_shell', 'check_shell_output', - 'check_exec_output', 'sync_check_shell', 'sync_check_shell_output', 'bounded_gather', @@ -83,6 +82,5 @@ 'unpack_comma_delimited_inputs', 'is_google_registry_domain', 'parse_docker_image_reference', - 'retry_all_errors_n_times', - 'parse_timestamp_msecs', + 'retry_all_errors_n_times' ] diff --git a/hail/python/hailtop/utils/process.py b/hail/python/hailtop/utils/process.py index 05c966fe8cd..23c8c8d075d 100644 --- a/hail/python/hailtop/utils/process.py +++ b/hail/python/hailtop/utils/process.py @@ -28,20 +28,6 @@ async def check_shell_output(script, echo=False): return outerr -async def check_exec_output(command, *args, echo=False): - if echo: - print([command, *args]) - proc = await asyncio.create_subprocess_exec( - command, *args, - stdout=asyncio.subprocess.PIPE, - 
stderr=asyncio.subprocess.PIPE) - outerr = await proc.communicate() - if proc.returncode != 0: - script = ' '.join([command, *args]) - raise CalledProcessError(script, proc.returncode, outerr) - return outerr - - async def check_shell(script, echo=False): # discard output await check_shell_output(script, echo) diff --git a/hail/python/hailtop/utils/time.py b/hail/python/hailtop/utils/time.py index 0ef533de4bc..8e1b8739743 100644 --- a/hail/python/hailtop/utils/time.py +++ b/hail/python/hailtop/utils/time.py @@ -1,7 +1,6 @@ import time import datetime import humanize -import dateutil.parser def time_msecs() -> int: @@ -18,9 +17,3 @@ def humanize_timedelta_msecs(delta_msecs): return None return humanize.naturaldelta(datetime.timedelta(milliseconds=delta_msecs)) - - -def parse_timestamp_msecs(ts): - if ts is None: - return ts - return dateutil.parser.isoparse(ts).timestamp() * 1000 diff --git a/hail/python/hailtop/utils/utils.py b/hail/python/hailtop/utils/utils.py index 6e137d5dbb2..16ecf752e9f 100644 --- a/hail/python/hailtop/utils/utils.py +++ b/hail/python/hailtop/utils/utils.py @@ -23,6 +23,8 @@ from requests.adapters import HTTPAdapter from urllib3.poolmanager import PoolManager +import hailtop + from .time import time_msecs @@ -550,18 +552,12 @@ def is_transient_error(e): # # OSError: [Errno 51] Connect call failed ('35.188.91.25', 443) # https://hail.zulipchat.com/#narrow/stream/223457-Batch-support/topic/ssl.20error - import hailtop.aiogoogle.client.compute_client # pylint: disable=import-outside-toplevel,cyclic-import - import hailtop.httpx # pylint: disable=import-outside-toplevel,cyclic-import - if isinstance(e, aiohttp.ClientResponseError) and ( e.status in RETRYABLE_HTTP_STATUS_CODES): # nginx returns 502 if it cannot connect to the upstream server # 408 request timeout, 500 internal server error, 502 bad gateway # 503 service unavailable, 504 gateway timeout return True - if (isinstance(e, hailtop.aiogoogle.client.compute_client.GCPOperationError) - and 'QUOTA_EXCEEDED' in e.error_codes): - return True if isinstance(e, hailtop.httpx.ClientResponseError) and ( e.status == 403 and 'rateLimitExceeded' in e.body): return True @@ -615,11 +611,11 @@ def is_transient_error(e): return False -async def sleep_and_backoff(delay, max_delay=30.0): - # exponentially back off, up to (expected) max_delay +async def sleep_and_backoff(delay): + # exponentially back off, up to (expected) max of 30s t = delay * random.uniform(0.9, 1.1) await asyncio.sleep(t) - return min(delay * 2, max_delay) + return min(delay * 2, 30.0) def sync_sleep_and_backoff(delay): @@ -773,8 +769,6 @@ async def retry_long_running(name, f, *args, **kwargs): try: start_time = time_msecs() return await f(*args, **kwargs) - except asyncio.CancelledError: - raise except Exception: end_time = time_msecs() diff --git a/hail/python/test/hail/expr/test_expr.py b/hail/python/test/hail/expr/test_expr.py index 78a701d1952..11b7c364f69 100644 --- a/hail/python/test/hail/expr/test_expr.py +++ b/hail/python/test/hail/expr/test_expr.py @@ -55,7 +55,7 @@ def same_as_python(*args): same_as_python(10, -5, -1) same_as_python(10, -5, -4) - with self.assertRaisesRegex(hl.utils.HailUserError, 'Array range cannot have step size 0'): + with self.assertRaisesRegex(hl.utils.FatalError, 'Array range cannot have step size 0'): hl.eval(hl.range(0, 1, 0)) def test_zeros(self): @@ -209,22 +209,15 @@ def test_dict_methods(self): x6=kt.a.keys(), x7=kt.a.values(), x8=kt.a.size(), - x9=kt.a.map_values(lambda v: v * 2.0), - x10=kt.a.items(), + 
x9=kt.a.map_values(lambda v: v * 2.0) ).take(1)[0]) expected = {'a': {'cat': 3, 'dog': 7}, 'x': 2.0, 'x1': 3, 'x2': 7, 'x3': False, 'x4': False, 'x5': {'cat', 'dog'}, 'x6': ['cat', 'dog'], - 'x7': [3, 7], 'x8': 2, 'x9': {'cat': 6.0, 'dog': 14.0}, - 'x10': [('cat', 3), ('dog', 7)]} + 'x7': [3, 7], 'x8': 2, 'x9': {'cat': 6.0, 'dog': 14.0}} self.assertDictEqual(result, expected) - def test_dict_missing_error(self): - d = hl.dict({'a': 2, 'b': 3}) - with pytest.raises(hl.utils.HailUserError, match='Key NA not found in dictionary'): - hl.eval(d[hl.missing(hl.tstr)]) - def test_numeric_conversion(self): schema = hl.tstruct(a=hl.tfloat64, b=hl.tfloat64, c=hl.tint32, d=hl.tint32) rows = [{'a': 2.0, 'b': 4.0, 'c': 1, 'd': 5}] diff --git a/hail/python/test/hail/expr/test_math.py b/hail/python/test/hail/expr/test_math.py deleted file mode 100644 index d13a3766df0..00000000000 --- a/hail/python/test/hail/expr/test_math.py +++ /dev/null @@ -1,15 +0,0 @@ -import hail as hl -import scipy.special as scsp -import pytest - -def test_logit(): - assert hl.eval(hl.logit(.5)) == 0.0 - assert hl.eval(hl.is_infinite(hl.logit(1.0))) - assert hl.eval(hl.is_nan(hl.logit(1.01))) - assert hl.eval(hl.logit(.27)) == scsp.logit(.27) - -def test_expit(): - assert hl.eval(hl.expit(0.0)) == 0.5 - assert hl.eval(hl.expit(800)) == 1.0 - assert hl.eval(hl.expit(-920)) == 0.0 - assert hl.eval(hl.expit(.75)) == scsp.expit(.75) diff --git a/hail/python/test/hail/expr/test_ndarrays.py b/hail/python/test/hail/expr/test_ndarrays.py index 36726641466..3192a9f4c0e 100644 --- a/hail/python/test/hail/expr/test_ndarrays.py +++ b/hail/python/test/hail/expr/test_ndarrays.py @@ -335,31 +335,31 @@ def test_ndarray_reshape(): assert hl.eval(hl.nd.array(hl.range(20)).reshape( hl.missing(hl.ttuple(hl.tint64, hl.tint64)))) is None - with pytest.raises(HailUserError) as exc: + with pytest.raises(FatalError) as exc: hl.eval(hl.literal(np_cube).reshape((-1, -1))) assert "more than one -1" in str(exc.value) - with pytest.raises(HailUserError) as exc: + with pytest.raises(FatalError) as exc: hl.eval(hl.literal(np_cube).reshape((20,))) assert "requested shape is incompatible with number of elements" in str(exc.value) - with pytest.raises(HailUserError) as exc: + with pytest.raises(FatalError) as exc: hl.eval(a.reshape((3,))) assert "requested shape is incompatible with number of elements" in str(exc.value) - with pytest.raises(HailUserError) as exc: + with pytest.raises(FatalError) as exc: hl.eval(a.reshape(())) assert "requested shape is incompatible with number of elements" in str(exc.value) - with pytest.raises(HailUserError) as exc: + with pytest.raises(FatalError) as exc: hl.eval(hl.literal(np_cube).reshape((0, 2, 2))) assert "requested shape is incompatible with number of elements" in str(exc.value) - with pytest.raises(HailUserError) as exc: + with pytest.raises(FatalError) as exc: hl.eval(hl.literal(np_cube).reshape((2, 2, -2))) assert "must contain only nonnegative numbers or -1" in str(exc.value) - with pytest.raises(HailUserError) as exc: + with pytest.raises(FatalError) as exc: hl.eval(shape_zero.reshape((0, -1))) assert "Can't reshape" in str(exc.value) @@ -419,11 +419,11 @@ def test_ndarray_map2(): (b + na, np.array(a + b)), (nx + y, x + y), (ncube1 + cube2, cube1 + cube2), + + # Addition (na + na, np.array(a + a)), (nx + ny, x + y), (ncube1 + ncube2, cube1 + cube2), - (nx.map2(y, lambda c, d: c+d), x + y), - (ncube1.map2(cube2, lambda c, d: c+d), cube1 + cube2), # Broadcasting (ncube1 + na, cube1 + a), (na + ncube1, a + cube1), @@ 
-431,9 +431,6 @@ def test_ndarray_map2(): (ny + ncube1, y + cube1), (nrow_vec + ncube1, row_vec + cube1), (ncube1 + nrow_vec, cube1 + row_vec), - (ncube1.map2(na, lambda c, d: c+d), cube1 + a), - (nrow_vec.map2(ncube1, lambda c, d: c+d), row_vec + cube1), - # Subtraction (na - na, np.array(a - a)), @@ -461,7 +458,6 @@ def test_ndarray_map2(): (ncube1 * nrow_vec, cube1 * row_vec), (nrow_vec * ncube1, row_vec * cube1), - # Floor div (na // na, np.array(a // a)), (nx // nx, x // x), @@ -541,9 +537,6 @@ def test_ndarray_sum(): assert hl.eval(m.sum()) == 10 assert hl.eval(m.sum((0, 1))) == 10 - bool_nd = hl.nd.array([[True, False, True], [False, True, True]]) - assert hl.eval(bool_nd.sum()) == 4 - with pytest.raises(ValueError) as exc: m.sum(3) assert "out of bounds for ndarray of dimension 2" in str(exc.value) @@ -667,30 +660,14 @@ def test_ndarray_matmul(): with pytest.raises(ValueError): cube @ hl.nd.array(5) - with pytest.raises(HailUserError) as exc: + with pytest.raises(FatalError) as exc: hl.eval(r @ r) assert "Matrix dimensions incompatible: (2, 3) can't be multiplied by matrix with dimensions (2, 3)" in str(exc.value), str(exc.value) - with pytest.raises(HailUserError) as exc: + with pytest.raises(FatalError) as exc: hl.eval(hl.nd.array([1, 2]) @ hl.nd.array([1, 2, 3])) assert "Matrix dimensions incompatible" in str(exc.value) -def test_ndarray_matmul_dgemv(): - np_mat_3_4 = np.arange(12, dtype=np.float64).reshape((3, 4)) - np_mat_4_3 = np.arange(12, dtype=np.float64).reshape((4, 3)) - np_vec_3 = np.array([4, 2, 7], dtype=np.float64) - np_vec_4 = np.array([9, 17, 3, 1], dtype=np.float64) - - mat_3_4 = hl.nd.array(np_mat_3_4) - mat_4_3 = hl.nd.array(np_mat_4_3) - vec_3 = hl.nd.array(np_vec_3) - vec_4 = hl.nd.array(np_vec_4) - - assert_ndarrays_eq( - (mat_3_4 @ vec_4, np_mat_3_4 @ np_vec_4), - (mat_4_3 @ vec_3, np_mat_4_3 @ np_vec_3), - (mat_3_4.T @ vec_3, np_mat_3_4.T @ np_vec_3) - ) def test_ndarray_big(): assert hl.eval(hl.nd.array(hl.range(100_000))).size == 100_000 @@ -718,7 +695,7 @@ def test_ndarray_arange(): (hl.nd.arange(2, 47, 13), np.arange(2, 47, 13)) ) - with pytest.raises(HailUserError) as exc: + with pytest.raises(FatalError) as exc: hl.eval(hl.nd.arange(5, 20, 0)) assert "Array range cannot have step size 0" in str(exc.value) @@ -752,25 +729,6 @@ def test_ndarray_diagonal(): assert "2 dimensional" in str(exc.value) -def test_ndarray_solve_triangular(): - a = hl.nd.array([[1, 1], [0, 1]]) - b = hl.nd.array([2, 1]) - b2 = hl.nd.array([[11, 5], [6, 3]]) - - a_low = hl.nd.array([[4, 0], [2, 1]]) - b_low = hl.nd.array([4, 5]) - - a_sing = hl.nd.array([[0, 1], [0, 1]]) - b_sing = hl.nd.array([2, 2]) - - assert np.allclose(hl.eval(hl.nd.solve_triangular(a, b)), np.array([1., 1.])) - assert np.allclose(hl.eval(hl.nd.solve_triangular(a, b2)), np.array([[5., 2.], [6., 3.]])) - assert np.allclose(hl.eval(hl.nd.solve_triangular(a_low, b_low, True)), np.array([[1., 3.]])) - with pytest.raises(HailUserError) as exc: - hl.eval(hl.nd.solve_triangular(a_sing, b_sing)) - assert "singular" in str(exc.value), str(exc.value) - - def test_ndarray_solve(): a = hl.nd.array([[1, 2], [3, 5]]) b = hl.nd.array([1, 2]) @@ -780,7 +738,7 @@ def test_ndarray_solve(): assert np.allclose(hl.eval(hl.nd.solve(a, b2)), np.array([[-1., -16.], [1, 12]])) assert np.allclose(hl.eval(hl.nd.solve(a.T, b2.T)), np.array([[19., 26.], [-6, -8]])) - with pytest.raises(HailUserError) as exc: + with pytest.raises(FatalError) as exc: hl.eval(hl.nd.solve(hl.nd.array([[1, 2], [1, 2]]), hl.nd.array([8, 10]))) assert 
"singular" in str(exc.value), str(exc.value) @@ -1164,45 +1122,3 @@ def test_agg_ndarray_sum(): mismatched = mismatched.annotate(x=hl.nd.ones((mismatched.idx,))) mismatched.aggregate(hl.agg.ndarray_sum(mismatched.x)) assert "Can't sum" in str(exc.value) - - -def test_maximum_minimuim(): - x = np.arange(4) - y = np.array([7, 0, 2, 4]) - z = [5, 2, 3, 1] - nan_elem = np.array([1.0, float("nan"), 3.0, 6.0]) - f = np.array([1.0, 3.0, 6.0, 4.0]) - nx = hl.nd.array(x) - ny = hl.nd.array(y) - nf = hl.nd.array(f) - ndnan_elem = hl.nd.array([1.0, hl.float64(float("NaN")), 3.0, 6.0]) - - assert_ndarrays_eq( - (hl.nd.maximum(nx, ny), np.maximum(x, y)), - (hl.nd.maximum(ny, z), np.maximum(y, z)), - (hl.nd.minimum(nx, ny), np.minimum(x, y)), - (hl.nd.minimum(ny, z), np.minimum(y, z)), - ) - - np_nan_max = np.maximum(nan_elem, f) - nan_max = hl.eval(hl.nd.maximum(ndnan_elem, nf)) - np_nan_min = np.minimum(nan_elem, f) - nan_min = hl.eval(hl.nd.minimum(ndnan_elem, nf)) - max_matches = 0 - min_matches = 0 - for a, b in zip(np_nan_max, nan_max): - if a == b: - max_matches += 1 - elif np.isnan(a) and np.isnan(b): - max_matches += 1 - for a, b in zip(np_nan_min, nan_min): - if a == b: - min_matches += 1 - elif np.isnan(a) and np.isnan(b): - min_matches += 1 - - assert(nan_max.size == max_matches) - assert(nan_min.size == min_matches) - - - diff --git a/hail/python/test/hail/matrixtable/test_file_formats.py b/hail/python/test/hail/matrixtable/test_file_formats.py index c9d635452a3..43279bf7949 100644 --- a/hail/python/test/hail/matrixtable/test_file_formats.py +++ b/hail/python/test/hail/matrixtable/test_file_formats.py @@ -37,6 +37,8 @@ class Tests(unittest.TestCase): def test_write(self): create_backward_compatibility_files() + @fails_service_backend() + @fails_local_backend() def test_backward_compatability(self): import os @@ -79,4 +81,4 @@ def backward_compatible_same(current, old): f = os.path.join(matrix_table_dir, '{}.hmt'.format(i)) n += 1 - assert n == 72 + assert n == 76 diff --git a/hail/python/test/hail/matrixtable/test_matrix_table.py b/hail/python/test/hail/matrixtable/test_matrix_table.py index 6ab70d18e3f..01cfa5536f9 100644 --- a/hail/python/test/hail/matrixtable/test_matrix_table.py +++ b/hail/python/test/hail/matrixtable/test_matrix_table.py @@ -1229,13 +1229,6 @@ def test_write_checkpoint_file(self): self.assertTrue(mt._same(mt2)) @fails_service_backend() - def test_write_no_parts(self): - mt = hl.utils.range_matrix_table(10, 10, 2).filter_rows(False) - path = new_temp_file(extension='mt') - path2 = new_temp_file(extension='mt') - assert mt.checkpoint(path)._same(mt) - hl.read_matrix_table(path, _drop_rows=True).write(path2) - def test_nulls_in_distinct_joins(self): # MatrixAnnotateRowsTable uses left distinct join diff --git a/hail/python/test/hail/methods/test_impex.py b/hail/python/test/hail/methods/test_impex.py index e073544f9db..6a4fe7b4425 100644 --- a/hail/python/test/hail/methods/test_impex.py +++ b/hail/python/test/hail/methods/test_impex.py @@ -775,34 +775,6 @@ def get_data(a2_reference): (j.a1_vqc.homozygote_count[0] == j.a2_vqc.homozygote_count[1]) & (j.a1_vqc.homozygote_count[1] == j.a2_vqc.homozygote_count[0]))) - @fails_service_backend() - @fails_local_backend() - def test_import_plink_same_locus(self): - mt = hl.balding_nichols_model(n_populations=2, n_samples=10, n_variants=100) - mt = mt.key_rows_by(locus=hl.locus('1', 100, reference_genome='GRCh37'), alleles=mt.alleles).select_rows() - mt = mt.key_cols_by(s=hl.str(mt.sample_idx)).select_cols() - mt = 
mt.select_globals() - out = new_temp_file(prefix='plink') - hl.export_plink(mt, out) - mt2 = hl.import_plink(f'{out}.bed', f'{out}.bim', f'{out}.fam').select_cols().select_rows() - assert mt2._same(mt) - - mt3 = hl.import_plink(f'{out}.bed', f'{out}.bim', f'{out}.fam', min_partitions=10).select_cols().select_rows() - assert mt3._same(mt) - - @fails_service_backend() - @fails_local_backend() - def test_import_plink_partitions(self): - mt = hl.balding_nichols_model(n_populations=2, n_samples=10, n_variants=100) - mt = mt.select_rows() - mt = mt.key_cols_by(s=hl.str(mt.sample_idx)).select_cols() - mt = mt.select_globals() - out = new_temp_file(prefix='plink') - hl.export_plink(mt, out) - mt2 = hl.import_plink(f'{out}.bed', f'{out}.bim', f'{out}.fam', min_partitions=10).select_cols().select_rows() - assert mt2.n_partitions() == 10 - assert mt2._same(mt) - @fails_service_backend() @fails_local_backend() def test_import_plink_contig_recoding_w_reference(self): diff --git a/hail/python/test/hail/methods/test_statgen.py b/hail/python/test/hail/methods/test_statgen.py index 6653e8db5f2..02525e206c8 100644 --- a/hail/python/test/hail/methods/test_statgen.py +++ b/hail/python/test/hail/methods/test_statgen.py @@ -1,5 +1,4 @@ import os -import math import unittest import pytest import numpy as np @@ -462,150 +461,6 @@ def eq(x1, x2): logreg_functions = [hl.logistic_regression_rows, hl._logistic_regression_rows_nd] if backend_name == "spark" else [hl._logistic_regression_rows_nd] - def test_weighted_linear_regression(self): - covariates = hl.import_table(resource('regressionLinear.cov'), - key='Sample', - types={'Cov1': hl.tfloat, 'Cov2': hl.tfloat}) - pheno = hl.import_table(resource('regressionLinear.pheno'), - key='Sample', - missing='0', - types={'Pheno': hl.tfloat}) - - weights = hl.import_table(resource('regressionLinear.weights'), - key='Sample', - missing='0', - types={'Sample': hl.tstr, 'Weight1': hl.tfloat, 'Weight2': hl.tfloat}) - - mt = hl.import_vcf(resource('regressionLinear.vcf')) - mt = mt.add_col_index() - - mt = mt.annotate_cols(y=hl.coalesce(pheno[mt.s].Pheno, 1.0)) - mt = mt.annotate_entries(x=hl.coalesce(mt.GT.n_alt_alleles(), 1.0)) - my_covs = [1.0] + list(covariates[mt.s].values()) - - ht_with_weights = hl._linear_regression_rows_nd(y=mt.y, - x=mt.x, - covariates=my_covs, - weights=mt.col_idx) - - ht_pre_weighted_1 = hl._linear_regression_rows_nd(y=mt.y * hl.sqrt(mt.col_idx), - x=mt.x * hl.sqrt(mt.col_idx), - covariates=list(map(lambda e: e * hl.sqrt(mt.col_idx), my_covs))) - - ht_pre_weighted_2 = hl._linear_regression_rows_nd(y=mt.y * hl.sqrt(mt.col_idx + 5), - x=mt.x * hl.sqrt(mt.col_idx + 5), - covariates=list(map(lambda e: e * hl.sqrt(mt.col_idx + 5), my_covs))) - - ht_from_agg = mt.annotate_rows(my_linreg=hl.agg.linreg(mt.y, [1, mt.x] + list(covariates[mt.s].values()), weight=mt.col_idx)).rows() - - betas_with_weights = ht_with_weights.beta.collect() - betas_pre_weighted_1 = ht_pre_weighted_1.beta.collect() - betas_pre_weighted_2 = ht_pre_weighted_2.beta.collect() - - betas_from_agg = ht_from_agg.my_linreg.beta[1].collect() - - def equal_with_nans(arr1, arr2): - def both_nan_or_none(a, b): - return (a is None or np.isnan(a)) and (b is None or np.isnan(b)) - - return all([both_nan_or_none(a, b) or math.isclose(a, b) for a, b in zip(arr1, arr2)]) - - assert equal_with_nans(betas_with_weights, betas_pre_weighted_1) - assert equal_with_nans(betas_with_weights, betas_from_agg) - - ht_with_multiple_weights = hl._linear_regression_rows_nd(y=[[mt.y], [hl.abs(mt.y)]], - x=mt.x, - 
covariates=my_covs, - weights=[mt.col_idx, mt.col_idx + 5]) - - # Check that preweighted 1 and preweighted 2 match up with fields 1 and 2 of multiple - multi_weight_betas = ht_with_multiple_weights.beta.collect() - multi_weight_betas_1 = [e[0][0] for e in multi_weight_betas] - multi_weight_betas_2 = [e[1][0] for e in multi_weight_betas] - - assert np.array(multi_weight_betas).shape == (10, 2, 1) - - assert(equal_with_nans(multi_weight_betas_1, betas_pre_weighted_1)) - assert(equal_with_nans(multi_weight_betas_2, betas_pre_weighted_2)) - - # Now making sure that missing weights get excluded. - ht_with_missing_weights = hl._linear_regression_rows_nd(y=[[mt.y], [hl.abs(mt.y)]], - x=mt.x, - covariates=[1], - weights=[weights[mt.s].Weight1, weights[mt.s].Weight2]) - - mt_with_missing_weights = mt.annotate_cols(Weight1 = weights[mt.s].Weight1, Weight2 = weights[mt.s].Weight2) - mt_with_missing_weight1_filtered = mt_with_missing_weights.filter_cols(hl.is_defined(mt_with_missing_weights.Weight1)) - mt_with_missing_weight2_filtered = mt_with_missing_weights.filter_cols(hl.is_defined(mt_with_missing_weights.Weight2)) - ht_from_agg_weight_1 = mt_with_missing_weight1_filtered.annotate_rows( - my_linreg=hl.agg.linreg(mt_with_missing_weight1_filtered.y, [1, mt_with_missing_weight1_filtered.x], weight=weights[mt_with_missing_weight1_filtered.s].Weight1) - ).rows() - ht_from_agg_weight_2 = mt_with_missing_weight2_filtered.annotate_rows( - my_linreg=hl.agg.linreg(mt_with_missing_weight2_filtered.y, [1, mt_with_missing_weight2_filtered.x], weight=weights[mt_with_missing_weight2_filtered.s].Weight2) - ).rows() - - multi_weight_missing_results = ht_with_missing_weights.collect() - multi_weight_missing_betas = [e.beta for e in multi_weight_missing_results] - multi_weight_missing_betas_1 = [e[0][0] for e in multi_weight_missing_betas] - multi_weight_missing_betas_2 = [e[1][0] for e in multi_weight_missing_betas] - - betas_from_agg_weight_1 = ht_from_agg_weight_1.my_linreg.beta[1].collect() - betas_from_agg_weight_2 = ht_from_agg_weight_2.my_linreg.beta[1].collect() - - assert equal_with_nans(multi_weight_missing_betas_1, betas_from_agg_weight_1) - assert equal_with_nans(multi_weight_missing_betas_2, betas_from_agg_weight_2) - - multi_weight_missing_p_values = [e.p_value for e in multi_weight_missing_results] - multi_weight_missing_p_values_1 = [e[0][0] for e in multi_weight_missing_p_values] - multi_weight_missing_p_values_2 = [e[1][0] for e in multi_weight_missing_p_values] - - p_values_from_agg_weight_1 = ht_from_agg_weight_1.my_linreg.p_value[1].collect() - p_values_from_agg_weight_2 = ht_from_agg_weight_2.my_linreg.p_value[1].collect() - - assert equal_with_nans(multi_weight_missing_p_values_1, p_values_from_agg_weight_1) - assert equal_with_nans(multi_weight_missing_p_values_2, p_values_from_agg_weight_2) - - multi_weight_missing_t_stats = [e.t_stat for e in multi_weight_missing_results] - multi_weight_missing_t_stats_1 = [e[0][0] for e in multi_weight_missing_t_stats] - multi_weight_missing_t_stats_2 = [e[1][0] for e in multi_weight_missing_t_stats] - - t_stats_from_agg_weight_1 = ht_from_agg_weight_1.my_linreg.t_stat[1].collect() - t_stats_from_agg_weight_2 = ht_from_agg_weight_2.my_linreg.t_stat[1].collect() - - assert equal_with_nans(multi_weight_missing_t_stats_1, t_stats_from_agg_weight_1) - assert equal_with_nans(multi_weight_missing_t_stats_2, t_stats_from_agg_weight_2) - - multi_weight_missing_se = [e.standard_error for e in multi_weight_missing_results] - multi_weight_missing_se_1 = [e[0][0] for 
e in multi_weight_missing_se] - multi_weight_missing_se_2 = [e[1][0] for e in multi_weight_missing_se] - - se_from_agg_weight_1 = ht_from_agg_weight_1.my_linreg.standard_error[1].collect() - se_from_agg_weight_2 = ht_from_agg_weight_2.my_linreg.standard_error[1].collect() - - assert equal_with_nans(multi_weight_missing_se_1, se_from_agg_weight_1) - assert equal_with_nans(multi_weight_missing_se_2, se_from_agg_weight_2) - - def test_errors_weighted_linear_regression(self): - mt = hl.utils.range_matrix_table(20, 10).annotate_entries(x=2) - mt = mt.annotate_cols(**{f"col_{i}": i for i in range(4)}) - - self.assertRaises(ValueError, lambda: hl._linear_regression_rows_nd(y=[[mt.col_1]], - x=mt.x, - covariates=[1], - weights=[mt.col_2, mt.col_3])) - - self.assertRaises(ValueError, lambda: hl._linear_regression_rows_nd(y=[mt.col_1], - x=mt.x, - covariates=[1], - weights=[mt.col_2])) - - self.assertRaises(ValueError, lambda: hl._linear_regression_rows_nd(y=[[mt.col_1]], - x=mt.x, - covariates=[1], - weights=mt.col_2)) - - - # comparing to R: # x = c(0, 1, 0, 0, 0, 1, 0, 0, 0, 0) # y = c(0, 0, 1, 1, 1, 1, 0, 0, 1, 1) @@ -731,6 +586,7 @@ def test_logistic_regression_wald_test_multi_pheno_bgen_dosage(self): self.assertAlmostEqual(multi_results[1001].logistic_regression[0].p_value,single_results[1001].p_value, places=6) #TODO test handling of missingness + def test_logistic_regression_wald_test_pl(self): covariates = hl.import_table(resource('regressionLogistic.cov'), key='Sample', diff --git a/hail/python/test/hail/table/test_table.py b/hail/python/test/hail/table/test_table.py index 8e9e0ed89ad..641639c3b3b 100644 --- a/hail/python/test/hail/table/test_table.py +++ b/hail/python/test/hail/table/test_table.py @@ -794,14 +794,6 @@ def test_write_stage_locally(self): t2 = hl.read_table(f) self.assertTrue(t._same(t2)) - @fails_service_backend() - def test_write_no_parts(self): - ht = hl.utils.range_table(10, n_partitions=2).filter(False) - path = new_temp_file(extension='ht') - path2 = new_temp_file(extension='ht') - assert ht.checkpoint(path)._same(ht) - hl.read_table(path).write(path2) - def test_min_partitions(self): assert hl.import_table(resource('variantAnnotations.tsv'), min_partitions=50).n_partitions() == 50 diff --git a/hail/python/test/hail/test_ir.py b/hail/python/test/hail/test_ir.py index 3c0bc58fe52..d311f04e90e 100644 --- a/hail/python/test/hail/test_ir.py +++ b/hail/python/test/hail/test_ir.py @@ -50,7 +50,7 @@ def value_irs(self): ir.ApplyUnaryPrimOp('-', i), ir.ApplyComparisonOp('EQ', i, j), ir.MakeArray([i, ir.NA(hl.tint32), ir.I32(-3)], hl.tarray(hl.tint32)), - ir.ArrayRef(a, i), + ir.ArrayRef(a, i, ir.Str('foo')), ir.ArrayLen(a), ir.ArraySort(ir.ToStream(a), 'l', 'r', ir.ApplyComparisonOp("LT", ir.Ref('l'), ir.Ref('r'))), ir.ToSet(a), @@ -147,7 +147,7 @@ class TableIRTests(unittest.TestCase): def table_irs(self): b = ir.TrueIR() table_read = ir.TableRead( - ir.TableNativeReader(resource('backward_compatability/1.1.0/table/0.ht'), None, False), False) + ir.TableNativeReader(resource('backward_compatability/1.0.0/table/0.ht'), None, False), False) table_read_row_type = hl.dtype('struct{idx: int32, f32: float32, i64: int64, m: float64, astruct: struct{a: int32, b: float64}, mstruct: struct{x: int32, y: str}, aset: set, mset: set, d: dict, float64>, md: dict, h38: locus, ml: locus, i: interval>, c: call, mc: call, t: tuple(call, str, str), mt: tuple(locus, bool)}') matrix_read = ir.MatrixRead( @@ -228,7 +228,7 @@ def matrix_irs(self): 
resource('backward_compatability/1.0.0/matrix_table/0.hmt'), None, False), False, False) table_read = ir.TableRead( - ir.TableNativeReader(resource('backward_compatability/1.1.0/table/0.ht'), None, False), False) + ir.TableNativeReader(resource('backward_compatability/1.0.0/table/0.ht'), None, False), False) matrix_range = ir.MatrixRead(ir.MatrixRangeReader(1, 1, 10)) matrix_irs = [ diff --git a/hail/python/test/hail/utils/test_utils.py b/hail/python/test/hail/utils/test_utils.py index 512e8c2b7c7..2fc814809ac 100644 --- a/hail/python/test/hail/utils/test_utils.py +++ b/hail/python/test/hail/utils/test_utils.py @@ -5,7 +5,6 @@ from hail.utils.misc import escape_str, escape_id from hail.utils.java import Env from hail.utils.linkedlist import LinkedList - from ..helpers import * setUpModule = startTestHailContext @@ -181,10 +180,10 @@ def test_struct_ops(self): self.assertEqual(s.annotate(**{'a': 5, 'x': 10, 'y': 15}), Struct(a=5, b=2, c=3, x=10, y=15)) - def test_expr_exception_results_in_hail_user_error(self): + def test_expr_exception_results_in_fatal_error(self): df = range_table(10) df = df.annotate(x=[1, 2]) - with self.assertRaises(HailUserError): + with self.assertRaises(FatalError): df.filter(df.x[5] == 0).count() def test_interval_ops(self): diff --git a/hail/python/test/hailtop/batch/test_batch.py b/hail/python/test/hailtop/batch/test_batch.py index 3bfbdf6f294..9f692de8f09 100644 --- a/hail/python/test/hailtop/batch/test_batch.py +++ b/hail/python/test/hailtop/batch/test_batch.py @@ -792,17 +792,6 @@ def multiply(r): assert res.status()['state'] == 'success', debug_info(res) assert res.get_job_log(3)['main'] == "15\n", debug_info(res) - def test_python_job_w_non_zero_ec(self): - b = self.batch(default_python_image='gcr.io/hail-vdc/python-dill:3.7-slim') - j = b.new_python_job() - - def error(): - raise Exception("this should fail") - - j.call(error) - res = b.run() - assert res.status()['state'] == 'failure', debug_info(res) - def test_fail_fast(self): b = self.batch(cancel_after_n_failures=1) diff --git a/hail/python/test/hailtop/batch/utils.py b/hail/python/test/hailtop/batch/utils.py index 2f72ca03acc..95b5127de78 100644 --- a/hail/python/test/hailtop/batch/utils.py +++ b/hail/python/test/hailtop/batch/utils.py @@ -2,7 +2,7 @@ def debug_info(batch: bc.Batch): - jobs = list(batch.jobs()) + jobs = batch.jobs() for j_status in jobs: j_status['log'] = batch.get_job_log(j_status['job_id']) return str(jobs) diff --git a/hail/src/main/scala/is/hail/HailContext.scala b/hail/src/main/scala/is/hail/HailContext.scala index 26bb5ae1894..5846d92c09b 100644 --- a/hail/src/main/scala/is/hail/HailContext.scala +++ b/hail/src/main/scala/is/hail/HailContext.scala @@ -93,8 +93,8 @@ object HailContext { if (major.toInt < 8) fatal(s"Hail requires Java 1.8, found $versionString") case javaVersion(major, minor, security) => - if (major.toInt != 11) - fatal(s"Hail requires Java 8 or 11, found $versionString") + if (major.toInt > 8) + fatal(s"Hail requires Java 8, found $versionString") case _ => fatal(s"Unknown JVM version string: $versionString") } diff --git a/hail/src/main/scala/is/hail/annotations/ChunkCache.scala b/hail/src/main/scala/is/hail/annotations/ChunkCache.scala deleted file mode 100644 index e2eae91e2cd..00000000000 --- a/hail/src/main/scala/is/hail/annotations/ChunkCache.scala +++ /dev/null @@ -1,156 +0,0 @@ -package is.hail.annotations - -import is.hail.expr.ir.LongArrayBuilder - -import java.util.TreeMap -import java.util.function.BiConsumer -import scala.collection.mutable - - /** 
- * ChunkCache minimizes calls to free and allocate by holding onto - * chunks when they are no longer in use. When a chunk is needed, the cache - * is searched. If the size requested is less than a certain amount, the size - * is rounded up to the nearest power of 2 and the small chunk cache is checked - * for available chunk. If bigger, the big chunk cache returns the chunk whose size - * is the ceiling match. If the size requested is at least 90 percent of the size of - * the chunk returned, then that chunk is used. If no acceptable chunk is found, a new - * chunk is created. If the chunk created plus the current allocation is greater than - * peak usage, than chunks from the cache are deallocated until this condition is not - * true or the cache is empty. - * When freeChunk is called on RegionPool, the chunks get put in the cache that - * corresponds to their size. freeAll releases all chunks and is called when - * RegionPool is closed. - */ - -private class ChunkCache (allocator: Long => Long, freer: Long => Unit){ - private[this] val highestSmallChunkPowerOf2 = 24 - private[this] val biggestSmallChunk = Math.pow(2,highestSmallChunkPowerOf2) - private[this] val bigChunkCache = new TreeMap[Long, LongArrayBuilder]() - private[this] val chunksEncountered = mutable.Map[Long, Long]() - private[this] val minSpaceRequirements = .9 - private[this] var chunksRequested = 0 - private[this] var cacheHits = 0 - private[this] var smallChunkCacheSize = 0 - private[this] val smallChunkCache = new Array[LongArrayBuilder](highestSmallChunkPowerOf2 + 1) - (0 until highestSmallChunkPowerOf2 + 1).foreach(index => { - smallChunkCache(index) = new LongArrayBuilder() - }) - - def getChunkSize(chunkPointer: Long): Long = chunksEncountered(chunkPointer) - - def freeChunkFromMemory(pool: RegionPool, chunkPointer: Long):Unit = { - val size = chunksEncountered(chunkPointer) - pool.incrementAllocatedBytes(-1 * size) - freer(chunkPointer) - chunksEncountered -= chunkPointer - } - - def freeChunksFromCacheToFit(pool: RegionPool, sizeToFit: Long): Unit = { - var smallChunkIndex = highestSmallChunkPowerOf2 - while((sizeToFit + pool.getTotalAllocatedBytes) > pool.getHighestTotalUsage && - smallChunkIndex >= 0 && !chunksEncountered.isEmpty) { - if (!bigChunkCache.isEmpty) { - val toFree = bigChunkCache.lastEntry() - freeChunkFromMemory(pool, toFree.getValue.pop()) - if (toFree.getValue.size == 0) bigChunkCache.remove(toFree.getKey) - } - else { - if (smallChunkCacheSize == 0) smallChunkIndex = -1 - else { - val toFree = smallChunkCache(smallChunkIndex) - if (toFree.size != 0) { - freeChunkFromMemory(pool, toFree.pop()) - smallChunkCacheSize -= 1 - } - if (toFree.size == 0) smallChunkIndex -= 1 - } - } - } - } - - def newChunk(pool: RegionPool, size: Long): Long = { - if ((size + pool.getTotalAllocatedBytes) > pool.getHighestTotalUsage) { - freeChunksFromCacheToFit(pool, size) - } - val newChunkPointer = allocator(size) - chunksEncountered += (newChunkPointer -> size) - pool.incrementAllocatedBytes(size) - newChunkPointer - } - - def freeAll(pool: RegionPool): Unit = { - if (!chunksEncountered.isEmpty) { - smallChunkCache.foreach(ab => { - while (ab.size > 0) { - freeChunkFromMemory(pool, ab.pop()) - smallChunkCacheSize -= 1 - }}) - //BiConsumer needed to work with scala 2.11.12 - bigChunkCache.forEach(new BiConsumer[Long, LongArrayBuilder]() { - def accept(key: Long, value: LongArrayBuilder): Unit = - while (value.size > 0) freeChunkFromMemory(pool, value.pop()) - }) - } - } - - def getUsage(): (Int, Int) = { - 
(chunksRequested, cacheHits) - } - - def indexInSmallChunkCache(size: Long): Int = { - var closestPower = highestSmallChunkPowerOf2 - while((size >> closestPower) != 1) closestPower = closestPower - 1 - if (size % (1 << closestPower) != 0) closestPower +=1 - closestPower - } - - def getChunk(pool: RegionPool, size: Long): (Long, Long) = { - chunksRequested += 1 - assert(size > 0L) - if (size <= biggestSmallChunk) { - val closestPower = indexInSmallChunkCache(size) - if(smallChunkCache(closestPower).size == 0 ) { - val sizePowerOf2 = (1 << closestPower).toLong - (newChunk(pool, sizePowerOf2), sizePowerOf2) - } - else { - cacheHits += 1 - (smallChunkCache(closestPower).pop(), size) - } - } - else { - val closestSize = bigChunkCache.ceilingEntry(size) - if (closestSize != null && (closestSize.getKey == size - || ((closestSize.getKey * minSpaceRequirements) <= size))) { - cacheHits += 1 - val chunkPointer = closestSize.getValue.pop() - if (closestSize.getValue.size == 0) bigChunkCache.remove(closestSize.getKey) - (chunkPointer, size) - } - else (newChunk(pool, size), size) - } - } - - def freeChunkToCache(chunkPointer: Long): Unit = { - val chunkSize = chunksEncountered(chunkPointer) - if (chunkSize <= biggestSmallChunk) { - smallChunkCache(indexInSmallChunkCache(chunkSize)) += chunkPointer - smallChunkCacheSize += 1 - } - else { - val sameSizeEntries = bigChunkCache.get(chunkSize) - if (sameSizeEntries == null) { - val newSize = new LongArrayBuilder() - newSize += chunkPointer - bigChunkCache.put(chunkSize, newSize) - } - else sameSizeEntries += chunkPointer - } - } - - def freeChunksToCache( ab: LongArrayBuilder): Unit = { - while (ab.size > 0) freeChunkToCache(ab.pop()) - } -} - - diff --git a/hail/src/main/scala/is/hail/annotations/Region.scala b/hail/src/main/scala/is/hail/annotations/Region.scala index 8c46a3a66af..2e30b37cabe 100644 --- a/hail/src/main/scala/is/hail/annotations/Region.scala +++ b/hail/src/main/scala/is/hail/annotations/Region.scala @@ -218,13 +218,6 @@ object Region { false } - val sharedChunkHeaderBytes = 16L - def getSharedChunkRefCount(ndAddr: Long): Long = Region.loadLong(ndAddr - sharedChunkHeaderBytes) - def storeSharedChunkRefCount(ndAddr: Long, newCount: Long): Unit = Region.storeLong(ndAddr - sharedChunkHeaderBytes, newCount) - def getSharedChunkByteSize(ndAddr: Long): Long = Region.loadLong(ndAddr - 8L) - def getSharedChunkByteSize(ndAddr: Code[Long]): Code[Long] = Region.loadLong(ndAddr - 8L) - def storeSharedChunkByteSize(ndAddr: Long, byteSize: Long): Unit = Region.storeLong(ndAddr - 8L, byteSize) - def stagedCreate(blockSize: Size, pool: Code[RegionPool]): Code[Region] = Code.invokeScalaObject2[Int, RegionPool, Region](Region.getClass, "apply", asm4s.const(blockSize), pool) @@ -348,13 +341,13 @@ final class Region protected[annotations](var blockSize: Region.Size, var pool: } } - def allocateSharedChunk(nBytes: Long): Long = { + def allocateNDArray(nBytes: Long): Long = { assert(nBytes >= 0L) - memory.allocateSharedChunk(nBytes) + memory.allocateNDArray(nBytes) } - def trackSharedChunk(addr: Long): Unit = { - memory.trackSharedChunk(addr) + def trackNDArray(addr: Long): Unit = { + memory.trackNDArray(addr) } def close(): Unit = { diff --git a/hail/src/main/scala/is/hail/annotations/RegionMemory.scala b/hail/src/main/scala/is/hail/annotations/RegionMemory.scala index a3a6c89ebe4..4077e16f3ae 100644 --- a/hail/src/main/scala/is/hail/annotations/RegionMemory.scala +++ b/hail/src/main/scala/is/hail/annotations/RegionMemory.scala @@ -57,10 +57,10 @@ final class 
RegionMemory(pool: RegionPool) extends AutoCloseable { def getCurrentBlock(): Long = currentBlock private def allocateBigChunk(size: Long): Long = { - val (chunkPointer, chunkSize) = pool.getChunk(size) - bigChunks.add(chunkPointer) - totalChunkMemory += chunkSize - chunkPointer + val o = pool.getChunk(size) + bigChunks.add(o) + totalChunkMemory += size + o } def allocate(n: Long): Long = { @@ -134,15 +134,18 @@ final class RegionMemory(pool: RegionPool) extends AutoCloseable { var i = 0 while (i < ndarrayRefs.size) { val addr = this.ndarrayRefs(i) - val curCount = Region.getSharedChunkRefCount(addr) + val curCount = PNDArray.getReferenceCount(addr) if (curCount == 1) { - Region.storeSharedChunkRefCount(addr, 0L) - pool.freeChunk(addr - Region.sharedChunkHeaderBytes) + PNDArray.storeReferenceCount(addr, 0L) + val bytesToFree = PNDArray.getByteSize(addr) + PNDArray.headerBytes + pool.incrementAllocatedBytes(-bytesToFree) + Memory.free(addr - PNDArray.headerBytes) } else { - Region.storeSharedChunkRefCount(addr, curCount - 1) + PNDArray.storeReferenceCount(addr, curCount - 1) } i += 1 } + this.ndarrayRefs.clear() } @@ -282,27 +285,27 @@ final class RegionMemory(pool: RegionPool) extends AutoCloseable { references.update(idx, null) } - def allocateSharedChunk(size: Long): Long = { - if (size < 0L) { - throw new IllegalArgumentException(s"Can't request ndarray of negative memory size, got ${size}") + def allocateNDArray(size: Long): Long = { + if (size <= 0L) { + throw new IllegalArgumentException(s"Can't request ndarray of non-positive memory size, got ${size}") } - val extra = Region.sharedChunkHeaderBytes + val extra = PNDArray.headerBytes // This adjusted address is where the ndarray content starts - val (allocatedChunk, _) = pool.getChunk(size + extra) - val newChunkPointer = allocatedChunk + extra + val allocatedAddr = pool.getChunk(size + extra) + extra + // The reference count and total size are stored just before the content. 
- Region.storeSharedChunkRefCount(newChunkPointer, 0L) - Region.storeSharedChunkByteSize(newChunkPointer, size) - this.trackSharedChunk(newChunkPointer) - newChunkPointer + PNDArray.storeReferenceCount(allocatedAddr, 0L) + PNDArray.storeByteSize(allocatedAddr, size) + this.trackNDArray(allocatedAddr) + allocatedAddr } - def trackSharedChunk(alloc: Long): Unit = { + def trackNDArray(alloc: Long): Unit = { this.ndarrayRefs.add(alloc) - val curRefCount = Region.getSharedChunkRefCount(alloc) - Region.storeSharedChunkRefCount(alloc, curRefCount + 1L) + val curRefCount = Region.loadLong(alloc - 16) + Region.storeLong(alloc - 16, curRefCount + 1L) } def listNDArrayRefs(): IndexedSeq[Long] = { diff --git a/hail/src/main/scala/is/hail/annotations/RegionPool.scala b/hail/src/main/scala/is/hail/annotations/RegionPool.scala index 467044e95d3..d151ee074a9 100644 --- a/hail/src/main/scala/is/hail/annotations/RegionPool.scala +++ b/hail/src/main/scala/is/hail/annotations/RegionPool.scala @@ -3,10 +3,6 @@ package is.hail.annotations import is.hail.expr.ir.LongArrayBuilder import is.hail.utils._ -import java.util.TreeMap -import java.util.function.BiConsumer -import scala.collection.mutable - object RegionPool { def apply(strictMemoryCheck: Boolean = false): RegionPool = { @@ -27,7 +23,6 @@ final class RegionPool private(strictMemoryCheck: Boolean, threadName: String, t private[this] var allocationEchoThreshold: Long = 256 * 1024 private[this] var numJavaObjects: Long = 0L private[this] var highestTotalUsage = 0L - private[this] val chunkCache = new ChunkCache(Memory.malloc, Memory.free) def addJavaObject(): Unit = { numJavaObjects += 1 @@ -40,7 +35,7 @@ final class RegionPool private(strictMemoryCheck: Boolean, threadName: String, t def getTotalAllocatedBytes: Long = totalAllocatedBytes def getHighestTotalUsage: Long = highestTotalUsage - def getUsage: (Int, Int) = chunkCache.getUsage() + private[annotations] def incrementAllocatedBytes(toAdd: Long): Unit = { totalAllocatedBytes += toAdd if (totalAllocatedBytes >= allocationEchoThreshold) { @@ -61,7 +56,6 @@ final class RegionPool private(strictMemoryCheck: Boolean, threadName: String, t if (pool.size > 0) { pool.pop() } else { - chunkCache.freeChunksFromCacheToFit(this, size.toLong) blocks(size) += 1 val blockByteSize = Region.SIZES(size) incrementAllocatedBytes(blockByteSize) @@ -69,15 +63,17 @@ final class RegionPool private(strictMemoryCheck: Boolean, threadName: String, t } } - protected[annotations] def getChunk(size: Long): (Long, Long) = { - chunkCache.getChunk(this, size) + protected[annotations] def getChunk(size: Long): Long = { + incrementAllocatedBytes(size) + Memory.malloc(size) } protected[annotations] def freeChunks(ab: LongArrayBuilder, totalSize: Long): Unit = { - chunkCache.freeChunksToCache(ab) - } - protected[annotations] def freeChunk(chunkPointer: Long): Unit = { - chunkCache.freeChunkToCache(chunkPointer) + while (ab.size > 0) { + val addr = ab.pop() + Memory.free(addr) + } + totalAllocatedBytes -= totalSize } protected[annotations] def getMemory(size: Int): RegionMemory = { @@ -167,13 +163,12 @@ final class RegionPool private(strictMemoryCheck: Boolean, threadName: String, t val blockSize = Region.SIZES(i) val blocks = freeBlocks(i) while (blocks.size > 0) { - val popped = blocks.pop() - Memory.free(popped) + Memory.free(blocks.pop()) totalAllocatedBytes -= blockSize } i += 1 } - chunkCache.freeAll(pool = this) + if (totalAllocatedBytes != 0) { val msg = s"RegionPool: total allocated bytes not 0 after closing! 
total allocated: " + s"$totalAllocatedBytes (${ readableBytes(totalAllocatedBytes) })" diff --git a/hail/src/main/scala/is/hail/asm4s/Code.scala b/hail/src/main/scala/is/hail/asm4s/Code.scala index e0e2b3a7034..e322912a823 100644 --- a/hail/src/main/scala/is/hail/asm4s/Code.scala +++ b/hail/src/main/scala/is/hail/asm4s/Code.scala @@ -277,28 +277,6 @@ object Code { cls, method, Array[Class[_]]( a1ct.runtimeClass, a2ct.runtimeClass, a3ct.runtimeClass, a4ct.runtimeClass, a5ct.runtimeClass, a6ct.runtimeClass, a7ct.runtimeClass, a8ct.runtimeClass), Array(a1, a2, a3, a4, a5, a6, a7, a8)) - def invokeScalaObject9[A1, A2, A3, A4, A5, A6, A7, A8, A9, S]( - cls: Class[_], method: String, a1: Code[A1], a2: Code[A2], a3: Code[A3], a4: Code[A4], a5: Code[A5], a6: Code[A6], a7: Code[A7], a8: Code[A8], a9: Code[A9])( - implicit a1ct: ClassTag[A1], a2ct: ClassTag[A2], a3ct: ClassTag[A3], a4ct: ClassTag[A4], a5ct: ClassTag[A5], a6ct: ClassTag[A6], a7ct: ClassTag[A7], a8ct: ClassTag[A8], a9ct: ClassTag[A9], sct: ClassTag[S] - ): Code[S] = - invokeScalaObject[S]( - cls, method, Array[Class[_]]( - a1ct.runtimeClass, a2ct.runtimeClass, a3ct.runtimeClass, a4ct.runtimeClass, a5ct.runtimeClass, a6ct.runtimeClass, a7ct.runtimeClass, a8ct.runtimeClass, a9ct.runtimeClass), Array(a1, a2, a3, a4, a5, a6, a7, a8, a9)) - - def invokeScalaObject11[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, S]( - cls: Class[_], method: String, a1: Code[A1], a2: Code[A2], a3: Code[A3], a4: Code[A4], a5: Code[A5], a6: Code[A6], a7: Code[A7], a8: Code[A8], - a9: Code[A9], a10: Code[A10], a11: Code[A11])( - implicit a1ct: ClassTag[A1], a2ct: ClassTag[A2], a3ct: ClassTag[A3], a4ct: ClassTag[A4], a5ct: ClassTag[A5], a6ct: ClassTag[A6], a7ct: ClassTag[A7], - a8ct: ClassTag[A8], a9ct: ClassTag[A9], a10ct: ClassTag[A10], a11ct: ClassTag[A11], sct: ClassTag[S] - ): Code[S] = - invokeScalaObject[S]( - cls, method, - Array[Class[_]]( - a1ct.runtimeClass, a2ct.runtimeClass, a3ct.runtimeClass, a4ct.runtimeClass, a5ct.runtimeClass, a6ct.runtimeClass, a7ct.runtimeClass, a8ct.runtimeClass, - a9ct.runtimeClass, a10ct.runtimeClass, a11ct.runtimeClass), - Array(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11) - ) - def invokeScalaObject13[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, S]( cls: Class[_], method: String, a1: Code[A1], a2: Code[A2], a3: Code[A3], a4: Code[A4], a5: Code[A5], a6: Code[A6], a7: Code[A7], a8: Code[A8], a9: Code[A9], a10: Code[A10], a11: Code[A11], a12: Code[A12], a13: Code[A13])( @@ -384,7 +362,7 @@ object Code { Code._throw[is.hail.utils.HailException, U](cerr, lineNumber) } - def _fatalWithID[U](msg: Code[String], errorId: Code[Int])(implicit uti: TypeInfo[U]): Code[U] = + def _fatalWithID[U](msg: Code[String], errorId: Int)(implicit uti: TypeInfo[U]): Code[U] = Code._throw[is.hail.utils.HailException, U](Code.newInstance[is.hail.utils.HailException, String, Int]( msg, errorId)) diff --git a/hail/src/main/scala/is/hail/asm4s/CodeBuilder.scala b/hail/src/main/scala/is/hail/asm4s/CodeBuilder.scala index 762226e7405..fa3ea003fd8 100644 --- a/hail/src/main/scala/is/hail/asm4s/CodeBuilder.scala +++ b/hail/src/main/scala/is/hail/asm4s/CodeBuilder.scala @@ -151,10 +151,6 @@ trait CodeBuilderLike { append(Code._fatal[Unit](msgs.reduce(_.concat(_)))) } - def _fatalWithError(errorId: Code[Int], msgs: Code[String]*): Unit = { - append(Code._fatalWithID[Unit](msgs.reduce(_.concat(_)), errorId)) - } - def _throw[T <: java.lang.Throwable](cerr: Code[T]): Unit = { append(Code._throw[T, Unit](cerr)) } diff --git 
a/hail/src/main/scala/is/hail/backend/HailTaskContext.scala b/hail/src/main/scala/is/hail/backend/HailTaskContext.scala index 6c9894415b1..02b71d1f94c 100644 --- a/hail/src/main/scala/is/hail/backend/HailTaskContext.scala +++ b/hail/src/main/scala/is/hail/backend/HailTaskContext.scala @@ -22,8 +22,7 @@ abstract class HailTaskContext { def finish(): Unit = { log.info(s"TaskReport: stage=${ stageId() }, partition=${ partitionId() }, attempt=${ attemptNumber() }, " + - s"peakBytes=${ thePool.getHighestTotalUsage }, peakBytesReadable=${ formatSpace(thePool.getHighestTotalUsage) }, "+ - s"chunks requested=${thePool.getUsage._1}, cache hits=${thePool.getUsage._2}") + s"peakBytes=${ thePool.getHighestTotalUsage }, peakBytesReadable=${ formatSpace(thePool.getHighestTotalUsage) }") thePool.close() } } diff --git a/hail/src/main/scala/is/hail/expr/ir/Binds.scala b/hail/src/main/scala/is/hail/expr/ir/Binds.scala index 23417c2e638..5c806813ef9 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Binds.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Binds.scala @@ -16,7 +16,7 @@ object Bindings { args.map { case (name, ir) => name -> ir.typ } :+ name -> TTuple(TTuple(args.map(_._2.typ): _*), body.typ) else empty case StreamMap(a, name, _) => if (i == 1) Array(name -> coerce[TStream](a.typ).elementType) else empty - case StreamZip(as, names, _, _, _) => if (i == as.length) names.zip(as.map(a => coerce[TStream](a.typ).elementType)) else empty + case StreamZip(as, names, _, _) => if (i == as.length) names.zip(as.map(a => coerce[TStream](a.typ).elementType)) else empty case StreamZipJoin(as, key, curKey, curVals, _) => val eltType = coerce[TStruct](coerce[TStream](as.head.typ).elementType) if (i == as.length) @@ -42,7 +42,7 @@ object Bindings { case ArraySort(a, left, right, _) => if (i == 1) Array(left -> coerce[TStream](a.typ).elementType, right -> coerce[TStream](a.typ).elementType) else empty case AggArrayPerElement(a, _, indexName, _, _, _) => if (i == 1) FastIndexedSeq(indexName -> TInt32) else empty case NDArrayMap(nd, name, _) => if (i == 1) Array(name -> coerce[TNDArray](nd.typ).elementType) else empty - case NDArrayMap2(l, r, lName, rName, _, _) => if (i == 2) Array(lName -> coerce[TNDArray](l.typ).elementType, rName -> coerce[TNDArray](r.typ).elementType) else empty + case NDArrayMap2(l, r, lName, rName, _) => if (i == 2) Array(lName -> coerce[TNDArray](l.typ).elementType, rName -> coerce[TNDArray](r.typ).elementType) else empty case CollectDistributedArray(contexts, globals, cname, gname, _, _) => if (i == 2) Array(cname -> coerce[TStream](contexts.typ).elementType, gname -> globals.typ) else empty case TableAggregate(child, _) => if (i == 1) child.typ.globalEnv.m else empty case MatrixAggregate(child, _) => if (i == 1) child.typ.globalEnv.m else empty diff --git a/hail/src/main/scala/is/hail/expr/ir/BlockMatrixIR.scala b/hail/src/main/scala/is/hail/expr/ir/BlockMatrixIR.scala index 6d6f9ed8ca1..6afeafdc96b 100644 --- a/hail/src/main/scala/is/hail/expr/ir/BlockMatrixIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/BlockMatrixIR.scala @@ -271,7 +271,7 @@ case class BlockMatrixMap(child: BlockMatrixIR, eltName: String, f: IR, needsDen val functionArgs = f match { case ApplyUnaryPrimOp(_, arg1) => IndexedSeq(arg1) - case Apply(_, _, args, _, _) => args + case Apply(_, _, args, _) => args case ApplyBinaryPrimOp(_, l, r) => IndexedSeq(l, r) } @@ -282,13 +282,13 @@ case class BlockMatrixMap(child: BlockMatrixIR, eltName: String, f: IR, needsDen val (name, breezeF): (String, DenseMatrix[Double] => 
DenseMatrix[Double]) = f match { case ApplyUnaryPrimOp(Negate(), _) => ("negate", BlockMatrix.negationOp) - case Apply("abs", _, _, _, _) => ("abs", numerics.abs(_)) - case Apply("log", _, _, _, _) => ("log", numerics.log(_)) - case Apply("sqrt", _, _, _,_) => ("sqrt", numerics.sqrt(_)) - case Apply("ceil", _, _, _,_) => ("ceil", numerics.ceil(_)) - case Apply("floor", _, _, _,_) => ("floor", numerics.floor(_)) + case Apply("abs", _, _, _) => ("abs", numerics.abs(_)) + case Apply("log", _, _, _) => ("log", numerics.log(_)) + case Apply("sqrt", _, _, _) => ("sqrt", numerics.sqrt(_)) + case Apply("ceil", _, _, _) => ("ceil", numerics.ceil(_)) + case Apply("floor", _, _, _) => ("floor", numerics.floor(_)) - case Apply("pow", _, Seq(Ref(`eltName`, _), r), _, _) if !Mentions(r, eltName) => + case Apply("pow", _, Seq(Ref(`eltName`, _), r), _) if !Mentions(r, eltName) => ("**", binaryOp(evalIR(ctx, r), numerics.pow(_, _))) case ApplyBinaryPrimOp(Add(), Ref(`eltName`, _), r) if !Mentions(r, eltName) => ("+", binaryOp(evalIR(ctx, r), _ + _)) diff --git a/hail/src/main/scala/is/hail/expr/ir/Children.scala b/hail/src/main/scala/is/hail/expr/ir/Children.scala index e96f67d589e..a88734dbeb8 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Children.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Children.scala @@ -52,11 +52,11 @@ object Children { args.toFastIndexedSeq case MakeStream(args, _, _) => args.toFastIndexedSeq - case ArrayRef(a, i, _) => - Array(a, i) + case ArrayRef(a, i, s) => + Array(a, i, s) case ArrayLen(a) => Array(a) - case StreamRange(start, stop, step, _, _) => + case StreamRange(start, stop, step, _) => Array(start, stop, step) case ArrayZeros(length) => Array(length) @@ -64,7 +64,7 @@ object Children { Array(data, shape, rowMajor) case NDArrayShape(nd) => Array(nd) - case NDArrayReshape(nd, shape, _) => + case NDArrayReshape(nd, shape) => Array(nd, shape) case NDArrayConcat(nds, _) => Array(nds) @@ -96,7 +96,7 @@ object Children { Array(a) case StreamMap(a, name, body) => Array(a, body) - case StreamZip(as, names, body, _, _) => + case StreamZip(as, names, body, _) => as :+ body case StreamZipJoin(as, _, _, _, joinF) => as :+ joinF @@ -132,19 +132,19 @@ object Children { nd +: keep case NDArrayMap(nd, _, body) => Array(nd, body) - case NDArrayMap2(l, r, _, _, body, _) => + case NDArrayMap2(l, r, _, _, body) => Array(l, r, body) case NDArrayReindex(nd, _) => Array(nd) case NDArrayAgg(nd, _) => Array(nd) - case NDArrayMatMul(l, r, _) => + case NDArrayMatMul(l, r) => Array(l, r) - case NDArrayQR(nd, _, _) => + case NDArrayQR(nd, _) => Array(nd) - case NDArraySVD(nd, _, _, _) => + case NDArraySVD(nd, _, _) => Array(nd) - case NDArrayInv(nd, errorID) => + case NDArrayInv(nd) => Array(nd) case NDArrayWrite(nd, path) => Array(nd, path) @@ -187,13 +187,13 @@ object Children { case Die(message, typ, errorId) => Array(message) case Trap(child) => Array(child) - case ApplyIR(_, _, args, _) => + case ApplyIR(_, _, args) => args.toFastIndexedSeq - case Apply(_, _, args, _, _) => + case Apply(_, _, args, _) => args.toFastIndexedSeq case ApplySeeded(_, args, seed, _) => args.toFastIndexedSeq - case ApplySpecial(_, _, args, _, _) => + case ApplySpecial(_, _, args, _) => args.toFastIndexedSeq // from MatrixIR case MatrixWrite(child, _) => Array(child) diff --git a/hail/src/main/scala/is/hail/expr/ir/Copy.scala b/hail/src/main/scala/is/hail/expr/ir/Copy.scala index d90e0424988..75ce84ea45f 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Copy.scala +++ 
b/hail/src/main/scala/is/hail/expr/ir/Copy.scala @@ -63,16 +63,15 @@ object Copy { case MakeStream(args, typ, requiresMemoryManagementPerElement) => assert(args.length == newChildren.length) MakeStream(newChildren.map(_.asInstanceOf[IR]), typ, requiresMemoryManagementPerElement) - case ArrayRef(_, _, errorID) => - assert(newChildren.length == 2) - ArrayRef(newChildren(0).asInstanceOf[IR], newChildren(1).asInstanceOf[IR], errorID) + case ArrayRef(_, _, _) => + assert(newChildren.length == 3) + ArrayRef(newChildren(0).asInstanceOf[IR], newChildren(1).asInstanceOf[IR], newChildren(2).asInstanceOf[IR]) case ArrayLen(_) => assert(newChildren.length == 1) ArrayLen(newChildren(0).asInstanceOf[IR]) - case StreamRange(_, _, _, requiresMemoryManagementPerElement, errorID) => + case StreamRange(_, _, _, requiresMemoryManagementPerElement) => assert(newChildren.length == 3) - StreamRange(newChildren(0).asInstanceOf[IR], newChildren(1).asInstanceOf[IR], newChildren(2).asInstanceOf[IR], - requiresMemoryManagementPerElement, errorID) + StreamRange(newChildren(0).asInstanceOf[IR], newChildren(1).asInstanceOf[IR], newChildren(2).asInstanceOf[IR], requiresMemoryManagementPerElement) case ArrayZeros(_) => assert(newChildren.length == 1) ArrayZeros(newChildren(0).asInstanceOf[IR]) @@ -82,9 +81,9 @@ object Copy { case NDArrayShape(_) => assert(newChildren.length == 1) NDArrayShape(newChildren(0).asInstanceOf[IR]) - case NDArrayReshape(_, _, errorID) => + case NDArrayReshape(_, _) => assert(newChildren.length == 2) - NDArrayReshape(newChildren(0).asInstanceOf[IR], newChildren(1).asInstanceOf[IR], errorID) + NDArrayReshape(newChildren(0).asInstanceOf[IR], newChildren(1).asInstanceOf[IR]) case NDArrayConcat(_, axis) => assert(newChildren.length == 1) NDArrayConcat(newChildren(0).asInstanceOf[IR], axis) @@ -98,27 +97,27 @@ object Copy { case NDArrayMap(_, name, _) => assert(newChildren.length == 2) NDArrayMap(newChildren(0).asInstanceOf[IR], name, newChildren(1).asInstanceOf[IR]) - case NDArrayMap2(_, _, lName, rName, _, errorID) => + case NDArrayMap2(_, _, lName, rName, _) => assert(newChildren.length == 3) - NDArrayMap2(newChildren(0).asInstanceOf[IR], newChildren(1).asInstanceOf[IR], lName, rName, newChildren(2).asInstanceOf[IR], errorID) + NDArrayMap2(newChildren(0).asInstanceOf[IR], newChildren(1).asInstanceOf[IR], lName, rName, newChildren(2).asInstanceOf[IR]) case NDArrayReindex(_, indexExpr) => assert(newChildren.length == 1) NDArrayReindex(newChildren(0).asInstanceOf[IR], indexExpr) case NDArrayAgg(_, axes) => assert(newChildren.length == 1) NDArrayAgg(newChildren(0).asInstanceOf[IR], axes) - case NDArrayMatMul(_, _, errorID) => + case NDArrayMatMul(_, _) => assert(newChildren.length == 2) - NDArrayMatMul(newChildren(0).asInstanceOf[IR], newChildren(1).asInstanceOf[IR], errorID) - case NDArrayQR(_, mode, errorID) => + NDArrayMatMul(newChildren(0).asInstanceOf[IR], newChildren(1).asInstanceOf[IR]) + case NDArrayQR(_, mode) => assert(newChildren.length == 1) - NDArrayQR(newChildren(0).asInstanceOf[IR], mode, errorID) - case NDArraySVD(_, fullMatrices, computeUV, errorID) => + NDArrayQR(newChildren(0).asInstanceOf[IR], mode) + case NDArraySVD(_, fullMatrices, computeUV) => assert(newChildren.length == 1) - NDArraySVD(newChildren(0).asInstanceOf[IR], fullMatrices, computeUV, errorID) - case NDArrayInv(_, errorID) => + NDArraySVD(newChildren(0).asInstanceOf[IR], fullMatrices, computeUV) + case NDArrayInv(_) => assert(newChildren.length == 1) - NDArrayInv(newChildren(0).asInstanceOf[IR], errorID) + 
NDArrayInv(newChildren(0).asInstanceOf[IR]) case NDArrayWrite(_, _) => assert(newChildren.length == 2) NDArrayWrite(newChildren(0).asInstanceOf[IR], newChildren(1).asInstanceOf[IR]) @@ -163,10 +162,9 @@ object Copy { case StreamMap(_, name, _) => assert(newChildren.length == 2) StreamMap(newChildren(0).asInstanceOf[IR], name, newChildren(1).asInstanceOf[IR]) - case StreamZip(_, names, _, behavior, errorID) => + case StreamZip(_, names, _, behavior) => assert(newChildren.length == names.length + 1) - StreamZip(newChildren.init.asInstanceOf[IndexedSeq[IR]], names, newChildren(names.length).asInstanceOf[IR], - behavior, errorID) + StreamZip(newChildren.init.asInstanceOf[IndexedSeq[IR]], names, newChildren(names.length).asInstanceOf[IR], behavior) case StreamZipJoin(as, key, curKey, curVals, _) => assert(newChildren.length == as.length + 1) StreamZipJoin(newChildren.init.asInstanceOf[IndexedSeq[IR]], key, curKey, curVals, newChildren(as.length).asInstanceOf[IR]) @@ -285,17 +283,17 @@ object Copy { case Trap(child) => assert(newChildren.length == 1) Trap(newChildren(0).asInstanceOf[IR]) - case x@ApplyIR(fn, typeArgs, args, errorID) => - val r = ApplyIR(fn, typeArgs, newChildren.map(_.asInstanceOf[IR]), errorID) + case x@ApplyIR(fn, typeArgs, args) => + val r = ApplyIR(fn, typeArgs, newChildren.map(_.asInstanceOf[IR])) r.conversion = x.conversion r.inline = x.inline r - case Apply(fn, typeArgs, args, t, errorID) => - Apply(fn, typeArgs, newChildren.map(_.asInstanceOf[IR]), t, errorID) + case Apply(fn, typeArgs, args, t) => + Apply(fn, typeArgs, newChildren.map(_.asInstanceOf[IR]), t) case ApplySeeded(fn, args, seed, t) => ApplySeeded(fn, newChildren.map(_.asInstanceOf[IR]), seed, t) - case ApplySpecial(fn, typeArgs, args, t, errorID) => - ApplySpecial(fn, typeArgs, newChildren.map(_.asInstanceOf[IR]), t, errorID) + case ApplySpecial(fn, typeArgs, args, t) => + ApplySpecial(fn, typeArgs, newChildren.map(_.asInstanceOf[IR]), t) // from MatrixIR case MatrixWrite(_, writer) => assert(newChildren.length == 1) diff --git a/hail/src/main/scala/is/hail/expr/ir/Emit.scala b/hail/src/main/scala/is/hail/expr/ir/Emit.scala index 7586568b803..9b98956f382 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Emit.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Emit.scala @@ -323,8 +323,11 @@ case class IEmitCodeGen[+A](Lmissing: CodeLabel, Lpresent: CodeLabel, value: A, value } - def get(cb: EmitCodeBuilder, errorMsg: Code[String]=s"expected non-missing", errorID: Code[Int] = const(ErrorIDs.NO_ERROR)): A = - handle(cb, cb._fatalWithError(errorID, errorMsg)) + def get(cb: EmitCodeBuilder, errorMsg: String = s"expected non-missing"): A = + handle(cb, cb._fatal(errorMsg)) + + def get(cb: EmitCodeBuilder, errorMsg: Code[String]): A = + handle(cb, cb._fatal(errorMsg)) def consume(cb: EmitCodeBuilder, ifMissing: => Unit, ifPresent: (A) => Unit): Unit = { val Lafter = CodeLabel() @@ -842,7 +845,7 @@ class Emit[C]( emitI(v) .map(cb)(pc => pc.st.castRename(_typ).fromCodes(pc.makeCodeTuple(cb))) case NA(typ) => - IEmitCode.missing(cb, SUnreachable.fromVirtualType(typ).defaultValue) + IEmitCode(cb, const(true), typeWithReq.canonicalEmitType.st.defaultValue) case IsNA(v) => val m = emitI(v).consumeCode(cb, true, _ => false) presentPC(primitive(m)) @@ -972,24 +975,40 @@ class Emit[C]( outputPType.loadCheapSCode(cb, arrayAddress) } - case x@ArrayRef(a, i, errorID) => + case x@ArrayRef(a, i, s) => def boundsCheck(cb: EmitCodeBuilder, index: Value[Int], len: Value[Int]): Unit = { - val bcMb = 
mb.getOrGenEmitMethod("arrayref_bounds_check", "arrayref_bounds_check", - IndexedSeq[ParamType](IntInfo, IntInfo, IntInfo), UnitInfo)({ mb => - mb.voidWithBuilder { cb => - val index = mb.getCodeParam[Int](1) - val len = mb.getCodeParam[Int](2) - val errorID = mb.getCodeParam[Int](3) - cb.ifx(index < 0 || index >= len, { - cb._fatalWithError(errorID, const("array index out of bounds: index=") - .concat(index.toS) - .concat(", length=") - .concat(len.toS)) - }) - + s match { + case Str(constant) => + val baseMsg = constant match { + case "" => s"\n----------\nIR:\n${ Pretty.short(x) }" + case c => s"\n----------\nPython traceback:\n$c" } - }) - cb.invokeVoid(bcMb, index, len, const(errorID)) + val bcMb = mb.getOrGenEmitMethod("arrayref_bounds_check", ("arrayref_bounds_check", baseMsg), + IndexedSeq[ParamType](IntInfo, IntInfo), UnitInfo)({ mb => + mb.voidWithBuilder { cb => + val index = mb.getCodeParam[Int](1) + val len = mb.getCodeParam[Int](2) + cb.ifx(index < 0 || index >= len, { + cb._fatal(const("array index out of bounds: index=") + .concat(index.toS) + .concat(", length=") + .concat(len.toS) + .concat(baseMsg)) + }) + + } + }) + cb.invokeVoid(bcMb, index, len) + case s => + cb.ifx(index < 0 || index >= len, { + val msg = cb.newLocal[String]("arrayref_msg", const("array index out of bounds: index=") + .concat(index.toS) + .concat(", length=") + .concat(len.toS)) + emitI(s).consume(cb, (), sc => cb.assign(msg, msg.concat(sc.asString.loadString()))) + cb._fatal(msg) + }) + } } emitI(a).flatMap(cb) { (ac) => @@ -1003,7 +1022,7 @@ class Emit[C]( case ArrayLen(a) => emitI(a).map(cb) { (ac) => - primitive(ac.asIndexable.codeLoadLength()) + primitive(ac.asIndexable.loadLength()) } case GetField(o, name) => @@ -1246,7 +1265,7 @@ class Emit[C]( } } - case x@MakeNDArray(dataIR, shapeIR, rowMajorIR, errorID) => + case x@MakeNDArray(dataIR, shapeIR, rowMajorIR, errorId) => emitI(rowMajorIR).flatMap(cb) { isRowMajorCode => emitI(shapeIR).flatMap(cb) { case shapeTupleCode: SBaseStructCode => @@ -1261,13 +1280,13 @@ class Emit[C]( cb.ifx(memoData.hasMissingValues(cb), { cb._throw(Code.newInstance[HailException, String, Int]( - "Cannot construct an ndarray with missing values.", errorID + "Cannot construct an ndarray with missing values.", errorId )) }) (0 until nDims).foreach { index => cb.ifx(shapeTupleValue.isFieldMissing(index), - cb.append(Code._fatalWithID[Unit](s"shape missing at index $index", errorID))) + cb.append(Code._fatal[Unit](s"shape missing at index $index"))) } val stridesSettables = (0 until nDims).map(i => cb.newLocal[Long](s"make_ndarray_stride_$i")) @@ -1298,14 +1317,14 @@ class Emit[C]( emitI(ndIR).map(cb) { case pc: SNDArrayCode => pc.shape(cb) } case x@NDArrayReindex(child, indexMap) => val childEC = emitI(child) - childEC.map(cb) { case sndCode: SNDArrayPointerCode => - val childPType = sndCode.st.pType - val sndVal = sndCode.memoize(cb, "ndarray_reindex_child") - val childShape = sndVal.shapes(cb) - val childStrides = sndVal.strides(cb) + childEC.map(cb) { case pndCode: SNDArrayPointerCode => + val childPType = pndCode.st.pType + val pndVal = pndCode.memoize(cb, "ndarray_reindex_child") + val childShape = pndVal.shapes(cb) + val childStrides = pndVal.strides(cb) - val pndAddr = SingleCodeSCode.fromSCode(cb, sndVal, region) - val dataPtr = sndVal.firstDataAddress(cb) + val pndAddr = SingleCodeSCode.fromSCode(cb, pndVal, region) + val dataArray = childPType.dataType.loadCheapSCode(cb, childPType.dataPArrayPointer(pndAddr.code.asInstanceOf[Code[Long]])) val newShape = 
indexMap.map { childIndex => if (childIndex < childPType.nDims) childShape(childIndex) else const(1L) @@ -1315,10 +1334,10 @@ class Emit[C]( } val newPType = childPType.copy(nDims = indexMap.length) - newPType.constructByCopyingDataPointer( + newPType.constructByCopyingArray( newShape, newStrides, - dataPtr, + dataArray, cb, region) } @@ -1340,30 +1359,31 @@ class Emit[C]( } } - case NDArrayMatMul(lChild, rChild, errorID) => + case NDArrayMatMul(lChild, rChild) => emitNDArrayStandardStriding(lChild).flatMap(cb) { case (leftPVal: SNDArrayValue, leftIsColumnMajor: Value[Boolean]) => emitNDArrayStandardStriding(rChild).map(cb) { case (rightPVal: SNDArrayValue, rightIsColumnMajor: Value[Boolean]) => - val lSType = leftPVal.st - val rSType = rightPVal.st + val lPType = leftPVal.st.asInstanceOf[SNDArrayPointer].pType + val rPType = rightPVal.st.asInstanceOf[SNDArrayPointer].pType val lShape = leftPVal.shapes(cb) val rShape = rightPVal.shapes(cb) - val unifiedShape = NDArrayEmitter.matmulShape(cb, lShape, rShape, errorID) + val unifiedShape = NDArrayEmitter.matmulShape(cb, lShape, rShape) - val leftBroadcastMask = if (lSType.nDims > 2) NDArrayEmitter.broadcastMask(lShape) else IndexedSeq[Value[Long]]() - val rightBroadcastMask = if (rSType.nDims > 2) NDArrayEmitter.broadcastMask(rShape) else IndexedSeq[Value[Long]]() + val leftBroadcastMask = if (lPType.nDims > 2) NDArrayEmitter.broadcastMask(lShape) else IndexedSeq[Value[Long]]() + val rightBroadcastMask = if (rPType.nDims > 2) NDArrayEmitter.broadcastMask(rShape) else IndexedSeq[Value[Long]]() - val outputPType = PCanonicalNDArray(lSType.elementType.canonicalPType().setRequired(true), - TNDArray.matMulNDims(lSType.nDims, rSType.nDims)) + val outputPType = PCanonicalNDArray(lPType.elementType, TNDArray.matMulNDims(lPType.nDims, rPType.nDims)) - if ((lSType.elementType.virtualType == TFloat64 || lSType.elementType.virtualType == TFloat32) && lSType.nDims == 2 && rSType.nDims == 2) { - val leftDataAddress = leftPVal.firstDataAddress(cb) - val rightDataAddress = rightPVal.firstDataAddress(cb) + if ((lPType.elementType.isInstanceOf[PFloat64] || lPType.elementType.isInstanceOf[PFloat32]) && lPType.nDims == 2 && rPType.nDims == 2) { + val leftPValAddr = SingleCodeSCode.fromSCode(cb, leftPVal, region) + val rightPValAddr = SingleCodeSCode.fromSCode(cb, rightPVal, region) + val leftDataAddress = lPType.dataFirstElementPointer(leftPValAddr.code.asInstanceOf[Code[Long]]) + val rightDataAddress = rPType.dataFirstElementPointer(rightPValAddr.code.asInstanceOf[Code[Long]]) - val M = lShape(lSType.nDims - 2) - val N = rShape(rSType.nDims - 1) - val K = lShape(lSType.nDims - 1) + val M = lShape(lPType.nDims - 2) + val N = rShape(rPType.nDims - 1) + val K = lShape(lPType.nDims - 1) val LDA = leftIsColumnMajor.mux(M, K) val LDB = rightIsColumnMajor.mux(K, N) @@ -1379,8 +1399,8 @@ class Emit[C]( region) cb.ifx((M cne 0L) && (N cne 0L) && (K cne 0L), { - cb.append(lSType.elementType.virtualType match { - case TFloat32 => + cb.append(lPType.elementType match { + case PFloat32(_) => Code.invokeScalaObject13[String, String, Int, Int, Int, Float, Long, Int, Long, Int, Float, Long, Int, Unit](BLAS.getClass, method = "sgemm", TRANSA, TRANSB, @@ -1396,7 +1416,7 @@ class Emit[C]( answerFirstElementAddr, LDC.toI ) - case TFloat64 => + case PFloat64(_) => Code.invokeScalaObject13[String, String, Int, Int, Int, Double, Long, Int, Long, Int, Double, Long, Int, Unit](BLAS.getClass, method = "dgemm", TRANSA, TRANSB, @@ -1420,46 +1440,8 @@ class Emit[C]( ) answerFinisher(cb) - } 
else if (lSType.elementType.virtualType == TFloat64 && lSType.nDims == 2 && rSType.nDims == 1) { - val leftDataAddress = leftPVal.firstDataAddress(cb) - val rightDataAddress = rightPVal.firstDataAddress(cb) - - val numRows = lShape(lSType.nDims - 2) - val numCols = lShape(lSType.nDims - 1) - val M = cb.newLocal[Long]("dgemv_m", leftIsColumnMajor.mux(numRows, numCols)) - val N = cb.newLocal[Long]("dgemv_n", leftIsColumnMajor.mux(numCols, numRows)) - val outputSize = cb.newLocal[Long]("output_size", numRows) - - val alpha = 1.0 - val beta = 0.0 - - val LDA = M - val TRANS: Code[String] = leftIsColumnMajor.mux("N", "T") - - val (answerFirstElementAddr, answerFinisher) = outputPType.constructDataFunction( - IndexedSeq(outputSize), - outputPType.makeColumnMajorStrides(IndexedSeq(outputSize), region, cb), - cb, - region) - - cb.append(Code.invokeScalaObject11[String, Int, Int, Double, Long, Int, Long, Int, Double, Long, Int, Unit](BLAS.getClass, method="dgemv", - TRANS, - M.toI, - N.toI, - alpha, - leftDataAddress, - LDA.toI, - rightDataAddress, - 1, - beta, - answerFirstElementAddr, - 1 - )) - - - answerFinisher(cb) - } else { - val numericElementType = coerce[PNumeric](lSType.elementType.canonicalPType()) + } else { + val numericElementType = coerce[PNumeric](lPType.elementType) val eVti = typeToTypeInfo(numericElementType) val emitter = new NDArrayEmitter(unifiedShape, leftPVal.st.elementType) { @@ -1467,7 +1449,7 @@ class Emit[C]( val element = coerce[Any](cb.newField("matmul_element")(eVti)) val k = cb.newField[Long]("ndarray_matmul_k") - val (lIndices: IndexedSeq[Value[Long]], rIndices: IndexedSeq[Value[Long]]) = (lSType.nDims, rSType.nDims, idxVars) match { + val (lIndices: IndexedSeq[Value[Long]], rIndices: IndexedSeq[Value[Long]]) = (lPType.nDims, rPType.nDims, idxVars) match { case (1, 1, Seq()) => (IndexedSeq(k), IndexedSeq(k)) case (1, _, stack :+ m) => val rStackVars = NDArrayEmitter.zeroBroadcastedDims(stack, rightBroadcastMask) @@ -1496,7 +1478,7 @@ class Emit[C]( } } - cb.assign(kLen, lShape(lSType.nDims - 1)) + cb.assign(kLen, lShape(lPType.nDims - 1)) cb.assign(element, numericElementType.zero) cb.forLoop(cb.assign(k, 0L), k < kLen, cb.assign(k, k + 1L), { val lElem = leftPVal.loadElement(lIndices, cb) @@ -1511,7 +1493,7 @@ class Emit[C]( } } } - case NDArrayInv(nd, errorID) => + case NDArrayInv(nd) => // Based on https://github.com/numpy/numpy/blob/v1.19.0/numpy/linalg/linalg.py#L477-L547 emitNDArrayColumnMajorStrides(nd).map(cb) { case pNDCode: SNDArrayCode => val pndVal = pNDCode.memoize(cb, "ndarray_inverse_nd") @@ -1537,9 +1519,9 @@ class Emit[C]( val INFOdgetrf = mb.newLocal[Int]() val INFOdgetri = mb.newLocal[Int]() val INFOerror = (fun: String, info: LocalRef[Int]) => (info cne 0) - .orEmpty(Code._fatalWithID[Unit](const(s"LAPACK error ${ fun }. Error code = ").concat(info.toS), const(errorID))) + .orEmpty(Code._fatal[Unit](const(s"LAPACK error ${ fun }. 
Error code = ").concat(info.toS))) - cb.append((N cne M).orEmpty(Code._fatalWithID[Unit](const("Can only invert square matrix"), const(errorID)))) + cb.append((N cne M).orEmpty(Code._fatal[Unit](const("Can only invert square matrix")))) cb.assign(An, (M * N).toI) @@ -1573,13 +1555,13 @@ class Emit[C]( finish(cb) } - case x@NDArraySVD(nd, full_matrices, computeUV, errorID) => + case x@NDArraySVD(nd, full_matrices, computeUV) => emitNDArrayColumnMajorStrides(nd).flatMap(cb) { case ndPCode: SNDArrayCode => val ndPVal = ndPCode.memoize(cb, "nd_svd_value") val infoDGESDDResult = cb.newLocal[Int]("infoDGESDD") val infoDGESDDErrorTest = (extraErrorMsg: String) => (infoDGESDDResult cne 0) - .orEmpty(Code._fatalWithID[Unit](const(s"LAPACK error DGESDD. $extraErrorMsg Error code = ").concat(infoDGESDDResult.toS), errorID)) + .orEmpty(Code._fatal[Unit](const(s"LAPACK error DGESDD. $extraErrorMsg Error code = ").concat(infoDGESDDResult.toS))) val LWORKAddress = mb.newLocal[Long]("svd_lwork_address") val shapes = ndPVal.shapes(cb) @@ -1690,7 +1672,7 @@ class Emit[C]( IEmitCode(cb, false, resultPCode) } - case x@NDArrayQR(nd, mode, errorID) => + case x@NDArrayQR(nd, mode) => // See here to understand different modes: https://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.qr.html emitNDArrayColumnMajorStrides(nd).map(cb) { case pndCode: SNDArrayCode => @@ -1732,7 +1714,7 @@ class Emit[C]( val infoDGEQRFResult = cb.newLocal[Int]("ndaray_qr_infoDGEQRFResult") val infoDGEQRFErrorTest = (extraErrorMsg: String) => (infoDGEQRFResult cne 0) - .orEmpty(Code._fatalWithID[Unit](const(s"LAPACK error DGEQRF. $extraErrorMsg Error code = ").concat(infoDGEQRFResult.toS), errorID)) + .orEmpty(Code._fatal[Unit](const(s"LAPACK error DGEQRF. $extraErrorMsg Error code = ").concat(infoDGEQRFResult.toS))) // Computing H and Tau cb.assign(aNumElements, ndPT.numElements(shapeArray)) @@ -1836,7 +1818,7 @@ class Emit[C]( val infoDORGQRResult = cb.newLocal[Int]("ndarray_qr_DORGQR_info") val infoDORQRErrorTest = (extraErrorMsg: String) => (infoDORGQRResult cne 0) - .orEmpty(Code._fatalWithID[Unit](const(s"LAPACK error DORGQR. $extraErrorMsg Error code = ").concat(infoDORGQRResult.toS), errorID)) + .orEmpty(Code._fatal[Unit](const(s"LAPACK error DORGQR. 
$extraErrorMsg Error code = ").concat(infoDORGQRResult.toS))) val qCondition = cb.newLocal[Boolean]("ndarray_qr_qCondition") val numColsToUse = cb.newLocal[Long]("ndarray_qr_numColsToUse") @@ -2093,7 +2075,7 @@ class Emit[C]( { sc => cb.assign(msg, sc.asString.loadString()) }) cb._throw[HailException](Code.newInstance[HailException, String, Int](msg, errorId)) - IEmitCode.present(cb, SUnreachable.fromVirtualType(typ).defaultValue) + IEmitCode.present(cb, typeWithReq.canonicalEmitType.st.defaultValue) case CastToArray(a) => emitI(a).map(cb) { ind => ind.asIndexable.castToArray(cb) } @@ -2490,7 +2472,7 @@ class Emit[C]( val ev = env.inputValues(i).apply(region) ev - case ir@Apply(fn, typeArgs, args, rt, errorID) => + case ir@Apply(fn, typeArgs, args, rt) => val impl = ir.implementation val unified = impl.unify(typeArgs, args.map(_.typ), rt) assert(unified) @@ -2512,16 +2494,16 @@ class Emit[C]( EmitCode.fromI(mb) { cb => val emitArgs = args.map(a => EmitCode.fromI(cb.emb)(emitI(a, _))).toFastIndexedSeq IEmitCode.multiMapEmitCodes(cb, emitArgs) { codeArgs => - cb.invokeSCode(meth, FastIndexedSeq[Param](CodeParam(region), CodeParam(errorID)) ++ codeArgs.map(pc => pc: Param): _*) + cb.invokeSCode(meth, FastIndexedSeq[Param](CodeParam(region)) ++ codeArgs.map(pc => pc: Param): _*) } } - case x@ApplySpecial(_, typeArgs, args, rt, errorID) => + case x@ApplySpecial(_, typeArgs, args, rt) => val codeArgs = args.map(a => emit(a)) val impl = x.implementation val unified = impl.unify(typeArgs, args.map(_.typ), rt) assert(unified) val retType = impl.computeReturnEmitType(x.typ, codeArgs.map(_.emitType)) - impl.apply(EmitRegion(mb, region), retType.st, typeArgs, errorID, codeArgs: _*) + impl.apply(EmitRegion(mb, region), retType.st, typeArgs, codeArgs: _*) case x@WritePartition(stream, pctx, writer) => val ctxCode = emit(pctx) @@ -2610,26 +2592,26 @@ object NDArrayEmitter { } } - def unifyShapes2(cb: EmitCodeBuilder, leftShape: IndexedSeq[Value[Long]], rightShape: IndexedSeq[Value[Long]], errorID: Int): IndexedSeq[Value[Long]] = { + def unifyShapes2(cb: EmitCodeBuilder, leftShape: IndexedSeq[Value[Long]], rightShape: IndexedSeq[Value[Long]]): IndexedSeq[Value[Long]] = { val shape = leftShape.zip(rightShape).zipWithIndex.map { case ((left, right), i) => val notSameAndNotBroadcastable = !((left ceq right) || (left ceq 1L) || (right ceq 1L)) cb.newField[Long]( s"unify_shapes2_shape$i", notSameAndNotBroadcastable.mux( - Code._fatalWithID[Long](rightShape.foldLeft[Code[String]]( + Code._fatal[Long](rightShape.foldLeft[Code[String]]( leftShape.foldLeft[Code[String]]( - const("Incompatible NDArray shapes: [ ") + const("Incompatible NDArray shapes: [ ") )((accum, v) => accum.concat(v.toS).concat(" ")) .concat("] vs [ ") )((accum, v) => accum.concat(v.toS).concat(" ")) - .concat("]"), errorID), + .concat("]")), (left > right).mux(left, right))) } shape } - def matmulShape(cb: EmitCodeBuilder, leftShape: IndexedSeq[Value[Long]], rightShape: IndexedSeq[Value[Long]], errorID: Int): IndexedSeq[Value[Long]] = { + def matmulShape(cb: EmitCodeBuilder, leftShape: IndexedSeq[Value[Long]], rightShape: IndexedSeq[Value[Long]]): IndexedSeq[Value[Long]] = { val mb = cb.emb assert(leftShape.nonEmpty) @@ -2657,7 +2639,7 @@ object NDArrayEmitter { rK = rightShape(rightShape.length - 2) val unifiedShape = unifyShapes2(cb, leftShape.slice(0, leftShape.length - 2), - rightShape.slice(0, rightShape.length - 2), errorID) + rightShape.slice(0, rightShape.length - 2)) shape = unifiedShape :+ leftShape(leftShape.length - 2) :+ 
rightShape.last } } @@ -2667,8 +2649,10 @@ object NDArrayEmitter { cb.ifx(lK.cne(rK), { - cb._fatalWithError(errorID,"Matrix dimensions incompatible: ", leftShapeString, - " can't be multiplied by matrix with dimensions ", rightShapeString) + cb._fatal("Matrix dimensions incompatible: ", + leftShapeString, + " can't be multiplied by matrix with dimensions ", + rightShapeString) }) shape @@ -2693,7 +2677,7 @@ abstract class NDArrayEmitter(val outputShape: IndexedSeq[Value[Long]], val elem SNDArray.forEachIndexColMajor(cb, shapeArray, "ndarrayemitter_emitloops") { case (cb, idxVars) => val element = IEmitCode.present(cb, outputElement(cb, idxVars)).consume(cb, { - cb._fatal("NDArray elements cannot be missing") + cb._fatal("NDArray elements cannot be missing") }, { elementPc => targetType.elementType.storeAtAddress(cb, firstElementAddress + (idx.toL * targetType.elementType.byteSize), region, elementPc, true) }) diff --git a/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala b/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala index 731f80ae6e5..c008e2505cb 100644 --- a/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala +++ b/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala @@ -2,7 +2,7 @@ package is.hail.expr.ir import is.hail.annotations.{Region, RegionPool, RegionValueBuilder} import is.hail.asm4s._ -import is.hail.backend.{BackendUtils, BroadcastValue} +import is.hail.backend.BackendUtils import is.hail.expr.ir.functions.IRRandomness import is.hail.expr.ir.orderings.CodeOrdering import is.hail.io.fs.FS @@ -14,9 +14,9 @@ import is.hail.types.virtual.Type import is.hail.utils._ import is.hail.variant.ReferenceGenome import org.apache.spark.TaskContext + import java.io._ import java.lang.reflect.InvocationTargetException - import scala.collection.mutable import scala.language.existentials @@ -27,7 +27,7 @@ class EmitModuleBuilder(val ctx: ExecuteContext, val modb: ModuleBuilder) { def genEmitClass[C](baseName: String, sourceFile: Option[String] = None)(implicit cti: TypeInfo[C]): EmitClassBuilder[C] = newEmitClass[C](genName("C", baseName), sourceFile) - private[this] val _staticFS: StaticField[FS] = { + private[this] var _staticFS: StaticField[FS] = { val cls = genEmitClass[Unit]("FSContainer") cls.newStaticField[FS]("filesystem", Code._null[FS]) } @@ -36,20 +36,14 @@ class EmitModuleBuilder(val ctx: ExecuteContext, val modb: ModuleBuilder) { def getFS: Value[FS] = new StaticFieldRef(_staticFS) - private val rgContainers: mutable.Map[ReferenceGenome, StaticField[ReferenceGenome]] = mutable.Map.empty - - def hasReferences: Boolean = rgContainers.nonEmpty + private[this] val rgMap: mutable.Map[ReferenceGenome, Value[ReferenceGenome]] = + mutable.Map[ReferenceGenome, Value[ReferenceGenome]]() - def getReferenceGenome(rg: ReferenceGenome): Value[ReferenceGenome] = { - val rgField = rgContainers.getOrElseUpdate(rg, { - val cls = genEmitClass[Unit](s"RGContainer_${rg.name}") - cls.newStaticField("reference_genome", Code._null[ReferenceGenome]) - }) - new StaticFieldRef(rgField) - } - - def referenceGenomes(): IndexedSeq[ReferenceGenome] = rgContainers.keys.toFastIndexedSeq - def referenceGenomeFields(): IndexedSeq[StaticField[ReferenceGenome]] = rgContainers.values.toFastIndexedSeq + def getReferenceGenome(rg: ReferenceGenome): Value[ReferenceGenome] = rgMap.getOrElseUpdate(rg, { + val cls = genEmitClass[Unit](s"RGContainer_${rg.name}") + val fld = cls.newStaticField("reference_genome", rg.codeSetup(ctx.localTmpdir, cls)) + new StaticFieldRef(fld) + }) } trait 
WrappedEmitModuleBuilder { @@ -578,20 +572,6 @@ class EmitClassBuilder[C]( } } - def makeAddReferenceGenomes(): Unit = { - cb.addInterface(typeInfo[FunctionWithReferences].iname) - val mb = newEmitMethod("addReferenceGenomes", FastIndexedSeq[ParamType](typeInfo[Array[ReferenceGenome]]), typeInfo[Unit]) - mb.voidWithBuilder { cb => - val rgFields = emodb.referenceGenomeFields() - val rgs = mb.getCodeParam[Array[ReferenceGenome]](1) - cb.ifx(rgs.length().cne(const(rgFields.length)), cb._fatal("Invalid number of references, expected ", rgFields.length.toString, " got ", rgs.length().toS)) - for ((fld, i) <- rgFields.zipWithIndex) { - cb += fld.put(rgs(i)) - cb += fld.get().invoke[String, FS, Unit]("heal", ctx.localTmpdir, getFS) - } - } - } - def makeRNGs() { cb.addInterface(typeInfo[FunctionWithSeededRandomness].iname) @@ -626,9 +606,6 @@ class EmitClassBuilder[C]( makeAddFS() val hasLiterals: Boolean = literalsMap.nonEmpty || encodedLiteralsMap.nonEmpty - val hasReferences: Boolean = emodb.hasReferences - if (hasReferences) - makeAddReferenceGenomes() val literalsBc = if (hasLiterals) ctx.backend.broadcast(encodeLiterals()) @@ -636,12 +613,6 @@ class EmitClassBuilder[C]( // if there are no literals, there might not be a HailContext null - val references: Array[ReferenceGenome] = if (hasReferences) - emodb.referenceGenomes().toArray - else - null - - val nSerializedAggs = _nSerialized val useBackend = _backendField != null @@ -681,8 +652,6 @@ class EmitClassBuilder[C]( f.asInstanceOf[FunctionWithObjects].setObjects(objects) if (hasLiterals) f.asInstanceOf[FunctionWithLiterals].addLiterals(literalsBc.value) - if (hasReferences) - f.asInstanceOf[FunctionWithReferences].addReferenceGenomes(references) if (nSerializedAggs != 0) f.asInstanceOf[FunctionWithAggRegion].setNumSerialized(nSerializedAggs) f.asInstanceOf[FunctionWithSeededRandomness].setPartitionIndex(idx) @@ -805,10 +774,6 @@ trait FunctionWithFS { def addFS(fs: FS): Unit } -trait FunctionWithReferences { - def addReferenceGenomes(rgs: Array[ReferenceGenome]): Unit -} - trait FunctionWithPartitionRegion { def addPartitionRegion(r: Region): Unit def setPool(pool: RegionPool): Unit diff --git a/hail/src/main/scala/is/hail/expr/ir/ExtractIntervalFilters.scala b/hail/src/main/scala/is/hail/expr/ir/ExtractIntervalFilters.scala index 01b45fd3469..a2a6a601a5f 100644 --- a/hail/src/main/scala/is/hail/expr/ir/ExtractIntervalFilters.scala +++ b/hail/src/main/scala/is/hail/expr/ir/ExtractIntervalFilters.scala @@ -131,7 +131,7 @@ object ExtractIntervalFilters { def extractAndRewrite(cond1: IR, es: ExtractionState): Option[(IR, Array[Interval])] = { cond1 match { - case ApplySpecial("lor", _, Seq(l, r), t, _) => + case ApplySpecial("lor", _, Seq(l, r), t) => extractAndRewrite(l, es) .liftedZip(extractAndRewrite(r, es)) .flatMap { @@ -139,7 +139,7 @@ object ExtractIntervalFilters { Some((True(), Interval.union(i1 ++ i2, es.iOrd))) case _ => None } - case ApplySpecial("land", _, Seq(l, r), t, _) => + case ApplySpecial("land", _, Seq(l, r), t) => val ll = extractAndRewrite(l, es) val rr = extractAndRewrite(r, es) (ll, rr) match { @@ -157,8 +157,7 @@ object ExtractIntervalFilters { } case StreamFold(ToStream(lit: Literal, _), False(), acc, value, body) => body match { - case ApplySpecial("lor", _, Seq(Ref(`acc`, _), - ApplySpecial("contains", _, Seq(Ref(`value`, _), k), _, _)), _, _) if es.isFirstKey(k) => + case ApplySpecial("lor", _, Seq(Ref(`acc`, _), ApplySpecial("contains", _, Seq(Ref(`value`, _), k), _)), _) if es.isFirstKey(k) => 
assert(lit.typ.asInstanceOf[TContainer].elementType.isInstanceOf[TInterval]) Some((True(), Interval.union(constValue(lit).asInstanceOf[Iterable[_]] @@ -174,7 +173,7 @@ object ExtractIntervalFilters { } case Coalesce(Seq(x, False())) => extractAndRewrite(x, es) .map { case (ir, intervals) => (Coalesce(FastSeq(ir, False())), intervals) } - case ApplyIR("contains", _, Seq(lit: Literal, Apply("contig", _, Seq(k), _, _)),_) if es.isFirstKey(k) => + case ApplyIR("contains", _, Seq(lit: Literal, Apply("contig", _, Seq(k), _))) if es.isFirstKey(k) => val rg = k.typ.asInstanceOf[TLocus].rg.asInstanceOf[ReferenceGenome] val intervals = (lit.value: @unchecked) match { @@ -183,7 +182,7 @@ object ExtractIntervalFilters { case x: Map[_, _] => x.keys.map(elt => getIntervalFromContig(elt.asInstanceOf[String], rg)).toArray } Some((True(), intervals)) - case ApplyIR("contains", _, Seq(lit: Literal, k), _) if literalSizeOkay(lit) => + case ApplyIR("contains", _, Seq(lit: Literal, k)) if literalSizeOkay(lit) => val wrap = if (es.isFirstKey(k)) Some(true) else if (es.isKeyStructPrefix(k)) Some(false) else None wrap.map { wrapStruct => val intervals = (lit.value: @unchecked) match { @@ -199,7 +198,7 @@ object ExtractIntervalFilters { } (True(), intervals) } - case ApplySpecial("contains", _, Seq(lit: Literal, k), _, _) => + case ApplySpecial("contains", _, Seq(lit: Literal, k), _) => k match { case x if es.isFirstKey(x) => val intervals = (lit.value: @unchecked) match { @@ -231,13 +230,13 @@ object ExtractIntervalFilters { assert(op.isInstanceOf[EQ]) val c = constValue(const) Some((True(), Array(Interval(endpoint(c, -1), endpoint(c, 1))))) - case Apply("contig", _, Seq(x), _, _) if es.isFirstKey(x) => + case Apply("contig", _, Seq(x), _) if es.isFirstKey(x) => // locus contig comparison val intervals = (constValue(const): @unchecked) match { case s: String => Array(getIntervalFromContig(s, es.firstKeyType.asInstanceOf[TLocus].rg.asInstanceOf[ReferenceGenome])) } Some((True(), intervals)) - case Apply("position", _, Seq(x), _, _) if es.isFirstKey(x) => + case Apply("position", _, Seq(x), _) if es.isFirstKey(x) => // locus position comparison val pos = constValue(const).asInstanceOf[Int] val rg = es.firstKeyType.asInstanceOf[TLocus].rg.asInstanceOf[ReferenceGenome] diff --git a/hail/src/main/scala/is/hail/expr/ir/IR.scala b/hail/src/main/scala/is/hail/expr/ir/IR.scala index 71dde2644de..96c2c31a09a 100644 --- a/hail/src/main/scala/is/hail/expr/ir/IR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/IR.scala @@ -221,14 +221,13 @@ object MakeStream { final case class MakeStream(args: Seq[IR], _typ: TStream, requiresMemoryManagementPerElement: Boolean = false) extends IR object ArrayRef { - def apply(a: IR, i: IR): ArrayRef = ArrayRef(a, i, ErrorIDs.NO_ERROR) + def apply(a: IR, i: IR): ArrayRef = ArrayRef(a, i, Str("")) } -final case class ArrayRef(a: IR, i: IR, errorId: Int) extends IR +final case class ArrayRef(a: IR, i: IR, msg: IR) extends IR final case class ArrayLen(a: IR) extends IR final case class ArrayZeros(length: IR) extends IR -final case class StreamRange(start: IR, stop: IR, step: IR, requiresMemoryManagementPerElement: Boolean = false, - errorID: Int = ErrorIDs.NO_ERROR) extends IR +final case class StreamRange(start: IR, stop: IR, step: IR, requiresMemoryManagementPerElement: Boolean = false) extends IR object ArraySort { def apply(a: IR, ascending: IR = True(), onKey: Boolean = false): ArraySort = { @@ -285,8 +284,7 @@ object ArrayZipBehavior extends Enumeration { val ExtendNA: Value = Value(3) } 
-final case class StreamZip(as: IndexedSeq[IR], names: IndexedSeq[String], body: IR, behavior: ArrayZipBehavior, - errorID: Int = ErrorIDs.NO_ERROR) extends IR { +final case class StreamZip(as: IndexedSeq[IR], names: IndexedSeq[String], body: IR, behavior: ArrayZipBehavior) extends IR { lazy val nameIdx: Map[String, Int] = names.zipWithIndex.toMap override def typ: TStream = coerce[TStream](super.typ) } @@ -386,7 +384,7 @@ sealed trait NDArrayIR extends TypedIR[TNDArray] { object MakeNDArray { def fill(elt: IR, shape: IndexedSeq[Long], rowMajor: IR): MakeNDArray = MakeNDArray( - ToArray(StreamMap(StreamRange(0, shape.product.toInt, 1, errorID = ErrorIDs.NO_ERROR), genUID(), elt)), + ToArray(StreamMap(StreamRange(0, shape.product.toInt, 1), genUID(), elt)), MakeTuple.ordered(shape.map(I64)), rowMajor, ErrorIDs.NO_ERROR) } @@ -394,7 +392,7 @@ final case class MakeNDArray(data: IR, shape: IR, rowMajor: IR, errorId: Int) ex final case class NDArrayShape(nd: IR) extends IR -final case class NDArrayReshape(nd: IR, shape: IR, errorID: Int) extends NDArrayIR +final case class NDArrayReshape(nd: IR, shape: IR) extends NDArrayIR final case class NDArrayConcat(nds: IR, axis: Int) extends NDArrayIR @@ -403,13 +401,13 @@ final case class NDArraySlice(nd: IR, slices: IR) extends NDArrayIR final case class NDArrayFilter(nd: IR, keep: IndexedSeq[IR]) extends NDArrayIR final case class NDArrayMap(nd: IR, valueName: String, body: IR) extends NDArrayIR -final case class NDArrayMap2(l: IR, r: IR, lName: String, rName: String, body: IR, errorID: Int) extends NDArrayIR +final case class NDArrayMap2(l: IR, r: IR, lName: String, rName: String, body: IR) extends NDArrayIR final case class NDArrayReindex(nd: IR, indexExpr: IndexedSeq[Int]) extends NDArrayIR final case class NDArrayAgg(nd: IR, axes: IndexedSeq[Int]) extends IR final case class NDArrayWrite(nd: IR, path: IR) extends IR -final case class NDArrayMatMul(l: IR, r: IR, errorID: Int) extends NDArrayIR +final case class NDArrayMatMul(l: IR, r: IR) extends NDArrayIR object NDArrayQR { def pType(mode: String, req: Boolean): PType = { @@ -437,11 +435,11 @@ object NDArrayInv { val pType = PCanonicalNDArray(PFloat64Required, 2) } -final case class NDArrayQR(nd: IR, mode: String, errorID: Int) extends IR +final case class NDArrayQR(nd: IR, mode: String) extends IR -final case class NDArraySVD(nd: IR, fullMatrices: Boolean, computeUV: Boolean, errorID: Int) extends IR +final case class NDArraySVD(nd: IR, fullMatrices: Boolean, computeUV: Boolean) extends IR -final case class NDArrayInv(nd: IR, errorID: Int) extends IR +final case class NDArrayInv(nd: IR) extends IR final case class AggFilter(cond: IR, aggIR: IR, isScan: Boolean) extends IR @@ -483,8 +481,7 @@ final case class InitOp(i: Int, args: IndexedSeq[IR], aggSig: PhysicalAggSig) ex final case class SeqOp(i: Int, args: IndexedSeq[IR], aggSig: PhysicalAggSig) extends IR final case class CombOp(i1: Int, i2: Int, aggSig: PhysicalAggSig) extends IR final case class ResultOp(startIdx: Int, aggSigs: IndexedSeq[PhysicalAggSig]) extends IR - -private final case class CombOpValue(i: Int, value: IR, aggSig: PhysicalAggSig) extends IR +final case class CombOpValue(i: Int, value: IR, aggSig: PhysicalAggSig) extends IR final case class AggStateValue(i: Int, aggSig: AggStateSig) extends IR final case class InitFromSerializedValue(i: Int, value: IR, aggSig: AggStateSig) extends IR @@ -554,12 +551,12 @@ object Die { final case class Trap(child: IR) extends IR final case class Die(message: IR, _typ: Type, errorId: Int) 
extends IR -final case class ApplyIR(function: String, typeArgs: Seq[Type], args: Seq[IR], errorID: Int) extends IR { - var conversion: (Seq[Type], Seq[IR], Int) => IR = _ +final case class ApplyIR(function: String, typeArgs: Seq[Type], args: Seq[IR]) extends IR { + var conversion: (Seq[Type], Seq[IR]) => IR = _ var inline: Boolean = _ private lazy val refs = args.map(a => Ref(genUID(), a.typ)).toArray - lazy val body: IR = conversion(typeArgs, refs, errorID).deepCopy() + lazy val body: IR = conversion(typeArgs, refs).deepCopy() lazy val refIdx: Map[String, Int] = refs.map(_.name).zipWithIndex.toMap lazy val explicitNode: IR = { @@ -578,13 +575,13 @@ sealed abstract class AbstractApplyNode[F <: JVMFunction] extends IR { .asInstanceOf[F] } -final case class Apply(function: String, typeArgs: Seq[Type], args: Seq[IR], returnType: Type, errorID: Int) extends AbstractApplyNode[UnseededMissingnessObliviousJVMFunction] +final case class Apply(function: String, typeArgs: Seq[Type], args: Seq[IR], returnType: Type) extends AbstractApplyNode[UnseededMissingnessObliviousJVMFunction] final case class ApplySeeded(function: String, args: Seq[IR], seed: Long, returnType: Type) extends AbstractApplyNode[SeededJVMFunction] { val typeArgs: Seq[Type] = Seq.empty[Type] } -final case class ApplySpecial(function: String, typeArgs: Seq[Type], args: Seq[IR], returnType: Type, errorID: Int) extends AbstractApplyNode[UnseededMissingnessAwareJVMFunction] +final case class ApplySpecial(function: String, typeArgs: Seq[Type], args: Seq[IR], returnType: Type) extends AbstractApplyNode[UnseededMissingnessAwareJVMFunction] final case class LiftMeOut(child: IR) extends IR final case class TableCount(child: TableIR) extends IR diff --git a/hail/src/main/scala/is/hail/expr/ir/InferType.scala b/hail/src/main/scala/is/hail/expr/ir/InferType.scala index 0f040626e8f..53ddc49ac04 100644 --- a/hail/src/main/scala/is/hail/expr/ir/InferType.scala +++ b/hail/src/main/scala/is/hail/expr/ir/InferType.scala @@ -114,7 +114,7 @@ object InferType { TStream(a.typ) case StreamMap(a, name, body) => TStream(body.typ) - case StreamZip(as, _, body, _, _) => + case StreamZip(as, _, body, _) => TStream(body.typ) case StreamZipJoin(_, _, _, _, joinF) => TStream(joinF.typ) @@ -144,13 +144,13 @@ object InferType { case NDArrayShape(nd) => val ndType = nd.typ.asInstanceOf[TNDArray] ndType.shapeType - case NDArrayReshape(nd, shape, _) => + case NDArrayReshape(nd, shape) => TNDArray(coerce[TNDArray](nd.typ).elementType, Nat(shape.typ.asInstanceOf[TTuple].size)) case NDArrayConcat(nds, _) => coerce[TArray](nds.typ).elementType case NDArrayMap(nd, _, body) => TNDArray(body.typ, coerce[TNDArray](nd.typ).nDimsBase) - case NDArrayMap2(l, _, _, _, body, _) => + case NDArrayMap2(l, _, _, _, body) => TNDArray(body.typ, coerce[TNDArray](l.typ).nDimsBase) case NDArrayReindex(nd, indexExpr) => TNDArray(coerce[TNDArray](nd.typ).elementType, Nat(indexExpr.length)) @@ -168,11 +168,11 @@ object InferType { TNDArray(childTyp.elementType, remainingDims) case NDArrayFilter(nd, _) => nd.typ - case NDArrayMatMul(l, r, _) => + case NDArrayMatMul(l, r) => val lTyp = coerce[TNDArray](l.typ) val rTyp = coerce[TNDArray](r.typ) TNDArray(lTyp.elementType, Nat(TNDArray.matMulNDims(lTyp.nDims, rTyp.nDims))) - case NDArrayQR(nd, mode, _) => + case NDArrayQR(nd, mode) => if (Array("complete", "reduced").contains(mode)) { TTuple(TNDArray(TFloat64, Nat(2)), TNDArray(TFloat64, Nat(2))) } else if (mode == "raw") { @@ -182,13 +182,13 @@ object InferType { } else { throw new 
NotImplementedError(s"Cannot infer type for mode $mode") } - case NDArraySVD(nd, _, compute_uv, _) => + case NDArraySVD(nd, _, compute_uv) => if (compute_uv) { TTuple(TNDArray(TFloat64, Nat(2)), TNDArray(TFloat64, Nat(1)), TNDArray(TFloat64, Nat(2))) } else { TNDArray(TFloat64, Nat(1)) } - case NDArrayInv(_, _) => + case NDArrayInv(_) => TNDArray(TFloat64, Nat(2)) case NDArrayWrite(_, _) => TVoid case AggFilter(_, aggIR, _) => diff --git a/hail/src/main/scala/is/hail/expr/ir/Interpret.scala b/hail/src/main/scala/is/hail/expr/ir/Interpret.scala index 0bc32139476..33e765ea877 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Interpret.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Interpret.scala @@ -261,12 +261,12 @@ object Interpret { null else aValue.asInstanceOf[IndexedSeq[Any]].length - case StreamRange(start, stop, step, _, errorID) => + case StreamRange(start, stop, step, _) => val startValue = interpret(start, env, args) val stopValue = interpret(stop, env, args) val stepValue = interpret(step, env, args) if (stepValue == 0) - fatal("Array range cannot have step size 0.", errorID) + fatal("Array range cannot have step size 0.") if (startValue == null || stopValue == null || stepValue == null) null else @@ -420,7 +420,7 @@ object Interpret { interpret(body, env.bind(name, element), args) } } - case StreamZip(as, names, body, behavior, errorID) => + case StreamZip(as, names, body, behavior) => val aValues = as.map(interpret(_, env, args).asInstanceOf[IndexedSeq[_]]) if (aValues.contains(null)) null @@ -429,7 +429,7 @@ object Interpret { case ArrayZipBehavior.AssertSameLength | ArrayZipBehavior.AssumeSameLength => val lengths = aValues.map(_.length).toSet if (lengths.size != 1) - fatal(s"zip: length mismatch: ${ lengths.mkString(", ") }", errorID) + fatal(s"zip: length mismatch: ${ lengths.mkString(", ") }") lengths.head case ArrayZipBehavior.TakeMinLength => aValues.map(_.length).min @@ -713,9 +713,9 @@ object Interpret { } catch { case e: HailException => Row(Row(e.msg, e.errorId), null) } - case ir@ApplyIR(function, _, functionArgs, _) => + case ir@ApplyIR(function, _, functionArgs) => interpret(ir.explicitNode, env, args) - case ApplySpecial("lor", _, Seq(left_, right_), _, _) => + case ApplySpecial("lor", _, Seq(left_, right_), _) => val left = interpret(left_) if (left == true) true @@ -727,7 +727,7 @@ object Interpret { null else false } - case ApplySpecial("land", _, Seq(left_, right_), _, _) => + case ApplySpecial("land", _, Seq(left_, right_), _) => val left = interpret(left_) if (left == false) false diff --git a/hail/src/main/scala/is/hail/expr/ir/MatrixValue.scala b/hail/src/main/scala/is/hail/expr/ir/MatrixValue.scala index ef02c6b8e4f..490f9533b61 100644 --- a/hail/src/main/scala/is/hail/expr/ir/MatrixValue.scala +++ b/hail/src/main/scala/is/hail/expr/ir/MatrixValue.scala @@ -197,13 +197,10 @@ case class MatrixValue( val partitionBytesWritten = fileData.map(_.bytesWritten) val totalRowsEntriesBytes = partitionBytesWritten.sum val totalBytesWritten: Long = totalRowsEntriesBytes + colBytesWritten + globalBytesWritten - val (smallestStr, largestStr) = if (fileData.isEmpty) ("N/A", "N/A") else { - val smallestPartition = fileData.minBy(_.bytesWritten) - val largestPartition = fileData.maxBy(_.bytesWritten) - val smallestStr = s"${ smallestPartition.rowsWritten } rows (${ formatSpace(smallestPartition.bytesWritten) })" - val largestStr = s"${ largestPartition.rowsWritten } rows (${ formatSpace(largestPartition.bytesWritten) })" - (smallestStr, largestStr) - } + val 
smallestPartition = fileData.minBy(_.bytesWritten) + val largestPartition = fileData.maxBy(_.bytesWritten) + val smallestStr = s"${ smallestPartition.rowsWritten } rows (${ formatSpace(smallestPartition.bytesWritten) })" + val largestStr = s"${ largestPartition.rowsWritten } rows (${ formatSpace(largestPartition.bytesWritten) })" printer(s"wrote matrix table with $nRows ${ plural(nRows, "row") } " + s"and $nCols ${ plural(nCols, "column") } " + diff --git a/hail/src/main/scala/is/hail/expr/ir/NestingDepth.scala b/hail/src/main/scala/is/hail/expr/ir/NestingDepth.scala index 43ec4f757e8..8fd2d3c8b4e 100644 --- a/hail/src/main/scala/is/hail/expr/ir/NestingDepth.scala +++ b/hail/src/main/scala/is/hail/expr/ir/NestingDepth.scala @@ -45,7 +45,7 @@ object NestingDepth { case StreamMap(a, name, body) => computeIR(a, depth) computeIR(body, depth.incrementEval) - case StreamZip(as, _, body, _, _) => + case StreamZip(as, _, body, _) => as.foreach(computeIR(_, depth)) computeIR(body, depth.incrementEval) case StreamFor(a, valueName, body) => @@ -80,7 +80,7 @@ object NestingDepth { case NDArrayMap(nd, _, body) => computeIR(nd, depth) computeIR(body, depth.incrementEval) - case NDArrayMap2(nd1, nd2, _, _, body, _) => + case NDArrayMap2(nd1, nd2, _, _, body) => computeIR(nd1, depth) computeIR(nd2, depth) computeIR(body, depth.incrementEval) diff --git a/hail/src/main/scala/is/hail/expr/ir/NormalizeNames.scala b/hail/src/main/scala/is/hail/expr/ir/NormalizeNames.scala index 098e5a5f115..4be109dacdc 100644 --- a/hail/src/main/scala/is/hail/expr/ir/NormalizeNames.scala +++ b/hail/src/main/scala/is/hail/expr/ir/NormalizeNames.scala @@ -85,12 +85,12 @@ class NormalizeNames(normFunction: Int => String, allowFreeVariables: Boolean = newA <- normalize(a) newBody <- normalize(body, env.bindEval(name, newName)) } yield StreamMap(newA, newName, newBody) - case StreamZip(as, names, body, behavior, errorID) => + case StreamZip(as, names, body, behavior) => val newNames = names.map(_ => gen()) for { newAs <- as.mapRecur(normalize(_)) newBody <- normalize(body, env.bindEval(names.zip(newNames): _*)) - } yield StreamZip(newAs, newNames, newBody, behavior, errorID) + } yield StreamZip(newAs, newNames, newBody, behavior) case StreamZipJoin(as, key, curKey, curVals, joinF) => val newCurKey = gen() val newCurVals = gen() @@ -190,14 +190,14 @@ class NormalizeNames(normFunction: Int => String, allowFreeVariables: Boolean = newNd <- normalize(nd) newBody <- normalize(body, env.bindEval(name -> newName)) } yield NDArrayMap(newNd, newName, newBody) - case NDArrayMap2(l, r, lName, rName, body, errorID) => + case NDArrayMap2(l, r, lName, rName, body) => val newLName = gen() val newRName = gen() for { newL <- normalize(l) newR <- normalize(r) newBody <- normalize(body, env.bindEval(lName -> newLName, rName -> newRName)) - } yield NDArrayMap2(newL, newR, newLName, newRName, newBody, errorID) + } yield NDArrayMap2(newL, newR, newLName, newRName, newBody) case AggArrayPerElement(a, elementName, indexName, aggBody, knownLength, isScan) => val newElementName = gen() val newIndexName = gen() diff --git a/hail/src/main/scala/is/hail/expr/ir/Parser.scala b/hail/src/main/scala/is/hail/expr/ir/Parser.scala index 0db29fb71ec..9f4d510402f 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Parser.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Parser.scala @@ -887,21 +887,20 @@ object IRParser { MakeStream(args, typ, requiresMemoryManagementPerElement) } case "ArrayRef" => - val errorID = int32_literal(it) for { a <- ir_value_expr(env)(it) i <- 
ir_value_expr(env)(it) - } yield ArrayRef(a, i, errorID) + s <- ir_value_expr(env)(it) + } yield ArrayRef(a, i, s) case "ArrayLen" => ir_value_expr(env)(it).map(ArrayLen) case "StreamLen" => ir_value_expr(env)(it).map(StreamLen) case "StreamRange" => - val errorID = int32_literal(it) val requiresMemoryManagementPerElement = boolean_literal(it) for { start <- ir_value_expr(env)(it) stop <- ir_value_expr(env)(it) step <- ir_value_expr(env)(it) - } yield StreamRange(start, stop, step, requiresMemoryManagementPerElement, errorID) + } yield StreamRange(start, stop, step, requiresMemoryManagementPerElement) case "StreamGrouped" => for { s <- ir_value_expr(env)(it) @@ -917,19 +916,18 @@ object IRParser { lessThan <- ir_value_expr(env + (l -> elt) + (r -> elt))(it) } yield ArraySort(a, l, r, lessThan) case "MakeNDArray" => - val errorID = int32_literal(it) + val errorId = int32_literal(it) for { data <- ir_value_expr(env)(it) shape <- ir_value_expr(env)(it) rowMajor <- ir_value_expr(env)(it) - } yield MakeNDArray(data, shape, rowMajor, errorID) + } yield MakeNDArray(data, shape, rowMajor, errorId) case "NDArrayShape" => ir_value_expr(env)(it).map(NDArrayShape) case "NDArrayReshape" => - val errorID = int32_literal(it) for { nd <- ir_value_expr(env)(it) shape <- ir_value_expr(env)(it) - } yield NDArrayReshape(nd, shape, errorID) + } yield NDArrayReshape(nd, shape) case "NDArrayConcat" => val axis = int32_literal(it) ir_value_expr(env)(it).map { nds => @@ -942,7 +940,6 @@ object IRParser { body <- ir_value_expr(env + (name -> coerce[TNDArray](nd.typ).elementType))(it) } yield NDArrayMap(nd, name, body) case "NDArrayMap2" => - val errorID = int32_literal(it) val lName = identifier(it) val rName = identifier(it) for { @@ -951,7 +948,7 @@ object IRParser { body_env = (env + (lName -> coerce[TNDArray](l.typ).elementType) + (rName -> coerce[TNDArray](r.typ).elementType)) body <- ir_value_expr(body_env)(it) - } yield NDArrayMap2(l, r, lName, rName, body, errorID) + } yield NDArrayMap2(l, r, lName, rName, body) case "NDArrayReindex" => val indexExpr = int32_literals(it) ir_value_expr(env)(it).map { nd => @@ -963,11 +960,11 @@ object IRParser { NDArrayAgg(nd, axes) } case "NDArrayRef" => - val errorID = int32_literal(it) + val errorId = int32_literal(it) for { nd <- ir_value_expr(env)(it) idxs <- ir_value_children(env)(it) - } yield NDArrayRef(nd, idxs, errorID) + } yield NDArrayRef(nd, idxs, errorId) case "NDArraySlice" => for { nd <- ir_value_expr(env)(it) @@ -979,32 +976,27 @@ object IRParser { filters <- fillArray(coerce[TNDArray](nd.typ).nDims)(ir_value_expr(env)(it)) } yield NDArrayFilter(nd, filters.toFastIndexedSeq) case "NDArrayMatMul" => - val errorID = int32_literal(it) for { l <- ir_value_expr(env)(it) r <- ir_value_expr(env)(it) - } yield NDArrayMatMul(l, r, errorID) + } yield NDArrayMatMul(l, r) case "NDArrayWrite" => for { nd <- ir_value_expr(env)(it) path <- ir_value_expr(env)(it) } yield NDArrayWrite(nd, path) case "NDArrayQR" => - val errorID = int32_literal(it) val mode = string_literal(it) ir_value_expr(env)(it).map { nd => - NDArrayQR(nd, mode, errorID) + NDArrayQR(nd, mode) } case "NDArraySVD" => - val errorID = int32_literal(it) val fullMatrices = boolean_literal(it) val computeUV = boolean_literal(it) ir_value_expr(env)(it).map { nd => - NDArraySVD(nd, fullMatrices, computeUV, errorID) + NDArraySVD(nd, fullMatrices, computeUV) } - case "NDArrayInv" => - val errorID = int32_literal(it) - ir_value_expr(env)(it).map{ nd => NDArrayInv(nd, errorID) } + case "NDArrayInv" => 
ir_value_expr(env)(it).map(NDArrayInv(_)) case "ToSet" => ir_value_expr(env)(it).map(ToSet) case "ToDict" => ir_value_expr(env)(it).map(ToDict) case "ToArray" => ir_value_expr(env)(it).map(ToArray) @@ -1038,7 +1030,6 @@ object IRParser { num <- ir_value_expr(env)(it) } yield StreamDrop(a, num) case "StreamZip" => - val errorID = int32_literal(it) val behavior = identifier(it) match { case "AssertSameLength" => ArrayZipBehavior.AssertSameLength case "TakeMinLength" => ArrayZipBehavior.TakeMinLength @@ -1049,7 +1040,7 @@ object IRParser { for { as <- names.mapRecur(_ => ir_value_expr(env)(it)) body <- ir_value_expr(env ++ names.zip(as.map(a => coerce[TStream](a.typ).elementType)))(it) - } yield StreamZip(as, names, body, behavior, errorID) + } yield StreamZip(as, names, body, behavior) case "StreamFilter" => val name = identifier(it) for { @@ -1264,9 +1255,9 @@ object IRParser { } case "Die" => val typ = type_expr(env.typEnv)(it) - val errorID = int32_literal(it) + val errorId = int32_literal(it) ir_value_expr(env)(it).map { msg => - Die(msg, typ, errorID) + Die(msg, typ, errorId) } case "Trap" => ir_value_expr(env)(it).map { child => @@ -1281,12 +1272,11 @@ object IRParser { ApplySeeded(function, args, seed, rt) } case "ApplyIR" | "ApplySpecial" | "Apply" => - val errorID = int32_literal(it) val function = identifier(it) val typeArgs = type_exprs(env.typEnv)(it) val rt = type_expr(env.typEnv)(it) ir_value_children(env)(it).map { args => - invoke(function, rt, typeArgs, errorID, args: _*) + invoke(function, rt, typeArgs, args: _*) } case "MatrixCount" => matrix_ir(env.withRefMap(Map.empty))(it).map(MatrixCount) diff --git a/hail/src/main/scala/is/hail/expr/ir/Pretty.scala b/hail/src/main/scala/is/hail/expr/ir/Pretty.scala index a2123dcfbd9..bb8a79050b2 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Pretty.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Pretty.scala @@ -156,10 +156,10 @@ object Pretty { case MakeArray(_, typ) => single(typ.parsableString()) case MakeStream(_, typ, requiresMemoryManagementPerElement) => FastSeq(typ.parsableString(), prettyBooleanLiteral(requiresMemoryManagementPerElement)) - case StreamRange(_, _, _, requiresMemoryManagementPerElement, errorID) => FastSeq(errorID.toString, prettyBooleanLiteral(requiresMemoryManagementPerElement)) + case StreamRange(_, _, _, requiresMemoryManagementPerElement) => single(prettyBooleanLiteral(requiresMemoryManagementPerElement)) case ToStream(_, requiresMemoryManagementPerElement) => single(prettyBooleanLiteral(requiresMemoryManagementPerElement)) case StreamMap(_, name, _) => single(prettyIdentifier(name)) - case StreamZip(_, names, _, behavior, errorID) => FastSeq(errorID.toString, behavior match { + case StreamZip(_, names, _, behavior) => FastSeq(behavior match { case ArrayZipBehavior.AssertSameLength => "AssertSameLength" case ArrayZipBehavior.TakeMinLength => "TakeMinLength" case ArrayZipBehavior.ExtendNA => "ExtendNA" @@ -185,26 +185,20 @@ object Pretty { case AggArrayPerElement(_, elementName, indexName, _, knownLength, isScan) => FastSeq(prettyIdentifier(elementName), prettyIdentifier(indexName), prettyBooleanLiteral(isScan), prettyBooleanLiteral(knownLength.isDefined)) case NDArrayMap(_, name, _) => single(prettyIdentifier(name)) - case NDArrayMap2(_, _, lName, rName, _, errorID) => FastSeq(s"$errorID", prettyIdentifier(lName), prettyIdentifier(rName)) + case NDArrayMap2(_, _, lName, rName, _) => FastSeq(prettyIdentifier(lName), prettyIdentifier(rName)) case NDArrayReindex(_, indexExpr) => single(prettyInts(indexExpr, 
elideLiterals)) case NDArrayConcat(_, axis) => single(axis.toString) case NDArrayAgg(_, axes) => single(prettyInts(axes, elideLiterals)) - case NDArrayRef(_, _, errorID) => single(s"$errorID") - case NDArrayReshape(_, _, errorID) => single(s"$errorID") - case NDArrayMatMul(_, _, errorID) => single(s"$errorID") - case NDArrayQR(_, mode, errorID) => FastSeq(errorID.toString, mode) - case NDArraySVD(_, fullMatrices, computeUV, errorID) => FastSeq(errorID.toString, fullMatrices.toString, computeUV.toString) - case NDArrayInv(_, errorID) => single(s"$errorID") + case NDArrayRef(_, _, errorId) => single(s"$errorId") case ArraySort(_, l, r, _) => FastSeq(prettyIdentifier(l), prettyIdentifier(r)) - case ArrayRef(_,_, errorID) => single(s"$errorID") - case ApplyIR(function, typeArgs, _, errorID) => FastSeq(s"$errorID", prettyIdentifier(function), prettyTypes(typeArgs), ir.typ.parsableString()) - case Apply(function, typeArgs, _, t, errorID) => FastSeq(s"$errorID", prettyIdentifier(function), prettyTypes(typeArgs), t.parsableString()) + case ApplyIR(function, typeArgs, _) => FastSeq(prettyIdentifier(function), prettyTypes(typeArgs), ir.typ.parsableString()) + case Apply(function, typeArgs, _, t) => FastSeq(prettyIdentifier(function), prettyTypes(typeArgs), t.parsableString()) case ApplySeeded(function, _, seed, t) => FastSeq(prettyIdentifier(function), seed.toString, t.parsableString()) - case ApplySpecial(function, typeArgs, _, t, errorID) => FastSeq(s"$errorID", prettyIdentifier(function), prettyTypes(typeArgs), t.parsableString()) + case ApplySpecial(function, typeArgs, _, t) => FastSeq(prettyIdentifier(function), prettyTypes(typeArgs), t.parsableString()) case SelectFields(_, fields) => single(fillList(fields.view.map(f => text(prettyIdentifier(f))))) case LowerBoundOnOrderedCollection(_, _, onKey) => single(prettyBooleanLiteral(onKey)) case In(i, typ) => FastSeq(typ.toString, i.toString) - case Die(message, typ, errorID) => FastSeq(typ.parsableString(), errorID.toString) + case Die(message, typ, errorId) => FastSeq(typ.parsableString(), errorId.toString) case CollectDistributedArray(_, _, cname, gname, _, _) => FastSeq(prettyIdentifier(cname), prettyIdentifier(gname)) case MatrixRead(typ, dropCols, dropRows, reader) => diff --git a/hail/src/main/scala/is/hail/expr/ir/PruneDeadFields.scala b/hail/src/main/scala/is/hail/expr/ir/PruneDeadFields.scala index 876b721d5d4..2b035a5aaa6 100644 --- a/hail/src/main/scala/is/hail/expr/ir/PruneDeadFields.scala +++ b/hail/src/main/scala/is/hail/expr/ir/PruneDeadFields.scala @@ -975,7 +975,7 @@ object PruneDeadFields { val reqStructT = coerce[TStruct](coerce[TStream](coerce[TStream](requestedType).elementType).elementType) val origStructT = coerce[TStruct](coerce[TStream](a.typ).elementType) memoizeValueIR(a, TStream(unify(origStructT, reqStructT, selectKey(origStructT, key))), memo) - case StreamZip(as, names, body, behavior, _) => + case StreamZip(as, names, body, behavior) => val bodyEnv = memoizeValueIR(body, requestedType.asInstanceOf[TStream].elementType, memo) @@ -1140,7 +1140,7 @@ object PruneDeadFields { bodyEnv.deleteEval(valueName), memoizeValueIR(nd, ndType.copy(elementType = valueType), memo) ) - case NDArrayMap2(left, right, leftName, rightName, body, _) => + case NDArrayMap2(left, right, leftName, rightName, body) => val leftType = left.typ.asInstanceOf[TNDArray] val rightType = right.typ.asInstanceOf[TNDArray] val bodyEnv = memoizeValueIR(body, requestedType.asInstanceOf[TNDArray].elementType, memo) @@ -1761,12 +1761,12 @@ object 
PruneDeadFields { case StreamMap(a, name, body) => val a2 = rebuildIR(a, env, memo) StreamMap(a2, name, rebuildIR(body, env.bindEval(name, a2.typ.asInstanceOf[TStream].elementType), memo)) - case StreamZip(as, names, body, b, errorID) => + case StreamZip(as, names, body, b) => val (newAs, newNames) = as.zip(names) .flatMap { case (a, name) => if (memo.requestedType.contains(a)) Some((rebuildIR(a, env, memo), name)) else None } .unzip StreamZip(newAs, newNames, rebuildIR(body, - env.bindEval(newNames.zip(newAs.map(a => a.typ.asInstanceOf[TStream].elementType)): _*), memo), b, errorID) + env.bindEval(newNames.zip(newAs.map(a => a.typ.asInstanceOf[TStream].elementType)): _*), memo), b) case StreamZipJoin(as, key, curKey, curVals, joinF) => val newAs = as.map(a => rebuildIR(a, env, memo)) val newEltType = as.head.typ.asInstanceOf[TStream].elementType.asInstanceOf[TStruct] @@ -1840,13 +1840,13 @@ object PruneDeadFields { case NDArrayMap(nd, valueName, body) => val nd2 = rebuildIR(nd, env, memo) NDArrayMap(nd2, valueName, rebuildIR(body, env.bindEval(valueName, nd2.typ.asInstanceOf[TNDArray].elementType), memo)) - case NDArrayMap2(left, right, leftName, rightName, body, errorID) => + case NDArrayMap2(left, right, leftName, rightName, body) => val left2 = rebuildIR(left, env, memo) val right2 = rebuildIR(right, env, memo) val body2 = rebuildIR(body, env.bindEval(leftName, left2.typ.asInstanceOf[TNDArray].elementType).bindEval(rightName, right2.typ.asInstanceOf[TNDArray].elementType), memo) - NDArrayMap2(left2, right2, leftName, rightName, body2, errorID) + NDArrayMap2(left2, right2, leftName, rightName, body2) case MakeStruct(fields) => val depStruct = requestedType.asInstanceOf[TStruct] // drop unnecessary field IRs diff --git a/hail/src/main/scala/is/hail/expr/ir/Requiredness.scala b/hail/src/main/scala/is/hail/expr/ir/Requiredness.scala index 7667b129373..40907aa4b2d 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Requiredness.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Requiredness.scala @@ -173,14 +173,14 @@ class Requiredness(val usesAndDefs: UsesAndDefs, ctx: ExecuteContext) { i += 1 } states.bind(node, s) - case x@ApplyIR(_, _, args, _) => + case x@ApplyIR(_, _, args) => x.refIdx.foreach { case (n, i) => addBinding(n, args(i)) } case ArraySort(a, l, r, c) => addElementBinding(l, a, makeRequired = true) addElementBinding(r, a, makeRequired = true) case StreamMap(a, name, body) => addElementBinding(name, a) - case x@StreamZip(as, names, body, behavior, _) => + case x@StreamZip(as, names, body, behavior) => var i = 0 while (i < names.length) { addElementBinding(names(i), as(i), @@ -251,7 +251,7 @@ class Requiredness(val usesAndDefs: UsesAndDefs, ctx: ExecuteContext) { refMap(idx).foreach { use => defs.bind(use, Array[BaseTypeWithRequiredness](RPrimitive())) } case NDArrayMap(nd, name, body) => addElementBinding(name, nd) - case NDArrayMap2(left, right, l, r, body, _) => + case NDArrayMap2(left, right, l, r, body) => addElementBinding(l, left) addElementBinding(r, right) case CollectDistributedArray(ctxs, globs, c, g, body, _) => @@ -541,7 +541,7 @@ class Requiredness(val usesAndDefs: UsesAndDefs, ctx: ExecuteContext) { case StreamDrop(a, n) => requiredness.union(lookup(n).required) requiredness.unionFrom(lookup(a)) - case StreamZip(as, names, body, behavior, _) => + case StreamZip(as, names, body, behavior) => requiredness.union(as.forall(lookup(_).required)) coerce[RIterable](requiredness).elementType.unionFrom(lookup(body)) case StreamZipJoin(as, _, curKey, curVals, joinF) => @@ 
-603,7 +603,7 @@ class Requiredness(val usesAndDefs: UsesAndDefs, ctx: ExecuteContext) { requiredness.union(lookup(shape).required) case NDArrayShape(nd) => requiredness.union(lookup(nd).required) - case NDArrayReshape(nd, shape, _) => + case NDArrayReshape(nd, shape) => val sReq = lookupAs[RBaseStruct](shape) val ndReq = lookup(nd) requiredness.unionFrom(ndReq) @@ -629,15 +629,15 @@ class Requiredness(val usesAndDefs: UsesAndDefs, ctx: ExecuteContext) { case NDArrayMap(nd, name, body) => requiredness.union(lookup(nd).required) coerce[RNDArray](requiredness).unionElement(lookup(body)) - case NDArrayMap2(l, r, _, _, body, _) => + case NDArrayMap2(l, r, _, _, body) => requiredness.union(lookup(l).required && lookup(r).required) coerce[RNDArray](requiredness).unionElement(lookup(body)) - case NDArrayMatMul(l, r, _) => + case NDArrayMatMul(l, r) => requiredness.unionFrom(lookup(l)) requiredness.union(lookup(r).required) - case NDArrayQR(child, mode, _) => requiredness.fromPType(NDArrayQR.pType(mode, lookup(child).required)) - case NDArraySVD(child, _, computeUV, _) => requiredness.fromPType(NDArraySVD.pTypes(computeUV, lookup(child).required)) - case NDArrayInv(child, _) => requiredness.unionFrom(lookup(child)) + case NDArrayQR(child, mode) => requiredness.fromPType(NDArrayQR.pType(mode, lookup(child).required)) + case NDArraySVD(child, _, computeUV) => requiredness.fromPType(NDArraySVD.pTypes(computeUV, lookup(child).required)) + case NDArrayInv(child) => requiredness.unionFrom(lookup(child)) case MakeStruct(fields) => fields.foreach { case (n, f) => coerce[RStruct](requiredness).field(n).unionFrom(lookup(f)) diff --git a/hail/src/main/scala/is/hail/expr/ir/Simplify.scala b/hail/src/main/scala/is/hail/expr/ir/Simplify.scala index 35a4406e0d5..1557ab833fc 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Simplify.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Simplify.scala @@ -141,8 +141,8 @@ object Simplify { case x@StreamMap(NA(_), _, _) => NA(x.typ) - case StreamZip(as, names, body, _, _) if as.length == 1 => StreamMap(as.head, names.head, body) - case StreamMap(StreamZip(as, names, zipBody, b, errorID), name, mapBody) => StreamZip(as, names, Let(name, zipBody, mapBody), b, errorID) + case StreamZip(as, names, body, _) if as.length == 1 => StreamMap(as.head, names.head, body) + case StreamMap(StreamZip(as, names, zipBody, b), name, mapBody) => StreamZip(as, names, Let(name, zipBody, mapBody), b) case StreamMap(StreamFlatMap(child, flatMapName, flatMapBody), mapName, mapBody) => StreamFlatMap(child, flatMapName, StreamMap(flatMapBody, mapName, mapBody)) case x@StreamFlatMap(NA(_), _, _) => NA(x.typ) @@ -182,15 +182,15 @@ object Simplify { case ApplyBinaryPrimOp(Subtract(), I32(0), x) => x case ApplyBinaryPrimOp(Subtract(), x, I32(0)) => x - case ApplyIR("indexArray", _, Seq(a, i@I32(v)), errorID) if v >= 0 => - ArrayRef(a, i, errorID) + case ApplyIR("indexArray", _, Seq(a, i@I32(v))) if v >= 0 => + ArrayRef(a, i) - case ApplyIR("contains", _, Seq(CastToArray(x), element), _) if x.typ.isInstanceOf[TSet] => invoke("contains", TBoolean, x, element) + case ApplyIR("contains", _, Seq(CastToArray(x), element)) if x.typ.isInstanceOf[TSet] => invoke("contains", TBoolean, x, element) - case ApplyIR("contains", _, Seq(Literal(t, v), element), _) if t.isInstanceOf[TArray] => + case ApplyIR("contains", _, Seq(Literal(t, v), element)) if t.isInstanceOf[TArray] => invoke("contains", TBoolean, Literal(TSet(t.asInstanceOf[TArray].elementType), v.asInstanceOf[IndexedSeq[_]].toSet), element) - case 
ApplyIR("contains", _, Seq(ToSet(x), element), _) if x.typ.isInstanceOf[TArray] => invoke("contains", TBoolean, x, element) + case ApplyIR("contains", _, Seq(ToSet(x), element)) if x.typ.isInstanceOf[TArray] => invoke("contains", TBoolean, x, element) case x: ApplyIR if x.inline || x.body.size < 10 => x.explicitNode @@ -527,7 +527,7 @@ object Simplify { // ArrayAgg(GetField(Ref(uid, rowsAndGlobal.typ), "rows"), "row", query))) // } - case ApplyIR("annotate", _, Seq(s, MakeStruct(fields)), _) => + case ApplyIR("annotate", _, Seq(s, MakeStruct(fields))) => InsertFields(s, fields) // simplify Boolean equality @@ -590,7 +590,7 @@ object Simplify { case TableFilter(TableFilter(t, p1), p2) => TableFilter(t, - ApplySpecial("land", Array.empty[Type], Array(p1, p2), TBoolean, ErrorIDs.NO_ERROR)) + ApplySpecial("land", Array.empty[Type], Array(p1, p2), TBoolean)) case TableFilter(TableKeyBy(child, key, isSorted), p) if canRepartition => TableKeyBy(TableFilter(child, p), key, isSorted) case TableFilter(TableRepartition(child, n, strategy), p) => TableRepartition(TableFilter(child, p), n, strategy) @@ -882,11 +882,11 @@ object Simplify { case MatrixFilterCols(m, True()) => m - case MatrixFilterRows(MatrixFilterRows(child, pred1), pred2) => MatrixFilterRows(child, ApplySpecial("land", FastSeq(), FastSeq(pred1, pred2), TBoolean, ErrorIDs.NO_ERROR)) + case MatrixFilterRows(MatrixFilterRows(child, pred1), pred2) => MatrixFilterRows(child, ApplySpecial("land", FastSeq(), FastSeq(pred1, pred2), TBoolean)) - case MatrixFilterCols(MatrixFilterCols(child, pred1), pred2) => MatrixFilterCols(child, ApplySpecial("land", FastSeq(), FastSeq(pred1, pred2), TBoolean, ErrorIDs.NO_ERROR)) + case MatrixFilterCols(MatrixFilterCols(child, pred1), pred2) => MatrixFilterCols(child, ApplySpecial("land", FastSeq(), FastSeq(pred1, pred2), TBoolean)) - case MatrixFilterEntries(MatrixFilterEntries(child, pred1), pred2) => MatrixFilterEntries(child, ApplySpecial("land", FastSeq(), FastSeq(pred1, pred2), TBoolean, ErrorIDs.NO_ERROR)) + case MatrixFilterEntries(MatrixFilterEntries(child, pred1), pred2) => MatrixFilterEntries(child, ApplySpecial("land", FastSeq(), FastSeq(pred1, pred2), TBoolean)) case MatrixMapGlobals(MatrixMapGlobals(child, ng1), ng2) => val uid = genUID() diff --git a/hail/src/main/scala/is/hail/expr/ir/TableIR.scala b/hail/src/main/scala/is/hail/expr/ir/TableIR.scala index 86beed8ef2f..390464e1d0d 100644 --- a/hail/src/main/scala/is/hail/expr/ir/TableIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/TableIR.scala @@ -883,10 +883,12 @@ class TableNativeReader( else params.options.map(opts => new RVDPartitioner(tr.typ.keyType, opts.intervals)) val rvd = spec.rowsComponent.read(ctx, params.path, tr.typ.rowType, partitioner, filterIntervals) - if (!rvd.typ.key.startsWith(tr.typ.key)) - fatal(s"Error while reading table ${params.path}: legacy table written without key." + - s"\n Read and write with version 0.2.70 or earlier") - rvd + if (rvd.typ.key startsWith tr.typ.key) + rvd + else { + log.info("Sorting a table after read. 
Rewrite the table to prevent this in the future.") + rvd.changeKey(ctx, tr.typ.key) + } } TableValue(ctx, tr.typ, BroadcastRow(ctx, RegionValue(ctx.r, globalsOffset), globalType.setRequired(true).asInstanceOf[PStruct]), rvd) } @@ -1863,7 +1865,7 @@ case class TableMapPartitions(child: TableIR, globalName -> In(0, SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(globalPType))), partitionStreamName -> In(1, SingleCodeEmitParamType(true, StreamSingleCodeType(requiresMemoryManagementPerElement = true, rowPType))))))) - val globalsBc = tv.globals.broadcast + val globalsOff = tv.globals.value.offset val fsBc = tv.ctx.fsBc val itF = { (idx: Int, consumerCtx: RVDContext, partition: (RVDContext) => Iterator[Long]) => @@ -1872,7 +1874,7 @@ case class TableMapPartitions(child: TableIR, partition(new RVDContext(outerRegion, eltRegion)).map(box) } makeIterator(fsBc.value, idx, consumerCtx, - globalsBc.value.readRegionValue(consumerCtx.partitionRegion), + globalsOff, boxedPartition ).map(l => l.longValue()) } diff --git a/hail/src/main/scala/is/hail/expr/ir/TypeCheck.scala b/hail/src/main/scala/is/hail/expr/ir/TypeCheck.scala index 2020ff9fe66..2d57dc4233e 100644 --- a/hail/src/main/scala/is/hail/expr/ir/TypeCheck.scala +++ b/hail/src/main/scala/is/hail/expr/ir/TypeCheck.scala @@ -137,12 +137,13 @@ object TypeCheck { args.map(_.typ).zipWithIndex.foreach { case (x, i) => assert(x == typ.elementType, s"at position $i type mismatch: ${ typ.elementType.parsableString() } ${ x.parsableString() }") } - case x@ArrayRef(a, i, _) => + case x@ArrayRef(a, i, s) => assert(i.typ == TInt32) + assert(s.typ == TString) assert(x.typ == coerce[TArray](a.typ).elementType) case ArrayLen(a) => assert(a.typ.isInstanceOf[TArray]) - case x@StreamRange(a, b, c, _, _) => + case x@StreamRange(a, b, c, _) => assert(a.typ == TInt32) assert(b.typ == TInt32) assert(c.typ == TInt32) @@ -154,7 +155,7 @@ object TypeCheck { assert(rowMajor.typ == TBoolean) case x@NDArrayShape(nd) => assert(nd.typ.isInstanceOf[TNDArray]) - case x@NDArrayReshape(nd, shape, _) => + case x@NDArrayReshape(nd, shape) => assert(nd.typ.isInstanceOf[TNDArray]) assert(shape.typ.asInstanceOf[TTuple].types.forall(t => t == TInt64)) case x@NDArrayConcat(nds, axis) => @@ -178,7 +179,7 @@ object TypeCheck { assert(filters.forall(f => coerce[TArray](f.typ).elementType == TInt64)) case x@NDArrayMap(_, _, body) => assert(x.elementTyp == body.typ) - case x@NDArrayMap2(l, r, _, _, body, _) => + case x@NDArrayMap2(l, r, _, _, body) => val lTyp = coerce[TNDArray](l.typ) val rTyp = coerce[TNDArray](r.typ) assert(lTyp.nDims == rTyp.nDims) @@ -199,7 +200,7 @@ object TypeCheck { case x@NDArrayWrite(nd, path) => assert(nd.typ.isInstanceOf[TNDArray]) assert(path.typ == TString) - case x@NDArrayMatMul(l, r, _) => + case x@NDArrayMatMul(l, r) => assert(l.typ.isInstanceOf[TNDArray]) assert(r.typ.isInstanceOf[TNDArray]) val lType = l.typ.asInstanceOf[TNDArray] @@ -208,15 +209,15 @@ object TypeCheck { assert(lType.nDims > 0) assert(rType.nDims > 0) assert(lType.nDims == 1 || rType.nDims == 1 || lType.nDims == rType.nDims) - case x@NDArrayQR(nd, mode, _) => + case x@NDArrayQR(nd, mode) => val ndType = nd.typ.asInstanceOf[TNDArray] assert(ndType.elementType == TFloat64) assert(ndType.nDims == 2) - case x@NDArraySVD(nd, _, _, _) => + case x@NDArraySVD(nd, _, _) => val ndType = nd.typ.asInstanceOf[TNDArray] assert(ndType.elementType == TFloat64) assert(ndType.nDims == 2) - case x@NDArrayInv(nd, _) => + case x@NDArrayInv(nd) => val ndType = nd.typ.asInstanceOf[TNDArray] 
assert(ndType.elementType == TFloat64) assert(ndType.nDims == 2) @@ -268,7 +269,7 @@ object TypeCheck { case x@StreamMap(a, name, body) => assert(a.typ.isInstanceOf[TStream]) assert(x.elementTyp == body.typ) - case x@StreamZip(as, names, body, _, _) => + case x@StreamZip(as, names, body, _) => assert(as.length == names.length) assert(x.typ.elementType == body.typ) assert(as.forall(_.typ.isInstanceOf[TStream])) @@ -404,7 +405,7 @@ object TypeCheck { case Die(msg, typ, _) => assert(msg.typ == TString) case Trap(child) => - case x@ApplyIR(fn, typeArgs, args, _) => + case x@ApplyIR(fn, typeArgs, args) => case x: AbstractApplyNode[_] => assert(x.implementation.unify(x.typeArgs, x.args.map(_.typ), x.returnType)) case MatrixWrite(_, _) => diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/NDArraySumAggregator.scala b/hail/src/main/scala/is/hail/expr/ir/agg/NDArraySumAggregator.scala index b110b732a11..00816d997f8 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/NDArraySumAggregator.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/NDArraySumAggregator.scala @@ -2,7 +2,7 @@ package is.hail.expr.ir.agg import is.hail.annotations.Region import is.hail.asm4s._ -import is.hail.expr.ir.{CodeParamType, EmitCode, EmitCodeBuilder, EmitParamType, SCodeEmitParamType, uuid4} +import is.hail.expr.ir.{CodeParamType, EmitCode, EmitCodeBuilder, EmitParamType, SCodeEmitParamType} import is.hail.types.VirtualTypeWithReq import is.hail.types.physical.stypes.SCode import is.hail.types.physical.stypes.concrete.SNDArrayPointerSettable @@ -43,12 +43,9 @@ class NDArraySumAggregator(ndVTyp: VirtualTypeWithReq) extends StagedAggregator val statePV = state.storageType.loadCheapSCode(cb, state.off).asBaseStruct.memoize(cb, "ndarray_sum_seq_op_state") statePV.loadField(cb, ndarrayFieldNumber).consume(cb, { - cb += state.region.getNewRegion(Region.TINY) + cb += (state.region.getNewRegion(Region.TINY)) cb += state.storageType.setFieldPresent(state.off, ndarrayFieldNumber) - val tempRegionForCreation = cb.newLocal[Region]("ndarray_sum_agg_temp_region", Region.stagedCreate(Region.REGULAR, cb.emb.ecb.pool())) - val fullyCopiedNDArray = ndTyp.constructByActuallyCopyingData(nextNDPV, cb, tempRegionForCreation).memoize(cb, "ndarray_sum_seq_op_full_copy") - state.storeNonmissing(cb, fullyCopiedNDArray) - cb += tempRegionForCreation.clearRegion() + state.storeNonmissing(cb, nextNDPV) }, { currentNDPCode => val currentNDPValue = currentNDPCode.asNDArray.memoize(cb, "ndarray_sum_seqop_current") @@ -80,6 +77,7 @@ class NDArraySumAggregator(ndVTyp: VirtualTypeWithReq) extends StagedAggregator } ) } + cb.invokeVoid(combOpMethod) } @@ -88,19 +86,16 @@ class NDArraySumAggregator(ndVTyp: VirtualTypeWithReq) extends StagedAggregator cb.ifx(!leftNdValue.sameShape(rightNdValue, cb), cb += Code._fatal[Unit]("Can't sum ndarrays of different shapes.")) - leftNdValue.coiterateMutate(cb, region, (rightNdValue.get, "right")) { + SNDArray.coiterate(cb, region, FastIndexedSeq((leftNdValue.get, "left"), (rightNdValue.get, "right")), { case Seq(l, r) => val newElement = SCode.add(cb, l, r, true) - newElement.copyToRegion(cb, region, leftNdValue.st.elementType) - } + cb.assign(l, newElement.copyToRegion(cb, region, leftNdValue.st.elementType)) + }) } protected def _storeResult(cb: EmitCodeBuilder, state: State, pt: PType, addr: Value[Long], region: Value[Region], ifMissing: EmitCodeBuilder => Unit): Unit = { state.get(cb).consume(cb, ifMissing(cb), - { sc => - val lastNDInAggState = sc.asNDArray.memoize(cb, "ndarray_sum_agg_last_state") - 
pt.storeAtAddress(cb, addr, region, lastNDInAggState, deepCopy = true) - }) + { sc => pt.storeAtAddress(cb, addr, region, sc, deepCopy = true) }) } } diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/ArrayFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/ArrayFunctions.scala index 38350657ba2..92c77f32490 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/ArrayFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/ArrayFunctions.scala @@ -11,21 +11,18 @@ import is.hail.types.virtual._ import is.hail.utils._ object ArrayFunctions extends RegistryFunctions { - val arrayOps: Array[(String, Type, Type, (IR, IR, Int) => IR)] = + val arrayOps: Array[(String, Type, Type, (IR, IR) => IR)] = Array( - ("mul", tnum("T"), tv("T"), (ir1: IR, ir2: IR, _) =>ApplyBinaryPrimOp(Multiply(), ir1, ir2)), - ("div", TInt32, TFloat32, (ir1: IR, ir2: IR, _) =>ApplyBinaryPrimOp(FloatingPointDivide(), ir1, ir2)), - ("div", TInt64, TFloat32, (ir1: IR, ir2: IR, _) =>ApplyBinaryPrimOp(FloatingPointDivide(), ir1, ir2)), - ("div", TFloat32, TFloat32, (ir1: IR, ir2: IR, _) =>ApplyBinaryPrimOp(FloatingPointDivide(),ir1, ir2)), - ("div", TFloat64, TFloat64, (ir1: IR, ir2: IR, _) =>ApplyBinaryPrimOp(FloatingPointDivide(), ir1, ir2)), - ("floordiv", tnum("T"), tv("T"), (ir1: IR, ir2: IR, _) => - ApplyBinaryPrimOp(RoundToNegInfDivide(), ir1, ir2)), - ("add", tnum("T"), tv("T"), (ir1: IR, ir2: IR, _) =>ApplyBinaryPrimOp(Add(),ir1, ir2)), - ("sub", tnum("T"), tv("T"), (ir1: IR, ir2: IR, _) =>ApplyBinaryPrimOp(Subtract(), ir1, ir2)), - ("pow", tnum("T"), TFloat64, (ir1: IR, ir2: IR, errorID: Int) => - Apply("pow", Seq(), Seq(ir1, ir2), TFloat64, errorID)), - ("mod", tnum("T"), tv("T"), (ir1: IR, ir2: IR, errorID: Int) => - Apply("mod", Seq(), Seq(ir1, ir2), ir2.typ, errorID))) + ("mul", tnum("T"), tv("T"), ApplyBinaryPrimOp(Multiply(), _, _)), + ("div", TInt32, TFloat32, ApplyBinaryPrimOp(FloatingPointDivide(), _, _)), + ("div", TInt64, TFloat32, ApplyBinaryPrimOp(FloatingPointDivide(), _, _)), + ("div", TFloat32, TFloat32, ApplyBinaryPrimOp(FloatingPointDivide(), _, _)), + ("div", TFloat64, TFloat64, ApplyBinaryPrimOp(FloatingPointDivide(), _, _)), + ("floordiv", tnum("T"), tv("T"), ApplyBinaryPrimOp(RoundToNegInfDivide(), _, _)), + ("add", tnum("T"), tv("T"), ApplyBinaryPrimOp(Add(), _, _)), + ("sub", tnum("T"), tv("T"), ApplyBinaryPrimOp(Subtract(), _, _)), + ("pow", tnum("T"), TFloat64, (ir1: IR, ir2: IR) => Apply("pow", Seq(), Seq(ir1, ir2), TFloat64)), + ("mod", tnum("T"), tv("T"), (ir1: IR, ir2: IR) => Apply("mod", Seq(), Seq(ir1, ir2), ir2.typ))) def mean(args: Seq[IR]): IR = { val Seq(a) = args @@ -93,40 +90,39 @@ object ArrayFunctions extends RegistryFunctions { } def registerAll() { - registerIR1("isEmpty", TArray(tv("T")), TBoolean)((_, a,_) => isEmpty(a)) + registerIR1("isEmpty", TArray(tv("T")), TBoolean)((_, a) => isEmpty(a)) - registerIR2("extend", TArray(tv("T")), TArray(tv("T")), TArray(tv("T")))((_, a, b, _) => extend(a, b)) + registerIR2("extend", TArray(tv("T")), TArray(tv("T")), TArray(tv("T")))((_, a, b) => extend(a, b)) - registerIR2("append", TArray(tv("T")), tv("T"), TArray(tv("T"))) { (_, a, c, _) => + registerIR2("append", TArray(tv("T")), tv("T"), TArray(tv("T"))) { (_, a, c) => extend(a, MakeArray(Seq(c), TArray(c.typ))) } - registerIR2("contains", TArray(tv("T")), tv("T"), TBoolean) { (_, a, e, _) => contains(a, e) } + registerIR2("contains", TArray(tv("T")), tv("T"), TBoolean) { (_, a, e) => contains(a, e) } for ((stringOp, argType, retType, irOp) <- 
arrayOps) { - registerIR2(stringOp, TArray(argType), argType, TArray(retType)) { (_, a, c, errorID) => + registerIR2(stringOp, TArray(argType), argType, TArray(retType)) { (_, a, c) => val i = genUID() - ToArray(StreamMap(ToStream(a), i, irOp(Ref(i, c.typ), c, errorID))) + ToArray(StreamMap(ToStream(a), i, irOp(Ref(i, c.typ), c))) } - registerIR2(stringOp, argType, TArray(argType), TArray(retType)) { (_, c, a, errorID) => + registerIR2(stringOp, argType, TArray(argType), TArray(retType)) { (_, c, a) => val i = genUID() - ToArray(StreamMap(ToStream(a), i, irOp(c, Ref(i, c.typ), errorID))) + ToArray(StreamMap(ToStream(a), i, irOp(c, Ref(i, c.typ)))) } - registerIR2(stringOp, TArray(argType), TArray(argType), TArray(retType)) { (_, array1, array2, errorID) => + registerIR2(stringOp, TArray(argType), TArray(argType), TArray(retType)) { (_, array1, array2) => val a1id = genUID() val e1 = Ref(a1id, coerce[TArray](array1.typ).elementType) val a2id = genUID() val e2 = Ref(a2id, coerce[TArray](array2.typ).elementType) - ToArray(StreamZip(FastIndexedSeq(ToStream(array1), ToStream(array2)), FastIndexedSeq(a1id, a2id), - irOp(e1, e2, errorID), ArrayZipBehavior.AssertSameLength)) + ToArray(StreamZip(FastIndexedSeq(ToStream(array1), ToStream(array2)), FastIndexedSeq(a1id, a2id), irOp(e1, e2), ArrayZipBehavior.AssertSameLength)) } } - registerIR1("sum", TArray(tnum("T")), tv("T"))((_, a,_) => sum(a)) + registerIR1("sum", TArray(tnum("T")), tv("T"))((_, a) => sum(a)) - registerIR1("product", TArray(tnum("T")), tv("T"))((_, a, _) => product(a)) + registerIR1("product", TArray(tnum("T")), tv("T"))((_, a) => product(a)) def makeMinMaxOp(op: String): Seq[IR] => IR = { { case Seq(a) => @@ -145,21 +141,21 @@ object ArrayFunctions extends RegistryFunctions { } } - registerIR("min", Array(TArray(tnum("T"))), tv("T"), inline = true)((_, a, _) => makeMinMaxOp("min")(a)) - registerIR("nanmin", Array(TArray(tnum("T"))), tv("T"), inline = true)((_, a, _) => makeMinMaxOp("nanmin")(a)) - registerIR("max", Array(TArray(tnum("T"))), tv("T"), inline = true)((_, a, _) => makeMinMaxOp("max")(a)) - registerIR("nanmax", Array(TArray(tnum("T"))), tv("T"), inline = true)((_, a, _) => makeMinMaxOp("nanmax")(a)) + registerIR("min", Array(TArray(tnum("T"))), tv("T"), inline = true)((_, a) => makeMinMaxOp("min")(a)) + registerIR("nanmin", Array(TArray(tnum("T"))), tv("T"), inline = true)((_, a) => makeMinMaxOp("nanmin")(a)) + registerIR("max", Array(TArray(tnum("T"))), tv("T"), inline = true)((_, a) => makeMinMaxOp("max")(a)) + registerIR("nanmax", Array(TArray(tnum("T"))), tv("T"), inline = true)((_, a) => makeMinMaxOp("nanmax")(a)) - registerIR("mean", Array(TArray(tnum("T"))), TFloat64, inline = true)((_, a, _) => mean(a)) + registerIR("mean", Array(TArray(tnum("T"))), TFloat64, inline = true)((_, a) => mean(a)) - registerIR1("median", TArray(tnum("T")), tv("T")) { (_, array, errorID) => + registerIR1("median", TArray(tnum("T")), tv("T")) { (_, array) => val t = array.typ.asInstanceOf[TArray].elementType val v = Ref(genUID(), t) val a = Ref(genUID(), TArray(t)) val size = Ref(genUID(), TInt32) val lastIdx = size - 1 val midIdx = lastIdx.floorDiv(2) - def ref(i: IR) = ArrayRef(a, i, errorID) + def ref(i: IR) = ArrayRef(a, i) def div(a: IR, b: IR): IR = ApplyBinaryPrimOp(BinaryOp.defaultDivideOp(t), a, b) Let(a.name, ArraySort(StreamFilter(ToStream(array), v.name, !IsNA(v))), @@ -174,7 +170,7 @@ object ArrayFunctions extends RegistryFunctions { div(ref(midIdx) + ref(midIdx + 1), Cast(2, t))))))) } - def argF(a: IR, op: (Type) => 
ComparisonOp[Boolean], errorID: Int): IR = { + def argF(a: IR, op: (Type) => ComparisonOp[Boolean]): IR = { val t = coerce[TArray](a.typ).elementType val tAccum = TStruct("m" -> t, "midx" -> TInt32) val accum = genUID() @@ -186,7 +182,7 @@ object ArrayFunctions extends RegistryFunctions { MakeStruct(FastSeq("m" -> min, "midx" -> midx)) val body = - Let(value, ArrayRef(a, Ref(idx, TInt32), errorID), + Let(value, ArrayRef(a, Ref(idx, TInt32)), Let(m, GetField(Ref(accum, tAccum), "m"), If(IsNA(Ref(value, t)), Ref(accum, tAccum), @@ -204,11 +200,11 @@ object ArrayFunctions extends RegistryFunctions { ), "midx") } - registerIR1("argmin", TArray(tv("T")), TInt32)((_, a, errorID) => argF(a, LT(_), errorID)) + registerIR1("argmin", TArray(tv("T")), TInt32)((_, a) => argF(a, LT(_))) - registerIR1("argmax", TArray(tv("T")), TInt32)((_, a, errorID) => argF(a, GT(_), errorID)) + registerIR1("argmax", TArray(tv("T")), TInt32)((_, a) => argF(a, GT(_))) - def uniqueIndex(a: IR, op: (Type) => ComparisonOp[Boolean], errorID: Int): IR = { + def uniqueIndex(a: IR, op: (Type) => ComparisonOp[Boolean]): IR = { val t = coerce[TArray](a.typ).elementType val tAccum = TStruct("m" -> t, "midx" -> TInt32, "count" -> TInt32) val accum = genUID() @@ -221,7 +217,7 @@ object ArrayFunctions extends RegistryFunctions { MakeStruct(FastSeq("m" -> m, "midx" -> midx, "count" -> count)) val body = - Let(value, ArrayRef(a, Ref(idx, TInt32), errorID), + Let(value, ArrayRef(a, Ref(idx, TInt32)), Let(m, GetField(Ref(accum, tAccum), "m"), If(IsNA(Ref(value, t)), Ref(accum, tAccum), @@ -247,19 +243,19 @@ object ArrayFunctions extends RegistryFunctions { NA(TInt32))) } - registerIR1("uniqueMinIndex", TArray(tv("T")), TInt32)((_, a, errorID) => uniqueIndex(a, LT(_), errorID)) + registerIR1("uniqueMinIndex", TArray(tv("T")), TInt32)((_, a) => uniqueIndex(a, LT(_))) - registerIR1("uniqueMaxIndex", TArray(tv("T")), TInt32)((_, a, errorID) => uniqueIndex(a, GT(_), errorID)) + registerIR1("uniqueMaxIndex", TArray(tv("T")), TInt32)((_, a) => uniqueIndex(a, GT(_))) - registerIR2("indexArray", TArray(tv("T")), TInt32, tv("T")) { (_, a, i, errorID) => + registerIR3("indexArray", TArray(tv("T")), TInt32, TString, tv("T")) { (_, a, i, s) => ArrayRef( a, If(ApplyComparisonOp(LT(TInt32), i, I32(0)), ApplyBinaryPrimOp(Add(), ArrayLen(a), i), - i), errorID) + i), s) } - registerIR2("sliceRight", TArray(tv("T")), TInt32, TArray(tv("T"))) { (_, a, i, errorID) => + registerIR2("sliceRight", TArray(tv("T")), TInt32, TArray(tv("T"))) { (_, a, i) => val idx = genUID() ToArray(StreamMap( StreamRange( @@ -271,10 +267,10 @@ object ArrayFunctions extends RegistryFunctions { ArrayLen(a), I32(1)), idx, - ArrayRef(a, Ref(idx, TInt32), errorID))) + ArrayRef(a, Ref(idx, TInt32)))) } - registerIR2("sliceLeft", TArray(tv("T")), TInt32, TArray(tv("T"))) { (_, a, i, errorID) => + registerIR2("sliceLeft", TArray(tv("T")), TInt32, TArray(tv("T"))) { (_, a, i) => val idx = genUID() If(IsNA(a), a, ToArray(StreamMap( @@ -285,35 +281,34 @@ object ArrayFunctions extends RegistryFunctions { UtilFunctions.intMin(i, ArrayLen(a))), I32(1)), idx, - ArrayRef(a, Ref(idx, TInt32), errorID)))) + ArrayRef(a, Ref(idx, TInt32))))) } - registerIR3("slice", TArray(tv("T")), TInt32, TInt32, TArray(tv("T"))) { - case(_, a, i, j, errorID) => - val idx = genUID() - ToArray(StreamMap( - StreamRange( - If(ApplyComparisonOp(LT(TInt32), i, I32(0)), - UtilFunctions.intMax( - ApplyBinaryPrimOp(Add(), ArrayLen(a), i), - I32(0)), - i), - If(ApplyComparisonOp(LT(TInt32), j, I32(0)), - 
ApplyBinaryPrimOp(Add(), ArrayLen(a), j), - UtilFunctions.intMin(j, ArrayLen(a))), - I32(1), errorID = errorID), - idx, - ArrayRef(a, Ref(idx, TInt32), errorID))) + registerIR3("slice", TArray(tv("T")), TInt32, TInt32, TArray(tv("T"))) { case(_, a, i, j) => + val idx = genUID() + ToArray(StreamMap( + StreamRange( + If(ApplyComparisonOp(LT(TInt32), i, I32(0)), + UtilFunctions.intMax( + ApplyBinaryPrimOp(Add(), ArrayLen(a), i), + I32(0)), + i), + If(ApplyComparisonOp(LT(TInt32), j, I32(0)), + ApplyBinaryPrimOp(Add(), ArrayLen(a), j), + UtilFunctions.intMin(j, ArrayLen(a))), + I32(1)), + idx, + ArrayRef(a, Ref(idx, TInt32)))) } - registerIR1("flatten", TArray(TArray(tv("T"))), TArray(tv("T"))) { (_, a, _) => + registerIR1("flatten", TArray(TArray(tv("T"))), TArray(tv("T"))) { (_, a) => val elt = Ref(genUID(), coerce[TArray](a.typ).elementType) ToArray(StreamFlatMap(ToStream(a), elt.name, ToStream(elt))) } registerIEmitCode2("corr", TArray(TFloat64), TArray(TFloat64), TFloat64, { (_: Type, _: EmitType, _: EmitType) => EmitType(SFloat64, false) - }) { case (cb, r, rt, errorID, ec1, ec2) => + }) { case (cb, r, rt, ec1, ec2) => ec1.toI(cb).flatMap(cb) { case pc1: SIndexableCode => ec2.toI(cb).flatMap(cb) { case pc2: SIndexableCode => val pv1 = pc1.memoize(cb, "corr_a1") @@ -321,7 +316,7 @@ object ArrayFunctions extends RegistryFunctions { val l1 = cb.newLocal("len1", pv1.loadLength()) val l2 = cb.newLocal("len2", pv2.loadLength()) cb.ifx(l1.cne(l2), { - cb._fatalWithError(errorID, + cb._fatal( "'corr': cannot compute correlation between arrays of different lengths: ", l1.toS, ", ", diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/CallFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/CallFunctions.scala index 56b26351a66..d0a86ae95b3 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/CallFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/CallFunctions.scala @@ -16,29 +16,29 @@ object CallFunctions extends RegistryFunctions { registerWrappedScalaFunction1("Call", TString, TCall, (rt: Type, st: SType) => SCanonicalCall)(Call.getClass, "parse") registerSCode1("callFromRepr", TInt32, TCall, (rt: Type, _: SType) => SCanonicalCall) { - case (er, cb, rt, repr, _) => SCanonicalCall.constructFromIntRepr(repr.asInt.intCode(cb)) + case (er, cb, rt, repr) => SCanonicalCall.constructFromIntRepr(repr.asInt.intCode(cb)) } registerSCode1("Call", TBoolean, TCall, (rt: Type, _: SType) => SCanonicalCall) { - case (er, cb, rt, phased, _) => + case (er, cb, rt, phased) => SCanonicalCall.constructFromIntRepr(Code.invokeScalaObject[Int]( Call0.getClass, "apply", Array(classTag[Boolean].runtimeClass), Array(phased.asBoolean.boolCode(cb)))) } registerSCode2("Call", TInt32, TBoolean, TCall, (rt: Type, _: SType, _: SType) => SCanonicalCall) { - case (er, cb, rt, a1, phased, _) => + case (er, cb, rt, a1, phased) => SCanonicalCall.constructFromIntRepr(Code.invokeScalaObject[Int]( Call1.getClass, "apply", Array(classTag[Int].runtimeClass, classTag[Boolean].runtimeClass), Array(a1.asInt.intCode(cb), phased.asBoolean.boolCode(cb)))) } registerSCode3("Call", TInt32, TInt32, TBoolean, TCall, (rt: Type, _: SType, _: SType, _: SType) => SCanonicalCall) { - case (er, cb, rt, a1, a2, phased, _) => + case (er, cb, rt, a1, a2, phased) => SCanonicalCall.constructFromIntRepr(Code.invokeScalaObject[Int]( Call2.getClass, "apply", Array(classTag[Int].runtimeClass, classTag[Int].runtimeClass, classTag[Boolean].runtimeClass), Array(a1.asInt.intCode(cb), a2.asInt.intCode(cb), 
phased.asBoolean.boolCode(cb)))) } registerSCode1("UnphasedDiploidGtIndexCall", TInt32, TCall, (rt: Type, _: SType) => SCanonicalCall) { - case (er, cb, rt, x, _) => + case (er, cb, rt, x) => SCanonicalCall.constructFromIntRepr(Code.invokeScalaObject[Int]( Call2.getClass, "fromUnphasedDiploidGtIndex", Array(classTag[Int].runtimeClass), Array(x.asInt.intCode(cb)))) } @@ -52,39 +52,39 @@ object CallFunctions extends RegistryFunctions { "isHomVar", "isNonRef", "isHetNonRef", "isHetRef") for (q <- qualities) { registerSCode1(q, TCall, TBoolean, (rt: Type, _: SType) => SBoolean) { - case (er, cb, rt, call, _) => + case (er, cb, rt, call) => primitive(Code.invokeScalaObject[Boolean]( Call.getClass, q, Array(classTag[Int].runtimeClass), Array(call.asCall.loadCanonicalRepresentation(cb)))) } } registerSCode1("ploidy", TCall, TInt32, (rt: Type, _: SType) => SInt32) { - case (er, cb, rt, call, _) => + case (er, cb, rt, call) => primitive(Code.invokeScalaObject[Int]( Call.getClass, "ploidy", Array(classTag[Int].runtimeClass), Array(call.asCall.loadCanonicalRepresentation(cb)))) } registerSCode1("nNonRefAlleles", TCall, TInt32, (rt: Type, _: SType) => SInt32) { - case (er, cb, rt, call, _) => + case (er, cb, rt, call) => primitive(Code.invokeScalaObject[Int]( Call.getClass, "nNonRefAlleles", Array(classTag[Int].runtimeClass), Array(call.asCall.loadCanonicalRepresentation(cb)))) } registerSCode1("unphasedDiploidGtIndex", TCall, TInt32, (rt: Type, _: SType) => SInt32) { - case (er, cb, rt, call, _) => + case (er, cb, rt, call) => primitive(Code.invokeScalaObject[Int]( Call.getClass, "unphasedDiploidGtIndex", Array(classTag[Int].runtimeClass), Array(call.asCall.loadCanonicalRepresentation(cb)))) } registerSCode2("index", TCall, TInt32, TInt32, (rt: Type, _: SType, _: SType) => SInt32) { - case (er, cb, rt, call, idx, _) => + case (er, cb, rt, call, idx) => primitive(Code.invokeScalaObject[Int]( Call.getClass, "alleleByIndex", Array(classTag[Int].runtimeClass, classTag[Int].runtimeClass), Array(call.asCall.loadCanonicalRepresentation(cb), idx.asInt.intCode(cb)))) } registerSCode2("downcode", TCall, TInt32, TCall, (rt: Type, _: SType, _: SType) => SCanonicalCall) { - case (er, cb, rt, call, downcodedAllele, _) => + case (er, cb, rt, call, downcodedAllele) => SCanonicalCall.constructFromIntRepr(Code.invokeScalaObject[Int]( Call.getClass, "downcode", Array(classTag[Int].runtimeClass, classTag[Int].runtimeClass), Array(call.asCall.loadCanonicalRepresentation(cb), downcodedAllele.asInt.intCode(cb)))) } diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/DictFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/DictFunctions.scala index 70ffa402119..f28a172e8c4 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/DictFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/DictFunctions.scala @@ -38,30 +38,30 @@ object DictFunctions extends RegistryFunctions { val tdict = TDict(tv("key"), tv("value")) def registerAll() { - registerIR1("isEmpty", tdict, TBoolean) { (_, d, _) => + registerIR1("isEmpty", tdict, TBoolean) { (_, d) => ArrayFunctions.isEmpty(CastToArray(d)) } - registerIR2("contains", tdict, tv("key"), TBoolean)((_, a, b, _) => contains(a, b)) + registerIR2("contains", tdict, tv("key"), TBoolean)((_, a, b) => contains(a, b)) - registerIR3("get", tdict, tv("key"), tv("value"), tv("value"))((_, a, b, c, _) => get(a, b, c)) - registerIR2("get", tdict, tv("key"), tv("tvalue")) { (_, d, k, _) => + registerIR3("get", tdict, tv("key"), tv("value"), tv("value"))((_, a, b, c) 
=> get(a, b, c)) + registerIR2("get", tdict, tv("key"), tv("tvalue")) { (_, d, k) => get(d, k, NA(types.coerce[TDict](d.typ).valueType)) } - registerIR2("index", tdict, tv("key"), tv("value")) { (_, d, k, errorID) => + registerIR2("index", tdict, tv("key"), tv("value")) { (_, d, k) => val vtype = types.coerce[TBaseStruct](types.coerce[TContainer](d.typ).elementType).types(1) val errormsg = invoke("concat", TString, - Str("Key "), + Str("Key '"), invoke("concat", TString, - invoke("showStr", TString, k), + invoke("str", TString, k), invoke("concat", TString, - Str(" not found in dictionary. Keys: "), + Str("' not found in dictionary. Keys: "), invoke("str", TString, invoke("keys", TArray(k.typ), d))))) - get(d, k, Die(errormsg, vtype, errorID)) + get(d, k, Die(errormsg, vtype, -1)) } - registerIR1("dictToArray", tdict, TArray(TStruct("key" -> tv("key"), "value" -> tv("value")))) { (_, d, _) => + registerIR1("dictToArray", tdict, TArray(TStruct("key" -> tv("key"), "value" -> tv("value")))) { (_, d) => val elt = Ref(genUID(), types.coerce[TContainer](d.typ).elementType) ToArray(StreamMap( ToStream(d), @@ -69,21 +69,21 @@ object DictFunctions extends RegistryFunctions { MakeTuple.ordered(Seq(GetField(elt, "key"), GetField(elt, "value"))))) } - registerIR1("keySet", tdict, TSet(tv("key"))) { (_, d, _) => + registerIR1("keySet", tdict, TSet(tv("key"))) { (_, d) => val pairs = Ref(genUID(), types.coerce[TContainer](d.typ).elementType) ToSet(StreamMap(ToStream(d), pairs.name, GetField(pairs, "key"))) } - registerIR1("dict", TSet(TTuple(tv("key"), tv("value"))), tdict)((_, s, _) => ToDict(ToStream(s))) + registerIR1("dict", TSet(TTuple(tv("key"), tv("value"))), tdict)((_, s) => ToDict(ToStream(s))) - registerIR1("dict", TArray(TTuple(tv("key"), tv("value"))), tdict)((_, a, _) => ToDict(ToStream(a))) + registerIR1("dict", TArray(TTuple(tv("key"), tv("value"))), tdict)((_, a) => ToDict(ToStream(a))) - registerIR1("keys", tdict, TArray(tv("key"))) { (_, d, _) => + registerIR1("keys", tdict, TArray(tv("key"))) { (_, d) => val elt = Ref(genUID(), types.coerce[TContainer](d.typ).elementType) ToArray(StreamMap(ToStream(d), elt.name, GetField(elt, "key"))) } - registerIR1("values", tdict, TArray(tv("value"))) { (_, d, _) => + registerIR1("values", tdict, TArray(tv("value"))) { (_, d) => val elt = Ref(genUID(), types.coerce[TContainer](d.typ).elementType) ToArray(StreamMap(ToStream(d), elt.name, GetField(elt, "value"))) } diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/Functions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/Functions.scala index d1f4b0c1f83..60727916e77 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/Functions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/Functions.scala @@ -30,7 +30,7 @@ object IRFunctionRegistry { } type IRFunctionSignature = (Seq[Type], Seq[Type], Type, Boolean) - type IRFunctionImplementation = (Seq[Type], Seq[IR], Int) => IR + type IRFunctionImplementation = (Seq[Type], Seq[IR]) => IR val irRegistry: mutable.Map[String, mutable.Map[IRFunctionSignature, IRFunctionImplementation]] = new mutable.HashMap() @@ -53,7 +53,7 @@ object IRFunctionRegistry { valueParameterTypes: Seq[Type], returnType: Type, alwaysInline: Boolean, - f: IRFunctionImplementation + f: (Seq[Type], Seq[IR]) => IR ): Unit = { requireJavaIdentifier(name) @@ -76,7 +76,7 @@ object IRFunctionRegistry { userAddedFunctions += ((name, (body.typ, typeParameters, valueParameterTypes))) addIR(name, typeParameters, - valueParameterTypes, IRParser.parseType(returnType), 
false, { (_, args, _) => + valueParameterTypes, IRParser.parseType(returnType), false, { (_, args) => Subst(body, BindingEnv(Env[IR](argNames.asScala.zip(args): _*))) }) @@ -157,13 +157,13 @@ object IRFunctionRegistry { } } - def lookupUnseeded(name: String, returnType: Type, arguments: Seq[Type]): Option[IRFunctionImplementation] = + def lookupUnseeded(name: String, returnType: Type, arguments: Seq[Type]): Option[(Seq[Type], Seq[IR]) => IR] = lookupUnseeded(name, returnType, Array.empty[Type], arguments) - def lookupUnseeded(name: String, returnType: Type, typeParameters: Seq[Type], arguments: Seq[Type]): Option[IRFunctionImplementation] = { - val validIR: Option[IRFunctionImplementation] = lookupIR(name, returnType, typeParameters, arguments).map { - case ((_, _, _, inline), conversion) => (typeParametersPassed, args, errorID) => - val x = ApplyIR(name, typeParametersPassed, args, errorID) + def lookupUnseeded(name: String, returnType: Type, typeParameters: Seq[Type], arguments: Seq[Type]): Option[(Seq[Type], Seq[IR]) => IR] = { + val validIR: Option[(Seq[Type], Seq[IR]) => IR] = lookupIR(name, returnType, typeParameters, arguments).map { + case ((_, _, _, inline), conversion) => (typeParametersPassed, args) => + val x = ApplyIR(name, typeParametersPassed, args) x.conversion = conversion x.inline = inline x @@ -171,12 +171,12 @@ object IRFunctionRegistry { val validMethods = lookupFunction(name, returnType, typeParameters, arguments) .filter(!_.isInstanceOf[SeededJVMFunction]).map { f => - { (irValueParametersTypes: Seq[Type], irArguments: Seq[IR], errorID: Int) => + { (irValueParametersTypes: Seq[Type], irArguments: Seq[IR]) => f match { case _: UnseededMissingnessObliviousJVMFunction => - Apply(name, irValueParametersTypes, irArguments, f.returnType.subst(), errorID) + Apply(name, irValueParametersTypes, irArguments, f.returnType.subst()) case _: UnseededMissingnessAwareJVMFunction => - ApplySpecial(name, irValueParametersTypes, irArguments, f.returnType.subst(), errorID) + ApplySpecial(name, irValueParametersTypes, irArguments, f.returnType.subst()) } } } @@ -341,12 +341,12 @@ abstract class RegistryFunctions { calculateReturnType: (Type, Seq[SType]) => SType, typeParameters: Array[Type] = Array.empty )( - impl: (EmitRegion, EmitCodeBuilder, Seq[Type], SType, Array[SCode], Value[Int]) => SCode + impl: (EmitRegion, EmitCodeBuilder, Seq[Type], SType, Array[SCode]) => SCode ) { IRFunctionRegistry.addJVMFunction( new UnseededMissingnessObliviousJVMFunction(name, typeParameters, valueParameterTypes, returnType, calculateReturnType) { - override def apply(r: EmitRegion, cb: EmitCodeBuilder, returnSType: SType, typeParameters: Seq[Type], errorID: Value[Int], args: SCode*): SCode = - impl(r, cb, typeParameters, returnSType, args.toArray, errorID) + override def apply(r: EmitRegion, cb: EmitCodeBuilder, returnSType: SType, typeParameters: Seq[Type], args: SCode*): SCode = + impl(r, cb, typeParameters, returnSType, args.toArray) }) } @@ -361,7 +361,7 @@ abstract class RegistryFunctions { ) { IRFunctionRegistry.addJVMFunction( new UnseededMissingnessObliviousJVMFunction(name, typeParameters, valueParameterTypes, returnType, calculateReturnType) { - override def apply(r: EmitRegion, cb: EmitCodeBuilder, returnSType: SType, typeParameters: Seq[Type], errorID: Value[Int], args: SCode*): SCode = { + override def apply(r: EmitRegion, cb: EmitCodeBuilder, returnSType: SType, typeParameters: Seq[Type], args: SCode*): SCode = { assert(unify(typeParameters, args.map(_.st.virtualType), 
returnSType.virtualType)) returnSType.fromCodes(FastIndexedSeq(impl(r, cb, returnSType, typeParameters.toArray, args.toArray))) } @@ -375,13 +375,13 @@ abstract class RegistryFunctions { calculateReturnType: (Type, Seq[EmitType]) => EmitType, typeParameters: Array[Type] = Array.empty )( - impl: (EmitRegion, SType, Value[Int], Array[EmitCode]) => EmitCode + impl: (EmitRegion, SType, Array[EmitCode]) => EmitCode ) { IRFunctionRegistry.addJVMFunction( new UnseededMissingnessAwareJVMFunction(name, typeParameters, valueParameterTypes, returnType, calculateReturnType) { - override def apply(r: EmitRegion, rpt: SType, typeParameters: Seq[Type], errorID: Value[Int], args: EmitCode*): EmitCode = { + override def apply(r: EmitRegion, rpt: SType, typeParameters: Seq[Type], args: EmitCode*): EmitCode = { assert(unify(typeParameters, args.map(_.st.virtualType), rpt.virtualType)) - impl(r, rpt, errorID, args.toArray) + impl(r, rpt, args.toArray) } }) } @@ -393,7 +393,7 @@ abstract class RegistryFunctions { calculateReturnType: (Type, Seq[EmitType]) => EmitType, typeParameters: Array[Type] = Array.empty )( - impl: (EmitCodeBuilder, Value[Region], SType , Value[Int], Array[EmitCode]) => IEmitCode + impl: (EmitCodeBuilder, Value[Region], SType, Array[EmitCode]) => IEmitCode ) { IRFunctionRegistry.addJVMFunction( new UnseededMissingnessAwareJVMFunction(name, typeParameters, valueParameterTypes, returnType, calculateReturnType) { @@ -402,13 +402,12 @@ abstract class RegistryFunctions { r: Value[Region], rpt: SType, typeParameters: Seq[Type], - errorID: Value[Int], args: EmitCode* - ): IEmitCode = impl(cb, r, rpt, errorID, args.toArray) + ): IEmitCode = impl(cb, r, rpt, args.toArray) - override def apply(r: EmitRegion, rpt: SType, typeParameters: Seq[Type], errorID: Value[Int], args: EmitCode*): EmitCode = { + override def apply(r: EmitRegion, rpt: SType, typeParameters: Seq[Type], args: EmitCode*): EmitCode = { EmitCode.fromI(r.mb) { cb => - apply(cb, r.region, rpt, typeParameters, errorID, args: _*) + apply(cb, r.region, rpt, typeParameters, args: _*) } } }) @@ -423,7 +422,7 @@ abstract class RegistryFunctions { cls: Class[_], method: String ) { - registerSCode(name, valueParameterTypes, returnType, calculateReturnType) { case (r, cb, _, rt, args, _) => + registerSCode(name, valueParameterTypes, returnType, calculateReturnType) { case (r, cb, _, rt, args) => val cts = valueParameterTypes.map(PrimitiveTypeToIRIntermediateClassTag(_).runtimeClass) rt.fromCodes(FastIndexedSeq( Code.invokeScalaObject(cls, method, cts, args.map { a => SType.extractPrimCode(cb, a) })(PrimitiveTypeToIRIntermediateClassTag(returnType)) @@ -457,7 +456,7 @@ abstract class RegistryFunctions { case _ => scodeToJavaValue(cb, r, code) } - registerSCode(name, valueParameterTypes, returnType, calculateReturnType) { case (r, cb, _, rt, args, _) => + registerSCode(name, valueParameterTypes, returnType, calculateReturnType) { case (r, cb, _, rt, args) => val cts = valueParameterTypes.map(ct(_).runtimeClass) unwrapReturn(cb, r.region, rt, Code.invokeScalaObject(cls, method, cts, args.map { a => wrap(cb, r.region, a) })(ct(returnType))) @@ -481,55 +480,55 @@ abstract class RegistryFunctions { } } - def registerIR(name: String, valueParameterTypes: Array[Type], returnType: Type, inline: Boolean = false, typeParameters: Array[Type] = Array.empty)(f: (Seq[Type], Seq[IR], Int) => IR): Unit = + def registerIR(name: String, valueParameterTypes: Array[Type], returnType: Type, inline: Boolean = false, typeParameters: Array[Type] = Array.empty)(f: 
(Seq[Type], Seq[IR]) => IR): Unit = IRFunctionRegistry.addIR(name, typeParameters, valueParameterTypes, returnType, inline, f) - def registerSCode1(name: String, mt1: Type, rt: Type, pt: (Type, SType) => SType)(impl: (EmitRegion, EmitCodeBuilder, SType, SCode, Value[Int]) => SCode): Unit = + def registerSCode1(name: String, mt1: Type, rt: Type, pt: (Type, SType) => SType)(impl: (EmitRegion, EmitCodeBuilder, SType, SCode) => SCode): Unit = registerSCode(name, Array(mt1), rt, unwrappedApply(pt)) { - case (r, cb, _, rt, Array(a1), errorID) => impl(r, cb, rt, a1, errorID) + case (r, cb, _, rt, Array(a1)) => impl(r, cb, rt, a1) } - def registerSCode1t(name: String, typeParams: Array[Type], mt1: Type, rt: Type, pt: (Type, SType) => SType)(impl: (EmitRegion, EmitCodeBuilder, Seq[Type], SType, SCode, Value[Int]) => SCode): Unit = + def registerSCode1t(name: String, typeParams: Array[Type], mt1: Type, rt: Type, pt: (Type, SType) => SType)(impl: (EmitRegion, EmitCodeBuilder, Seq[Type], SType, SCode) => SCode): Unit = registerSCode(name, Array(mt1), rt, unwrappedApply(pt), typeParameters = typeParams) { - case (r, cb, typeParams, rt, Array(a1), errorID) => impl(r, cb, typeParams, rt, a1, errorID) + case (r, cb, typeParams, rt, Array(a1)) => impl(r, cb, typeParams, rt, a1) } def registerSCode2(name: String, mt1: Type, mt2: Type, rt: Type, pt: (Type, SType, SType) => SType) - (impl: (EmitRegion, EmitCodeBuilder, SType, SCode, SCode, Value[Int]) => SCode): Unit = + (impl: (EmitRegion, EmitCodeBuilder, SType, SCode, SCode) => SCode): Unit = registerSCode(name, Array(mt1, mt2), rt, unwrappedApply(pt)) { - case (r, cb, _, rt, Array(a1, a2) , errorID) => impl(r, cb, rt, a1, a2, errorID) + case (r, cb, _, rt, Array(a1, a2)) => impl(r, cb, rt, a1, a2) } def registerSCode2t(name: String, typeParams: Array[Type], mt1: Type, mt2: Type, rt: Type, pt: (Type, SType, SType) => SType) - (impl: (EmitRegion, EmitCodeBuilder, Seq[Type], SType, SCode, SCode, Value[Int]) => SCode): Unit = + (impl: (EmitRegion, EmitCodeBuilder, Seq[Type], SType, SCode, SCode) => SCode): Unit = registerSCode(name, Array(mt1, mt2), rt, unwrappedApply(pt), typeParameters = typeParams) { - case (r, cb, typeParams, rt, Array(a1, a2), errorID) => impl(r, cb, typeParams, rt, a1, a2, errorID) + case (r, cb, typeParams, rt, Array(a1, a2)) => impl(r, cb, typeParams, rt, a1, a2) } def registerSCode3(name: String, mt1: Type, mt2: Type, mt3: Type, rt: Type, pt: (Type, SType, SType, SType) => SType) - (impl: (EmitRegion, EmitCodeBuilder, SType, SCode, SCode, SCode, Value[Int]) => SCode): Unit = + (impl: (EmitRegion, EmitCodeBuilder, SType, SCode, SCode, SCode) => SCode): Unit = registerSCode(name, Array(mt1, mt2, mt3), rt, unwrappedApply(pt)) { - case (r, cb, _, rt, Array(a1, a2, a3), errorID) => impl(r, cb, rt, a1, a2, a3, errorID) + case (r, cb, _, rt, Array(a1, a2, a3)) => impl(r, cb, rt, a1, a2, a3) } def registerSCode4(name: String, mt1: Type, mt2: Type, mt3: Type, mt4: Type, rt: Type, pt: (Type, SType, SType, SType, SType) => SType) - (impl: (EmitRegion, EmitCodeBuilder, SType, SCode, SCode, SCode, SCode, Value[Int]) => SCode): Unit = + (impl: (EmitRegion, EmitCodeBuilder, SType, SCode, SCode, SCode, SCode) => SCode): Unit = registerSCode(name, Array(mt1, mt2, mt3, mt4), rt, unwrappedApply(pt)) { - case (r, cb, _, rt, Array(a1, a2, a3, a4), errorID) => impl(r, cb, rt, a1, a2, a3, a4, errorID) + case (r, cb, _, rt, Array(a1, a2, a3, a4)) => impl(r, cb, rt, a1, a2, a3, a4) } def registerSCode4t(name: String, typeParams: Array[Type], mt1: Type, mt2: 
Type, mt3: Type, mt4: Type, rt: Type, pt: (Type, SType, SType, SType, SType) => SType) - (impl: (EmitRegion, EmitCodeBuilder, Seq[Type], SType, SCode, SCode, SCode, SCode, Value[Int]) => SCode): Unit = + (impl: (EmitRegion, EmitCodeBuilder, Seq[Type], SType, SCode, SCode, SCode, SCode) => SCode): Unit = registerSCode(name, Array(mt1, mt2, mt3, mt4), rt, unwrappedApply(pt), typeParams) { - case (r, cb, typeParams, rt, Array(a1, a2, a3, a4), errorID) => impl(r, cb, typeParams, rt, a1, a2, a3, a4, errorID) + case (r, cb, typeParams, rt, Array(a1, a2, a3, a4)) => impl(r, cb, typeParams, rt, a1, a2, a3, a4) } def registerSCode5(name: String, mt1: Type, mt2: Type, mt3: Type, mt4: Type, mt5: Type, rt: Type, pt: (Type, SType, SType, SType, SType, SType) => SType) - (impl: (EmitRegion, EmitCodeBuilder, SType, SCode, SCode, SCode, SCode, SCode, Value[Int]) => SCode): Unit = + (impl: (EmitRegion, EmitCodeBuilder, SType, SCode, SCode, SCode, SCode, SCode) => SCode): Unit = registerSCode(name, Array(mt1, mt2, mt3, mt4, mt5), rt, unwrappedApply(pt)) { - case (r, cb, _, rt, Array(a1, a2, a3, a4, a5), errorID) => impl(r, cb, rt, a1, a2, a3, a4, a5, errorID) + case (r, cb, _, rt, Array(a1, a2, a3, a4, a5)) => impl(r, cb, rt, a1, a2, a3, a4, a5) } def registerCode1(name: String, mt1: Type, rt: Type, pt: (Type, SType) => SType)(impl: (EmitCodeBuilder, EmitRegion, SType, SCode) => Code[_]): Unit = @@ -544,44 +543,44 @@ abstract class RegistryFunctions { } def registerIEmitCode1(name: String, mt1: Type, rt: Type, pt: (Type, EmitType) => EmitType) - (impl: (EmitCodeBuilder, Value[Region], SType, Value[Int], EmitCode) => IEmitCode): Unit = - registerIEmitCode(name, Array(mt1), rt, unwrappedApply(pt)) { case (cb, r, rt, errorID, Array(a1)) => - impl(cb, r, rt, errorID, a1) + (impl: (EmitCodeBuilder, Value[Region], SType, EmitCode) => IEmitCode): Unit = + registerIEmitCode(name, Array(mt1), rt, unwrappedApply(pt)) { case (cb, r, rt, Array(a1)) => + impl(cb, r, rt, a1) } def registerIEmitCode2(name: String, mt1: Type, mt2: Type, rt: Type, pt: (Type, EmitType, EmitType) => EmitType) - (impl: (EmitCodeBuilder, Value[Region], SType, Value[Int], EmitCode, EmitCode) => IEmitCode): Unit = - registerIEmitCode(name, Array(mt1, mt2), rt, unwrappedApply(pt)) { case (cb, r, rt, errorID, Array(a1, a2)) => - impl(cb, r, rt, errorID, a1, a2) + (impl: (EmitCodeBuilder, Value[Region], SType, EmitCode, EmitCode) => IEmitCode): Unit = + registerIEmitCode(name, Array(mt1, mt2), rt, unwrappedApply(pt)) { case (cb, r, rt, Array(a1, a2)) => + impl(cb, r, rt, a1, a2) } def registerIEmitCode4(name: String, mt1: Type, mt2: Type, mt3: Type, mt4: Type, rt: Type, pt: (Type, EmitType, EmitType, EmitType, EmitType) => EmitType) - (impl: (EmitCodeBuilder, Value[Region], SType, Value[Int], EmitCode, EmitCode, EmitCode, EmitCode) => IEmitCode): Unit = - registerIEmitCode(name, Array(mt1, mt2, mt3, mt4), rt, unwrappedApply(pt)) { case (cb, r, rt, errorID, Array(a1, a2, a3, a4)) => - impl(cb, r, rt, errorID, a1, a2, a3, a4) + (impl: (EmitCodeBuilder, Value[Region], SType, EmitCode, EmitCode, EmitCode, EmitCode) => IEmitCode): Unit = + registerIEmitCode(name, Array(mt1, mt2, mt3, mt4), rt, unwrappedApply(pt)) { case (cb, r, rt, Array(a1, a2, a3, a4)) => + impl(cb, r, rt, a1, a2, a3, a4) } def registerIEmitCode6(name: String, mt1: Type, mt2: Type, mt3: Type, mt4: Type, mt5: Type, mt6: Type, rt: Type, pt: (Type, EmitType, EmitType, EmitType, EmitType, EmitType, EmitType) => EmitType) - (impl: (EmitCodeBuilder, Value[Region], SType, Value[Int], 
EmitCode, EmitCode, EmitCode, EmitCode, EmitCode, EmitCode) => IEmitCode): Unit = - registerIEmitCode(name, Array(mt1, mt2, mt3, mt4, mt5, mt6), rt, unwrappedApply(pt)) { case (cb, r, rt, errorID, Array(a1, a2, a3, a4, a5, a6)) => - impl(cb, r, rt, errorID, a1, a2, a3, a4, a5, a6) + (impl: (EmitCodeBuilder, Value[Region], SType, EmitCode, EmitCode, EmitCode, EmitCode, EmitCode, EmitCode) => IEmitCode): Unit = + registerIEmitCode(name, Array(mt1, mt2, mt3, mt4, mt5, mt6), rt, unwrappedApply(pt)) { case (cb, r, rt, Array(a1, a2, a3, a4, a5, a6)) => + impl(cb, r, rt, a1, a2, a3, a4, a5, a6) } def registerEmitCode2(name: String, mt1: Type, mt2: Type, rt: Type, pt: (Type, EmitType, EmitType) => EmitType) - (impl: (EmitRegion, SType, Value[Int], EmitCode, EmitCode) => EmitCode): Unit = - registerEmitCode(name, Array(mt1, mt2), rt, unwrappedApply(pt)) { case (r, rt, errorID, Array(a1, a2)) => impl(r, rt, errorID, a1, a2) } + (impl: (EmitRegion, SType, EmitCode, EmitCode) => EmitCode): Unit = + registerEmitCode(name, Array(mt1, mt2), rt, unwrappedApply(pt)) { case (r, rt, Array(a1, a2)) => impl(r, rt, a1, a2) } - def registerIR1(name: String, mt1: Type, returnType: Type, typeParameters: Array[Type] = Array.empty)(f: (Seq[Type], IR, Int) => IR): Unit = - registerIR(name, Array(mt1), returnType, typeParameters = typeParameters) { case (t, Seq(a1), errorID) => f(t, a1, errorID) } + def registerIR1(name: String, mt1: Type, returnType: Type, typeParameters: Array[Type] = Array.empty)(f: (Seq[Type], IR) => IR): Unit = + registerIR(name, Array(mt1), returnType, typeParameters = typeParameters) { case (t, Seq(a1)) => f(t, a1) } - def registerIR2(name: String, mt1: Type, mt2: Type, returnType: Type, typeParameters: Array[Type] = Array.empty)(f: (Seq[Type], IR, IR, Int) => IR): Unit = - registerIR(name, Array(mt1, mt2), returnType, typeParameters = typeParameters) { case (t, Seq(a1, a2), errorID) => f(t, a1, a2, errorID) } + def registerIR2(name: String, mt1: Type, mt2: Type, returnType: Type, typeParameters: Array[Type] = Array.empty)(f: (Seq[Type], IR, IR) => IR): Unit = + registerIR(name, Array(mt1, mt2), returnType, typeParameters = typeParameters) { case (t, Seq(a1, a2)) => f(t, a1, a2) } - def registerIR3(name: String, mt1: Type, mt2: Type, mt3: Type, returnType: Type, typeParameters: Array[Type] = Array.empty)(f: (Seq[Type], IR, IR, IR, Int) => IR): Unit = - registerIR(name, Array(mt1, mt2, mt3), returnType, typeParameters = typeParameters) { case (t, Seq(a1, a2, a3), errorID) => f(t, a1, a2, a3, errorID) } + def registerIR3(name: String, mt1: Type, mt2: Type, mt3: Type, returnType: Type, typeParameters: Array[Type] = Array.empty)(f: (Seq[Type], IR, IR, IR) => IR): Unit = + registerIR(name, Array(mt1, mt2, mt3), returnType, typeParameters = typeParameters) { case (t, Seq(a1, a2, a3)) => f(t, a1, a2, a3) } - def registerIR4(name: String, mt1: Type, mt2: Type, mt3: Type, mt4: Type, returnType: Type, typeParameters: Array[Type] = Array.empty)(f: (Seq[Type], IR, IR, IR, IR, Int) => IR): Unit = - registerIR(name, Array(mt1, mt2, mt3, mt4), returnType, typeParameters = typeParameters) { case (t, Seq(a1, a2, a3, a4), errorID) => f(t, a1, a2, a3, a4, errorID) } + def registerIR4(name: String, mt1: Type, mt2: Type, mt3: Type, mt4: Type, returnType: Type, typeParameters: Array[Type] = Array.empty)(f: (Seq[Type], IR, IR, IR, IR) => IR): Unit = + registerIR(name, Array(mt1, mt2, mt3, mt4), returnType, typeParameters = typeParameters) { case (t, Seq(a1, a2, a3, a4)) => f(t, a1, a2, a3, a4) } def registerSeeded( 
name: String, @@ -643,7 +642,7 @@ sealed abstract class JVMFunction { def computeReturnEmitType(returnType: Type, valueParameterTypes: Seq[EmitType]): EmitType - def apply(mb: EmitRegion, returnType: SType, typeParameters: Seq[Type], errorID: Value[Int], args: EmitCode*): EmitCode + def apply(mb: EmitRegion, returnType: SType, typeParameters: Seq[Type], args: EmitCode*): EmitCode override def toString: String = s"$name[${ typeParameters.mkString(", ") }](${ valueParameterTypes.mkString(", ") }): $returnType" @@ -679,24 +678,23 @@ abstract class UnseededMissingnessObliviousJVMFunction ( def computeStrictReturnEmitType(returnType: Type, valueParameterTypes: Seq[SType]): SType = MissingnessObliviousJVMFunction.returnSType(missingnessObliviousComputeReturnType)(returnType, valueParameterTypes) - def apply(r: EmitRegion, cb: EmitCodeBuilder, returnSType: SType, typeParameters: Seq[Type], errorID: Value[Int], args: SCode*): SCode + def apply(r: EmitRegion, cb: EmitCodeBuilder, returnSType: SType, typeParameters: Seq[Type], args: SCode*): SCode - def apply(r: EmitRegion, returnType: SType, typeParameters: Seq[Type], errorID: Value[Int], args: EmitCode*): EmitCode = { + def apply(r: EmitRegion, returnType: SType, typeParameters: Seq[Type], args: EmitCode*): EmitCode = { EmitCode.fromI(r.mb)(cb => IEmitCode.multiMapEmitCodes(cb, args.toFastIndexedSeq) { args => - apply(r, cb, returnType, typeParameters, errorID, args: _*) + apply(r, cb, returnType, typeParameters, args: _*) }) } def getAsMethod[C](cb: EmitClassBuilder[C], rpt: SType, typeParameters: Seq[Type], args: SType*): EmitMethodBuilder[C] = { val unified = unify(typeParameters, args.map(_.virtualType), rpt.virtualType) assert(unified) - val methodbuilder = cb.genEmitMethod(name, FastIndexedSeq[ParamType](typeInfo[Region], typeInfo[Int]) ++ args.map(_.paramType), rpt.paramType) + val methodbuilder = cb.genEmitMethod(name, FastIndexedSeq[ParamType](typeInfo[Region]) ++ args.map(_.paramType), rpt.paramType) methodbuilder.emitSCode(cb => apply(EmitRegion.default(methodbuilder), cb, rpt, typeParameters, - methodbuilder.getCodeParam[Int](2), - (0 until args.length).map(i => methodbuilder.getSCodeParam(i + 3)): _*)) + (0 until args.length).map(i => methodbuilder.getSCodeParam(i + 2)): _*)) methodbuilder } } @@ -721,7 +719,6 @@ abstract class UnseededMissingnessAwareJVMFunction ( r: Value[Region], rpt: SType, typeParameters: Seq[Type], - errorID: Value[Int], args: EmitCode* ): IEmitCode = { ??? 
@@ -741,7 +738,7 @@ abstract class SeededJVMFunction ( def applySeededI(seed: Long, cb: EmitCodeBuilder, region: Value[Region], rpt: SType, args: EmitCode*): IEmitCode - def apply(region: EmitRegion, rpt: SType, typeParameters: Seq[Type], errorID: Value[Int], args: EmitCode*): EmitCode = + def apply(region: EmitRegion, rpt: SType, typeParameters: Seq[Type], args: EmitCode*): EmitCode = fatal("seeded functions must go through IEmitCode path") def apply(region: EmitRegion, rpt: SType, args: EmitCode*): EmitCode = diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/GenotypeFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/GenotypeFunctions.scala index 416d4a0baba..8ffb6c9d933 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/GenotypeFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/GenotypeFunctions.scala @@ -13,7 +13,7 @@ object GenotypeFunctions extends RegistryFunctions { def registerAll() { registerSCode1("gqFromPL", TArray(tv("N", "int32")), TInt32, (_: Type, _: SType) => SInt32) - { case (r, cb, rt, _pl: SIndexableCode, errorID) => + { case (r, cb, rt, _pl: SIndexableCode) => val code = EmitCodeBuilder.scopedCode(r.mb) { cb => val pl = _pl.memoize(cb, "plv") val m = cb.newLocal[Int]("m", 99) @@ -21,7 +21,7 @@ object GenotypeFunctions extends RegistryFunctions { val i = cb.newLocal[Int]("i", 0) cb.whileLoop(i < pl.loadLength(), { - val value = pl.loadElement(cb, i).get(cb, "PL cannot have missing elements.", errorID) + val value = pl.loadElement(cb, i).get(cb, "PL cannot have missing elements.") val pli = cb.newLocal[Int]("pli", value.asInt.intCode(cb)) cb.ifx(pli < m, { cb.assign(m2, m) @@ -39,12 +39,12 @@ object GenotypeFunctions extends RegistryFunctions { registerIEmitCode1("dosage", TArray(tv("N", "float64")), TFloat64, (_: Type, arrayType: EmitType) => EmitType(SFloat64, arrayType.required && arrayType.st.asInstanceOf[SContainer].elementEmitType.required) - ) { case (cb, r, rt, errorID, gp) => + ) { case (cb, r, rt, gp) => gp.toI(cb).flatMap(cb) { case (gpc: SIndexableCode) => val gpv = gpc.memoize(cb, "dosage_gp") cb.ifx(gpv.loadLength().cne(3), - cb._fatalWithError(errorID, const("length of gp array must be 3, got ").concat(gpv.loadLength().toS))) + cb._fatal(const("length of gp array must be 3, got ").concat(gpv.loadLength().toS))) gpv.loadElement(cb, 1).flatMap(cb) { (_1: SCode) => gpv.loadElement(cb, 2).map(cb) { (_2: SCode) => diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/IntervalFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/IntervalFunctions.scala index f99645df4e3..d292b5ec2ff 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/IntervalFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/IntervalFunctions.scala @@ -21,7 +21,7 @@ object IntervalFunctions extends RegistryFunctions { required = includesStartET.required && includesEndET.required ).sType, includesStartET.required && includesEndET.required) }) { - case (cb, r, SIntervalPointer(pt: PCanonicalInterval), _, start, end, includesStart, includesEnd) => + case (cb, r, SIntervalPointer(pt: PCanonicalInterval), start, end, includesStart, includesEnd) => includesStart.toI(cb).flatMap(cb) { includesStart => includesEnd.toI(cb).map(cb) { includesEnd => @@ -37,7 +37,7 @@ object IntervalFunctions extends RegistryFunctions { registerIEmitCode1("start", TInterval(tv("T")), tv("T"), (_: Type, x: EmitType) => EmitType(x.st.asInstanceOf[SInterval].pointType, x.required && x.st.asInstanceOf[SInterval].pointEmitType.required)) { - 
case (cb, r, rt, _, interval) => + case (cb, r, rt, interval) => interval.toI(cb).flatMap(cb) { case pi: SIntervalCode => val pv = pi.memoize(cb, "interval") pv.loadStart(cb) @@ -46,7 +46,7 @@ object IntervalFunctions extends RegistryFunctions { registerIEmitCode1("end", TInterval(tv("T")), tv("T"), (_: Type, x: EmitType) => EmitType(x.st.asInstanceOf[SInterval].pointType, x.required && x.st.asInstanceOf[SInterval].pointEmitType.required)) { - case (cb, r, rt, _, interval) => + case (cb, r, rt, interval) => interval.toI(cb).flatMap(cb) { case pi: SIntervalCode => val pv = pi.memoize(cb, "interval") pv.loadEnd(cb) @@ -56,19 +56,19 @@ object IntervalFunctions extends RegistryFunctions { registerSCode1("includesStart", TInterval(tv("T")), TBoolean, (_: Type, x: SType) => SBoolean ) { - case (r, cb, rt, interval: SIntervalCode, _) => primitive(interval.codeIncludesStart()) + case (r, cb, rt, interval: SIntervalCode) => primitive(interval.includesStart()) } registerSCode1("includesEnd", TInterval(tv("T")), TBoolean, (_: Type, x: SType) => SBoolean ) { - case (r, cb, rt, interval: SIntervalCode, _) => primitive(interval.codeIncludesEnd()) + case (r, cb, rt, interval: SIntervalCode) => primitive(interval.includesEnd()) } registerIEmitCode2("contains", TInterval(tv("T")), tv("T"), TBoolean, { case(_: Type, intervalT: EmitType, _: EmitType) => EmitType(SBoolean, intervalT.required) }) { - case (cb, r, rt, _, int, point) => + case (cb, r, rt, int, point) => int.toI(cb).map(cb) { case (intc: SIntervalCode) => val interval: SIntervalValue = intc.memoize(cb, "interval") val pointv = cb.memoize(point.toI(cb), "point") @@ -88,7 +88,7 @@ object IntervalFunctions extends RegistryFunctions { } registerSCode1("isEmpty", TInterval(tv("T")), TBoolean, (_: Type, pt: SType) => SBoolean) { - case (r, cb, rt, interval: SIntervalCode, _) => + case (r, cb, rt, interval: SIntervalCode) => val empty = EmitCodeBuilder.scopedCode(r.mb) { cb => val intv = interval.memoize(cb, "interval") intv.isEmpty(cb) @@ -97,7 +97,7 @@ object IntervalFunctions extends RegistryFunctions { } registerSCode2("overlaps", TInterval(tv("T")), TInterval(tv("T")), TBoolean, (_: Type, i1t: SType, i2t: SType) => SBoolean) { - case (r, cb, rt, int1: SIntervalCode, int2: SIntervalCode, _) => + case (r, cb, rt, int1: SIntervalCode, int2: SIntervalCode) => val overlap = EmitCodeBuilder.scopedCode(r.mb) { cb => val interval1 = int1.memoize(cb, "interval1") val interval2 = int2.memoize(cb, "interval2") @@ -125,14 +125,14 @@ object IntervalFunctions extends RegistryFunctions { } registerIR2("sortedNonOverlappingIntervalsContain", - TArray(TInterval(tv("T"))), tv("T"), TBoolean) { case (_, intervals, value, errorID) => + TArray(TInterval(tv("T"))), tv("T"), TBoolean) { case (_, intervals, value) => val uid = genUID() val uid2 = genUID() Let(uid, LowerBoundOnOrderedCollection(intervals, value, onKey = true), (Let(uid2, Ref(uid, TInt32) - I32(1), (Ref(uid2, TInt32) >= 0) - && invoke("contains", TBoolean, ArrayRef(intervals, Ref(uid2, TInt32), errorID), value))) + && invoke("contains", TBoolean, ArrayRef(intervals, Ref(uid2, TInt32)), value))) || ((Ref(uid, TInt32) < ArrayLen(intervals)) - && invoke("contains", TBoolean, ArrayRef(intervals, Ref(uid, TInt32), errorID), value))) + && invoke("contains", TBoolean, ArrayRef(intervals, Ref(uid, TInt32)), value))) } @@ -140,7 +140,7 @@ object IntervalFunctions extends RegistryFunctions { registerIR2("partitionIntervalContains", TStruct("left" -> endpointT, "right" -> endpointT, "includesLeft" -> TBoolean, 
"includesRight" -> TBoolean), tv("T"), TBoolean) { - case (_, interval, point, _) => + case (_, interval, point) => def compareStructs(left: IR, right: IR): IR = { bindIRs(left, right) { case Seq(lTuple, r) => diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/LocusFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/LocusFunctions.scala index 1b735ebd329..26be380affb 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/LocusFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/LocusFunctions.scala @@ -66,7 +66,7 @@ object LocusFunctions extends RegistryFunctions { } def registerLocusCode(methodName: String)(f: IR => IR): Unit = - registerIR1(methodName, tlocus("T"), TBoolean)((_, a, _) => f(a)) + registerIR1(methodName, tlocus("T"), TBoolean)((_, a) => f(a)) def inX(locus: IR): IR = { val xContigs = Literal(TSet(TString), locus.typ.asInstanceOf[TLocus].rg.xContigs) @@ -96,12 +96,12 @@ object LocusFunctions extends RegistryFunctions { registerSCode1("contig", tlocus("T"), TString, (_: Type, x: SType) => x.asInstanceOf[SLocus].contigType) { - case (r, cb, rt, locus: SLocusCode, _) => + case (r, cb, rt, locus: SLocusCode) => locus.contig(cb) } registerSCode1("position", tlocus("T"), TInt32, (_: Type, x: SType) => SInt32) { - case (r, cb, rt, pc: SLocusCode, _) => + case (r, cb, rt, pc: SLocusCode) => val locus = pc.memoize(cb, "locus_position_locus") primitive(locus.position(cb)) } @@ -121,7 +121,7 @@ object LocusFunctions extends RegistryFunctions { PCanonicalStruct("locus" -> locusPT, "alleles" -> PCanonicalArray(PCanonicalString(true), true)).sType } }) { - case (r, cb, SBaseStructPointer(rt: PCanonicalStruct), locus: SLocusCode, alleles: SIndexableCode, _) => + case (r, cb, SBaseStructPointer(rt: PCanonicalStruct), locus: SLocusCode, alleles: SIndexableCode) => val variantTuple = Code.invokeScalaObject2[Locus, IndexedSeq[String], (Locus, IndexedSeq[String])]( VariantMethods.getClass, "minRep", locus.getLocusObj(cb), @@ -134,7 +134,7 @@ object LocusFunctions extends RegistryFunctions { (_: Type, _: SType, _: SType) => PCanonicalTuple(false, PCanonicalArray(PInt32(true), true), PCanonicalArray(PInt32(true), true)).sType }) { - case (r: EmitRegion, cb: EmitCodeBuilder, SBaseStructPointer(rt: PCanonicalTuple), groupedCode: SIndexableCode, radiusCode: SFloat64Code, errorID) => + case (r: EmitRegion, cb: EmitCodeBuilder, SBaseStructPointer(rt: PCanonicalTuple), groupedCode: SIndexableCode, radiusCode: SFloat64Code) => val grouped = groupedCode.memoize(cb, "locuswindows_grouped") val radius = cb.newLocal("locuswindows_radius", radiusCode.doubleCode(cb)) @@ -174,12 +174,13 @@ object LocusFunctions extends RegistryFunctions { cb.whileLoop(i < len, { coords.loadElement(cb, i).consume(cb, - cb._fatalWithError(errorID, const("locus_windows: missing value for 'coord_expr' at row ") + cb += Code._fatal[Unit]( + const("locus_windows: missing value for 'coord_expr' at row ") .concat((offset + i).toS)), { sc => val currentCoord = cb.newLocal[Double]("locuswindows_coord_i", sc.asDouble.doubleCode(cb)) cb.ifx(lastCoord > currentCoord, - cb._fatalWithError(errorID, "locus_windows: 'coord_expr' must be in ascending order within each contig."), + cb += Code._fatal[Unit]("locus_windows: 'coord_expr' must be in ascending order within each contig."), cb.assign(lastCoord, currentCoord) ) }) @@ -232,7 +233,7 @@ object LocusFunctions extends RegistryFunctions { registerSCode1("Locus", TString, tlocus("T"), { (returnType: Type, _: SType) => 
PCanonicalLocus(returnType.asInstanceOf[TLocus].rg).sType }) { - case (r, cb, SCanonicalLocusPointer(rt: PCanonicalLocus), str: SStringCode, _) => + case (r, cb, SCanonicalLocusPointer(rt: PCanonicalLocus), str: SStringCode) => val slocus = str.loadString() emitLocus(cb, r.region, @@ -243,7 +244,7 @@ object LocusFunctions extends RegistryFunctions { registerSCode2("Locus", TString, TInt32, tlocus("T"), { (returnType: Type, _: SType, _: SType) => PCanonicalLocus(returnType.asInstanceOf[TLocus].rg).sType }) { - case (r, cb, SCanonicalLocusPointer(rt: PCanonicalLocus), contig, pos, _) => + case (r, cb, SCanonicalLocusPointer(rt: PCanonicalLocus), contig, pos) => val contigMemo = contig.memoize(cb, "locus_contig") val posMemo = pos.memoize(cb, "locus_pos") cb += rgCode(r.mb, rt.rg).invoke[String, Int, Unit]("checkLocus", contigMemo.asString.loadString(), posMemo.asInt.intCode(cb)) @@ -256,7 +257,7 @@ object LocusFunctions extends RegistryFunctions { PCanonicalStruct("locus" -> PCanonicalLocus(lTyp.rg, true), "alleles" -> PCanonicalArray(PCanonicalString(true), true)).sType } }) { - case (r, cb, SBaseStructPointer(rt: PCanonicalStruct), variantStr, _) => + case (r, cb, SBaseStructPointer(rt: PCanonicalStruct), variantStr) => val svar = variantStr.asString.loadString() val plocus = rt.types(0).asInstanceOf[PCanonicalLocus] @@ -271,7 +272,7 @@ object LocusFunctions extends RegistryFunctions { val lPTyp = returnType.asInstanceOf[TInterval].pointType.asInstanceOf[TLocus] EmitType(PCanonicalInterval(PCanonicalLocus(lPTyp.asInstanceOf[TLocus].rg)).sType, false) } - }) { case (cb: EmitCodeBuilder, r: Value[Region], SIntervalPointer(rt: PCanonicalInterval), _, locusStrEC: EmitCode, invalidMissingEC: EmitCode) => + }) { case (cb: EmitCodeBuilder, r: Value[Region], SIntervalPointer(rt: PCanonicalInterval), locusStrEC: EmitCode, invalidMissingEC: EmitCode) => val plocus = rt.pointType.asInstanceOf[PLocus] @@ -305,7 +306,6 @@ object LocusFunctions extends RegistryFunctions { }) { case (cb: EmitCodeBuilder, r: Value[Region], SIntervalPointer(rt: PCanonicalInterval), - errorID: Value[Int], locusString: EmitCode, pos1: EmitCode, pos2: EmitCode, @@ -352,14 +352,14 @@ object LocusFunctions extends RegistryFunctions { (returnType: Type, _: SType) => PCanonicalLocus(returnType.asInstanceOf[TLocus].rg).sType }) { - case (r, cb, SCanonicalLocusPointer(rt: PCanonicalLocus), globalPos, _) => + case (r, cb, SCanonicalLocusPointer(rt: PCanonicalLocus), globalPos) => val locus = cb.newLocal[Locus]("global_pos_locus", rgCode(r.mb, rt.rg).invoke[Long, Locus]("globalPosToLocus", globalPos.asLong.longCode(cb))) rt.constructFromPositionAndString(cb, r.region, locus.invoke[String]("contig"), locus.invoke[Int]("position")) } registerSCode1("locusToGlobalPos", tlocus("T"), TInt64, (_: Type, _: SType) => SInt64) { - case (r, cb, rt, locus: SLocusCode, _) => + case (r, cb, rt, locus: SLocusCode) => val locusObject = locus.memoize(cb, "locus_to_global_pos") .getLocusObj(cb) val globalPos = rgCode(r.mb, locus.st.rg).invoke[Locus, Long]("locusToGlobalPos", locusObject) @@ -372,7 +372,7 @@ object LocusFunctions extends RegistryFunctions { EmitType(PCanonicalStruct("result" -> PCanonicalLocus(lTyp.rg, true), "is_negative_strand" -> PBoolean(true)).sType, false) } }) { - case (cb, r, SBaseStructPointer(rt: PCanonicalStruct), _, loc, minMatch) => + case (cb, r, SBaseStructPointer(rt: PCanonicalStruct), loc, minMatch) => loc.toI(cb).flatMap(cb) { loc => minMatch.toI(cb).flatMap(cb) { minMatch => @@ -411,7 +411,7 @@ object LocusFunctions 
extends RegistryFunctions { EmitType(PCanonicalStruct("result" -> PCanonicalInterval(PCanonicalLocus(lTyp.rg, true), true), "is_negative_strand" -> PBoolean(true)).sType, false) } }) { - case (cb, r, SBaseStructPointer(rt: PCanonicalStruct), _, interval, minMatch) => + case (cb, r, SBaseStructPointer(rt: PCanonicalStruct), interval, minMatch) => interval.toI(cb).flatMap(cb) { interval => minMatch.toI(cb).flatMap(cb) { minMatch => diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/MathFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/MathFunctions.scala index 26ebbfc2e95..745f995a58c 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/MathFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/MathFunctions.scala @@ -25,26 +25,26 @@ object MathFunctions extends RegistryFunctions { def mod(x: Int, y: Int): Int = { if (y == 0) - fatal(s"$x % 0: modulo by zero", ErrorIDs.NO_ERROR) + fatal(s"$x % 0: modulo by zero") java.lang.Math.floorMod(x, y) } def mod(x: Long, y: Long): Long = { if (y == 0L) - fatal(s"$x % 0: modulo by zero", ErrorIDs.NO_ERROR) + fatal(s"$x % 0: modulo by zero") java.lang.Math.floorMod(x, y) } def mod(x: Float, y: Float): Float = { if (y == 0.0) - fatal(s"$x % 0: modulo by zero", ErrorIDs.NO_ERROR) + fatal(s"$x % 0: modulo by zero") val t = x % y if (t < 0) t + y else t } def mod(x: Double, y: Double): Double = { if (y == 0.0) - fatal(s"$x % 0: modulo by zero", ErrorIDs.NO_ERROR) + fatal(s"$x % 0: modulo by zero") val t = x % y if (t < 0) t + y else t } @@ -59,14 +59,14 @@ object MathFunctions extends RegistryFunctions { def floorDiv(x: Int, y: Int): Int = { if (y == 0) - fatal(s"$x // 0: integer division by zero", ErrorIDs.NO_ERROR) + fatal(s"$x // 0: integer division by zero") java.lang.Math.floorDiv(x, y) } def floorDiv(x: Long, y: Long): Long = { if (y == 0L) - fatal(s"$x // 0: integer division by zero", ErrorIDs.NO_ERROR) + fatal(s"$x // 0: integer division by zero") java.lang.Math.floorDiv(x, y) } @@ -96,10 +96,10 @@ object MathFunctions extends RegistryFunctions { val jDoubleClass = classOf[java.lang.Double] // numeric conversions - registerIR1("toInt32", tnum("T"), TInt32)((_, x, _) => Cast((x), TInt32)) - registerIR1("toInt64", tnum("T"), TInt64)((_, x, _) => Cast(x, TInt64)) - registerIR1("toFloat32", tnum("T"), TFloat32)((_, x, _) => Cast(x, TFloat32)) - registerIR1("toFloat64", tnum("T"), TFloat64)((_, x, _) => Cast(x, TFloat64)) + registerIR1("toInt32", tnum("T"), TInt32)((_, x) => Cast((x), TInt32)) + registerIR1("toInt64", tnum("T"), TInt64)((_, x) => Cast(x, TInt64)) + registerIR1("toFloat32", tnum("T"), TFloat32)((_, x) => Cast(x, TFloat32)) + registerIR1("toFloat64", tnum("T"), TFloat64)((_, x) => Cast(x, TFloat64)) registerScalaFunction("abs", Array(TInt32), TInt32, null)(mathPackageClass, "abs") registerScalaFunction("abs", Array(TInt64), TInt64, null)(mathPackageClass, "abs") @@ -171,7 +171,7 @@ object MathFunctions extends RegistryFunctions { registerSCode4("fisher_exact_test", TInt32, TInt32, TInt32, TInt32, fetStruct.virtualType, (_, _, _, _, _) => fetStruct.sType - ) { case (r, cb, rt, a: SInt32Code, b: SInt32Code, c: SInt32Code, d: SInt32Code, _) => + ) { case (r, cb, rt, a: SInt32Code, b: SInt32Code, c: SInt32Code, d: SInt32Code) => val res = cb.newLocal[Array[Double]]("fisher_exact_test_res", Code.invokeScalaObject4[Int, Int, Int, Int, Array[Double]](statsPackageClass, "fisherExactTest", a.intCode(cb), @@ -189,7 +189,7 @@ object MathFunctions extends RegistryFunctions { registerSCode4("chi_squared_test", 
TInt32, TInt32, TInt32, TInt32, chisqStruct.virtualType, (_, _, _, _, _) => chisqStruct.sType - ) { case (r, cb, rt, a: SInt32Code, b: SInt32Code, c: SInt32Code, d: SInt32Code, _) => + ) { case (r, cb, rt, a: SInt32Code, b: SInt32Code, c: SInt32Code, d: SInt32Code) => val res = cb.newLocal[Array[Double]]("chi_squared_test_res", Code.invokeScalaObject4[Int, Int, Int, Int, Array[Double]](statsPackageClass, "chiSquaredTest", a.intCode(cb), @@ -205,7 +205,7 @@ object MathFunctions extends RegistryFunctions { registerSCode5("contingency_table_test", TInt32, TInt32, TInt32, TInt32, TInt32, chisqStruct.virtualType, (_, _, _, _, _, _) => chisqStruct.sType - ) { case (r, cb, rt, a: SInt32Code, b: SInt32Code, c: SInt32Code, d: SInt32Code, mcc: SInt32Code, _) => + ) { case (r, cb, rt, a: SInt32Code, b: SInt32Code, c: SInt32Code, d: SInt32Code, mcc: SInt32Code) => val res = cb.newLocal[Array[Double]]("contingency_table_test_res", Code.invokeScalaObject5[Int, Int, Int, Int, Int, Array[Double]](statsPackageClass, "contingencyTableTest", a.intCode(cb), @@ -222,7 +222,7 @@ object MathFunctions extends RegistryFunctions { registerSCode3("hardy_weinberg_test", TInt32, TInt32, TInt32, hweStruct.virtualType, (_, _, _, _) => hweStruct.sType - ) { case (r, cb, rt, nHomRef: SInt32Code, nHet: SInt32Code, nHomVar: SInt32Code, _) => + ) { case (r, cb, rt, nHomRef: SInt32Code, nHet: SInt32Code, nHomVar: SInt32Code) => val res = cb.newLocal[Array[Double]]("hardy_weinberg_test_res", Code.invokeScalaObject3[Int, Int, Int, Array[Double]](statsPackageClass, "hardyWeinbergTest", nHomRef.intCode(cb), diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/NDArrayFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/NDArrayFunctions.scala index 115258bcbba..9e37321e4a7 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/NDArrayFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/NDArrayFunctions.scala @@ -1,7 +1,7 @@ package is.hail.expr.ir.functions import is.hail.annotations.{Memory, Region} -import is.hail.asm4s._ +import is.hail.asm4s.{Code, Value} import is.hail.expr.ir._ import is.hail.expr.{Nat, NatVariable} import is.hail.linalg.{LAPACK, LinalgCodeUtils} @@ -9,7 +9,6 @@ import is.hail.types.coerce import is.hail.types.physical.stypes.EmitType import is.hail.types.physical.stypes.concrete.{SBaseStructPointer, SNDArrayPointer} import is.hail.types.physical.stypes.interfaces._ -import is.hail.types.physical.stypes.primitives.SBooleanCode import is.hail.types.physical.{PBooleanRequired, PCanonicalNDArray, PCanonicalStruct, PFloat64Required, PType} import is.hail.types.virtual._ @@ -17,63 +16,27 @@ object NDArrayFunctions extends RegistryFunctions { override def registerAll() { for ((stringOp, argType, retType, irOp) <- ArrayFunctions.arrayOps) { val nDimVar = NatVariable() - registerIR2(stringOp, TNDArray(argType, nDimVar), argType, TNDArray(retType, nDimVar)) { (_, a, c, errorID) => + registerIR2(stringOp, TNDArray(argType, nDimVar), argType, TNDArray(retType, nDimVar)) { (_, a, c) => val i = genUID() - NDArrayMap(a, i, irOp(Ref(i, c.typ), c, errorID)) + NDArrayMap(a, i, irOp(Ref(i, c.typ), c)) } - registerIR2(stringOp, argType, TNDArray(argType, nDimVar), TNDArray(retType, nDimVar)) { (_, c, a, errorID) => + registerIR2(stringOp, argType, TNDArray(argType, nDimVar), TNDArray(retType, nDimVar)) { (_, c, a) => val i = genUID() - NDArrayMap(a, i, irOp(c, Ref(i, c.typ), errorID)) + NDArrayMap(a, i, irOp(c, Ref(i, c.typ))) } - registerIR2(stringOp, TNDArray(argType, nDimVar), 
TNDArray(argType, nDimVar), TNDArray(retType, nDimVar)) { (_, l, r, errorID) => + registerIR2(stringOp, TNDArray(argType, nDimVar), TNDArray(argType, nDimVar), TNDArray(retType, nDimVar)) { (_, l, r) => val lid = genUID() val rid = genUID() val lElemRef = Ref(lid, coerce[TNDArray](l.typ).elementType) val rElemRef = Ref(rid, coerce[TNDArray](r.typ).elementType) - NDArrayMap2(l, r, lid, rid, irOp(lElemRef, rElemRef, errorID), errorID) + NDArrayMap2(l, r, lid, rid, irOp(lElemRef, rElemRef)) } } - def linear_triangular_solve(ndCoef: SNDArrayCode, ndDep: SNDArrayCode, lower: SBooleanCode, outputPt: PType, cb: EmitCodeBuilder, region: Value[Region], errorID: Value[Int]): (SNDArrayCode, Value[Int]) = { - val ndCoefInput = ndCoef.asNDArray.memoize(cb, "ndCoef") - val ndDepInput = ndDep.asNDArray.memoize(cb, "ndDep") - - val ndCoefColMajor = LinalgCodeUtils.checkColMajorAndCopyIfNeeded(ndCoefInput, cb, region) - val ndDepColMajor = LinalgCodeUtils.checkColMajorAndCopyIfNeeded(ndDepInput, cb, region) - - val IndexedSeq(ndCoefRow, ndCoefCol) = ndCoefColMajor.shapes(cb) - cb.ifx(ndCoefRow cne ndCoefCol, cb._fatalWithError(errorID, "hail.nd.solve_triangular: matrix a must be square.")) - - val IndexedSeq(ndDepRow, ndDepCol) = ndDepColMajor.shapes(cb) - cb.ifx(ndCoefRow cne ndDepRow, cb._fatalWithError(errorID,"hail.nd.solve_triangular: Solve dimensions incompatible")) - - val uplo = cb.newLocal[String]("dtrtrs_uplo") - cb.ifx(lower.boolCode(cb), cb.assign(uplo, const("L")), cb.assign(uplo, const("U"))) - - val infoDTRTRSResult = cb.newLocal[Int]("dtrtrs_result") - - val outputPType = coerce[PCanonicalNDArray](outputPt) - val output = outputPType.constructByActuallyCopyingData(ndDepColMajor, cb, region).memoize(cb, "triangular_solve_output") - - cb.assign(infoDTRTRSResult, Code.invokeScalaObject9[String, String, String, Int, Int, Long, Int, Long, Int, Int](LAPACK.getClass, "dtrtrs", - uplo, - const("N"), - const("N"), - ndDepRow.toI, - ndDepCol.toI, - ndCoefColMajor.firstDataAddress(cb), - ndDepRow.toI, - output.firstDataAddress(cb), - ndDepRow.toI - )) - - (output.get, infoDTRTRSResult) - } - - def linear_solve(a: SNDArrayCode, b: SNDArrayCode, outputPt: PType, cb: EmitCodeBuilder, region: Value[Region], errorID: Value[Int]): (SNDArrayCode, Value[Int]) = { + def linear_solve(a: SNDArrayCode, b: SNDArrayCode, outputPt: PType, cb: EmitCodeBuilder, region: Value[Region]): (SNDArrayCode, Value[Int]) = { val aInput = a.asNDArray.memoize(cb, "A") val bInput = b.asNDArray.memoize(cb, "B") @@ -82,11 +45,11 @@ object NDArrayFunctions extends RegistryFunctions { val IndexedSeq(n0, n1) = aColMajor.shapes(cb) - cb.ifx(n0 cne n1, cb._fatalWithError(errorID, "hail.nd.solve: matrix a must be square.")) + cb.ifx(n0 cne n1, cb._fatal("hail.nd.solve: matrix a must be square.")) val IndexedSeq(n, nrhs) = bColMajor.shapes(cb) - cb.ifx(n0 cne n, cb._fatalWithError(errorID, "hail.nd.solve: Solve dimensions incompatible")) + cb.ifx(n0 cne n, cb._fatal("hail.nd.solve: Solve dimensions incompatible")) val infoDGESVResult = cb.newLocal[Int]("dgesv_result") val ipiv = cb.newLocal[Long]("dgesv_ipiv") @@ -125,11 +88,11 @@ object NDArrayFunctions extends RegistryFunctions { registerIEmitCode2("linear_solve_no_crash", TNDArray(TFloat64, Nat(2)), TNDArray(TFloat64, Nat(2)), TStruct(("solution", TNDArray(TFloat64, Nat(2))), ("failed", TBoolean)), { (t, p1, p2) => EmitType(PCanonicalStruct(false, ("solution", PCanonicalNDArray(PFloat64Required, 2, false)), ("failed", PBooleanRequired)).sType, false) }) { - case (cb, region, 
SBaseStructPointer(outputStructType: PCanonicalStruct), errorID, aec, bec) => + case (cb, region, SBaseStructPointer(outputStructType: PCanonicalStruct), aec, bec) => aec.toI(cb).flatMap(cb) { apc => bec.toI(cb).map(cb) { bpc => val outputNDArrayPType = outputStructType.fieldType("solution") - val (resNDPCode, info) = linear_solve(apc.asNDArray, bpc.asNDArray, outputNDArrayPType, cb, region, errorID) + val (resNDPCode, info) = linear_solve(apc.asNDArray, bpc.asNDArray, outputNDArrayPType, cb, region) val ndEmitCode = EmitCode(Code._empty, info cne 0, resNDPCode) outputStructType.constructFromFields(cb, region, IndexedSeq[EmitCode](ndEmitCode, EmitCode(Code._empty, false, primitive(info cne 0))), false) } @@ -138,16 +101,9 @@ object NDArrayFunctions extends RegistryFunctions { registerSCode2("linear_solve", TNDArray(TFloat64, Nat(2)), TNDArray(TFloat64, Nat(2)), TNDArray(TFloat64, Nat(2)), { (t, p1, p2) => PCanonicalNDArray(PFloat64Required, 2, true).sType }) { - case (er, cb, SNDArrayPointer(pt), apc, bpc, errorID) => - val (resPCode, info) = linear_solve(apc.asNDArray, bpc.asNDArray, pt, cb, er.region, errorID) - cb.ifx(info cne 0, cb._fatalWithError(errorID,s"hl.nd.solve: Could not solve, matrix was singular. dgesv error code ", info.toS)) - resPCode - } - registerSCode3("linear_triangular_solve", TNDArray(TFloat64, Nat(2)), TNDArray(TFloat64, Nat(2)), TBoolean, TNDArray(TFloat64, Nat(2)), - { (t, p1, p2, p3) => PCanonicalNDArray(PFloat64Required, 2, true).sType }) { - case (er, cb, SNDArrayPointer(pt), apc, bpc, lower, errorID) => - val (resPCode, info) = linear_triangular_solve(apc.asNDArray, bpc.asNDArray,lower.asBoolean, pt, cb, er.region, errorID) - cb.ifx(info cne 0, cb._fatalWithError(errorID,s"hl.nd.solve: Could not solve, matrix was singular. dtrtrs error code ", info.toS)) + case (er, cb, SNDArrayPointer(pt), apc, bpc) => + val (resPCode, info) = linear_solve(apc.asNDArray, bpc.asNDArray, pt, cb, er.region) + cb.ifx(info cne 0, cb._fatal(s"hl.nd.solve: Could not solve, matrix was singular. 
dgesv error code ", info.toS)) resPCode } } diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/ReferenceGenomeFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/ReferenceGenomeFunctions.scala index f2750a9dce0..d563e778803 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/ReferenceGenomeFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/ReferenceGenomeFunctions.scala @@ -16,13 +16,13 @@ object ReferenceGenomeFunctions extends RegistryFunctions { def registerAll() { registerSCode1t("isValidContig", Array(LocusFunctions.tlocus("R")), TString, TBoolean, (_: Type, _: SType) => SBoolean) { - case (r, cb, Seq(tlocus: TLocus), _, contig, _) => + case (r, cb, Seq(tlocus: TLocus), _, contig) => val scontig = contig.asString.loadString() primitive(rgCode(r.mb, tlocus.asInstanceOf[TLocus].rg).invoke[String, Boolean]("isValidContig", scontig)) } registerSCode2t("isValidLocus", Array(LocusFunctions.tlocus("R")), TString, TInt32, TBoolean, (_: Type, _: SType, _: SType) => SBoolean) { - case (r, cb, Seq(tlocus: TLocus), _, contig, pos, _) => + case (r, cb, Seq(tlocus: TLocus), _, contig, pos) => val scontig = contig.asString.loadString() primitive(rgCode(r.mb, tlocus.rg).invoke[String, Int, Boolean]("isValidLocus", scontig, pos.asInt.intCode(cb))) } @@ -31,7 +31,7 @@ object ReferenceGenomeFunctions extends RegistryFunctions { Array(LocusFunctions.tlocus("R")), TString, TInt32, TInt32, TInt32, TString, (_: Type, _: SType, _: SType, _: SType, _: SType) => SStringPointer(PCanonicalString())) { - case (r, cb, Seq(typeParam: TLocus), st, contig, pos, before, after, _) => + case (r, cb, Seq(typeParam: TLocus), st, contig, pos, before, after) => val scontig = contig.asString.loadString() unwrapReturn(cb, r.region, st, rgCode(cb.emb, typeParam.rg).invoke[String, Int, Int, Int, String]("getSequence", @@ -42,13 +42,13 @@ object ReferenceGenomeFunctions extends RegistryFunctions { } registerSCode1t("contigLength", Array(LocusFunctions.tlocus("R")), TString, TInt32, (_: Type, _: SType) => SInt32) { - case (r, cb, Seq(tlocus: TLocus), _, contig, _) => + case (r, cb, Seq(tlocus: TLocus), _, contig) => val scontig = contig.asString.loadString() primitive(rgCode(r.mb, tlocus.rg).invoke[String, Int]("contigLength", scontig)) } registerIR("getReferenceSequence", Array(TString, TInt32, TInt32, TInt32), TString, typeParameters = Array(LocusFunctions.tlocus("R"))) { - case (tl, Seq(contig, pos, before, after), _) => + case (tl, Seq(contig, pos, before, after)) => val getRef = IRFunctionRegistry.lookupUnseeded( name = "getReferenceSequenceFromValidLocus", returnType = TString, @@ -60,8 +60,8 @@ object ReferenceGenomeFunctions extends RegistryFunctions { typeParameters = tl, Seq(TString, TInt32)).get - val r = isValid(tl, Seq(contig, pos), ErrorIDs.NO_ERROR) - val p = getRef(tl, Seq(contig, pos, before, after), ErrorIDs.NO_ERROR) + val r = isValid(tl, Seq(contig, pos)) + val p = getRef(tl, Seq(contig, pos, before, after)) If(r, p, NA(TString)) } } diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/SetFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/SetFunctions.scala index 4cdc6ca24bf..ce2d9e24f08 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/SetFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/SetFunctions.scala @@ -18,17 +18,17 @@ object SetFunctions extends RegistryFunctions { } def registerAll() { - registerIR1("toSet", TArray(tv("T")), TSet(tv("T"))) { (_, a, _) => + registerIR1("toSet", TArray(tv("T")), TSet(tv("T"))) { 
(_, a) => ToSet(ToStream(a)) } - registerIR1("isEmpty", TSet(tv("T")), TBoolean) { (_, s, _) => + registerIR1("isEmpty", TSet(tv("T")), TBoolean) { (_, s) => ArrayFunctions.isEmpty(CastToArray(s)) } - registerIR2("contains", TSet(tv("T")), tv("T"), TBoolean)((_, a, b, _) => contains(a, b)) + registerIR2("contains", TSet(tv("T")), tv("T"), TBoolean)((_, a, b) => contains(a, b)) - registerIR2("remove", TSet(tv("T")), tv("T"), TSet(tv("T"))) { (_, s, v, _) => + registerIR2("remove", TSet(tv("T")), tv("T"), TSet(tv("T"))) { (_, s, v) => val t = v.typ val x = genUID() ToSet( @@ -38,7 +38,7 @@ object SetFunctions extends RegistryFunctions { ApplyComparisonOp(NEQWithNA(t), Ref(x, t), v))) } - registerIR2("add", TSet(tv("T")), tv("T"), TSet(tv("T"))) { (_, s, v, _) => + registerIR2("add", TSet(tv("T")), tv("T"), TSet(tv("T"))) { (_, s, v) => val t = v.typ val x = genUID() ToSet( @@ -48,7 +48,7 @@ object SetFunctions extends RegistryFunctions { ToStream(Ref(x, TArray(t))))) } - registerIR2("union", TSet(tv("T")), TSet(tv("T")), TSet(tv("T"))) { (_, s1, s2, _) => + registerIR2("union", TSet(tv("T")), TSet(tv("T")), TSet(tv("T"))) { (_, s1, s2) => val t = s1.typ.asInstanceOf[TSet].elementType val x = genUID() ToSet( @@ -58,7 +58,7 @@ object SetFunctions extends RegistryFunctions { ToStream(Ref(x, TArray(t))))) } - registerIR2("intersection", TSet(tv("T")), TSet(tv("T")), TSet(tv("T"))) { (_, s1, s2, _) => + registerIR2("intersection", TSet(tv("T")), TSet(tv("T")), TSet(tv("T"))) { (_, s1, s2) => val t = s1.typ.asInstanceOf[TSet].elementType val x = genUID() ToSet( @@ -66,7 +66,7 @@ object SetFunctions extends RegistryFunctions { contains(s2, Ref(x, t)))) } - registerIR2("difference", TSet(tv("T")), TSet(tv("T")), TSet(tv("T"))) { (_, s1, s2, _) => + registerIR2("difference", TSet(tv("T")), TSet(tv("T")), TSet(tv("T"))) { (_, s1, s2) => val t = s1.typ.asInstanceOf[TSet].elementType val x = genUID() ToSet( @@ -74,16 +74,16 @@ object SetFunctions extends RegistryFunctions { ApplyUnaryPrimOp(Bang(), contains(s2, Ref(x, t))))) } - registerIR2("isSubset", TSet(tv("T")), TSet(tv("T")), TBoolean) { (_, s, w, errorID) => + registerIR2("isSubset", TSet(tv("T")), TSet(tv("T")), TBoolean) { (_, s, w) => val t = s.typ.asInstanceOf[TSet].elementType val a = genUID() val x = genUID() StreamFold(ToStream(s), True(), a, x, // FIXME short circuit - ApplySpecial("land", FastSeq(), FastSeq(Ref(a, TBoolean), contains(w, Ref(x, t))), TBoolean, errorID)) + ApplySpecial("land", FastSeq(), FastSeq(Ref(a, TBoolean), contains(w, Ref(x, t))), TBoolean)) } - registerIR1("median", TSet(tnum("T")), tv("T")) { (_, s, _) => + registerIR1("median", TSet(tnum("T")), tv("T")) { (_, s) => val t = s.typ.asInstanceOf[TSet].elementType val a = Ref(genUID(), TArray(t)) val size = Ref(genUID(), TInt32) diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/StringFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/StringFunctions.scala index d0cd8bb566a..1f2f018be60 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/StringFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/StringFunctions.scala @@ -57,7 +57,7 @@ object StringFunctions extends RegistryFunctions { val charD = new mutable.HashMap[Char, String] d.foreach { case (k, v) => if (k.length != 1) - fatal(s"translate: mapping keys must be one character, found '$k'", ErrorIDs.NO_ERROR) + fatal(s"translate: mapping keys must be one character, found '$k'") charD += ((k(0), v)) } @@ -100,20 +100,20 @@ object StringFunctions extends RegistryFunctions { 
def registerAll(): Unit = { val thisClass = getClass - registerSCode1("length", TString, TInt32, (_: Type, _: SType) => SInt32) { case (r: EmitRegion, cb, _, s: SStringCode, _) => + registerSCode1("length", TString, TInt32, (_: Type, _: SType) => SInt32) { case (r: EmitRegion, cb, _, s: SStringCode) => primitive(s.loadString().invoke[Int]("length")) } registerSCode3("substring", TString, TInt32, TInt32, TString, { (_: Type, _: SType, _: SType, _: SType) => SStringPointer(PCanonicalString()) }) { - case (r: EmitRegion, cb, st: SString, s, start, end, _) => + case (r: EmitRegion, cb, st: SString, s, start, end) => val str = s.asString.loadString().invoke[Int, Int, String]("substring", start.asInt.intCode(cb), end.asInt.intCode(cb)) st.constructFromString(cb, r.region, str) } - registerIR3("slice", TString, TInt32, TInt32, TString) { (_, str, start, end, _) => + registerIR3("slice", TString, TInt32, TInt32, TString) { (_, str, start, end) => val len = Ref(genUID(), TInt32) val s = Ref(genUID(), TInt32) val e = Ref(genUID(), TInt32) @@ -123,7 +123,7 @@ object StringFunctions extends RegistryFunctions { invoke("substring", TString, str, s, If(e < s, s, e))))) } - registerIR2("index", TString, TInt32, TString) { (_, s, i, errorID) => + registerIR2("index", TString, TInt32, TString) { (_, s, i) => val len = Ref(genUID(), TInt32) val idx = Ref(genUID(), TInt32) Let(len.name, invoke("length", TInt32, s), @@ -133,15 +133,15 @@ object StringFunctions extends RegistryFunctions { Str("string index out of bounds: "), invoke("concat", TString, invoke("str", TString, i), - invoke("concat", TString, Str(" / "), invoke("str", TString, len)))), TInt32, errorID), + invoke("concat", TString, Str(" / "), invoke("str", TString, len)))), TInt32, -1), If(i < 0, i + len, i)), invoke("substring", TString, s, idx, idx + 1))) } - registerIR2("sliceRight", TString, TInt32, TString) { (_, s, start, _) => invoke("slice", TString, s, start, invoke("length", TInt32, s)) } - registerIR2("sliceLeft", TString, TInt32, TString) { (_, s, end, _) => invoke("slice", TString, s, I32(0), end) } + registerIR2("sliceRight", TString, TInt32, TString) { (_, s, start) => invoke("slice", TString, s, start, invoke("length", TInt32, s)) } + registerIR2("sliceLeft", TString, TInt32, TString) { (_, s, end) => invoke("slice", TString, s, I32(0), end) } - registerSCode1("str", tv("T"), TString, (_: Type, _: SType) => SStringPointer(PCanonicalString())) { case (r, cb, st: SString, a, _) => + registerSCode1("str", tv("T"), TString, (_: Type, _: SType) => SStringPointer(PCanonicalString())) { case (r, cb, st: SString, a) => val annotation = scodeToJavaValue(cb, r.region, a) val str = cb.emb.getType(a.st.virtualType).invoke[Any, String]("str", annotation) st.constructFromString(cb, r.region, str) @@ -149,7 +149,7 @@ object StringFunctions extends RegistryFunctions { registerIEmitCode1("showStr", tv("T"), TString, { (_: Type, _: EmitType) => EmitType(SStringPointer(PCanonicalString()), true) - }) { case (cb, r, st: SString, _, a) => + }) { case (cb, r, st: SString, a) => val jObj = cb.newLocal("showstr_java_obj")(boxedTypeInfo(a.st.virtualType)) a.toI(cb).consume(cb, cb.assignAny(jObj, Code._null(boxedTypeInfo(a.st.virtualType))), @@ -162,7 +162,7 @@ object StringFunctions extends RegistryFunctions { registerIEmitCode2("showStr", tv("T"), TInt32, TString, { (_: Type, _: EmitType, truncType: EmitType) => EmitType(SStringPointer(PCanonicalString()), truncType.required) - }) { case (cb, r, st: SString, _, a, trunc) => + }) { case (cb, r, st: SString, 
a, trunc) => val jObj = cb.newLocal("showstr_java_obj")(boxedTypeInfo(a.st.virtualType)) trunc.toI(cb).map(cb) { trunc => @@ -176,7 +176,7 @@ object StringFunctions extends RegistryFunctions { } registerIEmitCode1("json", tv("T"), TString, (_: Type, _: EmitType) => EmitType(SStringPointer(PCanonicalString()), true)) { - case (cb, r, st: SString, _, a) => + case (cb, r, st: SString, a) => val ti = boxedTypeInfo(a.st.virtualType) val inputJavaValue = cb.newLocal("json_func_input_jv")(ti) a.toI(cb).consume(cb, @@ -237,7 +237,7 @@ object StringFunctions extends RegistryFunctions { registerIEmitCode2("firstMatchIn", TString, TString, TArray(TString), { case (_: Type, _: EmitType, _: EmitType) => EmitType(PCanonicalArray(PCanonicalString(true)).sType, false) - }) { case (cb: EmitCodeBuilder, region: Value[Region], SIndexablePointer(rt: PCanonicalArray), _, + }) { case (cb: EmitCodeBuilder, region: Value[Region], SIndexablePointer(rt: PCanonicalArray), s: EmitCode, r: EmitCode) => s.toI(cb).flatMap(cb) { case sc: SStringCode => r.toI(cb).flatMap(cb) { case rc: SStringCode => @@ -259,7 +259,7 @@ object StringFunctions extends RegistryFunctions { registerEmitCode2("hamming", TString, TString, TInt32, { case (_: Type, _: EmitType, _: EmitType) => EmitType(SInt32, false) - }) { case (r: EmitRegion, rt, _, e1: EmitCode, e2: EmitCode) => + }) { case (r: EmitRegion, rt, e1: EmitCode, e2: EmitCode) => EmitCode.fromI(r.mb) { cb => e1.toI(cb).flatMap(cb) { case (sc1: SStringCode) => e2.toI(cb).flatMap(cb) { case (sc2: SStringCode) => @@ -294,7 +294,7 @@ object StringFunctions extends RegistryFunctions { registerSCode("parse_json", Array(TString), TTuple(tv("T")), (rType: Type, _: Seq[SType]) => SType.canonical(rType), typeParameters = Array(tv("T")) - ) { case (er, cb, _, resultType, Array(s: SStringCode), _) => + ) { case (er, cb, _, resultType, Array(s: SStringCode)) => val warnCtx = cb.emb.genFieldThisRef[mutable.HashSet[String]]("parse_json_context") cb.ifx(warnCtx.load().isNull, cb.assign(warnCtx, Code.newInstance[mutable.HashSet[String]]())) diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/UtilFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/UtilFunctions.scala index 6e0acaedbcc..7197afbe67c 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/UtilFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/UtilFunctions.scala @@ -170,7 +170,7 @@ object UtilFunctions extends RegistryFunctions { registerSCode4("valuesSimilar", tv("T"), tv("U"), TFloat64, TBoolean, TBoolean, { case (_: Type, _: SType, _: SType, _: SType, _: SType) => SBoolean }) { - case (er, cb, rt, l, r, tol, abs, _) => + case (er, cb, rt, l, r, tol, abs) => assert(l.st.virtualType == r.st.virtualType, s"\n lt=${ l.st.virtualType }\n rt=${ r.st.virtualType }") val lb = scodeToJavaValue(cb, er.region, l) val rb = scodeToJavaValue(cb, er.region, r) @@ -182,10 +182,10 @@ object UtilFunctions extends RegistryFunctions { (n * (n + 1)) / 2 } - registerSCode1("toInt32", TBoolean, TInt32, (_: Type, _: SType) => SInt32) { case (_, cb, _, x, _) => primitive(x.asBoolean.boolCode(cb).toI) } - registerSCode1("toInt64", TBoolean, TInt64, (_: Type, _: SType) => SInt64) { case (_, cb, _, x, _) => primitive(x.asBoolean.boolCode(cb).toI.toL) } - registerSCode1("toFloat32", TBoolean, TFloat32, (_: Type, _: SType) => SFloat32) { case (_, cb, _, x, _) => primitive(x.asBoolean.boolCode(cb).toI.toF) } - registerSCode1("toFloat64", TBoolean, TFloat64, (_: Type, _: SType) => SFloat64) { case (_, cb, _, x, _) => 
primitive(x.asBoolean.boolCode(cb).toI.toD) } + registerSCode1("toInt32", TBoolean, TInt32, (_: Type, _: SType) => SInt32) { case (_, cb, _, x) => primitive(x.asBoolean.boolCode(cb).toI) } + registerSCode1("toInt64", TBoolean, TInt64, (_: Type, _: SType) => SInt64) { case (_, cb, _, x) => primitive(x.asBoolean.boolCode(cb).toI.toL) } + registerSCode1("toFloat32", TBoolean, TFloat32, (_: Type, _: SType) => SFloat32) { case (_, cb, _, x) => primitive(x.asBoolean.boolCode(cb).toI.toF) } + registerSCode1("toFloat64", TBoolean, TFloat64, (_: Type, _: SType) => SFloat64) { case (_, cb, _, x) => primitive(x.asBoolean.boolCode(cb).toI.toD) } for ((name, t, rpt, ct) <- Seq[(String, Type, SType, ClassTag[_])]( ("Boolean", TBoolean, SBoolean, implicitly[ClassTag[Boolean]]), @@ -196,12 +196,12 @@ object UtilFunctions extends RegistryFunctions { )) { val ctString: ClassTag[String] = implicitly[ClassTag[String]] registerSCode1(s"to$name", TString, t, (_: Type, _: SType) => rpt) { - case (r, cb, rt, x: SStringCode, _) => + case (r, cb, rt, x: SStringCode) => val s = x.loadString() primitive(rt.virtualType, Code.invokeScalaObject1(thisClass, s"parse$name", s)(ctString, ct)) } registerIEmitCode1(s"to${name}OrMissing", TString, t, (_: Type, xPT: EmitType) => EmitType(rpt, xPT.required)) { - case (cb, r, rt, _, x) => + case (cb, r, rt, x) => x.toI(cb).flatMap(cb) { case (sc: SStringCode) => val sv = cb.newLocal[String]("s", sc.loadString()) IEmitCode(cb, @@ -212,8 +212,8 @@ object UtilFunctions extends RegistryFunctions { } Array(TInt32, TInt64).foreach { t => - registerIR2("min", t, t, t)((_, a, b, _) => intMin(a, b)) - registerIR2("max", t, t, t)((_, a, b, _) => intMax(a, b)) + registerIR2("min", t, t, t)((_, a, b) => intMin(a, b)) + registerIR2("max", t, t, t)((_, a, b) => intMax(a, b)) } Array("min", "max").foreach { name => @@ -269,32 +269,32 @@ object UtilFunctions extends RegistryFunctions { } registerIEmitCode2(ignoreMissingName, TInt32, TInt32, TInt32, (_: Type, t1: EmitType, t2: EmitType) => EmitType(SInt32, t1.required || t2.required)) { - case (cb, r, rt, _, v1, v2) => ignoreMissingTriplet[Int](cb, rt, v1, v2, name, Code.invokeStatic2[Math, Int, Int, Int](name, _, _)) + case (cb, r, rt, v1, v2) => ignoreMissingTriplet[Int](cb, rt, v1, v2, name, Code.invokeStatic2[Math, Int, Int, Int](name, _, _)) } registerIEmitCode2(ignoreMissingName, TInt64, TInt64, TInt64, (_: Type, t1: EmitType, t2: EmitType) => EmitType(SInt64, t1.required || t2.required)) { - case (cb, r, rt, _, v1, v2) => ignoreMissingTriplet[Long](cb, rt, v1, v2, name, Code.invokeStatic2[Math, Long, Long, Long](name, _, _)) + case (cb, r, rt, v1, v2) => ignoreMissingTriplet[Long](cb, rt, v1, v2, name, Code.invokeStatic2[Math, Long, Long, Long](name, _, _)) } registerIEmitCode2(ignoreMissingName, TFloat32, TFloat32, TFloat32, (_: Type, t1: EmitType, t2: EmitType) => EmitType(SFloat32, t1.required || t2.required)) { - case (cb, r, rt, _, v1, v2) => ignoreMissingTriplet[Float](cb, rt, v1, v2, name, Code.invokeStatic2[Math, Float, Float, Float](name, _, _)) + case (cb, r, rt, v1, v2) => ignoreMissingTriplet[Float](cb, rt, v1, v2, name, Code.invokeStatic2[Math, Float, Float, Float](name, _, _)) } registerIEmitCode2(ignoreMissingName, TFloat64, TFloat64, TFloat64, (_: Type, t1: EmitType, t2: EmitType) => EmitType(SFloat64, t1.required || t2.required)) { - case (cb, r, rt, _, v1, v2) => ignoreMissingTriplet[Double](cb, rt, v1, v2, name, Code.invokeStatic2[Math, Double, Double, Double](name, _, _)) + case (cb, r, rt, v1, v2) => 
ignoreMissingTriplet[Double](cb, rt, v1, v2, name, Code.invokeStatic2[Math, Double, Double, Double](name, _, _)) } registerIEmitCode2(ignoreBothName, TFloat32, TFloat32, TFloat32, (_: Type, t1: EmitType, t2: EmitType) => EmitType(SFloat32, t1.required || t2.required)) { - case (cb, r, rt, _, v1, v2) => ignoreMissingTriplet[Float](cb, rt, v1, v2, ignoreNanName, Code.invokeScalaObject2[Float, Float, Float](thisClass, ignoreNanName, _, _)) + case (cb, r, rt, v1, v2) => ignoreMissingTriplet[Float](cb, rt, v1, v2, ignoreNanName, Code.invokeScalaObject2[Float, Float, Float](thisClass, ignoreNanName, _, _)) } registerIEmitCode2(ignoreBothName, TFloat64, TFloat64, TFloat64, (_: Type, t1: EmitType, t2: EmitType) => EmitType(SFloat64, t1.required || t2.required)) { - case (cb, r, rt, _, v1, v2) => ignoreMissingTriplet[Double](cb, rt, v1, v2, ignoreNanName, Code.invokeScalaObject2[Double, Double, Double](thisClass, ignoreNanName, _, _)) + case (cb, r, rt, v1, v2) => ignoreMissingTriplet[Double](cb, rt, v1, v2, ignoreNanName, Code.invokeScalaObject2[Double, Double, Double](thisClass, ignoreNanName, _, _)) } } registerSCode2("format", TString, tv("T", "tuple"), TString, (_: Type, _: SType, _: SType) => PCanonicalString().sType) { - case (r, cb, SStringPointer(rt: PCanonicalString), format, args, _) => + case (r, cb, SStringPointer(rt: PCanonicalString), format, args) => val javaObjArgs = Code.checkcast[Row](scodeToJavaValue(cb, r.region, args)) val formatted = Code.invokeScalaObject2[String, Row, String](thisClass, "format", format.asString.loadString(), javaObjArgs) val st = SStringPointer(rt) @@ -302,7 +302,7 @@ object UtilFunctions extends RegistryFunctions { } registerIEmitCode2("land", TBoolean, TBoolean, TBoolean, (_: Type, tl: EmitType, tr: EmitType) => EmitType(SBoolean, tl.required && tr.required)) { - case (cb, _, rt,_ , l, r) => + case (cb, _, rt, l, r) => // 00 ... 00 rv rm lv lm val w = cb.newLocal[Int]("land_w") @@ -333,7 +333,7 @@ object UtilFunctions extends RegistryFunctions { } registerIEmitCode2("lor", TBoolean, TBoolean, TBoolean, (_: Type, tl: EmitType, tr: EmitType) => EmitType(SBoolean, tl.required && tr.required)) { - case (cb, _, rt,_, l, r) => + case (cb, _, rt, l, r) => // 00 ... 
00 rv rm lv lm val w = cb.newLocal[Int]("lor_w") diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerBlockMatrixIR.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerBlockMatrixIR.scala index 0d517fdc087..97a911a4c18 100644 --- a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerBlockMatrixIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerBlockMatrixIR.scala @@ -28,7 +28,7 @@ object BlockMatrixStage { val start = GetField(ctxRef, "start") bindIR(NDArrayReshape( NDArraySlice(v, MakeTuple.ordered(FastSeq(MakeTuple.ordered(FastSeq(start.toL, (start + len).toL, 1L))))), - MakeTuple.ordered(if (asRowVector) FastSeq[IR](1L, len.toL) else FastSeq[IR](len.toL, 1L)), ErrorIDs.NO_ERROR)) { sliced => + MakeTuple.ordered(if (asRowVector) FastSeq[IR](1L, len.toL) else FastSeq[IR](len.toL, 1L)))) { sliced => NDArrayConcat(ToArray(mapIR(rangeIR(nRep))(_ => sliced)), if (asRowVector) 0 else 1) } } @@ -278,7 +278,7 @@ object LowerBlockMatrixIR { loweredLeft .addGlobals(loweredRight.globalVals: _*) .addContext(loweredRight.ctxType)(loweredRight.blockContext).mapBody { (ctx, leftBody) => - NDArrayMap2(leftBody, bindIR(GetField(ctx, "new"))(loweredRight.blockBody), lname, rname, f, ErrorIDs.NO_ERROR) + NDArrayMap2(leftBody, bindIR(GetField(ctx, "new"))(loweredRight.blockBody), lname, rname, f) } case x@BlockMatrixBroadcast(child, IndexedSeq(), _, _) => @@ -299,7 +299,7 @@ object LowerBlockMatrixIR { } case x@BlockMatrixBroadcast(child, IndexedSeq(axis), _, _) => val len = child.typ.shape.max - val vector = NDArrayReshape(lower(child).collectLocal(relationalLetsAbove, child.typ), MakeTuple.ordered(FastSeq(I64(len))), ErrorIDs.NO_ERROR) + val vector = NDArrayReshape(lower(child).collectLocal(relationalLetsAbove, child.typ), MakeTuple.ordered(FastSeq(I64(len)))) BlockMatrixStage.broadcastVector(vector, x.typ, asRowVector = axis == 1) case x@BlockMatrixBroadcast(child, IndexedSeq(axis, axis2), _, _) if (axis == axis2) => // diagonal as row/col vector @@ -446,13 +446,13 @@ object LowerBlockMatrixIR { def blockMultiply(elt: Ref) = bindIR(GetTupleElement(elt, 0)) { leftElt => bindIR(GetTupleElement(elt, 1)) { rightElt => - NDArrayMatMul(left.blockBody(leftElt), right.blockBody(rightElt), ErrorIDs.NO_ERROR) + NDArrayMatMul(left.blockBody(leftElt), right.blockBody(rightElt)) } } foldIR(ToStream(invoke("sliceRight", ctxType, ctxRef, I32(1))), bindIR(ArrayRef(ctxRef, 0))(blockMultiply)) { (sum, elt) => NDArrayMap2(sum, blockMultiply(elt), "l", "r", - Ref("l", x.typ.elementType) + Ref("r", x.typ.elementType), ErrorIDs.NO_ERROR) + Ref("l", x.typ.elementType) + Ref("r", x.typ.elementType)) } } } diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala index 5cc5272f749..f55b2d0c597 100644 --- a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala @@ -1,13 +1,10 @@ package is.hail.expr.ir.lowering -import is.hail.HailContext import is.hail.expr.ir._ -import is.hail.io.{BufferSpec, TypedCodecSpec} import is.hail.methods.{ForceCountTable, NPartitionsTable} import is.hail.rvd.{PartitionBoundOrdering, RVDPartitioner} -import is.hail.types.physical.{PCanonicalBinary, PCanonicalTuple} import is.hail.types.virtual._ -import is.hail.types.{RField, RStruct, RTable, TableType} +import is.hail.types.{RTable, TableType, RStruct, RField} import is.hail.utils._ import org.apache.spark.sql.Row @@ -1174,111 +1171,35 @@ object LowerTableIR { 
val initFromSerializedStates = Begin(aggs.aggs.zipWithIndex.map { case (agg, i) => InitFromSerializedValue(i, GetTupleElement(initStateRef, i), agg.state )}) - val useTreeAggregate = aggs.shouldTreeAggregate - val isCommutative = aggs.isCommutative - log.info(s"Aggregate: useTreeAggregate=${ useTreeAggregate }") - log.info(s"Aggregate: commutative=${ isCommutative }") - - if (useTreeAggregate) { - val branchFactor = HailContext.get.branchingFactor - val tmpDir = ctx.createTmpPath("aggregate_intermediates/") - - val codecSpec = TypedCodecSpec(PCanonicalTuple(true, aggs.aggs.map(_ => PCanonicalBinary(true)): _*), BufferSpec.wireSpec) - lcWithInitBinding.mapCollectWithGlobals(relationalLetsAbove)({ part: IR => - Let("global", lc.globals, - RunAgg( - Begin(FastIndexedSeq( - initFromSerializedStates, - StreamFor(part, - "row", - aggs.seqPerElt - ) - )), - WriteValue(MakeTuple.ordered(aggs.aggs.zipWithIndex.map { case (sig, i) => AggStateValue(i, sig.state) }), Str(tmpDir) + UUID4(), codecSpec), - aggs.states - )) - }) { case (collected, globals) => - val treeAggFunction = genUID() - val currentAggStates = Ref(genUID(), TArray(TString)) - - val distAggStatesRef = Ref(genUID(), TArray(TString)) - - - def combineGroup(partArrayRef: IR): IR = { + lcWithInitBinding.mapCollectWithGlobals(relationalLetsAbove)({ part: IR => + Let("global", lc.globals, + RunAgg( Begin(FastIndexedSeq( - bindIR(ReadValue(ArrayRef(partArrayRef, 0), codecSpec, codecSpec.encodedVirtualType)) { serializedTuple => - Begin( - aggs.aggs.zipWithIndex.map { case (sig, i) => - InitFromSerializedValue(i, GetTupleElement(serializedTuple, i), sig.state) - }) - }, - forIR(StreamRange(1, ArrayLen(partArrayRef), 1)) { fileIdx => - - bindIR(ReadValue(ArrayRef(partArrayRef, fileIdx), codecSpec, codecSpec.encodedVirtualType)) { serializedTuple => - Begin( - aggs.aggs.zipWithIndex.map { case (sig, i) => - CombOpValue(i, GetTupleElement(serializedTuple, i), sig) - }) - } - })) - } - - bindIR(TailLoop(treeAggFunction, - FastIndexedSeq((currentAggStates.name -> collected)), - If(ArrayLen(currentAggStates) <= I32(branchFactor), - currentAggStates, - Recur(treeAggFunction, FastIndexedSeq(CollectDistributedArray(mapIR(StreamGrouped(ToStream(currentAggStates), I32(branchFactor)))(x => ToArray(x)), - MakeStruct(FastSeq()), distAggStatesRef.name, genUID(), - RunAgg( - combineGroup(distAggStatesRef), - WriteValue(MakeTuple.ordered(aggs.aggs.zipWithIndex.map { case (sig, i) => AggStateValue(i, sig.state) }), Str(tmpDir) + UUID4(), codecSpec), - aggs.states - ) - )), currentAggStates.typ))) - ) { finalParts => - RunAgg( - combineGroup(finalParts), - Let("global", globals, - Let( - resultUID, - ResultOp(0, aggs.aggs), - aggs.postAggIR)), - aggs.states - ) - } - } - } - else { - lcWithInitBinding.mapCollectWithGlobals(relationalLetsAbove)({ part: IR => - Let("global", lc.globals, - RunAgg( - Begin(FastIndexedSeq( - initFromSerializedStates, - StreamFor(part, - "row", - aggs.seqPerElt - ) - )), - MakeTuple.ordered(aggs.aggs.zipWithIndex.map { case (sig, i) => AggStateValue(i, sig.state) }), - aggs.states - )) - }) { case (collected, globals) => - Let("global", - globals, - RunAgg( - Begin(FastIndexedSeq( - initFromSerializedStates, - forIR(ToStream(collected)) { state => - Begin(aggs.aggs.zipWithIndex.map { case (sig, i) => CombOpValue(i, GetTupleElement(state, i), sig) }) - } - )), - Let( - resultUID, - ResultOp(0, aggs.aggs), - aggs.postAggIR), - aggs.states - )) - } + initFromSerializedStates, + StreamFor(part, + "row", + aggs.seqPerElt + ) + )), + 
MakeTuple.ordered(aggs.aggs.zipWithIndex.map { case (sig, i) => AggStateValue(i, sig.state) }), + aggs.states + )) + }) { case (collected, globals) => + Let("global", + globals, + RunAgg( + Begin(FastIndexedSeq( + initFromSerializedStates, + forIR(ToStream(collected)) { state => + Begin(aggs.aggs.zipWithIndex.map { case (sig, i) => CombOpValue(i, GetTupleElement(state, i), sig) }) + } + )), + Let( + resultUID, + ResultOp(0, aggs.aggs), + aggs.postAggIR), + aggs.states + )) } case TableToValueApply(child, NPartitionsTable()) => diff --git a/hail/src/main/scala/is/hail/expr/ir/ndarrays/EmitNDArray.scala b/hail/src/main/scala/is/hail/expr/ir/ndarrays/EmitNDArray.scala index ef5a1291023..07546639948 100644 --- a/hail/src/main/scala/is/hail/expr/ir/ndarrays/EmitNDArray.scala +++ b/hail/src/main/scala/is/hail/expr/ir/ndarrays/EmitNDArray.scala @@ -122,13 +122,13 @@ object EmitNDArray { } } } - case NDArrayMap2(lChild, rChild, lName, rName, body, errorID) => { + case NDArrayMap2(lChild, rChild, lName, rName, body) => { deforestRecur(lChild, cb).flatMap(cb) { leftProducer => deforestRecur(rChild, cb).map(cb) { rightProducer => val leftShapeValues = leftProducer.shape val rightShapeValues = rightProducer.shape - val shapeArray = NDArrayEmitter.unifyShapes2(cb, leftShapeValues, rightShapeValues, errorID) + val shapeArray = NDArrayEmitter.unifyShapes2(cb, leftShapeValues, rightShapeValues) val lElemRef = cb.emb.newEmitField(lName, leftProducer.elementType.sType, required = true) val rElemRef = cb.emb.newEmitField(rName, rightProducer.elementType.sType, required = true) @@ -164,7 +164,7 @@ object EmitNDArray { cb.assign(lElemRef, EmitCode.present(cb.emb, leftBroadcasted.loadElementAtCurrentAddr(cb))) cb.assign(rElemRef, EmitCode.present(cb.emb, rightBroadcasted.loadElementAtCurrentAddr(cb))) - bodyEC.toI(cb).get(cb, "NDArrayMap2 body cannot be missing", errorID) + bodyEC.toI(cb).get(cb, "NDArrayMap2 body cannot be missing") } } } @@ -203,7 +203,7 @@ object EmitNDArray { override def loadElementAtCurrentAddr(cb: EmitCodeBuilder): SCode = childProducer.loadElementAtCurrentAddr(cb) } } - case x@NDArrayReshape(childND, shape, errorID) => + case x@NDArrayReshape(childND, shape) => emitI(childND, cb).flatMap(cb) { case childND: SNDArrayCode => // Plan: Run through the child row major, make an array. Then jump around it as needed. 
val childMemo = childND.memoize(cb, "ndarray_reshape_child") @@ -226,19 +226,19 @@ object EmitNDArray { cb.assign(runningProduct, 1L) (0 until outputNDims).foreach { i => - cb.assign(tempShapeElement, tupleValue.loadField(cb, i).get(cb, "Can't reshape if elements of reshape tuple are missing.", errorID).asLong.longCode(cb)) + cb.assign(tempShapeElement, tupleValue.loadField(cb, i).get(cb, "Can't reshape if elements of reshape tuple are missing.").asLong.longCode(cb)) cb.ifx(tempShapeElement < 0L, { cb.ifx(tempShapeElement ceq -1L, { cb.ifx(hasNegativeOne, { - cb._fatalWithError(errorID, "Can't infer shape, more than one -1") + cb._fatal("Can't infer shape, more than one -1") }, { cb.assign(hasNegativeOne, true) }) }, { - cb._fatalWithError(errorID,"Can't reshape, new shape must contain only nonnegative numbers or -1") + cb._fatal("Can't reshape, new shape must contain only nonnegative numbers or -1") } ) }, @@ -255,12 +255,12 @@ object EmitNDArray { (runningProduct ceq 0L) || (numElements % runningProduct) > 0L, numElements cne runningProduct ), { - cb._fatalWithError(errorID,"Can't reshape since requested shape is incompatible with number of elements") + cb._fatal("Can't reshape since requested shape is incompatible with number of elements") }) cb.assign(replacesNegativeOne, (runningProduct ceq 0L).mux(0L, numElements / runningProduct)) (0 until outputNDims).foreach { i => - cb.assign(tempShapeElement, tupleValue.loadField(cb, i).get(cb, "Can't reshape if elements of reshape tuple are missing.", errorID).asLong.longCode(cb)) + cb.assign(tempShapeElement, tupleValue.loadField(cb, i).get(cb, "Can't reshape if elements of reshape tuple are missing.").asLong.longCode(cb)) cb.assign(requestedShapeValues(i), (tempShapeElement ceq -1L).mux(replacesNegativeOne, tempShapeElement)) } diff --git a/hail/src/main/scala/is/hail/expr/ir/orderings/StringOrdering.scala b/hail/src/main/scala/is/hail/expr/ir/orderings/StringOrdering.scala index b12a8619385..cb40c07f106 100644 --- a/hail/src/main/scala/is/hail/expr/ir/orderings/StringOrdering.scala +++ b/hail/src/main/scala/is/hail/expr/ir/orderings/StringOrdering.scala @@ -22,17 +22,6 @@ object StringOrdering { ord.compareNonnull(cb, x.asString.asBytes(), y.asString.asBytes()) } } - - case (_, _) => - new CodeOrderingCompareConsistentWithOthers { - - val type1: SString = t1 - val type2: SString = t2 - - def _compareNonnull(cb: EmitCodeBuilder, x: SCode, y: SCode): Code[Int] = { - x.asString.loadString().invoke[String, Int]("compareTo", y.asString.loadString()) - } - } } } } diff --git a/hail/src/main/scala/is/hail/expr/ir/package.scala b/hail/src/main/scala/is/hail/expr/ir/package.scala index 14061cbd521..a75f0de877d 100644 --- a/hail/src/main/scala/is/hail/expr/ir/package.scala +++ b/hail/src/main/scala/is/hail/expr/ir/package.scala @@ -53,18 +53,13 @@ package object ir { private[ir] def coerce[T <: BaseTypeWithRequiredness](x: BaseTypeWithRequiredness): T = tycoerce[T](x) - def invoke(name: String, rt: Type, typeArgs: Array[Type], errorID: Int, args: IR*): IR = IRFunctionRegistry.lookupUnseeded(name, rt, typeArgs, args.map(_.typ)) match { - case Some(f) => f(typeArgs, args, errorID) + def invoke(name: String, rt: Type, typeArgs: Array[Type], args: IR*): IR = IRFunctionRegistry.lookupUnseeded(name, rt, typeArgs, args.map(_.typ)) match { + case Some(f) => f(typeArgs, args) case None => fatal(s"no conversion found for $name(${typeArgs.mkString(", ")}, ${args.map(_.typ).mkString(", ")}) => $rt") } - def invoke(name: String, rt: Type, typeArgs: Array[Type], 
args: IR*): IR = - invoke(name, rt, typeArgs, ErrorIDs.NO_ERROR, args:_*) def invoke(name: String, rt: Type, args: IR*): IR = - invoke(name, rt, Array.empty[Type], ErrorIDs.NO_ERROR, args:_*) - - def invoke(name: String, rt: Type, errorID: Int, args: IR*): IR = - invoke(name, rt, Array.empty[Type], errorID, args:_*) + invoke(name, rt, Array.empty[Type], args:_*) def invokeSeeded(name: String, seed: Long, rt: Type, args: IR*): IR = IRFunctionRegistry.lookupSeeded(name, seed, rt, args.map(_.typ)) match { case Some(f) => f(args) @@ -178,7 +173,7 @@ package object ir { def zipIR(ss: IndexedSeq[IR], behavior: ArrayZipBehavior.ArrayZipBehavior)(f: IndexedSeq[Ref] => IR): IR = { val refs = ss.map(s => Ref(genUID(), coerce[TStream](s.typ).elementType)) - StreamZip(ss, refs.map(_.name), f(refs), behavior, ErrorIDs.NO_ERROR) + StreamZip(ss, refs.map(_.name), f(refs), behavior) } def makestruct(fields: (String, IR)*): MakeStruct = MakeStruct(fields) diff --git a/hail/src/main/scala/is/hail/expr/ir/streams/EmitStream.scala b/hail/src/main/scala/is/hail/expr/ir/streams/EmitStream.scala index 0f49249e537..6b525776bad 100644 --- a/hail/src/main/scala/is/hail/expr/ir/streams/EmitStream.scala +++ b/hail/src/main/scala/is/hail/expr/ir/streams/EmitStream.scala @@ -7,7 +7,7 @@ import is.hail.expr.ir.orderings.StructOrdering import is.hail.services.shuffler.CompileTimeShuffleClient import is.hail.types.{TypeWithRequiredness, VirtualTypeWithReq} import is.hail.types.physical.stypes.{EmitType, SType} -import is.hail.types.physical.stypes.concrete.{SCanonicalShufflePointerSettable, SUnreachable} +import is.hail.types.physical.stypes.concrete.SCanonicalShufflePointerSettable import is.hail.types.physical.stypes.interfaces._ import is.hail.types.physical.stypes.primitives.{SInt32, SInt32Code} import is.hail.types.physical.{PCanonicalArray, PCanonicalStream, PCanonicalStruct, PInterval, PStruct, PType} @@ -168,8 +168,8 @@ object EmitStream { streamIR match { - case x@NA(_typ: TStream) => - val st = SStream(EmitType(SUnreachable.fromVirtualType(_typ.elementType), true)) + case x@NA(_typ) => + val st = typeWithReq.canonicalEmitType.st.asInstanceOf[SStream] val region = mb.genFieldThisRef[Region]("na_region") val producer = new StreamProducer { override def initialize(cb: EmitCodeBuilder): Unit = {} @@ -383,7 +383,7 @@ object EmitStream { leftEC.required && rightEC.required) } - case StreamRange(startIR, stopIR, stepIR, _requiresMemoryManagementPerElement, errorID) => + case StreamRange(startIR, stopIR, stepIR, _requiresMemoryManagementPerElement) => emit(startIR, cb).flatMap(cb) { startc => emit(stopIR, cb).flatMap(cb) { stopc => @@ -409,7 +409,7 @@ object EmitStream { cb.assign(stop, stopc.asInt.intCode(cb)) cb.assign(step, stepc.asInt.intCode(cb)) - cb.ifx(step ceq const(0), cb._fatalWithError(errorID, "Array range cannot have step size 0.")) + cb.ifx(step ceq const(0), cb._fatal("Array range cannot have step size 0.")) cb.ifx(step < const(0), { cb.ifx(start.toL <= stop.toL, { cb.assign(llen, 0L) @@ -424,7 +424,7 @@ object EmitStream { }) }) cb.ifx(llen > const(Int.MaxValue.toLong), { - cb._fatalWithError(errorID, "Array range cannot have more than MAXINT elements.") + cb._fatal("Array range cannot have more than MAXINT elements.") }) cb.assign(len, llen.toI) @@ -1524,7 +1524,7 @@ object EmitStream { } } - case StreamZip(as, names, body, behavior, errorID) => + case StreamZip(as, names, body, behavior) => IEmitCode.multiMapEmitCodes(cb, as.map(a => EmitCode.fromI(mb)(cb => produce(a, cb)))) { childStreams => val 
producers = childStreams.map(_.asInstanceOf[SStreamCode].producer) @@ -1612,7 +1612,7 @@ object EmitStream { val len = cb.newLocal[Int]("zip_len", ls.head(cb)) ls.tail.foreach { compL => val lenTemp = cb.newLocal[Int]("lenTemp", compL(cb)) - cb.ifx(len.cne(lenTemp), cb._fatalWithError(errorID, "zip: length mismatch: ", len.toS, ", ", lenTemp.toS)) + cb.ifx(len.cne(lenTemp), cb._fatal("zip: length mismatch: ", len.toS, ", ", lenTemp.toS)) } len }) @@ -1657,7 +1657,7 @@ object EmitStream { cb.ifx(anyEOS, cb.ifx(allEOS, cb.goto(LendOfStream), - cb._fatalWithError(errorID, "zip: length mismatch")) + cb._fatal("zip: length mismatch")) ) cb.goto(LproduceElementDone) diff --git a/hail/src/main/scala/is/hail/expr/ir/streams/StreamUtils.scala b/hail/src/main/scala/is/hail/expr/ir/streams/StreamUtils.scala index a6ec6a98c35..86490a5bfae 100644 --- a/hail/src/main/scala/is/hail/expr/ir/streams/StreamUtils.scala +++ b/hail/src/main/scala/is/hail/expr/ir/streams/StreamUtils.scala @@ -89,7 +89,7 @@ object StreamUtils { traverse(a, mult); traverse(i, 2); traverse(s, 2); traverse(r, 2) case StreamZipJoin(as, _, _, _, f) => as.foreach(traverse(_, mult)); traverse(f, 2) - case StreamZip(as, _, body, _, _) => + case StreamZip(as, _, body, _) => as.foreach(traverse(_, mult)); traverse(body, 2) case StreamFold(a, zero, _, _, body) => traverse(a, mult); traverse(zero, mult); traverse(body, 2) @@ -102,7 +102,7 @@ object StreamUtils { traverse(a, mult); traverse(body, 2) case NDArrayMap(a, _, body) => traverse(a, mult); traverse(body, 2) - case NDArrayMap2(l, r, _, _, body, _) => + case NDArrayMap2(l, r, _, _, body) => traverse(l, mult); traverse(r, mult); traverse(body, 2) case _ => ir.children.foreach { diff --git a/hail/src/main/scala/is/hail/io/plink/LoadPlink.scala b/hail/src/main/scala/is/hail/io/plink/LoadPlink.scala index 13a0b354b31..64fcc76cc91 100644 --- a/hail/src/main/scala/is/hail/io/plink/LoadPlink.scala +++ b/hail/src/main/scala/is/hail/io/plink/LoadPlink.scala @@ -201,8 +201,8 @@ object MatrixPLINKReader { var nPartitions = params.nPartitions match { case Some(nPartitions) => nPartitions case None => - val blockSizeInB = params.blockSizeInMB.getOrElse(16) * 1024 * 1024 - ((bedSize + blockSizeInB - 1) / blockSizeInB).toInt + val blockSizeInB = params.blockSizeInMB.getOrElse(128) * 1024 * 1024 + (nVariants + blockSizeInB - 1) / blockSizeInB } params.minPartitions match { case Some(minPartitions) => @@ -223,14 +223,14 @@ object MatrixPLINKReader { var p = 0 var prevEnd = 0 val lOrd = locusType.ordering - while (p < nPartitions && prevEnd < nVariants) { + while (p < nPartitions) { val start = prevEnd var end = partScan(p + 1) if (start < end) { - while (end < nVariants - && lOrd.equiv(variants(end - 1).locusAlleles.asInstanceOf[Row].get(0), - variants(end).locusAlleles.asInstanceOf[Row].get(0))) + while (end + 1 < nVariants + && lOrd.equiv(variants(end).locusAlleles.asInstanceOf[Row].get(0), + variants(end + 1).locusAlleles.asInstanceOf[Row].get(0))) end += 1 cb += Row(params.bed, start, end) diff --git a/hail/src/main/scala/is/hail/io/reference/FASTAReader.scala b/hail/src/main/scala/is/hail/io/reference/FASTAReader.scala index 530fd6bad66..130d8db66dc 100644 --- a/hail/src/main/scala/is/hail/io/reference/FASTAReader.scala +++ b/hail/src/main/scala/is/hail/io/reference/FASTAReader.scala @@ -14,9 +14,9 @@ import is.hail.io.fs.FS import scala.language.postfixOps import scala.collection.concurrent -case class FASTAReaderConfig(tmpdir: String, fs: FS, rg: ReferenceGenome, - fastaFile: String, indexFile: 
String, blockSize: Int = 4096, capacity: Int = 100 -) { +case class FASTAReaderConfig(val tmpdir: String, val fsBc: BroadcastValue[FS], val rg: ReferenceGenome, + val fastaFile: String, val indexFile: String, val blockSize: Int = 4096, val capacity: Int = 100 +) extends Serializable { if (blockSize <= 0) fatal(s"'blockSize' must be greater than 0. Found $blockSize.") if (capacity <= 0) @@ -55,10 +55,10 @@ object FASTAReader { } class FASTAReader(val cfg: FASTAReaderConfig) { - val FASTAReaderConfig(tmpdir, fs, rg, fastaFile, indexFile, blockSize, capacity) = cfg + val FASTAReaderConfig(tmpdir, fsBc, rg, fastaFile, indexFile, blockSize, capacity) = cfg private[this] def newReader(): ReferenceSequenceFile = { - val localFastaFile = FASTAReader.getLocalFastaFile(tmpdir, fs, fastaFile, indexFile) + val localFastaFile = FASTAReader.getLocalFastaFile(tmpdir, fsBc.value, fastaFile, indexFile) ReferenceSequenceFileFactory.getReferenceSequenceFile(new java.io.File(uriPath(localFastaFile))) } diff --git a/hail/src/main/scala/is/hail/io/reference/LiftOver.scala b/hail/src/main/scala/is/hail/io/reference/LiftOver.scala index d2f072929a5..0c6720ca3a5 100644 --- a/hail/src/main/scala/is/hail/io/reference/LiftOver.scala +++ b/hail/src/main/scala/is/hail/io/reference/LiftOver.scala @@ -1,5 +1,6 @@ package is.hail.io.reference +import is.hail.backend.BroadcastValue import is.hail.expr.ir.ExecuteContext import is.hail.variant.{Locus, ReferenceGenome} import is.hail.utils._ @@ -9,9 +10,19 @@ import scala.collection.JavaConverters._ import scala.collection.concurrent import scala.language.implicitConversions +class SerializableHtsjdkLiftOver(val tmpdir: String, val fsBc: BroadcastValue[FS], val chainFile: String) extends Serializable { + @transient lazy val value: htsjdk.samtools.liftover.LiftOver = { + val localChainFile = LiftOver.getLocalChainFile(tmpdir, fsBc.value, chainFile) + new htsjdk.samtools.liftover.LiftOver(new java.io.File(uriPath(localChainFile))) + } +} + object LiftOver { private[this] val localChainFiles: concurrent.Map[String, String] = new concurrent.TrieMap() + def getLocalChainFile(tmpdir: String, fs: FS, chainFile: String): String = + localChainFiles.getOrElseUpdate(chainFile, LiftOver.setup(tmpdir, fs, chainFile)) + def setup(tmpdir: String, fs: FS, chainFile: String): String = { val localChainFile = ExecuteContext.createTmpPathNoCleanup(tmpdir, "lift-over", "chain") fs.copyRecode(chainFile, localChainFile) @@ -23,11 +34,11 @@ object LiftOver { } def apply(tmpdir: String, fs: FS, chainFile: String): LiftOver = - new LiftOver(localChainFiles.getOrElseUpdate(chainFile, LiftOver.setup(tmpdir, fs, chainFile)), chainFile) + new LiftOver(tmpdir, fs.broadcast, chainFile) } -class LiftOver(localChainFile: String, val chainFile: String) { - val lo = new htsjdk.samtools.liftover.LiftOver(new java.io.File(uriPath(localChainFile))) +class LiftOver(val tmpdir: String, val fsBc: BroadcastValue[FS], val chainFile: String) extends Serializable { + val lo = new SerializableHtsjdkLiftOver(tmpdir, fsBc, chainFile) def queryInterval(interval: is.hail.utils.Interval, minMatch: Double = htsjdk.samtools.liftover.LiftOver.DEFAULT_LIFTOVER_MINMATCH): (is.hail.utils.Interval, Boolean) = { val start = interval.start.asInstanceOf[Locus] @@ -43,7 +54,7 @@ class LiftOver(localChainFile: String, val chainFile: String) { if (startPos == endPos) fatal(s"Cannot liftover a 0-length interval: ${ interval.toString }.\nDid you mean to use 'liftover_locus'?") - val result = lo.liftOver(new 
htsjdk.samtools.util.Interval(contig, startPos, endPos), minMatch) + val result = lo.value.liftOver(new htsjdk.samtools.util.Interval(contig, startPos, endPos), minMatch) if (result != null) (Interval( Locus(result.getContig, result.getStart), @@ -56,7 +67,7 @@ class LiftOver(localChainFile: String, val chainFile: String) { } def queryLocus(l: Locus, minMatch: Double = htsjdk.samtools.liftover.LiftOver.DEFAULT_LIFTOVER_MINMATCH): (Locus, Boolean) = { - val result = lo.liftOver(new htsjdk.samtools.util.Interval(l.contig, l.position, l.position), minMatch) + val result = lo.value.liftOver(new htsjdk.samtools.util.Interval(l.contig, l.position, l.position), minMatch) if (result != null) (Locus(result.getContig, result.getStart), result.isNegativeStrand) else @@ -64,7 +75,7 @@ class LiftOver(localChainFile: String, val chainFile: String) { } def checkChainFile(srcRG: ReferenceGenome, destRG: ReferenceGenome) { - val cMap = lo.getContigMap.asScala + val cMap = lo.value.getContigMap.asScala cMap.foreach { case (srcContig, destContigs) => srcRG.checkContig(srcContig) destContigs.asScala.foreach(destRG.checkContig) diff --git a/hail/src/main/scala/is/hail/linalg/BLAS.scala b/hail/src/main/scala/is/hail/linalg/BLAS.scala index ddd95965148..dfd4e1ca620 100644 --- a/hail/src/main/scala/is/hail/linalg/BLAS.scala +++ b/hail/src/main/scala/is/hail/linalg/BLAS.scala @@ -68,22 +68,9 @@ object BLAS { libraryInstance.get.dgemm(TRANSA, TRANSB, mInt, nInt, kInt, alphaDouble, A, LDAInt, B, LDBInt, betaDouble, C, LDCInt) } - - def dgemv(TRANS: String, M: Int, N: Int, ALPHA: Double, A: Long, LDA: Int, X: Long, INCX: Int, BETA: Double, Y: Long, INCY: Int): Unit = { - val mInt = new IntByReference(M) - val nInt = new IntByReference(N) - val alphaDouble = new DoubleByReference(ALPHA) - val LDAInt = new IntByReference(LDA) - val betaDouble = new DoubleByReference(BETA) - val incxInt = new IntByReference(INCX) - val incyInt = new IntByReference(INCY) - - libraryInstance.get.dgemv(TRANS, mInt, nInt, alphaDouble, A, LDAInt, X, incxInt, betaDouble, Y, incyInt) - } } trait BLASLibrary extends Library { - def dgemv(TRANS: String, M: IntByReference, N: IntByReference, ALPHA: DoubleByReference, A: Long, LDA: IntByReference, X: Long, INCX: IntByReference, BETA: DoubleByReference, Y: Long, INCY: IntByReference) def dgemm(TRANSA: String, TRANSB: String, M: IntByReference, N: IntByReference, K: IntByReference, ALPHA: DoubleByReference, A: Long, LDA: IntByReference, B: Long, LDB: IntByReference, BETA: DoubleByReference, C: Long, LDC: IntByReference) diff --git a/hail/src/main/scala/is/hail/linalg/LAPACK.scala b/hail/src/main/scala/is/hail/linalg/LAPACK.scala index 3e24e41721b..95868931382 100644 --- a/hail/src/main/scala/is/hail/linalg/LAPACK.scala +++ b/hail/src/main/scala/is/hail/linalg/LAPACK.scala @@ -98,7 +98,7 @@ object LAPACK { } def dgesv(N: Int, NHRS: Int, A: Long, LDA: Int, IPIV: Long, B: Long, LDB: Int): Int = { - val Nref = new IntByReference(N) + val Nref= new IntByReference(N) val NHRSref = new IntByReference(NHRS) val LDAref = new IntByReference(LDA) val LDBref = new IntByReference(LDB) @@ -109,16 +109,6 @@ object LAPACK { INFOref.getValue() } - def dtrtrs(UPLO: String, TRANS: String, DIAG: String, N: Int, NRHS: Int, - A: Long, LDA: Int, B: Long, LDB: Int): Int = { - val Nref = new IntByReference(N) - val NRHSref = new IntByReference(NRHS) - val LDAref = new IntByReference(LDA) - val LDBref = new IntByReference(LDB) - val INFOref = new IntByReference(1) - libraryInstance.get.dtrtrs(UPLO, TRANS, DIAG, Nref, 
NRHSref, A, LDAref, B, LDBref, INFOref) - INFOref.getValue() - } private def versionTest(libInstance: LAPACKLibrary): Try[String] = { val major = new IntByReference() @@ -140,5 +130,4 @@ trait LAPACKLibrary extends Library { def dgetri(N: IntByReference, A: Long, LDA: IntByReference, IPIV: Long, WORK: Long, LWORK: IntByReference, INFO: IntByReference) def dgesdd(JOBZ: String, M: IntByReference, N: IntByReference, A: Long, LDA: IntByReference, S: Long, U: Long, LDU: IntByReference, VT: Long, LDVT: IntByReference, WORK: Long, LWORK: IntByReference, IWORK: Long, INFO: IntByReference) def ilaver(MAJOR: IntByReference, MINOR: IntByReference, PATCH: IntByReference) - def dtrtrs(UPLO: String, TRANS: String, DIAG: String, N: IntByReference, NRHS: IntByReference, A: Long, LDA: IntByReference, B: Long, LDB: IntByReference, INFO:IntByReference) } diff --git a/hail/src/main/scala/is/hail/linalg/LinalgCodeUtils.scala b/hail/src/main/scala/is/hail/linalg/LinalgCodeUtils.scala index b9a17a67c7a..dff7c72ba31 100644 --- a/hail/src/main/scala/is/hail/linalg/LinalgCodeUtils.scala +++ b/hail/src/main/scala/is/hail/linalg/LinalgCodeUtils.scala @@ -2,10 +2,9 @@ package is.hail.linalg import is.hail.annotations.Region import is.hail.asm4s.{Code, _} -import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder} -import is.hail.types.physical.PCanonicalNDArray -import is.hail.types.physical.stypes.concrete.SUnreachableNDArray -import is.hail.types.physical.stypes.interfaces.{SNDArray, SNDArrayCode, SNDArraySettable, SNDArrayValue} +import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, IEmitCode} +import is.hail.types.physical.stypes.concrete.{SNDArrayPointer, SNDArrayPointerSettable} +import is.hail.types.physical.stypes.interfaces.{SNDArray, SNDArrayCode, SNDArrayValue} import is.hail.utils.FastIndexedSeq object LinalgCodeUtils { @@ -15,11 +14,12 @@ object LinalgCodeUtils { val strides = pndv.strides(cb) val runningProduct = cb.newLocal[Long]("check_column_major_running_product") - val st = pndv.st + val pt = pndv.st.asInstanceOf[SNDArrayPointer].pType + val elementType = pt.elementType val nDims = pndv.st.nDims cb.assign(answer, true) - cb.assign(runningProduct, st.elementByteSize) + cb.assign(runningProduct, elementType.byteSize) (0 until nDims).foreach{ index => cb.assign(answer, answer & (strides(index) ceq runningProduct)) cb.assign(runningProduct, runningProduct * (shapes(index) > 0L).mux(shapes(index), 1L)) @@ -33,11 +33,12 @@ object LinalgCodeUtils { val strides = pndv.strides(cb) val runningProduct = cb.newLocal[Long]("check_column_major_running_product") - val st = pndv.st - val nDims = st.nDims + val pt = pndv.st.asInstanceOf[SNDArrayPointer].pType + val elementType = pt.elementType + val nDims = pt.nDims cb.assign(answer, true) - cb.assign(runningProduct, st.elementByteSize) + cb.assign(runningProduct, elementType.byteSize) ((nDims - 1) to 0 by -1).foreach { index => cb.assign(answer, answer & (strides(index) ceq runningProduct)) cb.assign(runningProduct, runningProduct * (shapes(index) > 0L).mux(shapes(index), 1L)) @@ -47,20 +48,22 @@ object LinalgCodeUtils { def createColumnMajorCode(pndv: SNDArrayValue, cb: EmitCodeBuilder, region: Value[Region]): SNDArrayCode = { val shape = pndv.shapes(cb) - val pt = PCanonicalNDArray(pndv.st.elementType.canonicalPType().setRequired(true), pndv.st.nDims, false) + val pt = pndv.st.asInstanceOf[SNDArrayPointer].pType val strides = pt.makeColumnMajorStrides(shape, region, cb) val (dataFirstElementAddress, dataFinisher) = pt.constructDataFunction(shape, strides, 
cb, region) // construct an SNDArrayCode with undefined contents val result = dataFinisher(cb).memoize(cb, "col_major_result") - result.coiterateMutate(cb, region, (pndv.get, "pndv")) { case Seq(l, r) => r } + SNDArray.coiterate(cb, region, FastIndexedSeq((result.get, "result"), (pndv.get, "pndv")), { + case Seq(l, r) => cb.assign(l, r) + }) result.get } def checkColMajorAndCopyIfNeeded(aInput: SNDArrayValue, cb: EmitCodeBuilder, region: Value[Region]): SNDArrayValue = { val aIsColumnMajor = LinalgCodeUtils.checkColumnMajor(aInput, cb) - val aColMajor = cb.emb.newPField("ndarray_output_column_major", aInput.st).asInstanceOf[SNDArraySettable] + val aColMajor = cb.emb.newPField("ndarray_output_column_major", aInput.st).asInstanceOf[SNDArrayPointerSettable] cb.ifx(aIsColumnMajor, {cb.assign(aColMajor, aInput)}, { cb.assign(aColMajor, LinalgCodeUtils.createColumnMajorCode(aInput, cb, region)) @@ -69,11 +72,8 @@ object LinalgCodeUtils { } def checkStandardStriding(aInput: SNDArrayValue, cb: EmitCodeBuilder, region: Value[Region]): (SNDArrayValue, Value[Boolean]) = { - if (aInput.st.isInstanceOf[SUnreachableNDArray]) - return (aInput, const(true)) - val aIsColumnMajor = LinalgCodeUtils.checkColumnMajor(aInput, cb) - val a = cb.emb.newPField("ndarray_output_standardized", aInput.st).asInstanceOf[SNDArraySettable] + val a = cb.emb.newPField("ndarray_output_standardized", aInput.st).asInstanceOf[SNDArrayPointerSettable] cb.ifx(aIsColumnMajor, {cb.assign(a, aInput)}, { val isRowMajor = LinalgCodeUtils.checkRowMajor(aInput, cb) cb.ifx(isRowMajor, {cb.assign(a, aInput)}, { diff --git a/hail/src/main/scala/is/hail/rvd/AbstractRVDSpec.scala b/hail/src/main/scala/is/hail/rvd/AbstractRVDSpec.scala index 078d89d582d..ce9e47fed83 100644 --- a/hail/src/main/scala/is/hail/rvd/AbstractRVDSpec.scala +++ b/hail/src/main/scala/is/hail/rvd/AbstractRVDSpec.scala @@ -124,13 +124,7 @@ object AbstractRVDSpec { val extendedNewPartitioner = newPartitioner.map(_.extendKey(partitioner.kType)) val tmpPartitioner = extendedNewPartitioner match { case Some(np) => np.intersect(partitioner) - case None => - val iOrd = partitioner.kord.intervalEndpointOrdering - val includedIndices = (0 until partitioner.numPartitions).filter { i => - val rb = partitioner.rangeBounds(i) - !rb.isDisjointFrom(iOrd, rb) - }.toArray - partitioner.copy(rangeBounds = includedIndices.map(partitioner.rangeBounds)) + case None => partitioner } val (indexSpecLeft, indexSpecRight) = (specLeft, specRight) match { @@ -195,23 +189,16 @@ object AbstractRVDSpec { val partitioner = specLeft.partitioner val extendedNewPartitioner = newPartitioner.map(_.extendKey(partitioner.kType)) - val (parts, tmpPartitioner) = extendedNewPartitioner match { - case Some(np) => - val tmpPart = np.intersect(partitioner) - assert(specLeft.key.nonEmpty) - val p = tmpPart.rangeBounds.map { b => specLeft.partFiles(partitioner.lowerBoundInterval(b)) } - (p, tmpPart) - case None => - // need to remove partitions with degenerate intervals - // these partitions are necessarily empty - val iOrd = partitioner.kord.intervalEndpointOrdering - val includedIndices = (0 until partitioner.numPartitions).filter { i => - val rb = partitioner.rangeBounds(i) - !rb.isDisjointFrom(iOrd, rb) - }.toArray - (includedIndices.map(specLeft.partFiles), partitioner.copy(rangeBounds = includedIndices.map(partitioner.rangeBounds))) + val tmpPartitioner = extendedNewPartitioner match { + case Some(np) => np.intersect(partitioner) + case None => partitioner } + val parts = if (specLeft.key.isEmpty) + 
specLeft.partFiles + else + tmpPartitioner.rangeBounds.map { b => specLeft.partFiles(partitioner.lowerBoundInterval(b)) } + val (isl, isr) = (specLeft, specRight) match { case (l: Indexed, r: Indexed) => (Some(l.indexSpec), Some(r.indexSpec)) case _ => (None, None) @@ -281,8 +268,9 @@ abstract class AbstractRVDSpec { case Some(_) => fatal("attempted to read unindexed data as indexed") case None => if (!partitioner.kType.fieldNames.startsWith(requestedType.key)) - fatal(s"Error while reading table ${ path }: legacy table written without key." + - s"\n Read and write with version 0.2.70 or earlier") + fatal(s"cannot generate whole-stage code for legacy table: " + + s"table key = [${ requestedType.key.mkString(", ") }], " + + s"key on disk: [${ partitioner.kType.fieldNames.mkString(", ") }]") val rSpec = typedCodecSpec diff --git a/hail/src/main/scala/is/hail/types/encoded/EArray.scala b/hail/src/main/scala/is/hail/types/encoded/EArray.scala index 899d511d091..632ceae660f 100644 --- a/hail/src/main/scala/is/hail/types/encoded/EArray.scala +++ b/hail/src/main/scala/is/hail/types/encoded/EArray.scala @@ -61,7 +61,7 @@ final case class EArray(val elementType: EType, override val required: Boolean = cb.ifx(value.isElementMissing(i), cb.assign(b, b | (const(1) << shift))) cb.assign(shift, shift + 1) cb.assign(i, i + 1) - cb.ifx(shift.ceq(8), { + cb.ifx(shift.ceq(7), { cb.assign(shift, 0) cb += out.writeByte(b.toB) cb.assign(b, 0) diff --git a/hail/src/main/scala/is/hail/types/encoded/EBaseStruct.scala b/hail/src/main/scala/is/hail/types/encoded/EBaseStruct.scala index cb578b13841..96c9a802491 100644 --- a/hail/src/main/scala/is/hail/types/encoded/EBaseStruct.scala +++ b/hail/src/main/scala/is/hail/types/encoded/EBaseStruct.scala @@ -8,7 +8,7 @@ import is.hail.types.BaseStruct import is.hail.types.physical._ import is.hail.types.physical.stypes.{SCode, SType, SValue} import is.hail.types.physical.stypes.concrete._ -import is.hail.types.physical.stypes.interfaces.{SBaseStructValue, SLocus, SLocusValue} +import is.hail.types.physical.stypes.interfaces.SBaseStructValue import is.hail.types.virtual._ import is.hail.utils._ @@ -75,7 +75,10 @@ final case class EBaseStruct(fields: IndexedSeq[EField], override val required: case SIntervalPointer(t: PCanonicalInterval) => new SBaseStructPointerSettable( SBaseStructPointer(t.representation), v.asInstanceOf[SIntervalPointerSettable].a) - case _: SLocus => v.asInstanceOf[SLocusValue].structRepr(cb) + case SCanonicalLocusPointer(t) => + new SBaseStructPointerSettable( + SBaseStructPointer(t.representation), + v.asInstanceOf[SCanonicalLocusPointerSettable].a) case _ => v.asInstanceOf[SBaseStructValue] } // write missing bytes diff --git a/hail/src/main/scala/is/hail/types/encoded/EBinary.scala b/hail/src/main/scala/is/hail/types/encoded/EBinary.scala index a10824b6865..9185f7dcf12 100644 --- a/hail/src/main/scala/is/hail/types/encoded/EBinary.scala +++ b/hail/src/main/scala/is/hail/types/encoded/EBinary.scala @@ -9,7 +9,7 @@ import is.hail.types.virtual._ import is.hail.io.{InputBuffer, OutputBuffer} import is.hail.types.physical.stypes.{SCode, SType, SValue} import is.hail.types.physical.stypes.concrete.{SBinaryPointer, SBinaryPointerCode, SBinaryPointerSettable, SStringPointer, SStringPointerCode, SStringPointerSettable} -import is.hail.types.physical.stypes.interfaces.{SBinary, SBinaryValue, SString} +import is.hail.types.physical.stypes.interfaces.SBinaryValue import is.hail.utils._ case object EBinaryOptional extends EBinary(false) @@ -19,9 +19,8 @@ class 
EBinary(override val required: Boolean) extends EType { override def _buildEncoder(cb: EmitCodeBuilder, v: SValue, out: Value[OutputBuffer]): Unit = { val bin = v.st match { - case _: SBinary => v.asInstanceOf[SBinaryValue] + case SBinaryPointer(t) => v.asInstanceOf[SBinaryValue] case SStringPointer(t) => new SBinaryPointerSettable(SBinaryPointer(t.binaryRepresentation), v.asInstanceOf[SStringPointerSettable].a) - case _: SString => v.asString.asBytes().memoize(cb, "encoder_sstring") } val len = cb.newLocal[Int]("len", bin.loadLength()) diff --git a/hail/src/main/scala/is/hail/types/encoded/EInt32.scala b/hail/src/main/scala/is/hail/types/encoded/EInt32.scala index 289536e491b..1acac463b3b 100644 --- a/hail/src/main/scala/is/hail/types/encoded/EInt32.scala +++ b/hail/src/main/scala/is/hail/types/encoded/EInt32.scala @@ -7,7 +7,7 @@ import is.hail.io.{InputBuffer, OutputBuffer} import is.hail.types.physical._ import is.hail.types.physical.stypes.{SCode, SType, SValue} import is.hail.types.physical.stypes.concrete.{SCanonicalCall, SCanonicalCallCode} -import is.hail.types.physical.stypes.interfaces.{SCall, SCallValue} +import is.hail.types.physical.stypes.interfaces.SCallValue import is.hail.types.physical.stypes.primitives.{SInt32, SInt32Code} import is.hail.types.virtual._ import is.hail.utils._ @@ -19,7 +19,7 @@ case object EInt32Required extends EInt32(true) class EInt32(override val required: Boolean) extends EType { override def _buildEncoder(cb: EmitCodeBuilder, v: SValue, out: Value[OutputBuffer]): Unit = { val x = v.st match { - case _: SCall => v.asInstanceOf[SCallValue].canonicalCall(cb) + case SCanonicalCall => v.asInstanceOf[SCallValue].canonicalCall(cb) case SInt32 => v.asInt32.intCode(cb) } cb += out.writeInt(x) diff --git a/hail/src/main/scala/is/hail/types/encoded/ENDArrayColumnMajor.scala b/hail/src/main/scala/is/hail/types/encoded/ENDArrayColumnMajor.scala index 97dd082523e..7d8a9171855 100644 --- a/hail/src/main/scala/is/hail/types/encoded/ENDArrayColumnMajor.scala +++ b/hail/src/main/scala/is/hail/types/encoded/ENDArrayColumnMajor.scala @@ -19,11 +19,11 @@ case class ENDArrayColumnMajor(elementType: EType, nDims: Int, required: Boolean val shapes = ndarray.shapes(cb) shapes.foreach(s => cb += out.writeLong(s)) - SNDArray.coiterate(cb, (ndarray.get, "A")){ + SNDArray.coiterate(cb, null, FastIndexedSeq((ndarray.get, "A")), { case Seq(elt) => elementType.buildEncoder(elt.st, cb.emb.ecb) .apply(cb, elt, out) - } + }) } override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): SCode = { diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalArray.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalArray.scala index beec649118e..86a4f357894 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalArray.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalArray.scala @@ -422,9 +422,9 @@ final case class PCanonicalArray(elementType: PType, required: Boolean = false) value.asInstanceOf[SIndexablePointerCode].a case _ => val newAddr = cb.newLocal[Long]("pcarray_store_newaddr") - val valueMemo = value.asIndexable.memoize(cb, "pcarray_store_src_difftype") - cb.assign(newAddr, allocate(region, valueMemo.loadLength())) - storeContentsAtAddress(cb, newAddr, region, valueMemo, deepCopy) + val pcInd = value.asIndexable.memoize(cb, "pcarray_store_src_sametype").asInstanceOf[SIndexablePointerSettable] + cb.assign(newAddr, allocate(region, pcInd.loadLength())) + storeContentsAtAddress(cb, newAddr, 
region, pcInd, deepCopy) newAddr } diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalBaseStruct.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalBaseStruct.scala index 147b79d5f1d..42ef32c8793 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalBaseStruct.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalBaseStruct.scala @@ -90,7 +90,8 @@ abstract class PCanonicalBaseStruct(val types: Array[PType]) extends PBaseStruct types(fieldIdx).unstagedLoadFromNested(off) } - def loadField(offset: Code[Long], fieldIdx: Int): Code[Long] = loadField(fieldOffset(offset, fieldIdx), types(fieldIdx)) + def loadField(offset: Code[Long], fieldIdx: Int): Code[Long] = + loadField(fieldOffset(offset, fieldIdx), types(fieldIdx)) private def loadField(fieldOffset: Code[Long], fieldType: PType): Code[Long] = { fieldType.loadFromNested(fieldOffset) diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalBinary.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalBinary.scala index 808a163b09d..52f669a8a74 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalBinary.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalBinary.scala @@ -135,15 +135,16 @@ class PCanonicalBinary(val required: Boolean) extends PBinary { def store(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): Code[Long] = { value.st match { - case SBinaryPointer(PCanonicalBinary(_)) if !deepCopy => - value.asInstanceOf[SBinaryPointerCode].a - case _ => - val bv = value.asBinary.memoize(cb, "pcbin_store") - val len = cb.newLocal[Int]("pcbinary_store_len", bv.loadLength()) - val newAddr = cb.newLocal[Long]("pcbinary_store_newaddr", allocate(region, len)) - cb += storeLength(newAddr, len) - cb += Region.copyFrom(bv.bytesAddress(), bytesAddress(newAddr), len.toL) - newAddr + case SBinaryPointer(PCanonicalBinary(_)) => + if (deepCopy) { + val memoizedValue = value.memoize(cb, "pcbinary_store_value").asInstanceOf[SBinaryPointerSettable] + val len = cb.newLocal[Int]("pcbinary_store_len", memoizedValue.loadLength()) + val newAddr = cb.newLocal[Long]("pcbinary_store_newaddr", allocate(region, len)) + cb += storeLength(newAddr, len) + cb += Region.copyFrom(bytesAddress(memoizedValue.a), bytesAddress(newAddr), len.toL) + newAddr + } else + value.asInstanceOf[SBinaryPointerCode].a } } diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalCall.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalCall.scala index 30c8cf61ea8..c124eed52a5 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalCall.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalCall.scala @@ -52,7 +52,7 @@ final case class PCanonicalCall(required: Boolean = false) extends PCall { } def storeAtAddress(cb: EmitCodeBuilder, addr: Code[Long], region: Value[Region], value: SCode, deepCopy: Boolean): Unit = { - cb += Region.storeInt(addr, value.asCall.loadCanonicalRepresentation(cb)) + cb += Region.storeInt(addr, value.asInstanceOf[SCanonicalCallCode].call) } def loadFromNested(addr: Code[Long]): Code[Long] = representation.loadFromNested(addr) diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalInterval.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalInterval.scala index a4dfe11f06f..488decf4e50 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalInterval.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalInterval.scala @@ -89,7 +89,6 @@ final case class 
PCanonicalInterval(pointType: PType, override val required: Boo srcPType match { case t: PCanonicalInterval => representation.unstagedStoreAtAddress(addr, region, t.representation, srcAddress, deepCopy) - } } diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalLocus.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalLocus.scala index b8c556e8705..e1c1997f345 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalLocus.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalLocus.scala @@ -5,7 +5,7 @@ import is.hail.asm4s._ import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCode, EmitCodeBuilder, EmitMethodBuilder} import is.hail.types.physical.stypes.SCode -import is.hail.types.physical.stypes.concrete.{SCanonicalLocusPointer, SCanonicalLocusPointerCode, SStackStruct, SStringPointer} +import is.hail.types.physical.stypes.concrete.{SCanonicalLocusPointer, SCanonicalLocusPointerCode, SStringPointer} import is.hail.types.physical.stypes.interfaces._ import is.hail.utils.FastIndexedSeq import is.hail.variant._ @@ -99,10 +99,6 @@ final case class PCanonicalLocus(rgBc: BroadcastRG, required: Boolean = false) e value.st match { case SCanonicalLocusPointer(pt) => representation.store(cb, region, pt.representation.loadCheapSCode(cb, value.asInstanceOf[SCanonicalLocusPointerCode].a), deepCopy) - case _ => - val addr = representation.allocate(region) - storeAtAddress(cb, addr, region, value, deepCopy) - addr } } @@ -110,12 +106,6 @@ final case class PCanonicalLocus(rgBc: BroadcastRG, required: Boolean = false) e value.st match { case SCanonicalLocusPointer(pt) => representation.storeAtAddress(cb, addr, region, pt.representation.loadCheapSCode(cb, value.asInstanceOf[SCanonicalLocusPointerCode].a), deepCopy) - case _ => - val loc = value.asLocus.memoize(cb, "pclocus_store") - representation.storeAtAddress(cb, addr, region, - SStackStruct.constructFromArgs(cb, region, representation.virtualType, - EmitCode.present(cb.emb, loc.contig(cb)), EmitCode.present(cb.emb, primitive(loc.position(cb)))), - deepCopy) } } diff --git a/hail/src/main/scala/is/hail/types/physical/PCanonicalNDArray.scala b/hail/src/main/scala/is/hail/types/physical/PCanonicalNDArray.scala index 72b99c744a8..bd5d58a3b68 100644 --- a/hail/src/main/scala/is/hail/types/physical/PCanonicalNDArray.scala +++ b/hail/src/main/scala/is/hail/types/physical/PCanonicalNDArray.scala @@ -2,11 +2,11 @@ package is.hail.types.physical import is.hail.annotations.{Annotation, NDArray, Region, UnsafeOrdering} import is.hail.asm4s.{Code, _} -import is.hail.expr.ir.{CodeParam, CodeParamType, EmitCode, EmitCodeBuilder, Param, ParamType, SCodeParam} +import is.hail.expr.ir.{CodeParam, CodeParamType, EmitCode, EmitCodeBuilder, SCodeParam, Param, ParamType} import is.hail.types.physical.stypes.SCode import is.hail.types.physical.stypes.interfaces._ import is.hail.types.virtual.{TNDArray, Type} -import is.hail.types.physical.stypes.concrete.{SIndexablePointer, SIndexablePointerSettable, SNDArrayPointer, SNDArrayPointerCode, SNDArrayPointerSettable, SStackStruct} +import is.hail.types.physical.stypes.concrete.{SNDArrayPointer, SNDArrayPointerCode, SStackStruct} import org.apache.spark.sql.Row import is.hail.utils._ @@ -61,12 +61,14 @@ final case class PCanonicalNDArray(elementType: PType, nDims: Int, required: Boo this.loadStride(addr, dimIdx) } } + + val dataType: PCanonicalArray = PCanonicalArray(elementType, required = true) lazy val representation: PCanonicalStruct = { PCanonicalStruct(required, 
("shape", shapeType), ("strides", strideType), - ("data", PInt64Required)) + ("data", dataType)) } override lazy val byteSize: Long = representation.byteSize @@ -110,20 +112,27 @@ final case class PCanonicalNDArray(elementType: PType, nDims: Int, required: Boo } def getElementAddress(indices: IndexedSeq[Long], nd: Long): Long = { + val dataLength = (0 until nDims).map(loadShape(nd, _)).foldLeft(1L)(_ * _) + val dataAddress = this.representation.loadField(nd, 2) + var bytesAway = 0L indices.zipWithIndex.foreach{case (requestedIndex: Long, strideIndex: Int) => bytesAway += requestedIndex * loadStride(nd, strideIndex) } - bytesAway + this.unstagedDataFirstElementPointer(nd) + + bytesAway + dataType.firstElementOffset(dataAddress, dataLength.toInt) } private def getElementAddress(cb: EmitCodeBuilder, indices: IndexedSeq[Value[Long]], nd: Value[Long]): Value[Long] = { val ndarrayValue = loadCheapSCode(cb, nd).asNDArray.memoize(cb, "getElementAddressNDValue") val stridesTuple = ndarrayValue.strides(cb) + val dataStore = cb.newLocal[Long]("nd_get_element_address_data_store", + representation.loadField(nd, "data")) + cb.newLocal[Long]("pcndarray_get_element_addr", indices.zipWithIndex.map { case (requestedElementIndex, strideIndex) => requestedElementIndex * stridesTuple(strideIndex) - }.foldLeft(const(0L).get)(_ + _) + ndarrayValue.firstDataAddress(cb)) + }.foldLeft(const(0L).get)(_ + _) + dataType.firstElementOffset(dataStore, dataType.loadLength(dataStore))) } def setElement(cb: EmitCodeBuilder, region: Value[Region], @@ -151,20 +160,18 @@ final case class PCanonicalNDArray(elementType: PType, nDims: Int, required: Boo elementType.loadFromNested(off) } - def contentsByteSize(numElements: Long): Long = this.elementType.byteSize * numElements - - def contentsByteSize(numElements: Code[Long]): Code[Long] = { - numElements * elementType.byteSize - } - - def allocateData(shape: IndexedSeq[Value[Long]], region: Value[Region]): Code[Long] = { - val sizeOfArray = this.contentsByteSize(this.numElements(shape).toL) - region.allocateSharedChunk(sizeOfArray) + def allocate(shape: IndexedSeq[Value[Long]], region: Value[Region]): Code[Long] = { + //Need to allocate enough space to construct my tuple, then to construct the array right next to it. + val sizeOfArray = this.dataType.contentsByteSize(this.numElements(shape).toI) + val sizeOfStruct = this.representation.byteSize + region.allocateNDArray(sizeOfArray + sizeOfStruct) } - def allocateData(shape: IndexedSeq[Long], region: Region): Long = { - val sizeOfArray: Long = this.contentsByteSize(shape.product) - region.allocateSharedChunk(sizeOfArray) + def allocate(shape: IndexedSeq[Long], region: Region): Long = { + //Need to allocate enough space to construct my tuple, then to construct the array right next to it. 
+ val sizeOfArray: Long = this.dataType.contentsByteSize(shape.product.toInt) + val sizeOfStruct = this.representation.byteSize + region.allocateNDArray(sizeOfArray + sizeOfStruct) } def constructByCopyingArray( @@ -189,7 +196,7 @@ final case class PCanonicalNDArray(elementType: PType, nDims: Int, required: Boo val strides = (0 until nDims).map(i => mb.getCodeParam[Long](3 + nDims + i)) val ndAddr = cb.newLocal[Long]("ndarray_construct_addr") - cb.assign(ndAddr, this.representation.allocate(region)) + cb.assign(ndAddr, this.allocate(shape, region)) shapeType.storeAtAddress(cb, cb.newLocal[Long]("construct_shape", this.representation.fieldOffset(ndAddr, "shape")), region, SStackStruct.constructFromArgs(cb, region, shapeType.virtualType, shape.map(s => EmitCode.present(cb.emb, primitive(s))): _*), @@ -199,21 +206,12 @@ final case class PCanonicalNDArray(elementType: PType, nDims: Int, required: Boo SStackStruct.constructFromArgs(cb, region, strideType.virtualType, strides.map(s => EmitCode.present(cb.emb, primitive(s))): _*), false) - val newDataPointer = cb.newLocal("ndarray_construct_new_data_pointer", this.allocateData(shape, region)) - cb.append(Region.storeAddress(this.representation.fieldOffset(ndAddr, "data"), newDataPointer)) - val result = new SNDArrayPointerCode(sType, ndAddr).memoize(cb, "construct_by_copying_array_result") - - dataValue.st match { - case SIndexablePointer(PCanonicalArray(otherElementType, _)) if otherElementType == elementType => - cb += Region.copyFrom(dataValue.asInstanceOf[SIndexablePointerSettable].elementsAddress, newDataPointer, dataValue.loadLength().toL * elementType.byteSize) - case _ => - val loopCtr = cb.newLocal[Long]("pcanonical_ndarray_construct_by_copying_loop_idx") - cb.forLoop(cb.assign(loopCtr, 0L), loopCtr < dataValue.loadLength().toL, cb.assign(loopCtr, loopCtr + 1L), { - elementType.storeAtAddress(cb, newDataPointer + (loopCtr * elementType.byteSize), region, dataValue.loadElement(cb, loopCtr.toI).get(cb, "NDArray elements cannot be missing"), true) - }) - } + val newDataPointer = cb.newLocal("ndarray_construct_new_data_pointer", ndAddr + this.representation.byteSize) + + cb.append(Region.storeLong(this.representation.fieldOffset(ndAddr, "data"), newDataPointer)) + dataType.storeContentsAtAddress(cb, newDataPointer, region, dataValue, true) - result + new SNDArrayPointerCode(sType, ndAddr) } } @@ -229,7 +227,7 @@ final case class PCanonicalNDArray(elementType: PType, nDims: Int, required: Boo ): (Value[Long], EmitCodeBuilder => SNDArrayPointerCode) = { val ndAddr = cb.newLocal[Long]("ndarray_construct_addr") - cb.assign(ndAddr, this.representation.allocate(region)) + cb.assign(ndAddr, this.allocate(shape, region)) shapeType.storeAtAddress(cb, cb.newLocal[Long]("construct_shape", this.representation.fieldOffset(ndAddr, "shape")), region, SStackStruct.constructFromArgs(cb, region, shapeType.virtualType, shape.map(s => EmitCode.present(cb.emb, primitive(s))): _*), @@ -239,112 +237,114 @@ final case class PCanonicalNDArray(elementType: PType, nDims: Int, required: Boo SStackStruct.constructFromArgs(cb, region, strideType.virtualType, strides.map(s => EmitCode.present(cb.emb, primitive(s))): _*), false) - val newDataPointer = cb.newLocal("ndarray_construct_new_data_pointer", this.allocateData(shape, region)) + val newDataPointer = cb.newLocal("ndarray_construct_new_data_pointer", ndAddr + this.representation.byteSize) cb.append(Region.storeLong(this.representation.fieldOffset(ndAddr, "data"), newDataPointer)) + //TODO Use the known length here val 
newFirstElementDataPointer = cb.newLocal[Long]("ndarray_construct_first_element_pointer", this.dataFirstElementPointer(ndAddr)) + cb.append(dataType.stagedInitialize(newDataPointer, this.numElements(shape).toI)) + (newFirstElementDataPointer, (cb: EmitCodeBuilder) => new SNDArrayPointerCode(sType, ndAddr)) } - def constructByCopyingDataPointer( - shape: IndexedSeq[Value[Long]], - strides: IndexedSeq[Value[Long]], - dataPtr: Code[Long], - cb: EmitCodeBuilder, - region: Value[Region] - ): SNDArrayPointerCode = { - val ndAddr = cb.newLocal[Long]("ndarray_construct_addr") - cb.assign(ndAddr, this.representation.allocate(region)) - shapeType.storeAtAddress(cb, cb.newLocal[Long]("construct_shape", this.representation.fieldOffset(ndAddr, "shape")), - region, - SStackStruct.constructFromArgs(cb, region, shapeType.virtualType, shape.map(s => EmitCode.present(cb.emb, primitive(s))): _*), - false) - strideType.storeAtAddress(cb, cb.newLocal[Long]("construct_strides", this.representation.fieldOffset(ndAddr, "strides")), - region, - SStackStruct.constructFromArgs(cb, region, strideType.virtualType, strides.map(s => EmitCode.present(cb.emb, primitive(s))): _*), - false) - cb += Region.storeAddress(this.representation.fieldOffset(ndAddr, 2), dataPtr) - new SNDArrayPointerCode(sType, ndAddr) - } + def unstagedConstructDataFunction( + shape: IndexedSeq[Long], + strides: IndexedSeq[Long], + region: Region + )(writeDataToAddress: Long => Unit): Long = { - def constructByActuallyCopyingData( - toBeCopied: SNDArrayValue, - cb: EmitCodeBuilder, - region: Value[Region] - ): SNDArrayCode = { - val oldDataAddr = toBeCopied.firstDataAddress(cb) - val numDataBytes = cb.newLocal("constructByActuallyCopyingData_numDataBytes", Region.getSharedChunkByteSize(oldDataAddr)) - cb.ifx(numDataBytes < 0L, cb._fatal("numDataBytes was ", numDataBytes.toS)) - val newDataAddr = cb.newLocal("constructByActuallyCopyingData_newDataAddr", region.allocateSharedChunk(numDataBytes)) - cb += Region.copyFrom(oldDataAddr, newDataAddr, numDataBytes) - constructByCopyingDataPointer( - toBeCopied.shapes(cb), - toBeCopied.strides(cb), - newDataAddr, - cb, - region - ) - } + val ndAddr = this.allocate(shape, region) + shapeType.unstagedStoreJavaObjectAtAddress(ndAddr, Row(shape:_*), region) + strideType.unstagedStoreJavaObjectAtAddress(ndAddr + shapeType.byteSize, Row(strides:_*), region) - def _copyFromAddress(region: Region, srcPType: PType, srcAddress: Long, deepCopy: Boolean): Long = { - val srcNDPType = srcPType.asInstanceOf[PCanonicalNDArray] - assert(nDims == srcNDPType.nDims) + val newDataPointer = ndAddr + this.representation.byteSize + Region.storeLong(this.representation.fieldOffset(ndAddr, 2), newDataPointer) - if (equalModuloRequired(srcPType) && !deepCopy) { - return srcAddress - } + val newFirstElementDataPointer = this.unstagedDataFirstElementPointer(ndAddr, shape) + dataType.initialize(newDataPointer, numElements(shape).toInt) + writeDataToAddress(newFirstElementDataPointer) - val newAddress = this.representation.allocate(region) - unstagedStoreAtAddress(newAddress, region, srcPType, srcAddress, deepCopy) - newAddress + ndAddr } - override def deepRename(t: Type) = deepRenameNDArray(t.asInstanceOf[TNDArray]) - - private def deepRenameNDArray(t: TNDArray) = - PCanonicalNDArray(this.elementType.deepRename(t.elementType), this.nDims, this.required) - - def setRequired(required: Boolean) = if(required == this.required) this else PCanonicalNDArray(elementType, nDims, required) + private def deepPointerCopy(region: Region, ndAddress: 
Long): Unit = { + // Tricky, need to rewrite the address of the data pointer to point to directly after the struct. + val shape = this.unstagedLoadShapes(ndAddress) + val firstElementAddressOld = this.unstagedDataFirstElementPointer(ndAddress, shape) + assert(this.elementType.containsPointers) + val arrayAddressNew = ndAddress + this.representation.byteSize + val numElements = this.numElements(shape) + this.dataType.initialize(arrayAddressNew, numElements.toInt) + Region.storeLong(this.representation.fieldOffset(ndAddress, 2), arrayAddressNew) + val firstElementAddressNew = this.dataType.firstElementOffset(arrayAddressNew) + + + var currentIdx = 0 + while(currentIdx < numElements) { + val currentElementAddressOld = firstElementAddressOld + currentIdx * elementType.byteSize + val currentElementAddressNew = firstElementAddressNew + currentIdx * elementType.byteSize + this.elementType.unstagedStoreAtAddress(currentElementAddressNew, region, this.elementType, elementType.unstagedLoadFromNested(currentElementAddressOld), true) + currentIdx += 1 + } + } - def unstagedStoreAtAddress(destAddress: Long, region: Region, srcPType: PType, srcAddress: Long, deepCopy: Boolean): Unit = { + def _copyFromAddress(region: Region, srcPType: PType, srcAddress: Long, deepCopy: Boolean): Long = { val srcNDPType = srcPType.asInstanceOf[PCanonicalNDArray] assert(nDims == srcNDPType.nDims) - if (equalModuloRequired(srcPType)) { // The situation where you can just memcpy - Region.copyFrom(srcAddress, destAddress, this.representation.field("shape").typ.byteSize + this.representation.field("strides").typ.byteSize) - - val srcDataAddress = srcNDPType.unstagedDataFirstElementPointer(srcAddress) - assert(!elementType.containsPointers) + if (equalModuloRequired(srcPType)) { // The situation where you can just memcpy, but then still have to update pointers. + if (!deepCopy) { + return srcAddress + } - val newDataAddress = { - if (deepCopy) { - region.trackSharedChunk(srcDataAddress) - } - srcDataAddress + // Deep copy, two scenarios. + if (elementType.containsPointers) { + // Can't just reference count change, since the elements have to be copied and updated. + val numBytes = PNDArray.getByteSize(srcAddress) + val newAddress = region.allocateNDArray(numBytes) + Region.copyFrom(srcAddress, newAddress, numBytes) + deepPointerCopy(region, newAddress) + newAddress + } + else { + region.trackNDArray(srcAddress) + srcAddress } - Region.storeAddress(this.representation.fieldOffset(destAddress, 2), newDataAddress) } else { // The situation where maybe the structs inside the ndarray have different requiredness + // Deep copy doesn't matter, we have to make a new one no matter what. 
val srcShape = srcPType.asInstanceOf[PNDArray].unstagedLoadShapes(srcAddress) val srcStrides = srcPType.asInstanceOf[PNDArray].unstagedLoadStrides(srcAddress) + val newAddress = this.unstagedConstructDataFunction(srcShape, srcStrides, region){ firstElementAddress => + var currentAddressToWrite = firstElementAddress - shapeType.unstagedStoreJavaObjectAtAddress(destAddress, Row(srcShape:_*), region) - strideType.unstagedStoreJavaObjectAtAddress(destAddress + shapeType.byteSize, Row(srcStrides:_*), region) + SNDArray.unstagedForEachIndex(srcShape) { indices => + val srcElementAddress = srcNDPType.getElementAddress(indices, srcAddress) + this.elementType.unstagedStoreAtAddress(currentAddressToWrite, region, srcNDPType.elementType, srcElementAddress, true) + currentAddressToWrite += elementType.byteSize + } + } - val newDataPointer = this.allocateData(srcShape, region) - Region.storeLong(this.representation.fieldOffset(destAddress, 2), newDataPointer) + newAddress + } - val newFirstElementDataPointer = this.unstagedDataFirstElementPointer(destAddress) + } - var currentAddressToWrite = newFirstElementDataPointer + override def deepRename(t: Type) = deepRenameNDArray(t.asInstanceOf[TNDArray]) - SNDArray.unstagedForEachIndex(srcShape) { indices => - val srcElementAddress = srcNDPType.getElementAddress(indices, srcAddress) - this.elementType.unstagedStoreAtAddress(currentAddressToWrite, region, srcNDPType.elementType, srcElementAddress, true) - currentAddressToWrite += elementType.byteSize - } + private def deepRenameNDArray(t: TNDArray) = + PCanonicalNDArray(this.elementType.deepRename(t.elementType), this.nDims, this.required) + + def setRequired(required: Boolean) = if(required == this.required) this else PCanonicalNDArray(elementType, nDims, required) + + def unstagedStoreAtAddress(addr: Long, region: Region, srcPType: PType, srcAddress: Long, deepCopy: Boolean): Unit = { + val srcND = srcPType.asInstanceOf[PCanonicalNDArray] + + if (deepCopy) { + region.trackNDArray(srcAddress) } + Region.storeAddress(addr, copyFromAddress(region, srcND, srcAddress, deepCopy)) } def sType: SNDArrayPointer = SNDArrayPointer(setRequired(false).asInstanceOf[PCanonicalNDArray]) @@ -352,75 +352,69 @@ final case class PCanonicalNDArray(elementType: PType, nDims: Int, required: Boo def loadCheapSCode(cb: EmitCodeBuilder, addr: Code[Long]): SCode = new SNDArrayPointerCode(sType, addr) def store(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): Code[Long] = { - val addr = cb.newField[Long]("pcanonical_ndarray_store", this.representation.allocate(region)) - storeAtAddress(cb, addr, region, value, deepCopy) - addr - } - - def storeAtAddress(cb: EmitCodeBuilder, addr: Code[Long], region: Value[Region], value: SCode, deepCopy: Boolean): Unit = { - val targetAddr = cb.newLocal[Long]("pcanonical_ndarray_store_at_addr_target", addr) - val inputSNDValue = value.asNDArray.memoize(cb, "pcanonical_ndarray_store_at_addr_input") - val shape = inputSNDValue.shapes(cb) - val strides = inputSNDValue.strides(cb) - val dataAddr = inputSNDValue.firstDataAddress(cb) - shapeType.storeAtAddress(cb, cb.newLocal[Long]("construct_shape", this.representation.fieldOffset(targetAddr, "shape")), - region, - SStackStruct.constructFromArgs(cb, region, shapeType.virtualType, shape.map(s => EmitCode.present(cb.emb, primitive(s))): _*), - false) - strideType.storeAtAddress(cb, cb.newLocal[Long]("construct_strides", this.representation.fieldOffset(targetAddr, "strides")), - region, - SStackStruct.constructFromArgs(cb, region, 
strideType.virtualType, strides.map(s => EmitCode.present(cb.emb, primitive(s))): _*), - false) - value.st match { - case SNDArrayPointer(t) if t.equalModuloRequired(this) => + case SNDArrayPointer(t) if t.equalModuloRequired(this) => + val storedAddress = cb.newLocal[Long]("pcanonical_ndarray_store", value.asInstanceOf[SNDArrayPointerCode].a) if (deepCopy) { - region.trackSharedChunk(cb, dataAddr) + cb.append(region.trackNDArray(storedAddress)) } - cb += Region.storeAddress(this.representation.fieldOffset(targetAddr, "data"), dataAddr) - case _ => - val newDataAddr = this.allocateData(shape, region) - cb += Region.storeAddress(this.representation.fieldOffset(targetAddr, "data"), newDataAddr) - val outputSNDValue = new SNDArrayPointerCode(sType, targetAddr).memoize(cb, "pcanonical_ndarray_store_at_addr_output") - outputSNDValue.coiterateMutate(cb, region, true, (inputSNDValue.get, "input")){ + storedAddress + case SNDArrayPointer(t) => + val oldND = value.asNDArray.memoize(cb, "pcanonical_ndarray_store_old") + val shape = oldND.shapes(cb) + val newStrides = makeColumnMajorStrides(shape, region, cb) + val (targetDataFirstElementAddr, finish) = this.constructDataFunction(shape, newStrides, cb, region) + val result = finish(cb) + + SNDArray.coiterate(cb, region, FastIndexedSeq((result, "result"), (oldND.get, "oldND")), { case Seq(dest, elt) => - elt - } + cb.assign(dest, elt) + }, deepCopy = true) + + result.a } } - def unstagedDataFirstElementPointer(ndAddr: Long): Long = - Region.loadAddress(representation.loadField(ndAddr, 2)) + def storeAtAddress(cb: EmitCodeBuilder, addr: Code[Long], region: Value[Region], value: SCode, deepCopy: Boolean): Unit = { + cb += Region.storeAddress(addr, store(cb, region, value, deepCopy)) + } - override def dataFirstElementPointer(ndAddr: Code[Long]): Code[Long] = Region.loadAddress(representation.loadField(ndAddr, "data")) + def unstagedDataFirstElementPointer(ndAddr: Long, shape: IndexedSeq[Long]): Long = + dataType.firstElementOffset(unstagedDataPArrayPointer(ndAddr), numElements(shape).toInt) - def loadFromNested(addr: Code[Long]): Code[Long] = addr + def unstagedDataPArrayPointer(ndAddr: Long): Long = + representation.loadField(ndAddr, 2) - override def unstagedLoadFromNested(addr: Long): Long = addr + override def dataFirstElementPointer(ndAddr: Code[Long]): Code[Long] = dataType.firstElementOffset(this.dataPArrayPointer(ndAddr)) - override def unstagedStoreJavaObject(annotation: Annotation, region: Region): Long = { - val addr = this.representation.allocate(region) - this.unstagedStoreJavaObjectAtAddress(addr, annotation, region) - addr - } + override def dataPArrayPointer(ndAddr: Code[Long]): Code[Long] = representation.loadField(ndAddr, "data") - override def unstagedStoreJavaObjectAtAddress(addr: Long, annotation: Annotation, region: Region): Unit = { + def loadFromNested(addr: Code[Long]): Code[Long] = Region.loadAddress(addr) + + override def unstagedLoadFromNested(addr: Long): Long = Region.loadAddress(addr) + + override def unstagedStoreJavaObject(annotation: Annotation, region: Region): Long = { val aNDArray = annotation.asInstanceOf[NDArray] - var runningProduct = this.elementType.byteSize + var runningProduct = this.representation.fieldType("data").asInstanceOf[PArray].elementType.byteSize val stridesArray = new Array[Long](aNDArray.shape.size) ((aNDArray.shape.size - 1) to 0 by -1).foreach { i => stridesArray(i) = runningProduct runningProduct = runningProduct * (if (aNDArray.shape(i) > 0L) aNDArray.shape(i) else 1L) } - val 
dataFirstElementAddress = this.allocateData(aNDArray.shape, region) - var curElementAddress = dataFirstElementAddress - aNDArray.getRowMajorElements().foreach{ element => - elementType.unstagedStoreJavaObjectAtAddress(curElementAddress, element, region) - curElementAddress += elementType.byteSize + + val addr = unstagedConstructDataFunction(aNDArray.shape, stridesArray, region) { dataFirstElementAddress => + var curElementAddress = dataFirstElementAddress + aNDArray.getRowMajorElements().foreach{ element => + dataType.elementType.unstagedStoreJavaObjectAtAddress(curElementAddress, element, region) + curElementAddress += dataType.elementType.byteSize + } } - val shapeRow = Row(aNDArray.shape: _*) - val stridesRow = Row(stridesArray: _*) - this.representation.unstagedStoreJavaObjectAtAddress(addr, Row(shapeRow, stridesRow, dataFirstElementAddress), region) + + addr + } + + override def unstagedStoreJavaObjectAtAddress(addr: Long, annotation: Annotation, region: Region): Unit = { + Region.storeAddress(addr, unstagedStoreJavaObject(annotation, region)) } } diff --git a/hail/src/main/scala/is/hail/types/physical/PNDArray.scala b/hail/src/main/scala/is/hail/types/physical/PNDArray.scala index 9745d7bd472..691844ac1cf 100644 --- a/hail/src/main/scala/is/hail/types/physical/PNDArray.scala +++ b/hail/src/main/scala/is/hail/types/physical/PNDArray.scala @@ -10,6 +10,14 @@ import is.hail.types.physical.stypes.concrete.SNDArrayPointerCode import is.hail.types.physical.stypes.interfaces.{SIndexableCode, SNDArrayCode, SNDArrayValue} import is.hail.types.virtual.TNDArray +object PNDArray { + val headerBytes = 16L + def getReferenceCount(ndAddr: Long): Long = Region.loadLong(ndAddr - 16L) + def storeReferenceCount(ndAddr: Long, newCount: Long): Unit = Region.storeLong(ndAddr - 16L, newCount) + def getByteSize(ndAddr: Long): Long = Region.loadLong(ndAddr - 8L) + def storeByteSize(ndAddr: Long, byteSize: Long): Unit = Region.storeLong(ndAddr - 8L, byteSize) +} + abstract class PNDArray extends PType { val elementType: PType val nDims: Int @@ -20,6 +28,7 @@ abstract class PNDArray extends PType { assert(elementType.required, "elementType must be required") def dataFirstElementPointer(ndAddr: Code[Long]): Code[Long] + def dataPArrayPointer(ndAddr: Code[Long]): Code[Long] def loadShape(off: Long, idx: Int): Long def unstagedLoadShapes(addr: Long): IndexedSeq[Long] = { diff --git a/hail/src/main/scala/is/hail/types/physical/PStruct.scala b/hail/src/main/scala/is/hail/types/physical/PStruct.scala index 3116823c729..0cbf5328cff 100644 --- a/hail/src/main/scala/is/hail/types/physical/PStruct.scala +++ b/hail/src/main/scala/is/hail/types/physical/PStruct.scala @@ -32,7 +32,7 @@ trait PStruct extends PBaseStruct { def rename(m: Map[String, String]): PStruct - def identBase: String = "struct" + def identBase: String = "tuple" final def selectFields(names: Seq[String]): PCanonicalStruct = PCanonicalStruct(required, names.map(f => f -> field(f).typ): _*) diff --git a/hail/src/main/scala/is/hail/types/physical/PType.scala b/hail/src/main/scala/is/hail/types/physical/PType.scala index dc46ded21d0..0e264225a9d 100644 --- a/hail/src/main/scala/is/hail/types/physical/PType.scala +++ b/hail/src/main/scala/is/hail/types/physical/PType.scala @@ -110,7 +110,7 @@ object PType { implicit def arbType = Arbitrary(genArb) - def canonical(t: Type, required: Boolean, innerRequired: Boolean): PType = { + def canonical(t: Type, required: Boolean): PType = { t match { case TInt32 => PInt32(required) case TInt64 => PInt64(required) @@ 
-122,21 +122,19 @@ object PType { case TString => PCanonicalString(required) case TCall => PCanonicalCall(required) case t: TLocus => PCanonicalLocus(t.rg, required) - case t: TInterval => PCanonicalInterval(canonical(t.pointType, innerRequired, innerRequired), required) - case t: TStream => PCanonicalStream(canonical(t.elementType, innerRequired, innerRequired), required = required) - case t: TArray => PCanonicalArray(canonical(t.elementType, innerRequired, innerRequired), required) - case t: TSet => PCanonicalSet(canonical(t.elementType, innerRequired, innerRequired), required) - case t: TDict => PCanonicalDict(canonical(t.keyType, innerRequired, innerRequired), canonical(t.valueType, innerRequired, innerRequired), required) - case t: TTuple => PCanonicalTuple(t._types.map(tf => PTupleField(tf.index, canonical(tf.typ, innerRequired, innerRequired))), required) - case t: TStruct => PCanonicalStruct(t.fields.map(f => PField(f.name, canonical(f.typ, innerRequired, innerRequired), f.index)), required) - case t: TNDArray => PCanonicalNDArray(canonical(t.elementType, innerRequired, innerRequired).setRequired(true), t.nDims, required) + case t: TInterval => PCanonicalInterval(canonical(t.pointType), required) + case t: TStream => PCanonicalStream(canonical(t.elementType), required = required) + case t: TArray => PCanonicalArray(canonical(t.elementType), required) + case t: TSet => PCanonicalSet(canonical(t.elementType), required) + case t: TDict => PCanonicalDict(canonical(t.keyType), canonical(t.valueType), required) + case t: TTuple => PCanonicalTuple(t._types.map(tf => PTupleField(tf.index, canonical(tf.typ))), required) + case t: TStruct => PCanonicalStruct(t.fields.map(f => PField(f.name, canonical(f.typ), f.index)), required) + case t: TNDArray => PCanonicalNDArray(canonical(t.elementType).setRequired(true), t.nDims, required) case TVoid => PVoid } } - def canonical(t: Type, required: Boolean): PType = canonical(t, required, false) - - def canonical(t: Type): PType = canonical(t, false, false) + def canonical(t: Type): PType = canonical(t, false) // currently identity def canonical(t: PType): PType = { diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/SType.scala b/hail/src/main/scala/is/hail/types/physical/stypes/SType.scala index 606a6143462..0c0491d7878 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/SType.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/SType.scala @@ -5,7 +5,6 @@ import is.hail.asm4s._ import is.hail.expr.ir.{EmitCode, EmitCodeBuilder, EmitSettable, SCodeEmitParamType, SCodeParamType} import is.hail.types.VirtualTypeWithReq import is.hail.types.physical.PType -import is.hail.types.physical.stypes.concrete.SUnreachable import is.hail.types.physical.stypes.interfaces.SStream import is.hail.types.physical.stypes.primitives._ import is.hail.types.virtual._ @@ -13,15 +12,10 @@ import is.hail.types.virtual._ object SType { def chooseCompatibleType(req: VirtualTypeWithReq, stypes: SType*): SType = { - val reachable = stypes.filter(t => !t.isInstanceOf[SUnreachable]).toSet - - // all unreachable - if (reachable.isEmpty) - SUnreachable.fromVirtualType(req.t) - else if (reachable.size == 1) // only one reachable stype - reachable.head + if (stypes.toSet.size == 1) + stypes.head else - req.canonicalEmitType.st // fall back to canonical emit type from requiredness + req.canonicalEmitType.st } def canonical(virt: Type): SType = { @@ -42,14 +36,7 @@ object SType { trait SType { def virtualType: Type - final def coerceOrCopy(cb: 
EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { - value.st match { - case _: SUnreachable => this.defaultValue - case _ => _coerceOrCopy(cb, region, value, deepCopy) - } - } - - protected[stypes] def _coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode + def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode def codeTupleTypes(): IndexedSeq[TypeInfo[_]] diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBaseStructPointer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBaseStructPointer.scala index e85793b9169..5da7ec32fd1 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBaseStructPointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBaseStructPointer.scala @@ -21,7 +21,7 @@ case class SBaseStructPointer(pType: PBaseStruct) extends SBaseStruct { override def fieldIdx(fieldName: String): Int = pType.fieldIdx(fieldName) - def _coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { + def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { new SBaseStructPointerCode(this, pType.store(cb, region, value, deepCopy)) } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBinaryPointer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBinaryPointer.scala index e16217b234a..18fea2daba7 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBinaryPointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SBinaryPointer.scala @@ -15,7 +15,7 @@ case class SBinaryPointer(pType: PBinary) extends SBinary { require(!pType.required) lazy val virtualType: Type = pType.virtualType - def _coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { + def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { new SBinaryPointerCode(this, pType.store(cb, region, value, deepCopy)) } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalCall.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalCall.scala index b173a24a49c..3f0391ea887 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalCall.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalCall.scala @@ -13,7 +13,7 @@ import is.hail.variant.Genotype case object SCanonicalCall extends SCall { - def _coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { + def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { value.st match { case SCanonicalCall => value } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalLocusPointer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalLocusPointer.scala index bce5677191d..2064cbab6a9 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalLocusPointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalLocusPointer.scala @@ -4,7 +4,7 @@ import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} -import 
is.hail.types.physical.stypes.interfaces.{SBaseStructCode, SBaseStructValue, SLocus, SLocusCode, SLocusValue, SString, SStringCode} +import is.hail.types.physical.stypes.interfaces.{SLocus, SLocusCode, SLocusValue, SString, SStringCode} import is.hail.types.physical.stypes.{SCode, SSettable, SType} import is.hail.types.physical.{PCanonicalLocus, PType} import is.hail.types.virtual.Type @@ -23,7 +23,7 @@ case class SCanonicalLocusPointer(pType: PCanonicalLocus) extends SLocus { override def rg: ReferenceGenome = pType.rg - def _coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { + def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { new SCanonicalLocusPointerCode(this, pType.store(cb, region, value, deepCopy)) } @@ -81,9 +81,6 @@ class SCanonicalLocusPointerSettable( } def position(cb: EmitCodeBuilder): Code[Int] = _position - - override def structRepr(cb: EmitCodeBuilder): SBaseStructValue = new SBaseStructPointerSettable( - SBaseStructPointer(st.pType.representation), a) } class SCanonicalLocusPointerCode(val st: SCanonicalLocusPointer, val a: Code[Long]) extends SLocusCode { @@ -113,6 +110,4 @@ class SCanonicalLocusPointerCode(val st: SCanonicalLocusPointer, val a: Code[Lon def memoizeField(cb: EmitCodeBuilder, name: String): SCanonicalLocusPointerSettable = memoize(cb, name, cb.fieldBuilder) def store(mb: EmitMethodBuilder[_], r: Value[Region], dst: Code[Long]): Code[Unit] = Region.storeAddress(dst, a) - - def structRepr(cb: EmitCodeBuilder): SBaseStructCode = new SBaseStructPointerCode(SBaseStructPointer(st.pType.representation), a) } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalShufflePointer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalShufflePointer.scala index b8b96e16061..60952a94d96 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalShufflePointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SCanonicalShufflePointer.scala @@ -20,7 +20,7 @@ case class SCanonicalShufflePointer(pType: PCanonicalShuffle) extends SShuffle { lazy val binarySType = SBinaryPointer(pType.representation) - def _coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { + def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { new SCanonicalShufflePointerCode(this, pType.representation.loadCheapSCode(cb, pType.store(cb, region, value, deepCopy))) } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIndexablePointer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIndexablePointer.scala index e3f87e91498..be362393fac 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIndexablePointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIndexablePointer.scala @@ -21,7 +21,7 @@ case class SIndexablePointer(pType: PContainer) extends SContainer { def elementEmitType: EmitType = EmitType(elementType, pType.elementType.required) - def _coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { + def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { new SIndexablePointerCode(this, pType.store(cb, region, value, deepCopy)) } @@ -54,7 +54,7 @@ class SIndexablePointerCode(val st: SIndexablePointer, val a: Code[Long]) extend def 
makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = FastIndexedSeq(a) - def codeLoadLength(): Code[Int] = pt.loadLength(a) + def loadLength(): Code[Int] = pt.loadLength(a) def memoize(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): SIndexableValue = { val s = SIndexablePointerSettable(sb, st, name) diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SInsertFieldsStruct.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SInsertFieldsStruct.scala index d059af374c9..bcfd8ee050d 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SInsertFieldsStruct.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SInsertFieldsStruct.scala @@ -63,7 +63,7 @@ case class SInsertFieldsStruct(virtualType: TStruct, parent: SBaseStruct, insert }) } - override def _coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { + override def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { value match { case ss: SInsertFieldsStructCode if ss.st == this => value case _ => throw new RuntimeException(s"copy insertfields struct") diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIntervalPointer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIntervalPointer.scala index 811f9bde02f..395c6dbb011 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIntervalPointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SIntervalPointer.scala @@ -14,7 +14,7 @@ import is.hail.utils.FastIndexedSeq case class SIntervalPointer(pType: PInterval) extends SInterval { require(!pType.required) - def _coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { + def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { new SIntervalPointerCode(this, pType.store(cb, region, value, deepCopy)) } @@ -111,9 +111,9 @@ class SIntervalPointerCode(val st: SIntervalPointer, val a: Code[Long]) extends def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = FastIndexedSeq(a) - def codeIncludesStart(): Code[Boolean] = pt.includesStart(a) + def includesStart(): Code[Boolean] = pt.includesStart(a) - def codeIncludesEnd(): Code[Boolean] = pt.includesEnd(a) + def includesEnd(): Code[Boolean] = pt.includesEnd(a) def memoize(cb: EmitCodeBuilder, name: String, sb: SettableBuilder): SIntervalValue = { val s = SIntervalPointerSettable(sb, st, name) diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SNDArrayPointer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SNDArrayPointer.scala index 319db9b4015..4c0b4171fb1 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SNDArrayPointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SNDArrayPointer.scala @@ -1,25 +1,20 @@ package is.hail.types.physical.stypes.concrete import is.hail.annotations.Region -import is.hail.asm4s._ -import is.hail.expr.ir.EmitCodeBuilder -import is.hail.types.physical.stypes.interfaces.{SBaseStructCode, SNDArray, SNDArrayCode, SNDArrayValue} import is.hail.asm4s.{Code, IntInfo, LongInfo, Settable, SettableBuilder, TypeInfo, Value, const} import is.hail.expr.ir.orderings.CodeOrdering import is.hail.expr.ir.{EmitCodeBuilder, EmitMethodBuilder, SortOrder} -import is.hail.types.physical.stypes.interfaces.{SBaseStructCode, SNDArray, SNDArrayCode, 
SNDArraySettable, SNDArrayValue} +import is.hail.types.physical.stypes.interfaces.{SBaseStructCode, SNDArray, SNDArrayCode, SNDArrayValue} import is.hail.types.physical.stypes.{SCode, SSettable, SType, SValue} import is.hail.types.physical.{PCanonicalNDArray, PType} import is.hail.types.virtual.Type -import is.hail.utils.{FastIndexedSeq, toRichIterable} +import is.hail.utils.FastIndexedSeq case class SNDArrayPointer(pType: PCanonicalNDArray) extends SNDArray { require(!pType.required) def nDims: Int = pType.nDims - override def elementByteSize: Long = pType.elementType.byteSize - override def elementType: SType = pType.elementType.sType override def elementPType: PType = pType.elementType @@ -28,7 +23,7 @@ case class SNDArrayPointer(pType: PCanonicalNDArray) extends SNDArray { override def castRename(t: Type): SType = SNDArrayPointer(pType.deepRename(t)) - def _coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { + def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { new SNDArrayPointerCode(this, pType.store(cb, region, value, deepCopy)) } @@ -70,7 +65,7 @@ class SNDArrayPointerSettable( val shape: IndexedSeq[Settable[Long]], val strides: IndexedSeq[Settable[Long]], val dataFirstElement: Settable[Long] - ) extends SNDArraySettable { + ) extends SNDArrayValue with SSettable { val pt: PCanonicalNDArray = st.pType def loadElement(indices: IndexedSeq[Value[Long]], cb: EmitCodeBuilder): SCode = { @@ -124,29 +119,6 @@ class SNDArrayPointerSettable( } def firstDataAddress(cb: EmitCodeBuilder): Value[Long] = dataFirstElement - - // Note: to iterate through an array in column major order, make sure the indices are in ascending order. E.g. - // A.coiterate(cb, region, IndexedSeq("i", "j"), IndexedSeq((A, IndexedSeq(0, 1), "A"), (B, IndexedSeq(0, 1), "B")), { - // SCode.add(cb, a, b) - // }) - // computes A += B. 
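// A minimal sketch (not part of the surrounding hunk) of the coiterateMutate call that the
// doc comment above describes: `nd` is the mutable destination SNDArrayValue, `other` an
// SNDArrayCode of the same 2-D shape, and `cb`/`region` the usual emit context. `SCode.add`
// is taken from that comment as a stand-in for an element-wise addition; names here are
// illustrative only.
def addInPlace(cb: EmitCodeBuilder, region: Value[Region],
               nd: SNDArrayValue, other: SNDArrayCode): Unit =
  nd.coiterateMutate(cb, region, IndexedSeq("i", "j"), IndexedSeq(0, 1),
    (other, IndexedSeq(0, 1), "B")) {
    case Seq(destElt, otherElt) => SCode.add(cb, destElt, otherElt) // result is stored back into `nd`
  }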
- def coiterateMutate( - cb: EmitCodeBuilder, - region: Value[Region], - deepCopy: Boolean, - indexVars: IndexedSeq[String], - destIndices: IndexedSeq[Int], - arrays: (SNDArrayCode, IndexedSeq[Int], String)* - )(body: IndexedSeq[SCode] => SCode - ): Unit = { - SNDArray._coiterate(cb, indexVars, (this.get, destIndices, "dest") +: arrays: _*) { ptrs => - val codes = (this.get +: arrays.map(_._1)).zip(ptrs).toFastIndexedSeq.map { case (array, ptr) => - val pt: PType = array.st.pType.elementType - pt.loadCheapSCode(cb, pt.loadFromNested(ptr)) - } - pt.elementType.storeAtAddress(cb, ptrs.head, region, body(codes), deepCopy) - } - } } class SNDArrayPointerCode(val st: SNDArrayPointer, val a: Code[Long]) extends SNDArrayCode { diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SStackStruct.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SStackStruct.scala index c613e474fb0..d233b204fb4 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SStackStruct.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SStackStruct.scala @@ -37,7 +37,7 @@ case class SStackStruct(virtualType: TBaseStruct, fieldEmitTypes: IndexedSeq[Emi override lazy val fieldTypes: IndexedSeq[SType] = fieldEmitTypes.map(_.st) - def fieldIdx(fieldName: String): Int = virtualType.fieldIdx(fieldName) + def fieldIdx(fieldName: String): Int = virtualType.asInstanceOf[TStruct].fieldIdx(fieldName) override def canonicalPType(): PType = virtualType match { case ts: TStruct => @@ -68,7 +68,7 @@ case class SStackStruct(virtualType: TBaseStruct, fieldEmitTypes: IndexedSeq[Emi }) } - override def _coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { + override def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { value match { case ss: SStackStructCode => if (ss.st == this && !deepCopy) diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SStringPointer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SStringPointer.scala index 1ee1c35b478..30612a0ea43 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SStringPointer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SStringPointer.scala @@ -18,7 +18,7 @@ case class SStringPointer(pType: PString) extends SString { override def castRename(t: Type): SType = this - def _coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { + def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { new SStringPointerCode(this, pType.store(cb, region, value, deepCopy)) } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SSubsetStruct.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SSubsetStruct.scala index 8656245b29b..877e03f1503 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SSubsetStruct.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SSubsetStruct.scala @@ -43,7 +43,7 @@ case class SSubsetStruct(parent: SBaseStruct, fieldNames: IndexedSeq[String]) ex newType } - def _coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { + def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { if (deepCopy) throw new NotImplementedError("Deep copy on subset struct") value.st match { diff --git 
a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SUnreachable.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SUnreachable.scala deleted file mode 100644 index 12e68714eee..00000000000 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SUnreachable.scala +++ /dev/null @@ -1,309 +0,0 @@ -package is.hail.types.physical.stypes.concrete - -import is.hail.annotations.Region -import is.hail.asm4s._ -import is.hail.expr.ir.{EmitCode, EmitCodeBuilder, IEmitCode} -import is.hail.types.physical.{PCanonicalNDArray, PNDArray, PType} -import is.hail.types.physical.stypes.interfaces._ -import is.hail.types.physical.stypes.{EmitType, SCode, SSettable, SType} -import is.hail.types.virtual._ -import is.hail.utils.FastIndexedSeq -import is.hail.variant.ReferenceGenome - -object SUnreachable { - def fromVirtualType(t: Type): SType = { - require(t.isRealizable) - t match { - case t if t.isPrimitive => SType.canonical(t) - case ts: TBaseStruct => SUnreachableStruct(ts) - case tc: TContainer => SUnreachableContainer(tc) - case tnd: TNDArray => SUnreachableNDArray(tnd) - case tl: TLocus => SUnreachableLocus(tl) - case ti: TInterval => SUnreachableInterval(ti) - case ts: TShuffle => SUnreachableShuffle(ts) - case TCall => SUnreachableCall - case TBinary => SUnreachableBinary - case TString => SUnreachableString - case TVoid => SVoid - } - } -} - -abstract class SUnreachable extends SType { - def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq() - - override def settableTupleTypes(): IndexedSeq[TypeInfo[_]] = FastIndexedSeq() - - def canonicalPType(): PType = PType.canonical(virtualType, required = false, innerRequired = true) - - override def asIdent: String = s"s_unreachable" - - def castRename(t: Type): SType = SUnreachable.fromVirtualType(t) - - val sv: SUnreachableValue - - override def fromSettables(settables: IndexedSeq[Settable[_]]): SSettable = sv - - override def fromCodes(codes: IndexedSeq[Code[_]]): SUnreachableValue = sv - - override def _coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = sv -} - -abstract class SUnreachableValue extends SCode with SSettable { - def makeCodeTuple(cb: EmitCodeBuilder): IndexedSeq[Code[_]] = FastIndexedSeq() - - def settableTuple(): IndexedSeq[Settable[_]] = FastIndexedSeq() - - def store(cb: EmitCodeBuilder, v: SCode): Unit = {} - - override def get: SCode = this -} - -case class SUnreachableStruct(virtualType: TBaseStruct) extends SUnreachable with SBaseStruct { - override def size: Int = virtualType.size - - val fieldTypes: IndexedSeq[SType] = virtualType.types.map(SUnreachable.fromVirtualType) - val fieldEmitTypes: IndexedSeq[EmitType] = fieldTypes.map(f => EmitType(f, true)) - - def fieldIdx(fieldName: String): Int = virtualType.fieldIdx(fieldName) - - val sv = new SUnreachableStructValue(this) - - override def fromCodes(codes: IndexedSeq[Code[_]]): SUnreachableStructValue = sv -} - -class SUnreachableStructValue(val st: SUnreachableStruct) extends SUnreachableValue with SBaseStructValue with SBaseStructCode { - override def memoizeField(cb: EmitCodeBuilder, name: String): SBaseStructValue = this - - override def memoize(cb: EmitCodeBuilder, name: String): SBaseStructValue = this - - def loadField(cb: EmitCodeBuilder, fieldIdx: Int): IEmitCode = IEmitCode.present(cb, SUnreachable.fromVirtualType(st.virtualType.types(fieldIdx)).defaultValue) - - override def isFieldMissing(fieldIdx: Int): Code[Boolean] = false - - override def loadSingleField(cb: EmitCodeBuilder, 
fieldIdx: Int): IEmitCode = loadField(cb, fieldIdx) - - override def subset(fieldNames: String*): SBaseStructCode = { - val oldType = st.virtualType.asInstanceOf[TStruct] - val newType = TStruct(fieldNames.map(f => (f, oldType.fieldType(f))): _*) - new SUnreachableStructValue(SUnreachableStruct(newType)) - } - - override def insert(cb: EmitCodeBuilder, region: Value[Region], newType: TStruct, fields: (String, EmitCode)*): SBaseStructCode = - new SUnreachableStructValue(SUnreachableStruct(newType)) - - override def _insert(newType: TStruct, fields: (String, EmitCode)*): SBaseStructCode = - new SUnreachableStructValue(SUnreachableStruct(newType)) -} - -case object SUnreachableBinary extends SUnreachable with SBinary { - override def virtualType: Type = TBinary - - val sv = new SUnreachableBinaryValue -} - -class SUnreachableBinaryValue extends SUnreachableValue with SBinaryValue with SBinaryCode { - override def memoizeField(cb: EmitCodeBuilder, name: String): SUnreachableBinaryValue = this - - override def memoize(cb: EmitCodeBuilder, name: String): SUnreachableBinaryValue = this - - override def loadByte(i: Code[Int]): Code[Byte] = const(0.toByte) - - override def bytesAddress(): Code[Long] = const(0L) - - override def loadBytes(): Code[Array[Byte]] = Code._null - - override def loadLength(): Code[Int] = const(0) - - def st: SUnreachableBinary.type = SUnreachableBinary - - override def get: SUnreachableBinaryValue = this -} - -case object SUnreachableString extends SUnreachable with SString { - override def virtualType: Type = TString - - val sv = new SUnreachableStringValue - - override def constructFromString(cb: EmitCodeBuilder, r: Value[Region], s: Code[String]): SStringCode = sv -} - -class SUnreachableStringValue extends SUnreachableValue with SStringValue with SStringCode { - override def memoizeField(cb: EmitCodeBuilder, name: String): SUnreachableStringValue = this - - override def memoize(cb: EmitCodeBuilder, name: String): SUnreachableStringValue = this - - override def loadLength(): Code[Int] = const(0) - - def st: SUnreachableString.type = SUnreachableString - - override def loadString(): Code[String] = Code._null - - override def asBytes(): SBinaryCode = new SUnreachableBinaryValue - - override def get: SUnreachableStringValue = this -} - -case class SUnreachableShuffle(virtualType: TShuffle) extends SUnreachable with SShuffle { - val sv = new SUnreachableShuffleValue(this) -} - -class SUnreachableShuffleValue(val st: SUnreachableShuffle) extends SUnreachableValue with SShuffleValue with SShuffleCode { - override def memoizeField(cb: EmitCodeBuilder, name: String): SUnreachableShuffleValue = this - - override def memoize(cb: EmitCodeBuilder, name: String): SUnreachableShuffleValue = this - - override def loadBytes(): Code[Array[Byte]] = Code._null - - override def loadLength(): Code[Int] = const(0) - - override def get: SUnreachableShuffleValue = this -} - -case class SUnreachableLocus(virtualType: TLocus) extends SUnreachable with SLocus { - val sv = new SUnreachableLocusValue(this) - - override def contigType: SString = SUnreachableString - - override def rg: ReferenceGenome = virtualType.rg -} - -class SUnreachableLocusValue(val st: SUnreachableLocus) extends SUnreachableValue with SLocusValue with SLocusCode { - override def memoizeField(cb: EmitCodeBuilder, name: String): SUnreachableLocusValue = this - - override def memoize(cb: EmitCodeBuilder, name: String): SUnreachableLocusValue = this - - override def position(cb: EmitCodeBuilder): Code[Int] = const(0) - - override 
def contig(cb: EmitCodeBuilder): SStringCode = new SUnreachableStringValue - - override def structRepr(cb: EmitCodeBuilder): SBaseStructValue = SUnreachableStruct(TStruct("contig" -> TString, "position" -> TInt32)).defaultValue.asInstanceOf[SUnreachableStructValue] -} - - -case object SUnreachableCall extends SUnreachable with SCall { - override def virtualType: Type = TCall - - val sv = new SUnreachableCallValue -} - -class SUnreachableCallValue extends SUnreachableValue with SCallValue with SCallCode { - override def memoizeField(cb: EmitCodeBuilder, name: String): SUnreachableCallValue = this - - override def memoize(cb: EmitCodeBuilder, name: String): SUnreachableCallValue = this - - override def loadCanonicalRepresentation(cb: EmitCodeBuilder): Code[Int] = const(0) - - override def forEachAllele(cb: EmitCodeBuilder)(alleleCode: Value[Int] => Unit): Unit = {} - - override def isPhased(): Code[Boolean] = const(false) - - override def ploidy(): Code[Int] = const(0) - - override def canonicalCall(cb: EmitCodeBuilder): Code[Int] = const(0) - - def st: SUnreachableCall.type = SUnreachableCall - - override def get: SUnreachableCallValue = this -} - - -case class SUnreachableInterval(virtualType: TInterval) extends SUnreachable with SInterval { - val sv = new SUnreachableIntervalValue(this) - - override def pointType: SType = SUnreachable.fromVirtualType(virtualType.pointType) - - override def pointEmitType: EmitType = EmitType(pointType, true) -} - -class SUnreachableIntervalValue(val st: SUnreachableInterval) extends SUnreachableValue with SIntervalValue with SIntervalCode { - override def memoizeField(cb: EmitCodeBuilder, name: String): SUnreachableIntervalValue = this - - override def memoize(cb: EmitCodeBuilder, name: String): SUnreachableIntervalValue = this - - def includesStart(): Value[Boolean] = const(false) - - def includesEnd(): Value[Boolean] = const(false) - - def codeIncludesStart(): Code[Boolean] = const(false) - - def codeIncludesEnd(): Code[Boolean] = const(false) - - def loadStart(cb: EmitCodeBuilder): IEmitCode = IEmitCode.present(cb, SUnreachable.fromVirtualType(st.virtualType.pointType).defaultValue) - - def startDefined(cb: EmitCodeBuilder): Code[Boolean] = const(false) - - def loadEnd(cb: EmitCodeBuilder): IEmitCode = IEmitCode.present(cb, SUnreachable.fromVirtualType(st.virtualType.pointType).defaultValue) - - def endDefined(cb: EmitCodeBuilder): Code[Boolean] = const(false) - - def isEmpty(cb: EmitCodeBuilder): Code[Boolean] = const(false) -} - - -case class SUnreachableNDArray(virtualType: TNDArray) extends SUnreachable with SNDArray { - val sv = new SUnreachableNDArrayValue(this) - - override def nDims: Int = virtualType.nDims - - lazy val elementType: SType = SUnreachable.fromVirtualType(virtualType.elementType) - - override def elementPType: PType = elementType.canonicalPType() - - override def pType: PNDArray = PCanonicalNDArray(elementPType.setRequired(true), nDims, false) - - override def elementByteSize: Long = 0L -} - -class SUnreachableNDArrayValue(val st: SUnreachableNDArray) extends SUnreachableValue with SNDArraySettable with SNDArrayCode { - override def memoizeField(cb: EmitCodeBuilder, name: String): SUnreachableNDArrayValue = this - - def shape(cb: EmitCodeBuilder): SBaseStructCode = SUnreachableStruct(TTuple((0 until st.nDims).map(_ => TInt64): _*)).defaultValue.asBaseStruct - - def loadElement(indices: IndexedSeq[Value[Long]], cb: EmitCodeBuilder): SCode = SUnreachable.fromVirtualType(st.virtualType.elementType).defaultValue - - def shapes(cb: 
EmitCodeBuilder): IndexedSeq[Value[Long]] = (0 until st.nDims).map(_ => const(0L)) - - def strides(cb: EmitCodeBuilder): IndexedSeq[Value[Long]] = (0 until st.nDims).map(_ => const(0L)) - - def outOfBounds(indices: IndexedSeq[Value[Long]], cb: EmitCodeBuilder): Code[Boolean] = const(false) - - def assertInBounds(indices: IndexedSeq[Value[Long]], cb: EmitCodeBuilder, errorId: Int = -1): Code[Unit] = Code._empty - - def sameShape(other: SNDArrayValue, cb: EmitCodeBuilder): Code[Boolean] = const(false) - - def firstDataAddress(cb: EmitCodeBuilder): Value[Long] = const(0L) - - override def memoize(cb: EmitCodeBuilder, name: String): SUnreachableNDArrayValue = this - - override def get: SUnreachableNDArrayValue = this - - override def coiterateMutate(cb: EmitCodeBuilder, region: Value[Region], deepCopy: Boolean, indexVars: IndexedSeq[String], - destIndices: IndexedSeq[Int], arrays: (SNDArrayCode, IndexedSeq[Int], String)*)(body: IndexedSeq[SCode] => SCode): Unit = () -} - -case class SUnreachableContainer(virtualType: TContainer) extends SUnreachable with SContainer { - val sv = new SUnreachableContainerValue(this) - - lazy val elementType: SType = SUnreachable.fromVirtualType(virtualType.elementType) - - lazy val elementEmitType: EmitType = EmitType(elementType, true) -} - -class SUnreachableContainerValue(val st: SUnreachableContainer) extends SUnreachableValue with SIndexableValue with SIndexableCode { - override def memoizeField(cb: EmitCodeBuilder, name: String): SUnreachableContainerValue = this - - override def memoize(cb: EmitCodeBuilder, name: String): SUnreachableContainerValue = this - - def loadLength(): Value[Int] = const(0) - - override def codeLoadLength(): Code[Int] = const(0) - - def isElementMissing(i: Code[Int]): Code[Boolean] = const(false) - - def loadElement(cb: EmitCodeBuilder, i: Code[Int]): IEmitCode = IEmitCode.present(cb, SUnreachable.fromVirtualType(st.virtualType.elementType).defaultValue) - - def hasMissingValues(cb: EmitCodeBuilder): Code[Boolean] = const(false) - - def castToArray(cb: EmitCodeBuilder): SIndexableCode = SUnreachable.fromVirtualType(st.virtualType.arrayElementsRepr).defaultValue.asIndexable -} diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SBaseStruct.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SBaseStruct.scala index 69c939001ac..fa65a126e8d 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SBaseStruct.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SBaseStruct.scala @@ -64,7 +64,7 @@ trait SBaseStructCode extends SCode { ) } - def insert(cb: EmitCodeBuilder, region: Value[Region], newType: TStruct, fields: (String, EmitCode)*): SBaseStructCode = { + final def insert(cb: EmitCodeBuilder, region: Value[Region], newType: TStruct, fields: (String, EmitCode)*): SBaseStructCode = { if (newType.size < 64 || fields.length < 16) return _insert(newType, fields: _*) diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SContainer.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SContainer.scala index d6ff17d786c..83f9318f79a 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SContainer.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SContainer.scala @@ -40,7 +40,7 @@ trait SIndexableValue extends SValue { trait SIndexableCode extends SCode { def st: SContainer - def codeLoadLength(): Code[Int] + def loadLength(): Code[Int] def memoize(cb: EmitCodeBuilder, name: String): 
SIndexableValue diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SInterval.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SInterval.scala index 22fe34794e0..cb7f533e318 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SInterval.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SInterval.scala @@ -31,9 +31,9 @@ trait SIntervalValue extends SValue { trait SIntervalCode extends SCode { def st: SInterval - def codeIncludesStart(): Code[Boolean] + def includesStart(): Code[Boolean] - def codeIncludesEnd(): Code[Boolean] + def includesEnd(): Code[Boolean] def memoize(cb: EmitCodeBuilder, name: String): SIntervalValue diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SLocus.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SLocus.scala index 4b1837ad57d..629799a6a98 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SLocus.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SLocus.scala @@ -17,8 +17,6 @@ trait SLocusValue extends SValue { def getLocusObj(cb: EmitCodeBuilder): Code[Locus] = Code.invokeStatic2[Locus, String, Int, Locus]("apply", contig(cb).loadString(), position(cb)) - - def structRepr(cb: EmitCodeBuilder): SBaseStructValue } trait SLocusCode extends SCode { diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SNDArray.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SNDArray.scala index e5557ce19da..a47530a0758 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SNDArray.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SNDArray.scala @@ -3,8 +3,8 @@ package is.hail.types.physical.stypes.interfaces import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir.EmitCodeBuilder -import is.hail.types.physical.stypes.{SCode, SSettable, SType, SValue} import is.hail.types.physical.{PNDArray, PType} +import is.hail.types.physical.stypes.{SCode, SSettable, SType, SValue} import is.hail.utils.{FastIndexedSeq, toRichIterable} object SNDArray { @@ -18,39 +18,26 @@ object SNDArray { forEachIndexWithInitAndIncColMajor(cb, shape, shape.map(_ => (cb: EmitCodeBuilder) => ()), shape.map(_ => (cb: EmitCodeBuilder) => ()), context)(f) } - def coiterate(cb: EmitCodeBuilder, arrays: (SNDArrayCode, String)*)(body: IndexedSeq[SCode] => Unit): Unit = { + def coiterate(cb: EmitCodeBuilder, region: Value[Region], arrays: IndexedSeq[(SNDArrayCode, String)], body: IndexedSeq[SSettable] => Unit): Unit = + coiterate(cb, region, arrays, body, deepCopy=false) + + def coiterate(cb: EmitCodeBuilder, region: Value[Region], arrays: IndexedSeq[(SNDArrayCode, String)], body: IndexedSeq[SSettable] => Unit, deepCopy: Boolean): Unit = { if (arrays.isEmpty) return val indexVars = Array.tabulate(arrays(0)._1.st.nDims)(i => s"i$i").toFastIndexedSeq val indices = Array.range(0, arrays(0)._1.st.nDims).toFastIndexedSeq - coiterate(cb, indexVars, arrays.map { case (array, name) => (array, indices, name) }: _*)(body) + coiterate(cb, region, indexVars, arrays.map { case (array, name) => (array, indices, name) }, body, deepCopy) } + def coiterate(cb: EmitCodeBuilder, region: Value[Region], indexVars: IndexedSeq[String], arrays: IndexedSeq[(SNDArrayCode, IndexedSeq[Int], String)], body: IndexedSeq[SSettable] => Unit): Unit = + coiterate(cb, region, indexVars, arrays, body, deepCopy=false) + // Note: to iterate through an array in column major order, 
make sure the indices are in ascending order. E.g. - // A.coiterate(cb, region, IndexedSeq("i", "j"), IndexedSeq((A, IndexedSeq(0, 1), "A"), (B, IndexedSeq(0, 1), "B")), { - // SCode.add(cb, a, b) + // coiterate(cb, region, IndexedSeq("i", "j"), IndexedSeq((A, IndexedSeq(0, 1), "A"), (B, IndexedSeq(0, 1), "B")), { + // case Seq(a, b) => cb.assign(a, SCode.add(cb, a, b)) // }) // computes A += B. - def coiterate( - cb: EmitCodeBuilder, - indexVars: IndexedSeq[String], - arrays: (SNDArrayCode, IndexedSeq[Int], String)* - )(body: IndexedSeq[SCode] => Unit - ): Unit = { - _coiterate(cb, indexVars, arrays: _*) { ptrs => - val codes = ptrs.zip(arrays).map { case (ptr, (array, _, _)) => - val pt = array.st.pType.elementType - pt.loadCheapSCode(cb, pt.loadFromNested(ptr)) - } - body(codes) - } - } + def coiterate(cb: EmitCodeBuilder, region: Value[Region], indexVars: IndexedSeq[String], arrays: IndexedSeq[(SNDArrayCode, IndexedSeq[Int], String)], body: IndexedSeq[SSettable] => Unit, deepCopy: Boolean): Unit = { - def _coiterate( - cb: EmitCodeBuilder, - indexVars: IndexedSeq[String], - arrays: (SNDArrayCode, IndexedSeq[Int], String)* - )(body: IndexedSeq[Value[Long]] => Unit - ): Unit = { val indexSizes = new Array[Settable[Int]](indexVars.length) val indexCoords = Array.tabulate(indexVars.length) { i => cb.newLocal[Int](indexVars(i)) } @@ -58,10 +45,11 @@ object SNDArray { array: SNDArrayValue, strides: IndexedSeq[Value[Long]], pos: IndexedSeq[Settable[Long]], + elt: SSettable, indexToDim: Map[Int, Int], name: String) - val info = arrays.toIndexedSeq.map { case (_array, indices, name) => + val info = arrays.map { case (_array, indices, name) => for (idx <- indices) assert(idx < indexVars.length && idx >= 0) // FIXME: relax this assumption to handle transposing, non-column major for (i <- 0 until indices.length - 1) assert(indices(i) < indices(i+1)) @@ -80,14 +68,22 @@ object SNDArray { } val strides = array.strides(cb) val pos = Array.tabulate(array.st.nDims + 1) { i => cb.newLocal[Long](s"$name$i") } + val elt = new SSettable { + def st: SType = array.st.elementType + val pt: PType = array.st.pType.elementType + + // FIXME: need to use `pos` of smallest index var + def get: SCode = pt.loadCheapSCode(cb, pt.loadFromNested(pos(0))) + def store(cb: EmitCodeBuilder, v: SCode): Unit = pt.storeAtAddress(cb, pos(0), region, v, deepCopy) + def settableTuple(): IndexedSeq[Settable[_]] = FastIndexedSeq(pos.last) + } val indexToDim = indices.zipWithIndex.toMap - ArrayInfo(array, strides, pos, indexToDim, name) + ArrayInfo(array, strides, pos, elt, indexToDim, name) } def recurLoopBuilder(idx: Int): Unit = { if (idx < 0) { - // FIXME: to handle non-column major, need to use `pos` of smallest index var - body(info.map(_.pos(0))) + body(info.map(_.elt)) } else { val coord = indexCoords(idx) def init(): Unit = { @@ -233,8 +229,6 @@ trait SNDArray extends SType { def elementType: SType def elementPType: PType - - def elementByteSize: Long } trait SNDArrayValue extends SValue { @@ -255,38 +249,8 @@ trait SNDArrayValue extends SValue { def sameShape(other: SNDArrayValue, cb: EmitCodeBuilder): Code[Boolean] def firstDataAddress(cb: EmitCodeBuilder): Value[Long] - - def coiterateMutate(cb: EmitCodeBuilder, region: Value[Region], arrays: (SNDArrayCode, String)*)(body: IndexedSeq[SCode] => SCode): Unit = - coiterateMutate(cb, region, false, arrays: _*)(body) - - def coiterateMutate(cb: EmitCodeBuilder, region: Value[Region], deepCopy: Boolean, arrays: (SNDArrayCode, String)*)(body: IndexedSeq[SCode] => SCode): Unit = 
{ - if (arrays.isEmpty) return - val indexVars = Array.tabulate(arrays(0)._1.st.nDims)(i => s"i$i").toFastIndexedSeq - val indices = Array.range(0, arrays(0)._1.st.nDims).toFastIndexedSeq - coiterateMutate(cb, region, deepCopy, indexVars, indices, arrays.map { case (array, name) => (array, indices, name) }: _*)(body) - } - - def coiterateMutate(cb: EmitCodeBuilder, region: Value[Region], indexVars: IndexedSeq[String], destIndices: IndexedSeq[Int], arrays: (SNDArrayCode, IndexedSeq[Int], String)*)(body: IndexedSeq[SCode] => SCode): Unit = - coiterateMutate(cb, region, false, indexVars, destIndices, arrays: _*)(body) - - // Note: to iterate through an array in column major order, make sure the indices are in ascending order. E.g. - // A.coiterate(cb, region, IndexedSeq("i", "j"), IndexedSeq((A, IndexedSeq(0, 1), "A"), (B, IndexedSeq(0, 1), "B")), { - // SCode.add(cb, a, b) - // }) - // computes A += B. - def coiterateMutate( - cb: EmitCodeBuilder, - region: Value[Region], - deepCopy: Boolean, - indexVars: IndexedSeq[String], - destIndices: IndexedSeq[Int], - arrays: (SNDArrayCode, IndexedSeq[Int], String)* - )(body: IndexedSeq[SCode] => SCode - ): Unit } -trait SNDArraySettable extends SNDArrayValue with SSettable - trait SNDArrayCode extends SCode { def st: SNDArray diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SStream.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SStream.scala index c980f175a58..027e000c5a5 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SStream.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SStream.scala @@ -11,7 +11,7 @@ import is.hail.types.virtual.{TStream, Type} case class SStream(elementEmitType: EmitType) extends SType { def elementType: SType = elementEmitType.st - def _coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { + def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { if (deepCopy) throw new UnsupportedOperationException assert(value.st == this) diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SVoid.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SVoid.scala index be3f096c426..c7e91e77ee8 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SVoid.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SVoid.scala @@ -14,7 +14,7 @@ case object SVoid extends SType { override def castRename(t: Type): SType = this - def _coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = value + def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = value def codeTupleTypes(): IndexedSeq[TypeInfo[_]] = IndexedSeq() diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SBoolean.scala b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SBoolean.scala index bea6b823684..2607f9f318e 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SBoolean.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SBoolean.scala @@ -16,7 +16,7 @@ case object SBoolean extends SPrimitive { override def castRename(t: Type): SType = this - def _coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { + def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { value.st 
match { case SBoolean => value diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat32.scala b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat32.scala index f2cc64832c9..43f5a2db83e 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat32.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat32.scala @@ -16,7 +16,7 @@ case object SFloat32 extends SPrimitive { override def castRename(t: Type): SType = this - def _coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { + def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { value.st match { case SFloat32 => value } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat64.scala b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat64.scala index 2e0fe1b06da..753ac6e9e21 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat64.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SFloat64.scala @@ -16,7 +16,7 @@ case object SFloat64 extends SPrimitive { override def castRename(t: Type): SType = this - def _coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { + def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { value.st match { case SFloat64 => value } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt32.scala b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt32.scala index 6ef98ba5bc3..fd9f0cfd6d9 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt32.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt32.scala @@ -16,7 +16,7 @@ case object SInt32 extends SPrimitive { override def castRename(t: Type): SType = this - def _coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { + def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { value.st match { case SInt32 => value } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt64.scala b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt64.scala index 1f6fc7a11b1..2a2c0ba9672 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt64.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/primitives/SInt64.scala @@ -15,7 +15,7 @@ case object SInt64 extends SPrimitive { override def castRename(t: Type): SType = this - def _coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { + def coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SCode, deepCopy: Boolean): SCode = { value.st match { case SInt64 => value } diff --git a/hail/src/main/scala/is/hail/types/virtual/TArray.scala b/hail/src/main/scala/is/hail/types/virtual/TArray.scala index 9a108228d1e..c073eb2d818 100644 --- a/hail/src/main/scala/is/hail/types/virtual/TArray.scala +++ b/hail/src/main/scala/is/hail/types/virtual/TArray.scala @@ -66,6 +66,4 @@ final case class TArray(elementType: Type) extends TContainer { val subsetElem = elementType.valueSubsetter(subtype.asInstanceOf[TArray].elementType) (a: Any) => a.asInstanceOf[IndexedSeq[Any]].map(subsetElem) } - - override def arrayElementsRepr: TArray = this } diff --git 
a/hail/src/main/scala/is/hail/types/virtual/TBaseStruct.scala b/hail/src/main/scala/is/hail/types/virtual/TBaseStruct.scala index dfb54721478..f4e53ff635f 100644 --- a/hail/src/main/scala/is/hail/types/virtual/TBaseStruct.scala +++ b/hail/src/main/scala/is/hail/types/virtual/TBaseStruct.scala @@ -27,8 +27,6 @@ abstract class TBaseStruct extends Type { def fields: IndexedSeq[Field] - lazy val fieldIdx: collection.Map[String, Int] = toMapFast(fields)(_.name, _.index) - override def children: Seq[Type] = types def size: Int diff --git a/hail/src/main/scala/is/hail/types/virtual/TContainer.scala b/hail/src/main/scala/is/hail/types/virtual/TContainer.scala index 71afda8e8cd..4fa6ac4dafe 100644 --- a/hail/src/main/scala/is/hail/types/virtual/TContainer.scala +++ b/hail/src/main/scala/is/hail/types/virtual/TContainer.scala @@ -9,6 +9,4 @@ abstract class TContainer extends TIterable { && (a1.asInstanceOf[Iterable[_]].size == a2.asInstanceOf[Iterable[_]].size) && a1.asInstanceOf[Iterable[_]].zip(a2.asInstanceOf[Iterable[_]]) .forall { case (e1, e2) => elementType.valuesSimilar(e1, e2, tolerance, absolute) }) - - def arrayElementsRepr: TArray } diff --git a/hail/src/main/scala/is/hail/types/virtual/TDict.scala b/hail/src/main/scala/is/hail/types/virtual/TDict.scala index b28a1cf5a40..6af3f7a78f6 100644 --- a/hail/src/main/scala/is/hail/types/virtual/TDict.scala +++ b/hail/src/main/scala/is/hail/types/virtual/TDict.scala @@ -86,6 +86,4 @@ final case class TDict(keyType: Type, valueType: Type) extends TContainer { val subsetValue = valueType.valueSubsetter(subdict.valueType) (a: Any) => a.asInstanceOf[Map[Any, Any]].mapValues(subsetValue) } - - override def arrayElementsRepr: TArray = TArray(elementType) } diff --git a/hail/src/main/scala/is/hail/types/virtual/TSet.scala b/hail/src/main/scala/is/hail/types/virtual/TSet.scala index 334d7da39e2..11c276d0746 100644 --- a/hail/src/main/scala/is/hail/types/virtual/TSet.scala +++ b/hail/src/main/scala/is/hail/types/virtual/TSet.scala @@ -61,6 +61,4 @@ final case class TSet(elementType: Type) extends TContainer { assert(elementType == subtype.asInstanceOf[TSet].elementType) identity } - - override def arrayElementsRepr: TArray = TArray(elementType) } diff --git a/hail/src/main/scala/is/hail/types/virtual/TStruct.scala b/hail/src/main/scala/is/hail/types/virtual/TStruct.scala index 9ddbc27952f..d068c0fbe57 100644 --- a/hail/src/main/scala/is/hail/types/virtual/TStruct.scala +++ b/hail/src/main/scala/is/hail/types/virtual/TStruct.scala @@ -39,6 +39,8 @@ final case class TStruct(fields: IndexedSeq[Field]) extends TBaseStruct { lazy val types: Array[Type] = fields.map(_.typ).toArray + lazy val fieldIdx: collection.Map[String, Int] = toMapFast(fields)(_.name, _.index) + lazy val fieldNames: Array[String] = fields.map(_.name).toArray def size: Int = fields.length diff --git a/hail/src/main/scala/is/hail/utils/richUtils/RichCodeRegion.scala b/hail/src/main/scala/is/hail/utils/richUtils/RichCodeRegion.scala index 35b8f053182..3969dd4b013 100644 --- a/hail/src/main/scala/is/hail/utils/richUtils/RichCodeRegion.scala +++ b/hail/src/main/scala/is/hail/utils/richUtils/RichCodeRegion.scala @@ -2,7 +2,6 @@ package is.hail.utils.richUtils import is.hail.annotations.{Region, RegionPool} import is.hail.asm4s._ -import is.hail.expr.ir.EmitCodeBuilder class RichCodeRegion(val region: Code[Region]) extends AnyVal { def allocate(alignment: Code[Long], n: Code[Long]): Code[Long] = @@ -46,9 +45,9 @@ class RichCodeRegion(val region: Code[Region]) extends AnyVal { def 
totalManagedBytes(): Code[Long] = region.invoke[Long]("totalManagedBytes") - def allocateSharedChunk(nBytes: Code[Long]): Code[Long] = - region.invoke[Long, Long]("allocateSharedChunk", nBytes) + def allocateNDArray(nBytes: Code[Long]): Code[Long] = + region.invoke[Long, Long]("allocateNDArray", nBytes) - def trackSharedChunk(cb: EmitCodeBuilder, addr: Code[Long]): Unit = - cb += region.invoke[Long, Unit]("trackSharedChunk", addr) + def trackNDArray(addr: Code[Long]): Code[Unit] = + region.invoke[Long, Unit]("trackNDArray", addr) } diff --git a/hail/src/main/scala/is/hail/variant/ReferenceGenome.scala b/hail/src/main/scala/is/hail/variant/ReferenceGenome.scala index 308e91a5644..ad0e581f57e 100644 --- a/hail/src/main/scala/is/hail/variant/ReferenceGenome.scala +++ b/hail/src/main/scala/is/hail/variant/ReferenceGenome.scala @@ -137,9 +137,7 @@ case class ReferenceGenome(name: String, contigs: Array[String], lengths: Map[St Interval(start, end, includesStart = true, includesEnd = false) } - private var fastaFilePath: String = _ - private var fastaIndexPath: String = _ - @transient private var fastaReaderCfg: FASTAReaderConfig = _ + private var fastaReaderCfg: FASTAReaderConfig = _ def contigParser = Parser.oneOfLiteral(contigs) @@ -309,7 +307,7 @@ case class ReferenceGenome(name: String, contigs: Array[String], lengths: Map[St s"@1", badContigs.truncatable("\n ")) } - def hasSequence: Boolean = fastaFilePath != null + def hasSequence: Boolean = fastaReaderCfg != null def addSequence(ctx: ExecuteContext, fastaFile: String, indexFile: String) { if (hasSequence) @@ -321,10 +319,7 @@ case class ReferenceGenome(name: String, contigs: Array[String], lengths: Map[St fatal(s"FASTA file '$fastaFile' does not exist.") if (!fs.exists(indexFile)) fatal(s"FASTA index file '$indexFile' does not exist.") - fastaFilePath = fastaFile - fastaIndexPath = indexFile - // assumption, fastaFile and indexFile will not move or change for the entire duration of a hail pipeline val localIndexFile = ExecuteContext.createTmpPathNoCleanup(tmpdir, "fasta-reader-add-seq", "fai") fs.copyRecode(indexFile, localIndexFile) @@ -346,7 +341,15 @@ case class ReferenceGenome(name: String, contigs: Array[String], lengths: Map[St if (invalidLengths.nonEmpty) fatal(s"Contig sizes in FASTA '$fastaFile' do not match expected sizes for reference genome '$name':\n " + s"@1", invalidLengths.truncatable("\n ")) - heal(tmpdir, fs) + + val fastaPath = fs.fileStatus(fastaFile).getPath.toString + val indexPath = fs.fileStatus(indexFile).getPath.toString + fastaReaderCfg = FASTAReaderConfig(ctx.localTmpdir, fs.broadcast, this, fastaPath, indexPath) + } + + def addSequenceFromReader(tmpdir: String, fs: FS, fastaFile: String, indexFile: String, blockSize: Int, capacity: Int): ReferenceGenome = { + fastaReaderCfg = FASTAReaderConfig(tmpdir, fs.broadcast, this, fastaFile, indexFile, blockSize, capacity) + this } @transient private lazy val realFastaReader: ThreadLocal[FASTAReader] = new ThreadLocal[FASTAReader] @@ -375,15 +378,12 @@ case class ReferenceGenome(name: String, contigs: Array[String], lengths: Map[St def removeSequence(): Unit = { if (!hasSequence) fatal(s"Reference genome '$name' does not have sequence loaded.") - fastaFilePath = null - fastaIndexPath = null fastaReaderCfg = null } - private var chainFiles: Map[String, String] = Map.empty - @transient private[this] var liftoverMap: mutable.Map[String, LiftOver] = _ + private[this] var liftoverMaps: Map[String, LiftOver] = Map.empty[String, LiftOver] - def hasLiftover(destRGName: 
String): Boolean = chainFiles.contains(destRGName) + def hasLiftover(destRGName: String): Boolean = liftoverMaps.contains(destRGName) def addLiftover(ctx: ExecuteContext, chainFile: String, destRGName: String): Unit = { if (name == destRGName) @@ -397,26 +397,31 @@ case class ReferenceGenome(name: String, contigs: Array[String], lengths: Map[St if (!fs.exists(chainFile)) fatal(s"Chain file '$chainFile' does not exist.") - val chainFilePath = fs.fileStatus(chainFile).getPath - val lo = LiftOver(tmpdir, fs, chainFilePath) + val chainFilePath = fs.fileStatus(chainFile).getPath.toString + val lo = LiftOver(tmpdir, ctx.fs, chainFilePath) + val destRG = ReferenceGenome.getReference(destRGName) lo.checkChainFile(this, destRG) - chainFiles += destRGName -> chainFile - heal(tmpdir, fs) + liftoverMaps += destRGName -> lo + } + + def addLiftoverFromFS(tmpdir: String, fs: FS, chainFilePath: String, destRGName: String): ReferenceGenome = { + val lo = new LiftOver(tmpdir, fs.broadcast, chainFilePath) + liftoverMaps += destRGName -> lo + this } def getLiftover(destRGName: String): LiftOver = { if (!hasLiftover(destRGName)) fatal(s"Chain file has not been loaded for source reference '$name' and destination reference '$destRGName'.") - liftoverMap(destRGName) + liftoverMaps(destRGName) } def removeLiftover(destRGName: String): Unit = { if (!hasLiftover(destRGName)) fatal(s"liftover does not exist from reference genome '$name' to '$destRGName'.") - chainFiles -= destRGName - liftoverMap -= destRGName + liftoverMaps -= destRGName } def liftoverLocus(destRGName: String, l: Locus, minMatch: Double): (Locus, Boolean) = { @@ -429,32 +434,6 @@ case class ReferenceGenome(name: String, contigs: Array[String], lengths: Map[St lo.queryInterval(interval, minMatch) } - def heal(tmpdir: String, fs: FS): Unit = this.synchronized { - // add liftovers - if (liftoverMap == null) { - liftoverMap = mutable.Map.empty - } - // NOTE: it shouldn't be possible for the liftover map to have more elements than the chain file - // since removeLiftover updates both maps, so we don't check to see if liftoverMap has - // keys that are not in chainFiles - for ((destRGName, chainFile) <- chainFiles) { - val chainFilePath = fs.fileStatus(chainFile).getPath - liftoverMap.get(destRGName) match { - case Some(lo) if lo.chainFile == chainFilePath => // do nothing - case _ => liftoverMap += destRGName -> LiftOver(tmpdir, fs, chainFilePath) - } - } - - // add sequence - if (fastaFilePath != null) { - val fastaPath = fs.fileStatus(fastaFilePath).getPath - val indexPath = fs.fileStatus(fastaIndexPath).getPath - if (fastaReaderCfg == null || fastaReaderCfg.fastaFile != fastaPath || fastaReaderCfg.indexFile != indexPath) { - fastaReaderCfg = FASTAReaderConfig(tmpdir, fs, this, fastaPath, indexPath) - } - } - } - @transient lazy val broadcast: BroadcastValue[ReferenceGenome] = HailContext.backend.broadcast(this) override def hashCode: Int = { @@ -508,6 +487,39 @@ case class ReferenceGenome(name: String, contigs: Array[String], lengths: Map[St implicit val formats: Formats = defaultJSONFormats Serialization.write(toJSON) } + + def codeSetup(localTmpdir: String, cb: EmitClassBuilder[_]): Code[ReferenceGenome] = { + val json = toJSONString + val chunkSize = (1 << 16) - 1 + val nChunks = (json.length() - 1) / chunkSize + 1 + assert(nChunks > 0) + + val chunks = Array.tabulate(nChunks){ i => json.slice(i * chunkSize, (i + 1) * chunkSize) } + val stringAssembler = + chunks.tail.foldLeft[Code[String]](chunks.head) { (c, s) => c.invoke[String, 
String]("concat", s) } + + var rg = Code.invokeScalaObject1[String, ReferenceGenome](ReferenceGenome.getClass, "parse", stringAssembler) + if (hasSequence) { + rg = rg.invoke[String, FS, String, String, Int, Int, ReferenceGenome]( + "addSequenceFromReader", + localTmpdir, + cb.getFS, + fastaReaderCfg.fastaFile, + fastaReaderCfg.indexFile, + fastaReaderCfg.blockSize, + fastaReaderCfg.capacity) + } + + for ((destRG, lo) <- liftoverMaps) { + rg = rg.invoke[String, FS, String, String, ReferenceGenome]( + "addLiftoverFromFS", + localTmpdir, + cb.getFS, + lo.chainFile, + destRG) + } + rg + } } object ReferenceGenome { @@ -618,7 +630,7 @@ object ReferenceGenome { } val rg = ReferenceGenome(name, contigs.result(), lengths.result().toMap, xContigs, yContigs, mtContigs, parInput) - rg.addSequence(ctx, fastaFile, indexFile) + rg.fastaReaderCfg = FASTAReaderConfig(tmpdir, fs.broadcast, rg, fastaFile, indexFile) rg } diff --git a/hail/src/test/resources/backward_compatability/1.0.0/table/0.ht/_SUCCESS b/hail/src/test/resources/backward_compatability/1.0.0/table/0.ht/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hail/src/test/resources/backward_compatability/1.0.0/table/0.ht/globals/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.0.0/table/0.ht/globals/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..c713dbebf15c68d19b80b88db8b30e04b08e7ea4 GIT binary patch literal 311 zcmV-70m%LziwFP!000000A-S0PlGTNhW|^iG{MA-4NdQaWyZt{Gp09$m{Q8ttWZ*l zilqPD0;PO-*{9F@_9Uk*C{RuS1n)(Ogrt)4f~3cfeOM9<93c4jsVO@ll%mG6Ju{vl z_L;dGcwih!cjFjDy)Aqxq_-VGJ{S`~Ro?+k73y@IPexDKXMPDXTvD#iSp4|KJRnR2r6Qww}^R4aKBO5jthT7_!?NQoeO2@ z?A~V_U-dhFon=HADd}KHf3@K4cCPn7riUXa@58gCQpy3{2Rl$kDgdMYaCZ-4^#`?n J6Sjf^001Pjk|6*9 literal 0 HcmV?d00001 diff --git a/hail/src/test/resources/backward_compatability/1.0.0/table/0.ht/globals/parts/part-0 b/hail/src/test/resources/backward_compatability/1.0.0/table/0.ht/globals/parts/part-0 new file mode 100644 index 0000000000000000000000000000000000000000..54692bc5f1c9cb8cb6b75c00c2afe79911e048aa GIT binary patch literal 157 zcmbQqz`(#5vh?s*4~OsnA%GDm1O#FZ%peLxBqbI>s45VZmY>fAk^vJS#fcyy2_(e8 z@W39dg9#*50V1l5!QiqzE0B|%QDkIv=raow14zp7InX#T<0Y8E0~BMi1rktO7yzj+ B95Mg^ literal 0 HcmV?d00001 diff --git a/hail/src/test/resources/backward_compatability/1.0.0/table/0.ht/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.0.0/table/0.ht/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..b0582034d0360ac5598db173bef735eb4244f1e7 GIT binary patch literal 408 zcmV;J0cZXniwFP!000000F6`8Zh|lr{g*y%3453*E`8fDTbAr$QL`6fNX4r*D=jHS zC5C@rfffWd-5WjkoO3Vtus4~wo`d#;vKB-e zH~@R_0H$R6s5KI{Ac04ah7wqq5VhB=7S$A0wg^?8H2uyyCf0=Eg_2pM%q!fwBmBw) zQSQhw5cS6Rohxr_fw*S007)wkYeh=B)~_wEXp~*z$q@4rpnO-fhM-BHf*N17dV?F| zrm(>?8mZt}N|HCCJ}EY=x@pgP?>^xV5oPJ!+uYZW`$oks@o9t8a(Ivj!h%k{ag4A} z7;9|0PlcCR!XT)ydX)Y|vE_8zzX-vA@Kxb_LyiNwIjtNij&`T|ZMJ$DWt(9e-tE+G z#x=-k7|``(evf3B#UO;gw{P}r_kC^Aw>Dhp!-+(k2o7AO2%k|MP4#JRcm6f=EybMh z#SL11p;2-Czpf%{Q9`7mN?&~#iOkGk3_fkiuPS{r%MTu%ku%c&Qv3j)kN{?H1ONa8 CV!=58 literal 0 HcmV?d00001 diff --git a/hail/src/test/resources/backward_compatability/1.0.0/table/0.ht/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.0.0/table/0.ht/rows/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..aacec44874b0a150cace5237bae3a47b9560ed7c GIT binary patch literal 307 zcmV-30nGj%iwFP!000000A*26PlGTN|1Lebq)QYwG`$U$850j?OfQ5(SBh)aQc{YF 
zq~F~Z3Ssfo_kRE0%WDrREGR(Wy{fU&Ofy;0;`p&oYf1nFN`9ZZ+6hvvTB3Va{GgXg zhXE|Z03Eo*u4d83q0t745wjI;1_$y+>}}^sV4%2FOu?N4SoLXa1hp*H*$Cv;;Q^nK z!vgIY(b-NZ+|9C?6`mf0C;AdTp;t-T=I;5J*k%Og~CfZ76b L9LSPE={N=e(26%? literal 0 HcmV?d00001 diff --git a/hail/src/test/resources/backward_compatability/1.0.0/table/0.ht/rows/parts/part-2 b/hail/src/test/resources/backward_compatability/1.0.0/table/0.ht/rows/parts/part-2 new file mode 100644 index 0000000000000000000000000000000000000000..7d06739d8a115ec405c4a1c5d5ad96588600a0eb GIT binary patch literal 319 zcmcC!WME)i;l;?zz`$_$tB1q){}2Ea1_3b#kOTvWNJ=b%P*or*EkBpHrtWZ*l ziiH1e`6?eq?%wl0=kz2m6-ZG+00i%8hJ>V&@{~k}kFA#x3=AOn*RjkRz>}iDvN|&! zB6f-`6YdyC(lU)+qV@{DrqW&wLedeVfbup2_ED(8l|LM~T07_9cMq10K+|(Q1`-7k z>34cC`g^?(){6*kDVF||5UA9qC6x43_vj+}Vr}4_<08L#Ik;-L)h;&GAGJ=K@1F!Sdf&KW^zP^&9q9ueGL6jsB*-w?jKT35`4 zvS_m3KRXZR#wp^Mh<^}=aeP%(W>DCO9{o{(y(P{7Ct6a)e>2WAH5q{Jd1u3}Eh&u3y_Vqi>UOoGThuxDUq pVys}SG6sXo_N>VnMMg%4o=+BFU^INr&S?0Oosofu#TKNM0RZ3G9hLw9 literal 0 HcmV?d00001 diff --git a/hail/src/test/resources/backward_compatability/1.0.0/table/1.ht/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.0.0/table/1.ht/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..b0582034d0360ac5598db173bef735eb4244f1e7 GIT binary patch literal 408 zcmV;J0cZXniwFP!000000F6`8Zh|lr{g*y%3453*E`8fDTbAr$QL`6fNX4r*D=jHS zC5C@rfffWd-5WjkoO3Vtus4~wo`d#;vKB-e zH~@R_0H$R6s5KI{Ac04ah7wqq5VhB=7S$A0wg^?8H2uyyCf0=Eg_2pM%q!fwBmBw) zQSQhw5cS6Rohxr_fw*S007)wkYeh=B)~_wEXp~*z$q@4rpnO-fhM-BHf*N17dV?F| zrm(>?8mZt}N|HCCJ}EY=x@pgP?>^xV5oPJ!+uYZW`$oks@o9t8a(Ivj!h%k{ag4A} z7;9|0PlcCR!XT)ydX)Y|vE_8zzX-vA@Kxb_LyiNwIjtNij&`T|ZMJ$DWt(9e-tE+G z#x=-k7|``(evf3B#UO;gw{P}r_kC^Aw>Dhp!-+(k2o7AO2%k|MP4#JRcm6f=EybMh z#SL11p;2-Czpf%{Q9`7mN?&~#iOkGk3_fkiuPS{r%MTu%ku%c&Qv3j)kN{?H1ONa8 CV!=58 literal 0 HcmV?d00001 diff --git a/hail/src/test/resources/backward_compatability/1.0.0/table/1.ht/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.0.0/table/1.ht/rows/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..3c51fc30edad431394ef46951b77d026266f10b0 GIT binary patch literal 316 zcmV-C0mJ?uiwFP!000000CiGNZ-XEd|1O-?geI%DuHiQAx-98osogHd14Ud(K@8$H z*ZAEBibk`H3-A5@@dDgHfdvH!ycH!@nrS8rnjPPFaY+e)K*_IDQ(8f+RZaAbW#8zf z(7p$A-$Q#Yv8`FKwrH@1qQ^{ytImO(5j)#?5VTOt6+>`i0akppCW0E4>ZAu!<8Xk> zNMVL{jOgT1Dcp>*krj^ag9rNJZ_z=Ly1IKlMy6Rn!Kc8(0x~-bRiBtAGa`iqMVNCa z+^*57E;&uQraxdjM-1dVJCu@BT%@hJUm`O~y1PcV;VE7*xke*yG#AFR34eGBSKc

j}Zi$jUW8AnZYR*oq6JH&4R#wk3HkG0x0R4s_^XG*_y179$Ose$0x<_>2Ii#1A|S3}PRq|{VqjumOk_-g$Ud-V rU}j>hV5~9*gUj};$r(jPMu(nH7GPjBe9q2j_>!HGfrrHwq?G{x;bt9@ literal 0 HcmV?d00001 diff --git a/hail/src/test/resources/backward_compatability/1.0.0/table/1.ht/rows/parts/part-1 b/hail/src/test/resources/backward_compatability/1.0.0/table/1.ht/rows/parts/part-1 new file mode 100644 index 0000000000000000000000000000000000000000..166700bf1c4ce0403e82afb9dc3239db95256958 GIT binary patch literal 207 zcmX@jz`($`!i$md@K+Cq@Bg8Ikr5~i1Y!=%49rQ1ML=A|oR*)@#K6SBn8=s}k$qs# xz|6#0!B}Ms2AAzwlQW8pj1E1YEWp5M_?(^5@FhDV0}qQWNGr$^CR$s<005S`J0$=B literal 0 HcmV?d00001 diff --git a/hail/src/test/resources/backward_compatability/1.0.0/table/1.ht/rows/parts/part-2 b/hail/src/test/resources/backward_compatability/1.0.0/table/1.ht/rows/parts/part-2 new file mode 100644 index 0000000000000000000000000000000000000000..47e8b57786b2d4a799554292d39c9e7b7387e316 GIT binary patch literal 207 zcmX@jz`($`!i$ml@K+Cq@Bg8Ikr5~i1Y!=%49rQ1ML=A|oR*)@#K6SBn8=s}k$qs# xz|6#0!B}Ms2AAzwlQW8pj1E1YEWp5M_?(^5@FhDV0}qQWNGr$^7Ft`v005Z_J1GDF literal 0 HcmV?d00001 diff --git a/hail/src/test/resources/backward_compatability/1.0.0/table/2.ht/_SUCCESS b/hail/src/test/resources/backward_compatability/1.0.0/table/2.ht/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hail/src/test/resources/backward_compatability/1.0.0/table/2.ht/globals/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.0.0/table/2.ht/globals/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..f0775f8938353e079b598a7ee5b044631740ea70 GIT binary patch literal 319 zcmV-F0l@wriwFP!000000F{zaYlAQph5t*QHgGVi*pjzt=@|53)NT(G88OB-G#ZI< zn-l;0qQ+S5(|o(Fr z9wYXd+AFwY97%g+X^FZHe95Kjj6yaLlYq)T1L`Q$;mRM6Tx*us4~wo`d#;vKB-e zH~@R_0H$R6s5KI{Ac04ah7wqq5VhB=7S$A0wg^?8H2uyyCf0=Eg_2pM%q!fwBmBw) zQSQhw5cS6Rohxr_fw*S007)wkYeh=B)~_wEXp~*z$q@4rpnO-fhM-BHf*N17dV?F| zrm(>?8mZt}N|HCCJ}EY=x@pgP?>^xV5oPJ!+uYZW`$oks@o9t8a(Ivj!h%k{ag4A} z7;9|0PlcCR!XT)ydX)Y|vE_8zzX-vA@Kxb_LyiNwIjtNij&`T|ZMJ$DWt(9e-tE+G z#x=-k7|``(evf3B#UO;gw{P}r_kC^Aw>Dhp!-+(k2o7AO2%k|MP4#JRcm6f=EybMh z#SL11p;2-Czpf%{Q9`7mN?&~#iOkGk3_fkiuPS{r%MTu%ku%c&Qv3j)kN{?H1ONa8 CV!=58 literal 0 HcmV?d00001 diff --git a/hail/src/test/resources/backward_compatability/1.0.0/table/2.ht/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.0.0/table/2.ht/rows/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..956f0369954eab05c0735e4cc7c7e85098bbed64 GIT binary patch literal 315 zcmV-B0mS|viwFP!000000F_ctPlGTN|1Lebq)QYCTY3|g8Iv8%m>wh?x>8)TmXcCb zT=?#`P{^1kx%9o?zgHeyK#2tf2)vdRR+?!hOPcTBHc3SZfI!KwLt9xvqE$omh2`Jq zxzxS~3*SRqF0rjy7+5q6pcpVy;ktJqXT;8S90eT|bHy0kT7Z=wor$1^r8*se%s3q2 zGBTK>4I?^zR0_9~Y+{9@hv1Pu`D?V3q^a+o_OWReQ1EMDwSdgtg{n`?(;1OLf?~`$ z6zVg8^MWjD-iX N?iZ7CEUsn(0036&lDGf> literal 0 HcmV?d00001 diff --git a/hail/src/test/resources/backward_compatability/1.0.0/table/2.ht/rows/parts/part-0 b/hail/src/test/resources/backward_compatability/1.0.0/table/2.ht/rows/parts/part-0 new file mode 100644 index 0000000000000000000000000000000000000000..cc71bbe10be36029b36e27b7343db71da2d92024 GIT binary patch literal 137 zcmZo=U|^UB#E%$PcrgNLhQnVy9KQc&WMC9z2w-3kb6{p?U|?l1N=htZXAr1jXK+Z% z&u5ZiU}R-r&}9%vWMvRaGGkDDU~j}Az$DBdP{GQeSY^z}@R8xNJx~claz>Gn(V@>Q hOco4AhR>ND859j)ax#2i;sMIB*a8V=AO;%A003x(8qoj% literal 0 HcmV?d00001 diff --git a/hail/src/test/resources/backward_compatability/1.0.0/table/2.ht/rows/parts/part-1 
b/hail/src/test/resources/backward_compatability/1.0.0/table/2.ht/rows/parts/part-1 new file mode 100644 index 0000000000000000000000000000000000000000..c8d59ee1d8c470e2fd4b1d815f0e89b8f13344a1 GIT binary patch literal 144 zcmeBSU|_IjWMFv2xWbDONHZM%>f!MHKO=)8BLgFYftUj`I|BnNgHcjq5j%rG6+44N zT7Ev06aynG1A{JuKq4!HP?8yg;sbjl1_35v27wAz2E{64Muv|Jm+gT{7?LxJjEoL_ rW?`~mFfx43?8u;K_>z+$nFlDvVq3ys05okbgFMq*hQjb42=N{KsEya4_F=j literal 0 HcmV?d00001 diff --git a/hail/src/test/resources/backward_compatability/1.0.0/table/3.ht/_SUCCESS b/hail/src/test/resources/backward_compatability/1.0.0/table/3.ht/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hail/src/test/resources/backward_compatability/1.0.0/table/3.ht/globals/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.0.0/table/3.ht/globals/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..f82128c2b40288c3f6e4ba8cae20f86302e05758 GIT binary patch literal 327 zcmV-N0l5AjiwFP!000000F{zoPlGTN#lK6RG{HoHu%&OpoH5yh*`^N?VoE7nvqDKJ zDlU9?TmF|H1`X&*>05N7_rlo}Ys$e8+wQiL8MA^VhulG4;1m#hqwupH`aJx}>9v1$L@F_65 zVvdw07yZqHe`hcKDou&_1MwNhCpF3hinX|--wJT-g}3}A`OR&e|Fo%R6O_%JjXoC8 ZpLb0eDFF=I;^r1a%@2QhBWvCQ006(+nr;99 literal 0 HcmV?d00001 diff --git a/hail/src/test/resources/backward_compatability/1.0.0/table/3.ht/globals/parts/part-0 b/hail/src/test/resources/backward_compatability/1.0.0/table/3.ht/globals/parts/part-0 new file mode 100644 index 0000000000000000000000000000000000000000..0c942b8c1535820d9930dca42cced0ebd658938c GIT binary patch literal 100 zcmYdbU|>iA;>nC5OAmkbaQOb8k-?CWk%5un6Q`I1GXrx{Vi6EmF{kC{GchnRFeWl4 z$uTT^V9&tJ#8|;tWz5L%LEy4IYjQ@Bk

&lLZ(U4WF|!8op#_WZ+@31!-dd00$2l Ae*gdg literal 0 HcmV?d00001 diff --git a/hail/src/test/resources/backward_compatability/1.0.0/table/3.ht/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.0.0/table/3.ht/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..b0582034d0360ac5598db173bef735eb4244f1e7 GIT binary patch literal 408 zcmV;J0cZXniwFP!000000F6`8Zh|lr{g*y%3453*E`8fDTbAr$QL`6fNX4r*D=jHS zC5C@rfffWd-5WjkoO3Vtus4~wo`d#;vKB-e zH~@R_0H$R6s5KI{Ac04ah7wqq5VhB=7S$A0wg^?8H2uyyCf0=Eg_2pM%q!fwBmBw) zQSQhw5cS6Rohxr_fw*S007)wkYeh=B)~_wEXp~*z$q@4rpnO-fhM-BHf*N17dV?F| zrm(>?8mZt}N|HCCJ}EY=x@pgP?>^xV5oPJ!+uYZW`$oks@o9t8a(Ivj!h%k{ag4A} z7;9|0PlcCR!XT)ydX)Y|vE_8zzX-vA@Kxb_LyiNwIjtNij&`T|ZMJ$DWt(9e-tE+G z#x=-k7|``(evf3B#UO;gw{P}r_kC^Aw>Dhp!-+(k2o7AO2%k|MP4#JRcm6f=EybMh z#SL11p;2-Czpf%{Q9`7mN?&~#iOkGk3_fkiuPS{r%MTu%ku%c&Qv3j)kN{?H1ONa8 CV!=58 literal 0 HcmV?d00001 diff --git a/hail/src/test/resources/backward_compatability/1.0.0/table/3.ht/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.0.0/table/3.ht/rows/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..7561d6c8773620b1d15b9843f0137d53948ef946 GIT binary patch literal 322 zcmV-I0loeoiwFP!000000F_crPlGTR{x3bbq)QwSMtT$GjL8mWOb-$cT`8_vOGzmz zF8p^}zA}s+?4|GfeD(7J3M?o<;JGNV(o8d1&}{#*iAzcV1WJA!n$iklt!kn#Ec--{ zg?1g7yAIlNiEYh%&!WBuMUR;ZSDgb5Mm*S#qo9Rit{8(G3$Ws?H4)UXR3|-<8pi+( z87a)rh7q0IDTSL!HnC!$`{0p2xdGZqQdc(*`^Yp4DEKw7UO;B|Le)Fw$&5%LK@sL0 ziec91P?wx0UDGcxts_Qqo*hcbDK661+%J(CCEZ=4%lH&en4F^tH<}aU*%iOL4_8hQ z)_G3VACizuayEOIiH;VG*_RQx&e{)tNq+S@=Ra-cycCO{&ejeH?6=x7PAhiC;?0aJycih{fAw(q{-2S-kdcvrk>L}km;*Bdb5ddv5LYp$<>xaoFflMD zGA79}EPPLaPWqa1-j3OhWL(eA*FfbZEXJ<5g$vxy N&y>pG82~bY0RUtK8^{0v literal 0 HcmV?d00001 diff --git a/hail/src/test/resources/backward_compatability/1.0.0/table/3.ht/rows/parts/part-2 b/hail/src/test/resources/backward_compatability/1.0.0/table/3.ht/rows/parts/part-2 new file mode 100644 index 0000000000000000000000000000000000000000..0047602d7518f987c21eb79e59a62b73952f7eb6 GIT binary patch literal 113 zcmd00U|={6#G4sccrh{`{_5fI{XZjvAtNILBf}?7F$ZP_=A^_TAg*Fg%g<+GU}9iQ zWK5D{Sopx6ftiW1g0ae&k>LaPWqa1-j3OhWL(eA*FfbZEXJ<5g$vxy N&yvdE82~bY0RUv+8_WOz literal 0 HcmV?d00001 diff --git a/hail/src/test/resources/regressionLinear.weights b/hail/src/test/resources/regressionLinear.weights deleted file mode 100644 index 65eab2ae377..00000000000 --- a/hail/src/test/resources/regressionLinear.weights +++ /dev/null @@ -1,10 +0,0 @@ -Sample Weight1 Weight2 -X 8.4 2.2 -A 0.2 4 -B 17.2 0 -C 1.0 8 -Y 0 2.1 -D 5.5 1.1 -E 1.2 6.7 -F 4.0 6.2 -Z 2.0 11.43 diff --git a/hail/src/test/scala/is/hail/annotations/RegionSuite.scala b/hail/src/test/scala/is/hail/annotations/RegionSuite.scala index 808ee8ec7b7..b5d47a19cf4 100644 --- a/hail/src/test/scala/is/hail/annotations/RegionSuite.scala +++ b/hail/src/test/scala/is/hail/annotations/RegionSuite.scala @@ -1,12 +1,9 @@ package is.hail.annotations -import is.hail.expr.ir.LongArrayBuilder import is.hail.utils.{info, using} import org.scalatest.testng.TestNGSuite import org.testng.annotations.Test -import scala.collection.mutable.ArrayBuffer - class RegionSuite extends TestNGSuite { @Test def testRegionSizes() { @@ -235,45 +232,4 @@ class RegionSuite extends TestNGSuite { assert(pool.numFreeBlocks() == 2) } } - - @Test - def testChunkCache(): Unit = { - RegionPool.scoped { pool => - - val operations = ArrayBuffer[(String, Long)]() - - def allocate(numBytes: Long): Long = { - val pointer = Memory.malloc(numBytes) - 
operations += (("allocate", numBytes)) - pointer - } - def free(ptrToFree: Long): Unit = { - operations += (("free", 0L)) - Memory.free(ptrToFree) - } - val chunkCache = new ChunkCache(allocate, free) - val ab = new LongArrayBuilder() - var i = 0 - ab += chunkCache.getChunk(pool, 400L)._1 - chunkCache.freeChunkToCache(ab.pop()) - ab += chunkCache.getChunk(pool, 50L)._1 - assert(operations(0)==("allocate", 512)) - //512 size chunk freed from cache to not exceed peak memory - assert(operations(1)==("free", 0L)) - assert(operations(2)==("allocate", 64)) - chunkCache.freeChunkToCache(ab.pop()) - //No additional allocate should be made as uses cache - ab += chunkCache.getChunk(pool, 50L)._1 - assert(operations.length == 3) - ab += chunkCache.getChunk(pool, 40L)._1 - chunkCache.freeChunksToCache(ab) - assert(operations(3) == ("allocate", 64)) - assert(operations.length == 4) - chunkCache.freeAll(pool) - assert(operations(4)==("free", 0L)) - assert(operations(5)==("free", 0L)) - assert(operations.length == 6) - - } - } } diff --git a/hail/src/test/scala/is/hail/expr/ir/ArrayFunctionsSuite.scala b/hail/src/test/scala/is/hail/expr/ir/ArrayFunctionsSuite.scala index b324820bb36..b1c34a16341 100644 --- a/hail/src/test/scala/is/hail/expr/ir/ArrayFunctionsSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/ArrayFunctionsSuite.scala @@ -208,15 +208,15 @@ class ArrayFunctionsSuite extends HailSuite { @Test def indexing() { val a = IRArray(0, null, 2) - assertEvalsTo(invoke("indexArray", TInt32, a, I32(0)), 0) - assertEvalsTo(invoke("indexArray", TInt32, a, I32(1)), null) - assertEvalsTo(invoke("indexArray", TInt32, a, I32(2)), 2) - assertEvalsTo(invoke("indexArray", TInt32, a, I32(-1)), 2) - assertEvalsTo(invoke("indexArray", TInt32, a, I32(-3)), 0) - assertFatal(invoke("indexArray", TInt32, a, I32(3)), "array index out of bounds") - assertFatal(invoke("indexArray", TInt32, a, I32(-4)), "array index out of bounds") - assertEvalsTo(invoke("indexArray", TInt32, naa, I32(2)), null) - assertEvalsTo(invoke("indexArray", TInt32, a, NA(TInt32)), null) + assertEvalsTo(invoke("indexArray", TInt32, a, I32(0), Str("")), 0) + assertEvalsTo(invoke("indexArray", TInt32, a, I32(1), Str("")), null) + assertEvalsTo(invoke("indexArray", TInt32, a, I32(2), Str("")), 2) + assertEvalsTo(invoke("indexArray", TInt32, a, I32(-1), Str("")), 2) + assertEvalsTo(invoke("indexArray", TInt32, a, I32(-3), Str("")), 0) + assertFatal(invoke("indexArray", TInt32, a, I32(3), Str("")), "array index out of bounds") + assertFatal(invoke("indexArray", TInt32, a, I32(-4), Str("")), "array index out of bounds") + assertEvalsTo(invoke("indexArray", TInt32, naa, I32(2), Str("")), null) + assertEvalsTo(invoke("indexArray", TInt32, a, NA(TInt32), Str("")), null) } @Test def slicing() { diff --git a/hail/src/test/scala/is/hail/expr/ir/BlockMatrixIRSuite.scala b/hail/src/test/scala/is/hail/expr/ir/BlockMatrixIRSuite.scala index fa7976e1468..a8f05dadcd4 100644 --- a/hail/src/test/scala/is/hail/expr/ir/BlockMatrixIRSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/BlockMatrixIRSuite.scala @@ -45,10 +45,10 @@ class BlockMatrixIRSuite extends HailSuite { } @Test def testBlockMatrixMap() { - val sqrtIR = BlockMatrixMap(ones, "element", Apply("sqrt", FastIndexedSeq(), FastIndexedSeq(Ref("element", TFloat64)), TFloat64, ErrorIDs.NO_ERROR), false) + val sqrtIR = BlockMatrixMap(ones, "element", Apply("sqrt", FastIndexedSeq(), FastIndexedSeq(Ref("element", TFloat64)), TFloat64), false) val negIR = BlockMatrixMap(ones, "element", 
ApplyUnaryPrimOp(Negate(), Ref("element", TFloat64)), false) - val logIR = BlockMatrixMap(ones, "element", Apply("log", FastIndexedSeq(), FastIndexedSeq(Ref("element", TFloat64)), TFloat64, ErrorIDs.NO_ERROR), true) - val absIR = BlockMatrixMap(ones, "element", Apply("abs", FastIndexedSeq(), FastIndexedSeq(Ref("element", TFloat64)), TFloat64, ErrorIDs.NO_ERROR), false) + val logIR = BlockMatrixMap(ones, "element", Apply("log", FastIndexedSeq(), FastIndexedSeq(Ref("element", TFloat64)), TFloat64), true) + val absIR = BlockMatrixMap(ones, "element", Apply("abs", FastIndexedSeq(), FastIndexedSeq(Ref("element", TFloat64)), TFloat64), false) assertBMEvalsTo(sqrtIR, BDM.fill[Double](3, 3)(1)) assertBMEvalsTo(negIR, BDM.fill[Double](3, 3)(-1)) diff --git a/hail/src/test/scala/is/hail/expr/ir/ETypeSuite.scala b/hail/src/test/scala/is/hail/expr/ir/ETypeSuite.scala index 2d62c2e3b53..3490d95f16d 100644 --- a/hail/src/test/scala/is/hail/expr/ir/ETypeSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/ETypeSuite.scala @@ -156,14 +156,4 @@ class ETypeSuite extends HailSuite { assert(encodeDecode(pStructContainingNDArray, eStructContainingNDArray, pOnlyReadB, dataStruct) == Row(3)) } - - @Test def testArrayOfString(): Unit = { - val etype = EArray(EBinary(false), false) - val toEncode = PCanonicalArray(PCanonicalStringRequired, false) - val toDecode = PCanonicalArray(PCanonicalStringOptional, false) - val longListOfStrings = (0 until 36).map(idx => s"foo_name_sample_${idx}") - val data = longListOfStrings - - assert(encodeDecode(toEncode, etype, toDecode, data) == data) - } } diff --git a/hail/src/test/scala/is/hail/expr/ir/ForwardLetsSuite.scala b/hail/src/test/scala/is/hail/expr/ir/ForwardLetsSuite.scala index a4db25ff2ce..53dcefd7c30 100644 --- a/hail/src/test/scala/is/hail/expr/ir/ForwardLetsSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/ForwardLetsSuite.scala @@ -33,7 +33,7 @@ class ForwardLetsSuite extends HailSuite { val y = Ref("y", TInt32) Array( NDArrayMap(In(1, TNDArray(TInt32, Nat(1))), "y", x + y), - NDArrayMap2(In(1, TNDArray(TInt32, Nat(1))), In(2, TNDArray(TInt32, Nat(1))), "y", "z", x + y + Ref("z", TInt32), ErrorIDs.NO_ERROR), + NDArrayMap2(In(1, TNDArray(TInt32, Nat(1))), In(2, TNDArray(TInt32, Nat(1))), "y", "z", x + y + Ref("z", TInt32)), TailLoop("f", FastIndexedSeq("y" -> I32(0)), If(y < x, Recur("f", FastIndexedSeq[IR](y - I32(1)), TInt32), x)) ).map(ir => Array[IR](Let("x", In(0, TInt32) + In(0, TInt32), ir))) } diff --git a/hail/src/test/scala/is/hail/expr/ir/FunctionSuite.scala b/hail/src/test/scala/is/hail/expr/ir/FunctionSuite.scala index 012c3e68a36..1aef3d03d39 100644 --- a/hail/src/test/scala/is/hail/expr/ir/FunctionSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/FunctionSuite.scala @@ -28,14 +28,14 @@ class ScalaTestCompanion { object TestRegisterFunctions extends RegistryFunctions { def registerAll() { - registerIR1("addone", TInt32, TInt32)((_, a, _) => ApplyBinaryPrimOp(Add(), a, I32(1))) + registerIR1("addone", TInt32, TInt32)((_, a) => ApplyBinaryPrimOp(Add(), a, I32(1))) registerJavaStaticFunction("compare", Array(TInt32, TInt32), TInt32, null)(classOf[java.lang.Integer], "compare") registerScalaFunction("foobar1", Array(), TInt32, null)(ScalaTestObject.getClass, "testFunction") registerScalaFunction("foobar2", Array(), TInt32, null)(ScalaTestCompanion.getClass, "testFunction") registerSCode2("testCodeUnification", tnum("x"), tv("x", "int32"), tv("x"), null) { - case (_, cb, rt, a, b, _) => primitive(a.asInt.intCode(cb) + b.asInt.intCode(cb)) + case 
(_, cb, rt, a, b) => primitive(a.asInt.intCode(cb) + b.asInt.intCode(cb)) } - registerSCode1("testCodeUnification2", tv("x"), tv("x"), null) { case (_, cb, rt, a, _) => a } + registerSCode1("testCodeUnification2", tv("x"), tv("x"), null) { case (_, cb, rt, a) => a } } } @@ -47,7 +47,7 @@ class FunctionSuite extends HailSuite { def lookup(meth: String, rt: Type, types: Type*)(irs: IR*): IR = { val l = IRFunctionRegistry.lookupUnseeded(meth, rt, types).get - l(Seq(), irs, ErrorIDs.NO_ERROR) + l(Seq(), irs) } @Test diff --git a/hail/src/test/scala/is/hail/expr/ir/IRSuite.scala b/hail/src/test/scala/is/hail/expr/ir/IRSuite.scala index c0fa791a1d8..9be2b14f0f9 100644 --- a/hail/src/test/scala/is/hail/expr/ir/IRSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/IRSuite.scala @@ -1256,9 +1256,9 @@ class IRSuite extends HailSuite { } @Test def testArrayRef() { - assertEvalsTo(ArrayRef(MakeArray(FastIndexedSeq(I32(5), NA(TInt32)), TArray(TInt32)), I32(0), ErrorIDs.NO_ERROR), 5) - assertEvalsTo(ArrayRef(MakeArray(FastIndexedSeq(I32(5), NA(TInt32)), TArray(TInt32)), I32(1), ErrorIDs.NO_ERROR), null) - assertEvalsTo(ArrayRef(MakeArray(FastIndexedSeq(I32(5), NA(TInt32)), TArray(TInt32)), NA(TInt32), ErrorIDs.NO_ERROR), null) + assertEvalsTo(ArrayRef(MakeArray(FastIndexedSeq(I32(5), NA(TInt32)), TArray(TInt32)), I32(0)), 5) + assertEvalsTo(ArrayRef(MakeArray(FastIndexedSeq(I32(5), NA(TInt32)), TArray(TInt32)), I32(1)), null) + assertEvalsTo(ArrayRef(MakeArray(FastIndexedSeq(I32(5), NA(TInt32)), TArray(TInt32)), NA(TInt32)), null) assertFatal(ArrayRef(MakeArray(FastIndexedSeq(I32(5)), TArray(TInt32)), I32(2)), "array index out of bounds") } @@ -1791,8 +1791,8 @@ class IRSuite extends HailSuite { @Test def testNDArrayReshape() { implicit val execStrats: Set[ExecStrategy] = ExecStrategy.compileOnly - val v = NDArrayReshape(matrixRowMajor, MakeTuple.ordered(Seq(I64(4))), ErrorIDs.NO_ERROR) - val mat2 = NDArrayReshape(v, MakeTuple.ordered(Seq(I64(2), I64(2))), ErrorIDs.NO_ERROR) + val v = NDArrayReshape(matrixRowMajor, MakeTuple.ordered(Seq(I64(4)))) + val mat2 = NDArrayReshape(v, MakeTuple.ordered(Seq(I64(2), I64(2)))) assertEvalsTo(makeNDArrayRef(v, FastIndexedSeq(2)), 3.0) assertEvalsTo(makeNDArrayRef(mat2, FastIndexedSeq(1, 0)), 3.0) @@ -1885,7 +1885,7 @@ class IRSuite extends HailSuite { val bools = MakeNDArray(MakeArray(Seq(True(), False(), False(), True()), TArray(TBoolean)), shape, True(), ErrorIDs.NO_ERROR) val actual = NDArrayMap2(numbers, bools, "n", "b", - ApplyBinaryPrimOp(Add(), Ref("n", TFloat64), If(Ref("b", TBoolean), F64(10), F64(20))), ErrorIDs.NO_ERROR) + ApplyBinaryPrimOp(Add(), Ref("n", TFloat64), If(Ref("b", TBoolean), F64(10), F64(20)))) val ten = makeNDArrayRef(actual, FastSeq(0L, 0L)) val twentyTwo = makeNDArrayRef(actual, FastSeq(1L, 0L)) assertEvalsTo(ten, 10.0) @@ -1922,7 +1922,7 @@ class IRSuite extends HailSuite { NDArrayReindex(scalarRowMajor, FastIndexedSeq(1, 0)), matrixRowMajor, "s", "m", - ApplyBinaryPrimOp(Add(), Ref("s", TFloat64), Ref("m", TFloat64)), ErrorIDs.NO_ERROR) + ApplyBinaryPrimOp(Add(), Ref("s", TFloat64), Ref("m", TFloat64))) val topLeft = makeNDArrayRef(scalarWithMatrix, FastIndexedSeq(0, 0)) assertEvalsTo(topLeft, 4.0) @@ -1931,7 +1931,7 @@ class IRSuite extends HailSuite { NDArrayReindex(vectorRowMajor, FastIndexedSeq(1, 0)), matrixRowMajor, "v", "m", - ApplyBinaryPrimOp(Add(), Ref("v", TFloat64), Ref("m", TFloat64)), ErrorIDs.NO_ERROR) + ApplyBinaryPrimOp(Add(), Ref("v", TFloat64), Ref("m", TFloat64))) assertEvalsTo(makeNDArrayRef(vectorWithMatrix, 
FastIndexedSeq(0, 0)), 2.0) assertEvalsTo(makeNDArrayRef(vectorWithMatrix, FastIndexedSeq(0, 1)), 1.0) @@ -1939,7 +1939,7 @@ class IRSuite extends HailSuite { val colVector = makeNDArray(FastIndexedSeq(1.0, -1.0), FastIndexedSeq(2, 1), True()) val colVectorWithMatrix = NDArrayMap2(colVector, matrixRowMajor, "v", "m", - ApplyBinaryPrimOp(Add(), Ref("v", TFloat64), Ref("m", TFloat64)), ErrorIDs.NO_ERROR) + ApplyBinaryPrimOp(Add(), Ref("v", TFloat64), Ref("m", TFloat64))) assertEvalsTo(makeNDArrayRef(colVectorWithMatrix, FastIndexedSeq(0, 0)), 2.0) assertEvalsTo(makeNDArrayRef(colVectorWithMatrix, FastIndexedSeq(0, 1)), 3.0) @@ -1967,28 +1967,28 @@ class IRSuite extends HailSuite { @Test def testNDArrayMatMul() { implicit val execStrats: Set[ExecStrategy] = ExecStrategy.compileOnly - val dotProduct = NDArrayMatMul(vectorRowMajor, vectorRowMajor, ErrorIDs.NO_ERROR) + val dotProduct = NDArrayMatMul(vectorRowMajor, vectorRowMajor) val zero = makeNDArrayRef(dotProduct, IndexedSeq()) assertEvalsTo(zero, 2.0) - val seven = makeNDArrayRef(NDArrayMatMul(matrixRowMajor, matrixRowMajor, ErrorIDs.NO_ERROR), IndexedSeq(0, 0)) + val seven = makeNDArrayRef(NDArrayMatMul(matrixRowMajor, matrixRowMajor), IndexedSeq(0, 0)) assertEvalsTo(seven, 7.0) val twoByThreeByFive = threeTensorRowMajor val twoByFiveByThree = NDArrayReindex(twoByThreeByFive, IndexedSeq(0, 2, 1)) - val twoByThreeByThree = NDArrayMatMul(twoByThreeByFive, twoByFiveByThree, ErrorIDs.NO_ERROR) + val twoByThreeByThree = NDArrayMatMul(twoByThreeByFive, twoByFiveByThree) val thirty = makeNDArrayRef(twoByThreeByThree, IndexedSeq(0, 0, 0)) assertEvalsTo(thirty, 30.0) val threeByTwoByFive = NDArrayReindex(twoByThreeByFive, IndexedSeq(1, 0, 2)) - val matMulCube = NDArrayMatMul(NDArrayReindex(matrixRowMajor, IndexedSeq(2, 0, 1)), threeByTwoByFive, ErrorIDs.NO_ERROR) + val matMulCube = NDArrayMatMul(NDArrayReindex(matrixRowMajor, IndexedSeq(2, 0, 1)), threeByTwoByFive) assertEvalsTo(makeNDArrayRef(matMulCube, IndexedSeq(0, 0, 0)), 30.0) } @Test def testNDArrayInv() { implicit val execStrats: Set[ExecStrategy] = ExecStrategy.compileOnly val matrixRowMajor = makeNDArray(FastSeq(1.5, 2.0, 4.0, 5.0), FastSeq(2, 2), True()) - val inv = NDArrayInv(matrixRowMajor, ErrorIDs.NO_ERROR) + val inv = NDArrayInv(matrixRowMajor) val expectedInv = FastSeq(FastSeq(-10.0, 4.0), FastSeq(8.0, -3.0)) assertNDEvals(inv, expectedInv) } @@ -2796,15 +2796,15 @@ class IRSuite extends HailSuite { MakeArray(FastSeq(i, NA(TInt32), I32(-3)), TArray(TInt32)), MakeStream(FastSeq(i, NA(TInt32), I32(-3)), TStream(TInt32)), nd, - NDArrayReshape(nd, MakeTuple.ordered(Seq(I64(4))), ErrorIDs.NO_ERROR), + NDArrayReshape(nd, MakeTuple.ordered(Seq(I64(4)))), NDArrayConcat(MakeArray(FastSeq(nd, nd), TArray(nd.typ)), 0), NDArrayRef(nd, FastSeq(I64(1), I64(2)), -1), NDArrayMap(nd, "v", ApplyUnaryPrimOp(Negate(), v)), - NDArrayMap2(nd, nd, "l", "r", ApplyBinaryPrimOp(Add(), l, r), ErrorIDs.NO_ERROR), + NDArrayMap2(nd, nd, "l", "r", ApplyBinaryPrimOp(Add(), l, r)), NDArrayReindex(nd, FastIndexedSeq(0, 1)), NDArrayAgg(nd, FastIndexedSeq(0)), NDArrayWrite(nd, Str("/path/to/ndarray")), - NDArrayMatMul(nd, nd, ErrorIDs.NO_ERROR), + NDArrayMatMul(nd, nd), NDArraySlice(nd, MakeTuple.ordered(FastSeq(MakeTuple.ordered(FastSeq(F64(0), F64(2), F64(1))), MakeTuple.ordered(FastSeq(F64(0), F64(2), F64(1)))))), NDArrayFilter(nd, FastIndexedSeq(NA(TArray(TInt64)), NA(TArray(TInt64)))), @@ -2931,7 +2931,7 @@ class IRSuite extends HailSuite { ApplySpecial("Interval", FastSeq(), FastSeq(I32(0), I32(5), True(), 
False()), - TInterval(TInt32), ErrorIDs.NO_ERROR)))) + TInterval(TInt32))))) } ) irs.map(x => Array(x)) @@ -2948,7 +2948,7 @@ class IRSuite extends HailSuite { try { val fs = ctx.fs - val read = TableIR.read(fs, "src/test/resources/backward_compatability/1.1.0/table/0.ht") + val read = TableIR.read(fs, "src/test/resources/backward_compatability/1.0.0/table/0.ht") val mtRead = MatrixIR.read(fs, "src/test/resources/backward_compatability/1.0.0/matrix_table/0.hmt") val b = True() @@ -3019,7 +3019,7 @@ class IRSuite extends HailSuite { IndexBgen(ctx, Array("src/test/resources/example.8bits.bgen"), rg = Some("GRCh37"), contigRecoding = Map("01" -> "1")) - val tableRead = TableIR.read(fs, "src/test/resources/backward_compatability/1.1.0/table/0.ht") + val tableRead = TableIR.read(fs, "src/test/resources/backward_compatability/1.0.0/table/0.ht") val read = MatrixIR.read(fs, "src/test/resources/backward_compatability/1.0.0/matrix_table/0.hmt") val range = MatrixIR.range(3, 7, None) val vcf = is.hail.TestUtils.importVCF(ctx, "src/test/resources/sample.vcf") diff --git a/hail/src/test/scala/is/hail/expr/ir/IntervalSuite.scala b/hail/src/test/scala/is/hail/expr/ir/IntervalSuite.scala index be514e71e22..f78268dcc4d 100644 --- a/hail/src/test/scala/is/hail/expr/ir/IntervalSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/IntervalSuite.scala @@ -79,7 +79,7 @@ class IntervalSuite extends HailSuite { SetInterval(3, 1, true, false)) def toIRInterval(i: SetInterval): IR = - invoke("Interval", TInterval(TInt32), ErrorIDs.NO_ERROR, i.start, i.end, i.includesStart, i.includesEnd) + invoke("Interval", TInterval(TInt32), i.start, i.end, i.includesStart, i.includesEnd) @Test def contains() { for (setInterval <- testIntervals; p <- points) { @@ -91,7 +91,7 @@ class IntervalSuite extends HailSuite { @Test def isEmpty() { for (setInterval <- testIntervals) { val interval = toIRInterval(setInterval) - assert(eval(invoke("isEmpty", TBoolean, ErrorIDs.NO_ERROR, interval)) == setInterval.definitelyEmpty()) + assert(eval(invoke("isEmpty", TBoolean, interval)) == setInterval.definitelyEmpty()) } } diff --git a/hail/src/test/scala/is/hail/expr/ir/LocusFunctionsSuite.scala b/hail/src/test/scala/is/hail/expr/ir/LocusFunctionsSuite.scala index 555fb21fcdf..80a6af18d9f 100644 --- a/hail/src/test/scala/is/hail/expr/ir/LocusFunctionsSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/LocusFunctionsSuite.scala @@ -18,7 +18,7 @@ class LocusFunctionsSuite extends HailSuite { private def tlocus = TLocus(grch38) private def tvariant = TStruct("locus" -> tlocus, "alleles" -> TArray(TString)) - def locusIR: Apply = Apply("Locus", FastSeq(), FastSeq(Str("chr22"), I32(1)), tlocus, ErrorIDs.NO_ERROR) + def locusIR: Apply = Apply("Locus", FastSeq(), FastSeq(Str("chr22"), I32(1)), tlocus) def locus = Locus("chr22", 1, grch38) diff --git a/hail/src/test/scala/is/hail/expr/ir/MathFunctionsSuite.scala b/hail/src/test/scala/is/hail/expr/ir/MathFunctionsSuite.scala index 3c28ebb2785..1104f4c6c77 100644 --- a/hail/src/test/scala/is/hail/expr/ir/MathFunctionsSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/MathFunctionsSuite.scala @@ -119,7 +119,7 @@ class MathFunctionsSuite extends HailSuite { @Test(dataProvider = "chi_squared_test") def chiSquaredTest(a: Int, b: Int, c: Int, d: Int, pValue: Double, oddsRatio: Double) { - val r = eval(invoke("chi_squared_test", stats.chisqStruct.virtualType, ErrorIDs.NO_ERROR, a, b, c, d)).asInstanceOf[Row] + val r = eval(invoke("chi_squared_test", stats.chisqStruct.virtualType, a, b, c, 
d)).asInstanceOf[Row] assert(D0_==(pValue, r.getDouble(0))) assert(D0_==(oddsRatio, r.getDouble(1))) } @@ -133,7 +133,7 @@ class MathFunctionsSuite extends HailSuite { @Test(dataProvider = "fisher_exact_test") def fisherExactTest(a: Int, b: Int, c: Int, d: Int, pValue: Double, oddsRatio: Double, confLower: Double, confUpper: Double) { - val r = eval(invoke("fisher_exact_test", stats.fetStruct.virtualType, ErrorIDs.NO_ERROR, a, b, c, d)).asInstanceOf[Row] + val r = eval(invoke("fisher_exact_test", stats.fetStruct.virtualType, a, b, c, d)).asInstanceOf[Row] assert(D0_==(pValue, r.getDouble(0))) assert(D0_==(oddsRatio, r.getDouble(1))) assert(D0_==(confLower, r.getDouble(2))) @@ -148,7 +148,7 @@ class MathFunctionsSuite extends HailSuite { @Test(dataProvider = "contingency_table_test") def contingencyTableTest(a: Int, b: Int, c: Int, d: Int, minCellCount: Int, pValue: Double, oddsRatio: Double) { - val r = eval(invoke("contingency_table_test", stats.chisqStruct.virtualType, ErrorIDs.NO_ERROR, a, b, c, d, minCellCount)).asInstanceOf[Row] + val r = eval(invoke("contingency_table_test", stats.chisqStruct.virtualType, a, b, c, d, minCellCount)).asInstanceOf[Row] assert(D0_==(pValue, r.getDouble(0))) assert(D0_==(oddsRatio, r.getDouble(1))) } @@ -163,7 +163,7 @@ class MathFunctionsSuite extends HailSuite { @Test(dataProvider = "hardy_weinberg_test") def hardyWeinbergTest(nHomRef: Int, nHet: Int, nHomVar: Int, pValue: Double, hetFreq: Double) { - val r = eval(invoke("hardy_weinberg_test", stats.hweStruct.virtualType, ErrorIDs.NO_ERROR, nHomRef, nHet, nHomVar)).asInstanceOf[Row] + val r = eval(invoke("hardy_weinberg_test", stats.hweStruct.virtualType, nHomRef, nHet, nHomVar)).asInstanceOf[Row] assert(D0_==(pValue, r.getDouble(0))) assert(D0_==(hetFreq, r.getDouble(1))) } diff --git a/hail/src/test/scala/is/hail/expr/ir/OrderingSuite.scala b/hail/src/test/scala/is/hail/expr/ir/OrderingSuite.scala index d94db850904..0734f8d545f 100644 --- a/hail/src/test/scala/is/hail/expr/ir/OrderingSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/OrderingSuite.scala @@ -538,7 +538,7 @@ class OrderingSuite extends HailSuite { FastSeq(), FastSeq( Ref("accumulator", TBoolean), - invoke("contains", TBoolean, set2, Ref("setelt", TInt32))), TBoolean, ErrorIDs.NO_ERROR)), true) + invoke("contains", TBoolean, set2, Ref("setelt", TInt32))), TBoolean)), true) } @DataProvider(name = "arrayDoubleOrderingData") diff --git a/hail/src/test/scala/is/hail/expr/ir/PruneSuite.scala b/hail/src/test/scala/is/hail/expr/ir/PruneSuite.scala index 6b158e40e31..8e9fca074d8 100644 --- a/hail/src/test/scala/is/hail/expr/ir/PruneSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/PruneSuite.scala @@ -721,12 +721,12 @@ class PruneSuite extends HailSuite { } @Test def testNDArrayMap2Memo(): Unit = { - checkMemo(NDArrayMap2(ndArr, ndArr, "left", "right", Ref("left", ref.typ), ErrorIDs.NO_ERROR), + checkMemo(NDArrayMap2(ndArr, ndArr, "left", "right", Ref("left", ref.typ)), TNDArray(justBRequired, Nat(1)), Array(TNDArray(justBRequired, Nat(1)), TNDArray(TStruct.empty, Nat(1)), null)) - checkMemo(NDArrayMap2(ndArr, ndArr, "left", "right", Ref("right", ref.typ), ErrorIDs.NO_ERROR), + checkMemo(NDArrayMap2(ndArr, ndArr, "left", "right", Ref("right", ref.typ)), TNDArray(justBRequired, Nat(1)), Array(TNDArray(TStruct.empty, Nat(1)), TNDArray(justBRequired, Nat(1)), null)) val addFieldsIR = ApplyBinaryPrimOp(Add(), GetField(Ref("left", ref.typ), "a"), GetField(Ref("right", ref.typ), "b")) - checkMemo(NDArrayMap2(ndArr, ndArr, "left", "right", 
addFieldsIR, ErrorIDs.NO_ERROR), + checkMemo(NDArrayMap2(ndArr, ndArr, "left", "right", addFieldsIR), TNDArray(TInt32, Nat(1)), Array(TNDArray(justARequired, Nat(1)), TNDArray(justBRequired, Nat(1)), null)) } @@ -1324,13 +1324,13 @@ class PruneSuite extends HailSuite { } @Test def testNDArrayMap2Rebuild(): Unit = { - checkRebuild(NDArrayMap2(ndArrayTS, ndArrayTS, "left", "right", Ref("left", ts), ErrorIDs.NO_ERROR), TNDArray(subsetTS("b"), Nat(1)), + checkRebuild(NDArrayMap2(ndArrayTS, ndArrayTS, "left", "right", Ref("left", ts)), TNDArray(subsetTS("b"), Nat(1)), (_: BaseIR, r: BaseIR) => { val ir = r.asInstanceOf[NDArrayMap2] ir.l.typ == TNDArray(TStruct(("b", TInt64)), Nat(1)) ir.r.typ == TNDArray(TStruct.empty, Nat(1)) }) - checkRebuild(NDArrayMap2(ndArrayTS, ndArrayTS, "left", "right", Ref("right", ts), ErrorIDs.NO_ERROR), TNDArray(subsetTS("b"), Nat(1)), + checkRebuild(NDArrayMap2(ndArrayTS, ndArrayTS, "left", "right", Ref("right", ts)), TNDArray(subsetTS("b"), Nat(1)), (_: BaseIR, r: BaseIR) => { val ir = r.asInstanceOf[NDArrayMap2] ir.l.typ == TNDArray(TStruct.empty, Nat(1)) diff --git a/hail/src/test/scala/is/hail/types/physical/PNDArraySuite.scala b/hail/src/test/scala/is/hail/types/physical/PNDArraySuite.scala index 10fe1ba293d..549bb23711b 100644 --- a/hail/src/test/scala/is/hail/types/physical/PNDArraySuite.scala +++ b/hail/src/test/scala/is/hail/types/physical/PNDArraySuite.scala @@ -3,7 +3,6 @@ package is.hail.types.physical import is.hail.annotations.{Annotation, Region, SafeNDArray, ScalaToRegionValue, UnsafeRow} import is.hail.asm4s._ import is.hail.expr.ir.{EmitCodeBuilder, EmitFunctionBuilder} -import is.hail.types.physical.stypes.concrete.SNDArrayPointerSettable import is.hail.utils._ import org.apache.spark.sql.Row import org.testng.annotations.Test @@ -47,10 +46,12 @@ class PNDArraySuite extends PhysicalTestUtils { // Region 2 gets an ndarray at ndaddress2, plus a reference to the one at ndarray 1. 
val (_, snd2Finisher) = nd.constructDataFunction(shapeSeq, shapeSeq, cb, codeRegion2) val snd2 = snd2Finisher(cb).memoize(cb, "snd2") - cb.assign(r2PointerToNDAddress1, nd.store(cb, codeRegion2, snd1, true)) + cb.assign(r2PointerToNDAddress1, codeRegion2.allocate(8L, 8L)) - // Return the 1st ndarray - snd1.asInstanceOf[SNDArrayPointerSettable].a + nd.storeAtAddress(cb, r2PointerToNDAddress1, codeRegion2, snd1, true) + + // Return the address of the 1st one + Region.loadAddress(r2PointerToNDAddress1) } } catch { case e: AssertionError => @@ -62,24 +63,23 @@ class PNDArraySuite extends PhysicalTestUtils { val f = fb.result()() val result1 = f(region1, region2, region3) - val result1Data = nd.unstagedDataFirstElementPointer(result1) // Check number of ndarrays in each region: assert(region1.memory.listNDArrayRefs().size == 1) - assert(region1.memory.listNDArrayRefs()(0) == result1Data) + assert(region1.memory.listNDArrayRefs()(0) == result1) assert(region2.memory.listNDArrayRefs().size == 2) - assert(region2.memory.listNDArrayRefs()(1) == result1Data) + assert(region2.memory.listNDArrayRefs()(1) == result1) // Check that the reference count of ndarray1 is 2: - val rc1A = Region.loadLong(result1Data - Region.sharedChunkHeaderBytes) + val rc1A = Region.loadLong(result1-16L) assert(rc1A == 2) region1.clear() assert(region1.memory.listNDArrayRefs().size == 0) // Check that ndarray 1 wasn't actually cleared, ref count should just be 1 now: - val rc1B = Region.loadLong(result1Data - Region.sharedChunkHeaderBytes) + val rc1B = Region.loadLong(result1-16L) assert(rc1B == 1) @@ -96,20 +96,18 @@ class PNDArraySuite extends PhysicalTestUtils { @Test def testUnstagedCopy(): Unit = { val region1 = Region(pool=this.pool) val region2 = Region(pool=this.pool) - val x = SafeNDArray(IndexedSeq(3L, 2L), (0 until 6).map(_.toDouble)) + val x = new SafeNDArray(IndexedSeq(3L, 2L), (0 until 6).map(_.toDouble)) val pNd = PCanonicalNDArray(PFloat64Required, 2, true) - val ndAddr1 = pNd.unstagedStoreJavaObject(x, region=region1) - val ndAddr2 = pNd.copyFromAddress(region2, pNd, ndAddr1, true) - val unsafe1 = UnsafeRow.read(pNd, region1, ndAddr1) - val unsafe2 = UnsafeRow.read(pNd, region2, ndAddr2) + val addr1 = pNd.unstagedStoreJavaObject(x, region=region1) + val addr2 = pNd.copyFromAddress(region2, pNd, addr1, true) + val unsafe1 = UnsafeRow.read(pNd, region1, addr1) + val unsafe2 = UnsafeRow.read(pNd, region2, addr2) // Deep copy same ptype just increments reference count, doesn't change the address. - val dataAddr1 = Region.loadAddress(pNd.representation.loadField(ndAddr1, 2)) - val dataAddr2 = Region.loadAddress(pNd.representation.loadField(ndAddr2, 2)) - assert(dataAddr1 == dataAddr2) - assert(Region.getSharedChunkRefCount(dataAddr1) == 2) + assert(addr1 == addr2) + assert(PNDArray.getReferenceCount(addr1) == 2) assert(unsafe1 == unsafe2) region1.clear() - assert(Region.getSharedChunkRefCount(dataAddr1) == 1) + assert(PNDArray.getReferenceCount(addr1) == 1) // Deep copy with elements that contain pointers, so have to actually do a full copy // FIXME: Currently ndarrays do not support this, reference counting needs to account for this. 
diff --git a/hail/src/test/scala/is/hail/variant/ReferenceGenomeSuite.scala b/hail/src/test/scala/is/hail/variant/ReferenceGenomeSuite.scala index 2450f252231..f009bf1b435 100644 --- a/hail/src/test/scala/is/hail/variant/ReferenceGenomeSuite.scala +++ b/hail/src/test/scala/is/hail/variant/ReferenceGenomeSuite.scala @@ -105,8 +105,8 @@ class ReferenceGenomeSuite extends HailSuite { val rg = ReferenceGenome("test", Array("a", "b", "c"), Map("a" -> 25, "b" -> 15, "c" -> 10)) ReferenceGenome.addReference(rg) - val fr = FASTAReaderConfig(ctx.localTmpdir, ctx.fs, rg, fastaFile, indexFile, 3, 5).reader - val frGzip = FASTAReaderConfig(ctx.localTmpdir, ctx.fs, rg, fastaFileGzip, indexFile, 3, 5).reader + val fr = FASTAReaderConfig(ctx.localTmpdir, ctx.fs.broadcast, rg, fastaFile, indexFile, 3, 5).reader + val frGzip = FASTAReaderConfig(ctx.localTmpdir, ctx.fs.broadcast, rg, fastaFileGzip, indexFile, 3, 5).reader val refReaderPath = FASTAReader.getLocalFastaFile(ctx.localTmpdir, ctx.fs, fastaFile, indexFile) val refReaderPathGz = FASTAReader.getLocalFastaFile(ctx.localTmpdir, ctx.fs, fastaFileGzip, indexFile) val refReader = ReferenceSequenceFileFactory.getReferenceSequenceFile(new java.io.File(uriPath(refReaderPath))) @@ -161,7 +161,7 @@ class ReferenceGenomeSuite extends HailSuite { val grch38 = ReferenceGenome.GRCh38 val fb = EmitFunctionBuilder[String, Boolean](ctx, "serialize_rg") val cb = fb.ecb - val rgfield = fb.getReferenceGenome(grch38) + val rgfield = fb.genLazyFieldThisRef(grch38.codeSetup(ctx.localTmpdir, cb)) fb.emit(rgfield.invoke[String, Boolean]("isValidContig", fb.getCodeParam[String](1))) val f = fb.resultWithIndex()(ctx.fs, 0, ctx.r) @@ -180,7 +180,7 @@ class ReferenceGenomeSuite extends HailSuite { val fb = EmitFunctionBuilder[String, Int, Int, Int, String](ctx, "serialize_rg") val cb = fb.ecb - val rgfield = fb.getReferenceGenome(rg) + val rgfield = fb.genLazyFieldThisRef(rg.codeSetup(ctx.localTmpdir, cb)) fb.emit(rgfield.invoke[String, Int, Int, Int, String]("getSequence", fb.getCodeParam[String](1), fb.getCodeParam[Int](2), fb.getCodeParam[Int](3), fb.getCodeParam[Int](4))) val f = fb.resultWithIndex()(ctx.fs, 0, ctx.r) @@ -197,7 +197,7 @@ class ReferenceGenomeSuite extends HailSuite { val fb = EmitFunctionBuilder[String, Locus, Double, (Locus, Boolean)](ctx, "serialize_with_liftover") val cb = fb.ecb - val rgfield = fb.getReferenceGenome(grch37) + val rgfield = fb.genLazyFieldThisRef(grch37.codeSetup(ctx.localTmpdir, cb)) fb.emit(rgfield.invoke[String, Locus, Double, (Locus, Boolean)]("liftoverLocus", fb.getCodeParam[String](1), fb.getCodeParam[Locus](2), fb.getCodeParam[Double](3))) val f = fb.resultWithIndex()(ctx.fs, 0, ctx.r) diff --git a/memory/memory/memory.py b/memory/memory/memory.py index 123d0f7f4f2..f9fc8f4b935 100644 --- a/memory/memory/memory.py +++ b/memory/memory/memory.py @@ -17,7 +17,7 @@ from hailtop.hail_logging import AccessLogger from hailtop.tls import internal_server_ssl_context from hailtop.utils import AsyncWorkerPool, retry_transient_errors, dump_all_stacktraces -from gear import setup_aiohttp_session, rest_authenticated_users_only, monitor_endpoints_middleware +from gear import setup_aiohttp_session, rest_authenticated_users_only, monitor_endpoint uvloop.install() @@ -34,6 +34,7 @@ async def healthcheck(request): # pylint: disable=unused-argument @routes.get('/api/v1alpha/objects') +@monitor_endpoint @rest_authenticated_users_only async def get_object(request, userdata): filepath = request.query.get('q') @@ -47,6 +48,7 @@ async def 
get_object(request, userdata): @routes.post('/api/v1alpha/objects') +@monitor_endpoint @rest_authenticated_users_only async def write_object(request, userdata): filepath = request.query.get('q') @@ -157,7 +159,7 @@ async def on_cleanup(app): def run(): - app = web.Application(middlewares=[monitor_endpoints_middleware]) + app = web.Application() setup_aiohttp_session(app) app.add_routes(routes) diff --git a/monitoring/Makefile b/monitoring/Makefile index 7444b939fad..e3b5dcf1eec 100644 --- a/monitoring/Makefile +++ b/monitoring/Makefile @@ -23,7 +23,7 @@ build: .PHONY: deploy deploy: build ! [ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default - python3 ../ci/jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"default_ns":{"name":"$(NAMESPACE)"},"monitoring_image":{"image":"$(MONITORING_IMAGE)"},"monitoring_database":{"user_secret_name":"sql-monitoring-user-config"},"global":{"project":"$(PROJECT)","zone":"$(ZONE)","domain":"$(DOMAIN)"},"scope":"$(SCOPE)"}' deployment.yaml deployment.yaml.out + python3 ../ci/jinja2_render.py '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"default_ns":{"name":"$(NAMESPACE)"},"monitoring_image":{"image":"$(MONITORING_IMAGE)"},"monitoring_database":{"user_secret_name":"sql-monitoring-user-config"},"global":{"project":"$(PROJECT)","zone":"$(ZONE)","domain":"$(DOMAIN)"}}' deployment.yaml deployment.yaml.out kubectl -n $(NAMESPACE) apply -f deployment.yaml.out .PHONY: clean diff --git a/monitoring/deployment.yaml b/monitoring/deployment.yaml index 4e689ca7b60..2b34f9571d2 100644 --- a/monitoring/deployment.yaml +++ b/monitoring/deployment.yaml @@ -15,7 +15,6 @@ spec: labels: app: monitoring hail.is/sha: "{{ code.sha }}" - grafanak8sapp: "true" spec: nodeSelector: preemptible: "true" @@ -35,23 +34,11 @@ spec: env: - name: HAIL_DOMAIN value: "{{ global.domain }}" - - name: PROJECT - value: "{{ global.project }}" - name: HAIL_DEPLOY_CONFIG_FILE value: /deploy-config/deploy-config.json - name: HAIL_SHA value: "{{ code.sha }}" - - name: HAIL_GCP_REGION - valueFrom: - secretKeyRef: - name: global-config - key: gcp_region - - name: HAIL_BATCH_GCP_REGIONS - valueFrom: - secretKeyRef: - name: global-config - key: batch_gcp_regions -{% if scope != "test" and scope != "dev" %} +{% if scope != "test" %} - name: HAIL_USE_FULL_QUERY value: "1" {% endif %} diff --git a/monitoring/monitoring/monitoring.py b/monitoring/monitoring/monitoring.py index 57151f523a0..cba9f4a683c 100644 --- a/monitoring/monitoring/monitoring.py +++ b/monitoring/monitoring/monitoring.py @@ -1,22 +1,17 @@ import datetime import calendar import asyncio -import os -import json from aiohttp import web import aiohttp_session import logging -from collections import defaultdict, namedtuple -from prometheus_async.aio.web import server_stats # type: ignore -import prometheus_client as pc # type: ignore +from collections import defaultdict from hailtop import aiogoogle, aiotools -from hailtop.aiogoogle import BigQueryClient, ComputeClient +from hailtop.aiogoogle import BigQueryClient from hailtop.config import get_deploy_config from hailtop.hail_logging import AccessLogger from hailtop.tls import internal_server_ssl_context -from hailtop.utils import (run_if_changed_idempotent, retry_long_running, time_msecs, cost_str, parse_timestamp_msecs, - url_basename, periodically_call) +from hailtop.utils import run_if_changed_idempotent, retry_long_running, time_msecs, cost_str from gear import ( Database, setup_aiohttp_session, @@ 
-34,18 +29,6 @@ deploy_config = get_deploy_config() -GCP_REGION = os.environ['HAIL_GCP_REGION'] -BATCH_GCP_REGIONS = set(json.loads(os.environ['HAIL_BATCH_GCP_REGIONS'])) -BATCH_GCP_REGIONS.add(GCP_REGION) - -PROJECT = os.environ['PROJECT'] - -DISK_SIZES_GB = pc.Summary('batch_disk_size_gb', 'Batch disk sizes (GB)', ['namespace', 'zone', 'state']) -INSTANCES = pc.Gauge('batch_instances', 'Batch instances', ['namespace', 'zone', 'status', 'machine_type', 'preemptible']) - -DiskLabels = namedtuple('DiskLabels', ['zone', 'namespace', 'state']) -InstanceLabels = namedtuple('InstanceLabels', ['namespace', 'zone', 'status', 'machine_type', 'preemptible']) - def get_previous_month(dt): if dt.month == 1: @@ -244,84 +227,18 @@ async def polling_loop(app): await asyncio.sleep(60) -async def monitor_disks(app): - log.info('monitoring disks') - compute_client: ComputeClient = app['compute_client'] - - disk_counts = defaultdict(list) - - for zone in app['zones']: - async for disk in await compute_client.list(f'/zones/{zone}/disks', params={'filter': '(labels.batch = 1)'}): - namespace = disk['labels']['namespace'] - size_gb = int(disk['sizeGb']) - - creation_timestamp_msecs = parse_timestamp_msecs(disk.get('creationTimestamp')) - last_attach_timestamp_msecs = parse_timestamp_msecs(disk.get('lastAttachTimestamp')) - last_detach_timestamp_msecs = parse_timestamp_msecs(disk.get('lastDetachTimestamp')) - - if creation_timestamp_msecs is None: - state = 'creating' - elif last_attach_timestamp_msecs is None: - state = 'created' - elif last_attach_timestamp_msecs is not None and last_detach_timestamp_msecs is None: - state = 'attached' - elif last_attach_timestamp_msecs is not None and last_detach_timestamp_msecs is not None: - state = 'detached' - else: - state = 'unknown' - log.exception(f'disk is in unknown state {disk}') - - disk_labels = DiskLabels(zone=zone, namespace=namespace, state=state) - disk_counts[disk_labels].append(size_gb) - - DISK_SIZES_GB.clear() - for labels, sizes in disk_counts.items(): - for size in sizes: - DISK_SIZES_GB.labels(**labels._asdict()).observe(size) - - -async def monitor_instances(app): - log.info('monitoring instances') - compute_client: ComputeClient = app['compute_client'] - - instance_counts = defaultdict(int) - - for zone in app['zones']: - async for instance in await compute_client.list(f'/zones/{zone}/instances', params={'filter': '(labels.role = batch2-agent)'}): - instance_labels = InstanceLabels( - status=instance['status'], - zone=zone, - namespace=instance['labels']['namespace'], - machine_type=instance['machineType'].rsplit('/', 1)[1], - preemptible=instance['scheduling']['preemptible'] - ) - instance_counts[instance_labels] += 1 - - INSTANCES.clear() - for labels, count in instance_counts.items(): - INSTANCES.labels(**labels._asdict()).set(count) - - async def on_startup(app): db = Database() await db.async_init() app['db'] = db aiogoogle_credentials = aiogoogle.Credentials.from_file('/billing-monitoring-gsa-key/key.json') - bigquery_client = BigQueryClient('broad-ctsa', credentials=aiogoogle_credentials) app['bigquery_client'] = bigquery_client - compute_client = ComputeClient(PROJECT, credentials=aiogoogle_credentials) - app['compute_client'] = compute_client - query_billing_event = asyncio.Event() app['query_billing_event'] = query_billing_event - region_info = {name: await compute_client.get(f'/regions/{name}') for name in BATCH_GCP_REGIONS} - zones = [url_basename(z) for r in region_info.values() for z in r['zones']] - app['zones'] = zones - 
app['task_manager'] = aiotools.BackgroundTaskManager() app['task_manager'].ensure_future(retry_long_running('polling_loop', polling_loop, app)) @@ -332,9 +249,6 @@ async def on_startup(app): ) ) - app['task_manager'].ensure_future(periodically_call(60, monitor_disks, app)) - app['task_manager'].ensure_future(periodically_call(60, monitor_instances, app)) - async def on_cleanup(app): try: @@ -350,7 +264,6 @@ def run(): setup_aiohttp_jinja2(app, 'monitoring') setup_common_static_routes(routes) app.add_routes(routes) - app.router.add_get("/metrics", server_stats) app.on_startup.append(on_startup) app.on_cleanup.append(on_cleanup) diff --git a/notebook/notebook/notebook.py b/notebook/notebook/notebook.py index 264676ef85b..2c1ed3dd7e0 100644 --- a/notebook/notebook/notebook.py +++ b/notebook/notebook/notebook.py @@ -22,7 +22,7 @@ web_maybe_authenticated_user, web_authenticated_developers_only, check_csrf_token, - monitor_endpoints_middleware, + monitor_endpoint, ) from web_common import sass_compile, setup_aiohttp_jinja2, setup_common_static_routes, set_message, render_template @@ -491,6 +491,7 @@ async def get_error(request, userdata): @routes.get('/workshop-admin') +@monitor_endpoint @web_authenticated_developers_only() async def workshop_admin(request, userdata): dbpool = request.app['dbpool'] @@ -505,6 +506,7 @@ async def workshop_admin(request, userdata): @routes.post('/workshop-admin-create') @check_csrf_token +@monitor_endpoint @web_authenticated_developers_only() async def create_workshop(request, userdata): # pylint: disable=unused-argument dbpool = request.app['dbpool'] @@ -538,6 +540,7 @@ async def create_workshop(request, userdata): # pylint: disable=unused-argument @routes.post('/workshop-admin-update') @check_csrf_token +@monitor_endpoint @web_authenticated_developers_only() async def update_workshop(request, userdata): # pylint: disable=unused-argument app = request.app @@ -571,6 +574,7 @@ async def update_workshop(request, userdata): # pylint: disable=unused-argument @routes.post('/workshop-admin-delete') @check_csrf_token +@monitor_endpoint @web_authenticated_developers_only() async def delete_workshop(request, userdata): # pylint: disable=unused-argument app = request.app @@ -601,6 +605,7 @@ async def delete_workshop(request, userdata): # pylint: disable=unused-argument @workshop_routes.get('') @workshop_routes.get('/') +@monitor_endpoint @web_maybe_authenticated_workshop_guest async def workshop_get_index(request, userdata): page_context = {'notebook_service': 'workshop'} @@ -608,6 +613,7 @@ async def workshop_get_index(request, userdata): @workshop_routes.get('/login') +@monitor_endpoint @web_maybe_authenticated_workshop_guest async def workshop_get_login(request, userdata): if userdata: @@ -618,6 +624,7 @@ async def workshop_get_login(request, userdata): @workshop_routes.post('/login') +@monitor_endpoint @check_csrf_token async def workshop_post_login(request): session = await aiohttp_session.get_session(request) @@ -655,6 +662,7 @@ async def workshop_post_login(request): @workshop_routes.post('/logout') @check_csrf_token +@monitor_endpoint @web_authenticated_workshop_guest_only(redirect=True) async def workshop_post_logout(request, userdata): app = request.app @@ -678,6 +686,7 @@ async def workshop_post_logout(request, userdata): @workshop_routes.get('/resources') +@monitor_endpoint @web_maybe_authenticated_workshop_guest async def workshop_get_faq(request, userdata): page_context = {'notebook_service': 'workshop'} @@ -685,6 +694,7 @@ async def workshop_get_faq(request, 
userdata): @workshop_routes.get('/notebook') +@monitor_endpoint @web_authenticated_workshop_guest_only() async def workshop_get_notebook(request, userdata): return await _get_notebook('workshop', request, userdata) @@ -692,12 +702,14 @@ async def workshop_get_notebook(request, userdata): @workshop_routes.post('/notebook') @check_csrf_token +@monitor_endpoint @web_authenticated_workshop_guest_only(redirect=False) async def workshop_post_notebook(request, userdata): return await _post_notebook('workshop', request, userdata) @workshop_routes.get('/auth/{requested_notebook_token}') +@monitor_endpoint @web_authenticated_workshop_guest_only(redirect=False) async def workshop_get_auth(request, userdata): return await _get_auth(request, userdata) @@ -705,18 +717,21 @@ async def workshop_get_auth(request, userdata): @workshop_routes.post('/notebook/delete') @check_csrf_token +@monitor_endpoint @web_authenticated_workshop_guest_only(redirect=False) async def workshop_delete_notebook(request, userdata): return await _delete_notebook('workshop', request, userdata) @workshop_routes.get('/notebook/wait') +@monitor_endpoint @web_authenticated_workshop_guest_only(redirect=False) async def workshop_wait_websocket(request, userdata): return await _wait_websocket('workshop', request, userdata) @workshop_routes.get('/error') +@monitor_endpoint @web_maybe_authenticated_user async def workshop_get_error(request, userdata): return await _get_error('workshop', request, userdata) @@ -738,7 +753,7 @@ async def on_cleanup(app): def init_app(routes): - app = web.Application(middlewares=[monitor_endpoints_middleware]) + app = web.Application() app.on_startup.append(on_startup) app.on_cleanup.append(on_cleanup) setup_aiohttp_jinja2(app, 'notebook') diff --git a/query/query/query.py b/query/query/query.py index 051cb0fbed7..9ebab4ddee4 100644 --- a/query/query/query.py +++ b/query/query/query.py @@ -20,7 +20,7 @@ setup_aiohttp_session, rest_authenticated_users_only, rest_authenticated_developers_only, - monitor_endpoints_middleware, + monitor_endpoint, ) from .sockets import connect_to_java @@ -144,12 +144,14 @@ async def handle_ws_response(request, userdata, endpoint, f): @routes.get('/api/v1alpha/execute') +@monitor_endpoint @rest_authenticated_users_only async def execute(request, userdata): return await handle_ws_response(request, userdata, 'execute', blocking_execute) @routes.get('/api/v1alpha/load_references_from_dataset') +@monitor_endpoint @rest_authenticated_users_only async def load_references_from_dataset(request, userdata): return await handle_ws_response( @@ -158,6 +160,7 @@ async def load_references_from_dataset(request, userdata): @routes.get('/api/v1alpha/type/value') +@monitor_endpoint @rest_authenticated_users_only async def value_type(request, userdata): return await handle_ws_response( @@ -166,6 +169,7 @@ async def value_type(request, userdata): @routes.get('/api/v1alpha/type/table') +@monitor_endpoint @rest_authenticated_users_only async def table_type(request, userdata): return await handle_ws_response( @@ -174,6 +178,7 @@ async def table_type(request, userdata): @routes.get('/api/v1alpha/type/matrix') +@monitor_endpoint @rest_authenticated_users_only async def matrix_type(request, userdata): return await handle_ws_response( @@ -182,6 +187,7 @@ async def matrix_type(request, userdata): @routes.get('/api/v1alpha/type/blockmatrix') +@monitor_endpoint @rest_authenticated_users_only async def blockmatrix_type(request, userdata): return await handle_ws_response( @@ -190,6 +196,7 @@ async def 
blockmatrix_type(request, userdata): @routes.get('/api/v1alpha/references/get') +@monitor_endpoint @rest_authenticated_users_only async def get_reference(request, userdata): # pylint: disable=unused-argument return await handle_ws_response( @@ -198,6 +205,7 @@ async def get_reference(request, userdata): # pylint: disable=unused-argument @routes.get('/api/v1alpha/flags/get') +@monitor_endpoint @rest_authenticated_developers_only async def get_flags(request, userdata): # pylint: disable=unused-argument app = request.app @@ -207,6 +215,7 @@ async def get_flags(request, userdata): # pylint: disable=unused-argument @routes.get('/api/v1alpha/flags/get/{flag}') +@monitor_endpoint @rest_authenticated_developers_only async def get_flag(request, userdata): # pylint: disable=unused-argument app = request.app @@ -217,6 +226,7 @@ async def get_flag(request, userdata): # pylint: disable=unused-argument @routes.get('/api/v1alpha/flags/set/{flag}') +@monitor_endpoint @rest_authenticated_developers_only async def set_flag(request, userdata): # pylint: disable=unused-argument app = request.app @@ -274,7 +284,7 @@ async def on_shutdown(_): def run(): - app = web.Application(middlewares=[monitor_endpoints_middleware]) + app = web.Application() setup_aiohttp_session(app) diff --git a/website/website/website.py b/website/website/website.py index 47c5b35e078..8c69ea2c165 100644 --- a/website/website/website.py +++ b/website/website/website.py @@ -9,7 +9,7 @@ from hailtop.config import get_deploy_config from hailtop.tls import internal_server_ssl_context from hailtop.hail_logging import AccessLogger -from gear import setup_aiohttp_session, web_maybe_authenticated_user, monitor_endpoints_middleware +from gear import setup_aiohttp_session, web_maybe_authenticated_user, monitor_endpoint from web_common import setup_aiohttp_jinja2, setup_common_static_routes, render_template, sass_compile @@ -63,6 +63,7 @@ async def get_robots(request): # pylint: disable=unused-argument @routes.get('/docs/{tail:.*}') +@monitor_endpoint @web_maybe_authenticated_user async def serve_docs(request, userdata): tail = request.match_info['tail'] @@ -90,7 +91,7 @@ async def serve(request, userdata): def run(local_mode): - app = web.Application(middlewares=[monitor_endpoints_middleware]) + app = web.Application() if local_mode: log.error('running in local mode with bogus cookie storage key') From 6a20b957875e5a8c8453c7bd3a48063f0c8c8a50 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Fri, 24 Sep 2021 17:19:01 +1000 Subject: [PATCH 271/501] Fix infra --- infra/README.md | 8 ++++++-- infra/main.tf | 6 +++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/infra/README.md b/infra/README.md index 9423e132bea..01b372571a8 100644 --- a/infra/README.md +++ b/infra/README.md @@ -54,7 +54,7 @@ Instructions: gcp_project = "" - # This is the bucket location that spans the regions you're going to + # These are the bucket locations that span the regions you're going to # schedule across in Batch. If you are running on one region, it can # just be that region. E.g. "US" batch_logs_bucket_location = "" @@ -62,7 +62,11 @@ Instructions: # The storage class for the batch logs bucket. It should span the # batch regions and be compatible with the bucket location. 
batch_logs_bucket_storage_class = "MULTI_REGIONAL" - + + # Similarly, bucket locations and storage classes are specified + # for other services: + hail_query_bucket_location = "" + hail_query_bucket_storage_class = "MULTI_REGIONAL" hail_test_gcs_bucket_location = "" hail_test_gcs_bucket_storage_class = "MULTI_REGIONAL" diff --git a/infra/main.tf b/infra/main.tf index ad7ff5799c8..50fc84477e8 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -17,7 +17,6 @@ terraform { variable "gsuite_organization" {} variable "batch_gcp_regions" {} variable "gcp_project" {} -variable "gcp_location" {} variable "batch_logs_bucket_location" {} variable "batch_logs_bucket_storage_class" {} variable "hail_query_bucket_location" {} @@ -430,6 +429,11 @@ module "batch_gsa_secret" { ] } +module "grafana_gsa_secret" { + source = "./gsa_k8s_secret" + name = "grafana" +} + module "query_gsa_secret" { source = "./gsa_k8s_secret" name = "query" From 312b9bf47930607407060f762bbd95b94db54641 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Mon, 25 Oct 2021 13:30:59 +1100 Subject: [PATCH 272/501] Remove duplicated grafana --- infra/main.tf | 5 ----- 1 file changed, 5 deletions(-) diff --git a/infra/main.tf b/infra/main.tf index 3cf3e71908b..6a18cc4d802 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -395,11 +395,6 @@ module "batch_gsa_secret" { ] } -module "grafana_gsa_secret" { - source = "./gsa_k8s_secret" - name = "grafana" -} - module "query_gsa_secret" { source = "./gsa_k8s_secret" name = "query" From 87a6fea150ce82feec8c979f12978da2f3a89c17 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Tue, 26 Oct 2021 10:26:41 +1100 Subject: [PATCH 273/501] Fix merge --- ci/deployment.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/deployment.yaml b/ci/deployment.yaml index 00921fdda16..dcde3659b00 100644 --- a/ci/deployment.yaml +++ b/ci/deployment.yaml @@ -42,10 +42,10 @@ spec: env: - name: HAIL_DEPLOY_CONFIG_FILE value: /deploy-config/deploy-config.json + - name: HAIL_CI_OAUTH_TOKEN + value: /secrets/oauth-token/oauth-token - name: HAIL_WATCHED_BRANCHES value: '[]' - - name: HAIL_GCP_PROJECT - value: "{{ global.project }}" - name: HAIL_DOCKER_PREFIX valueFrom: secretKeyRef: From 3ff6b409e4b47ea58fa997720a85d16681fd30c0 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Tue, 26 Oct 2021 10:27:11 +1100 Subject: [PATCH 274/501] infra: set bucket uniform access level --- infra/gcs_bucket/main.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/infra/gcs_bucket/main.tf b/infra/gcs_bucket/main.tf index 01fbba71d90..9827732380c 100644 --- a/infra/gcs_bucket/main.tf +++ b/infra/gcs_bucket/main.tf @@ -7,4 +7,5 @@ resource "google_storage_bucket" "bucket" { location = var.location force_destroy = true storage_class = var.storage_class + uniform_bucket_level_access = true } From 8557ea0f4cf4c42faa0325b621f39360128a946b Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Tue, 26 Oct 2021 10:29:02 +1100 Subject: [PATCH 275/501] Infra fixes --- infra/README.md | 20 +++++++++++++++----- infra/main.tf | 2 ++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/infra/README.md b/infra/README.md index c831946add1..cfb69e2404b 100644 --- a/infra/README.md +++ b/infra/README.md @@ -164,6 +164,10 @@ You can now install Hail: - Create Let's Encrypt certs. Run `make -C $HAIL/letsencrypt run`. - Deploy the internal-gateway. Run `make -C $HAIL/internal-gateway deploy`. + +- Deploy the memory service. Run `make -C $HAIL/memory deploy NAMESPACE=default`. 
+ +- `make -C auth deploy NAMESPACE=default` - Generate the version info: @@ -190,13 +194,13 @@ You can now install Hail: - Create the batch worker VM image. Run: ``` - make -C $HAIL/batch create-build-worker-image-instance + make -C $HAIL/batch gcp-create-build-worker-image-instance ``` Wait for the `build-batch-worker-image` instance to be stopped. Then run: ``` - make -C $HAIL/batch create-worker-image + make -C $HAIL/batch gcp-create-worker-image ``` - Create the worker Docker image. Run: @@ -210,7 +214,12 @@ You can now install Hail: above, make sure your `HAIL_DOCKER_PREFIX` has the format of `-docker.pkg.dev//hail`. + ```sh + mkdir /global-config + kubectl -n default get secret global-config -o json | jq -r '.data | map_values(@base64d) | to_entries|map("echo \(.value) > /global-config/\(.key)") | .[]' | bash ``` + + ```sh cd $HAIL export HAIL_DOCKER_PREFIX=gcr.io/ export HAIL_CI_UTILS_IMAGE=$HAIL_DOCKER_PREFIX/ci-utils:cache @@ -222,9 +231,10 @@ You can now install Hail: export HAIL_DOMAIN= export HAIL_GCP_ZONE= export HAIL_GCP_PROJECT= - export PYTHONPATH=$HOME/hail/ci:$HOME/hail/batch:$HOME/hail/hail/python - - python3 ci/bootstrap.py hail-is/hail:main $(git rev-parse HEAD) test_batch_0 + export PYTHONPATH=$HAIL/ci:$HAIL/batch:$HAIL/gear:$HAIL/hail/python + GIT_ORG=hail-is + GIT_BRANCH=main + python3 ci/bootstrap.py $GIT_ORG/hail:$GIT_BRANCH $(git rev-parse HEAD) test_batch_0 ``` - Deploy the gateway. First, edit `$HAIL/letsencrypt/subdomains.txt` to include diff --git a/infra/main.tf b/infra/main.tf index 6a18cc4d802..739763483ce 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -14,6 +14,7 @@ terraform { } } +variable "cloud" {} variable "gsuite_organization" {} variable "batch_gcp_regions" {} variable "gcp_project" {} @@ -237,6 +238,7 @@ resource "kubernetes_secret" "global_config" { } data = { + cloud = var.cloud batch_gcp_regions = var.batch_gcp_regions batch_logs_bucket = module.batch_logs.name hail_query_gcs_path = "gs://${module.hail_query.name}" From 3ae2e1021acbf62a4131d7df262d815644c7bf73 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Tue, 26 Oct 2021 11:58:15 +1100 Subject: [PATCH 276/501] Fix merge: sql-config in ci/deployment.yaml --- ci/deployment.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ci/deployment.yaml b/ci/deployment.yaml index dcde3659b00..77ec86a718e 100644 --- a/ci/deployment.yaml +++ b/ci/deployment.yaml @@ -93,6 +93,9 @@ spec: - name: session-secret-key mountPath: /session-secret-key readOnly: true + - mountPath: /sql-config + name: sql-config + readOnly: true - mountPath: /user-tokens name: ci-tokens readOnly: true @@ -120,6 +123,10 @@ spec: secret: optional: false secretName: session-secret-key + - name: sql-config + secret: + optional: false + secretName: "{{ ci_database.user_secret_name }}" - name: ci-tokens secret: secretName: ci-tokens From 2f301ec821322a30e4eddb3746f8cd1eb7f66b96 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Tue, 26 Oct 2021 12:24:11 +1100 Subject: [PATCH 277/501] Echo cmd in bootstrap.py --- ci/bootstrap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/bootstrap.py b/ci/bootstrap.py index 8d23dfce88a..cd7af810d66 100644 --- a/ci/bootstrap.py +++ b/ci/bootstrap.py @@ -66,7 +66,7 @@ def __init__( async def docker_run(*args: str): script = ' '.join([shq(a) for a in args]) - outerr = await check_shell_output(script) + outerr = await check_shell_output(script, echo=True) print(f'Container output: {outerr[0]}\n' f'Container error: {outerr[1]}') cid = outerr[0].decode('ascii').strip() 
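The `echo=True` added to `check_shell_output` above only surfaces the docker command before it runs, which makes bootstrap failures easier to trace. A minimal sketch of such a helper, assuming the same (stdout, stderr) bytes return shape that `docker_run` relies on (the real hailtop.utils implementation may differ):

import asyncio

async def check_shell_output_sketch(script: str, echo: bool = False):
    # Hypothetical stand-in used only for illustration, not the hailtop.utils code.
    if echo:
        print(script)  # print the command before running it, as echo=True enables above
    proc = await asyncio.create_subprocess_shell(
        script,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout, stderr = await proc.communicate()
    if proc.returncode != 0:
        raise RuntimeError(f'command failed with exit code {proc.returncode}: {script}')
    return stdout, stderr

# Example usage: asyncio.run(check_shell_output_sketch('echo hello', echo=True))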
From d20edb32dbcea606d9e65ab9d1cabd0660b69a7e Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Tue, 26 Oct 2021 13:25:38 +1100 Subject: [PATCH 278/501] Add cloud = "gcp" into infra/globals.tfvars --- infra/global.tfvars | 1 + 1 file changed, 1 insertion(+) diff --git a/infra/global.tfvars b/infra/global.tfvars index eaf01ca359a..325295bf3e0 100644 --- a/infra/global.tfvars +++ b/infra/global.tfvars @@ -10,3 +10,4 @@ batch_logs_bucket_storage_class = "STANDARD" use_artifact_registry = true hail_query_bucket_location = "australia-southeast1" hail_query_bucket_storage_class = "STANDARD" +cloud = "gcp" From aafe80055d14b3c326ff05166f2f9758a3a3854e Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Tue, 26 Oct 2021 13:33:34 +1100 Subject: [PATCH 279/501] infra: add hail_test_gcs_bucket_location into global.tfvars --- infra/global.tfvars | 2 ++ 1 file changed, 2 insertions(+) diff --git a/infra/global.tfvars b/infra/global.tfvars index 325295bf3e0..3ca92e66cd1 100644 --- a/infra/global.tfvars +++ b/infra/global.tfvars @@ -10,4 +10,6 @@ batch_logs_bucket_storage_class = "STANDARD" use_artifact_registry = true hail_query_bucket_location = "australia-southeast1" hail_query_bucket_storage_class = "STANDARD" +hail_test_gcs_bucket_location = "australia-southeast1" +hail_test_gcs_bucket_storage_class = "REGIONAL" cloud = "gcp" From 68ef2e07a7d56b6b0106fa659b856c097ef81143 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Tue, 26 Oct 2021 14:20:15 +1100 Subject: [PATCH 280/501] Add comments to global.tfvars --- infra/global.tfvars | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/infra/global.tfvars b/infra/global.tfvars index 3ca92e66cd1..bf790b5c5bc 100644 --- a/infra/global.tfvars +++ b/infra/global.tfvars @@ -1,15 +1,29 @@ gsuite_organization = "populationgenomics.org.au" + +# batch_gcp_regions is a JSON array of string, the names of the gcp +# regions to schedule over in Batch. batch_gcp_regions = "[\"australia-southeast1\"]" + gcp_project = "hail-295901" gcp_location = "australia-southeast1" gcp_region = "australia-southeast1" gcp_zone = "australia-southeast1-b" domain = "hail.populationgenomics.org.au" +use_artifact_registry = true +cloud = "gcp" + +# This is the bucket location that spans the regions you're going to +# schedule across in Batch. If you are running on one region, it can +# just be that region. E.g. "US" batch_logs_bucket_location = "australia-southeast1" + +# The storage class for the batch logs bucket. It should span the +# batch regions and be compatible with the bucket location. 
batch_logs_bucket_storage_class = "STANDARD" -use_artifact_registry = true + +# Similarly, bucket locations and storage classess are specified +# for other services: hail_query_bucket_location = "australia-southeast1" hail_query_bucket_storage_class = "STANDARD" hail_test_gcs_bucket_location = "australia-southeast1" hail_test_gcs_bucket_storage_class = "REGIONAL" -cloud = "gcp" From 952500c64bc7b4ac648695b1f088db2f06a60398 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Tue, 26 Oct 2021 20:20:55 +1100 Subject: [PATCH 281/501] Remove return in ci/create_database.py --- ci/create_database.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/create_database.py b/ci/create_database.py index aa64e870c54..e4cdc256426 100644 --- a/ci/create_database.py +++ b/ci/create_database.py @@ -62,7 +62,6 @@ async def create_database(): await write_user_config(namespace, database_name, 'admin', sql_config) await write_user_config(namespace, database_name, 'user', sql_config) - return scope = create_database_config['scope'] _name = create_database_config['_name'] From 6259263d8fa23ce88f2abcd959707ddf057f45eb Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Tue, 26 Oct 2021 21:54:10 +1100 Subject: [PATCH 282/501] Fix removed return in ci/create_database.py --- ci/create_database.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/create_database.py b/ci/create_database.py index e4cdc256426..516a614f251 100644 --- a/ci/create_database.py +++ b/ci/create_database.py @@ -62,6 +62,7 @@ async def create_database(): await write_user_config(namespace, database_name, 'admin', sql_config) await write_user_config(namespace, database_name, 'user', sql_config) + return scope = create_database_config['scope'] _name = create_database_config['_name'] @@ -79,7 +80,6 @@ async def create_database(): rows = [row async for row in rows] if len(rows) > 0: assert len(rows) == 1 - return with open(create_database_config['admin_password_file']) as f: admin_password = f.read() From 96022f22a7928ee8c5d32cdf284fa4f4ba43b0e7 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Tue, 26 Oct 2021 21:57:06 +1100 Subject: [PATCH 283/501] Always update password --- ci/create_database.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ci/create_database.py b/ci/create_database.py index 516a614f251..dd3cf1d4ef3 100644 --- a/ci/create_database.py +++ b/ci/create_database.py @@ -96,6 +96,10 @@ async def create_database(): CREATE USER IF NOT EXISTS '{user_username}'@'%' IDENTIFIED BY '{user_password}'; GRANT SELECT, INSERT, UPDATE, DELETE, EXECUTE ON `{_name}`.* TO '{user_username}'@'%'; + +ALTER USER '{admin_username}'@'%' IDENTIFIED BY '{admin_password}'; + +ALTER USER '{user_username}'@'%' IDENTIFIED BY '{user_password}'; ''' ) From 137e577dc50b80d93fcbc212bbca19749b5e7cd4 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Tue, 26 Oct 2021 22:44:15 +1100 Subject: [PATCH 284/501] Clean up readme --- infra/README.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/infra/README.md b/infra/README.md index cfb69e2404b..54f8fc140d0 100644 --- a/infra/README.md +++ b/infra/README.md @@ -164,10 +164,6 @@ You can now install Hail: - Create Let's Encrypt certs. Run `make -C $HAIL/letsencrypt run`. - Deploy the internal-gateway. Run `make -C $HAIL/internal-gateway deploy`. - -- Deploy the memory service. Run `make -C $HAIL/memory deploy NAMESPACE=default`. 
- -- `make -C auth deploy NAMESPACE=default` - Generate the version info: From 6d09c14fb447ae6d627399d3408f697a931e8d1b Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Wed, 27 Oct 2021 01:07:21 +1100 Subject: [PATCH 285/501] Fix internal gateway IP --- config.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.mk b/config.mk index 481f668748e..e8362247fc6 100644 --- a/config.mk +++ b/config.mk @@ -5,7 +5,7 @@ DOCKER_PREFIX := $(REGION)-docker.pkg.dev/$(PROJECT)/hail DOCKER_ROOT_IMAGE := $(DOCKER_PREFIX)/ubuntu:18.04 HAIL_TEST_GCS_BUCKET := cpg-hail-test DOMAIN := hail.populationgenomics.org.au -INTERNAL_IP := 10.152.0.2 +INTERNAL_IP := 10.152.0.10 IP := 35.201.29.236 KUBERNETES_SERVER_URL := https://34.87.199.41 From 78bfddccaf29ebb251c9672af356fb07eeeb0ec6 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 27 Oct 2021 09:42:49 +1100 Subject: [PATCH 286/501] Revert "Fix removed return in ci/create_database.py" This reverts commit 6259263d8fa23ce88f2abcd959707ddf057f45eb. --- ci/create_database.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/create_database.py b/ci/create_database.py index dd3cf1d4ef3..857800d6549 100644 --- a/ci/create_database.py +++ b/ci/create_database.py @@ -62,7 +62,6 @@ async def create_database(): await write_user_config(namespace, database_name, 'admin', sql_config) await write_user_config(namespace, database_name, 'user', sql_config) - return scope = create_database_config['scope'] _name = create_database_config['_name'] @@ -80,6 +79,7 @@ async def create_database(): rows = [row async for row in rows] if len(rows) > 0: assert len(rows) == 1 + return with open(create_database_config['admin_password_file']) as f: admin_password = f.read() From cf13cabd9fc66b1d7c5cd40f770fbe95c5f34595 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 27 Oct 2021 09:42:50 +1100 Subject: [PATCH 287/501] Revert "Remove return in ci/create_database.py" This reverts commit 952500c64bc7b4ac648695b1f088db2f06a60398. --- ci/create_database.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/create_database.py b/ci/create_database.py index 857800d6549..e96c81d7091 100644 --- a/ci/create_database.py +++ b/ci/create_database.py @@ -62,6 +62,7 @@ async def create_database(): await write_user_config(namespace, database_name, 'admin', sql_config) await write_user_config(namespace, database_name, 'user', sql_config) + return scope = create_database_config['scope'] _name = create_database_config['_name'] From 11ed85697255783bb9ab3e71067fd741ad8fe447 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 27 Oct 2021 10:04:11 +1100 Subject: [PATCH 288/501] Revert "Always update password" This reverts commit 96022f22a7928ee8c5d32cdf284fa4f4ba43b0e7. 
--- ci/create_database.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ci/create_database.py b/ci/create_database.py index e96c81d7091..aa64e870c54 100644 --- a/ci/create_database.py +++ b/ci/create_database.py @@ -97,10 +97,6 @@ async def create_database(): CREATE USER IF NOT EXISTS '{user_username}'@'%' IDENTIFIED BY '{user_password}'; GRANT SELECT, INSERT, UPDATE, DELETE, EXECUTE ON `{_name}`.* TO '{user_username}'@'%'; - -ALTER USER '{admin_username}'@'%' IDENTIFIED BY '{admin_password}'; - -ALTER USER '{user_username}'@'%' IDENTIFIED BY '{user_password}'; ''' ) From 9851a4d70632a02576683fbe3248cb12e600081c Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 27 Oct 2021 12:54:51 +1100 Subject: [PATCH 289/501] Revert "Revert "Always update password"" This reverts commit 11ed85697255783bb9ab3e71067fd741ad8fe447. --- ci/create_database.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ci/create_database.py b/ci/create_database.py index aa64e870c54..e96c81d7091 100644 --- a/ci/create_database.py +++ b/ci/create_database.py @@ -97,6 +97,10 @@ async def create_database(): CREATE USER IF NOT EXISTS '{user_username}'@'%' IDENTIFIED BY '{user_password}'; GRANT SELECT, INSERT, UPDATE, DELETE, EXECUTE ON `{_name}`.* TO '{user_username}'@'%'; + +ALTER USER '{admin_username}'@'%' IDENTIFIED BY '{admin_password}'; + +ALTER USER '{user_username}'@'%' IDENTIFIED BY '{user_password}'; ''' ) From 2ac15fcdf337b86df3777f3e0346a7f4e4a0197b Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 27 Oct 2021 14:23:09 +1100 Subject: [PATCH 290/501] Remove "return" that prevents user config to get created if the db exists --- ci/create_database.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/create_database.py b/ci/create_database.py index e96c81d7091..dd3cf1d4ef3 100644 --- a/ci/create_database.py +++ b/ci/create_database.py @@ -80,7 +80,6 @@ async def create_database(): rows = [row async for row in rows] if len(rows) > 0: assert len(rows) == 1 - return with open(create_database_config['admin_password_file']) as f: admin_password = f.read() From dbf96c2e0b5c22fc347642f70d4ac8e19d7cd37a Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Tue, 1 Feb 2022 17:04:09 +1100 Subject: [PATCH 291/501] Infra: some extra merge conflicts --- infra/gcp/main.tf | 4 ---- infra/global.tfvars | 3 +-- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/infra/gcp/main.tf b/infra/gcp/main.tf index bc42255fa5f..173e5395e51 100644 --- a/infra/gcp/main.tf +++ b/infra/gcp/main.tf @@ -395,10 +395,6 @@ resource "kubernetes_secret" "registry_push_credentials" { } } -module "ukbb" { - source = "../ukbb" -} - module "auth_gsa_secret" { source = "./gsa_k8s_secret" name = "auth" diff --git a/infra/global.tfvars b/infra/global.tfvars index bf790b5c5bc..91ce09f42e8 100644 --- a/infra/global.tfvars +++ b/infra/global.tfvars @@ -1,4 +1,4 @@ -gsuite_organization = "populationgenomics.org.au" +organization_domain = "populationgenomics.org.au" # batch_gcp_regions is a JSON array of string, the names of the gcp # regions to schedule over in Batch. @@ -10,7 +10,6 @@ gcp_region = "australia-southeast1" gcp_zone = "australia-southeast1-b" domain = "hail.populationgenomics.org.au" use_artifact_registry = true -cloud = "gcp" # This is the bucket location that spans the regions you're going to # schedule across in Batch. 
If you are running on one region, it can From a3aed3b942001ab4e3b9ef101617aa1a7359aab1 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Tue, 1 Feb 2022 17:04:39 +1100 Subject: [PATCH 292/501] Infra: some extra merge conflicts - 2 --- infra/global.tfvars | 1 + 1 file changed, 1 insertion(+) diff --git a/infra/global.tfvars b/infra/global.tfvars index 91ce09f42e8..af9ec5bc001 100644 --- a/infra/global.tfvars +++ b/infra/global.tfvars @@ -10,6 +10,7 @@ gcp_region = "australia-southeast1" gcp_zone = "australia-southeast1-b" domain = "hail.populationgenomics.org.au" use_artifact_registry = true +cloud = "gcp" # This is the bucket location that spans the regions you're going to # schedule across in Batch. If you are running on one region, it can From 87e2e0e6ad4fcbe71daa18ce67d86ebadf31fc2d Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Fri, 4 Feb 2022 12:45:35 +1100 Subject: [PATCH 293/501] Terraform fixes --- infra/gcp/ci/main.tf | 10 ---------- infra/gcp/main.tf | 2 -- infra/global.tfvars | 1 - 3 files changed, 13 deletions(-) diff --git a/infra/gcp/ci/main.tf b/infra/gcp/ci/main.tf index 35db2c902d6..c17e66cdc3c 100644 --- a/infra/gcp/ci/main.tf +++ b/infra/gcp/ci/main.tf @@ -30,16 +30,6 @@ resource "kubernetes_secret" "ci_config" { } } -resource "kubernetes_secret" "zulip_config" { - metadata { - name = "zulip-config" - } - - data = { - ".zuliprc" = file("~/.hail/.zuliprc") - } -} - resource "kubernetes_secret" "hail_ci_0_1_github_oauth_token" { metadata { name = "hail-ci-0-1-github-oauth-token" diff --git a/infra/gcp/main.tf b/infra/gcp/main.tf index 173e5395e51..1dffffcaf8a 100644 --- a/infra/gcp/main.tf +++ b/infra/gcp/main.tf @@ -14,8 +14,6 @@ terraform { } } -variable "cloud" {} -variable "gsuite_organization" {} variable "batch_gcp_regions" {} variable "gcp_project" {} variable "batch_logs_bucket_location" {} diff --git a/infra/global.tfvars b/infra/global.tfvars index af9ec5bc001..91ce09f42e8 100644 --- a/infra/global.tfvars +++ b/infra/global.tfvars @@ -10,7 +10,6 @@ gcp_region = "australia-southeast1" gcp_zone = "australia-southeast1-b" domain = "hail.populationgenomics.org.au" use_artifact_registry = true -cloud = "gcp" # This is the bucket location that spans the regions you're going to # schedule across in Batch. If you are running on one region, it can From 07cafa54317d8eb192e0024aa6b5b39c76cae39a Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Fri, 4 Feb 2022 14:12:40 +1100 Subject: [PATCH 294/501] bootstrap.sh: uncomment configure-docker --- infra/gcp/bootstrap.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infra/gcp/bootstrap.sh b/infra/gcp/bootstrap.sh index 87009ec73d8..37e2643258b 100755 --- a/infra/gcp/bootstrap.sh +++ b/infra/gcp/bootstrap.sh @@ -7,7 +7,7 @@ function configure_gcloud() { gcloud -q auth configure-docker # If you are using the Artifact Registry: - # gcloud -q auth configure-docker $REGION-docker.pkg.dev + gcloud -q auth configure-docker $REGION-docker.pkg.dev gcloud container clusters get-credentials --zone $ZONE vdc } From a1b8f24ec0e560c83f2458da4abee2a9744f6b68 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Mon, 7 Feb 2022 13:00:55 +1100 Subject: [PATCH 295/501] infra: README fix create batch worker image --- infra/gcp/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/infra/gcp/README.md b/infra/gcp/README.md index 017c716693e..f83708109aa 100644 --- a/infra/gcp/README.md +++ b/infra/gcp/README.md @@ -153,13 +153,13 @@ You can now install Hail: - Create the batch worker VM image. 
Run: ``` - make -C $HAIL/batch create-build-worker-image-instance + make -C $HAIL/batch cpg-create-build-worker-image-instance ``` Wait for the `build-batch-worker-image` instance to be stopped. Then run: ``` - make -C $HAIL/batch create-worker-image + make -C $HAIL/batch cpg-create-worker-image ``` - Download the global-config to be used by `bootstrap.py`. From e760de5214297fd5a0e5d65c1f2a31955645b3ad Mon Sep 17 00:00:00 2001 From: Vlad Savelyev Date: Wed, 9 Feb 2022 13:07:31 +1100 Subject: [PATCH 296/501] Conda: add botocore (#130) --- conda/hail/meta-template.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/conda/hail/meta-template.yaml b/conda/hail/meta-template.yaml index 3e0494b4e93..9ab889dd78f 100644 --- a/conda/hail/meta-template.yaml +++ b/conda/hail/meta-template.yaml @@ -48,6 +48,7 @@ requirements: - google-cloud-storage - google-api-core - janus >=0.6,<0.7 + - botocore >=1.20,<2.0 test: imports: From bbb9051cac722c49afa43fce170735a2636fdfab Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Fri, 11 Feb 2022 14:10:13 +1100 Subject: [PATCH 297/501] Add CLOUD into config.mk --- config.mk | 1 + 1 file changed, 1 insertion(+) diff --git a/config.mk b/config.mk index 01cfbba32c6..d9479e67d32 100644 --- a/config.mk +++ b/config.mk @@ -8,6 +8,7 @@ DOMAIN := hail.populationgenomics.org.au INTERNAL_IP := 10.152.0.10 IP := 35.201.29.236 KUBERNETES_SERVER_URL := https://34.87.199.41 +CLOUD := gcp ifeq ($(NAMESPACE),default) SCOPE = deploy From 75a14b5533a8df187aae66876d8624885ad5a954 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Mon, 14 Feb 2022 17:16:31 +1100 Subject: [PATCH 298/501] Fixes --- infra/bootstrap_utils.sh | 6 ++++++ infra/gcp/README.md | 4 ++-- infra/gcp/bootstrap.sh | 3 ++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/infra/bootstrap_utils.sh b/infra/bootstrap_utils.sh index 47f693e7ab3..acfec4ef5a7 100644 --- a/infra/bootstrap_utils.sh +++ b/infra/bootstrap_utils.sh @@ -13,12 +13,18 @@ render_config_mk() { IP=$(get_global_config_field ip) DOMAIN=$(get_global_config_field domain) CLOUD=$(get_global_config_field cloud) + PROJECT=$(get_global_config_field project) + REGION=$(get_global_config_field region) + ZONE=$(get_global_config_field zone) cat >$HAIL/config.mk < Date: Mon, 14 Feb 2022 17:17:43 +1100 Subject: [PATCH 299/501] CPG: remove unused domains from letsencrypt/subdomains.txt --- infra/bootstrap_utils.sh | 6 +++--- letsencrypt/subdomains.txt | 6 ------ 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/infra/bootstrap_utils.sh b/infra/bootstrap_utils.sh index acfec4ef5a7..673ca0e91e6 100644 --- a/infra/bootstrap_utils.sh +++ b/infra/bootstrap_utils.sh @@ -13,9 +13,9 @@ render_config_mk() { IP=$(get_global_config_field ip) DOMAIN=$(get_global_config_field domain) CLOUD=$(get_global_config_field cloud) - PROJECT=$(get_global_config_field project) - REGION=$(get_global_config_field region) - ZONE=$(get_global_config_field zone) + PROJECT=$(get_global_config_field gcp_project) + REGION=$(get_global_config_field gcp_region) + ZONE=$(get_global_config_field gcp_zone) cat >$HAIL/config.mk < Date: Mon, 14 Feb 2022 18:02:48 +1100 Subject: [PATCH 300/501] Fix merge --- ci/ci/github.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/ci/github.py b/ci/ci/github.py index b2fbc578663..d9d883b9f3d 100644 --- a/ci/ci/github.py +++ b/ci/ci/github.py @@ -8,6 +8,7 @@ import concurrent.futures import aiohttp import gidgethub +import zulip import random import os import prometheus_client as pc # type: ignore From 
d2850e31dbed975e6db3d5ece2115e71c0bb7516 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Mon, 14 Feb 2022 18:04:03 +1100 Subject: [PATCH 301/501] Update config.mk (now auto-generated from k8s global-config) --- config.mk | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/config.mk b/config.mk index d9479e67d32..73f9bdceb08 100644 --- a/config.mk +++ b/config.mk @@ -1,14 +1,11 @@ -PROJECT := hail-295901 -REGION := australia-southeast1 -ZONE := australia-southeast1-b -DOCKER_PREFIX := $(REGION)-docker.pkg.dev/$(PROJECT)/hail -DOCKER_ROOT_IMAGE := $(DOCKER_PREFIX)/ubuntu:20.04 -HAIL_TEST_GCS_BUCKET := cpg-hail-test -DOMAIN := hail.populationgenomics.org.au +DOCKER_PREFIX := australia-southeast1-docker.pkg.dev/hail-295901/hail INTERNAL_IP := 10.152.0.10 IP := 35.201.29.236 -KUBERNETES_SERVER_URL := https://34.87.199.41 +DOMAIN := hail.populationgenomics.org.au CLOUD := gcp +PROJECT := hail-295901 +REGION := australia-southeast1 +ZONE := australia-southeast1-b ifeq ($(NAMESPACE),default) SCOPE = deploy From c98eda2c2b59ffd0e9d7e674ee149736237bf09a Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Tue, 15 Feb 2022 16:59:46 +1100 Subject: [PATCH 302/501] Add DOCKER_ROOT_IMAGE to config.mk --- infra/bootstrap_utils.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/infra/bootstrap_utils.sh b/infra/bootstrap_utils.sh index 673ca0e91e6..d56ff33d875 100644 --- a/infra/bootstrap_utils.sh +++ b/infra/bootstrap_utils.sh @@ -9,6 +9,7 @@ source $HAIL/devbin/functions.sh render_config_mk() { DOCKER_PREFIX=$(get_global_config_field docker_prefix) + DOCKER_ROOT_IMAGE=$(get_global_config_field docker_root_image) INTERNAL_IP=$(get_global_config_field internal_ip) IP=$(get_global_config_field ip) DOMAIN=$(get_global_config_field domain) @@ -18,6 +19,7 @@ render_config_mk() { ZONE=$(get_global_config_field gcp_zone) cat >$HAIL/config.mk < Date: Tue, 15 Feb 2022 17:16:13 +1100 Subject: [PATCH 303/501] Add KUBERNETES_SERVER_URL and HAIL_TEST_GCS_BUCKET to config.mk --- infra/bootstrap_utils.sh | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/infra/bootstrap_utils.sh b/infra/bootstrap_utils.sh index d56ff33d875..7d46153fea9 100644 --- a/infra/bootstrap_utils.sh +++ b/infra/bootstrap_utils.sh @@ -8,25 +8,29 @@ fi source $HAIL/devbin/functions.sh render_config_mk() { + CLOUD=$(get_global_config_field cloud) DOCKER_PREFIX=$(get_global_config_field docker_prefix) DOCKER_ROOT_IMAGE=$(get_global_config_field docker_root_image) - INTERNAL_IP=$(get_global_config_field internal_ip) - IP=$(get_global_config_field ip) DOMAIN=$(get_global_config_field domain) - CLOUD=$(get_global_config_field cloud) PROJECT=$(get_global_config_field gcp_project) REGION=$(get_global_config_field gcp_region) ZONE=$(get_global_config_field gcp_zone) + HAIL_TEST_GCS_BUCKET=$(get_global_config_field hail_test_gcs_bucket) + INTERNAL_IP=$(get_global_config_field internal_ip) + IP=$(get_global_config_field ip) + KUBERNETES_SERVER_URL=$(get_global_config_field kubernetes_server_url) cat >$HAIL/config.mk < Date: Tue, 15 Feb 2022 17:17:46 +1100 Subject: [PATCH 304/501] Update config.mk --- config.mk | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/config.mk b/config.mk index 73f9bdceb08..fcc1c48495d 100644 --- a/config.mk +++ b/config.mk @@ -1,11 +1,14 @@ +CLOUD := gcp DOCKER_PREFIX := australia-southeast1-docker.pkg.dev/hail-295901/hail -INTERNAL_IP := 10.152.0.10 -IP := 35.201.29.236 +DOCKER_ROOT_IMAGE := 
australia-southeast1-docker.pkg.dev/hail-295901/hail/ubuntu:20.04 DOMAIN := hail.populationgenomics.org.au -CLOUD := gcp PROJECT := hail-295901 REGION := australia-southeast1 ZONE := australia-southeast1-b +HAIL_TEST_GCS_BUCKET := hail-test-0d3f214ff5 +INTERNAL_IP := 10.152.0.10 +IP := 35.201.29.236 +KUBERNETES_SERVER_URL := https://34.87.199.41 ifeq ($(NAMESPACE),default) SCOPE = deploy From 100c99e6898e4bb738fb778d77df7e9ef4638f0a Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 16 Feb 2022 11:45:55 +1100 Subject: [PATCH 305/501] Fix merge --- ci/ci/ci.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/ci/ci.py b/ci/ci/ci.py index 58b9e285964..7d67066afb5 100644 --- a/ci/ci/ci.py +++ b/ci/ci/ci.py @@ -2,6 +2,7 @@ import json import logging import asyncio +import os import concurrent.futures from aiohttp import web import uvloop # type: ignore From c3cc97a24d37a32e8230294ff0df5aeb07c83372 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 16 Feb 2022 11:50:21 +1100 Subject: [PATCH 306/501] Fix merge - 2 --- ci/ci/ci.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/ci/ci/ci.py b/ci/ci/ci.py index 7d67066afb5..9012f5d14d2 100644 --- a/ci/ci/ci.py +++ b/ci/ci/ci.py @@ -30,9 +30,6 @@ from .github import Repo, FQBranch, WatchedBranch, UnwatchedBranch, MergeFailureBatch, PR, select_random_teammate, WIP from .constants import AUTHORIZED_USERS, TEAMS -with open(os.environ.get('HAIL_CI_OAUTH_TOKEN', 'oauth-token/oauth-token'), 'r') as f: - oauth_token = f.read().strip() - log = logging.getLogger('ci') uvloop.install() From 464db0505db1a25bcc9541910b0697d4123040a8 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 16 Feb 2022 12:05:45 +1100 Subject: [PATCH 307/501] Remove CI WatchedBranch stuff --- ci/ci/ci.py | 333 ---------------------------------------------------- 1 file changed, 333 deletions(-) diff --git a/ci/ci/ci.py b/ci/ci/ci.py index 9012f5d14d2..d065212f1db 100644 --- a/ci/ci/ci.py +++ b/ci/ci/ci.py @@ -39,69 +39,6 @@ routes = web.RouteTableDef() -class PRConfig(TypedDict): - number: int - title: str - batch_id: Optional[int] - build_state: Optional[str] - source_branch_name: str - review_state: Optional[str] - author: str - assignees: Set[str] - reviewers: Set[str] - labels: Set[str] - out_of_date: bool - - -async def pr_config(app, pr: PR) -> PRConfig: - batch_id = pr.batch.id if pr.batch and isinstance(pr.batch, Batch) else None - build_state = pr.build_state if await pr.authorized(app['dbpool']) else 'unauthorized' - if build_state is None and batch_id is not None: - build_state = 'building' - return { - 'number': pr.number, - 'title': pr.title, - # FIXME generate links to the merge log - 'batch_id': batch_id, - 'build_state': build_state, - 'source_branch_name': pr.source_branch.name, - 'review_state': pr.review_state, - 'author': pr.author, - 'assignees': pr.assignees, - 'reviewers': pr.reviewers, - 'labels': pr.labels, - 'out_of_date': pr.build_state in ['failure', 'success', None] and not pr.is_up_to_date(), - } - - -class WatchedBranchConfig(TypedDict): - index: int - branch: str - sha: Optional[str] - deploy_batch_id: Optional[int] - deploy_state: Optional[str] - repo: str - prs: List[PRConfig] - - -async def watched_branch_config(app, wb: WatchedBranch, index: int) -> WatchedBranchConfig: - if wb.prs: - pr_configs = [await pr_config(app, pr) for pr in wb.prs.values()] - else: - pr_configs = [] - # FIXME recent deploy history - return { - 'index': index, - 'branch': wb.branch.short_str(), - 'sha': wb.sha, - # FIXME generate links to the merge log - 
'deploy_batch_id': wb.deploy_batch.id if wb.deploy_batch and isinstance(wb.deploy_batch, Batch) else None, - 'deploy_state': wb.deploy_state, - 'repo': wb.branch.repo.short_str(), - 'prs': pr_configs, - } - - @routes.get('') @routes.get('/') @web_authenticated_developers_only() @@ -110,93 +47,6 @@ async def index(request, userdata): # pylint: disable=unused-argument return web.HTTPFound(deploy_config.external_url('ci', '/batches')) -def wb_and_pr_from_request(request): - watched_branch_index = int(request.match_info['watched_branch_index']) - pr_number = int(request.match_info['pr_number']) - - if watched_branch_index < 0 or watched_branch_index >= len(watched_branches): - raise web.HTTPNotFound() - wb = watched_branches[watched_branch_index] - - if not wb.prs or pr_number not in wb.prs: - raise web.HTTPNotFound() - return wb, wb.prs[pr_number] - - -@routes.get('/watched_branches/{watched_branch_index}/pr/{pr_number}') -@web_authenticated_developers_only() -async def get_pr(request, userdata): # pylint: disable=unused-argument - wb, pr = wb_and_pr_from_request(request) - - page_context = {} - page_context['repo'] = wb.branch.repo.short_str() - page_context['wb'] = wb - page_context['pr'] = pr - # FIXME - batch = pr.batch - if batch: - if isinstance(batch, Batch): - status = await batch.last_known_status() - jobs = await collect_agen(batch.jobs()) - for j in jobs: - j['duration'] = humanize_timedelta_msecs(j['duration']) - page_context['batch'] = status - page_context['jobs'] = jobs - artifacts_uri = f'{STORAGE_URI}/build/{batch.attributes["token"]}' - page_context['artifacts_uri'] = artifacts_uri - page_context['artifacts_url'] = storage_uri_to_url(artifacts_uri) - else: - page_context['exception'] = '\n'.join( - traceback.format_exception(None, batch.exception, batch.exception.__traceback__) - ) - - batch_client = request.app['batch_client'] - target_branch = wb.branch.short_str() - batches = batch_client.list_batches(f'test=1 ' f'pr={pr.number} ' f'target_branch={target_branch} ' f'user:ci') - batches = sorted([b async for b in batches], key=lambda b: b.id, reverse=True) - page_context['history'] = [await b.last_known_status() for b in batches] - - return await render_template('ci', request, userdata, 'pr.html', page_context) - - -def storage_uri_to_url(uri: str) -> str: - if uri.startswith('gs://'): - protocol = 'gs://' - path = uri[len(protocol) :] - return f'https://console.cloud.google.com/storage/browser/{path}' - return uri - - -async def retry_pr(wb, pr, request): - app = request.app - session = await aiohttp_session.get_session(request) - - if pr.batch is None: - log.info('retry cannot be requested for PR #{pr.number} because it has no batch') - set_message(session, f'Retry cannot be requested for PR #{pr.number} because it has no batch.', 'error') - return - - batch_id = pr.batch.id - dbpool = app['dbpool'] - async with dbpool.acquire() as conn: - async with conn.cursor() as cursor: - await cursor.execute('INSERT INTO invalidated_batches (batch_id) VALUES (%s);', batch_id) - await wb.notify_batch_changed(app) - - log.info(f'retry requested for PR: {pr.number}') - set_message(session, f'Retry requested for PR #{pr.number}.', 'info') - - -@routes.post('/watched_branches/{watched_branch_index}/pr/{pr_number}/retry') -@check_csrf_token -@web_authenticated_developers_only(redirect=False) -async def post_retry_pr(request, userdata): # pylint: disable=unused-argument - wb, pr = wb_and_pr_from_request(request) - - await asyncio.shield(retry_pr(wb, pr, request)) - return 
web.HTTPFound(deploy_config.external_url('ci', f'/watched_branches/{wb.index}/pr/{pr.number}')) - - @routes.get('/batches') @web_authenticated_developers_only() async def get_batches(request, userdata): @@ -238,189 +88,6 @@ async def get_job(request, userdata): return await render_template('ci', request, userdata, 'job.html', page_context) -def filter_wbs(wbs: List[WatchedBranchConfig], pred: Callable[[PRConfig], bool]): - return [{**wb, 'prs': [pr for pr in wb['prs'] if pred(pr)]} for wb in wbs] - - -def is_pr_author(gh_username: str, pr_config: PRConfig) -> bool: - return gh_username == pr_config['author'] - - -def is_pr_reviewer(gh_username: str, pr_config: PRConfig) -> bool: - return gh_username in pr_config['assignees'] or gh_username in pr_config['reviewers'] - - -def pr_requires_action(gh_username: str, pr_config: PRConfig) -> bool: - build_state = pr_config['build_state'] - review_state = pr_config['review_state'] - return ( - is_pr_author(gh_username, pr_config) - and (build_state == 'failure' or review_state == 'changes_requested' or WIP in pr_config['labels']) - ) or (is_pr_reviewer(gh_username, pr_config) and review_state == 'pending') - - -@routes.get('/me') -@web_authenticated_developers_only() -async def get_user(request, userdata): - for authorized_user in AUTHORIZED_USERS: - if authorized_user.hail_username == userdata['username']: - user = authorized_user - break - else: - raise web.HTTPForbidden() - - wbs = [await watched_branch_config(request.app, wb, i) for i, wb in enumerate(watched_branches)] - pr_wbs = filter_wbs(wbs, lambda pr: is_pr_author(user.gh_username, pr)) - review_wbs = filter_wbs(wbs, lambda pr: is_pr_reviewer(user.gh_username, pr)) - actionable_wbs = filter_wbs(wbs, lambda pr: pr_requires_action(user.gh_username, pr)) - - batch_client = request.app['batch_client'] - dev_deploys = batch_client.list_batches(f'user={user.hail_username} dev_deploy=1', limit=10) - dev_deploys = sorted([b async for b in dev_deploys], key=lambda b: b.id, reverse=True) - - team_random_member = {team: select_random_teammate(team).gh_username for team in TEAMS} - - page_context = { - 'username': user.hail_username, - 'gh_username': user.gh_username, - 'pr_wbs': pr_wbs, - 'review_wbs': review_wbs, - 'actionable_wbs': actionable_wbs, - 'team_member': team_random_member, - 'dev_deploys': [await b.last_known_status() for b in dev_deploys], - } - return await render_template('ci', request, userdata, 'user.html', page_context) - - -@routes.post('/authorize_source_sha') -@check_csrf_token -@web_authenticated_developers_only(redirect=False) -async def post_authorized_source_sha(request, userdata): # pylint: disable=unused-argument - app = request.app - dbpool = app['dbpool'] - post = await request.post() - sha = post['sha'].strip() - async with dbpool.acquire() as conn: - async with conn.cursor() as cursor: - await cursor.execute('INSERT INTO authorized_shas (sha) VALUES (%s);', sha) - log.info(f'authorized sha: {sha}') - session = await aiohttp_session.get_session(request) - set_message(session, f'SHA {sha} authorized.', 'info') - return web.HTTPFound(deploy_config.external_url('ci', '/')) - - -@routes.get('/healthcheck') -async def healthcheck(request): # pylint: disable=unused-argument - return web.Response(status=200) - - -gh_router = gh_routing.Router() - - -@gh_router.register('pull_request') -async def pull_request_callback(event): - gh_pr = event.data['pull_request'] - number = gh_pr['number'] - target_branch = FQBranch.from_gh_json(gh_pr['base']) - for wb in watched_branches: - if 
(wb.prs and number in wb.prs) or (wb.branch == target_branch): - await wb.notify_github_changed(event.app) - - -@gh_router.register('push') -async def push_callback(event): - data = event.data - ref = data['ref'] - if ref.startswith('refs/heads/'): - branch_name = ref[len('refs/heads/') :] - branch = FQBranch(Repo.from_gh_json(data['repository']), branch_name) - for wb in watched_branches: - if wb.branch == branch or any(pr.branch == branch for pr in wb.prs.values()): - await wb.notify_github_changed(event.app) - - -@gh_router.register('pull_request_review') -async def pull_request_review_callback(event): - gh_pr = event.data['pull_request'] - number = gh_pr['number'] - for wb in watched_branches: - if number in wb.prs: - await wb.notify_github_changed(event.app) - - -async def github_callback_handler(request): - event = gh_sansio.Event.from_http(request.headers, await request.read()) - event.app = request.app - await gh_router.dispatch(event) - - -@routes.post('/github_callback') -async def github_callback(request): - await asyncio.shield(github_callback_handler(request)) - return web.Response(status=200) - - -async def batch_callback_handler(request): - app = request.app - params = await request.json() - log.info(f'batch callback {params}') - attrs = params.get('attributes') - if attrs: - target_branch = attrs.get('target_branch') - if target_branch: - for wb in watched_branches: - if wb.branch.short_str() == target_branch: - log.info(f'watched_branch {wb.branch.short_str()} notify batch changed') - await wb.notify_batch_changed(app) - - -@routes.get('/api/v1alpha/deploy_status') -@rest_authenticated_developers_only -async def deploy_status(request, userdata): # pylint: disable=unused-argument - batch_client = request.app['batch_client'] - - async def get_failure_information(batch): - if isinstance(batch, MergeFailureBatch): - exc = batch.exception - return traceback.format_exception(etype=type(exc), value=exc, tb=exc.__traceback__) - jobs = await collect_agen(batch.jobs()) - - async def fetch_job_and_log(j): - full_job = await batch_client.get_job(j['batch_id'], j['job_id']) - log = await full_job.log() - return {**full_job._status, 'log': log} - - return await asyncio.gather(*[fetch_job_and_log(j) for j in jobs if j['state'] in ('Error', 'Failed')]) - - wb_configs = [ - { - 'branch': wb.branch.short_str(), - 'sha': wb.sha, - 'deploy_batch_id': wb.deploy_batch.id if wb.deploy_batch and isinstance(wb.deploy_batch, Batch) else None, - 'deploy_state': wb.deploy_state, - 'repo': wb.branch.repo.short_str(), - 'failure_information': None - if wb.deploy_state == 'success' - else await get_failure_information(wb.deploy_batch), - } - for wb in watched_branches - ] - return web.json_response(wb_configs) - - -@routes.post('/api/v1alpha/update') -@rest_authenticated_developers_only -async def post_update(request, userdata): # pylint: disable=unused-argument - log.info('developer triggered update') - - async def update_all(): - for wb in watched_branches: - await wb.update(request.app) - - request.app['task_manager'].ensure_future(update_all()) - return web.Response(status=200) - - @routes.post('/api/v1alpha/dev_deploy_branch') @rest_authenticated_developers_only async def dev_deploy_branch(request, userdata): From a821c7a62f5a36fd1d0cfcfb36600e681ae1c257 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 16 Feb 2022 12:18:12 +1100 Subject: [PATCH 308/501] Try re-adding WatchedBranch stuff with checks --- ci/ci/ci.py | 460 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 454 
insertions(+), 6 deletions(-) diff --git a/ci/ci/ci.py b/ci/ci/ci.py index d065212f1db..ac35191e5f6 100644 --- a/ci/ci/ci.py +++ b/ci/ci/ci.py @@ -6,7 +6,7 @@ import concurrent.futures from aiohttp import web import uvloop # type: ignore -from gidgethub import aiohttp as gh_aiohttp +from gidgethub import aiohttp as gh_aiohttp, routing as gh_routing, sansio as gh_sansio from hailtop.utils import collect_agen, humanize_timedelta_msecs from hailtop.batch_client.aioclient import BatchClient from hailtop.config import get_deploy_config @@ -30,15 +30,89 @@ from .github import Repo, FQBranch, WatchedBranch, UnwatchedBranch, MergeFailureBatch, PR, select_random_teammate, WIP from .constants import AUTHORIZED_USERS, TEAMS +if os.environ.get('HAIL_CI_OAUTH_TOKEN') or os.path.exists('oauth-token/oauth-token'): + with open(os.environ.get('HAIL_CI_OAUTH_TOKEN', 'oauth-token/oauth-token'), 'r') as f: + oauth_token = f.read().strip() +else: + oauth_token = None + log = logging.getLogger('ci') uvloop.install() deploy_config = get_deploy_config() +watched_branches: List[WatchedBranch] = [ + WatchedBranch(index, FQBranch.from_short_str(bss), deployable, mergeable) + for (index, [bss, deployable, mergeable]) in enumerate(json.loads(os.environ.get('HAIL_WATCHED_BRANCHES', '[]'))) +] + routes = web.RouteTableDef() +class PRConfig(TypedDict): + number: int + title: str + batch_id: Optional[int] + build_state: Optional[str] + source_branch_name: str + review_state: Optional[str] + author: str + assignees: Set[str] + reviewers: Set[str] + labels: Set[str] + out_of_date: bool + + +async def pr_config(app, pr: PR) -> PRConfig: + batch_id = pr.batch.id if pr.batch and isinstance(pr.batch, Batch) else None + build_state = pr.build_state if await pr.authorized(app['dbpool']) else 'unauthorized' + if build_state is None and batch_id is not None: + build_state = 'building' + return { + 'number': pr.number, + 'title': pr.title, + # FIXME generate links to the merge log + 'batch_id': batch_id, + 'build_state': build_state, + 'source_branch_name': pr.source_branch.name, + 'review_state': pr.review_state, + 'author': pr.author, + 'assignees': pr.assignees, + 'reviewers': pr.reviewers, + 'labels': pr.labels, + 'out_of_date': pr.build_state in ['failure', 'success', None] and not pr.is_up_to_date(), + } + + +class WatchedBranchConfig(TypedDict): + index: int + branch: str + sha: Optional[str] + deploy_batch_id: Optional[int] + deploy_state: Optional[str] + repo: str + prs: List[PRConfig] + + +async def watched_branch_config(app, wb: WatchedBranch, index: int) -> WatchedBranchConfig: + if wb.prs: + pr_configs = [await pr_config(app, pr) for pr in wb.prs.values()] + else: + pr_configs = [] + # FIXME recent deploy history + return { + 'index': index, + 'branch': wb.branch.short_str(), + 'sha': wb.sha, + # FIXME generate links to the merge log + 'deploy_batch_id': wb.deploy_batch.id if wb.deploy_batch and isinstance(wb.deploy_batch, Batch) else None, + 'deploy_state': wb.deploy_state, + 'repo': wb.branch.repo.short_str(), + 'prs': pr_configs, + } + + @routes.get('') @routes.get('/') @web_authenticated_developers_only() @@ -47,6 +121,93 @@ async def index(request, userdata): # pylint: disable=unused-argument return web.HTTPFound(deploy_config.external_url('ci', '/batches')) +def wb_and_pr_from_request(request): + watched_branch_index = int(request.match_info['watched_branch_index']) + pr_number = int(request.match_info['pr_number']) + + if watched_branch_index < 0 or watched_branch_index >= len(watched_branches): + raise 
web.HTTPNotFound() + wb = watched_branches[watched_branch_index] + + if not wb.prs or pr_number not in wb.prs: + raise web.HTTPNotFound() + return wb, wb.prs[pr_number] + + +@routes.get('/watched_branches/{watched_branch_index}/pr/{pr_number}') +@web_authenticated_developers_only() +async def get_pr(request, userdata): # pylint: disable=unused-argument + wb, pr = wb_and_pr_from_request(request) + + page_context = {} + page_context['repo'] = wb.branch.repo.short_str() + page_context['wb'] = wb + page_context['pr'] = pr + # FIXME + batch = pr.batch + if batch: + if isinstance(batch, Batch): + status = await batch.last_known_status() + jobs = await collect_agen(batch.jobs()) + for j in jobs: + j['duration'] = humanize_timedelta_msecs(j['duration']) + page_context['batch'] = status + page_context['jobs'] = jobs + artifacts_uri = f'{STORAGE_URI}/build/{batch.attributes["token"]}' + page_context['artifacts_uri'] = artifacts_uri + page_context['artifacts_url'] = storage_uri_to_url(artifacts_uri) + else: + page_context['exception'] = '\n'.join( + traceback.format_exception(None, batch.exception, batch.exception.__traceback__) + ) + + batch_client = request.app['batch_client'] + target_branch = wb.branch.short_str() + batches = batch_client.list_batches(f'test=1 ' f'pr={pr.number} ' f'target_branch={target_branch} ' f'user:ci') + batches = sorted([b async for b in batches], key=lambda b: b.id, reverse=True) + page_context['history'] = [await b.last_known_status() for b in batches] + + return await render_template('ci', request, userdata, 'pr.html', page_context) + + +def storage_uri_to_url(uri: str) -> str: + if uri.startswith('gs://'): + protocol = 'gs://' + path = uri[len(protocol) :] + return f'https://console.cloud.google.com/storage/browser/{path}' + return uri + + +async def retry_pr(wb, pr, request): + app = request.app + session = await aiohttp_session.get_session(request) + + if pr.batch is None: + log.info('retry cannot be requested for PR #{pr.number} because it has no batch') + set_message(session, f'Retry cannot be requested for PR #{pr.number} because it has no batch.', 'error') + return + + batch_id = pr.batch.id + dbpool = app['dbpool'] + async with dbpool.acquire() as conn: + async with conn.cursor() as cursor: + await cursor.execute('INSERT INTO invalidated_batches (batch_id) VALUES (%s);', batch_id) + await wb.notify_batch_changed(app) + + log.info(f'retry requested for PR: {pr.number}') + set_message(session, f'Retry requested for PR #{pr.number}.', 'info') + + +@routes.post('/watched_branches/{watched_branch_index}/pr/{pr_number}/retry') +@check_csrf_token +@web_authenticated_developers_only(redirect=False) +async def post_retry_pr(request, userdata): # pylint: disable=unused-argument + wb, pr = wb_and_pr_from_request(request) + + await asyncio.shield(retry_pr(wb, pr, request)) + return web.HTTPFound(deploy_config.external_url('ci', f'/watched_branches/{wb.index}/pr/{pr.number}')) + + @routes.get('/batches') @web_authenticated_developers_only() async def get_batches(request, userdata): @@ -190,6 +351,290 @@ async def prod_deploy(request, userdata): raise web.HTTPBadRequest(text=f'starting prod deploy failed due to\n{message}') from batch.exception +def get_maybe_wb_for_batch(b: Batch): + if 'target_branch' in b.attributes and 'pr' in b.attributes: + branch = b.attributes['target_branch'] + wbs = [wb for wb in watched_branches if wb.branch.short_str() == branch] + if len(wbs) == 0: + pr = b.attributes['pr'] + log.exception(f"Attempted to load PR {pr} for unwatched branch 
{branch}") + else: + assert len(wbs) == 1 + return wbs[0].index + return None + + +@routes.get('/batches/{batch_id}/jobs/{job_id}') +@web_authenticated_developers_only() +async def get_job(request, userdata): + batch_id = int(request.match_info['batch_id']) + job_id = int(request.match_info['job_id']) + batch_client = request.app['batch_client'] + job = await batch_client.get_job(batch_id, job_id) + page_context = { + 'batch_id': batch_id, + 'job_id': job_id, + 'job_log': await job.log(), + 'job_status': json.dumps(await job.status(), indent=2), + 'attempts': await job.attempts(), + } + return await render_template('ci', request, userdata, 'job.html', page_context) + + +def filter_wbs(wbs: List[WatchedBranchConfig], pred: Callable[[PRConfig], bool]): + return [{**wb, 'prs': [pr for pr in wb['prs'] if pred(pr)]} for wb in wbs] + + +def is_pr_author(gh_username: str, pr_config: PRConfig) -> bool: + return gh_username == pr_config['author'] + + +def is_pr_reviewer(gh_username: str, pr_config: PRConfig) -> bool: + return gh_username in pr_config['assignees'] or gh_username in pr_config['reviewers'] + + +def pr_requires_action(gh_username: str, pr_config: PRConfig) -> bool: + build_state = pr_config['build_state'] + review_state = pr_config['review_state'] + return ( + is_pr_author(gh_username, pr_config) + and (build_state == 'failure' or review_state == 'changes_requested' or WIP in pr_config['labels']) + ) or (is_pr_reviewer(gh_username, pr_config) and review_state == 'pending') + + +@routes.get('/me') +@web_authenticated_developers_only() +async def get_user(request, userdata): + for authorized_user in AUTHORIZED_USERS: + if authorized_user.hail_username == userdata['username']: + user = authorized_user + break + else: + raise web.HTTPForbidden() + + wbs = [await watched_branch_config(request.app, wb, i) for i, wb in enumerate(watched_branches)] + pr_wbs = filter_wbs(wbs, lambda pr: is_pr_author(user.gh_username, pr)) + review_wbs = filter_wbs(wbs, lambda pr: is_pr_reviewer(user.gh_username, pr)) + actionable_wbs = filter_wbs(wbs, lambda pr: pr_requires_action(user.gh_username, pr)) + + batch_client = request.app['batch_client'] + dev_deploys = batch_client.list_batches(f'user={user.hail_username} dev_deploy=1', limit=10) + dev_deploys = sorted([b async for b in dev_deploys], key=lambda b: b.id, reverse=True) + + team_random_member = {team: select_random_teammate(team).gh_username for team in TEAMS} + + page_context = { + 'username': user.hail_username, + 'gh_username': user.gh_username, + 'pr_wbs': pr_wbs, + 'review_wbs': review_wbs, + 'actionable_wbs': actionable_wbs, + 'team_member': team_random_member, + 'dev_deploys': [await b.last_known_status() for b in dev_deploys], + } + return await render_template('ci', request, userdata, 'user.html', page_context) + + +@routes.post('/authorize_source_sha') +@check_csrf_token +@web_authenticated_developers_only(redirect=False) +async def post_authorized_source_sha(request, userdata): # pylint: disable=unused-argument + app = request.app + dbpool = app['dbpool'] + post = await request.post() + sha = post['sha'].strip() + async with dbpool.acquire() as conn: + async with conn.cursor() as cursor: + await cursor.execute('INSERT INTO authorized_shas (sha) VALUES (%s);', sha) + log.info(f'authorized sha: {sha}') + session = await aiohttp_session.get_session(request) + set_message(session, f'SHA {sha} authorized.', 'info') + return web.HTTPFound(deploy_config.external_url('ci', '/')) + + +@routes.get('/healthcheck') +async def healthcheck(request): # 
pylint: disable=unused-argument + return web.Response(status=200) + + +gh_router = gh_routing.Router() + + +@gh_router.register('pull_request') +async def pull_request_callback(event): + gh_pr = event.data['pull_request'] + number = gh_pr['number'] + target_branch = FQBranch.from_gh_json(gh_pr['base']) + for wb in watched_branches: + if (wb.prs and number in wb.prs) or (wb.branch == target_branch): + await wb.notify_github_changed(event.app) + + +@gh_router.register('push') +async def push_callback(event): + data = event.data + ref = data['ref'] + if ref.startswith('refs/heads/'): + branch_name = ref[len('refs/heads/') :] + branch = FQBranch(Repo.from_gh_json(data['repository']), branch_name) + for wb in watched_branches: + if wb.branch == branch or any(pr.branch == branch for pr in wb.prs.values()): + await wb.notify_github_changed(event.app) + + +@gh_router.register('pull_request_review') +async def pull_request_review_callback(event): + gh_pr = event.data['pull_request'] + number = gh_pr['number'] + for wb in watched_branches: + if number in wb.prs: + await wb.notify_github_changed(event.app) + + +async def github_callback_handler(request): + event = gh_sansio.Event.from_http(request.headers, await request.read()) + event.app = request.app + await gh_router.dispatch(event) + + +@routes.post('/github_callback') +async def github_callback(request): + await asyncio.shield(github_callback_handler(request)) + return web.Response(status=200) + + +async def batch_callback_handler(request): + app = request.app + params = await request.json() + log.info(f'batch callback {params}') + attrs = params.get('attributes') + if attrs: + target_branch = attrs.get('target_branch') + if target_branch: + for wb in watched_branches: + if wb.branch.short_str() == target_branch: + log.info(f'watched_branch {wb.branch.short_str()} notify batch changed') + await wb.notify_batch_changed(app) + + +@routes.get('/api/v1alpha/deploy_status') +@rest_authenticated_developers_only +async def deploy_status(request, userdata): # pylint: disable=unused-argument + batch_client = request.app['batch_client'] + + async def get_failure_information(batch): + if isinstance(batch, MergeFailureBatch): + exc = batch.exception + return traceback.format_exception(etype=type(exc), value=exc, tb=exc.__traceback__) + jobs = await collect_agen(batch.jobs()) + + async def fetch_job_and_log(j): + full_job = await batch_client.get_job(j['batch_id'], j['job_id']) + log = await full_job.log() + return {**full_job._status, 'log': log} + + return await asyncio.gather(*[fetch_job_and_log(j) for j in jobs if j['state'] in ('Error', 'Failed')]) + + wb_configs = [ + { + 'branch': wb.branch.short_str(), + 'sha': wb.sha, + 'deploy_batch_id': wb.deploy_batch.id if wb.deploy_batch and isinstance(wb.deploy_batch, Batch) else None, + 'deploy_state': wb.deploy_state, + 'repo': wb.branch.repo.short_str(), + 'failure_information': None + if wb.deploy_state == 'success' + else await get_failure_information(wb.deploy_batch), + } + for wb in watched_branches + ] + return web.json_response(wb_configs) + + +@routes.post('/api/v1alpha/update') +@rest_authenticated_developers_only +async def post_update(request, userdata): # pylint: disable=unused-argument + log.info('developer triggered update') + + async def update_all(): + for wb in watched_branches: + await wb.update(request.app) + + request.app['task_manager'].ensure_future(update_all()) + return web.Response(status=200) + + +@routes.post('/api/v1alpha/dev_deploy_branch') +@rest_authenticated_developers_only 
+async def dev_deploy_branch(request, userdata): + app = request.app + try: + params = await request.json() + except asyncio.CancelledError: + raise + except Exception as e: + message = 'could not read body as JSON' + log.info('dev deploy failed: ' + message, exc_info=True) + raise web.HTTPBadRequest(text=message) from e + + try: + branch = FQBranch.from_short_str(params['branch']) + steps = params['steps'] + excluded_steps = params['excluded_steps'] + extra_config = params.get('extra_config', {}) + except asyncio.CancelledError: + raise + except Exception as e: + message = f'parameters are wrong; check the branch and steps syntax.\n\n{params}' + log.info('dev deploy failed: ' + message, exc_info=True) + raise web.HTTPBadRequest(text=message) from e + + gh = app['github_client'] + request_string = f'/repos/{branch.repo.owner}/{branch.repo.name}/git/refs/heads/{branch.name}' + + try: + branch_gh_json = await gh.getitem(request_string) + sha = branch_gh_json['object']['sha'] + except asyncio.CancelledError: + raise + except Exception as e: + message = f'error finding {branch} at GitHub' + log.info('dev deploy failed: ' + message, exc_info=True) + raise web.HTTPBadRequest(text=message) from e + + unwatched_branch = UnwatchedBranch(branch, sha, userdata, extra_config) + + batch_client = app['batch_client'] + + try: + batch_id = await unwatched_branch.deploy(batch_client, steps, excluded_steps=excluded_steps) + except asyncio.CancelledError: + raise + except Exception as e: # pylint: disable=broad-except + message = traceback.format_exc() + raise web.HTTPBadRequest(text=f'starting the deploy failed due to\n{message}') from e + return web.json_response({'sha': sha, 'batch_id': batch_id}) + + +@routes.post('/api/v1alpha/batch_callback') +async def batch_callback(request): + await asyncio.shield(batch_callback_handler(request)) + return web.Response(status=200) + + +async def update_loop(app): + while True: + try: + for wb in watched_branches: + log.info(f'updating {wb.branch.short_str()}') + await wb.update(app) + except concurrent.futures.CancelledError: + raise + except Exception: # pylint: disable=broad-except + log.exception(f'{wb.branch.short_str()} update failed due to exception') + await asyncio.sleep(300) + + async def on_startup(app): app['client_session'] = httpx.client_session() app['github_client'] = gh_aiohttp.GitHubAPI(app['client_session'], 'ci') @@ -198,11 +643,14 @@ async def on_startup(app): async def on_cleanup(app): - dbpool = app['dbpool'] - dbpool.close() - await dbpool.wait_closed() - await app['client_session'].close() - await app['batch_client'].close() + try: + dbpool = app['dbpool'] + dbpool.close() + await dbpool.wait_closed() + await app['client_session'].close() + await app['batch_client'].close() + finally: + app['task_manager'].shutdown() def run(): From b01af257c2674686c0cffd2177020e7c3bdd73a7 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 16 Feb 2022 12:22:53 +1100 Subject: [PATCH 309/501] Fix --- ci/ci/ci.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ci/ci/ci.py b/ci/ci/ci.py index ac35191e5f6..1d1159a3889 100644 --- a/ci/ci/ci.py +++ b/ci/ci/ci.py @@ -30,8 +30,9 @@ from .github import Repo, FQBranch, WatchedBranch, UnwatchedBranch, MergeFailureBatch, PR, select_random_teammate, WIP from .constants import AUTHORIZED_USERS, TEAMS -if os.environ.get('HAIL_CI_OAUTH_TOKEN') or os.path.exists('oauth-token/oauth-token'): - with open(os.environ.get('HAIL_CI_OAUTH_TOKEN', 'oauth-token/oauth-token'), 'r') as f: +oauth_path = 
os.environ.get('HAIL_CI_OAUTH_TOKEN', 'oauth-token/oauth-token') +if os.path.exists(oauth_path): + with open(oauth_path, 'r') as f: oauth_token = f.read().strip() else: oauth_token = None From 6b535454f27a5bf9e1220e23919eae0c71eeaeaa Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 16 Feb 2022 12:26:14 +1100 Subject: [PATCH 310/501] Fix --- ci/ci/ci.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/ci/ci.py b/ci/ci/ci.py index 1d1159a3889..e10791eeda1 100644 --- a/ci/ci/ci.py +++ b/ci/ci/ci.py @@ -8,7 +8,7 @@ import uvloop # type: ignore from gidgethub import aiohttp as gh_aiohttp, routing as gh_routing, sansio as gh_sansio from hailtop.utils import collect_agen, humanize_timedelta_msecs -from hailtop.batch_client.aioclient import BatchClient +from hailtop.batch_client.aioclient import BatchClient, Batch from hailtop.config import get_deploy_config from hailtop.tls import internal_server_ssl_context from hailtop.hail_logging import AccessLogger From 535fe04b66852456c5b46ab8c9727d812094d885 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 16 Feb 2022 12:33:01 +1100 Subject: [PATCH 311/501] Reorder --- ci/ci/ci.py | 98 ++++++++++++++++++++++++++--------------------------- 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/ci/ci/ci.py b/ci/ci/ci.py index e10791eeda1..5d01eda8e7c 100644 --- a/ci/ci/ci.py +++ b/ci/ci/ci.py @@ -303,55 +303,6 @@ async def dev_deploy_branch(request, userdata): return web.json_response({'sha': sha, 'batch_id': batch_id}) -# This is CPG-specific, as the Hail team redeploys by watching the main branch. -@routes.post('/api/v1alpha/prod_deploy') -@rest_authenticated_users_only -async def prod_deploy(request, userdata): - """Deploys the main branch to the production namespace ("default").""" - # Only allow access by "ci" or dev accounts. - if not (userdata['username'] == 'ci' or userdata['is_developer'] == 1): - raise web.HTTPUnauthorized() - app = request.app - try: - params = await request.json() - except Exception as e: - message = 'could not read body as JSON' - log.info('prod deploy failed: ' + message, exc_info=True) - raise web.HTTPBadRequest(text=message) from e - - try: - steps = params['steps'] - except Exception as e: - message = f'parameters are wrong; check the steps syntax.\n\n{params}' - log.info('prod deploy failed: ' + message, exc_info=True) - raise web.HTTPBadRequest(text=message) from e - - if 'sha' not in params: - message = f'parameter "sha" is required.\n\n{params}' - log.info('prod deploy failed: ' + message, exc_info=True) - raise web.HTTPBadRequest(text=message) - if params['sha'] == 'HEAD': - message = ( - f'SHA must be a specific commit hash, and can\'t be a HEAD reference. 
' - f'The reason is that HEAD can change in the middle of the deploy.\n\n{params}' - ) - log.info('prod deploy failed: ' + message, exc_info=True) - raise web.HTTPBadRequest(text=message) - - watched_branch = WatchedBranch(0, FQBranch.from_short_str('populationgenomics/hail:main'), True) - watched_branch.sha = params['sha'] - await watched_branch._start_deploy(app['batch_client'], steps) - - batch = watched_branch.deploy_batch - if not isinstance(batch, MergeFailureBatch): - url = deploy_config.external_url('ci', f'/batches/{batch.id}') - return web.Response(text=f'{url}\n') - else: - message = traceback.format_exc() - log.info('prod deploy failed: ' + message, exc_info=True) - raise web.HTTPBadRequest(text=f'starting prod deploy failed due to\n{message}') from batch.exception - - def get_maybe_wb_for_batch(b: Batch): if 'target_branch' in b.attributes and 'pr' in b.attributes: branch = b.attributes['target_branch'] @@ -617,6 +568,55 @@ async def dev_deploy_branch(request, userdata): return web.json_response({'sha': sha, 'batch_id': batch_id}) +# This is CPG-specific, as the Hail team redeploys by watching the main branch. +@routes.post('/api/v1alpha/prod_deploy') +@rest_authenticated_users_only +async def prod_deploy(request, userdata): + """Deploys the main branch to the production namespace ("default").""" + # Only allow access by "ci" or dev accounts. + if not (userdata['username'] == 'ci' or userdata['is_developer'] == 1): + raise web.HTTPUnauthorized() + app = request.app + try: + params = await request.json() + except Exception as e: + message = 'could not read body as JSON' + log.info('prod deploy failed: ' + message, exc_info=True) + raise web.HTTPBadRequest(text=message) from e + + try: + steps = params['steps'] + except Exception as e: + message = f'parameters are wrong; check the steps syntax.\n\n{params}' + log.info('prod deploy failed: ' + message, exc_info=True) + raise web.HTTPBadRequest(text=message) from e + + if 'sha' not in params: + message = f'parameter "sha" is required.\n\n{params}' + log.info('prod deploy failed: ' + message, exc_info=True) + raise web.HTTPBadRequest(text=message) + if params['sha'] == 'HEAD': + message = ( + f'SHA must be a specific commit hash, and can\'t be a HEAD reference. 
' + f'The reason is that HEAD can change in the middle of the deploy.\n\n{params}' + ) + log.info('prod deploy failed: ' + message, exc_info=True) + raise web.HTTPBadRequest(text=message) + + watched_branch = WatchedBranch(0, FQBranch.from_short_str('populationgenomics/hail:main'), True) + watched_branch.sha = params['sha'] + await watched_branch._start_deploy(app['batch_client'], steps) + + batch = watched_branch.deploy_batch + if not isinstance(batch, MergeFailureBatch): + url = deploy_config.external_url('ci', f'/batches/{batch.id}') + return web.Response(text=f'{url}\n') + else: + message = traceback.format_exc() + log.info('prod deploy failed: ' + message, exc_info=True) + raise web.HTTPBadRequest(text=f'starting prod deploy failed due to\n{message}') from batch.exception + + @routes.post('/api/v1alpha/batch_callback') async def batch_callback(request): await asyncio.shield(batch_callback_handler(request)) From 29f2d0e4b2a38fa5ef04c80d640879a821072819 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 16 Feb 2022 12:35:36 +1100 Subject: [PATCH 312/501] Fix --- ci/ci/ci.py | 53 ----------------------------------------------------- 1 file changed, 53 deletions(-) diff --git a/ci/ci/ci.py b/ci/ci/ci.py index 5d01eda8e7c..433b81a5743 100644 --- a/ci/ci/ci.py +++ b/ci/ci/ci.py @@ -250,59 +250,6 @@ async def get_job(request, userdata): return await render_template('ci', request, userdata, 'job.html', page_context) -@routes.post('/api/v1alpha/dev_deploy_branch') -@rest_authenticated_developers_only -async def dev_deploy_branch(request, userdata): - app = request.app - try: - params = await request.json() - except asyncio.CancelledError: - raise - except Exception as e: - message = 'could not read body as JSON' - log.info('dev deploy failed: ' + message, exc_info=True) - raise web.HTTPBadRequest(text=message) from e - - try: - branch = FQBranch.from_short_str(params['branch']) - steps = params['steps'] - excluded_steps = params['excluded_steps'] - extra_config = params.get('extra_config', {}) - except asyncio.CancelledError: - raise - except Exception as e: - message = f'parameters are wrong; check the branch and steps syntax.\n\n{params}' - log.info('dev deploy failed: ' + message, exc_info=True) - raise web.HTTPBadRequest(text=message) from e - - gh = app['github_client'] - request_string = f'/repos/{branch.repo.owner}/{branch.repo.name}/git/refs/heads/{branch.name}' - - try: - branch_gh_json = await gh.getitem(request_string) - sha = branch_gh_json['object']['sha'] - except asyncio.CancelledError: - raise - except Exception as e: - message = f'error finding {branch} at GitHub' - log.info('dev deploy failed: ' + message, exc_info=True) - raise web.HTTPBadRequest(text=message) from e - - unwatched_branch = UnwatchedBranch(branch, sha, userdata, extra_config) - - batch_client = app['batch_client'] - - try: - batch_id = await unwatched_branch.deploy(batch_client, steps, excluded_steps=excluded_steps) - except asyncio.CancelledError: - raise - except Exception as e: # pylint: disable=broad-except - message = traceback.format_exc() - log.info('dev deploy failed: ' + message, exc_info=True) - raise web.HTTPBadRequest(text=f'starting the deploy failed due to\n{message}') from e - return web.json_response({'sha': sha, 'batch_id': batch_id}) - - def get_maybe_wb_for_batch(b: Batch): if 'target_branch' in b.attributes and 'pr' in b.attributes: branch = b.attributes['target_branch'] From d83f6bae1efcb36acbc8b22a4b96658e62aad44a Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 16 Feb 2022 
12:36:36 +1100 Subject: [PATCH 313/501] Fix --- ci/ci/ci.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/ci/ci/ci.py b/ci/ci/ci.py index 433b81a5743..b0a04d49d6d 100644 --- a/ci/ci/ci.py +++ b/ci/ci/ci.py @@ -263,23 +263,6 @@ def get_maybe_wb_for_batch(b: Batch): return None -@routes.get('/batches/{batch_id}/jobs/{job_id}') -@web_authenticated_developers_only() -async def get_job(request, userdata): - batch_id = int(request.match_info['batch_id']) - job_id = int(request.match_info['job_id']) - batch_client = request.app['batch_client'] - job = await batch_client.get_job(batch_id, job_id) - page_context = { - 'batch_id': batch_id, - 'job_id': job_id, - 'job_log': await job.log(), - 'job_status': json.dumps(await job.status(), indent=2), - 'attempts': await job.attempts(), - } - return await render_template('ci', request, userdata, 'job.html', page_context) - - def filter_wbs(wbs: List[WatchedBranchConfig], pred: Callable[[PRConfig], bool]): return [{**wb, 'prs': [pr for pr in wb['prs'] if pred(pr)]} for wb in wbs] From 22b76a670733d58db152b236d8f81e106bfd1290 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 16 Feb 2022 14:24:45 +1100 Subject: [PATCH 314/501] Fix merge --- ci/ci/ci.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ci/ci/ci.py b/ci/ci/ci.py index b0a04d49d6d..dc87389a821 100644 --- a/ci/ci/ci.py +++ b/ci/ci/ci.py @@ -533,7 +533,12 @@ async def prod_deploy(request, userdata): log.info('prod deploy failed: ' + message, exc_info=True) raise web.HTTPBadRequest(text=message) - watched_branch = WatchedBranch(0, FQBranch.from_short_str('populationgenomics/hail:main'), True) + watched_branch = WatchedBranch( + index=0, + branch=FQBranch.from_short_str('populationgenomics/hail:main'), + deployable=True, + mergeable=False, + ) watched_branch.sha = params['sha'] await watched_branch._start_deploy(app['batch_client'], steps) From 5a5b145e5e8a164158038b1bf4890aa9f4b0efbe Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Wed, 16 Feb 2022 14:30:29 +1100 Subject: [PATCH 315/501] Ci: remove app[task_manager].shutdown() to avoid warning --- ci/ci/ci.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/ci/ci/ci.py b/ci/ci/ci.py index dc87389a821..24225be1ad7 100644 --- a/ci/ci/ci.py +++ b/ci/ci/ci.py @@ -579,14 +579,11 @@ async def on_startup(app): async def on_cleanup(app): - try: - dbpool = app['dbpool'] - dbpool.close() - await dbpool.wait_closed() - await app['client_session'].close() - await app['batch_client'].close() - finally: - app['task_manager'].shutdown() + dbpool = app['dbpool'] + dbpool.close() + await dbpool.wait_closed() + await app['client_session'].close() + await app['batch_client'].close() def run(): From 137012dd3df17562c5b793d71b8837c5f3be077d Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Fri, 18 Feb 2022 13:57:14 +1100 Subject: [PATCH 316/501] Batch: fix deleting workers --- batch/batch/driver/instance_collection/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/batch/batch/driver/instance_collection/base.py b/batch/batch/driver/instance_collection/base.py index 58a7454fb08..e2a18e7ecf8 100644 --- a/batch/batch/driver/instance_collection/base.py +++ b/batch/batch/driver/instance_collection/base.py @@ -284,10 +284,10 @@ async def check_on_instance(self, instance: Instance): ): log.exception(f'{instance} (state: {str(vm_state)}) has made no progress in last 5m, deleting') await self.call_delete_instance(instance, 'activation_timeout') - elif 
isinstance(vm_state, VMStateTerminated): + if isinstance(vm_state, VMStateTerminated): log.info(f'{instance} live but stopping or terminated, deactivating') await instance.deactivate('terminated') - elif instance.state == 'inactive': + if instance.state == 'inactive': log.info(f'{instance} (vm_state: {vm_state}) is inactive, deleting') await self.call_delete_instance(instance, 'inactive') From 5a664aa0606d2c5dd4a953f1581fef9434050f78 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Sat, 19 Feb 2022 14:50:01 +1100 Subject: [PATCH 317/501] Pin Jinja2 to 3.0.3 --- docker/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/requirements.txt b/docker/requirements.txt index dad4189881f..1da9df50f9b 100644 --- a/docker/requirements.txt +++ b/docker/requirements.txt @@ -36,7 +36,8 @@ orjson==3.6.4 # importlib-metadata<4: in dev-requirements, jupyter depends on (an unpinned) ipykernel which needs importlib-metadata<4 importlib-metadata<4 janus==0.6.1 -Jinja2==2.11.3 +# hitting with Jinja2<3: https://github.com/pallets/markupsafe/issues/284 +Jinja2==3.0.3 # keyrings.alt>3.1: https://bugs.launchpad.net/usd-importer/+bug/1794041/comments/6 keyrings.alt>=3.1 kubernetes-asyncio==9.1.0 From 6e05c92b5878c7d0d03d9d66a2eda1256150a338 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Mon, 21 Feb 2022 16:06:25 +1100 Subject: [PATCH 318/501] Pin tabulate (see https://github.com/Azure/azure-cli/issues/20887) --- docker/requirements.txt | 2 +- hail/python/test/hailtop/hailctl/dataproc/conftest.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/requirements.txt b/docker/requirements.txt index 1da9df50f9b..4ba5383ce60 100644 --- a/docker/requirements.txt +++ b/docker/requirements.txt @@ -64,7 +64,7 @@ python-json-logger==0.1.11 requests==2.25.1 setuptools>=38.6.0 sortedcontainers==2.1.0 -tabulate==0.8.3 +tabulate==0.8.9 tqdm==4.42.1 twine>=1.11.0 urllib3==1.26.5 diff --git a/hail/python/test/hailtop/hailctl/dataproc/conftest.py b/hail/python/test/hailtop/hailctl/dataproc/conftest.py index 31a361cc094..6a0eb25c4d5 100644 --- a/hail/python/test/hailtop/hailctl/dataproc/conftest.py +++ b/hail/python/test/hailtop/hailctl/dataproc/conftest.py @@ -43,7 +43,7 @@ def deploy_metadata(): return { "wheel": "gs://hail-common/hailctl/dataproc/test-version/hail-test-version-py3-none-any.whl", "init_notebook.py": "gs://hail-common/hailctl/dataproc/test-version/init_notebook.py", - "pip_dependencies": "aiohttp>=3.6,<3.7|aiohttp_session>=2.7,<2.8|asyncinit>=0.2.4,<0.3|bokeh>1.1,<1.3|decorator<5|gcsfs==0.2.1|humanize==1.0.0|hurry.filesize==0.9|nest_asyncio|numpy<2|pandas>0.24,<0.26|parsimonious<0.9|PyJWT|python-json-logger==0.1.11|requests>=2.21.0,<2.21.1|scipy>1.2,<1.4|tabulate==0.8.3|tqdm==4.42.1|", + "pip_dependencies": "aiohttp>=3.6,<3.7|aiohttp_session>=2.7,<2.8|asyncinit>=0.2.4,<0.3|bokeh>1.1,<1.3|decorator<5|gcsfs==0.2.1|humanize==1.0.0|hurry.filesize==0.9|nest_asyncio|numpy<2|pandas>0.24,<0.26|parsimonious<0.9|PyJWT|python-json-logger==0.1.11|requests>=2.21.0,<2.21.1|scipy>1.2,<1.4|tabulate==0.8.9|tqdm==4.42.1|", "vep-GRCh37.sh": "gs://hail-common/hailctl/dataproc/test-version/vep-GRCh37.sh", "vep-GRCh38.sh": "gs://hail-common/hailctl/dataproc/test-version/vep-GRCh38.sh", } From 4b802d90eea63710472c0a1d31be1a83342223be Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Mon, 21 Feb 2022 18:38:08 +1100 Subject: [PATCH 319/501] Use cpg-hail for pip package name --- hail/python/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/hail/python/setup.py b/hail/python/setup.py index 527cae199f1..53653b95c5f 100755 --- a/hail/python/setup.py +++ b/hail/python/setup.py @@ -27,7 +27,7 @@ dependencies.append(pkg) setup( - name="hail", + name="cpg-hail", version=hail_pip_version, author="Hail Team", author_email="hail@broadinstitute.org", From e66aaf04ad5c79181294d4b4972ff606a35c0bf7 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Mon, 21 Feb 2022 22:25:31 +1100 Subject: [PATCH 320/501] Pin tabulate --- hail/python/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hail/python/requirements.txt b/hail/python/requirements.txt index 7ed749498f7..bba7fab3ab2 100644 --- a/hail/python/requirements.txt +++ b/hail/python/requirements.txt @@ -28,6 +28,6 @@ python-json-logger==0.1.11 requests==2.25.1 scipy>1.2,<1.8 sortedcontainers==2.1.0 -tabulate==0.8.3 +tabulate==0.8.9 tqdm==4.* uvloop==0.16.0 From 4d16aa79d5a769efd3d0b5834021cbb12686bad6 Mon Sep 17 00:00:00 2001 From: Vlad Savelyev Date: Tue, 22 Feb 2022 11:59:44 +1100 Subject: [PATCH 321/501] Build for pypi instead of conda (#131) * Conda env: more missing packages and changed version pins * CI: build conda package on pull requests, but upload only on push to main * Build in py 3.10 enviornment * Stick to py3.9. See https://github.com/conda/conda/issues/10969 * Conda: avro -> python-avro * Build pip instead of conda * Render conda recipe from pip requirements * Drop the pypi package * Render SPARK_VERSION before building recipe * Setup conda before rendering recipe to make sure py3 is activated * Conda: upload macos as well * Conda: build for py310 * Build for py310 * Build for py39 and py310 * Small fix * Remove trailing space * Update conda/render_recipe.py Co-authored-by: Michael Franklin * Update conda/render_recipe.py Co-authored-by: Michael Franklin * Drop conda, replace with pip * GH workflow: setup java and pin python * Fix * Fix * Java 8 * Use twine action * Fix * Install package for test * Fix * Remove fix-conda2 branch * Fix * Add back macos-latest * Build only on ubuntu * Add comments, rename to build.yaml * Rename back * Rename to package.yaml Co-authored-by: Michael Franklin --- .github/workflows/condarise.yaml | 64 ------------------------------ .github/workflows/package.yaml | 67 ++++++++++++++++++++++++++++++++ conda/README.md | 51 ------------------------ conda/hail/build.sh | 7 ---- conda/hail/meta-template.yaml | 64 ------------------------------ 5 files changed, 67 insertions(+), 186 deletions(-) delete mode 100644 .github/workflows/condarise.yaml create mode 100644 .github/workflows/package.yaml delete mode 100644 conda/README.md delete mode 100755 conda/hail/build.sh delete mode 100644 conda/hail/meta-template.yaml diff --git a/.github/workflows/condarise.yaml b/.github/workflows/condarise.yaml deleted file mode 100644 index 816003713fc..00000000000 --- a/.github/workflows/condarise.yaml +++ /dev/null @@ -1,64 +0,0 @@ -name: Condarise -on: - push: - branches: - - main -jobs: - build-publish: - strategy: - matrix: - os: [ubuntu-latest, macos-latest] - runs-on: ${{ matrix.os }} - defaults: - run: - shell: bash -l {0} - steps: - - uses: actions/checkout@main - - - name: Fix meta YAML - run: | - make -C hail python-version-info - VERSION=$(cat hail/python/hail/hail_version) - VERSION=${VERSION/-/.dev} - cat conda/hail/meta-template.yaml \ - | sed s/{version}/${VERSION}/ > conda/hail/meta.yaml - echo "version=$VERSION" >> $GITHUB_ENV - - - uses: conda-incubator/setup-miniconda@v2 - with: - activate-environment: buildenv - 
channels: cpg,conda-forge,bioconda,defaults - channel-priority: true - python-version: 3.7 - - - name: Setup build env - run: conda install pip conda-build anaconda-client - - - name: Build package - run: conda build conda/hail - - # Remove old packages to save space on anaconda - - name: Remove old versions - # Only run this step once. - if: ${{ matrix.os == 'ubuntu-latest' }} - run: | - # Keep 8 latest versions - KEEP=8 - anaconda -t ${{ secrets.ANACONDA_TOKEN }} show cpg/hail 2>&1 \ - | grep "^ +" | head -n-${KEEP} | sed 's# \+ ##' \ - | xargs -I '{}' -L1 echo anaconda -t ${{ secrets.ANACONDA_TOKEN }} remove -f cpg/hail/{} - - - name: Upload to anaconda package repository - run: | - anaconda -t ${{ secrets.ANACONDA_TOKEN }} \ - upload ${CONDA_PREFIX}/conda-bld/**/*.tar.bz2 - - - name: Redeploy the analysis-server - # Only run this step once. - if: ${{ matrix.os == 'ubuntu-latest' }} - run: | - curl --fail --silent --show-error -X POST \ - -H "Authorization: token ${{ secrets.ANALYSIS_SERVER_GITHUB_TOKEN }}" \ - -H "Accept: application/vnd.github.v3+json" \ - https://api.github.com/repos/populationgenomics/analysis-runner/actions/workflows/6364059/dispatches \ - -d '{"ref": "main", "inputs": {"hail_version": "${{ env.version }}"}}' diff --git a/.github/workflows/package.yaml b/.github/workflows/package.yaml new file mode 100644 index 00000000000..862ca56fba4 --- /dev/null +++ b/.github/workflows/package.yaml @@ -0,0 +1,67 @@ +name: Build pip package +on: + # Building on pull-requests and pushes to main, but restricting publishing + # to push events with `if: github.event_name == 'push'` for specific steps + pull_request: + push: + branches: + - main +jobs: + build: + runs-on: ubuntu-latest + defaults: + run: + shell: bash -l {0} + steps: + - uses: actions/checkout@main + + - uses: actions/setup-python@v2 + with: + python-version: '3.10' + + # Hail builds only on java=8 + - uses: actions/setup-java@v2 + with: + java-version: '8' + distribution: 'temurin' + + # Building the wheel. + # Note that because PyPI doesn't have channels like conda, we have to prefix + # the package name with "cpg-*" to avoid clashing with the official package. 
+ - name: Build wheel + run: | + sed -i 's/name="hail",/name="cpg-hail",/' hail/python/setup.py + make -C hail python-version-info wheel + + - name: Install + run: | + version=$(cat hail/python/hail/hail_pip_version) + pip install hail/build/deploy/dist/cpg_hail-$version-py3-none-any.whl + + - name: Test hailctl + run: hailctl version + + - name: Test hailtop import + run: python -c "import hailtop.batch" + + - name: Test version + run: test "$(hailctl version)" = "$(cat hail/python/hail/hail_version)" + + - name: Publish the wheel to PyPI + if: github.event_name == 'push' + uses: pypa/gh-action-pypi-publish@release/v1 + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} + packages_dir: hail/build/deploy/dist/ + skip_existing: true + + - name: Redeploy the analysis-server + if: github.event_name == 'push' + run: | + echo "version=$(cat hail/python/hail/hail_pip_version)" >> $GITHUB_ENV + curl --fail --silent --show-error -X POST \ + -H "Authorization: token ${{ secrets.ANALYSIS_SERVER_GITHUB_TOKEN }}" \ + -H "Accept: application/vnd.github.v3+json" \ + https://api.github.com/repos/populationgenomics/analysis-runner/actions/workflows/6364059/dispatches \ + -d '{"ref": "main", "inputs": {"hail_version": "${{ env.version }}"}}' diff --git a/conda/README.md b/conda/README.md deleted file mode 100644 index 9f4f8afc2ba..00000000000 --- a/conda/README.md +++ /dev/null @@ -1,51 +0,0 @@ -# Conda package - -This folder contains a conda recipe to build the `hail` package for -the [`cpg` Anaconda channel](https://anaconda.org/cpg/hail). - -Note that there is also a `hail` package in the -[`bioconda` channel](https://github.com/bioconda/bioconda-recipes/tree/master/recipes/hail) -synced with the [official PyPI release](https://pypi.org/project/hail). However, having -a separate conda package in the `cpg` channel allows us to build it against the codebase -in our fork. - -We don't control versioning of original Hail project, so our `cpg` conda release name -is the official version tag appended with the git commit has, e.g. `0.2.62.dev289c163`. - -[GitHub Actions CI](../.github/workflows/condarise.yaml) is set up to build the package -using this recipe and push it to Anaconda on every push event to the `main` branch in -the -[CPG hail fork](https://github.com/populationgenomics/hail). - -When installing the package, list the `cpg` channel before `bioconda` to prioritize -the channel order: - -``` -conda create --name hail -c cpg -c bioconda -c conda-forge hail -conda activate hail -``` - -You can also install Hail into an existing environment. However, note that Hail requires -Python of versions 3.6 or 3.7, so conda might downgrade Python in that environment, -which may affect other installed packages. - -Note that if you don't have `conda` installed, here are handy commands to do that: - -``` -if [[ "$OSTYPE" == "darwin"* ]]; then - wget https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh -else - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh -fi -bash miniconda.sh -``` - -When installing, to prioritize the CPG package, list the `cpg` channel before `bioconda`: - -``` -conda create --name hail -c cpg -c conda-forge hail -conda activate hail -``` - -You can also install Hail into an existing environment; however note that Hail requires Python of versions 3.6 or 3.7, so conda might downgrade Python in that environment, which may affect other installed packages. 
- diff --git a/conda/hail/build.sh b/conda/hail/build.sh deleted file mode 100755 index 2c3c348b87f..00000000000 --- a/conda/hail/build.sh +++ /dev/null @@ -1,7 +0,0 @@ -#! /bin/bash - -# Build instructions -# https://hail.is/docs/0.2/getting_started_developing.html#requirements -pushd $SRC_DIR/hail -make install HAIL_COMPILE_NATIVES='build' -j ${CPU_COUNT} SKIP_UPLOAD_ARTIFACTS=1 -popd diff --git a/conda/hail/meta-template.yaml b/conda/hail/meta-template.yaml deleted file mode 100644 index 9ab889dd78f..00000000000 --- a/conda/hail/meta-template.yaml +++ /dev/null @@ -1,64 +0,0 @@ -package: - name: hail - version: {version} - -source: - path: ../../ - -build: - number: 0 - -requirements: - build: - - {{ compiler('cxx') }} - - make - - rsync - host: - - python - - pyspark >=3.1.1,<3.2.0 - - openjdk 8.* - - lz4 - - pytest-runner - - pip - - google-cloud-sdk - - google-cloud-storage - run: - - python - - openjdk 8.* - - pyspark >=3.1.1,<3.2.0 - - aiohttp - - aiohttp-session - - bokeh >1.3,<2.0 - - decorator <5 - - deprecated - - gcsfs - - humanize - - hurry.filesize - - nest-asyncio - - parsimonious - - pyjwt - - python-json-logger ==0.1.11 - - requests - - scipy - - tabulate ==0.8.3 - - tqdm ==4.42.1 - - dill - - asyncinit - - google-cloud-sdk - - google-cloud-storage - - google-api-core - - janus >=0.6,<0.7 - - botocore >=1.20,<2.0 - -test: - imports: - - hail - - hailtop.batch - -about: - home: https://hail.is - dev_url: https://github.com/populationgenomics/hail - license: MIT - license_file: LICENSE - summary: | - Hail is Python-based data analysis tool for working with genomic data. From 3b5a40aa89825104050c98df525ec68d68b1ca1a Mon Sep 17 00:00:00 2001 From: Vlad Savelyev Date: Tue, 22 Feb 2022 12:00:20 +1100 Subject: [PATCH 322/501] Mount github-oauth-token from ci-config (#136) * Add oauth-token to ci deployment * Fix --- ci/deployment.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ci/deployment.yaml b/ci/deployment.yaml index f4fa8fe07ca..68ef2812605 100644 --- a/ci/deployment.yaml +++ b/ci/deployment.yaml @@ -102,6 +102,9 @@ spec: - mountPath: /sql-config name: sql-config readOnly: true + - mountPath: /secrets/oauth-token + name: hail-ci-0-1-github-oauth-token + readOnly: true - mountPath: /user-tokens name: ci-tokens readOnly: true @@ -133,6 +136,9 @@ spec: secret: optional: false secretName: "{{ ci_database.user_secret_name }}" + - name: hail-ci-0-1-github-oauth-token + secret: + secretName: hail-ci-0-1-github-oauth-token - name: ci-tokens secret: secretName: ci-tokens From aa902532266658858235d04cb62a3b4e9e79a681 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Tue, 1 Mar 2022 19:35:01 +1100 Subject: [PATCH 323/501] Fix CI --- .github/workflows/package.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/package.yaml b/.github/workflows/package.yaml index 862ca56fba4..25f4e9f72df 100644 --- a/.github/workflows/package.yaml +++ b/.github/workflows/package.yaml @@ -59,9 +59,9 @@ jobs: - name: Redeploy the analysis-server if: github.event_name == 'push' run: | - echo "version=$(cat hail/python/hail/hail_pip_version)" >> $GITHUB_ENV + version=$(cat hail/python/hail/hail_pip_version) curl --fail --silent --show-error -X POST \ -H "Authorization: token ${{ secrets.ANALYSIS_SERVER_GITHUB_TOKEN }}" \ -H "Accept: application/vnd.github.v3+json" \ https://api.github.com/repos/populationgenomics/analysis-runner/actions/workflows/6364059/dispatches \ - -d '{"ref": "main", "inputs": {"hail_version": "${{ env.version }}"}}' + -d '{"ref": 
"main", "inputs": {"hail_version": "'$version'"}}' From eb665037a5fceee3df30078e8972dd31ea3f138d Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Tue, 1 Mar 2022 19:49:42 +1100 Subject: [PATCH 324/501] Keep upload artefacts --- hail/Makefile | 6 ------ 1 file changed, 6 deletions(-) diff --git a/hail/Makefile b/hail/Makefile index 5cd634ae81e..61a4bf6839e 100644 --- a/hail/Makefile +++ b/hail/Makefile @@ -215,10 +215,8 @@ cloud_base := gs://hail-common/hailctl/dataproc/$(HAIL_PIP_VERSION) UPLOAD_RETENTION = gsutil -m retention temp set "$(cloud_base)/*" endif -ifdef SKIP_UPLOAD_ARTIFACTS DEV_CLARIFIER = CLOUD_SUB_FOLDER := $(HAIL_PIP_VERSION) -endif HAILCTL_BUCKET_BASE ?= gs://hail-common/hailctl/dataproc @@ -261,11 +259,7 @@ install-on-cluster: $(WHEEL) $(PIP) install $(WHEEL) --no-deps .PHONY: install-hailctl -ifdef SKIP_UPLOAD_ARTIFACTS -install-hailctl: install -else install-hailctl: install upload-artifacts -endif .PHONY: test-dataproc-37 test-dataproc-37: install-hailctl From 806ecd95933bf0affa7707b31195cedeb6c7e20a Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Fri, 11 Mar 2022 08:51:33 +1100 Subject: [PATCH 325/501] Replace deploy_query step with upload_query_jar --- .github/workflows/prod_deploy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/prod_deploy.yaml b/.github/workflows/prod_deploy.yaml index 4fbbdd98773..94f3e53b273 100644 --- a/.github/workflows/prod_deploy.yaml +++ b/.github/workflows/prod_deploy.yaml @@ -12,7 +12,7 @@ jobs: DEPLOY_BATCH_URL=$(curl --fail --silent --show-error -X POST \ -H "Authorization: Bearer ${{ secrets.CI_TOKEN }}" \ -H "Content-Type:application/json" \ - -d '{"steps": ["deploy_auth", "deploy_batch", "deploy_ci", "deploy_memory", "deploy_notebook", "deploy_query"], "sha": "${{ github.sha }}"}' \ + -d '{"steps": ["deploy_auth", "deploy_batch", "deploy_ci", "deploy_memory", "deploy_notebook", "upload_query_jar"], "sha": "${{ github.sha }}"}' \ https://ci.hail.populationgenomics.org.au/api/v1alpha/prod_deploy) echo DEPLOY_BATCH_URL=$DEPLOY_BATCH_URL >> $GITHUB_ENV From 508398496d442a67f3b414d7f59c7093f03b81b5 Mon Sep 17 00:00:00 2001 From: Vlad Savelyev Date: Fri, 11 Mar 2022 12:24:13 +1100 Subject: [PATCH 326/501] Add version endpoint to batch (originally in query) (#145) --- batch/batch/front_end/front_end.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/batch/batch/front_end/front_end.py b/batch/batch/front_end/front_end.py index 2eec60016ab..530eb9ec44e 100644 --- a/batch/batch/front_end/front_end.py +++ b/batch/batch/front_end/front_end.py @@ -33,7 +33,7 @@ ) from gear.clients import get_cloud_async_fs from gear.database import CallError -from hailtop import aiotools, dictfix, httpx +from hailtop import aiotools, dictfix, httpx, version from hailtop.batch_client.parse import parse_cpu_in_mcpu, parse_memory_in_bytes, parse_storage_in_bytes from hailtop.config import get_deploy_config from hailtop.hail_logging import AccessLogger @@ -174,6 +174,11 @@ async def get_healthcheck(request): # pylint: disable=W0613 return web.Response() +@routes.get('/api/v1alpha/version') +async def rest_get_version(request): # pylint: disable=W0613 + return web.Response(text=version()) + + async def _handle_ui_error(session, f, *args, **kwargs): try: await f(*args, **kwargs) From 69df94b32fc9f38319b62170a990206c314c1dae Mon Sep 17 00:00:00 2001 From: Vlad Savelyev Date: Fri, 11 Mar 2022 12:53:51 +1100 Subject: [PATCH 327/501] Fix build_hail_jar_and_wheel_only (#146) --- build.yaml | 4 ++-- 
1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build.yaml b/build.yaml index c78847b30c0..57619962ed8 100644 --- a/build.yaml +++ b/build.yaml @@ -714,7 +714,7 @@ steps: chmod 755 ./gradlew time retry ./gradlew --version time retry make jars wheel HAIL_DEBUG_MODE=1 - (cd build/deploy/dist/ && tar -cvf debug-wheel-container.tar hail-*-py3-none-any.whl) + (cd build/deploy/dist/ && tar -cvf debug-wheel-container.tar cpg_hail-*-py3-none-any.whl) cd /io/repo/hail mkdir build/debug_libs mv build/libs/hail-all-spark.jar build/debug_libs/ @@ -727,7 +727,7 @@ steps: time tar czf data.tar.gz -C python/hail/docs data (cd .. && time tar czf hail/website-src.tar.gz website) time tar czf cluster-tests.tar.gz python/cluster-tests - (cd build/deploy/dist/ && tar -cvf wheel-container.tar hail-*-py3-none-any.whl) + (cd build/deploy/dist/ && tar -cvf wheel-container.tar cpg_hail-*-py3-none-any.whl) time TESTNG_SPLITS=5 python3 generate_splits.py time tar czf splits.tar.gz testng-splits-*.xml inputs: From 2f555c3a99cd79894d168b60e1b4d94ef7a14c50 Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Fri, 11 Mar 2022 13:38:00 +1100 Subject: [PATCH 328/501] Build as cpg_hail pip package in few more places --- build.yaml | 58 +++++++++++++++--------------- docker/hail/Dockerfile | 6 ++-- hail/Dockerfile.hail-pip-installed | 4 +-- hail/Makefile | 8 ++--- 4 files changed, 38 insertions(+), 38 deletions(-) diff --git a/build.yaml b/build.yaml index 57619962ed8..3595dbc09ed 100644 --- a/build.yaml +++ b/build.yaml @@ -689,7 +689,7 @@ steps: chmod 755 ./gradlew time retry ./gradlew --version time retry make shadowJar wheel - (cd build/deploy/dist/ && tar -cvf wheel-container.tar hail-*-py3-none-any.whl) + (cd build/deploy/dist/ && tar -cvf wheel-container.tar cpg_hail-*-py3-none-any.whl) inputs: - from: /repo to: /io/repo @@ -821,7 +821,7 @@ steps: time retry ./gradlew --version export SPARK_VERSION="3.0.2" SCALA_VERSION="2.12.10" time retry make wheel - (cd build/deploy/dist/ && tar -cvf wheel-container.tar hail-*-py3-none-any.whl) + (cd build/deploy/dist/ && tar -cvf wheel-container.tar cpg_hail-*-py3-none-any.whl) inputs: - from: /repo to: /io/repo @@ -1281,7 +1281,7 @@ steps: tar xzf resources.tar.gz tar xzf data.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl # pyspark/conf/core-site.xml already points at /gsa-key/key.json mv /test-gsa-key/key.json /gsa-key/key.json @@ -1335,7 +1335,7 @@ steps: tar xzf resources.tar.gz tar xzf data.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl # pyspark/conf/core-site.xml already points at /gsa-key/key.json mv /test-gsa-key/key.json /gsa-key/key.json @@ -1389,7 +1389,7 @@ steps: tar xzf resources.tar.gz tar xzf data.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl # pyspark/conf/core-site.xml already points at /gsa-key/key.json mv /test-gsa-key/key.json /gsa-key/key.json @@ -1443,7 +1443,7 @@ steps: tar xzf resources.tar.gz tar xzf data.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl # pyspark/conf/core-site.xml already points at /gsa-key/key.json mv 
/test-gsa-key/key.json /gsa-key/key.json @@ -1497,7 +1497,7 @@ steps: tar xzf resources.tar.gz tar xzf data.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl # pyspark/conf/core-site.xml already points at /gsa-key/key.json mv /test-gsa-key/key.json /gsa-key/key.json @@ -1551,7 +1551,7 @@ steps: tar xzf resources.tar.gz tar xzf data.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl # pyspark/conf/core-site.xml already points at /gsa-key/key.json mv /test-gsa-key/key.json /gsa-key/key.json @@ -1605,7 +1605,7 @@ steps: tar xzf resources.tar.gz tar xzf data.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl # pyspark/conf/core-site.xml already points at /gsa-key/key.json mv /test-gsa-key/key.json /gsa-key/key.json @@ -1657,7 +1657,7 @@ steps: cd /io tar xzf test.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl export HAIL_TEST_GCS_BUCKET={{ global.hail_test_gcs_bucket }} export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json @@ -1715,7 +1715,7 @@ steps: cd /io tar xzf test.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl export HAIL_TEST_GCS_BUCKET={{ global.hail_test_gcs_bucket }} export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json @@ -1773,7 +1773,7 @@ steps: cd /io tar xzf test.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl export HAIL_TEST_GCS_BUCKET={{ global.hail_test_gcs_bucket }} export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json @@ -1831,7 +1831,7 @@ steps: cd /io tar xzf test.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl export HAIL_TEST_GCS_BUCKET={{ global.hail_test_gcs_bucket }} export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json @@ -1889,7 +1889,7 @@ steps: cd /io tar xzf test.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl export HAIL_TEST_GCS_BUCKET={{ global.hail_test_gcs_bucket }} export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json @@ -1949,7 +1949,7 @@ steps: tar xzf resources.tar.gz tar xzf data.tar.gz tar xvf wheel-container.tar - python3 -m pip install --no-dependencies hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl # pyspark/conf/core-site.xml already points at /gsa-key/key.json mv /test-gsa-key/key.json /gsa-key/key.json @@ -2002,7 +2002,7 @@ steps: tar xzf resources.tar.gz tar xzf data.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl mkdir -p /io/tmp export HAIL_TEST_STORAGE_URI=/io/tmp/ @@ -2051,7 +2051,7 @@ steps: tar xzf 
resources.tar.gz tar xzf data.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl mkdir -p /io/tmp export HAIL_TEST_STORAGE_URI=/io/tmp/ @@ -2100,7 +2100,7 @@ steps: tar xzf resources.tar.gz tar xzf data.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl mkdir -p /io/tmp export HAIL_TEST_STORAGE_URI=/io/tmp/ @@ -2149,7 +2149,7 @@ steps: tar xzf resources.tar.gz tar xzf data.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl mkdir -p /io/tmp export HAIL_TEST_STORAGE_URI=/io/tmp/ @@ -2198,7 +2198,7 @@ steps: tar xzf resources.tar.gz tar xzf data.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl mkdir -p /io/tmp export HAIL_TEST_STORAGE_URI=/io/tmp/ @@ -2247,7 +2247,7 @@ steps: tar xzf resources.tar.gz tar xzf data.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl mkdir -p /io/tmp export HAIL_TEST_STORAGE_URI=/io/tmp/ @@ -2296,7 +2296,7 @@ steps: tar xzf resources.tar.gz tar xzf data.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl mkdir -p /io/tmp export HAIL_TEST_STORAGE_URI=/io/tmp/ @@ -3120,7 +3120,7 @@ steps: script: | set -ex tar xvf /io/wheel-container.tar - python3 -m pip install --no-dependencies hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl cd /io/repo/hail/python @@ -3202,7 +3202,7 @@ steps: script: | set -ex tar xvf /io/wheel-container.tar - python3 -m pip install --no-dependencies hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl cd /io/repo/hail/python @@ -3284,7 +3284,7 @@ steps: script: | set -ex tar xvf /io/wheel-container.tar - python3 -m pip install --no-dependencies hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl cd /io/repo/hail/python @@ -3366,7 +3366,7 @@ steps: script: | set -ex tar xvf /io/wheel-container.tar - python3 -m pip install --no-dependencies hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl cd /io/repo/hail/python @@ -3448,7 +3448,7 @@ steps: script: | set -ex tar xvf /io/wheel-container.tar - python3 -m pip install --no-dependencies hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl cd /io/repo/hail/python @@ -4748,10 +4748,10 @@ steps: $(cat /io/hail_version) \ $(cat /io/git_version) \ origin \ - /io/repo/hail/build/deploy/dist/hail-*-py3-none-any.whl \ + /io/repo/hail/build/deploy/dist/cpg_hail-*-py3-none-any.whl \ /io/github-oauth \ docker://{{ hailgenetics_hail_image.image }} \ - /io/wheel-for-azure/hail-*-py3-none-any.whl \ + /io/wheel-for-azure/cpg_hail-*-py3-none-any.whl \ /io/www.tar.gz inputs: - from: /hail_version diff --git a/docker/hail/Dockerfile b/docker/hail/Dockerfile index e6b366a6fa5..49acb135fa7 100644 --- a/docker/hail/Dockerfile +++ 
b/docker/hail/Dockerfile @@ -13,8 +13,8 @@ RUN hail-apt-get-install \ vim pv COPY wheel-container.tar ./ RUN tar -xf wheel-container.tar && \ - pip3 install -U hail-*-py3-none-any.whl && \ - rm -rf hail-*-py3-none-any.whl + pip3 install -U cpg_hail-*-py3-none-any.whl && \ + rm -rf cpg_hail-*-py3-none-any.whl RUN hail-pip-install \ ipython \ matplotlib \ @@ -22,7 +22,7 @@ RUN hail-pip-install \ scikit-learn \ dill \ scipy \ - && rm -rf hail-*-py3-none-any.whl + && rm -rf cpg_hail-*-py3-none-any.whl RUN export SPARK_HOME=$(find_spark_home.py) && \ curl https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop2-2.0.1.jar \ >$SPARK_HOME/jars/gcs-connector-hadoop2-2.0.1.jar && \ diff --git a/hail/Dockerfile.hail-pip-installed b/hail/Dockerfile.hail-pip-installed index 7aee220f6f1..e134bc1ad67 100644 --- a/hail/Dockerfile.hail-pip-installed +++ b/hail/Dockerfile.hail-pip-installed @@ -8,5 +8,5 @@ RUN hail-pip-install -r dev-requirements.txt COPY wheel-container.tar ./ RUN tar -xf wheel-container.tar && \ - pip3 install -U hail-*-py3-none-any.whl && \ - rm -rf hail-*-py3-none-any.whl + pip3 install -U cpg_hail-*-py3-none-any.whl && \ + rm -rf cpg_hail-*-py3-none-any.whl diff --git a/hail/Makefile b/hail/Makefile index 61a4bf6839e..26e8b90ee51 100644 --- a/hail/Makefile +++ b/hail/Makefile @@ -58,8 +58,8 @@ SCALA_BUILD_INFO := src/main/resources/build-info.properties SHADOW_JAR := build/libs/hail-all-spark.jar SHADOW_TEST_JAR := build/libs/hail-all-spark-test.jar PYTHON_JAR := python/hail/backend/hail-all-spark.jar -WHEEL := build/deploy/dist/hail-$(HAIL_PIP_VERSION)-py3-none-any.whl -EGG := build/deploy/dist/hail-$(HAIL_PIP_VERSION)-py3.6.egg +WHEEL := build/deploy/dist/cpg_hail-$(HAIL_PIP_VERSION)-py3-none-any.whl +EGG := build/deploy/dist/cpg_hail-$(HAIL_PIP_VERSION)-py3.6.egg GRADLE_ARGS += -Dscala.version=$(SCALA_VERSION) -Dspark.version=$(SPARK_VERSION) -Delasticsearch.major-version=$(ELASTIC_MAJOR_VERSION) @@ -221,7 +221,7 @@ CLOUD_SUB_FOLDER := $(HAIL_PIP_VERSION) HAILCTL_BUCKET_BASE ?= gs://hail-common/hailctl/dataproc cloud_base := $(HAILCTL_BUCKET_BASE)/$(DEV_CLARIFIER)$(CLOUD_SUB_FOLDER) -wheel_cloud_path := $(cloud_base)/hail-$(HAIL_PIP_VERSION)-py3-none-any.whl +wheel_cloud_path := $(cloud_base)/cpg_hail-$(HAIL_PIP_VERSION)-py3-none-any.whl resources := $(wildcard python/hailtop/hailctl/dataproc/resources/*) $(eval $(call ENV_VAR,cloud_base)) $(eval $(call ENV_VAR,wheel_cloud_path)) @@ -403,4 +403,4 @@ clean: clean-env clean-libs native-lib-clean update-hail-repl: NAMESPACE ?= default update-hail-repl: wheel kubectl -n $(NAMESPACE) cp $(WHEEL) $$(kubectl get pods -n $(NAMESPACE) -l app=hail-repl | tail -n +2 | awk '{print $$1}'):. 
- kubectl -n $(NAMESPACE) exec -it $$(kubectl get pods -n $(NAMESPACE) -l app=hail-repl | tail -n +2 | awk '{print $$1}') -- pip3 install -U hail-$(HAIL_PIP_VERSION)-py3-none-any.whl + kubectl -n $(NAMESPACE) exec -it $$(kubectl get pods -n $(NAMESPACE) -l app=hail-repl | tail -n +2 | awk '{print $$1}') -- pip3 install -U cpg_hail-$(HAIL_PIP_VERSION)-py3-none-any.whl From a05e76339e56b109eca982c40d6edb0f66f6c32a Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Fri, 11 Mar 2022 14:52:10 +1100 Subject: [PATCH 329/501] Add "deploy" step to CI --- .github/workflows/prod_deploy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/prod_deploy.yaml b/.github/workflows/prod_deploy.yaml index 94f3e53b273..a28858c8931 100644 --- a/.github/workflows/prod_deploy.yaml +++ b/.github/workflows/prod_deploy.yaml @@ -12,7 +12,7 @@ jobs: DEPLOY_BATCH_URL=$(curl --fail --silent --show-error -X POST \ -H "Authorization: Bearer ${{ secrets.CI_TOKEN }}" \ -H "Content-Type:application/json" \ - -d '{"steps": ["deploy_auth", "deploy_batch", "deploy_ci", "deploy_memory", "deploy_notebook", "upload_query_jar"], "sha": "${{ github.sha }}"}' \ + -d '{"steps": ["deploy_auth", "deploy_batch", "deploy_ci", "deploy_memory", "deploy_notebook", "upload_query_jar", "deploy"], "sha": "${{ github.sha }}"}' \ https://ci.hail.populationgenomics.org.au/api/v1alpha/prod_deploy) echo DEPLOY_BATCH_URL=$DEPLOY_BATCH_URL >> $GITHUB_ENV From 1911f4dd2404ce690112f505b534085fb0fcd6dd Mon Sep 17 00:00:00 2001 From: Vlad Savelyev Date: Wed, 16 Mar 2022 13:13:51 +1100 Subject: [PATCH 330/501] Use official pip package (#150) * Revert building cpg_hail, use official hail pip package * Skip test_dataproc-37 and test_dataproc-38 steps in deploy --- .github/workflows/package.yaml | 67 ------------------------------ build.yaml | 64 ++++++++++++++-------------- docker/hail/Dockerfile | 6 +-- hail/Dockerfile.hail-pip-installed | 4 +- hail/Makefile | 8 ++-- hail/python/setup.py | 2 +- 6 files changed, 41 insertions(+), 110 deletions(-) delete mode 100644 .github/workflows/package.yaml diff --git a/.github/workflows/package.yaml b/.github/workflows/package.yaml deleted file mode 100644 index 25f4e9f72df..00000000000 --- a/.github/workflows/package.yaml +++ /dev/null @@ -1,67 +0,0 @@ -name: Build pip package -on: - # Building on pull-requests and pushes to main, but restricting publishing - # to push events with `if: github.event_name == 'push'` for specific steps - pull_request: - push: - branches: - - main -jobs: - build: - runs-on: ubuntu-latest - defaults: - run: - shell: bash -l {0} - steps: - - uses: actions/checkout@main - - - uses: actions/setup-python@v2 - with: - python-version: '3.10' - - # Hail builds only on java=8 - - uses: actions/setup-java@v2 - with: - java-version: '8' - distribution: 'temurin' - - # Building the wheel. - # Note that because PyPI doesn't have channels like conda, we have to prefix - # the package name with "cpg-*" to avoid clashing with the official package. 
- - name: Build wheel - run: | - sed -i 's/name="hail",/name="cpg-hail",/' hail/python/setup.py - make -C hail python-version-info wheel - - - name: Install - run: | - version=$(cat hail/python/hail/hail_pip_version) - pip install hail/build/deploy/dist/cpg_hail-$version-py3-none-any.whl - - - name: Test hailctl - run: hailctl version - - - name: Test hailtop import - run: python -c "import hailtop.batch" - - - name: Test version - run: test "$(hailctl version)" = "$(cat hail/python/hail/hail_version)" - - - name: Publish the wheel to PyPI - if: github.event_name == 'push' - uses: pypa/gh-action-pypi-publish@release/v1 - with: - user: __token__ - password: ${{ secrets.PYPI_API_TOKEN }} - packages_dir: hail/build/deploy/dist/ - skip_existing: true - - - name: Redeploy the analysis-server - if: github.event_name == 'push' - run: | - version=$(cat hail/python/hail/hail_pip_version) - curl --fail --silent --show-error -X POST \ - -H "Authorization: token ${{ secrets.ANALYSIS_SERVER_GITHUB_TOKEN }}" \ - -H "Accept: application/vnd.github.v3+json" \ - https://api.github.com/repos/populationgenomics/analysis-runner/actions/workflows/6364059/dispatches \ - -d '{"ref": "main", "inputs": {"hail_version": "'$version'"}}' diff --git a/build.yaml b/build.yaml index 16f37bfe63b..f717cf3abbc 100644 --- a/build.yaml +++ b/build.yaml @@ -691,7 +691,7 @@ steps: chmod 755 ./gradlew time retry ./gradlew --version time retry make shadowJar wheel - (cd build/deploy/dist/ && tar -cvf wheel-container.tar cpg_hail-*-py3-none-any.whl) + (cd build/deploy/dist/ && tar -cvf wheel-container.tar hail-*-py3-none-any.whl) inputs: - from: /repo to: /io/repo @@ -716,7 +716,7 @@ steps: chmod 755 ./gradlew time retry ./gradlew --version time retry make jars wheel HAIL_DEBUG_MODE=1 - (cd build/deploy/dist/ && tar -cvf debug-wheel-container.tar cpg_hail-*-py3-none-any.whl) + (cd build/deploy/dist/ && tar -cvf debug-wheel-container.tar hail-*-py3-none-any.whl) cd /io/repo/hail mkdir build/debug_libs mv build/libs/hail-all-spark.jar build/debug_libs/ @@ -729,7 +729,7 @@ steps: time tar czf data.tar.gz -C python/hail/docs data (cd .. 
&& time tar czf hail/website-src.tar.gz website) time tar czf cluster-tests.tar.gz python/cluster-tests - (cd build/deploy/dist/ && tar -cvf wheel-container.tar cpg_hail-*-py3-none-any.whl) + (cd build/deploy/dist/ && tar -cvf wheel-container.tar hail-*-py3-none-any.whl) time TESTNG_SPLITS=5 python3 generate_splits.py time tar czf splits.tar.gz testng-splits-*.xml inputs: @@ -823,7 +823,7 @@ steps: time retry ./gradlew --version export SPARK_VERSION="3.0.2" SCALA_VERSION="2.12.10" time retry make wheel - (cd build/deploy/dist/ && tar -cvf wheel-container.tar cpg_hail-*-py3-none-any.whl) + (cd build/deploy/dist/ && tar -cvf wheel-container.tar hail-*-py3-none-any.whl) inputs: - from: /repo to: /io/repo @@ -1283,7 +1283,7 @@ steps: tar xzf resources.tar.gz tar xzf data.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies hail-*-py3-none-any.whl # pyspark/conf/core-site.xml already points at /gsa-key/key.json mv /test-gsa-key/key.json /gsa-key/key.json @@ -1337,7 +1337,7 @@ steps: tar xzf resources.tar.gz tar xzf data.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies hail-*-py3-none-any.whl # pyspark/conf/core-site.xml already points at /gsa-key/key.json mv /test-gsa-key/key.json /gsa-key/key.json @@ -1391,7 +1391,7 @@ steps: tar xzf resources.tar.gz tar xzf data.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies hail-*-py3-none-any.whl # pyspark/conf/core-site.xml already points at /gsa-key/key.json mv /test-gsa-key/key.json /gsa-key/key.json @@ -1445,7 +1445,7 @@ steps: tar xzf resources.tar.gz tar xzf data.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies hail-*-py3-none-any.whl # pyspark/conf/core-site.xml already points at /gsa-key/key.json mv /test-gsa-key/key.json /gsa-key/key.json @@ -1499,7 +1499,7 @@ steps: tar xzf resources.tar.gz tar xzf data.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies hail-*-py3-none-any.whl # pyspark/conf/core-site.xml already points at /gsa-key/key.json mv /test-gsa-key/key.json /gsa-key/key.json @@ -1553,7 +1553,7 @@ steps: tar xzf resources.tar.gz tar xzf data.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies hail-*-py3-none-any.whl # pyspark/conf/core-site.xml already points at /gsa-key/key.json mv /test-gsa-key/key.json /gsa-key/key.json @@ -1607,7 +1607,7 @@ steps: tar xzf resources.tar.gz tar xzf data.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies hail-*-py3-none-any.whl # pyspark/conf/core-site.xml already points at /gsa-key/key.json mv /test-gsa-key/key.json /gsa-key/key.json @@ -1659,7 +1659,7 @@ steps: cd /io tar xzf test.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies hail-*-py3-none-any.whl export HAIL_TEST_GCS_BUCKET={{ global.hail_test_gcs_bucket }} export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json @@ 
-1717,7 +1717,7 @@ steps: cd /io tar xzf test.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies hail-*-py3-none-any.whl export HAIL_TEST_GCS_BUCKET={{ global.hail_test_gcs_bucket }} export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json @@ -1775,7 +1775,7 @@ steps: cd /io tar xzf test.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies hail-*-py3-none-any.whl export HAIL_TEST_GCS_BUCKET={{ global.hail_test_gcs_bucket }} export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json @@ -1833,7 +1833,7 @@ steps: cd /io tar xzf test.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies hail-*-py3-none-any.whl export HAIL_TEST_GCS_BUCKET={{ global.hail_test_gcs_bucket }} export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json @@ -1891,7 +1891,7 @@ steps: cd /io tar xzf test.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies hail-*-py3-none-any.whl export HAIL_TEST_GCS_BUCKET={{ global.hail_test_gcs_bucket }} export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json @@ -1951,7 +1951,7 @@ steps: tar xzf resources.tar.gz tar xzf data.tar.gz tar xvf wheel-container.tar - python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies hail-*-py3-none-any.whl # pyspark/conf/core-site.xml already points at /gsa-key/key.json mv /test-gsa-key/key.json /gsa-key/key.json @@ -2004,7 +2004,7 @@ steps: tar xzf resources.tar.gz tar xzf data.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies hail-*-py3-none-any.whl mkdir -p /io/tmp export HAIL_TEST_STORAGE_URI=/io/tmp/ @@ -2053,7 +2053,7 @@ steps: tar xzf resources.tar.gz tar xzf data.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies hail-*-py3-none-any.whl mkdir -p /io/tmp export HAIL_TEST_STORAGE_URI=/io/tmp/ @@ -2102,7 +2102,7 @@ steps: tar xzf resources.tar.gz tar xzf data.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies hail-*-py3-none-any.whl mkdir -p /io/tmp export HAIL_TEST_STORAGE_URI=/io/tmp/ @@ -2151,7 +2151,7 @@ steps: tar xzf resources.tar.gz tar xzf data.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies hail-*-py3-none-any.whl mkdir -p /io/tmp export HAIL_TEST_STORAGE_URI=/io/tmp/ @@ -2200,7 +2200,7 @@ steps: tar xzf resources.tar.gz tar xzf data.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies hail-*-py3-none-any.whl mkdir -p /io/tmp export HAIL_TEST_STORAGE_URI=/io/tmp/ @@ -2249,7 +2249,7 @@ steps: tar xzf resources.tar.gz tar xzf data.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies hail-*-py3-none-any.whl mkdir -p /io/tmp export 
HAIL_TEST_STORAGE_URI=/io/tmp/ @@ -2298,7 +2298,7 @@ steps: tar xzf resources.tar.gz tar xzf data.tar.gz tar xvf debug-wheel-container.tar - python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies hail-*-py3-none-any.whl mkdir -p /io/tmp export HAIL_TEST_STORAGE_URI=/io/tmp/ @@ -3141,7 +3141,7 @@ steps: script: | set -ex tar xvf /io/wheel-container.tar - python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies hail-*-py3-none-any.whl cd /io/repo/hail/python @@ -3224,7 +3224,7 @@ steps: script: | set -ex tar xvf /io/wheel-container.tar - python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies hail-*-py3-none-any.whl cd /io/repo/hail/python @@ -3307,7 +3307,7 @@ steps: script: | set -ex tar xvf /io/wheel-container.tar - python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies hail-*-py3-none-any.whl cd /io/repo/hail/python @@ -3390,7 +3390,7 @@ steps: script: | set -ex tar xvf /io/wheel-container.tar - python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies hail-*-py3-none-any.whl cd /io/repo/hail/python @@ -3473,7 +3473,7 @@ steps: script: | set -ex tar xvf /io/wheel-container.tar - python3 -m pip install --no-dependencies cpg_hail-*-py3-none-any.whl + python3 -m pip install --no-dependencies hail-*-py3-none-any.whl cd /io/repo/hail/python @@ -4758,10 +4758,10 @@ steps: $(cat /io/hail_version) \ $(cat /io/git_version) \ origin \ - /io/repo/hail/build/deploy/dist/cpg_hail-*-py3-none-any.whl \ + /io/repo/hail/build/deploy/dist/hail-*-py3-none-any.whl \ /io/github-oauth \ docker://{{ hailgenetics_hail_image.image }} \ - /io/wheel-for-azure/cpg_hail-*-py3-none-any.whl \ + /io/wheel-for-azure/hail-*-py3-none-any.whl \ /io/www.tar.gz inputs: - from: /hail_version @@ -4797,8 +4797,6 @@ steps: - deploy - dev dependsOn: - - test_dataproc-37 - - test_dataproc-38 - default_ns - ci_utils_image - build_hail diff --git a/docker/hail/Dockerfile b/docker/hail/Dockerfile index 49acb135fa7..e6b366a6fa5 100644 --- a/docker/hail/Dockerfile +++ b/docker/hail/Dockerfile @@ -13,8 +13,8 @@ RUN hail-apt-get-install \ vim pv COPY wheel-container.tar ./ RUN tar -xf wheel-container.tar && \ - pip3 install -U cpg_hail-*-py3-none-any.whl && \ - rm -rf cpg_hail-*-py3-none-any.whl + pip3 install -U hail-*-py3-none-any.whl && \ + rm -rf hail-*-py3-none-any.whl RUN hail-pip-install \ ipython \ matplotlib \ @@ -22,7 +22,7 @@ RUN hail-pip-install \ scikit-learn \ dill \ scipy \ - && rm -rf cpg_hail-*-py3-none-any.whl + && rm -rf hail-*-py3-none-any.whl RUN export SPARK_HOME=$(find_spark_home.py) && \ curl https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop2-2.0.1.jar \ >$SPARK_HOME/jars/gcs-connector-hadoop2-2.0.1.jar && \ diff --git a/hail/Dockerfile.hail-pip-installed b/hail/Dockerfile.hail-pip-installed index ffab458f020..ae8084914f9 100644 --- a/hail/Dockerfile.hail-pip-installed +++ b/hail/Dockerfile.hail-pip-installed @@ -8,5 +8,5 @@ RUN hail-pip-install -r dev-requirements.txt COPY wheel-container.tar ./ RUN tar -xf wheel-container.tar && \ - pip3 install -U cpg_hail-*-py3-none-any.whl && \ - rm -rf cpg_hail-*-py3-none-any.whl + pip3 install -U hail-*-py3-none-any.whl && \ + rm -rf hail-*-py3-none-any.whl diff --git a/hail/Makefile b/hail/Makefile index d8ef62a5a6e..1e702d38f14 100644 --- a/hail/Makefile +++ b/hail/Makefile @@ 
-58,8 +58,8 @@ SCALA_BUILD_INFO := src/main/resources/build-info.properties SHADOW_JAR := build/libs/hail-all-spark.jar SHADOW_TEST_JAR := build/libs/hail-all-spark-test.jar PYTHON_JAR := python/hail/backend/hail-all-spark.jar -WHEEL := build/deploy/dist/cpg_hail-$(HAIL_PIP_VERSION)-py3-none-any.whl -EGG := build/deploy/dist/cpg_hail-$(HAIL_PIP_VERSION)-py3.6.egg +WHEEL := build/deploy/dist/hail-$(HAIL_PIP_VERSION)-py3-none-any.whl +EGG := build/deploy/dist/hail-$(HAIL_PIP_VERSION)-py3.6.egg GRADLE_ARGS += -Dscala.version=$(SCALA_VERSION) -Dspark.version=$(SPARK_VERSION) -Delasticsearch.major-version=$(ELASTIC_MAJOR_VERSION) @@ -221,7 +221,7 @@ CLOUD_SUB_FOLDER := $(HAIL_PIP_VERSION) HAILCTL_BUCKET_BASE ?= gs://hail-common/hailctl/dataproc cloud_base := $(HAILCTL_BUCKET_BASE)/$(DEV_CLARIFIER)$(CLOUD_SUB_FOLDER) -wheel_cloud_path := $(cloud_base)/cpg_hail-$(HAIL_PIP_VERSION)-py3-none-any.whl +wheel_cloud_path := $(cloud_base)/hail-$(HAIL_PIP_VERSION)-py3-none-any.whl resources := $(wildcard python/hailtop/hailctl/dataproc/resources/*) $(eval $(call ENV_VAR,cloud_base)) $(eval $(call ENV_VAR,wheel_cloud_path)) @@ -403,4 +403,4 @@ clean: clean-env clean-libs native-lib-clean update-hail-repl: NAMESPACE ?= default update-hail-repl: wheel kubectl -n $(NAMESPACE) cp $(WHEEL) $$(kubectl get pods -n $(NAMESPACE) -l app=hail-repl | tail -n +2 | awk '{print $$1}'):. - kubectl -n $(NAMESPACE) exec -it $$(kubectl get pods -n $(NAMESPACE) -l app=hail-repl | tail -n +2 | awk '{print $$1}') -- pip3 install -U cpg_hail-$(HAIL_PIP_VERSION)-py3-none-any.whl + kubectl -n $(NAMESPACE) exec -it $$(kubectl get pods -n $(NAMESPACE) -l app=hail-repl | tail -n +2 | awk '{print $$1}') -- pip3 install -U hail-$(HAIL_PIP_VERSION)-py3-none-any.whl diff --git a/hail/python/setup.py b/hail/python/setup.py index 53653b95c5f..527cae199f1 100755 --- a/hail/python/setup.py +++ b/hail/python/setup.py @@ -27,7 +27,7 @@ dependencies.append(pkg) setup( - name="cpg-hail", + name="hail", version=hail_pip_version, author="Hail Team", author_email="hail@broadinstitute.org", From a5d151d313902e6177d0f311531fe422b5cbdc03 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Mon, 21 Mar 2022 13:09:46 +1100 Subject: [PATCH 331/501] Try highmem workers for Query JVM jobs (#155) --- batch/batch/worker/worker.py | 16 ++++++++-------- hail/python/hail/backend/service_backend.py | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/batch/batch/worker/worker.py b/batch/batch/worker/worker.py index 4ae13b2bcbb..507e6050bed 100644 --- a/batch/batch/worker/worker.py +++ b/batch/batch/worker/worker.py @@ -1858,17 +1858,17 @@ class JVM: @classmethod async def create_process(cls, socket_file: str) -> BufferedOutputProcess: # JVM and Hail both treat MB as 1024 * 1024 bytes. - # JVMs only start in standard workers which have 3.75 GiB == 3840 MiB per core. - # We only allocate 3700 MiB so that we stay well below the machine's max memory. - # We allocate 60% of memory per core to off heap memory: 1480 + 2220 = 3700. + # JVMs only start in highmem workers which have 6.5 GiB == 6656 MiB per core. + # We only allocate 6500 MiB so that we stay well below the machine's max memory. + # We allocate 60% of memory per core to off heap memory: 2600 + 3900 = 6500. 
return await BufferedOutputProcess.create( 'java', - '-Xmx1480M', + '-Xmx2600M', '-cp', f'/jvm-entryway:/jvm-entryway/junixsocket-selftest-2.3.3-jar-with-dependencies.jar:{JVM.SPARK_HOME}/jars/*', 'is.hail.JVMEntryway', socket_file, - env={'HAIL_WORKER_OFF_HEAP_MEMORY_PER_CORE_MB': '2220'}, + env={'HAIL_WORKER_OFF_HEAP_MEMORY_PER_CORE_MB': '3900'}, ) @classmethod @@ -2066,19 +2066,19 @@ def __init__(self, client_session: httpx.ClientSession): self._jvms: List[JVM] = [] async def _initialize_jvms(self): - if instance_config.worker_type() in ('standard', 'D'): + if instance_config.worker_type() in ('highmem', 'D'): self._jvms = await asyncio.gather(*[JVM.create(i) for i in range(CORES)]) log.info(f'JVMs initialized {self._jvms}') async def borrow_jvm(self) -> JVM: - if instance_config.worker_type() not in ('standard', 'D'): + if instance_config.worker_type() not in ('highmem', 'D'): raise ValueError(f'JVM jobs not allowed on {instance_config.worker_type()}') await asyncio.shield(self._jvm_initializer_task) assert self._jvms return self._jvms.pop() def return_jvm(self, jvm: JVM): - if instance_config.worker_type() not in ('standard', 'D'): + if instance_config.worker_type() not in ('highmem', 'D'): raise ValueError(f'JVM jobs not allowed on {instance_config.worker_type()}') jvm.reset() self._jvms.append(jvm) diff --git a/hail/python/hail/backend/service_backend.py b/hail/python/hail/backend/service_backend.py index a1b968d0d74..e5b90cdd7cd 100644 --- a/hail/python/hail/backend/service_backend.py +++ b/hail/python/hail/backend/service_backend.py @@ -214,7 +214,7 @@ async def _rpc(self, batch_attributes['name'], iodir + '/in', iodir + '/out', - ], mount_tokens=True, resources={'preemptible': False, 'memory': 'standard'}) + ], mount_tokens=True, resources={'preemptible': False, 'memory': 'highmem'}) b = await bb.submit(disable_progress_bar=self.disable_progress_bar) with timings.step("wait batch"): From bd5b962a9fec0238692df77eab3d309b8f3ed1f4 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Mon, 21 Mar 2022 17:18:10 +1100 Subject: [PATCH 332/501] Revert "Try highmem workers for Query JVM jobs (#155)" (#156) This reverts commit a5d151d313902e6177d0f311531fe422b5cbdc03. --- batch/batch/worker/worker.py | 16 ++++++++-------- hail/python/hail/backend/service_backend.py | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/batch/batch/worker/worker.py b/batch/batch/worker/worker.py index 507e6050bed..4ae13b2bcbb 100644 --- a/batch/batch/worker/worker.py +++ b/batch/batch/worker/worker.py @@ -1858,17 +1858,17 @@ class JVM: @classmethod async def create_process(cls, socket_file: str) -> BufferedOutputProcess: # JVM and Hail both treat MB as 1024 * 1024 bytes. - # JVMs only start in highmem workers which have 6.5 GiB == 6656 MiB per core. - # We only allocate 6500 MiB so that we stay well below the machine's max memory. - # We allocate 60% of memory per core to off heap memory: 2600 + 3900 = 6500. + # JVMs only start in standard workers which have 3.75 GiB == 3840 MiB per core. + # We only allocate 3700 MiB so that we stay well below the machine's max memory. + # We allocate 60% of memory per core to off heap memory: 1480 + 2220 = 3700. 
return await BufferedOutputProcess.create( 'java', - '-Xmx2600M', + '-Xmx1480M', '-cp', f'/jvm-entryway:/jvm-entryway/junixsocket-selftest-2.3.3-jar-with-dependencies.jar:{JVM.SPARK_HOME}/jars/*', 'is.hail.JVMEntryway', socket_file, - env={'HAIL_WORKER_OFF_HEAP_MEMORY_PER_CORE_MB': '3900'}, + env={'HAIL_WORKER_OFF_HEAP_MEMORY_PER_CORE_MB': '2220'}, ) @classmethod @@ -2066,19 +2066,19 @@ def __init__(self, client_session: httpx.ClientSession): self._jvms: List[JVM] = [] async def _initialize_jvms(self): - if instance_config.worker_type() in ('highmem', 'D'): + if instance_config.worker_type() in ('standard', 'D'): self._jvms = await asyncio.gather(*[JVM.create(i) for i in range(CORES)]) log.info(f'JVMs initialized {self._jvms}') async def borrow_jvm(self) -> JVM: - if instance_config.worker_type() not in ('highmem', 'D'): + if instance_config.worker_type() not in ('standard', 'D'): raise ValueError(f'JVM jobs not allowed on {instance_config.worker_type()}') await asyncio.shield(self._jvm_initializer_task) assert self._jvms return self._jvms.pop() def return_jvm(self, jvm: JVM): - if instance_config.worker_type() not in ('highmem', 'D'): + if instance_config.worker_type() not in ('standard', 'D'): raise ValueError(f'JVM jobs not allowed on {instance_config.worker_type()}') jvm.reset() self._jvms.append(jvm) diff --git a/hail/python/hail/backend/service_backend.py b/hail/python/hail/backend/service_backend.py index e5b90cdd7cd..a1b968d0d74 100644 --- a/hail/python/hail/backend/service_backend.py +++ b/hail/python/hail/backend/service_backend.py @@ -214,7 +214,7 @@ async def _rpc(self, batch_attributes['name'], iodir + '/in', iodir + '/out', - ], mount_tokens=True, resources={'preemptible': False, 'memory': 'highmem'}) + ], mount_tokens=True, resources={'preemptible': False, 'memory': 'standard'}) b = await bb.submit(disable_progress_bar=self.disable_progress_bar) with timings.step("wait batch"): From 80d9b5ff7bc6fb4e1a93d7d5a01159e3ade390ae Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Mon, 21 Mar 2022 17:49:16 +1100 Subject: [PATCH 333/501] Revert "Revert "Try highmem workers for Query JVM jobs (#155)" (#156)" (#157) This reverts commit bd5b962a9fec0238692df77eab3d309b8f3ed1f4. --- batch/batch/worker/worker.py | 16 ++++++++-------- hail/python/hail/backend/service_backend.py | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/batch/batch/worker/worker.py b/batch/batch/worker/worker.py index 4ae13b2bcbb..507e6050bed 100644 --- a/batch/batch/worker/worker.py +++ b/batch/batch/worker/worker.py @@ -1858,17 +1858,17 @@ class JVM: @classmethod async def create_process(cls, socket_file: str) -> BufferedOutputProcess: # JVM and Hail both treat MB as 1024 * 1024 bytes. - # JVMs only start in standard workers which have 3.75 GiB == 3840 MiB per core. - # We only allocate 3700 MiB so that we stay well below the machine's max memory. - # We allocate 60% of memory per core to off heap memory: 1480 + 2220 = 3700. + # JVMs only start in highmem workers which have 6.5 GiB == 6656 MiB per core. + # We only allocate 6500 MiB so that we stay well below the machine's max memory. + # We allocate 60% of memory per core to off heap memory: 2600 + 3900 = 6500. 
return await BufferedOutputProcess.create( 'java', - '-Xmx1480M', + '-Xmx2600M', '-cp', f'/jvm-entryway:/jvm-entryway/junixsocket-selftest-2.3.3-jar-with-dependencies.jar:{JVM.SPARK_HOME}/jars/*', 'is.hail.JVMEntryway', socket_file, - env={'HAIL_WORKER_OFF_HEAP_MEMORY_PER_CORE_MB': '2220'}, + env={'HAIL_WORKER_OFF_HEAP_MEMORY_PER_CORE_MB': '3900'}, ) @classmethod @@ -2066,19 +2066,19 @@ def __init__(self, client_session: httpx.ClientSession): self._jvms: List[JVM] = [] async def _initialize_jvms(self): - if instance_config.worker_type() in ('standard', 'D'): + if instance_config.worker_type() in ('highmem', 'D'): self._jvms = await asyncio.gather(*[JVM.create(i) for i in range(CORES)]) log.info(f'JVMs initialized {self._jvms}') async def borrow_jvm(self) -> JVM: - if instance_config.worker_type() not in ('standard', 'D'): + if instance_config.worker_type() not in ('highmem', 'D'): raise ValueError(f'JVM jobs not allowed on {instance_config.worker_type()}') await asyncio.shield(self._jvm_initializer_task) assert self._jvms return self._jvms.pop() def return_jvm(self, jvm: JVM): - if instance_config.worker_type() not in ('standard', 'D'): + if instance_config.worker_type() not in ('highmem', 'D'): raise ValueError(f'JVM jobs not allowed on {instance_config.worker_type()}') jvm.reset() self._jvms.append(jvm) diff --git a/hail/python/hail/backend/service_backend.py b/hail/python/hail/backend/service_backend.py index a1b968d0d74..e5b90cdd7cd 100644 --- a/hail/python/hail/backend/service_backend.py +++ b/hail/python/hail/backend/service_backend.py @@ -214,7 +214,7 @@ async def _rpc(self, batch_attributes['name'], iodir + '/in', iodir + '/out', - ], mount_tokens=True, resources={'preemptible': False, 'memory': 'standard'}) + ], mount_tokens=True, resources={'preemptible': False, 'memory': 'highmem'}) b = await bb.submit(disable_progress_bar=self.disable_progress_bar) with timings.step("wait batch"): From 4f5b48a210bf5a05dc3c124bda97ef916bd18975 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Tue, 22 Mar 2022 08:46:31 +1100 Subject: [PATCH 334/501] Fix JVM standard memory check (#158) --- batch/batch/front_end/front_end.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/batch/batch/front_end/front_end.py b/batch/batch/front_end/front_end.py index a170d1ef0ea..3ca0016bf26 100644 --- a/batch/batch/front_end/front_end.py +++ b/batch/batch/front_end/front_end.py @@ -762,7 +762,7 @@ async def _create_jobs(userdata: dict, job_specs: dict, batch_id: int, app: aioh if spec['process']['type'] == 'jvm': if 'cpu' in resources: raise web.HTTPBadRequest(reason='jvm jobs may not specify cpu') - if 'memory' in resources and resources['memory'] != 'standard': + if 'memory' in resources and resources['memory'] != 'highmem': raise web.HTTPBadRequest(reason='jvm jobs may not specify memory') if 'storage' in resources: raise web.HTTPBadRequest(reason='jvm jobs may not specify storage') From da754a1a3ea51fc3e01ffacaccbd9018542ae5f6 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Tue, 22 Mar 2022 09:32:43 +1100 Subject: [PATCH 335/501] Revert JVM highmem experiment (#159) * Revert "Fix JVM standard memory check (#158)" This reverts commit 4f5b48a210bf5a05dc3c124bda97ef916bd18975. * Revert "Revert "Revert "Try highmem workers for Query JVM jobs (#155)" (#156)" (#157)" This reverts commit 80d9b5ff7bc6fb4e1a93d7d5a01159e3ade390ae. 
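For reference, a minimal sketch of the front-end check that #158 and this revert toggle between 'standard' and 'highmem' (the helper name and the plain-dict spec are hypothetical; the real code raises web.HTTPBadRequest inside the job-creation handler): an explicit 'memory' value on a jvm job is rejected unless it names the expected tier, and 'cpu' and 'storage' are always rejected.

# Illustrative only: the jvm-job resource validation toggled by these patches.
def check_jvm_resources(resources: dict, expected_memory: str = 'standard') -> None:
    if 'cpu' in resources:
        raise ValueError('jvm jobs may not specify cpu')
    if 'memory' in resources and resources['memory'] != expected_memory:
        raise ValueError('jvm jobs may not specify memory')
    if 'storage' in resources:
        raise ValueError('jvm jobs may not specify storage')

check_jvm_resources({'preemptible': False})   # accepted
check_jvm_resources({'memory': 'standard'})   # accepted with the default tier
# check_jvm_resources({'memory': 'highmem'})  # would raise while 'standard' is expected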
--- batch/batch/front_end/front_end.py | 2 +- batch/batch/worker/worker.py | 16 ++++++++-------- hail/python/hail/backend/service_backend.py | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/batch/batch/front_end/front_end.py b/batch/batch/front_end/front_end.py index 3ca0016bf26..a170d1ef0ea 100644 --- a/batch/batch/front_end/front_end.py +++ b/batch/batch/front_end/front_end.py @@ -762,7 +762,7 @@ async def _create_jobs(userdata: dict, job_specs: dict, batch_id: int, app: aioh if spec['process']['type'] == 'jvm': if 'cpu' in resources: raise web.HTTPBadRequest(reason='jvm jobs may not specify cpu') - if 'memory' in resources and resources['memory'] != 'highmem': + if 'memory' in resources and resources['memory'] != 'standard': raise web.HTTPBadRequest(reason='jvm jobs may not specify memory') if 'storage' in resources: raise web.HTTPBadRequest(reason='jvm jobs may not specify storage') diff --git a/batch/batch/worker/worker.py b/batch/batch/worker/worker.py index 507e6050bed..4ae13b2bcbb 100644 --- a/batch/batch/worker/worker.py +++ b/batch/batch/worker/worker.py @@ -1858,17 +1858,17 @@ class JVM: @classmethod async def create_process(cls, socket_file: str) -> BufferedOutputProcess: # JVM and Hail both treat MB as 1024 * 1024 bytes. - # JVMs only start in highmem workers which have 6.5 GiB == 6656 MiB per core. - # We only allocate 6500 MiB so that we stay well below the machine's max memory. - # We allocate 60% of memory per core to off heap memory: 2600 + 3900 = 6500. + # JVMs only start in standard workers which have 3.75 GiB == 3840 MiB per core. + # We only allocate 3700 MiB so that we stay well below the machine's max memory. + # We allocate 60% of memory per core to off heap memory: 1480 + 2220 = 3700. return await BufferedOutputProcess.create( 'java', - '-Xmx2600M', + '-Xmx1480M', '-cp', f'/jvm-entryway:/jvm-entryway/junixsocket-selftest-2.3.3-jar-with-dependencies.jar:{JVM.SPARK_HOME}/jars/*', 'is.hail.JVMEntryway', socket_file, - env={'HAIL_WORKER_OFF_HEAP_MEMORY_PER_CORE_MB': '3900'}, + env={'HAIL_WORKER_OFF_HEAP_MEMORY_PER_CORE_MB': '2220'}, ) @classmethod @@ -2066,19 +2066,19 @@ def __init__(self, client_session: httpx.ClientSession): self._jvms: List[JVM] = [] async def _initialize_jvms(self): - if instance_config.worker_type() in ('highmem', 'D'): + if instance_config.worker_type() in ('standard', 'D'): self._jvms = await asyncio.gather(*[JVM.create(i) for i in range(CORES)]) log.info(f'JVMs initialized {self._jvms}') async def borrow_jvm(self) -> JVM: - if instance_config.worker_type() not in ('highmem', 'D'): + if instance_config.worker_type() not in ('standard', 'D'): raise ValueError(f'JVM jobs not allowed on {instance_config.worker_type()}') await asyncio.shield(self._jvm_initializer_task) assert self._jvms return self._jvms.pop() def return_jvm(self, jvm: JVM): - if instance_config.worker_type() not in ('highmem', 'D'): + if instance_config.worker_type() not in ('standard', 'D'): raise ValueError(f'JVM jobs not allowed on {instance_config.worker_type()}') jvm.reset() self._jvms.append(jvm) diff --git a/hail/python/hail/backend/service_backend.py b/hail/python/hail/backend/service_backend.py index e5b90cdd7cd..a1b968d0d74 100644 --- a/hail/python/hail/backend/service_backend.py +++ b/hail/python/hail/backend/service_backend.py @@ -214,7 +214,7 @@ async def _rpc(self, batch_attributes['name'], iodir + '/in', iodir + '/out', - ], mount_tokens=True, resources={'preemptible': False, 'memory': 'highmem'}) + ], mount_tokens=True, 
resources={'preemptible': False, 'memory': 'standard'}) b = await bb.submit(disable_progress_bar=self.disable_progress_bar) with timings.step("wait batch"): From e8bb4790f0fa715cb210d2ac57bd781f990bf4b2 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Tue, 22 Mar 2022 11:51:37 +1100 Subject: [PATCH 336/501] Try highmem JVM Query jobs again (#160) * Revert "Revert JVM highmem experiment (#159)" This reverts commit da754a1a3ea51fc3e01ffacaccbd9018542ae5f6. * Also use highmem workers (not only drivers) * Use JString --- batch/batch/front_end/front_end.py | 2 +- batch/batch/worker/worker.py | 16 ++++++++-------- hail/python/hail/backend/service_backend.py | 2 +- .../is/hail/backend/service/ServiceBackend.scala | 4 +++- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/batch/batch/front_end/front_end.py b/batch/batch/front_end/front_end.py index a170d1ef0ea..3ca0016bf26 100644 --- a/batch/batch/front_end/front_end.py +++ b/batch/batch/front_end/front_end.py @@ -762,7 +762,7 @@ async def _create_jobs(userdata: dict, job_specs: dict, batch_id: int, app: aioh if spec['process']['type'] == 'jvm': if 'cpu' in resources: raise web.HTTPBadRequest(reason='jvm jobs may not specify cpu') - if 'memory' in resources and resources['memory'] != 'standard': + if 'memory' in resources and resources['memory'] != 'highmem': raise web.HTTPBadRequest(reason='jvm jobs may not specify memory') if 'storage' in resources: raise web.HTTPBadRequest(reason='jvm jobs may not specify storage') diff --git a/batch/batch/worker/worker.py b/batch/batch/worker/worker.py index 4ae13b2bcbb..507e6050bed 100644 --- a/batch/batch/worker/worker.py +++ b/batch/batch/worker/worker.py @@ -1858,17 +1858,17 @@ class JVM: @classmethod async def create_process(cls, socket_file: str) -> BufferedOutputProcess: # JVM and Hail both treat MB as 1024 * 1024 bytes. - # JVMs only start in standard workers which have 3.75 GiB == 3840 MiB per core. - # We only allocate 3700 MiB so that we stay well below the machine's max memory. - # We allocate 60% of memory per core to off heap memory: 1480 + 2220 = 3700. + # JVMs only start in highmem workers which have 6.5 GiB == 6656 MiB per core. + # We only allocate 6500 MiB so that we stay well below the machine's max memory. + # We allocate 60% of memory per core to off heap memory: 2600 + 3900 = 6500. 
return await BufferedOutputProcess.create( 'java', - '-Xmx1480M', + '-Xmx2600M', '-cp', f'/jvm-entryway:/jvm-entryway/junixsocket-selftest-2.3.3-jar-with-dependencies.jar:{JVM.SPARK_HOME}/jars/*', 'is.hail.JVMEntryway', socket_file, - env={'HAIL_WORKER_OFF_HEAP_MEMORY_PER_CORE_MB': '2220'}, + env={'HAIL_WORKER_OFF_HEAP_MEMORY_PER_CORE_MB': '3900'}, ) @classmethod @@ -2066,19 +2066,19 @@ def __init__(self, client_session: httpx.ClientSession): self._jvms: List[JVM] = [] async def _initialize_jvms(self): - if instance_config.worker_type() in ('standard', 'D'): + if instance_config.worker_type() in ('highmem', 'D'): self._jvms = await asyncio.gather(*[JVM.create(i) for i in range(CORES)]) log.info(f'JVMs initialized {self._jvms}') async def borrow_jvm(self) -> JVM: - if instance_config.worker_type() not in ('standard', 'D'): + if instance_config.worker_type() not in ('highmem', 'D'): raise ValueError(f'JVM jobs not allowed on {instance_config.worker_type()}') await asyncio.shield(self._jvm_initializer_task) assert self._jvms return self._jvms.pop() def return_jvm(self, jvm: JVM): - if instance_config.worker_type() not in ('standard', 'D'): + if instance_config.worker_type() not in ('highmem', 'D'): raise ValueError(f'JVM jobs not allowed on {instance_config.worker_type()}') jvm.reset() self._jvms.append(jvm) diff --git a/hail/python/hail/backend/service_backend.py b/hail/python/hail/backend/service_backend.py index a1b968d0d74..e5b90cdd7cd 100644 --- a/hail/python/hail/backend/service_backend.py +++ b/hail/python/hail/backend/service_backend.py @@ -214,7 +214,7 @@ async def _rpc(self, batch_attributes['name'], iodir + '/in', iodir + '/out', - ], mount_tokens=True, resources={'preemptible': False, 'memory': 'standard'}) + ], mount_tokens=True, resources={'preemptible': False, 'memory': 'highmem'}) b = await bb.submit(disable_progress_bar=self.disable_progress_bar) with timings.step("wait batch"): diff --git a/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala b/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala index fd5003eb666..9d7a618645a 100644 --- a/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala +++ b/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala @@ -164,7 +164,9 @@ class ServiceBackend( JString(s"$i"))), "type" -> JString("jvm")), "mount_tokens" -> JBool(true), - "resources" -> JObject("preemptible" -> JBool(true)) + "resources" -> JObject( + "preemptible" -> JBool(true), + "memory" -> JString("highmem")) ) i += 1 } From 61203e0259ccf0c65430173de9c27dbbfed55736 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Wed, 23 Mar 2022 15:47:20 +1100 Subject: [PATCH 337/501] Add build.yaml step to tag the hailgenetics image in prod_deploy (#161) * Add step to tag the hailgenetics image * Use skopeo instead of docker --- .github/workflows/prod_deploy.yaml | 2 +- build.yaml | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/.github/workflows/prod_deploy.yaml b/.github/workflows/prod_deploy.yaml index a28858c8931..24da4550c99 100644 --- a/.github/workflows/prod_deploy.yaml +++ b/.github/workflows/prod_deploy.yaml @@ -12,7 +12,7 @@ jobs: DEPLOY_BATCH_URL=$(curl --fail --silent --show-error -X POST \ -H "Authorization: Bearer ${{ secrets.CI_TOKEN }}" \ -H "Content-Type:application/json" \ - -d '{"steps": ["deploy_auth", "deploy_batch", "deploy_ci", "deploy_memory", "deploy_notebook", "upload_query_jar", "deploy"], "sha": "${{ github.sha }}"}' \ + -d '{"steps": ["deploy_auth", 
"deploy_batch", "deploy_ci", "deploy_hailgenetics_image", "deploy_memory", "upload_query_jar"], "sha": "${{ github.sha }}"}' \ https://ci.hail.populationgenomics.org.au/api/v1alpha/prod_deploy) echo DEPLOY_BATCH_URL=$DEPLOY_BATCH_URL >> $GITHUB_ENV diff --git a/build.yaml b/build.yaml index d81a07f3cbb..20fe9dff658 100644 --- a/build.yaml +++ b/build.yaml @@ -4808,6 +4808,32 @@ steps: - make_docs clouds: - gcp + - kind: runImage + name: deploy_hailgenetics_image + image: + valueFrom: ci_utils_image.image + script: | + set -ex + gcloud auth activate-service-account --key-file=/ci-deploy-0-1--hail-is-hail/ci-deploy-0-1--hail-is-hail.json + gcloud auth -q configure-docker australia-southeast1-docker.pkg.dev + skopeo copy docker://{{ hailgenetics_hail_image.image }} docker://australia-southeast1-docker.pkg.dev/hail-295901/hail/hailgenetics/hail:$(cat /io/hail_pip_version) + inputs: + - from: /hail_pip_version + to: /io/hail_pip_version + secrets: + - name: ci-deploy-0-1--hail-is-hail + namespace: + valueFrom: default_ns.name + mountPath: /ci-deploy-0-1--hail-is-hail + scopes: + - deploy + - dev + dependsOn: + - default_ns + - ci_utils_image + - hailgenetics_hail_image + clouds: + - gcp - kind: buildImage2 name: website_image dockerFile: /io/website/Dockerfile From 1222027dc5c0ab24e3873ab950a7c9fd18d90fc7 Mon Sep 17 00:00:00 2001 From: Vlad Savelyev Date: Wed, 23 Mar 2022 18:04:00 +1100 Subject: [PATCH 338/501] Use ES version 8 (#162) --- hail/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hail/Makefile b/hail/Makefile index f9fd1679d05..5913ca35cd0 100644 --- a/hail/Makefile +++ b/hail/Makefile @@ -17,7 +17,7 @@ HAIL_MAJOR_MINOR_VERSION := 0.2 HAIL_PATCH_VERSION := 91 HAIL_PIP_VERSION := $(HAIL_MAJOR_MINOR_VERSION).$(HAIL_PATCH_VERSION) HAIL_VERSION := $(HAIL_PIP_VERSION)-$(SHORT_REVISION) -ELASTIC_MAJOR_VERSION ?= 7 +ELASTIC_MAJOR_VERSION ?= 8 $(eval $(call ENV_VAR,REVISION)) From 850a34817c02678105d208b05dcdd841b536c4d6 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Wed, 23 Mar 2022 18:09:43 +1100 Subject: [PATCH 339/501] Use /gsa-key/key.json for deploy_hailgenetics_image step (#163) --- build.yaml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/build.yaml b/build.yaml index 20fe9dff658..0081d225da3 100644 --- a/build.yaml +++ b/build.yaml @@ -4814,17 +4814,12 @@ steps: valueFrom: ci_utils_image.image script: | set -ex - gcloud auth activate-service-account --key-file=/ci-deploy-0-1--hail-is-hail/ci-deploy-0-1--hail-is-hail.json + gcloud auth activate-service-account --key-file=/gsa-key/key.json gcloud auth -q configure-docker australia-southeast1-docker.pkg.dev skopeo copy docker://{{ hailgenetics_hail_image.image }} docker://australia-southeast1-docker.pkg.dev/hail-295901/hail/hailgenetics/hail:$(cat /io/hail_pip_version) inputs: - from: /hail_pip_version to: /io/hail_pip_version - secrets: - - name: ci-deploy-0-1--hail-is-hail - namespace: - valueFrom: default_ns.name - mountPath: /ci-deploy-0-1--hail-is-hail scopes: - deploy - dev From 7d96996c48636a2a4be2e8d4804710f1bc323889 Mon Sep 17 00:00:00 2001 From: Vlad Savelyev Date: Wed, 23 Mar 2022 22:51:47 +1100 Subject: [PATCH 340/501] Add build.yaml step to upload hail wheel in prod_deploy (#164) --- .github/workflows/prod_deploy.yaml | 2 +- build.yaml | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/.github/workflows/prod_deploy.yaml b/.github/workflows/prod_deploy.yaml index 24da4550c99..6188a50c9e8 100644 --- 
a/.github/workflows/prod_deploy.yaml +++ b/.github/workflows/prod_deploy.yaml @@ -12,7 +12,7 @@ jobs: DEPLOY_BATCH_URL=$(curl --fail --silent --show-error -X POST \ -H "Authorization: Bearer ${{ secrets.CI_TOKEN }}" \ -H "Content-Type:application/json" \ - -d '{"steps": ["deploy_auth", "deploy_batch", "deploy_ci", "deploy_hailgenetics_image", "deploy_memory", "upload_query_jar"], "sha": "${{ github.sha }}"}' \ + -d '{"steps": ["deploy_auth", "deploy_batch", "deploy_ci", "deploy_hailgenetics_image", "deploy_memory", "upload_query_jar", "deploy_wheel"], "sha": "${{ github.sha }}"}' \ https://ci.hail.populationgenomics.org.au/api/v1alpha/prod_deploy) echo DEPLOY_BATCH_URL=$DEPLOY_BATCH_URL >> $GITHUB_ENV diff --git a/build.yaml b/build.yaml index 0081d225da3..3f97e9dd4a6 100644 --- a/build.yaml +++ b/build.yaml @@ -4808,6 +4808,31 @@ steps: - make_docs clouds: - gcp + - kind: runImage + name: deploy_wheel + image: + valueFrom: hail_pip_installed_image.image + script: | + set -ex + tar xvf /io/wheel-container.tar + gcloud auth activate-service-account --key-file=/gsa-key/key.json + HAIL_WHEEL_URL=gs://cpg-hail-ci/wheels/hail-$(cat /io/hail_pip_version)-py3-none-any.whl + python3 -m hailtop.aiotools.copy 'null' '[ + {"from": "hail-*-py3-none-any.whl", "to": "'${HAIL_WHEEL_URL}'"}]' + inputs: + - from: /hail_pip_version + to: /io/hail_pip_version + - from: /just-wheel/wheel-container.tar + to: /io/wheel-container.tar + scopes: + - deploy + - dev + dependsOn: + - default_ns + - hail_pip_installed_image + - build_hail_jar_and_wheel_only + clouds: + - gcp - kind: runImage name: deploy_hailgenetics_image image: From 14cb330947c1c0c2b7d162e9bbf896a9c367718e Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Wed, 23 Mar 2022 22:55:26 +1100 Subject: [PATCH 341/501] Use registry-push-credentials secret to tag hailgenetics image (#165) --- build.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/build.yaml b/build.yaml index 3f97e9dd4a6..a698d2b3b7c 100644 --- a/build.yaml +++ b/build.yaml @@ -4839,12 +4839,17 @@ steps: valueFrom: ci_utils_image.image script: | set -ex - gcloud auth activate-service-account --key-file=/gsa-key/key.json + gcloud auth activate-service-account --key-file=/registry-push-credentials/registry-push-credentials.json gcloud auth -q configure-docker australia-southeast1-docker.pkg.dev skopeo copy docker://{{ hailgenetics_hail_image.image }} docker://australia-southeast1-docker.pkg.dev/hail-295901/hail/hailgenetics/hail:$(cat /io/hail_pip_version) inputs: - from: /hail_pip_version to: /io/hail_pip_version + secrets: + - name: registry-push-credentials + namespace: + valueFrom: default_ns.name + mountPath: /registry-push-credentials scopes: - deploy - dev From 24c603840540d8552cb9b35384ace376020fc5f1 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Thu, 24 Mar 2022 08:46:45 +1100 Subject: [PATCH 342/501] Fix path to mounted registry-push-credentials in deploy_hailgenetics_image (#166) --- build.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.yaml b/build.yaml index a698d2b3b7c..a4ed7c3bc1f 100644 --- a/build.yaml +++ b/build.yaml @@ -4839,7 +4839,7 @@ steps: valueFrom: ci_utils_image.image script: | set -ex - gcloud auth activate-service-account --key-file=/registry-push-credentials/registry-push-credentials.json + gcloud auth activate-service-account --key-file=/registry-push-credentials/credentials.json gcloud auth -q configure-docker australia-southeast1-docker.pkg.dev skopeo copy docker://{{ 
hailgenetics_hail_image.image }} docker://australia-southeast1-docker.pkg.dev/hail-295901/hail/hailgenetics/hail:$(cat /io/hail_pip_version) inputs: From d4d7144281646e927712e0f03ae3f4afca01ebed Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Thu, 24 Mar 2022 08:50:28 +1100 Subject: [PATCH 343/501] Fix deploy_wheel copy, which doesn't take wildcards (#167) * Fix deploy_wheel copy, which doesn't take wildcards * Simplify --- build.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build.yaml b/build.yaml index a4ed7c3bc1f..2bfa9174b5d 100644 --- a/build.yaml +++ b/build.yaml @@ -4816,9 +4816,9 @@ steps: set -ex tar xvf /io/wheel-container.tar gcloud auth activate-service-account --key-file=/gsa-key/key.json - HAIL_WHEEL_URL=gs://cpg-hail-ci/wheels/hail-$(cat /io/hail_pip_version)-py3-none-any.whl + HAIL_WHEEL_FILE=hail-$(cat /io/hail_pip_version)-py3-none-any.whl python3 -m hailtop.aiotools.copy 'null' '[ - {"from": "hail-*-py3-none-any.whl", "to": "'${HAIL_WHEEL_URL}'"}]' + {"from": "'$HAIL_WHEEL_FILE'", "to": "gs://cpg-hail-ci/wheels/'$HAIL_WHEEL_FILE'"}]' inputs: - from: /hail_pip_version to: /io/hail_pip_version From d65b5fabb31af0d7eacd1dfab60e186e406d2904 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Thu, 24 Mar 2022 09:12:37 +1100 Subject: [PATCH 344/501] Sort steps lexicographically (#168) --- .github/workflows/prod_deploy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/prod_deploy.yaml b/.github/workflows/prod_deploy.yaml index 6188a50c9e8..4be6f68c6bb 100644 --- a/.github/workflows/prod_deploy.yaml +++ b/.github/workflows/prod_deploy.yaml @@ -12,7 +12,7 @@ jobs: DEPLOY_BATCH_URL=$(curl --fail --silent --show-error -X POST \ -H "Authorization: Bearer ${{ secrets.CI_TOKEN }}" \ -H "Content-Type:application/json" \ - -d '{"steps": ["deploy_auth", "deploy_batch", "deploy_ci", "deploy_hailgenetics_image", "deploy_memory", "upload_query_jar", "deploy_wheel"], "sha": "${{ github.sha }}"}' \ + -d '{"steps": ["deploy_auth", "deploy_batch", "deploy_ci", "deploy_hailgenetics_image", "deploy_memory", "deploy_wheel", "upload_query_jar"], "sha": "${{ github.sha }}"}' \ https://ci.hail.populationgenomics.org.au/api/v1alpha/prod_deploy) echo DEPLOY_BATCH_URL=$DEPLOY_BATCH_URL >> $GITHUB_ENV From 48356cb15d1094daba9134e13d8ac09580f1cfe5 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Thu, 24 Mar 2022 15:41:49 +1100 Subject: [PATCH 345/501] Print external URL in batch submission log (#170) --- hail/python/hailtop/batch/backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hail/python/hailtop/batch/backend.py b/hail/python/hailtop/batch/backend.py index 12a07032a6c..9d6c0d75de4 100644 --- a/hail/python/hailtop/batch/backend.py +++ b/hail/python/hailtop/batch/backend.py @@ -718,7 +718,7 @@ async def compile_job(job): print('') deploy_config = get_deploy_config() - url = deploy_config.url('batch', f'/batches/{batch_handle.id}') + url = deploy_config.external_url('batch', f'/batches/{batch_handle.id}') print(f'Submitted batch {batch_handle.id}, see {url}') if open: From e2d28d96f6f491a17ed35a7d05f4310a2b6bdca9 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Fri, 25 Mar 2022 08:34:40 +1100 Subject: [PATCH 346/501] Delete dependabot.yml (#174) We rely on upstream version changes --- .github/dependabot.yml | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml deleted file 
mode 100644 index 68d31bef950..00000000000 --- a/.github/dependabot.yml +++ /dev/null @@ -1,17 +0,0 @@ -version: 2 -updates: - - package-ecosystem: "pip" - directory: "/docker" - schedule: - interval: "daily" - open-pull-requests-limit: 1 - - package-ecosystem: "pip" - directory: "/hail/python" - schedule: - interval: "daily" - open-pull-requests-limit: 1 - - package-ecosystem: "pip" - directory: "/hail/python/dev" - schedule: - interval: "daily" - open-pull-requests-limit: 1 From 98d5e75ee7173b61a6c9f9ec9b98293e64313930 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Mon, 28 Mar 2022 09:51:22 +1100 Subject: [PATCH 347/501] Revert "Try highmem JVM Query jobs again (#160)" (#175) This reverts commit e8bb4790f0fa715cb210d2ac57bd781f990bf4b2. --- batch/batch/front_end/front_end.py | 2 +- batch/batch/worker/worker.py | 16 ++++++++-------- hail/python/hail/backend/service_backend.py | 2 +- .../is/hail/backend/service/ServiceBackend.scala | 4 +--- 4 files changed, 11 insertions(+), 13 deletions(-) diff --git a/batch/batch/front_end/front_end.py b/batch/batch/front_end/front_end.py index 3ca0016bf26..a170d1ef0ea 100644 --- a/batch/batch/front_end/front_end.py +++ b/batch/batch/front_end/front_end.py @@ -762,7 +762,7 @@ async def _create_jobs(userdata: dict, job_specs: dict, batch_id: int, app: aioh if spec['process']['type'] == 'jvm': if 'cpu' in resources: raise web.HTTPBadRequest(reason='jvm jobs may not specify cpu') - if 'memory' in resources and resources['memory'] != 'highmem': + if 'memory' in resources and resources['memory'] != 'standard': raise web.HTTPBadRequest(reason='jvm jobs may not specify memory') if 'storage' in resources: raise web.HTTPBadRequest(reason='jvm jobs may not specify storage') diff --git a/batch/batch/worker/worker.py b/batch/batch/worker/worker.py index 507e6050bed..4ae13b2bcbb 100644 --- a/batch/batch/worker/worker.py +++ b/batch/batch/worker/worker.py @@ -1858,17 +1858,17 @@ class JVM: @classmethod async def create_process(cls, socket_file: str) -> BufferedOutputProcess: # JVM and Hail both treat MB as 1024 * 1024 bytes. - # JVMs only start in highmem workers which have 6.5 GiB == 6656 MiB per core. - # We only allocate 6500 MiB so that we stay well below the machine's max memory. - # We allocate 60% of memory per core to off heap memory: 2600 + 3900 = 6500. + # JVMs only start in standard workers which have 3.75 GiB == 3840 MiB per core. + # We only allocate 3700 MiB so that we stay well below the machine's max memory. + # We allocate 60% of memory per core to off heap memory: 1480 + 2220 = 3700. 
return await BufferedOutputProcess.create( 'java', - '-Xmx2600M', + '-Xmx1480M', '-cp', f'/jvm-entryway:/jvm-entryway/junixsocket-selftest-2.3.3-jar-with-dependencies.jar:{JVM.SPARK_HOME}/jars/*', 'is.hail.JVMEntryway', socket_file, - env={'HAIL_WORKER_OFF_HEAP_MEMORY_PER_CORE_MB': '3900'}, + env={'HAIL_WORKER_OFF_HEAP_MEMORY_PER_CORE_MB': '2220'}, ) @classmethod @@ -2066,19 +2066,19 @@ def __init__(self, client_session: httpx.ClientSession): self._jvms: List[JVM] = [] async def _initialize_jvms(self): - if instance_config.worker_type() in ('highmem', 'D'): + if instance_config.worker_type() in ('standard', 'D'): self._jvms = await asyncio.gather(*[JVM.create(i) for i in range(CORES)]) log.info(f'JVMs initialized {self._jvms}') async def borrow_jvm(self) -> JVM: - if instance_config.worker_type() not in ('highmem', 'D'): + if instance_config.worker_type() not in ('standard', 'D'): raise ValueError(f'JVM jobs not allowed on {instance_config.worker_type()}') await asyncio.shield(self._jvm_initializer_task) assert self._jvms return self._jvms.pop() def return_jvm(self, jvm: JVM): - if instance_config.worker_type() not in ('highmem', 'D'): + if instance_config.worker_type() not in ('standard', 'D'): raise ValueError(f'JVM jobs not allowed on {instance_config.worker_type()}') jvm.reset() self._jvms.append(jvm) diff --git a/hail/python/hail/backend/service_backend.py b/hail/python/hail/backend/service_backend.py index e5b90cdd7cd..a1b968d0d74 100644 --- a/hail/python/hail/backend/service_backend.py +++ b/hail/python/hail/backend/service_backend.py @@ -214,7 +214,7 @@ async def _rpc(self, batch_attributes['name'], iodir + '/in', iodir + '/out', - ], mount_tokens=True, resources={'preemptible': False, 'memory': 'highmem'}) + ], mount_tokens=True, resources={'preemptible': False, 'memory': 'standard'}) b = await bb.submit(disable_progress_bar=self.disable_progress_bar) with timings.step("wait batch"): diff --git a/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala b/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala index 9d7a618645a..fd5003eb666 100644 --- a/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala +++ b/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala @@ -164,9 +164,7 @@ class ServiceBackend( JString(s"$i"))), "type" -> JString("jvm")), "mount_tokens" -> JBool(true), - "resources" -> JObject( - "preemptible" -> JBool(true), - "memory" -> JString("highmem")) + "resources" -> JObject("preemptible" -> JBool(true)) ) i += 1 } From e279414c8766be01ab7da46e7bb91b4a3f52241d Mon Sep 17 00:00:00 2001 From: Vlad Savelyev Date: Tue, 29 Mar 2022 14:43:04 +1100 Subject: [PATCH 348/501] Revert "Merge upstream 0.2.93" (#177) --- auth/Dockerfile | 2 +- auth/deployment.yaml | 2 +- batch/Dockerfile.driver-nginx | 12 - batch/Dockerfile.worker | 2 +- batch/Makefile | 11 +- .../batch/driver/instance_collection/pool.py | 8 - batch/batch/driver/job.py | 3 - batch/batch/driver/main.py | 2 + batch/batch/file_store.py | 20 - batch/batch/front_end/front_end.py | 294 ++-- batch/batch/front_end/templates/job.html | 21 +- batch/batch/front_end/validate.py | 7 +- batch/batch/resource_usage.py | 103 -- batch/batch/spec_writer.py | 1 - batch/batch/worker/worker.py | 1225 ++++++-------- batch/deployment.yaml | 54 +- batch/driver-nginx.conf | 78 - batch/test/test_batch.py | 2 +- batch2/react-batch/package-lock.json | 12 +- .../benchmark_hail/run/table_benchmarks.py | 5 - bootstrap-gateway/deployment.yaml | 2 +- build.yaml | 214 +-- ci/ci/build.py | 6 +- 
ci/ci/github.py | 2 +- dev-docs/kubernetes-operations.md | 49 +- docker/Dockerfile.base | 2 +- docker/hail-ubuntu/curlrc | 2 +- docker/requirements.txt | 2 +- gateway/deployment.yaml | 2 +- gear/gear/database.py | 16 +- gear/gear/metrics.py | 1 - hail/.gitignore | 1 - hail/Dockerfile.hail-run-tests | 2 +- hail/Makefile | 8 +- hail/python/dev/requirements.txt | 2 +- hail/python/hail/__init__.py | 8 +- hail/python/hail/backend/backend.py | 69 +- hail/python/hail/backend/local_backend.py | 14 +- hail/python/hail/backend/py4j_backend.py | 83 +- hail/python/hail/backend/service_backend.py | 367 +--- hail/python/hail/backend/spark_backend.py | 12 +- hail/python/hail/context.py | 246 +-- hail/python/hail/docs/change_log.md | 33 - .../python/hail/docs/cloud/query_on_batch.rst | 65 - .../datasets/schemas/gnomad_genome_sites.rst | 368 ++-- .../schemas/gnomad_hgdp_1kg_callset.rst | 651 +++++++ .../schemas/gnomad_hgdp_1kg_subset_dense.rst | 1501 ----------------- ...gnomad_hgdp_1kg_subset_sample_metadata.rst | 653 ------- .../schemas/gnomad_hgdp_1kg_subset_sparse.rst | 54 - ...ad_hgdp_1kg_subset_variant_annotations.rst | 857 ---------- .../schemas/gnomad_pca_variant_loadings.rst | 26 - .../schemas/gnomad_variant_co-occurrence.rst | 60 - hail/python/hail/docs/ggplot/index.rst | 8 - hail/python/hail/docs/hail_on_the_cloud.rst | 10 +- hail/python/hail/experimental/datasets.json | 156 +- hail/python/hail/experimental/datasets.py | 6 +- hail/python/hail/experimental/expressions.py | 17 +- .../experimental/vcf_combiner/vcf_combiner.py | 1 - .../hail/expr/expressions/base_expression.py | 5 +- .../expr/expressions/typed_expressions.py | 45 +- hail/python/hail/expr/types.py | 4 +- hail/python/hail/fs/fs.py | 4 - hail/python/hail/fs/google_fs.py | 148 ++ hail/python/hail/fs/hadoop_fs.py | 8 +- hail/python/hail/fs/local_fs.py | 49 +- hail/python/hail/fs/router_fs.py | 32 +- hail/python/hail/fs/stat_result.py | 4 +- hail/python/hail/ggplot/__init__.py | 9 +- hail/python/hail/ggplot/geoms.py | 9 +- hail/python/hail/ggplot/ggplot.py | 13 +- hail/python/hail/ggplot/scale.py | 106 +- hail/python/hail/ggplot/stats.py | 2 - hail/python/hail/ggplot/utils.py | 18 +- hail/python/hail/ir/__init__.py | 3 +- hail/python/hail/ir/base_ir.py | 2 +- hail/python/hail/ir/blockmatrix_ir.py | 10 +- hail/python/hail/ir/blockmatrix_writer.py | 29 - hail/python/hail/ir/ir.py | 7 +- hail/python/hail/ir/matrix_ir.py | 13 +- hail/python/hail/ir/matrix_reader.py | 90 +- hail/python/hail/ir/register_functions.py | 7 +- hail/python/hail/ir/table_ir.py | 13 +- hail/python/hail/ir/table_reader.py | 39 +- hail/python/hail/ir/utils.py | 52 - hail/python/hail/linalg/blockmatrix.py | 43 +- hail/python/hail/matrixtable.py | 33 +- hail/python/hail/methods/impex.py | 509 +----- hail/python/hail/methods/statgen.py | 4 - hail/python/hail/stats/linear_mixed_model.py | 1 - hail/python/hail/table.py | 33 +- hail/python/hail/utils/__init__.py | 3 +- hail/python/hail/utils/hadoop_utils.py | 23 +- hail/python/hail/utils/java.py | 35 +- hail/python/hail/utils/misc.py | 40 +- .../vds/combiner/variant_dataset_combiner.py | 1 - hail/python/hail/vds/variant_dataset.py | 1 - hail/python/hailtop/aiotools/copy.py | 13 +- hail/python/hailtop/batch_client/aioclient.py | 4 +- hail/python/hailtop/config/user_config.py | 3 + hail/python/hailtop/hailctl/config/cli.py | 8 +- hail/python/hailtop/utils/__init__.py | 5 +- hail/python/hailtop/utils/time.py | 4 - hail/python/hailtop/utils/utils.py | 13 +- hail/python/requirements.txt | 2 +- hail/python/setup.py | 1 - 
hail/python/test/hail/backend/__init__.py | 0 .../test/hail/backend/test_service_backend.py | 28 - .../hail/experimental/test_experimental.py | 3 +- hail/python/test/hail/expr/test_expr.py | 37 +- hail/python/test/hail/expr/test_ndarrays.py | 10 +- .../test/hail/genetics/test_pedigree.py | 2 +- hail/python/test/hail/ggplot/test_ggplot.py | 36 +- hail/python/test/hail/helpers.py | 29 +- hail/python/test/hail/linalg/test_linalg.py | 30 +- .../hail/matrixtable/test_file_formats.py | 23 +- .../hail/matrixtable/test_matrix_table.py | 5 + .../methods/relatedness/test_pc_relate.py | 1 - .../test/hail/methods/test_family_methods.py | 4 + hail/python/test/hail/methods/test_impex.py | 275 +-- hail/python/test/hail/methods/test_misc.py | 2 + hail/python/test/hail/methods/test_pca.py | 22 +- hail/python/test/hail/methods/test_qc.py | 3 + hail/python/test/hail/methods/test_statgen.py | 19 +- .../hail/stats/test_linear_mixed_model.py | 3 +- hail/python/test/hail/table/test_table.py | 44 +- hail/python/test/hail/test_context.py | 6 +- .../test/hail/utils/test_google_fs_utils.py | 86 - hail/python/test/hail/utils/test_utils.py | 22 +- hail/python/test/hail/vds/test_combiner.py | 2 - hail/python/test/hail/vds/test_vds.py | 3 + .../test/hailtop/hailctl/dataproc/conftest.py | 2 +- hail/src/main/scala/is/hail/HailContext.scala | 57 + .../main/scala/is/hail/HailFeatureFlags.scala | 73 - .../scala/is/hail/asm4s/ClassBuilder.scala | 24 +- hail/src/main/scala/is/hail/asm4s/Code.scala | 4 +- .../is/hail/backend/ExecuteContext.scala | 40 +- .../is/hail/backend/local/LocalBackend.scala | 17 +- .../scala/is/hail/backend/service/Main.scala | 2 +- .../hail/backend/service/ServiceBackend.scala | 599 +++---- .../is/hail/backend/service/Worker.scala | 61 +- .../is/hail/backend/spark/SparkBackend.scala | 47 +- .../scala/is/hail/expr/ir/BlockMatrixIR.scala | 30 +- .../is/hail/expr/ir/BlockMatrixWriter.scala | 36 +- .../main/scala/is/hail/expr/ir/Compile.scala | 14 +- .../src/main/scala/is/hail/expr/ir/Emit.scala | 13 +- .../is/hail/expr/ir/EmitClassBuilder.scala | 20 +- .../hail/expr/ir/ExtractIntervalFilters.scala | 13 +- .../scala/is/hail/expr/ir/GenericLines.scala | 17 +- hail/src/main/scala/is/hail/expr/ir/IR.scala | 10 +- .../scala/is/hail/expr/ir/InferType.scala | 2 +- .../scala/is/hail/expr/ir/LowerMatrixIR.scala | 169 +- .../ir/LowerOrInterpretNonCompilable.scala | 2 +- .../main/scala/is/hail/expr/ir/MatrixIR.scala | 2 + .../scala/is/hail/expr/ir/MatrixWriter.scala | 424 +---- .../main/scala/is/hail/expr/ir/Optimize.scala | 18 +- .../main/scala/is/hail/expr/ir/Parser.scala | 2 +- .../main/scala/is/hail/expr/ir/Pretty.scala | 5 +- .../is/hail/expr/ir/PruneDeadFields.scala | 942 +++++------ .../main/scala/is/hail/expr/ir/Random.scala | 481 ------ .../scala/is/hail/expr/ir/Requiredness.scala | 3 +- .../main/scala/is/hail/expr/ir/Simplify.scala | 53 +- .../expr/ir/SpecializedArrayBuilders.scala | 63 - .../is/hail/expr/ir/StringTableReader.scala | 25 +- .../main/scala/is/hail/expr/ir/TableIR.scala | 15 +- .../scala/is/hail/expr/ir/TypeCheck.scala | 23 +- .../ir/analyses/ComputeMethodSplits.scala | 5 +- .../is/hail/expr/ir/functions/Functions.scala | 38 - .../ir/functions/RelationalFunctions.scala | 1 - .../expr/ir/functions/StringFunctions.scala | 208 +-- .../ir/lowering/CanLowerEfficiently.scala | 14 +- .../expr/ir/lowering/LowerBlockMatrixIR.scala | 82 +- .../ir/lowering/LowerDistributedSort.scala | 98 +- .../hail/expr/ir/lowering/LowerTableIR.scala | 105 +- .../is/hail/expr/ir/lowering/LowerToCDA.scala | 6 +- 
.../hail/expr/ir/lowering/LoweringPass.scala | 8 +- .../expr/ir/lowering/LoweringPipeline.scala | 2 +- .../main/scala/is/hail/expr/ir/package.scala | 11 +- .../scala/is/hail/io/TextMatrixReader.scala | 702 ++++++++ .../main/scala/is/hail/io/bgen/LoadBgen.scala | 6 +- .../scala/is/hail/io/fs/GoogleStorageFS.scala | 33 +- .../scala/is/hail/io/plink/LoadPlink.scala | 14 +- .../main/scala/is/hail/io/vcf/ExportVCF.scala | 449 ++++- hail/src/main/scala/is/hail/lir/X.scala | 4 +- .../types/encoded/ENumpyBinaryNDArray.scala | 77 - .../scala/is/hail/types/encoded/EType.scala | 4 +- .../scala/is/hail/types/physical/PType.scala | 2 +- .../is/hail/types/physical/stypes/SCode.scala | 3 - .../physical/stypes/concrete/SRNGState.scala | 117 -- .../is/hail/types/virtual/TRNGState.scala | 12 - .../main/scala/is/hail/utils/Bitstring.scala | 90 - .../scala/is/hail/utils/ErrorHandling.scala | 6 - .../scala/is/hail/utils/TextTableReader.scala | 444 +++++ .../hail/utils/richUtils/RichContextRDD.scala | 1 + .../test/resources/sampleheaderdiffelem.txt | 11 - .../test/resources/samplenonintentries.txt | 6 - hail/src/test/scala/is/hail/HailSuite.scala | 217 +-- hail/src/test/scala/is/hail/TestUtils.scala | 163 ++ .../annotations/StagedConstructorSuite.scala | 16 +- .../test/scala/is/hail/asm4s/ASM4SSuite.scala | 73 +- .../test/scala/is/hail/asm4s/CodeSuite.scala | 14 +- .../is/hail/expr/ir/BlockMatrixIRSuite.scala | 1 + .../is/hail/expr/ir/EmitStreamSuite.scala | 64 +- .../expr/ir/ExtractIntervalFiltersSuite.scala | 4 +- .../is/hail/expr/ir/ForwardLetsSuite.scala | 9 +- .../test/scala/is/hail/expr/ir/IRSuite.scala | 12 +- .../is/hail/expr/ir/LocusFunctionsSuite.scala | 1 + .../scala/is/hail/expr/ir/PruneSuite.scala | 51 +- .../scala/is/hail/expr/ir/RandomSuite.scala | 106 -- .../is/hail/expr/ir/RequirednessSuite.scala | 20 +- .../scala/is/hail/expr/ir/SimplifySuite.scala | 67 +- .../scala/is/hail/expr/ir/TableIRSuite.scala | 4 +- .../ir/agg/StagedBlockLinkedListSuite.scala | 10 +- .../ir/lowering/BlockMatrixStageSuite.scala | 4 +- .../lowering/LowerDistributedSortSuite.scala | 61 +- .../scala/is/hail/io/AvroReaderSuite.scala | 1 + .../is/hail/io/compress/BGzipCodecSuite.scala | 2 +- .../hail/types/physical/PContainerTest.scala | 4 +- .../hail/types/physical/PNDArraySuite.scala | 2 +- .../types/physical/PhysicalTestUtils.scala | 2 +- .../scala/is/hail/utils/TextTableSuite.scala | 35 + infra/azure/main.tf | 12 +- infra/azure/modules/batch/main.tf | 10 +- infra/azure/modules/ci/main.tf | 6 +- infra/azure/modules/vdc/main.tf | 30 +- infra/azure/modules/vdc/variables.tf | 14 +- infra/azure/variables.tf | 27 +- infra/gcp/main.tf | 22 +- internal-gateway/internal-gateway.nginx.conf | 2 +- letsencrypt/subdomains.txt | 3 - query/Makefile | 71 +- tls/config.yaml | 4 - 231 files changed, 5714 insertions(+), 10932 deletions(-) delete mode 100644 batch/Dockerfile.driver-nginx delete mode 100644 batch/batch/resource_usage.py delete mode 100644 batch/driver-nginx.conf delete mode 100644 hail/python/hail/docs/cloud/query_on_batch.rst create mode 100644 hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_callset.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_dense.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_sample_metadata.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_sparse.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_variant_annotations.rst delete mode 100644 
hail/python/hail/docs/datasets/schemas/gnomad_pca_variant_loadings.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/gnomad_variant_co-occurrence.rst create mode 100644 hail/python/hail/fs/google_fs.py delete mode 100644 hail/python/test/hail/backend/__init__.py delete mode 100644 hail/python/test/hail/backend/test_service_backend.py delete mode 100644 hail/src/main/scala/is/hail/HailFeatureFlags.scala delete mode 100644 hail/src/main/scala/is/hail/expr/ir/Random.scala create mode 100644 hail/src/main/scala/is/hail/io/TextMatrixReader.scala delete mode 100644 hail/src/main/scala/is/hail/types/encoded/ENumpyBinaryNDArray.scala delete mode 100644 hail/src/main/scala/is/hail/types/physical/stypes/concrete/SRNGState.scala delete mode 100644 hail/src/main/scala/is/hail/types/virtual/TRNGState.scala delete mode 100644 hail/src/main/scala/is/hail/utils/Bitstring.scala create mode 100644 hail/src/main/scala/is/hail/utils/TextTableReader.scala delete mode 100644 hail/src/test/resources/sampleheaderdiffelem.txt delete mode 100644 hail/src/test/resources/samplenonintentries.txt delete mode 100644 hail/src/test/scala/is/hail/expr/ir/RandomSuite.scala create mode 100644 hail/src/test/scala/is/hail/utils/TextTableSuite.scala diff --git a/auth/Dockerfile b/auth/Dockerfile index 0c2bfa4dad1..9e886004c45 100644 --- a/auth/Dockerfile +++ b/auth/Dockerfile @@ -1,7 +1,7 @@ FROM {{ service_base_image.image }} RUN hail-pip-install \ - google-auth-oauthlib==0.4.6 \ + google-auth-oauthlib==0.4.2 \ google-auth==1.25.0 COPY auth/setup.py auth/MANIFEST.in /auth/ diff --git a/auth/deployment.yaml b/auth/deployment.yaml index a790339f89f..20c45711fc2 100644 --- a/auth/deployment.yaml +++ b/auth/deployment.yaml @@ -242,7 +242,7 @@ spec: - type: Resource resource: name: cpu - targetAverageUtilization: 2500 + targetAverageUtilization: 95 --- apiVersion: policy/v1beta1 kind: PodDisruptionBudget diff --git a/batch/Dockerfile.driver-nginx b/batch/Dockerfile.driver-nginx deleted file mode 100644 index 0e095d572a7..00000000000 --- a/batch/Dockerfile.driver-nginx +++ /dev/null @@ -1,12 +0,0 @@ -FROM {{ hail_ubuntu_image.image }} - -RUN hail-apt-get-install nginx - -RUN rm -f /etc/nginx/sites-enabled/default && \ - rm -f /etc/nginx/nginx.conf -ADD driver-nginx.conf /etc/nginx/nginx.conf - -RUN ln -sf /dev/stdout /var/log/nginx/access.log -RUN ln -sf /dev/stderr /var/log/nginx/error.log - -CMD ["nginx", "-g", "daemon off;"] diff --git a/batch/Dockerfile.worker b/batch/Dockerfile.worker index 1686e193381..ee43e8863d9 100644 --- a/batch/Dockerfile.worker +++ b/batch/Dockerfile.worker @@ -56,7 +56,7 @@ FROM base AS crun_builder RUN hail-apt-get-install make git gcc build-essential pkgconf libtool \ libsystemd-dev libcap-dev libseccomp-dev \ go-md2man libtool autoconf automake -RUN git clone --depth 1 --branch 1.4.4 https://github.com/containers/crun.git && \ +RUN git clone --depth 1 --branch 0.19.1 https://github.com/containers/crun.git && \ cd crun && \ ./autogen.sh && \ ./configure && \ diff --git a/batch/Makefile b/batch/Makefile index c024438bde4..28a2c62ac10 100644 --- a/batch/Makefile +++ b/batch/Makefile @@ -5,8 +5,6 @@ TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) BATCH_IMAGE := $(DOCKER_PREFIX)/batch:$(TOKEN) BATCH_WORKER_IMAGE := $(DOCKER_PREFIX)/batch-worker:$(TOKEN) -BATCH_DRIVER_NGINX_IMAGE := $(DOCKER_PREFIX)/batch-driver-nginx:$(TOKEN) - EXTRA_PYTHONPATH := ../hail/python:../gear:../web_common PYTHON := PYTHONPATH=$${PYTHONPATH:+$${PYTHONPATH}:}$(EXTRA_PYTHONPATH) python3 CLOUD 
:= $(shell kubectl get secret global-config --template={{.data.cloud}} | base64 --decode) @@ -36,20 +34,15 @@ jars/junixsocket-selftest-2.3.3-jar-with-dependencies.jar: src/main/java/is/hail/JVMEntryway.class: src/main/java/is/hail/JVMEntryway.java jars/junixsocket-selftest-2.3.3-jar-with-dependencies.jar javac -cp jars/junixsocket-selftest-2.3.3-jar-with-dependencies.jar $< -.PHONY: build-batch-driver-nginx -build-batch-driver-nginx: - python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"'$$(cat ../docker/hail-ubuntu-image-ref)'"}}' Dockerfile.driver-nginx Dockerfile.driver-nginx.out - ../docker-build.sh . Dockerfile.driver-nginx.out $(BATCH_DRIVER_NGINX_IMAGE) - .PHONY: build-worker build-worker: src/main/java/is/hail/JVMEntryway.class python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"'$$(cat ../docker/hail-ubuntu-image-ref)'"},"global":{"cloud":"$(CLOUD)"}}' Dockerfile.worker Dockerfile.worker.out ../docker-build.sh .. batch/Dockerfile.worker.out $(BATCH_WORKER_IMAGE) .PHONY: build -build: build-batch build-batch-driver-nginx build-worker +build: build-batch build-worker -JINJA_ENVIRONMENT = '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"batch_image":{"image":"$(BATCH_IMAGE)"},"batch_worker_image":{"image":"$(BATCH_WORKER_IMAGE)"},"default_ns":{"name":"$(NAMESPACE)"},"batch_database":{"user_secret_name":"sql-batch-user-config"},"scope":"$(SCOPE)","batch_driver_nginx_image":{"image":"$(BATCH_DRIVER_NGINX_IMAGE)"}}' +JINJA_ENVIRONMENT = '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"batch_image":{"image":"$(BATCH_IMAGE)"},"batch_worker_image":{"image":"$(BATCH_WORKER_IMAGE)"},"default_ns":{"name":"$(NAMESPACE)"},"batch_database":{"user_secret_name":"sql-batch-user-config"},"scope":"$(SCOPE)"}' .PHONY: deploy deploy: build diff --git a/batch/batch/driver/instance_collection/pool.py b/batch/batch/driver/instance_collection/pool.py index 93c22d4e3bf..c177ec839ad 100644 --- a/batch/batch/driver/instance_collection/pool.py +++ b/batch/batch/driver/instance_collection/pool.py @@ -3,7 +3,6 @@ import random from typing import Optional -import prometheus_client as pc import sortedcontainers from gear import Database @@ -29,12 +28,6 @@ log = logging.getLogger('pool') -SCHEDULING_LOOP_RUNS = pc.Counter( - 'scheduling_loop_runs', - 'Number of scheduling loop executions per pool', - ['pool_name'], -) - class Pool(InstanceCollection): @staticmethod @@ -380,7 +373,6 @@ async def schedule_loop_body(self): log.info(f'schedule {self.pool}: starting') start = time_msecs() - SCHEDULING_LOOP_RUNS.labels(pool_name=self.pool.name).inc() n_scheduled = 0 user_resources = await self.compute_fair_share() diff --git a/batch/batch/driver/job.py b/batch/batch/driver/job.py index ebd28f4c1fa..c008053693b 100644 --- a/batch/batch/driver/job.py +++ b/batch/batch/driver/job.py @@ -46,7 +46,6 @@ async def notify_batch_job_complete(db: Database, client_session: httpx.ClientSe GROUP BY batches.id; ''', (batch_id,), - 'notify_batch_job_complete', ) if not record: @@ -86,7 +85,6 @@ async def add_attempt_resources(db, batch_id, job_id, attempt_id, resources): ON DUPLICATE KEY UPDATE quantity = quantity; ''', resource_args, - 'add_attempt_resources', ) except Exception: log.exception(f'error while inserting resources for job {job_id}, attempt {attempt_id}') @@ -211,7 +209,6 @@ async def mark_job_creating( CALL mark_job_creating(%s, %s, %s, %s, %s); ''', (batch_id, job_id, attempt_id, instance.name, start_time), - 'mark_job_creating', ) except 
Exception: log.info(f'error while marking job {id} creating on {instance}') diff --git a/batch/batch/driver/main.py b/batch/batch/driver/main.py index 63f1d6c260e..266c29b96d3 100644 --- a/batch/batch/driver/main.py +++ b/batch/batch/driver/main.py @@ -30,6 +30,7 @@ from hailtop import aiotools, httpx from hailtop.config import get_deploy_config from hailtop.hail_logging import AccessLogger +from hailtop.tls import internal_server_ssl_context from hailtop.utils import AsyncWorkerPool, Notice, dump_all_stacktraces, periodically_call, serialization, time_msecs from web_common import render_template, set_message, setup_aiohttp_jinja2, setup_common_static_routes @@ -1260,4 +1261,5 @@ def run(): host='0.0.0.0', port=5000, access_log_class=AccessLogger, + ssl_context=internal_server_ssl_context(), ) diff --git a/batch/batch/file_store.py b/batch/batch/file_store.py index bd073e9318e..a6490475ae0 100644 --- a/batch/batch/file_store.py +++ b/batch/batch/file_store.py @@ -1,14 +1,10 @@ import asyncio import logging -from typing import Optional - -import pandas as pd from hailtop.aiotools.fs import AsyncFS from .batch_format_version import BatchFormatVersion from .globals import BATCH_FORMAT_VERSION -from .resource_usage import ResourceUsageMonitor from .spec_writer import SpecWriter log = logging.getLogger('logstore') @@ -34,11 +30,6 @@ def log_path(self, format_version, batch_id, job_id, attempt_id, task): return f'{self.batch_log_dir(batch_id)}/{job_id}/{task}/log' return f'{self.batch_log_dir(batch_id)}/{job_id}/{attempt_id}/{task}/log' - def resource_usage_path(self, format_version, batch_id, job_id, attempt_id, task): - if not format_version.has_attempt_in_log_path(): - return f'{self.batch_log_dir(batch_id)}/{job_id}/{task}/resource_usage' - return f'{self.batch_log_dir(batch_id)}/{job_id}/{attempt_id}/{task}/resource_usage' - async def read_log_file(self, format_version, batch_id, job_id, attempt_id, task): url = self.log_path(format_version, batch_id, job_id, attempt_id, task) data = await self.fs.read(url) @@ -48,17 +39,6 @@ async def write_log_file(self, format_version, batch_id, job_id, attempt_id, tas url = self.log_path(format_version, batch_id, job_id, attempt_id, task) await self.fs.write(url, data.encode('utf-8')) - async def read_resource_usage_file( - self, format_version, batch_id, job_id, attempt_id, task - ) -> Optional[pd.DataFrame]: - url = self.resource_usage_path(format_version, batch_id, job_id, attempt_id, task) - data = await self.fs.read(url) - return ResourceUsageMonitor.decode_to_df(data) - - async def write_resource_usage_file(self, format_version, batch_id, job_id, attempt_id, task, data): - url = self.resource_usage_path(format_version, batch_id, job_id, attempt_id, task) - await self.fs.write(url, data) - async def delete_batch_logs(self, batch_id): url = self.batch_log_dir(batch_id) await self.fs.rmtree(None, url) diff --git a/batch/batch/front_end/front_end.py b/batch/batch/front_end/front_end.py index b24a7c3a1c6..a170d1ef0ea 100644 --- a/batch/batch/front_end/front_end.py +++ b/batch/batch/front_end/front_end.py @@ -5,12 +5,11 @@ import logging import os import random -import re import signal import traceback from functools import wraps from numbers import Number -from typing import Any, Awaitable, Callable, Dict, Optional, Union +from typing import Dict, Optional, Union import aiohttp import aiohttp_session @@ -63,7 +62,6 @@ memory_to_worker_type, valid_machine_types, ) -from ..cloud.utils import ACCEPTABLE_QUERY_JAR_URL_PREFIX from ..exceptions import ( 
BatchOperationAlreadyCompletedError, BatchUserError, @@ -74,7 +72,6 @@ from ..file_store import FileStore from ..globals import BATCH_FORMAT_VERSION, HTTP_CLIENT_MAX_SIZE from ..inst_coll_config import InstanceCollectionConfigs -from ..resource_usage import ResourceUsageMonitor from ..spec_writer import SpecWriter from ..utils import accrued_cost_from_cost_and_msec_mcpu, coalesce, query_billing_projects from .validate import ValidationError, validate_and_clean_jobs, validate_batch @@ -331,45 +328,7 @@ async def get_jobs(request, userdata, batch_id): # pylint: disable=unused-argum return web.json_response(resp) -async def _get_job_record(app, batch_id, job_id): - db: Database = app['db'] - - record = await db.select_and_fetchone( - ''' -SELECT jobs.state, jobs.spec, ip_address, format_version, jobs.attempt_id, t.attempt_id AS last_cancelled_attempt_id -FROM jobs -INNER JOIN batches - ON jobs.batch_id = batches.id -LEFT JOIN attempts - ON jobs.batch_id = attempts.batch_id AND jobs.job_id = attempts.job_id AND jobs.attempt_id = attempts.attempt_id -LEFT JOIN instances - ON attempts.instance_name = instances.name -LEFT JOIN ( - SELECT batch_id, job_id, attempt_id - FROM attempts - WHERE reason = "cancelled" AND batch_id = %s AND job_id = %s - ORDER BY end_time DESC - LIMIT 1 -) AS t - ON jobs.batch_id = t.batch_id AND jobs.job_id = t.job_id -WHERE jobs.batch_id = %s AND NOT deleted AND jobs.job_id = %s; -''', - (batch_id, job_id, batch_id, job_id), - ) - if not record: - raise web.HTTPNotFound() - return record - - -async def _get_resource_from_record( - app, - batch_id: int, - job_id: int, - record: dict, - endpoint: str, - handle_running_response: Callable[[aiohttp.ClientResponse], Awaitable[Any]], - cloud_storage_reader: Callable[[BatchFormatVersion, int, int, str, str], Awaitable[Any]], -) -> Optional[Dict[str, Any]]: +async def _get_job_log_from_record(app, batch_id, job_id, record): client_session: httpx.ClientSession = app['client_session'] batch_format_version = BatchFormatVersion(record['format_version']) @@ -391,11 +350,13 @@ async def _get_resource_from_record( if state == 'Running': try: - resp = await request_retry_transient_errors(client_session, 'GET', f'http://{ip_address}:5000{endpoint}') - return await handle_running_response(resp) + resp = await request_retry_transient_errors( + client_session, 'GET', f'http://{ip_address}:5000/api/v1alpha/batches/{batch_id}/jobs/{job_id}/log' + ) + return await resp.json() except aiohttp.ClientResponseError: - log.exception(f'while getting resource for {(batch_id, job_id)}') - return {task: None for task in tasks} + log.exception(f'while getting log for {(batch_id, job_id)}') + return {task: 'ERROR: encountered a problem while fetching the log' for task in tasks} if state in ('Pending', 'Ready', 'Creating'): return None @@ -408,76 +369,49 @@ async def _get_resource_from_record( attempt_id = record['attempt_id'] or record['last_cancelled_attempt_id'] assert attempt_id is not None - async def _read_resource_from_cloud_storage(task): + file_store: FileStore = app['file_store'] + batch_format_version = BatchFormatVersion(record['format_version']) + + async def _read_log_from_cloud_storage(task): try: - data = await cloud_storage_reader(batch_format_version, batch_id, job_id, attempt_id, task) + data = await file_store.read_log_file(batch_format_version, batch_id, job_id, attempt_id, task) except FileNotFoundError: id = (batch_id, job_id) - log.exception(f'missing file for {id} and task {task}') - data = None + log.exception(f'missing log file 
for {id} and task {task}') + data = 'ERROR: could not find log file' return task, data - return dict(await asyncio.gather(*[_read_resource_from_cloud_storage(task) for task in tasks])) - + return dict(await asyncio.gather(*[_read_log_from_cloud_storage(task) for task in tasks])) -async def _get_job_log(app, batch_id, job_id) -> Optional[Dict[str, str]]: - file_store: FileStore = app['file_store'] - record = await _get_job_record(app, batch_id, job_id) - - async def handle_running_response(resp: aiohttp.ClientResponse) -> Dict[str, str]: - return await resp.json() - - maybe_data = await _get_resource_from_record( - app, - batch_id, - job_id, - record, - f'/api/v1alpha/batches/{batch_id}/jobs/{job_id}/log', - handle_running_response, - file_store.read_log_file, - ) - - if maybe_data is None: - return None - - data = {} - for task, log in maybe_data.items(): - if log is None: - log = 'ERROR: could not find file' - data[task] = log - return data +async def _get_job_log(app, batch_id, job_id): + db: Database = app['db'] -async def _get_job_resource_usage(app, batch_id, job_id) -> Optional[Dict[str, Optional[pd.DataFrame]]]: - file_store: FileStore = app['file_store'] - record = await _get_job_record(app, batch_id, job_id) - - async def handle_running_response(resp: aiohttp.ClientResponse) -> Dict[str, Optional[pd.DataFrame]]: - resource_usage = {} - - reader = aiohttp.MultipartReader.from_response(resp) - while True: - part = await reader.next() # pylint: disable=not-callable - if part is None: - break - - assert isinstance(part, aiohttp.BodyPartReader) - task = part.filename - assert task in ('input', 'main', 'output'), task - data = await part.read() - resource_usage[task] = ResourceUsageMonitor.decode_to_df(data) - - return resource_usage - - return await _get_resource_from_record( - app, - batch_id, - job_id, - record, - f'/api/v1alpha/batches/{batch_id}/jobs/{job_id}/resource_usage', - handle_running_response, - file_store.read_resource_usage_file, + record = await db.select_and_fetchone( + ''' +SELECT jobs.state, jobs.spec, ip_address, format_version, jobs.attempt_id, t.attempt_id AS last_cancelled_attempt_id +FROM jobs +INNER JOIN batches + ON jobs.batch_id = batches.id +LEFT JOIN attempts + ON jobs.batch_id = attempts.batch_id AND jobs.job_id = attempts.job_id AND jobs.attempt_id = attempts.attempt_id +LEFT JOIN instances + ON attempts.instance_name = instances.name +LEFT JOIN ( + SELECT batch_id, job_id, attempt_id + FROM attempts + WHERE reason = "cancelled" AND batch_id = %s AND job_id = %s + ORDER BY end_time DESC + LIMIT 1 +) AS t + ON jobs.batch_id = t.batch_id AND jobs.job_id = t.job_id +WHERE jobs.batch_id = %s AND NOT deleted AND jobs.job_id = %s; +''', + (batch_id, job_id, batch_id, job_id), ) + if not record: + raise web.HTTPNotFound() + return await _get_job_log_from_record(app, batch_id, job_id, record) async def _get_attributes(app, record): @@ -731,14 +665,6 @@ async def create_jobs(request: aiohttp.web.Request, userdata: dict): return await _create_jobs(userdata, job_specs, batch_id, app) -NON_HEX_DIGIT = re.compile('[^A-Fa-f0-9]') - - -def assert_is_sha_1_hex_string(revision: str): - if len(revision) != 40 or NON_HEX_DIGIT.search(revision): - raise web.HTTPBadRequest(reason=f'revision must be 40 character hexadecimal encoded SHA-1, got: {revision}') - - async def _create_jobs(userdata: dict, job_specs: dict, batch_id: int, app: aiohttp.web.Application): db: Database = app['db'] file_store: FileStore = app['file_store'] @@ -834,25 +760,14 @@ async def 
_create_jobs(userdata: dict, job_specs: dict, batch_id: int, app: aioh raise web.HTTPBadRequest(reason='cannot specify cpu and memory with machine_type') if spec['process']['type'] == 'jvm': - jvm_requested_cpu = parse_cpu_in_mcpu(resources.get('cpu', BATCH_JOB_DEFAULT_CPU)) - if 'cpu' in resources and jvm_requested_cpu not in (1000, 8000): - raise web.HTTPBadRequest(reason='invalid cpu for jvm jobs. must be 1 or 8') - if 'memory' in resources and resources['memory'] == 'lowmem': - raise web.HTTPBadRequest(reason='jvm jobs cannot be on lowmem machines') + if 'cpu' in resources: + raise web.HTTPBadRequest(reason='jvm jobs may not specify cpu') + if 'memory' in resources and resources['memory'] != 'standard': + raise web.HTTPBadRequest(reason='jvm jobs may not specify memory') if 'storage' in resources: raise web.HTTPBadRequest(reason='jvm jobs may not specify storage') if machine_type is not None: raise web.HTTPBadRequest(reason='jvm jobs may not specify machine_type') - if spec['process']['jar_spec']['type'] == 'git_revision': - revision = spec['process']['jar_spec']['value'] - assert_is_sha_1_hex_string(revision) - spec['process']['jar_spec']['type'] = 'jar_url' - spec['process']['jar_spec']['value'] = ACCEPTABLE_QUERY_JAR_URL_PREFIX + '/' + revision + '.jar' - else: - assert spec['process']['jar_spec']['type'] == 'jar_url' - jar_url = spec['process']['jar_spec']['value'] - if not jar_url.startswith(ACCEPTABLE_QUERY_JAR_URL_PREFIX): - raise web.HTTPBadRequest(reason=f'unacceptable JAR url: {jar_url}') req_memory_bytes: Optional[int] if machine_type is None: @@ -1394,7 +1309,7 @@ async def _close_batch(app: aiohttp.web.Application, batch_id: int, user: str, d client_session: httpx.ClientSession = app['client_session'] try: now = time_msecs() - await db.check_call_procedure('CALL close_batch(%s, %s);', (batch_id, now), 'close_batch') + await db.check_call_procedure('CALL close_batch(%s, %s);', (batch_id, now)) except CallError as e: # 2: wrong number of jobs if e.rv['rc'] == 2: @@ -1596,63 +1511,6 @@ async def get_job(request, userdata, batch_id): # pylint: disable=unused-argume return web.json_response(status) -def plot_job_durations(container_statuses: dict, batch_id: int, job_id: int): - data = [] - for step in ['input', 'main', 'output']: - if container_statuses[step]: - for timing_name, timing_data in container_statuses[step]['timing'].items(): - if timing_data is not None: - plot_dict = { - 'Title': f'{(batch_id, job_id)}', - 'Step': step, - 'Task': timing_name, - } - - if timing_data.get('start_time') is not None: - plot_dict['Start'] = datetime.datetime.fromtimestamp(timing_data['start_time'] / 1000) - - finish_time = timing_data.get('finish_time') - if finish_time is None: - finish_time = time_msecs() - plot_dict['Finish'] = datetime.datetime.fromtimestamp(finish_time / 1000) - - data.append(plot_dict) - - if not data: - return None - - df = pd.DataFrame(data) - - fig = px.timeline( - df, - x_start='Start', - x_end='Finish', - y='Step', - color='Task', - hover_data=['Step'], - color_discrete_sequence=px.colors.sequential.dense, - category_orders={ - 'Step': ['input', 'main', 'output'], - 'Task': [ - 'pulling', - 'setting up overlay', - 'setting up network', - 'running', - 'uploading_log', - 'uploading_resource_usage', - ], - }, - ) - - return json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder) - - -def plot_resource_usage( - resource_usage: Optional[Dict[str, Optional[pd.DataFrame]]] # pylint: disable=unused-argument -) -> Optional[str]: - return None - - 
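The front-end hunk above folds log fetching back into a single _get_job_log_from_record path: running jobs are asked for their logs over HTTP, unstarted jobs return nothing, and finished jobs read one log file per task from cloud storage. A rough, illustrative sketch of that control flow (not the exact front-end code; fetch_logs_from_worker and read_task_log are hypothetical stand-ins for the client-session request and FileStore.read_log_file):

# Illustrative sketch only; the two callables are hypothetical stand-ins for
# the worker HTTP request and the FileStore log read.
import asyncio
from typing import Callable, Dict, List, Optional


async def sketch_get_job_log(record: dict, tasks: List[str],
                             fetch_logs_from_worker: Callable,
                             read_task_log: Callable) -> Optional[Dict[str, str]]:
    state = record['state']

    if state == 'Running':
        # Live jobs are asked directly; the worker answers with {task: log} JSON.
        return await fetch_logs_from_worker(record['ip_address'])

    if state in ('Pending', 'Ready', 'Creating'):
        # Nothing has run yet, so there is no log to return.
        return None

    # Completed or cancelled jobs read one log file per task from cloud storage.
    attempt_id = record['attempt_id'] or record['last_cancelled_attempt_id']

    async def read_one(task: str):
        try:
            return task, await read_task_log(attempt_id, task)
        except FileNotFoundError:
            return task, 'ERROR: could not find log file'

    return dict(await asyncio.gather(*[read_one(t) for t in tasks]))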
@routes.get('/batches/{batch_id}/jobs/{job_id}') @web_billing_project_users_only() @catch_ui_error_in_dev @@ -1660,11 +1518,8 @@ async def ui_get_job(request, userdata, batch_id): app = request.app job_id = int(request.match_info['job_id']) - job, attempts, job_log, resource_usage = await asyncio.gather( - _get_job(app, batch_id, job_id), - _get_attempts(app, batch_id, job_id), - _get_job_log(app, batch_id, job_id), - _get_job_resource_usage(app, batch_id, job_id), + job, attempts, job_log = await asyncio.gather( + _get_job(app, batch_id, job_id), _get_attempts(app, batch_id, job_id), _get_job_log(app, batch_id, job_id) ) job['duration'] = humanize_timedelta_msecs(job['duration']) @@ -1677,7 +1532,6 @@ async def ui_get_job(request, userdata, batch_id): 'timing': { 'pulling': dictfix.NoneOr({'duration': dictfix.NoneOr(Number)}), 'running': dictfix.NoneOr({'duration': dictfix.NoneOr(Number)}), - 'uploading_resource_usage': dictfix.NoneOr({'duration': dictfix.NoneOr(Number)}), }, 'short_error': dictfix.NoneOr(str), 'error': dictfix.NoneOr(str), @@ -1694,9 +1548,10 @@ async def ui_get_job(request, userdata, batch_id): } job_status = dictfix.dictfix(job_status, job_status_spec) container_statuses = job_status['container_statuses'] + step_statuses = [container_statuses['input'], container_statuses['main'], container_statuses['output']] step_errors = {step: status['error'] for step, status in container_statuses.items() if status is not None} - for status in container_statuses.values(): + for status in step_statuses: # backwards compatibility if status and status['short_error'] is None and status['container_status']['out_of_memory']: status['short_error'] = 'out of memory' @@ -1724,19 +1579,60 @@ async def ui_get_job(request, userdata, batch_id): resources['actual_cpu'] = resources['cores_mcpu'] / 1000 del resources['cores_mcpu'] + data = [] + for step in ['input', 'main', 'output']: + if container_statuses[step]: + for timing_name, timing_data in container_statuses[step]['timing'].items(): + if timing_data is not None: + plot_dict = { + 'Title': f'{(batch_id, job_id)}', + 'Step': step, + 'Task': timing_name, + } + + if timing_data.get('start_time') is not None: + plot_dict['Start'] = datetime.datetime.fromtimestamp(timing_data['start_time'] / 1000) + + finish_time = timing_data.get('finish_time') + if finish_time is None: + finish_time = time_msecs() + plot_dict['Finish'] = datetime.datetime.fromtimestamp(finish_time / 1000) + + data.append(plot_dict) + + if data: + df = pd.DataFrame(data) + + fig = px.timeline( + df, + x_start='Start', + x_end='Finish', + y='Step', + color='Task', + hover_data=['Step'], + color_discrete_sequence=px.colors.sequential.dense, + category_orders={ + 'Step': ['input', 'main', 'output'], + 'Task': ['pulling', 'setting up overlay', 'setting up network', 'running', 'uploading_log'], + }, + ) + + plot_json = json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder) + else: + plot_json = None + page_context = { 'batch_id': batch_id, 'job_id': job_id, 'job': job, 'job_log': job_log, 'attempts': attempts, - 'container_statuses': container_statuses, + 'step_statuses': step_statuses, 'job_specification': job_specification, 'job_status_str': json.dumps(job, indent=2), 'step_errors': step_errors, 'error': job_status.get('error'), - 'plot_job_durations': plot_job_durations(container_statuses, batch_id, job_id), - 'plot_resource_usage': plot_resource_usage(resource_usage), + 'plot_json': plot_json, } return await render_template('batch', request, userdata, 'job.html', page_context) diff 
--git a/batch/batch/front_end/templates/job.html b/batch/batch/front_end/templates/job.html
index e32f26c8e51..f4229d8f11d 100644
--- a/batch/batch/front_end/templates/job.html
+++ b/batch/batch/front_end/templates/job.html
@@ -63,21 +63,12 @@

Attempts

Step Status

-{% if plot_job_durations is not none %}
+{% if plot_json is not none %}
-
+
-{% endif %}
-
-{% if plot_resource_usage is not none %}
-
- {% endif %} @@ -92,10 +83,10 @@

Step Status

- {% for name, step in container_statuses.items() %} + {% for step in step_statuses %} {% if step %} - {{ name }} + {{ step['name'] }} {% if step['timing']['pulling'] and step['timing']['pulling']['duration'] %} {{ step['timing']['pulling']['duration'] / 1000.0 }} diff --git a/batch/batch/front_end/validate.py b/batch/batch/front_end/validate.py index 3dff4d80bda..f1242d51525 100644 --- a/batch/batch/front_end/validate.py +++ b/batch/batch/front_end/validate.py @@ -71,12 +71,7 @@ required('image'): image_str, required('mount_docker_socket'): bool_type, }, - 'jvm': { - required('jar_spec'): keyed( - {required('type'): oneof('git_revision', 'jar_url'), required('value'): str_type} - ), - required('command'): listof(str_type), - }, + 'jvm': {required('command'): listof(str_type)}, }, ), 'requester_pays_project': str_type, diff --git a/batch/batch/resource_usage.py b/batch/batch/resource_usage.py deleted file mode 100644 index ebdac21f801..00000000000 --- a/batch/batch/resource_usage.py +++ /dev/null @@ -1,103 +0,0 @@ -import asyncio -import os -import struct -from typing import Optional - -import numpy as np -import pandas as pd - -from hailtop.utils import periodically_call, retry_long_running, time_msecs, time_ns - - -class ResourceUsageMonitor: - VERSION = 1 - - @staticmethod - def no_data() -> bytes: - return ResourceUsageMonitor.version_to_bytes() - - @staticmethod - def version_to_bytes() -> bytes: - return struct.pack('>q', ResourceUsageMonitor.VERSION) - - @staticmethod - def decode_to_df(data: bytes) -> Optional[pd.DataFrame]: - if len(data) == 0: - return None - - (version,) = struct.unpack_from('>q', data, 0) - assert version == ResourceUsageMonitor.VERSION, version - - dtype = [('time_msecs', '>i8'), ('memory_in_bytes', '>i8'), ('cpu_usage', '>f8')] - np_array = np.frombuffer(data, offset=8, dtype=dtype) - return pd.DataFrame.from_records(np_array) - - def __init__(self, container_name: str, output_file_path: str): - self.container_name = container_name - self.output_file_path = output_file_path - - self.last_time_ns: Optional[int] = None - self.last_cpu_ns: Optional[int] = None - - self.out = open(output_file_path, 'wb') # pylint: disable=consider-using-with - self.write_header() - - self.task: Optional[asyncio.Future] = None - - def write_header(self): - data = ResourceUsageMonitor.version_to_bytes() - self.out.write(data) - self.out.flush() - - def cpu_ns(self) -> Optional[int]: - usage_file = f'/sys/fs/cgroup/cpu/{self.container_name}/cpuacct.usage' - if os.path.exists(usage_file): - with open(usage_file, 'r', encoding='utf-8') as f: - return int(f.read().rstrip()) - return None - - def percent_cpu_usage(self) -> Optional[float]: - now_time_ns = time_ns() - now_cpu_ns = self.cpu_ns() - - if now_cpu_ns is None or self.last_cpu_ns is None or self.last_time_ns is None: - self.last_time_ns = now_time_ns - self.last_cpu_ns = now_cpu_ns - return None - - cpu_usage = (now_cpu_ns - self.last_cpu_ns) / (now_time_ns - self.last_time_ns) - - self.last_time_ns = now_time_ns - self.last_cpu_ns = now_cpu_ns - - return cpu_usage - - def memory_usage_bytes(self) -> Optional[int]: - usage_file = f'/sys/fs/cgroup/memory/{self.container_name}/memory.usage_in_bytes' - if os.path.exists(usage_file): - with open(usage_file, 'r', encoding='utf-8') as f: - return int(f.read().rstrip()) - return None - - async def measure(self): - now = time_msecs() - memory_usage_bytes = self.memory_usage_bytes() - percent_cpu_usage = self.percent_cpu_usage() - - if memory_usage_bytes is None or percent_cpu_usage is 
None: - return - - data = struct.pack('>2qd', now, memory_usage_bytes, percent_cpu_usage) - self.out.write(data) - self.out.flush() - - async def __aenter__(self): - self.task = asyncio.ensure_future( - retry_long_running(f'monitor {self.container_name} resource usage', periodically_call, 5, self.measure) - ) - return self - - async def __aexit__(self, exc_type, exc_val, exc_tb): - if self.task is not None: - self.task.cancel() - self.out.close() diff --git a/batch/batch/spec_writer.py b/batch/batch/spec_writer.py index 4fe4ae00eb7..1546ced2c8d 100644 --- a/batch/batch/spec_writer.py +++ b/batch/batch/spec_writer.py @@ -36,7 +36,6 @@ async def get_token_start_id(db, batch_id, job_id): LIMIT 1; ''', (batch_id, job_id), - 'get_token_start_id', ) token = bunch_record['token'] start_job_id = bunch_record['start_job_id'] diff --git a/batch/batch/worker/worker.py b/batch/batch/worker/worker.py index 1c5e63ea106..4ae13b2bcbb 100644 --- a/batch/batch/worker/worker.py +++ b/batch/batch/worker/worker.py @@ -16,19 +16,7 @@ import warnings from collections import defaultdict from contextlib import ExitStack, contextmanager -from typing import ( - Any, - Awaitable, - Callable, - ContextManager, - Dict, - Iterator, - List, - MutableMapping, - Optional, - Tuple, - Union, -) +from typing import Any, Awaitable, Callable, Dict, Iterator, List, MutableMapping, Optional, Tuple, Union import aiodocker # type: ignore import aiodocker.images @@ -38,11 +26,10 @@ import async_timeout from aiodocker.exceptions import DockerError # type: ignore from aiohttp import web -from sortedcontainers import SortedSet from gear.clients import get_cloud_async_fs, get_compute_client from hailtop import aiotools, httpx -from hailtop.aiotools import AsyncFS, LocalAsyncFS +from hailtop.aiotools import LocalAsyncFS from hailtop.aiotools.router_fs import RouterAsyncFS from hailtop.batch.hail_genetics_images import HAIL_GENETICS_IMAGES from hailtop.config import DeployConfig @@ -68,16 +55,10 @@ from ..batch_format_version import BatchFormatVersion from ..cloud.azure.worker.worker_api import AzureWorkerAPI from ..cloud.gcp.worker.worker_api import GCPWorkerAPI -from ..cloud.resource_utils import ( - is_valid_storage_request, - storage_gib_to_bytes, - worker_memory_per_core_bytes, - worker_memory_per_core_mib, -) +from ..cloud.resource_utils import is_valid_storage_request, storage_gib_to_bytes from ..file_store import FileStore from ..globals import HTTP_CLIENT_MAX_SIZE, RESERVED_STORAGE_GB_PER_CORE, STATUS_FORMAT_VERSION from ..publicly_available_images import publicly_available_images -from ..resource_usage import ResourceUsageMonitor from ..semaphore import FIFOWeightedSemaphore from ..utils import Box from ..worker.worker_api import CloudWorkerAPI @@ -166,8 +147,6 @@ def compose(auth: Union[MutableMapping, str, bytes], registry_addr: str = None): log.info(f'INSTANCE_CONFIG {INSTANCE_CONFIG}') log.info(f'CLOUD_WORKER_API {CLOUD_WORKER_API}') log.info(f'MAX_IDLE_TIME_MSECS {MAX_IDLE_TIME_MSECS}') -log.info(f'BATCH_WORKER_IMAGE {BATCH_WORKER_IMAGE}') -log.info(f'BATCH_WORKER_IMAGE_ID {BATCH_WORKER_IMAGE_ID}') log.info(f'INTERNET_INTERFACE {INTERNET_INTERFACE}') log.info(f'UNRESERVED_WORKER_DATA_DISK_SIZE_GB {UNRESERVED_WORKER_DATA_DISK_SIZE_GB}') log.info(f'ACCEPTABLE_QUERY_JAR_URL_PREFIX {ACCEPTABLE_QUERY_JAR_URL_PREFIX}') @@ -367,161 +346,13 @@ async def wrapper(f, *args, **kwargs): return wrapper -class ImageCannotBePulled(Exception): - pass - - -class ImageNotFound(Exception): - pass - - -class Image: - def __init__( - self, - 
name: str, - credentials: Union[CloudUserCredentials, 'JVMUserCredentials'], - client_session: httpx.ClientSession, - pool: concurrent.futures.ThreadPoolExecutor, - ): - self.image_name = name - self.credentials = credentials - self.client_session = client_session - self.pool = pool - - image_ref = parse_docker_image_reference(name) - if image_ref.tag is None and image_ref.digest is None: - log.info(f'adding latest tag to image {name} for {self}') - image_ref.tag = 'latest' - - if image_ref.name() in HAIL_GENETICS_IMAGES: - # We want the "hailgenetics/python-dill" translate to (based on the prefix): - # * gcr.io/hail-vdc/hailgenetics/python-dill - # * us-central1-docker.pkg.dev/hail-vdc/hail/hailgenetics/python-dill - image_ref.path = image_ref.name() - image_ref.domain = DOCKER_PREFIX.split('/', maxsplit=1)[0] - image_ref.path = '/'.join(DOCKER_PREFIX.split('/')[1:] + [image_ref.path]) - - self.image_ref = image_ref - self.image_ref_str = str(image_ref) - self.image_config: Optional[Dict[str, Any]] = None - self.image_id: Optional[str] = None - - @property - def is_cloud_image(self): - return (CLOUD == 'gcp' and self.image_ref.hosted_in('google')) or ( - CLOUD == 'azure' and self.image_ref.hosted_in('azure') - ) - - @property - def is_public_image(self): - return self.image_ref.name() in PUBLIC_IMAGES - - @property - def rootfs_path(self) -> str: - assert self.image_id is not None - return f'/host/rootfs/{self.image_id}' - - async def _pull_image(self): - assert docker - - try: - if not self.is_cloud_image: - await self._ensure_image_is_pulled() - elif self.is_public_image: - auth = await self._batch_worker_access_token() - await self._ensure_image_is_pulled(auth=auth) - elif self.image_ref_str == BATCH_WORKER_IMAGE and isinstance(self.credentials, JVMUserCredentials): - pass - else: - # Pull to verify this user has access to this - # image. - # FIXME improve the performance of this with a - # per-user image cache. 
- auth = self._current_user_access_token() - await docker_call_retry(MAX_DOCKER_IMAGE_PULL_SECS, f'{self}')( - docker.images.pull, self.image_ref_str, auth=auth - ) - except DockerError as e: - if e.status == 404 and 'pull access denied' in e.message: - raise ImageCannotBePulled from e - if 'not found: manifest unknown' in e.message: - raise ImageNotFound from e - raise - - image_config, _ = await check_exec_output('docker', 'inspect', self.image_ref_str) - image_configs[self.image_ref_str] = json.loads(image_config)[0] - - async def _ensure_image_is_pulled(self, auth: Optional[Dict[str, str]] = None): - assert docker - - try: - await docker_call_retry(MAX_DOCKER_OTHER_OPERATION_SECS, f'{self}')(docker.images.get, self.image_ref_str) - except DockerError as e: - if e.status == 404: - await docker_call_retry(MAX_DOCKER_IMAGE_PULL_SECS, f'{self}')( - docker.images.pull, self.image_ref_str, auth=auth - ) - else: - raise - - async def _batch_worker_access_token(self) -> Dict[str, str]: - return await CLOUD_WORKER_API.worker_access_token(self.client_session) - - def _current_user_access_token(self) -> Dict[str, str]: - assert self.credentials - return {'username': self.credentials.username, 'password': self.credentials.password} - - async def _extract_rootfs(self): - assert self.image_id - os.makedirs(self.rootfs_path) - await check_shell( - f'id=$(docker create {self.image_id}) && docker export $id | tar -C {self.rootfs_path} -xf - && docker rm $id' - ) - - async def _localize_rootfs(self): - async with image_lock.reader_lock: - # FIXME Authentication is entangled with pulling images. We need a way to test - # that a user has access to a cached image without pulling. - await self._pull_image() - self.image_config = image_configs[self.image_ref_str] - self.image_id = self.image_config['Id'].split(":")[1] - assert self.image_id - - worker.image_data[self.image_id] += 1 - - image_data = worker.image_data[self.image_id] - async with image_data.lock: - if not image_data.extracted: - try: - await self._extract_rootfs() - image_data.extracted = True - log.info(f'Added expanded image to cache: {self.image_ref_str}, ID: {self.image_id}') - except asyncio.CancelledError: - raise - except Exception: - log.exception(f'while extracting image {self.image_ref_str}, ID: {self.image_id}') - await blocking_to_async(self.pool, shutil.rmtree, self.rootfs_path) - raise - - async def pull(self): - await asyncio.shield(self._localize_rootfs()) - - def release(self): - if self.image_id is not None: - worker.image_data[self.image_id] -= 1 - - -class StepInterruptedError(Exception): - pass - - async def run_until_done_or_deleted(event: asyncio.Event, f: Callable[..., Awaitable[Any]], *args, **kwargs): step = asyncio.ensure_future(f(*args, **kwargs)) deleted = asyncio.ensure_future(event.wait()) try: await asyncio.wait([deleted, step], return_when=asyncio.FIRST_COMPLETED) if deleted.done(): - raise StepInterruptedError + raise JobDeletedError assert step.done() return step.result() finally: @@ -551,19 +382,7 @@ class JobDeletedError(Exception): pass -class ContainerDeletedError(Exception): - pass - - -class ContainerTimeoutError(Exception): - pass - - -class ContainerCreateError(Exception): - pass - - -class ContainerStartError(Exception): +class JobTimeoutError(Exception): pass @@ -585,248 +404,209 @@ def user_error(e): # bucket name and your credentials.\n') if b'Bad credentials for bucket' in e.stderr: return True - if isinstance(e, (ImageNotFound, ImageCannotBePulled)): - return True - if isinstance(e, 
(ContainerTimeoutError, ContainerDeletedError)): - return True return False class Container: - def __init__( - self, - fs: AsyncFS, - name: str, - image: Image, - scratch_dir: str, - command: List[str], - cpu_in_mcpu: int, - memory_in_bytes: int, - network: Optional[Union[bool, str]] = None, - port: Optional[int] = None, - timeout: Optional[int] = None, - unconfined: Optional[bool] = None, - volume_mounts: Optional[List[dict]] = None, - env: Optional[List[str]] = None, - ): - self.fs = fs - assert self.fs - + def __init__(self, job, name, spec, client_session: httpx.ClientSession, worker: 'Worker'): + self.job = job self.name = name - self.image = image - self.command = command - self.cpu_in_mcpu = cpu_in_mcpu - self.memory_in_bytes = memory_in_bytes - self.network = network - self.port = port - self.timeout = timeout - self.unconfined = unconfined - self.volume_mounts = volume_mounts or [] - self.env = env or [] - + self.spec = spec + self.client_session = client_session + self.worker = worker self.deleted_event = asyncio.Event() + image_ref = parse_docker_image_reference(self.spec['image']) + if image_ref.tag is None and image_ref.digest is None: + log.info(f'adding latest tag to image {self.spec["image"]} for {self}') + image_ref.tag = 'latest' + + if image_ref.name() in HAIL_GENETICS_IMAGES: + # We want the "hailgenetics/python-dill" translate to (based on the prefix): + # * gcr.io/hail-vdc/hailgenetics/python-dill + # * us-central1-docker.pkg.dev/hail-vdc/hail/hailgenetics/python-dill + image_ref.path = image_ref.name() + image_ref.domain = DOCKER_PREFIX.split('/', maxsplit=1)[0] + image_ref.path = '/'.join(DOCKER_PREFIX.split('/')[1:] + [image_ref.path]) + + self.image_ref = image_ref + self.image_ref_str = str(image_ref) + self.image_id = None + + self.port = self.spec.get('port') self.host_port = None + self.timeout = self.spec.get('timeout') + self.state = 'pending' - self.error: Optional[str] = None - self.short_error: Optional[str] = None - self.container_status: Optional[dict] = None + self.error = None + self.short_error = None + self.container_status = None self.started_at: Optional[int] = None self.finished_at: Optional[int] = None self.timings = Timings() + self.logbuffer = bytearray() self.overlay_path = None - self.container_scratch = scratch_dir + self.image_config = None + self.rootfs_path = None + scratch = self.spec['scratch'] + self.container_scratch = f'{scratch}/{self.name}' self.container_overlay_path = f'{self.container_scratch}/rootfs_overlay' self.config_path = f'{self.container_scratch}/config' self.log_path = f'{self.container_scratch}/container.log' - self.resource_usage_path = f'{self.container_scratch}/resource_usage' self.overlay_mounted = False + self.container_name = f'batch-{self.job.batch_id}-job-{self.job.job_id}-{self.name}' + self.netns: Optional[NetworkNamespace] = None # regarding no-member: https://github.com/PyCQA/pylint/issues/4223 self.process: Optional[asyncio.subprocess.Process] = None # pylint: disable=no-member - self._run_fut: Optional[asyncio.Future] = None - self._cleanup_lock = asyncio.Lock() - - self._killed = False - self._cleaned_up = False + assert self.worker.fs is not None - async def create(self): - self.state = 'creating' + async def run(self): try: - with self._step('pulling'): - await self._run_until_done_or_deleted(self.image.pull) - - with self._step('setting up overlay'): - await self._run_until_done_or_deleted(self._setup_overlay) - with self._step('setting up network'): - await 
self._run_until_done_or_deleted(self._setup_network_namespace) + async def localize_rootfs(): + async def _localize_rootfs(): + async with image_lock.reader_lock: + # FIXME Authentication is entangled with pulling images. We need a way to test + # that a user has access to a cached image without pulling. + await self.pull_image() + self.image_config = image_configs[self.image_ref_str] + self.image_id = self.image_config['Id'].split(":")[1] + self.worker.image_data[self.image_id] += 1 + + self.rootfs_path = f'/host/rootfs/{self.image_id}' + + image_data = self.worker.image_data[self.image_id] + async with image_data.lock: + if not image_data.extracted: + try: + await self.extract_rootfs() + image_data.extracted = True + log.info( + f'Added expanded image to cache: {self.image_ref_str}, ID: {self.image_id}' + ) + except asyncio.CancelledError: + raise + except Exception: + log.exception(f'while extracting image {self.image_ref_str}, ID: {self.image_id}') + await blocking_to_async(worker.pool, shutil.rmtree, self.rootfs_path) + + await asyncio.shield(_localize_rootfs()) + + with self.step('pulling'): + await self.run_until_done_or_deleted(localize_rootfs) + + with self.step('setting up overlay'): + await self.run_until_done_or_deleted(self.setup_overlay) + + with self.step('setting up network'): + await self.run_until_done_or_deleted(self.setup_network_namespace) + + with self.step('running'): + timed_out = await self.run_until_done_or_deleted(self.run_container) + + self.container_status = self.get_container_status() + + if timed_out: + self.short_error = 'timed out' + raise JobTimeoutError(f'timed out after {self.timeout}s') + + if self.container_status['exit_code'] == 0: + self.state = 'succeeded' + else: + if self.container_status['out_of_memory']: + self.short_error = 'out of memory' + self.state = 'failed' except asyncio.CancelledError: raise + except JobDeletedError: + self.state = 'cancelled' except Exception as e: - if isinstance(e, ImageNotFound): - self.short_error = 'image not found' - elif isinstance(e, ImageCannotBePulled): - self.short_error = 'image cannot be pulled' - + if not isinstance(e, JobTimeoutError) and not user_error(e): + log.exception(f'while running {self}') self.state = 'error' self.error = traceback.format_exc() - - if not isinstance(e, ContainerDeletedError) and not user_error(e): - log.exception(f'while creating {self}') - raise ContainerCreateError from e - raise - - async def start(self): - async def _run(): - self.state = 'running' - try: - with self._step('running'): - timed_out = await self._run_until_done_or_deleted(self._run_container) - - self.container_status = self.get_container_status() - assert self.container_status is not None - - if timed_out: - self.short_error = 'timed out' - raise ContainerTimeoutError(f'timed out after {self.timeout}s') - - if self.container_status['exit_code'] == 0: - self.state = 'succeeded' - else: - if self.container_status['out_of_memory']: - self.short_error = 'out of memory' - self.state = 'failed' - except asyncio.CancelledError: - raise - except ContainerDeletedError: - self.state = 'cancelled' - except Exception as e: - self.state = 'error' - self.error = traceback.format_exc() - - if not isinstance(e, ContainerTimeoutError) and not user_error(e): - log.exception(f'while running {self}') - raise ContainerStartError from e - raise - - self._run_fut = asyncio.ensure_future(self._run_until_done_or_deleted(_run)) - - async def wait(self): - assert self._run_fut - try: - await self._run_fut finally: - self._run_fut = 
None - - async def run(self, on_completion: Callable[..., Awaitable[Any]], *args, **kwargs): - async with self._cleanup_lock: try: - await self.create() - await self.start() - await self.wait() + with self.step('uploading_log'): + await self.upload_log() finally: try: - await on_completion(*args, **kwargs) + await self.delete_container() finally: - try: - await self._kill() - finally: - await self._cleanup() + if self.image_id: + self.worker.image_data[self.image_id] -= 1 - async def _kill(self): - if self._killed: - return + async def run_until_done_or_deleted(self, f: Callable[..., Awaitable[Any]]): + return await run_until_done_or_deleted(self.deleted_event, f) - try: - if self._run_fut is not None: - await self._run_fut - finally: - try: - if self.container_is_running(): - assert self.process is not None - try: - log.info(f'{self} container is still running, killing crun process') - try: - await check_exec_output('crun', 'kill', '--all', self.name, 'SIGKILL') - except CalledProcessError as e: - not_extant_message = ( - b'error opening file `/run/crun/' - + self.name.encode() - + b'/status`: No such file or directory' - ) - if not (e.returncode == 1 and not_extant_message in e.stderr): - log.exception(f'while deleting container {self}', exc_info=True) - finally: - try: - await send_signal_and_wait(self.process, 'SIGTERM', timeout=5) - except asyncio.TimeoutError: - try: - await send_signal_and_wait(self.process, 'SIGKILL', timeout=5) - except asyncio.CancelledError: - raise - except Exception: - log.exception(f'could not kill process for container {self}') - finally: - self.process = None - finally: - self._run_fut = None - self._killed = True + def step(self, name: str): + return self.timings.step(name) - async def _cleanup(self): - if self._cleaned_up: - return + async def pull_image(self): + is_cloud_image = (CLOUD == 'gcp' and self.image_ref.hosted_in('google')) or ( + CLOUD == 'azure' and self.image_ref.hosted_in('azure') + ) + is_public_image = self.image_ref.name() in PUBLIC_IMAGES - assert self._run_fut is None try: - if self.overlay_mounted: - try: - await check_shell(f'umount -l {self.container_overlay_path}/merged') - self.overlay_mounted = False - except asyncio.CancelledError: - raise - except Exception: - log.exception(f'while unmounting overlay in {self}', exc_info=True) + if not is_cloud_image: + await self.ensure_image_is_pulled() + elif is_public_image: + auth = await self.batch_worker_access_token() + await self.ensure_image_is_pulled(auth=auth) + else: + # Pull to verify this user has access to this + # image. + # FIXME improve the performance of this with a + # per-user image cache. 
+ auth = self.current_user_access_token() + await docker_call_retry(MAX_DOCKER_IMAGE_PULL_SECS, f'{self}')( + docker.images.pull, self.image_ref_str, auth=auth + ) + except DockerError as e: + if e.status == 404 and 'pull access denied' in e.message: + self.short_error = 'image cannot be pulled' + elif 'not found: manifest unknown' in e.message: + self.short_error = 'image not found' + raise - if self.host_port is not None: - port_allocator.free(self.host_port) - self.host_port = None + image_config, _ = await check_exec_output('docker', 'inspect', self.image_ref_str) + image_configs[self.image_ref_str] = json.loads(image_config)[0] - if self.netns: - network_allocator.free(self.netns) - self.netns = None - finally: - try: - self.image.release() - finally: - self._cleaned_up = True + async def ensure_image_is_pulled(self, auth=None): + try: + await docker_call_retry(MAX_DOCKER_OTHER_OPERATION_SECS, f'{self}')(docker.images.get, self.image_ref_str) + except DockerError as e: + if e.status == 404: + await docker_call_retry(MAX_DOCKER_IMAGE_PULL_SECS, f'{self}')( + docker.images.pull, self.image_ref_str, auth=auth + ) + else: + raise - async def remove(self): - self.deleted_event.set() - async with self._cleanup_lock: - try: - await self._kill() - finally: - await self._cleanup() + async def batch_worker_access_token(self): + return await CLOUD_WORKER_API.worker_access_token(self.client_session) - async def _run_until_done_or_deleted(self, f: Callable[..., Awaitable[Any]], *args, **kwargs): - try: - return await run_until_done_or_deleted(self.deleted_event, f, *args, **kwargs) - except StepInterruptedError as e: - raise ContainerDeletedError from e + def current_user_access_token(self): + return {'username': self.job.credentials.username, 'password': self.job.credentials.password} - def _step(self, name: str) -> ContextManager: - return self.timings.step(name) + async def extract_rootfs(self): + assert self.rootfs_path + os.makedirs(self.rootfs_path) + await check_shell( + f'id=$(docker create {self.image_id}) && docker export $id | tar -C {self.rootfs_path} -xf - && docker rm $id' + ) - async def _setup_overlay(self): - lower_dir = self.image.rootfs_path + async def setup_overlay(self): + lower_dir = self.rootfs_path upper_dir = f'{self.container_overlay_path}/upper' work_dir = f'{self.container_overlay_path}/work' merged_dir = f'{self.container_overlay_path}/merged' @@ -837,21 +617,21 @@ async def _setup_overlay(self): ) self.overlay_mounted = True - async def _setup_network_namespace(self): - if self.network == 'private': - self.netns = await network_allocator.allocate_private() - else: - assert self.network is None or self.network == 'public' + async def setup_network_namespace(self): + network = self.spec.get('network') + if network is None or network is True: self.netns = await network_allocator.allocate_public() - + else: + assert network == 'private' + self.netns = await network_allocator.allocate_private() if self.port is not None: self.host_port = await port_allocator.allocate() await self.netns.expose_port(self.port, self.host_port) - async def _run_container(self) -> bool: + async def run_container(self) -> bool: self.started_at = time_msecs() try: - await self._write_container_config() + await self.write_container_config() async with async_timeout.timeout(self.timeout): with open(self.log_path, 'w', encoding='utf-8') as container_log: log.info(f'Creating the crun run process for {self}') @@ -862,13 +642,11 @@ async def _run_container(self) -> bool: 
f'{self.container_overlay_path}/merged', '--config', f'{self.config_path}/config.json', - self.name, + self.container_name, stdout=container_log, stderr=container_log, ) - - async with ResourceUsageMonitor(self.name, self.resource_usage_path): - await self.process.wait() + await self.process.wait() log.info(f'crun process completed for {self}') except asyncio.TimeoutError: return True @@ -877,7 +655,7 @@ async def _run_container(self) -> bool: return False - async def _write_container_config(self): + async def write_container_config(self): os.makedirs(self.config_path) with open(f'{self.config_path}/config.json', 'w', encoding='utf-8') as f: f.write(json.dumps(await self.container_config())) @@ -885,8 +663,8 @@ async def _write_container_config(self): # https://github.com/opencontainers/runtime-spec/blob/master/config.md async def container_config(self): uid, gid = await self._get_in_container_user() - weight = worker_fraction_in_1024ths(self.cpu_in_mcpu) - workdir = self.image.image_config['Config']['WorkingDir'] + weight = worker_fraction_in_1024ths(self.spec['cpu']) + workdir = self.image_config['Config']['WorkingDir'] default_docker_capabilities = [ 'CAP_CHOWN', 'CAP_DAC_OVERRIDE', @@ -916,7 +694,7 @@ async def container_config(self): 'uid': uid, 'gid': gid, }, - 'args': self.command, + 'args': self.spec['command'], 'env': self._env(), 'cwd': workdir if workdir != "" else "/", 'capabilities': { @@ -943,8 +721,8 @@ async def container_config(self): 'resources': { 'cpu': {'shares': weight}, 'memory': { - 'limit': self.memory_in_bytes, - 'reservation': self.memory_in_bytes, + 'limit': self.spec['memory'], + 'reservation': self.spec['memory'], }, # 'blockIO': {'weight': min(weight, 1000)}, FIXME blkio.weight not supported }, @@ -970,7 +748,7 @@ async def container_config(self): }, } - if self.unconfined: + if self.spec.get('unconfined'): config['linux']['maskedPaths'] = [] config['linux']['readonlyPaths'] = [] config['process']['apparmorProfile'] = 'unconfined' @@ -979,7 +757,7 @@ async def container_config(self): return config async def _get_in_container_user(self): - user = self.image.image_config['Config']['User'] + user = self.image_config['Config']['User'] if not user: uid, gid = 0, 0 elif ":" in user: @@ -989,7 +767,7 @@ async def _get_in_container_user(self): return int(uid), int(gid) async def _read_user_from_rootfs(self, user) -> Tuple[str, str]: - with open(f'{self.image.rootfs_path}/etc/passwd', 'r', encoding='utf-8') as passwd: + with open(f'{self.rootfs_path}/etc/passwd', 'r', encoding='utf-8') as passwd: for record in passwd: if record.startswith(user): _, _, uid, gid, _, _, _ = record.split(":") @@ -999,7 +777,7 @@ async def _read_user_from_rootfs(self, user) -> Tuple[str, str]: def _mounts(self, uid, gid): # Only supports empty volumes external_volumes = [] - volumes = self.image.image_config['Config']['Volumes'] + volumes = self.image_config['Config']['Volumes'] if volumes: for v_container_path in volumes: if not v_container_path.startswith('/'): @@ -1018,7 +796,7 @@ def _mounts(self, uid, gid): ) return ( - self.volume_mounts + self.spec.get('volume_mounts') + external_volumes + [ # Recommended filesystems: @@ -1081,16 +859,65 @@ def _mounts(self, uid, gid): ) def _env(self): - env = self.image.image_config['Config']['Env'] + self.env + env = self.image_config['Config']['Env'] + self.spec.get('env', []) if self.port is not None: assert self.host_port is not None env.append(f'HAIL_BATCH_WORKER_PORT={self.host_port}') env.append(f'HAIL_BATCH_WORKER_IP={IP_ADDRESS}') return 
env + async def delete_container(self): + if self.container_is_running(): + assert self.process is not None + try: + log.info(f'{self} container is still running, killing crun process') + try: + await check_exec_output('crun', 'kill', '--all', self.container_name, 'SIGKILL') + except CalledProcessError as e: + not_extant_message = ( + b'error opening file `/run/crun/' + + self.container_name.encode() + + b'/status`: No such file or directory' + ) + if not (e.returncode == 1 and not_extant_message in e.stderr): + log.exception(f'while deleting container {self}', exc_info=True) + finally: + try: + await send_signal_and_wait(self.process, 'SIGTERM', timeout=5) + except asyncio.TimeoutError: + try: + await send_signal_and_wait(self.process, 'SIGKILL', timeout=5) + except asyncio.CancelledError: + raise + except Exception: + log.exception(f'could not kill process for container {self}') + finally: + self.process = None + + if self.overlay_mounted: + try: + await check_shell(f'umount -l {self.container_overlay_path}/merged') + self.overlay_mounted = False + except asyncio.CancelledError: + raise + except Exception: + log.exception(f'while unmounting overlay in {self}', exc_info=True) + + if self.host_port is not None: + port_allocator.free(self.host_port) + self.host_port = None + + if self.netns: + network_allocator.free(self.netns) + self.netns = None + + async def delete(self): + log.info(f'deleting {self}') + self.deleted_event.set() + # { # name: str, - # state: str, (pending, running, succeeded, error, failed) + # state: str, (pending, pulling, creating, starting, running, uploading_log, deleting, succeeded, error, failed) # timing: dict(str, float), # error: str, (optional) # short_error: str, (optional) @@ -1114,11 +941,11 @@ def status(self): status['container_status'] = self.get_container_status() return status - def get_container_status(self) -> Optional[dict]: + def get_container_status(self): if not self.process: return None - status: dict = { + status = { 'started_at': self.started_at, 'finished_at': self.finished_at, } @@ -1138,19 +965,23 @@ def container_is_running(self): def container_finished(self): return self.process is not None and self.process.returncode is not None - async def get_log(self, offset: Optional[int] = None): - if os.path.exists(self.log_path): - if offset is None: - return (await self.fs.read(self.log_path)).decode() - return (await self.fs.read_from(self.log_path, offset)).decode() + async def upload_log(self): + await self.worker.file_store.write_log_file( + self.job.format_version, + self.job.batch_id, + self.job.job_id, + self.job.attempt_id, + self.name, + await self.get_log(), + ) - async def get_resource_usage(self) -> bytes: - if os.path.exists(self.resource_usage_path): - return await self.fs.read(self.resource_usage_path) - return ResourceUsageMonitor.no_data() + async def get_log(self): + if os.path.exists(self.log_path): + return (await self.worker.fs.read(self.log_path)).decode() + return '' def __str__(self): - return f'container {self.name}' + return f'container {self.job.id}/{self.name}' def populate_secret_host_path(host_path: str, secret_data: Optional[Dict[str, bytes]]): @@ -1162,39 +993,36 @@ def populate_secret_host_path(host_path: str, secret_data: Optional[Dict[str, by def copy_container( - job: 'DockerJob', - task_name: str, - files: List[dict], - volume_mounts: List[dict], - cpu_in_mcpu: int, - memory_in_bytes: int, + job: 'Job', + name: str, + files, + volume_mounts, + cpu, + memory, scratch: str, requester_pays_project: str, 
client_session: httpx.ClientSession, + worker: 'Worker', ) -> Container: assert files - assert job.worker.fs is not None - - command = [ - '/usr/bin/python3', - '-m', - 'hailtop.aiotools.copy', - json.dumps(requester_pays_project), - json.dumps(files), - '-v', - ] - - return Container( - fs=job.worker.fs, - name=job.container_name(task_name), - image=Image(BATCH_WORKER_IMAGE, job.credentials, client_session, job.pool), - scratch_dir=f'{scratch}/{task_name}', - command=command, - cpu_in_mcpu=cpu_in_mcpu, - memory_in_bytes=memory_in_bytes, - volume_mounts=volume_mounts, - env=[f'{job.credentials.cloud_env_name}={job.credentials.mount_path}'], - ) + copy_spec = { + 'image': BATCH_WORKER_IMAGE, + 'name': name, + 'command': [ + '/usr/bin/python3', + '-m', + 'hailtop.aiotools.copy', + json.dumps(requester_pays_project), + json.dumps(files), + '-v', + ], + 'env': [f'{job.credentials.cloud_env_name}={job.credentials.mount_path}'], + 'cpu': cpu, + 'memory': memory, + 'scratch': scratch, + 'volume_mounts': volume_mounts, + } + return Container(job, name, copy_spec, client_session, worker) class Job: @@ -1277,8 +1105,6 @@ def __init__( self.format_version = format_version self.task_manager = task_manager self.pool = pool - - assert worker self.worker = worker self.deleted_event = asyncio.Event() @@ -1373,9 +1199,6 @@ async def run(self): async def get_log(self): pass - async def get_resource_usage(self) -> Dict[str, Optional[bytes]]: - raise NotImplementedError - async def delete(self): log.info(f'deleting {self}') self.deleted_event.set() @@ -1451,14 +1274,12 @@ def __init__( worker: 'Worker', ): super().__init__(batch_id, user, credentials, job_spec, format_version, task_manager, pool, worker) - assert worker.fs - input_files = job_spec.get('input_files') output_files = job_spec.get('output_files') requester_pays_project = job_spec.get('requester_pays_project') - self.timings: Timings = Timings() + self.timings = Timings() if self.secrets: for secret in self.secrets: @@ -1488,23 +1309,34 @@ def __init__( self.scratch, requester_pays_project, client_session, + worker, ) - containers['main'] = Container( - fs=self.worker.fs, - name=self.container_name('main'), - image=Image(job_spec['process']['image'], self.credentials, client_session, pool), - scratch_dir=f'{self.scratch}/main', - command=job_spec['process']['command'], - cpu_in_mcpu=self.cpu_in_mcpu, - memory_in_bytes=self.memory_in_bytes, - network=job_spec.get('network'), - port=job_spec.get('port'), - timeout=job_spec.get('timeout'), - unconfined=job_spec.get('unconfined'), - volume_mounts=self.main_volume_mounts, - env=[f'{var["name"]}={var["value"]}' for var in self.env], - ) + # main container + main_spec = { + 'command': job_spec['process']['command'], + 'image': job_spec['process']['image'], + 'name': 'main', + 'env': [f'{var["name"]}={var["value"]}' for var in self.env], + 'cpu': self.cpu_in_mcpu, + 'memory': self.memory_in_bytes, + 'volume_mounts': self.main_volume_mounts, + } + port = job_spec.get('port') + if port: + main_spec['port'] = port + timeout = job_spec.get('timeout') + if timeout: + main_spec['timeout'] = timeout + network = job_spec.get('network') + if network: + assert network in ('public', 'private') + main_spec['network'] = network + unconfined = job_spec.get('unconfined') + if unconfined: + main_spec['unconfined'] = unconfined + main_spec['scratch'] = self.scratch + containers['main'] = Container(self, 'main', main_spec, client_session, worker) if output_files: containers['output'] = copy_container( @@ -1517,16 +1349,14 
@@ def __init__( self.scratch, requester_pays_project, client_session, + worker, ) self.containers = containers - def step(self, name: str) -> ContextManager: + def step(self, name: str): return self.timings.step(name) - def container_name(self, task_name: str): - return f'batch-{self.batch_id}-job-{self.job_id}-{task_name}' - async def setup_io(self): if not instance_config.job_private: if self.worker.data_disk_space_remaining.value < self.external_storage_in_gib: @@ -1557,36 +1387,6 @@ async def setup_io(self): assert self.disk is None, self.disk os.makedirs(self.io_host_path()) - async def run_container(self, container: Container, task_name: str): - async def on_completion(): - with container._step('uploading_log'): - assert self.worker.file_store - await self.worker.file_store.write_log_file( - self.format_version, - self.batch_id, - self.job_id, - self.attempt_id, - task_name, - await container.get_log(), - ) - - with container._step('uploading_resource_usage'): - await self.worker.file_store.write_resource_usage_file( - self.format_version, - self.batch_id, - self.job_id, - self.attempt_id, - task_name, - await container.get_resource_usage(), - ) - - try: - await container.run(on_completion) - except asyncio.CancelledError: - raise - except Exception: - pass - async def run(self): async with self.worker.cpu_sem(self.cpu_in_mcpu): self.start_time = time_msecs() @@ -1654,21 +1454,21 @@ async def run(self): input = self.containers.get('input') if input: log.info(f'{self}: running input') - await self.run_container(input, 'input') + await input.run() log.info(f'{self} input: {input.state}') if not input or input.state == 'succeeded': log.info(f'{self}: running main') main = self.containers['main'] - await self.run_container(main, 'main') + await main.run() log.info(f'{self} main: {main.state}') output = self.containers.get('output') if output: log.info(f'{self}: running output') - await self.run_container(output, 'output') + await output.run() log.info(f'{self} output: {output.state}') if main.state != 'succeeded': @@ -1681,7 +1481,7 @@ async def run(self): self.state = input.state except asyncio.CancelledError: raise - except ContainerDeletedError: + except JobDeletedError: self.state = 'cancelled' except Exception as e: if not user_error(e): @@ -1740,12 +1540,9 @@ async def cleanup(self): async def get_log(self): return {name: await c.get_log() for name, c in self.containers.items()} - async def get_resource_usage(self): - return {name: await c.get_resource_usage() for name, c in self.containers.items()} - async def delete(self): await super().delete() - await asyncio.wait([c.remove() for c in self.containers.values()]) + await asyncio.wait([c.delete() for c in self.containers.values()]) def status(self): status = super().status() @@ -1780,9 +1577,10 @@ def __init__( if input_files or output_files: raise Exception("i/o not supported") - assert job_spec['process']['jar_spec']['type'] == 'jar_url' - self.jar_url = job_spec['process']['jar_spec']['value'] - self.argv = job_spec['process']['command'] + self.user_command_string = job_spec['process']['command'] + assert len(self.user_command_string) >= 3, self.user_command_string + self.revision = self.user_command_string[1] + self.jar_url = self.user_command_string[2] self.timings = Timings() self.state = 'pending' @@ -1798,20 +1596,21 @@ def step(self, name): return self.timings.step(name) async def run_until_done_or_deleted(self, f: Callable[..., Awaitable[Any]], *args, **kwargs): - try: - return await 
run_until_done_or_deleted(self.deleted_event, f, *args, **kwargs) - except StepInterruptedError as e: - raise JobDeletedError from e + return await run_until_done_or_deleted(self.deleted_event, f, *args, **kwargs) + + def verify_is_acceptable_query_jar_url(self, url: str): + if not url.startswith(ACCEPTABLE_QUERY_JAR_URL_PREFIX): + log.error(f'user submitted unacceptable JAR url: {url} for {self}. {ACCEPTABLE_QUERY_JAR_URL_PREFIX}') + raise ValueError(f'unacceptable JAR url: {url}') def secret_host_path(self, secret): return f'{self.scratch}/secrets/{secret["mount_path"]}' async def download_jar(self): - async with self.worker.jar_download_locks[self.jar_url]: - unique_key = self.jar_url.replace('_', '__').replace('/', '_') - local_jar_location = f'/hail-jars/{unique_key}.jar' + async with self.worker.jar_download_locks[self.revision]: + local_jar_location = f'/hail-jars/{self.revision}.jar' if not os.path.isfile(local_jar_location): - assert self.jar_url.startswith(ACCEPTABLE_QUERY_JAR_URL_PREFIX) + self.verify_is_acceptable_query_jar_url(self.jar_url) temporary_file = tempfile.NamedTemporaryFile(delete=False) # pylint: disable=consider-using-with try: async with await self.worker.fs.open(self.jar_url) as jar_data: @@ -1839,7 +1638,7 @@ async def run(self): try: with self.step('connecting_to_jvm'): - self.jvm = await self.worker.borrow_jvm(self.cpu_in_mcpu // 1000) + self.jvm = await self.run_until_done_or_deleted(self.worker.borrow_jvm) self.jvm_name = str(self.jvm) self.task_manager.ensure_future(self.worker.post_job_started(self)) @@ -1860,11 +1659,11 @@ async def run(self): log.info(f'{self}: downloading JAR') with self.step('downloading_jar'): - local_jar_location = await self.download_jar() + local_jar_location = await self.run_until_done_or_deleted(self.download_jar) log.info(f'{self}: running jvm process') with self.step('running'): - await self.jvm.execute(local_jar_location, self.scratch, self.log_file, self.jar_url, self.argv) + await self.jvm.execute(local_jar_location, self.scratch, self.log_file, self.user_command_string) self.state = 'succeeded' log.info(f'{self} main: {self.state}') @@ -1877,15 +1676,12 @@ async def run(self): except JobDeletedError: self.state = 'cancelled' await self.cleanup() - except JVMCreationError: - self.state = 'error' - log.exception(f'while running {self}') - await self.cleanup() - raise except Exception: log.exception(f'while running {self}') + self.state = 'error' self.error = traceback.format_exc() + await self.cleanup() else: await self.cleanup() @@ -1899,7 +1695,7 @@ async def cleanup(self): with self.step('uploading_log'): log.info(f'{self}: uploading log') - await self.worker.file_store.write_log_file( + await worker.file_store.write_log_file( self.format_version, self.batch_id, self.job_id, self.attempt_id, 'main', await self._get_log() ) @@ -1920,9 +1716,6 @@ async def _get_log(self): async def get_log(self): return {'main': await self._get_log()} - async def get_resource_usage(self): - return {'main': ResourceUsageMonitor.no_data()} - async def delete(self): await super().delete() if self.jvm is not None: @@ -1994,112 +1787,59 @@ def scoped_ensure_future(coro_or_future, *, loop=None) -> Iterator[asyncio.Futur fut.cancel() -class JVMCreationError(Exception): - pass - - -class JVMUserCredentials: - def __init__(self): - self.username = None - self.password = None - - -class JVMContainer: - @staticmethod - async def create_and_start( - index: int, - n_cores: int, - socket_file: str, - root_dir: str, - client_session: httpx.ClientSession, 
- pool: concurrent.futures.ThreadPoolExecutor, - ): - assert os.path.commonpath([socket_file, root_dir]) == root_dir - assert os.path.isdir(root_dir) - - total_memory_bytes = n_cores * worker_memory_per_core_bytes(CLOUD, instance_config.worker_type()) - - # We allocate 60% of memory per core to off heap memory - memory_per_core_mib = worker_memory_per_core_mib(CLOUD, instance_config.worker_type()) - heap_memory_mb = int(0.4 * n_cores * memory_per_core_mib) - off_heap_memory_per_core_mb = int(0.6 * memory_per_core_mib) - - command = [ - 'java', - f'-Xmx{heap_memory_mb}M', - '-cp', - f'/jvm-entryway:/jvm-entryway/junixsocket-selftest-2.3.3-jar-with-dependencies.jar:{JVM.SPARK_HOME}/jars/*', - 'is.hail.JVMEntryway', - socket_file, - ] - - volume_mounts = [ - { - 'source': JVM.SPARK_HOME, - 'destination': JVM.SPARK_HOME, - 'type': 'none', - 'options': ['rbind', 'rw'], - }, - { - 'source': '/jvm-entryway', - 'destination': '/jvm-entryway', - 'type': 'none', - 'options': ['rbind', 'rw'], - }, - { - 'source': '/hail-jars', - 'destination': '/hail-jars', - 'type': 'none', - 'options': ['rbind', 'rw'], - }, - { - 'source': root_dir, - 'destination': root_dir, - 'type': 'none', - 'options': ['rbind', 'rw'], - }, - { - 'source': '/batch', - 'destination': '/batch', - 'type': 'none', - 'options': ['rbind', 'rw'], - }, - ] +class BufferedOutputProcess: + @classmethod + async def create(cls, *args, **kwargs): + assert 'stdout' not in kwargs + assert 'stderr' not in kwargs - fs = LocalAsyncFS(pool) # worker does not have a fs when initializing JVMs - - c = Container( - fs=fs, - name=f'jvm-{index}', - image=Image(BATCH_WORKER_IMAGE, JVMUserCredentials(), client_session, pool), - scratch_dir=f'{root_dir}/container', - command=command, - cpu_in_mcpu=n_cores * 1000, - memory_in_bytes=total_memory_bytes, - env=[f'HAIL_WORKER_OFF_HEAP_MEMORY_PER_CORE_MB={off_heap_memory_per_core_mb}'], - volume_mounts=volume_mounts, + process = await asyncio.create_subprocess_exec( + *args, **kwargs, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE ) - - await c.create() - await c.start() - - return JVMContainer(c, fs) - - def __init__(self, container: Container, fs: LocalAsyncFS): - self.container = container - self.fs = fs + stop_event = asyncio.Event() + return cls(process, stop_event) + + def __init__(self, process, stop_event: asyncio.Event): + self.process = process + self.stop_event = stop_event + self.buf = bytearray() + assert process.stdout is not None + self.stdout_pump = asyncio.ensure_future(self.pump_to_buffer(process.stdout)) + assert process.stderr is not None + self.stderr_pump = asyncio.ensure_future(self.pump_to_buffer(process.stderr)) + + async def pump_to_buffer(self, strm: asyncio.StreamReader): + with scoped_ensure_future(self.stop_event.wait()) as stop_fut: + while not strm.at_eof() and not self.stop_event.is_set(): + with scoped_ensure_future(strm.readline()) as read_fut: + await asyncio.wait([read_fut, stop_fut], return_when=asyncio.FIRST_COMPLETED) + if read_fut.done(): + result = read_fut.result() + self.buf.extend(result) + + def output(self) -> str: + return self.buf.decode() + + def retrieve_and_clear_output(self) -> str: + buf = self.buf.decode() + self.buf = bytearray() + return buf + + def kill(self): + return self.process.kill() @property def returncode(self) -> Optional[int]: - if self.container.process is None: - return None - return self.container.process.returncode + return self.process.returncode - async def remove(self): - if self.fs is not None: - await self.fs.close() - 
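# A worked version of the 40% heap / 60% off-heap split described above, using
# round() for illustration. The 3700 MiB budget is the figure used later in this
# patch for a standard worker core (which has 3840 MiB); the helper itself is
# illustrative, not the worker's actual code.
def jvm_memory_split(n_cores: int, memory_per_core_mib: int):
    heap_mib = round(0.4 * n_cores * memory_per_core_mib)      # goes to -Xmx
    off_heap_per_core_mib = round(0.6 * memory_per_core_mib)   # exported via env var
    return heap_mib, off_heap_per_core_mib

# Reproduces the hard-coded values in JVM.create_process below: 1480 MiB heap
# plus 2220 MiB off-heap per core, i.e. 3700 MiB of the 3840 MiB available.
assert jvm_memory_split(1, 3700) == (1480, 2220)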
self.fs = None - await self.container.remove() + def close(self): + try: + self.kill() + finally: + try: + self.stdout_pump.cancel() + finally: + self.stderr_pump.cancel() class JVMUserError(Exception): @@ -2116,18 +1856,25 @@ class JVM: FINISH_JVM_EOS = 4 @classmethod - async def create_container_and_connect( - cls, - index: int, - n_cores: int, - socket_file: str, - root_dir: str, - client_session: httpx.ClientSession, - pool: concurrent.futures.ThreadPoolExecutor, - ) -> JVMContainer: - try: - container = await JVMContainer.create_and_start(index, n_cores, socket_file, root_dir, client_session, pool) + async def create_process(cls, socket_file: str) -> BufferedOutputProcess: + # JVM and Hail both treat MB as 1024 * 1024 bytes. + # JVMs only start in standard workers which have 3.75 GiB == 3840 MiB per core. + # We only allocate 3700 MiB so that we stay well below the machine's max memory. + # We allocate 60% of memory per core to off heap memory: 1480 + 2220 = 3700. + return await BufferedOutputProcess.create( + 'java', + '-Xmx1480M', + '-cp', + f'/jvm-entryway:/jvm-entryway/junixsocket-selftest-2.3.3-jar-with-dependencies.jar:{JVM.SPARK_HOME}/jars/*', + 'is.hail.JVMEntryway', + socket_file, + env={'HAIL_WORKER_OFF_HEAP_MEMORY_PER_CORE_MB': '2220'}, + ) + @classmethod + async def create_process_and_connect(cls, index: int, socket_file: str) -> Tuple[BufferedOutputProcess, str]: + process = await cls.create_process(socket_file) + try: attempts = 0 delay = 0.25 while True: @@ -2142,77 +1889,71 @@ async def create_container_and_connect( break finally: writer.close() - except (FileNotFoundError, ConnectionRefusedError) as err: + except ConnectionRefusedError: + output = process.retrieve_and_clear_output() + log.warning(f'JVM-{index}: connection refused. {output}') + raise + except FileNotFoundError as err: attempts += 1 if attempts == 240: - jvm_output = await container.container.get_log() or '' raise ValueError( - f'JVM-{index}: failed to establish connection after {240 * delay} seconds. 
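# A self-contained sketch of the output-pumping pattern in BufferedOutputProcess:
# race each readline() against a stop event so the pump can be torn down even if
# the stream never reaches EOF. The surrounding class names are the patch's; this
# standalone function and main() are illustrative.
import asyncio

async def pump_to_buffer(strm: asyncio.StreamReader, stop_event: asyncio.Event, buf: bytearray):
    stop_fut = asyncio.ensure_future(stop_event.wait())
    try:
        while not strm.at_eof() and not stop_event.is_set():
            read_fut = asyncio.ensure_future(strm.readline())
            try:
                # whichever finishes first wins: a new line of output or shutdown
                await asyncio.wait([read_fut, stop_fut], return_when=asyncio.FIRST_COMPLETED)
                if read_fut.done():
                    buf.extend(read_fut.result())
            finally:
                read_fut.cancel()
    finally:
        stop_fut.cancel()

async def main():
    proc = await asyncio.create_subprocess_exec(
        'echo', 'hello', stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
    )
    buf = bytearray()
    stop = asyncio.Event()
    assert proc.stdout is not None
    await pump_to_buffer(proc.stdout, stop, buf)
    await proc.wait()
    print(buf.decode())

asyncio.run(main())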
' - 'JVM output:\n\n' + jvm_output + f'JVM-{index}: failed to establish connection after {240 * delay} seconds' ) from err await asyncio.sleep(delay) - return container - except Exception as e: - raise JVMCreationError from e + startup_output = process.retrieve_and_clear_output() + return process, startup_output + except: + process.close() + raise @classmethod - async def create(cls, index: int, n_cores: int, worker: 'Worker'): - token = uuid.uuid4().hex - root_dir = f'/host/jvm-{token}' - socket_file = root_dir + '/socket' - output_file = root_dir + '/output' - should_interrupt = asyncio.Event() - await blocking_to_async(worker.pool, os.makedirs, root_dir) - container = await cls.create_container_and_connect( - index, n_cores, socket_file, root_dir, worker.client_session, worker.pool - ) - return cls( - index, - n_cores, - socket_file, - root_dir, - output_file, - should_interrupt, - container, - worker.client_session, - worker.pool, - ) + async def create(cls, index: int): + assert worker is not None + + while True: + try: + token = uuid.uuid4().hex + socket_file = '/socket-' + token + root_dir = '/root-' + token + output_file = root_dir + '/output' + should_interrupt = asyncio.Event() + await blocking_to_async(worker.pool, os.mkdir, root_dir) + process, startup_output = await cls.create_process_and_connect(index, socket_file) + log.info(f'JVM-{index}: startup output: {startup_output}') + return cls(index, socket_file, root_dir, output_file, should_interrupt, process) + except ConnectionRefusedError: + pass async def new_connection(self): while True: try: + interim_output = self.process.retrieve_and_clear_output() + if len(interim_output) > 0: + log.warning(f'{self}: unexpected output between jobs') + return await asyncio.open_unix_connection(self.socket_file) except ConnectionRefusedError: + log.warning(f'{self}: unexpected exit between jobs', extra=dict(output=self.process.output())) os.remove(self.socket_file) - if self.container: - await self.container.remove() - - container = await self.create_container_and_connect( - self.index, self.n_cores, self.socket_file, self.root_dir, self.client_session, self.pool - ) - self.container = container + process, startup_output = await self.create_process_and_connect(self.index, self.socket_file) + self.process = process + log.info(f'JVM-{self.index}: startup output: {startup_output}') def __init__( self, index: int, - n_cores: int, socket_file: str, root_dir: str, output_file: str, should_interrupt: asyncio.Event, - container: JVMContainer, - client_session: httpx.ClientSession, - pool: concurrent.futures.ThreadPoolExecutor, + process: BufferedOutputProcess, ): self.index = index - self.n_cores = n_cores self.socket_file = socket_file self.root_dir = root_dir self.output_file = output_file self.should_interrupt = should_interrupt - self.container = container - self.client_session = client_session - self.pool = pool + self.process = process def __str__(self): return f'JVM-{self.index}' @@ -2226,11 +1967,14 @@ def interrupt(self): def reset(self): self.should_interrupt.clear() - async def kill(self): - if self.container is not None: - await self.container.remove() + def kill(self): + if self.process is not None: + self.process.kill() + + def close(self): + self.process.close() - async def execute(self, classpath: str, scratch_dir: str, log_file: str, jar_url: str, argv: List[str]): + async def execute(self, classpath: str, scratch_dir: str, log_file: str, command_string: List[str]): assert worker is not None log.info(f'{self}: execute') @@ 
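# A sketch of the bounded connect-retry skeleton used when a freshly started JVM
# has not yet created its unix socket: FileNotFoundError means the socket file is
# not there yet, while ConnectionRefusedError (handled separately above) means the
# process died or is not listening. Parameter values are illustrative; the real
# loop also exchanges a handshake on the connection.
import asyncio

async def connect_with_retry(socket_file: str, max_attempts: int = 240, delay: float = 0.25):
    attempts = 0
    while True:
        try:
            reader, writer = await asyncio.open_unix_connection(socket_file)
            return reader, writer
        except FileNotFoundError:
            attempts += 1
            if attempts == max_attempts:
                raise ValueError(
                    f'failed to establish connection after {max_attempts * delay} seconds'
                )
            await asyncio.sleep(delay)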
-2242,35 +1986,33 @@ async def execute(self, classpath: str, scratch_dir: str, log_file: str, jar_url stack.callback(writer.close) log.info(f'{self}: connection acquired') - command = [classpath, 'is.hail.backend.service.Main', scratch_dir, log_file, jar_url, *argv] + command_string = [classpath, 'is.hail.backend.service.Main', scratch_dir, log_file, *command_string] - write_int(writer, len(command)) - for part in command: - assert isinstance(part, str) - write_str(writer, part) + write_int(writer, len(command_string)) + for arg in command_string: + assert isinstance(arg, str) + write_str(writer, arg) await writer.drain() - wait_for_message_from_container: asyncio.Future = asyncio.ensure_future(read_int(reader)) - stack.callback(wait_for_message_from_container.cancel) + wait_for_message_from_process: asyncio.Future = asyncio.ensure_future(read_int(reader)) + stack.callback(wait_for_message_from_process.cancel) wait_for_interrupt: asyncio.Future = asyncio.ensure_future(self.should_interrupt.wait()) stack.callback(wait_for_interrupt.cancel) - await asyncio.wait( - [wait_for_message_from_container, wait_for_interrupt], return_when=asyncio.FIRST_COMPLETED - ) + await asyncio.wait([wait_for_message_from_process, wait_for_interrupt], return_when=asyncio.FIRST_COMPLETED) if wait_for_interrupt.done(): await wait_for_interrupt # retrieve exceptions - if not wait_for_message_from_container.done(): + if not wait_for_message_from_process.done(): write_int(writer, 0) # tell process to cancel await writer.drain() eos_exception = None try: - message = await wait_for_message_from_container + message = await wait_for_message_from_process except EndOfStream as exc: try: - await self.kill() + self.kill() except ProcessLookupError: log.warning(f'{self}: JVM died after we received EOS') message = JVM.FINISH_JVM_EOS @@ -2321,45 +2063,41 @@ def __init__(self, client_session: httpx.ClientSession): self.compute_client = None self._jvm_initializer_task = asyncio.ensure_future(self._initialize_jvms()) - self._jvms: SortedSet[JVM] = SortedSet([], key=lambda jvm: jvm.n_cores) + self._jvms: List[JVM] = [] async def _initialize_jvms(self): - if instance_config.worker_type() in ('standard', 'D', 'highmem', 'E'): - jvms = await asyncio.gather( - *[JVM.create(i, 1, self) for i in range(CORES)], - *[JVM.create(CORES + i, 8, self) for i in range(CORES // 8)], - ) - self._jvms.update(jvms) + if instance_config.worker_type() in ('standard', 'D'): + self._jvms = await asyncio.gather(*[JVM.create(i) for i in range(CORES)]) log.info(f'JVMs initialized {self._jvms}') - async def borrow_jvm(self, n_cores: int) -> JVM: - if instance_config.worker_type() not in ('standard', 'D', 'highmem', 'E'): - raise ValueError(f'no JVMs available on {instance_config.worker_type()}') - await self._jvm_initializer_task + async def borrow_jvm(self) -> JVM: + if instance_config.worker_type() not in ('standard', 'D'): + raise ValueError(f'JVM jobs not allowed on {instance_config.worker_type()}') + await asyncio.shield(self._jvm_initializer_task) assert self._jvms - index = self._jvms.bisect_key_left(n_cores) - assert index < len(self._jvms), index - return self._jvms.pop(index) + return self._jvms.pop() def return_jvm(self, jvm: JVM): + if instance_config.worker_type() not in ('standard', 'D'): + raise ValueError(f'JVM jobs not allowed on {instance_config.worker_type()}') jvm.reset() - self._jvms.add(jvm) + self._jvms.append(jvm) async def shutdown(self): log.info('Worker.shutdown') try: with ExitStack() as cleanup: for jvm in self._jvms: - 
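# A sketch of the framing used to hand a command to the JVM entryway and then
# wait for either a completion message or a cancellation request. The 4-byte
# little-endian int encoding and UTF-8 length-prefixed strings are assumptions
# for illustration; only the asyncio.wait(FIRST_COMPLETED) structure mirrors the
# execute() code above.
import asyncio
import struct

def write_int(writer: asyncio.StreamWriter, v: int):
    writer.write(struct.pack('<i', v))

def write_str(writer: asyncio.StreamWriter, s: str):
    b = s.encode('utf-8')
    write_int(writer, len(b))
    writer.write(b)

async def read_int(reader: asyncio.StreamReader) -> int:
    return struct.unpack('<i', await reader.readexactly(4))[0]

async def send_command_and_wait(reader, writer, command, should_interrupt: asyncio.Event):
    # length-prefixed command, one string per argument
    write_int(writer, len(command))
    for part in command:
        write_str(writer, part)
    await writer.drain()

    wait_for_message = asyncio.ensure_future(read_int(reader))
    wait_for_interrupt = asyncio.ensure_future(should_interrupt.wait())
    try:
        await asyncio.wait([wait_for_message, wait_for_interrupt],
                           return_when=asyncio.FIRST_COMPLETED)
        if wait_for_interrupt.done() and not wait_for_message.done():
            write_int(writer, 0)  # ask the remote process to cancel
            await writer.drain()
        return await wait_for_message
    finally:
        wait_for_message.cancel()
        wait_for_interrupt.cancel()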
cleanup.callback(jvm.kill) + cleanup.callback(jvm.close) finally: try: self.task_manager.shutdown() log.info('shutdown task manager') finally: try: - if self.file_store: - await self.file_store.close() - log.info('closed file store') + if self.fs: + await self.fs.close() + log.info('closed worker file system') finally: try: if self.compute_client: @@ -2367,9 +2105,9 @@ async def shutdown(self): log.info('closed compute client') finally: try: - if self.fs: - await self.fs.close() - log.info('closed worker file system') + if self.file_store: + await self.file_store.close() + log.info('closed file store') finally: await self.client_session.close() log.info('closed client session') @@ -2379,8 +2117,6 @@ async def run_job(self, job): # pylint: disable=no-self-use await job.run() except asyncio.CancelledError: raise - except JVMCreationError: - self.stop_event.set() except Exception as e: if not user_error(e): log.exception(f'while running {job}, ignoring') @@ -2449,46 +2185,26 @@ async def create_job(self, request): raise web.HTTPServiceUnavailable return await asyncio.shield(self.create_job_1(request)) - def _job_from_request(self, request): + async def get_job_log(self, request): + if not self.active: + raise web.HTTPServiceUnavailable batch_id = int(request.match_info['batch_id']) job_id = int(request.match_info['job_id']) id = (batch_id, job_id) job = self.jobs.get(id) if not job: raise web.HTTPNotFound() - return job - - async def get_job_log(self, request): - if not self.active: - raise web.HTTPServiceUnavailable - job = self._job_from_request(request) return web.json_response(await job.get_log()) - async def get_job_resource_usage(self, request): - if not self.active: - raise web.HTTPServiceUnavailable - job = self._job_from_request(request) - resource_usage = await job.get_resource_usage() - - boundary = '----WebKitFormBoundarywiBIWjWR7osAkgFI' - - resp = web.StreamResponse( - status=200, reason='OK', headers={'Content-Type': f'multipart/mixed;boundary={boundary}'} - ) - await resp.prepare(request) - - with aiohttp.MultipartWriter('mixed', boundary=boundary) as mpwriter: - for task, data in resource_usage.items(): - part = mpwriter.append(data) - part.set_content_disposition('attachment', filename=task) - await mpwriter.write(resp) - - return resp - async def get_job_status(self, request): if not self.active: raise web.HTTPServiceUnavailable - job = self._job_from_request(request) + batch_id = int(request.match_info['batch_id']) + job_id = int(request.match_info['job_id']) + id = (batch_id, job_id) + job = self.jobs.get(id) + if not job: + raise web.HTTPNotFound() return web.json_response(job.status()) async def delete_job_1(self, request): @@ -2527,7 +2243,6 @@ async def run(self): web.post('/api/v1alpha/batches/jobs/create', self.create_job), web.delete('/api/v1alpha/batches/{batch_id}/jobs/{job_id}/delete', self.delete_job), web.get('/api/v1alpha/batches/{batch_id}/jobs/{job_id}/log', self.get_job_log), - web.get('/api/v1alpha/batches/{batch_id}/jobs/{job_id}/resource_usage', self.get_job_resource_usage), web.get('/api/v1alpha/batches/{batch_id}/jobs/{job_id}/status', self.get_job_status), web.get('/healthcheck', self.healthcheck), ] diff --git a/batch/deployment.yaml b/batch/deployment.yaml index 2447b3424fe..b2f0c03560d 100644 --- a/batch/deployment.yaml +++ b/batch/deployment.yaml @@ -34,26 +34,6 @@ spec: value: "spot" {% endif %} containers: - - name: nginx - image: {{ batch_driver_nginx_image.image }} - resources: - requests: - cpu: "4" - memory: "2G" - limits: - cpu: "4.5" - 
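# A minimal sketch of the job-lookup handlers registered above, assuming `jobs`
# is a dict keyed by (batch_id, job_id). The route path mirrors the worker's
# route table; the bare module-level app is illustrative only.
from aiohttp import web

jobs = {}  # (batch_id, job_id) -> job, populated elsewhere

def job_from_request(request: web.Request):
    batch_id = int(request.match_info['batch_id'])
    job_id = int(request.match_info['job_id'])
    job = jobs.get((batch_id, job_id))
    if job is None:
        raise web.HTTPNotFound()
    return job

async def get_job_status(request: web.Request) -> web.Response:
    return web.json_response(job_from_request(request).status())

app = web.Application()
app.add_routes([web.get('/api/v1alpha/batches/{batch_id}/jobs/{job_id}/status', get_job_status)])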
memory: "4G" - ports: - - containerPort: 443 - volumeMounts: - - name: ssl-config-batch-driver-nginx - mountPath: /ssl-config - readOnly: true - readinessProbe: - tcpSocket: - port: 443 - initialDelaySeconds: 5 - periodSeconds: 5 - name: batch-driver image: {{ batch_image.image }} command: @@ -65,10 +45,10 @@ spec: - batch.driver resources: requests: - cpu: "1" + cpu: "600m" memory: "2G" limits: - cpu: "1.5" + cpu: "1" memory: "2.5G" env: - name: HAIL_DOMAIN @@ -147,7 +127,15 @@ spec: key: query_storage_uri - name: HAIL_QUERY_ACCEPTABLE_JAR_SUBFOLDER value: "/jars" -{% elif scope == "test" or scope == "dev" %} +{% elif scope == "test" %} + - name: HAIL_QUERY_STORAGE_URI + valueFrom: + secretKeyRef: + name: global-config + key: test_storage_uri + - name: HAIL_QUERY_ACCEPTABLE_JAR_SUBFOLDER + value: "/{{ token }}/jars" +{% elif scope == "dev" %} - name: HAIL_QUERY_STORAGE_URI valueFrom: secretKeyRef: @@ -162,6 +150,8 @@ spec: - name: HAIL_SHOULD_CHECK_INVARIANTS value: "1" {% endif %} + ports: + - containerPort: 5000 volumeMounts: - name: deploy-config mountPath: /deploy-config @@ -209,10 +199,6 @@ spec: secret: optional: false secretName: ssl-config-batch-driver - - name: ssl-config-batch-driver-nginx - secret: - optional: false - secretName: ssl-config-batch-driver-nginx - name: ssh-public-key secret: secretName: batch-worker-ssh-public-key @@ -331,7 +317,15 @@ spec: key: query_storage_uri - name: HAIL_QUERY_ACCEPTABLE_JAR_SUBFOLDER value: "/jars" -{% elif scope == "test" or scope == "dev" %} +{% elif scope == "test" %} + - name: HAIL_QUERY_STORAGE_URI + valueFrom: + secretKeyRef: + name: global-config + key: test_storage_uri + - name: HAIL_QUERY_ACCEPTABLE_JAR_SUBFOLDER + value: "/{{ token }}/jars" +{% elif scope == "dev" %} - name: HAIL_QUERY_STORAGE_URI valueFrom: secretKeyRef: @@ -411,7 +405,7 @@ spec: - type: Resource resource: name: cpu - targetAverageUtilization: 2500 + targetAverageUtilization: 80 --- apiVersion: policy/v1beta1 kind: PodDisruptionBudget @@ -447,6 +441,6 @@ spec: ports: - port: 443 protocol: TCP - targetPort: 443 + targetPort: 5000 selector: app: batch-driver diff --git a/batch/driver-nginx.conf b/batch/driver-nginx.conf deleted file mode 100644 index da65fbd41b4..00000000000 --- a/batch/driver-nginx.conf +++ /dev/null @@ -1,78 +0,0 @@ -worker_processes auto; -pid /run/nginx.pid; -include /etc/nginx/modules-enabled/*.conf; - -events { - worker_connections 768; -} - -http { - - sendfile on; - tcp_nopush on; - tcp_nodelay on; - keepalive_timeout 65; - types_hash_max_size 2048; - server_names_hash_bucket_size 128; - - include /etc/nginx/mime.types; - default_type application/octet-stream; - - ssl_protocols TLSv1 TLSv1.1 TLSv1.2; # Dropping SSLv3, ref: POODLE - ssl_prefer_server_ciphers on; - - log_format json-log escape=json '{' - '"message":"$scheme $request done in ${request_time}s: $status",' - '"response_status":$status,' - '"request_duration":$request_time,' - '"remote_address":"$remote_addr",' - '"x_real_ip":"$http_x_real_ip",' - '"request_start_time":"$time_local",' - '"body_bytes_sent":"$body_bytes_sent",' - '"http_referer":"$http_referer",' - '"http_user_agent":"$http_user_agent"' - '}'; - - access_log /var/log/nginx/access.log json-log; - error_log /var/log/nginx/error.log; - - gzip on; - - include /ssl-config/ssl-config-http.conf; - map $http_x_forwarded_proto $updated_scheme { - default $http_x_forwarded_proto; - '' $scheme; - } - map $http_x_forwarded_host $updated_host { - default $http_x_forwarded_host; - '' $http_host; - } - map $http_upgrade 
$connection_upgrade { - default upgrade; - '' close; - } - - server { - server_name batch-driver.*; - - location = /healthcheck { - return 204; - } - - location / { - proxy_pass http://127.0.0.1:5000/; - - proxy_set_header Host $http_host; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Host $updated_host; - proxy_set_header X-Forwarded-Proto $updated_scheme; - proxy_set_header X-Real-IP $http_x_real_ip; - - proxy_set_header Upgrade $http_upgrade; - proxy_set_header Connection $connection_upgrade; - } - - listen 443 ssl; - listen [::]:443 ssl; - } -} diff --git a/batch/test/test_batch.py b/batch/test/test_batch.py index df53771e2fc..f724f7ce262 100644 --- a/batch/test/test_batch.py +++ b/batch/test/test_batch.py @@ -611,7 +611,7 @@ def test_timeout(client: BatchClient): status = j.wait() assert status['state'] == 'Error', str((status, b.debug_info())) error_msg = j._get_error(status, 'main') - assert error_msg and 'ContainerTimeoutError' in error_msg, str((error_msg, b.debug_info())) + assert error_msg and 'JobTimeoutError' in error_msg, str((error_msg, b.debug_info())) assert j.exit_code(status) is None, str((status, b.debug_info())) diff --git a/batch2/react-batch/package-lock.json b/batch2/react-batch/package-lock.json index a4739d09f06..d96370ea382 100644 --- a/batch2/react-batch/package-lock.json +++ b/batch2/react-batch/package-lock.json @@ -2492,9 +2492,9 @@ } }, "node_modules/minimist": { - "version": "1.2.6", - "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz", - "integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==", + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz", + "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==", "dev": true }, "node_modules/ms": { @@ -5644,9 +5644,9 @@ } }, "minimist": { - "version": "1.2.6", - "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz", - "integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==", + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz", + "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==", "dev": true }, "ms": { diff --git a/benchmark/python/benchmark_hail/run/table_benchmarks.py b/benchmark/python/benchmark_hail/run/table_benchmarks.py index 2105d361a20..a4e345fe229 100644 --- a/benchmark/python/benchmark_hail/run/table_benchmarks.py +++ b/benchmark/python/benchmark_hail/run/table_benchmarks.py @@ -160,11 +160,6 @@ def table_import_ints(tsv): )._force_count() -@benchmark(args=many_ints_table.handle('tsv')) -def table_import_ints_impute(tsv): - hl.import_table(tsv, impute=True)._force_count() - - @benchmark(args=many_strings_table.handle('tsv')) def table_import_strings(tsv): hl.import_table(tsv)._force_count() diff --git a/bootstrap-gateway/deployment.yaml b/bootstrap-gateway/deployment.yaml index 50cbd1d76f2..f6f51ad697f 100644 --- a/bootstrap-gateway/deployment.yaml +++ b/bootstrap-gateway/deployment.yaml @@ -63,7 +63,7 @@ spec: - type: Resource resource: name: cpu - targetAverageUtilization: 500 + targetAverageUtilization: 80 --- apiVersion: policy/v1beta1 kind: PodDisruptionBudget diff --git a/build.yaml b/build.yaml index 496265f71b0..2bfa9174b5d 100644 --- a/build.yaml +++ b/build.yaml @@ -559,17 +559,6 @@ steps: dependsOn: - 
service_base_image - merge_code - - kind: buildImage2 - name: batch_driver_nginx_image - dockerFile: /io/batch/Dockerfile.driver-nginx - contextPath: /io/batch - publishAs: batch-driver-nginx - inputs: - - from: /repo/batch - to: /io/batch - dependsOn: - - hail_ubuntu_image - - merge_code - kind: buildImage2 name: batch_image dockerFile: /io/batch/Dockerfile @@ -734,13 +723,6 @@ steps: mv build/libs/hail-all-spark-test.jar build/debug_libs/ mv build/deploy/dist/debug-wheel-container.tar build/debug_libs time retry make jars python-version-info wheel - - # Check wheel size is small enough for pypi (< 100 MB) - HAIL_PIP_VERSION=$(cat python/hail/hail_pip_version) - WHEEL_PATH="build/deploy/dist/hail-$HAIL_PIP_VERSION-py3-none-any.whl" - du -h $WHEEL_PATH - $(python3 -c "import os; exit(1) if (os.path.getsize('$WHEEL_PATH')) > 100_000_000 else exit(0)") - time (cd python && zip -r hail.zip hail hailtop) time tar czf test.tar.gz -C python test time tar czf resources.tar.gz -C src/test resources @@ -2030,9 +2012,7 @@ steps: export HAIL_DOCTEST_DATA_DIR=./data export PYTEST_SPLITS=7 export PYTEST_SPLIT_INDEX=0 - - hailctl config set query/backend local - + export HAIL_QUERY_BACKEND=local python3 -m pytest \ --ignore=test/hailtop/ \ --log-cli-level=INFO \ @@ -2081,9 +2061,7 @@ steps: export HAIL_DOCTEST_DATA_DIR=./data export PYTEST_SPLITS=7 export PYTEST_SPLIT_INDEX=1 - - hailctl config set query/backend local - + export HAIL_QUERY_BACKEND=local python3 -m pytest \ --ignore=test/hailtop/ \ --log-cli-level=INFO \ @@ -2132,9 +2110,7 @@ steps: export HAIL_DOCTEST_DATA_DIR=./data export PYTEST_SPLITS=7 export PYTEST_SPLIT_INDEX=2 - - hailctl config set query/backend local - + export HAIL_QUERY_BACKEND=local python3 -m pytest \ --ignore=test/hailtop/ \ --log-cli-level=INFO \ @@ -2183,9 +2159,7 @@ steps: export HAIL_DOCTEST_DATA_DIR=./data export PYTEST_SPLITS=7 export PYTEST_SPLIT_INDEX=3 - - hailctl config set query/backend local - + export HAIL_QUERY_BACKEND=local python3 -m pytest \ --ignore=test/hailtop/ \ --log-cli-level=INFO \ @@ -2234,9 +2208,7 @@ steps: export HAIL_DOCTEST_DATA_DIR=./data export PYTEST_SPLITS=7 export PYTEST_SPLIT_INDEX=4 - - hailctl config set query/backend local - + export HAIL_QUERY_BACKEND=local python3 -m pytest \ --ignore=test/hailtop/ \ --log-cli-level=INFO \ @@ -2285,9 +2257,7 @@ steps: export HAIL_DOCTEST_DATA_DIR=./data export PYTEST_SPLITS=7 export PYTEST_SPLIT_INDEX=5 - - hailctl config set query/backend local - + export HAIL_QUERY_BACKEND=local python3 -m pytest \ --ignore=test/hailtop/ \ --log-cli-level=INFO \ @@ -2336,9 +2306,7 @@ steps: export HAIL_DOCTEST_DATA_DIR=./data export PYTEST_SPLITS=7 export PYTEST_SPLIT_INDEX=6 - - hailctl config set query/backend local - + export HAIL_QUERY_BACKEND=local python3 -m pytest \ --ignore=test/hailtop/ \ --log-cli-level=INFO \ @@ -2997,7 +2965,6 @@ steps: - create_accounts - batch_image - batch_worker_image - - batch_driver_nginx_image - batch_database - deploy_auth - create_certs @@ -3123,7 +3090,9 @@ steps: {% if scope == "deploy" %} HAIL_JAR_URL={{ global.query_storage_uri }} - {% elif scope == "test" or scope == "dev" %} + {% elif scope == "test" %} + HAIL_JAR_URL={{ global.test_storage_uri }}/{{ deploy_batch.token }} + {% elif scope == "dev" %} HAIL_JAR_URL={{ global.test_storage_uri }}/{{ default_ns.name }} {% else %} echo "!!! unexpected scope {{ scope }} !!!" 
@@ -3178,17 +3147,27 @@ steps: cd /io/repo/hail/python + export HAIL_SHA="$(cat /io/git_version)" + {% if scope == "deploy" %} + export HAIL_JAR_URL={{ global.query_storage_uri }} + {% elif scope == "test" %} + export HAIL_JAR_URL={{ global.test_storage_uri }}/{{ deploy_batch.token }} + {% elif scope == "dev" %} + export HAIL_JAR_URL={{ global.test_storage_uri }}/{{ default_ns.name }} + {% else %} + echo "!!! unexpected scope {{ scope }} !!!" + exit 1 + {% endif %} + export HAIL_JAR_URL=${HAIL_JAR_URL}/jars/$(cat /io/git_version).jar + export HAIL_TEST_STORAGE_URI={{ global.test_storage_uri }}/{{ token }} export HAIL_TEST_RESOURCES_DIR="{{ global.test_storage_uri }}/{{ upload_test_resources_to_blob_storage.token }}/test/resources/" export HAIL_DOCTEST_DATA_DIR="{{ global.test_storage_uri }}/{{ upload_test_resources_to_blob_storage.token }}/doctest/data/" export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=0 + export HAIL_QUERY_BACKEND=service - export HAIL_SHUFFLE_MAX_BRANCH=4 - export HAIL_SHUFFLE_CUTOFF=1000000 - - hailctl config set query/backend batch hailctl config set batch/billing_project test hailctl config set batch/remote_tmpdir {{ global.test_storage_uri }} @@ -3201,12 +3180,13 @@ steps: --durations=50 \ -n 4 \ test - timeout: 5400 inputs: - from: /just-wheel/wheel-container.tar to: /io/wheel-container.tar - from: /repo/hail/python/test to: /io/repo/hail/python/test + - from: /git_version + to: /io/git_version secrets: - name: test-gsa-key namespace: @@ -3250,17 +3230,27 @@ steps: cd /io/repo/hail/python + export HAIL_SHA="$(cat /io/git_version)" + {% if scope == "deploy" %} + export HAIL_JAR_URL={{ global.query_storage_uri }} + {% elif scope == "test" %} + export HAIL_JAR_URL={{ global.test_storage_uri }}/{{ deploy_batch.token }} + {% elif scope == "dev" %} + export HAIL_JAR_URL={{ global.test_storage_uri }}/{{ default_ns.name }} + {% else %} + echo "!!! unexpected scope {{ scope }} !!!" + exit 1 + {% endif %} + export HAIL_JAR_URL=${HAIL_JAR_URL}/jars/$(cat /io/git_version).jar + export HAIL_TEST_STORAGE_URI={{ global.test_storage_uri }}/{{ token }} export HAIL_TEST_RESOURCES_DIR="{{ global.test_storage_uri }}/{{ upload_test_resources_to_blob_storage.token }}/test/resources/" export HAIL_DOCTEST_DATA_DIR="{{ global.test_storage_uri }}/{{ upload_test_resources_to_blob_storage.token }}/doctest/data/" export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=1 + export HAIL_QUERY_BACKEND=service - export HAIL_SHUFFLE_MAX_BRANCH=4 - export HAIL_SHUFFLE_CUTOFF=1000000 - - hailctl config set query/backend batch hailctl config set batch/billing_project test hailctl config set batch/remote_tmpdir {{ global.test_storage_uri }} @@ -3273,12 +3263,13 @@ steps: --durations=50 \ -n 4 \ test - timeout: 5400 inputs: - from: /just-wheel/wheel-container.tar to: /io/wheel-container.tar - from: /repo/hail/python/test to: /io/repo/hail/python/test + - from: /git_version + to: /io/git_version secrets: - name: test-gsa-key namespace: @@ -3322,17 +3313,27 @@ steps: cd /io/repo/hail/python + export HAIL_SHA="$(cat /io/git_version)" + {% if scope == "deploy" %} + export HAIL_JAR_URL={{ global.query_storage_uri }} + {% elif scope == "test" %} + export HAIL_JAR_URL={{ global.test_storage_uri }}/{{ deploy_batch.token }} + {% elif scope == "dev" %} + export HAIL_JAR_URL={{ global.test_storage_uri }}/{{ default_ns.name }} + {% else %} + echo "!!! unexpected scope {{ scope }} !!!" 
+ exit 1 + {% endif %} + export HAIL_JAR_URL=${HAIL_JAR_URL}/jars/$(cat /io/git_version).jar + export HAIL_TEST_STORAGE_URI={{ global.test_storage_uri }}/{{ token }} export HAIL_TEST_RESOURCES_DIR="{{ global.test_storage_uri }}/{{ upload_test_resources_to_blob_storage.token }}/test/resources/" export HAIL_DOCTEST_DATA_DIR="{{ global.test_storage_uri }}/{{ upload_test_resources_to_blob_storage.token }}/doctest/data/" export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=2 + export HAIL_QUERY_BACKEND=service - export HAIL_SHUFFLE_MAX_BRANCH=4 - export HAIL_SHUFFLE_CUTOFF=1000000 - - hailctl config set query/backend batch hailctl config set batch/billing_project test hailctl config set batch/remote_tmpdir {{ global.test_storage_uri }} @@ -3345,12 +3346,13 @@ steps: --durations=50 \ -n 4 \ test - timeout: 5400 inputs: - from: /just-wheel/wheel-container.tar to: /io/wheel-container.tar - from: /repo/hail/python/test to: /io/repo/hail/python/test + - from: /git_version + to: /io/git_version secrets: - name: test-gsa-key namespace: @@ -3394,17 +3396,27 @@ steps: cd /io/repo/hail/python + export HAIL_SHA="$(cat /io/git_version)" + {% if scope == "deploy" %} + export HAIL_JAR_URL={{ global.query_storage_uri }} + {% elif scope == "test" %} + export HAIL_JAR_URL={{ global.test_storage_uri }}/{{ deploy_batch.token }} + {% elif scope == "dev" %} + export HAIL_JAR_URL={{ global.test_storage_uri }}/{{ default_ns.name }} + {% else %} + echo "!!! unexpected scope {{ scope }} !!!" + exit 1 + {% endif %} + export HAIL_JAR_URL=${HAIL_JAR_URL}/jars/$(cat /io/git_version).jar + export HAIL_TEST_STORAGE_URI={{ global.test_storage_uri }}/{{ token }} export HAIL_TEST_RESOURCES_DIR="{{ global.test_storage_uri }}/{{ upload_test_resources_to_blob_storage.token }}/test/resources/" export HAIL_DOCTEST_DATA_DIR="{{ global.test_storage_uri }}/{{ upload_test_resources_to_blob_storage.token }}/doctest/data/" export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=3 + export HAIL_QUERY_BACKEND=service - export HAIL_SHUFFLE_MAX_BRANCH=4 - export HAIL_SHUFFLE_CUTOFF=1000000 - - hailctl config set query/backend batch hailctl config set batch/billing_project test hailctl config set batch/remote_tmpdir {{ global.test_storage_uri }} @@ -3417,12 +3429,13 @@ steps: --durations=50 \ -n 4 \ test - timeout: 5400 inputs: - from: /just-wheel/wheel-container.tar to: /io/wheel-container.tar - from: /repo/hail/python/test to: /io/repo/hail/python/test + - from: /git_version + to: /io/git_version secrets: - name: test-gsa-key namespace: @@ -3466,17 +3479,27 @@ steps: cd /io/repo/hail/python + export HAIL_SHA="$(cat /io/git_version)" + {% if scope == "deploy" %} + export HAIL_JAR_URL={{ global.query_storage_uri }} + {% elif scope == "test" %} + export HAIL_JAR_URL={{ global.test_storage_uri }}/{{ deploy_batch.token }} + {% elif scope == "dev" %} + export HAIL_JAR_URL={{ global.test_storage_uri }}/{{ default_ns.name }} + {% else %} + echo "!!! unexpected scope {{ scope }} !!!" 
+ exit 1 + {% endif %} + export HAIL_JAR_URL=${HAIL_JAR_URL}/jars/$(cat /io/git_version).jar + export HAIL_TEST_STORAGE_URI={{ global.test_storage_uri }}/{{ token }} export HAIL_TEST_RESOURCES_DIR="{{ global.test_storage_uri }}/{{ upload_test_resources_to_blob_storage.token }}/test/resources/" export HAIL_DOCTEST_DATA_DIR="{{ global.test_storage_uri }}/{{ upload_test_resources_to_blob_storage.token }}/doctest/data/" export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=4 + export HAIL_QUERY_BACKEND=service - export HAIL_SHUFFLE_MAX_BRANCH=4 - export HAIL_SHUFFLE_CUTOFF=1000000 - - hailctl config set query/backend batch hailctl config set batch/billing_project test hailctl config set batch/remote_tmpdir {{ global.test_storage_uri }} @@ -3489,12 +3512,13 @@ steps: --durations=50 \ -n 4 \ test - timeout: 5400 inputs: - from: /just-wheel/wheel-container.tar to: /io/wheel-container.tar - from: /repo/hail/python/test to: /io/repo/hail/python/test + - from: /git_version + to: /io/git_version secrets: - name: test-gsa-key namespace: @@ -5008,61 +5032,6 @@ steps: - hail_run_tests_image - build_hail - deploy_batch - - kind: runImage - name: cancel_all_running_test_batches - image: - valueFrom: service_base_image.image - script: | - cat >cancel_all_running_test_batches.py <<'EOF' - from hailtop.batch_client.aioclient import BatchClient - import asyncio - - async def cancel_all(query): - bc = await BatchClient.create('test') - async for b in bc.list_batches(query): - status = await b.last_known_status() - print(status) - await b.cancel() - - asyncio.get_event_loop().run_until_complete(cancel_all('user:test running')) - EOF - - python3 cancel_all_running_test_batches.py - secrets: - - name: worker-deploy-config - namespace: - valueFrom: default_ns.name - mountPath: /deploy-config - - name: test-dev-tokens - namespace: - valueFrom: default_ns.name - mountPath: /user-tokens - - name: ssl-config-batch-tests - namespace: - valueFrom: default_ns.name - mountPath: /ssl-config - dependsOn: - - create_deploy_config - - create_accounts - - default_ns - - service_base_image - - deploy_batch - - test_batch_0 - - test_batch_1 - - test_batch_2 - - test_batch_3 - - test_batch_4 - - test_ci - - test_hailtop_batch_0 - - test_hailtop_batch_1 - - test_hailtop_batch_2 - - test_hailtop_batch_3 - - test_hailtop_batch_4 - - test_hail_python_service_backend_0 - - test_hail_python_service_backend_1 - - test_hail_python_service_backend_2 - - test_hail_python_service_backend_3 - - test_hail_python_service_backend_4 - kind: runImage name: test_batch_invariants image: @@ -5114,7 +5083,6 @@ steps: - test_hail_python_service_backend_2 - test_hail_python_service_backend_3 - test_hail_python_service_backend_4 - - cancel_all_running_test_batches - kind: runImage name: delete_gcp_batch_instances image: @@ -5164,7 +5132,6 @@ steps: - test_hail_python_service_backend_2 - test_hail_python_service_backend_3 - test_hail_python_service_backend_4 - - cancel_all_running_test_batches - kind: runImage name: delete_azure_batch_instances image: mcr.microsoft.com/azure-cli @@ -5215,4 +5182,3 @@ steps: - test_hailtop_batch_2 - test_hailtop_batch_3 - test_hailtop_batch_4 - - cancel_all_running_test_batches diff --git a/ci/ci/build.py b/ci/ci/build.py index dc0e8d49759..801dae20d4f 100644 --- a/ci/ci/build.py +++ b/ci/ci/build.py @@ -9,7 +9,7 @@ import yaml from gear.cloud_config import get_global_config -from hailtop.utils import RETRY_FUNCTION_SCRIPT, flatten +from hailtop.utils import flatten 
from .environment import BUILDKIT_IMAGE, CI_UTILS_IMAGE, CLOUD, DEFAULT_NAMESPACE, DOCKER_PREFIX, DOMAIN, STORAGE_URI from .globals import is_test_deployment @@ -311,11 +311,9 @@ def build(self, batch, code, scope): /bin/sh /home/user/convert-cloud-credentials-to-docker-auth-config set -x -{RETRY_FUNCTION_SCRIPT} - export BUILDKITD_FLAGS='--oci-worker-no-process-sandbox --oci-worker-snapshotter=overlayfs' export BUILDCTL_CONNECT_RETRIES_MAX=100 # https://github.com/moby/buildkit/issues/1423 -retry buildctl-daemonless.sh \ +buildctl-daemonless.sh \ build \ --frontend dockerfile.v0 \ --local context={shq(context)} \ diff --git a/ci/ci/github.py b/ci/ci/github.py index 9d6faf9e0a1..42400c11ae8 100644 --- a/ci/ci/github.py +++ b/ci/ci/github.py @@ -406,7 +406,7 @@ def _hail_github_status_from_statuses(statuses_json) -> Dict[str, GithubStatus]: hail_statuses = {} for s in statuses: context = s['context'] - if context == GITHUB_STATUS_CONTEXT or context.startswith('hail-ci'): + if context == GITHUB_STATUS_CONTEXT: if context in hail_statuses: raise ValueError( f'github sent multiple status summaries for context {context}: {s}\n\n{statuses_json}' diff --git a/dev-docs/kubernetes-operations.md b/dev-docs/kubernetes-operations.md index 0690a4ca520..845fd817153 100644 --- a/dev-docs/kubernetes-operations.md +++ b/dev-docs/kubernetes-operations.md @@ -2,15 +2,13 @@ ## Altering a Node Pool -### When managing node pools manually - We will have the old node pool and the new node pool active simultaneously. We will use `cordon` and `drain` to move all load from the old node pool to the new node pool. Then we will delete the old node pool. 1. Add a new node pool to the cluster. You can use the UI or `gcloud`. We have two kinds of node pools: non-preemptible and preemptible, their names should always be non-preemptible-pool-N and - preemptible-pool-N, respectively. When you re-create the nodepool, increment the number by + prremptible-pool-N, respectively. When you re-create the nodepool, increment the number by one. Take care to copy the taints and tags correctly. 2. Wait for the new nodepool to be ready. @@ -49,48 +47,3 @@ kubectl drain --delete-emptydir-data --ignore-daemonsets --selector="cloud.googl ``` gcloud container node-pools delete $OLD_POOL_NAME --cluster $CLUSTER_NAME ``` - -### When using terraform -If using terraform to manage the node pools, we use terraform to create and delete -the pools. Assume we are replacing a pool whose terraform resource name is -`vdc_preemptible_pool`. NOTE: the following names apply to the *terraform resource*, -not the names of the node pools themselves, which should adhere to the naming -conventions outlined above and specified as terraform variables. - -To complete step 1, copy the existing node pool resource -under a new name, `vdc_preemptible_pool_2`, make the desired changes to the new -resource and apply the terraform. This should not alter existing node pools. - -Once draining is complete, take the following steps to remove the old node pool -and restore a clean terraform state: -1. Delete the resource `vdc_preemptible_pool` and apply. This should delete the old node pool. -2. Move the state of the new resource into the old one. For example, if in Azure, run - -``` -terraform state mv \ -module.vdc.azurerm_kubernetes_cluster_node_pool.vdc_preemptible_pool_2 \ -module.vdc.azurerm_kubernetes_cluster_node_pool.vdc_preemptible_pool -``` - -3. Rename `vdc_preemptible_pool_2` to `vdc_preemptible_pool`. 
If you try -to `terraform apply`, there should be no planned changes and the git history -should be clean. - - -## Troubleshooting - -### Terraform Kubernetes provider dialing localhost -Occasionally, the `kubernetes` provider can initialize before fetching necessary -state (as the credentials are themselves terraform resources) and fall back to -dialing localhost. This can occur if you are switching between Hail installations -and the local mirror of the terraform state needs to be sync'd from remote storage -at the start of `terraform apply`. - -As of writing, this -[remains an issue](https://github.com/hashicorp/terraform-provider-kubernetes/issues/1028) -with the kubernetes provider. A workaround to fully initialize the state is instead -of just running `terraform apply` for the entire module, to instead target just -the resources that generate the kubernetes configuration but do not themselves -rely on the kubernetes provider. Run `terraform apply -var-file=global.tfvars -target=module.vdc` -to correctly sync local terraform state, and subsequent invocations of `terraform apply` -should work as expected. diff --git a/docker/Dockerfile.base b/docker/Dockerfile.base index d2193ad02bf..e31f517b6de 100644 --- a/docker/Dockerfile.base +++ b/docker/Dockerfile.base @@ -38,7 +38,7 @@ RUN hail-apt-get-install xz-utils libncurses5 && \ ln -s /opt/mysql-8.0.26-linux-glibc2.17-x86_64-minimal-rebuild/bin/* /usr/bin/ # Regarding explicitly selecting 2.0.1: https://github.com/hail-is/hail/issues/8343 -RUN curl >${SPARK_HOME}/jars/gcs-connector-hadoop2-2.0.1.jar https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop2-2.0.1.jar +RUN wget -nv -O ${SPARK_HOME}/jars/gcs-connector-hadoop2-2.0.1.jar https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop2-2.0.1.jar COPY docker/core-site.xml ${SPARK_HOME}/conf/core-site.xml RUN git clone https://github.com/catchorg/Catch2.git --depth 1 --branch v2.13.3 && \ diff --git a/docker/hail-ubuntu/curlrc b/docker/hail-ubuntu/curlrc index c0d88933041..bbf75c92cc5 100644 --- a/docker/hail-ubuntu/curlrc +++ b/docker/hail-ubuntu/curlrc @@ -1,7 +1,7 @@ --connect-timeout 5 --max-time 10 --retry 5 ---retry-connrefused +--retry-all-errors --retry-max-time 40 --location --fail diff --git a/docker/requirements.txt b/docker/requirements.txt index fe18a9692d1..6b409f33c12 100644 --- a/docker/requirements.txt +++ b/docker/requirements.txt @@ -22,9 +22,9 @@ flake8==4.0.1 Flask-Cors==3.0.10 Flask-Sockets==0.2.1 Flask==2.0.3 +gcsfs==2021.* gidgethub==4.1.0 google-api-python-client==1.7.10 -google-auth-oauthlib==0.4.6 google-cloud-logging==1.12.1 humanize==1.0.0 hurry.filesize==0.9 diff --git a/gateway/deployment.yaml b/gateway/deployment.yaml index d6c78a53cc3..c071fd83e3d 100644 --- a/gateway/deployment.yaml +++ b/gateway/deployment.yaml @@ -77,7 +77,7 @@ spec: - type: Resource resource: name: cpu - targetAverageUtilization: 2500 + targetAverageUtilization: 80 --- apiVersion: policy/v1beta1 kind: PodDisruptionBudget diff --git a/gear/gear/database.py b/gear/gear/database.py index 728fe8b0874..03d6f3f4a20 100644 --- a/gear/gear/database.py +++ b/gear/gear/database.py @@ -9,7 +9,7 @@ import aiomysql import pymysql -from gear.metrics import DB_CONNECTION_QUEUE_SIZE, SQL_TRANSACTIONS, PrometheusSQLTimer +from gear.metrics import DB_CONNECTION_QUEUE_SIZE, PrometheusSQLTimer from hailtop.auth.sql_config import SQLConfig from hailtop.utils import sleep_and_backoff @@ -159,7 +159,6 @@ async def async_init(self, db_pool, read_only): try: self.conn_context_manager = 
db_pool.acquire() DB_CONNECTION_QUEUE_SIZE.inc() - SQL_TRANSACTIONS.inc() self.conn = await aenter(self.conn_context_manager) DB_CONNECTION_QUEUE_SIZE.dec() async with self.conn.cursor() as cursor: @@ -236,15 +235,10 @@ async def execute_update(self, sql, args=None): async with self.conn.cursor() as cursor: return await cursor.execute(sql, args) - async def execute_many(self, sql, args_array, query_name=None): + async def execute_many(self, sql, args_array): assert self.conn async with self.conn.cursor() as cursor: - if query_name is None: - res = await cursor.executemany(sql, args_array) - else: - async with PrometheusSQLTimer(query_name): - res = await cursor.executemany(sql, args_array) - return res + return await cursor.executemany(sql, args_array) class CallError(Exception): @@ -299,9 +293,9 @@ async def execute_update(self, sql, args=None): return await tx.execute_update(sql, args) @retry_transient_mysql_errors - async def execute_many(self, sql, args_array, query_name=None): + async def execute_many(self, sql, args_array): async with self.start() as tx: - return await tx.execute_many(sql, args_array, query_name=query_name) + return await tx.execute_many(sql, args_array) @retry_transient_mysql_errors async def check_call_procedure(self, sql, args=None, query_name=None): diff --git a/gear/gear/metrics.py b/gear/gear/metrics.py index a04c9616880..5f9d68a8380 100644 --- a/gear/gear/metrics.py +++ b/gear/gear/metrics.py @@ -6,7 +6,6 @@ REQUEST_COUNT = pc.Counter('http_request_count', 'Number of HTTP requests', ['endpoint', 'verb', 'status']) CONCURRENT_REQUESTS = pc.Gauge('http_concurrent_requests', 'Number of in progress HTTP requests', ['endpoint', 'verb']) -SQL_TRANSACTIONS = pc.Counter('sql_transactions', 'Number of SQL transactions') SQL_QUERY_COUNT = pc.Counter('sql_query_count', 'Number of SQL Queries', ['query_name']) SQL_QUERY_LATENCY = pc.Summary('sql_query_latency_seconds', 'SQL Query latency in seconds', ['query_name']) DB_CONNECTION_QUEUE_SIZE = pc.Gauge('sql_connection_queue_size', 'Number of coroutines waiting for a connection') diff --git a/hail/.gitignore b/hail/.gitignore index 33862f4ba05..d075af07c46 100644 --- a/hail/.gitignore +++ b/hail/.gitignore @@ -3,7 +3,6 @@ python/README.md python/dist python/hail.egg-info python/hail/backend/hail-all-spark.jar -python/hail/hail_revision python/hail/hail_pip_version python/hail/docs/change_log.rst python/hail/docs/_build/* diff --git a/hail/Dockerfile.hail-run-tests b/hail/Dockerfile.hail-run-tests index e8299c2cd0f..66bfc4cc340 100644 --- a/hail/Dockerfile.hail-run-tests +++ b/hail/Dockerfile.hail-run-tests @@ -2,7 +2,7 @@ FROM {{ hail_run_image.image }} RUN mkdir -p plink && \ cd plink && \ - curl >plink_linux_x86_64.zip https://storage.googleapis.com/hail-common/plink_linux_x86_64_20181202.zip && \ + wget -O plink_linux_x86_64.zip https://storage.googleapis.com/hail-common/plink_linux_x86_64_20181202.zip && \ unzip plink_linux_x86_64.zip && \ mv plink /usr/local/bin && \ cd .. 
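# A self-contained sketch of the executemany path from gear/gear/database.py
# above, using aiomysql directly. The pool setup and the retry_transient_mysql_errors
# decorator live elsewhere in gear, so the bare retry loop here is an illustration
# of the idea, not that decorator.
import asyncio
import aiomysql
import pymysql

async def execute_many(pool: aiomysql.Pool, sql: str, args_array, max_attempts: int = 5):
    attempt = 0
    while True:
        attempt += 1
        try:
            async with pool.acquire() as conn:
                async with conn.cursor() as cursor:
                    n = await cursor.executemany(sql, args_array)
                await conn.commit()
                return n
        except pymysql.err.OperationalError:
            # e.g. lost connection or deadlock; back off briefly and retry
            if attempt == max_attempts:
                raise
            await asyncio.sleep(0.5 * attempt)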
&& \ diff --git a/hail/Makefile b/hail/Makefile index a9252d22c34..5913ca35cd0 100644 --- a/hail/Makefile +++ b/hail/Makefile @@ -14,7 +14,7 @@ BRANCH := $(shell git rev-parse --abbrev-ref HEAD) SCALA_VERSION ?= 2.12.13 SPARK_VERSION ?= 3.1.2 HAIL_MAJOR_MINOR_VERSION := 0.2 -HAIL_PATCH_VERSION := 93 +HAIL_PATCH_VERSION := 91 HAIL_PIP_VERSION := $(HAIL_MAJOR_MINOR_VERSION).$(HAIL_PATCH_VERSION) HAIL_VERSION := $(HAIL_PIP_VERSION)-$(SHORT_REVISION) ELASTIC_MAJOR_VERSION ?= 8 @@ -49,8 +49,7 @@ JAR_DEBUG_CLASSES := $(addprefix $(BUILD_DEBUG_PREFIX)/, $(JAR_DEBUG_SOURCES:src PY_FILES := $(shell git ls-files python) INIT_SCRIPTS := python/hailtop/hailctl/deploy.yaml -PYTHON_VERSION_INFO := python/hail/hail_revision -PYTHON_VERSION_INFO += python/hail/hail_version +PYTHON_VERSION_INFO := python/hail/hail_version PYTHON_VERSION_INFO += python/hail/hail_pip_version PYTHON_VERSION_INFO += python/hailtop/hail_version PYTHON_VERSION_INFO += python/hail/docs/_static/hail_version.js @@ -120,9 +119,6 @@ src/main/resources/build-info.properties: Makefile .PHONY: python-version-info python-version-info: $(PYTHON_VERSION_INFO) -python/hail/hail_revision: env/REVISION - echo $(REVISION) > $@ - python/hail/hail_version: env/SHORT_REVISION env/HAIL_PIP_VERSION echo $(HAIL_VERSION) > $@ diff --git a/hail/python/dev/requirements.txt b/hail/python/dev/requirements.txt index a0837e3d327..7d3e936da50 100644 --- a/hail/python/dev/requirements.txt +++ b/hail/python/dev/requirements.txt @@ -5,7 +5,7 @@ pre-commit==2.17.0 black==22.1.0 curlylint==0.12.0 isort==5.10.1 -pytest==7.1.1 +pytest==6.2.5 pytest-html==1.20.0 pytest-xdist==2.2.1 pytest-instafail==0.4.2 diff --git a/hail/python/hail/__init__.py b/hail/python/hail/__init__.py index f9746988525..a81aabf168f 100644 --- a/hail/python/hail/__init__.py +++ b/hail/python/hail/__init__.py @@ -1,4 +1,3 @@ -from typing import Optional import pkg_resources import sys import asyncio @@ -64,7 +63,7 @@ hadoop_stat, hadoop_exists, hadoop_is_file, hadoop_is_dir, hadoop_scheme_supported, copy_log) -from .context import (init, init_local, init_batch, stop, spark_context, tmp_dir, # noqa: E402 +from .context import (init, init_local, init_service, stop, spark_context, tmp_dir, # noqa: E402 default_reference, get_reference, set_global_seed, _set_flags, _get_flags, _async_current_backend, current_backend, debug_info, citation, cite_hail, cite_hail_bibtex, version, TemporaryFilename, TemporaryDirectory) @@ -74,7 +73,7 @@ __all__ = [ 'init', 'init_local', - 'init_batch', + 'init_service', 'stop', 'spark_context', 'tmp_dir', @@ -136,8 +135,7 @@ ir.register_functions() ir.register_aggregators() -__version__: Optional[str] = None # set by hail.version() -__revision__: Optional[str] = None # set by hail.revision() +__version__ = None # set in hail.init() import warnings # noqa: E402 diff --git a/hail/python/hail/backend/backend.py b/hail/python/hail/backend/backend.py index 1caa4d5a3e6..5e6fa0e36c4 100644 --- a/hail/python/hail/backend/backend.py +++ b/hail/python/hail/backend/backend.py @@ -1,23 +1,5 @@ -from typing import Mapping, List, Union, Tuple, Dict, Optional, Any import abc from ..fs.fs import FS -from ..expr import Expression -from ..expr.types import HailType -from ..ir import BaseIR -from ..utils.java import FatalError, HailUserError - - -def fatal_error_from_java_error_triplet(short_message, expanded_message, error_id): - from .. 
import __version__ - if error_id != -1: - return FatalError(f'Error summary: {short_message}', error_id) - return FatalError(f'''{short_message} - -Java stack trace: -{expanded_message} -Hail version: {__version__} -Error summary: {short_message}''', - error_id) class Backend(abc.ABC): @@ -26,7 +8,7 @@ def stop(self): pass @abc.abstractmethod - def execute(self, ir: BaseIR, timed: bool = False) -> Any: + def execute(self, ir, timed=False): pass @abc.abstractmethod @@ -115,12 +97,7 @@ def fs(self) -> FS: pass @abc.abstractmethod - def index_bgen(self, - files: List[str], - index_file_map: Dict[str, str], - referenceGenomeName: Optional[str], - contig_recoding: Dict[str, str], - skip_invalid_loci: bool): + def index_bgen(self, files, index_file_map, rg, contig_recoding, skip_invalid_loci): pass @abc.abstractmethod @@ -128,7 +105,6 @@ def import_fam(self, path: str, quant_pheno: bool, delimiter: str, missing: str) pass def persist_table(self, t, storage_level): - # FIXME: this can't possibly be right. return t def unpersist_table(self, t): @@ -144,46 +120,9 @@ def unpersist_block_matrix(self, id): pass @abc.abstractmethod - def register_ir_function(self, - name: str, - type_parameters: Union[Tuple[HailType, ...], List[HailType]], - value_parameter_names: Union[Tuple[str, ...], List[str]], - value_parameter_types: Union[Tuple[HailType, ...], List[HailType]], - return_type: HailType, - body: Expression): - pass - - @abc.abstractmethod - def persist_expression(self, expr: Expression) -> Expression: - pass - - @abc.abstractmethod - def set_flags(self, **flags: Mapping[str, str]): - """Set Hail flags.""" - pass - - @abc.abstractmethod - def get_flags(self, *flags) -> Mapping[str, str]: - """Mapping of Hail flags.""" + def register_ir_function(self, name, type_parameters, argument_names, argument_types, return_type, body): pass - @property @abc.abstractmethod - def requires_lowering(self): + def persist_ir(self, ir): pass - - def _handle_fatal_error_from_backend(self, err: FatalError, ir: BaseIR): - if err._error_id is None: - raise err - - error_sources = ir.base_search(lambda x: x._error_id == err._error_id) - if len(error_sources) == 0: - raise err - - better_stack_trace = error_sources[0]._stack_trace - error_message = str(err) - message_and_trace = (f'{error_message}\n' - '------------\n' - 'Hail stack trace:\n' - f'{better_stack_trace}') - raise HailUserError(message_and_trace) from None diff --git a/hail/python/hail/backend/local_backend.py b/hail/python/hail/backend/local_backend.py index 9ca30b4c3c6..9eb7cd0bbf1 100644 --- a/hail/python/hail/backend/local_backend.py +++ b/hail/python/hail/backend/local_backend.py @@ -13,6 +13,7 @@ from hail.expr.matrix_type import tmatrix from hail.expr.table_type import ttable from hail.expr.types import dtype +from hail.ir import JavaIR from hail.ir.renderer import CSERenderer from hail.utils.java import scala_package_object, scala_object from .py4j_backend import Py4JBackend, handle_java_exception @@ -174,7 +175,7 @@ def utils_package_object(self): def stop(self): self._jhc.stop() self._jhc = None - self._gateway.shutdown() + # FIXME stop gateway? 
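# A stand-alone rendering of the error-formatting helper above: collapse the
# Java-side triplet (short message, expanded message, error id) into a single
# FatalError, keeping the full Java stack trace only for untagged (-1) errors.
# FatalError is a stand-in class here; the real one lives in hail.utils.java.
class FatalError(Exception):
    def __init__(self, msg, error_id=-1):
        super().__init__(msg)
        self._error_id = error_id

def fatal_error_from_triplet(short_message: str, expanded_message: str, error_id: int,
                             version: str = 'unknown') -> FatalError:
    if error_id != -1:
        # tagged errors get a short summary; the IR search elsewhere recovers the user trace
        return FatalError(f'Error summary: {short_message}', error_id)
    return FatalError(
        f'{short_message}\n\n'
        f'Java stack trace:\n{expanded_message}\n'
        f'Hail version: {version}\n'
        f'Error summary: {short_message}',
        error_id)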
uninstall_exception_handler() def _parse_value_ir(self, code, ref_map={}, ir_map={}): @@ -267,14 +268,13 @@ def remove_liftover(self, name, dest_reference_genome): name, dest_reference_genome) def parse_vcf_metadata(self, path): - return json.loads(self._jhc.pyParseVCFMetadataJSON(self._jbackend.fs(), path)) + return json.loads(self._jhc.pyParseVCFMetadataJSON(self.fs._jfs, path)) - def index_bgen(self, files, index_file_map, referenceGenomeName, contig_recoding, skip_invalid_loci): - self._jbackend.pyIndexBgen(files, index_file_map, referenceGenomeName, contig_recoding, skip_invalid_loci) + def index_bgen(self, files, index_file_map, rg, contig_recoding, skip_invalid_loci): + self._jbackend.pyIndexBgen(files, index_file_map, rg, contig_recoding, skip_invalid_loci) def import_fam(self, path: str, quant_pheno: bool, delimiter: str, missing: str): return json.loads(self._jbackend.pyImportFam(path, quant_pheno, delimiter, missing)) - @property - def requires_lowering(self): - return True + def persist_ir(self, ir): + return JavaIR(self._jhc.backend().executeLiteral(self._to_java_value_ir(ir))) diff --git a/hail/python/hail/backend/py4j_backend.py b/hail/python/hail/backend/py4j_backend.py index fc808d9d70e..7ae92ee8ab7 100644 --- a/hail/python/hail/backend/py4j_backend.py +++ b/hail/python/hail/backend/py4j_backend.py @@ -1,17 +1,11 @@ -from typing import Mapping, Union, Tuple, List import abc import py4j -import py4j.java_gateway import hail -from hail.expr import construct_expr -from hail.ir import JavaIR from hail.ir.renderer import CSERenderer -from hail.utils.java import FatalError, Env -from .backend import Backend, fatal_error_from_java_error_triplet -from ..expr import Expression -from ..expr.types import HailType +from hail.utils.java import FatalError, Env, HailUserError +from .backend import Backend def handle_java_exception(f): @@ -28,7 +22,13 @@ def deco(*args, **kwargs): tpl = Env.jutils().handleForPython(e.java_exception) deepest, full, error_id = tpl._1(), tpl._2(), tpl._3() - raise fatal_error_from_java_error_triplet(deepest, full, error_id) from None + + if error_id != -1: + raise FatalError('Error summary: %s' % (deepest,), error_id) from None + else: + raise FatalError('%s\n\nJava stack trace:\n%s\n' + 'Hail version: %s\n' + 'Error summary: %s' % (deepest, full, hail.__version__, deepest), error_id) from None except pyspark.sql.utils.CapturedException as e: raise FatalError('%s\n\nJava stack trace:\n%s\n' 'Hail version: %s\n' @@ -38,7 +38,6 @@ def deco(*args, **kwargs): class Py4JBackend(Backend): - _jbackend: py4j.java_gateway.JavaObject @abc.abstractmethod def __init__(self): @@ -67,26 +66,15 @@ def utils_package_object(self): def _parse_value_ir(self, code, ref_map={}, ir_map={}): pass - @abc.abstractmethod - def _to_java_value_ir(self, ir): - pass - - def register_ir_function(self, - name: str, - type_parameters: Union[Tuple[HailType, ...], List[HailType]], - value_parameter_names: Union[Tuple[str, ...], List[str]], - value_parameter_types: Union[Tuple[HailType, ...], List[HailType]], - return_type: HailType, - body: Expression): + def register_ir_function(self, name, type_parameters, argument_names, argument_types, return_type, body): r = CSERenderer(stop_at_jir=True) code = r(body._ir) - jbody = (self._parse_value_ir(code, ref_map=dict(zip(value_parameter_names, value_parameter_types)), ir_map=r.jirs)) + jbody = (self._parse_value_ir(code, ref_map=dict(zip(argument_names, argument_types)), ir_map=r.jirs)) Env.hail().expr.ir.functions.IRFunctionRegistry.pyRegisterIR( 
name, [ta._parsable_string() for ta in type_parameters], - value_parameter_names, - [pt._parsable_string() for pt in value_parameter_types], + argument_names, [pt._parsable_string() for pt in argument_types], return_type._parsable_string(), jbody) @@ -95,13 +83,31 @@ def execute(self, ir, timed=False): stream_codec = '{"name":"StreamBufferSpec"}' # print(self._hail_package.expr.ir.Pretty.apply(jir, True, -1)) try: - result_tuple = self._jbackend.executeEncode(jir, stream_codec) + result_tuple = self._jhc.backend().executeEncode(jir, stream_codec) (result, timings) = (result_tuple._1(), result_tuple._2()) value = ir.typ._from_encoding(result) return (value, timings) if timed else value except FatalError as e: - self._handle_fatal_error_from_backend(e, ir) + error_id = e._error_id + + def criteria(hail_ir): + return hail_ir._error_id is not None and hail_ir._error_id == error_id + + error_sources = ir.base_search(criteria) + better_stack_trace = None + if error_sources: + better_stack_trace = error_sources[0]._stack_trace + + if better_stack_trace: + error_message = str(e) + message_and_trace = (f'{error_message}\n' + '------------\n' + 'Hail stack trace:\n' + f'{better_stack_trace}') + raise HailUserError(message_and_trace) from None + + raise e async def _async_execute(self, ir, timed=False): raise NotImplementedError('no async available in Py4JBackend') @@ -114,28 +120,3 @@ async def _async_get_reference(self, name): async def _async_get_references(self, names): raise NotImplementedError('no async available in Py4JBackend') - - def persist_expression(self, expr): - return construct_expr( - JavaIR(self._jbackend.executeLiteral(self._to_java_value_ir(expr._ir))), - expr.dtype - ) - - def set_flags(self, **flags: Mapping[str, str]): - available = self._jbackend.availableFlags() - invalid = [] - for flag, value in flags.items(): - if flag in available: - self._jbackend.setFlag(flag, value) - else: - invalid.append(flag) - if len(invalid) != 0: - raise FatalError("Flags {} not valid. 
Valid flags: \n {}" - .format(', '.join(invalid), '\n '.join(available))) - - def get_flags(self, *flags) -> Mapping[str, str]: - return {flag: self._jbackend.getFlag(flag) for flag in flags} - - @property - def requires_lowering(self): - return True diff --git a/hail/python/hail/backend/service_backend.py b/hail/python/hail/backend/service_backend.py index 2c802a938c5..a1b968d0d74 100644 --- a/hail/python/hail/backend/service_backend.py +++ b/hail/python/hail/backend/service_backend.py @@ -1,25 +1,22 @@ -from typing import Dict, Optional, Callable, Awaitable, Mapping, Any, List, Union, Tuple -import abc +from typing import Dict, Optional, Callable, Awaitable import asyncio import struct import os -from hail.expr.expressions.base_expression import Expression import orjson import logging import re import yaml from pathlib import Path -from hail.context import TemporaryDirectory, tmp_dir, TemporaryFilename, revision +from hail.context import TemporaryDirectory, tmp_dir from hail.utils import FatalError -from hail.expr.types import HailType, dtype, ttuple, tvoid +from hail.expr.types import dtype from hail.expr.table_type import ttable from hail.expr.matrix_type import tmatrix from hail.expr.blockmatrix_type import tblockmatrix -from hail.experimental import write_expression, read_expression from hail.ir.renderer import CSERenderer -from hailtop.config import (get_user_config, get_user_local_cache_dir, get_remote_tmpdir) +from hailtop.config import get_user_config, get_user_local_cache_dir, get_remote_tmpdir from hailtop.utils import async_to_blocking, secret_alnum_string, TransientError, Timings from hailtop.batch_client import client as hb from hailtop.batch_client import aioclient as aiohb @@ -27,25 +24,16 @@ from hailtop.aiotools.router_fs import RouterAsyncFS import hailtop.aiotools.fs as afs -from .backend import Backend, fatal_error_from_java_error_triplet +from .backend import Backend from ..builtin_references import BUILTIN_REFERENCES from ..fs.fs import FS from ..fs.router_fs import RouterFS -from ..ir import BaseIR from ..context import version -from ..utils import frozendict log = logging.getLogger('backend.service_backend') -async def write_bool(strm: afs.WritableStream, v: bool): - if v: - await strm.write(b'\x01') - else: - await strm.write(b'\x00') - - async def write_int(strm: afs.WritableStream, v: int): await strm.write(struct.pack(' Dict[str, str]: - raise NotImplementedError - - -class JarUrl(JarSpec): - def __init__(self, url): - self.url = url - - def to_dict(self) -> Dict[str, str]: - return {'type': 'jar_url', 'value': self.url} - - def __repr__(self): - return f'JarUrl({self.url})' - - -class GitRevision(JarSpec): - def __init__(self, revision): - self.revision = revision - - def to_dict(self) -> Dict[str, str]: - return {'type': 'git_revision', 'value': self.revision} - - def __repr__(self): - return f'GitRevision({self.revision})' - - -def _get_jar_specification(jar_url: Optional[str]) -> JarSpec: - user_config = get_user_config() - - jar_url = jar_url or os.environ.get('HAIL_JAR_URL') - jar_url = jar_url or user_config.get('query', 'jar_url', fallback=None) - - if jar_url is not None: - return JarUrl(jar_url) - return GitRevision(revision()) - - -class IRFunction: - def __init__(self, - name: str, - type_parameters: Union[Tuple[HailType, ...], List[HailType]], - value_parameter_names: Union[Tuple[str, ...], List[str]], - value_parameter_types: Union[Tuple[HailType, ...], List[HailType]], - return_type: HailType, - body: Expression): - assert 
len(value_parameter_names) == len(value_parameter_types) - render = CSERenderer(stop_at_jir=True) - self._name = name - self._type_parameters = type_parameters - self._value_parameter_names = value_parameter_names - self._value_parameter_types = value_parameter_types - self._return_type = return_type - self._rendered_body = render(body._ir) - - async def serialize(self, writer: afs.WritableStream): - await write_str(writer, self._name) - - await write_int(writer, len(self._type_parameters)) - for type_parameter in self._type_parameters: - await write_str(writer, type_parameter._parsable_string()) - - await write_int(writer, len(self._value_parameter_names)) - for value_parameter_name in self._value_parameter_names: - await write_str(writer, value_parameter_name) - - await write_int(writer, len(self._value_parameter_types)) - for value_parameter_type in self._value_parameter_types: - await write_str(writer, value_parameter_type._parsable_string()) - - await write_str(writer, self._return_type._parsable_string()) - await write_str(writer, self._rendered_body) - - class ServiceBackend(Backend): HAIL_BATCH_FAILURE_EXCEPTION_MESSAGE_RE = re.compile("is.hail.backend.service.HailBatchFailure: ([0-9]+)\n") @@ -200,23 +113,22 @@ class ServiceBackend(Backend): PARSE_VCF_METADATA = 8 INDEX_BGEN = 9 IMPORT_FAM = 10 + GOODBYE = 254 @staticmethod async def create(*, billing_project: Optional[str] = None, batch_client: Optional[aiohb.BatchClient] = None, + skip_logging_configuration: Optional[bool] = None, disable_progress_bar: bool = True, - remote_tmpdir: Optional[str] = None, - flags: Optional[Dict[str, str]] = None, - jar_url: Optional[str] = None, - driver_cores: Optional[Union[int, str]] = None, - driver_memory: Optional[Union[int, str]] = None, - name_prefix: Optional[str] = None): + remote_tmpdir: Optional[str] = None): + del skip_logging_configuration + if billing_project is None: billing_project = get_user_config().get('batch', 'billing_project', fallback=None) if billing_project is None: raise ValueError( - "No billing project. Call 'init_batch' with the billing " + "No billing project. 
Call 'init_service' with the billing " "project or run 'hailctl config set batch/billing_project " "MY_BILLING_PROJECT'" ) @@ -230,21 +142,6 @@ async def create(*, user_local_reference_cache_dir = Path(get_user_local_cache_dir(), 'references', version()) os.makedirs(user_local_reference_cache_dir, exist_ok=True) remote_tmpdir = get_remote_tmpdir('ServiceBackend', remote_tmpdir=remote_tmpdir) - jar_spec = _get_jar_specification(jar_url) - - driver_cores = ( - driver_cores - or os.environ.get('HAIL_QUERY_BATCH_DRIVER_CORES', None) - or get_user_config().get('query', 'batch_driver_cores', fallback=None) - or '1' - ) - - driver_memory = ( - driver_memory - or os.environ.get('HAIL_QUERY_BATCH_DRIVER_MEMORY', None) - or get_user_config().get('query', 'batch_driver_memory', fallback=None) - or 'standard' - ) return ServiceBackend( billing_project=billing_project, @@ -255,15 +152,9 @@ async def create(*, batch_attributes=batch_attributes, user_local_reference_cache_dir=user_local_reference_cache_dir, remote_tmpdir=remote_tmpdir, - flags=flags or {}, - jar_spec=jar_spec, - driver_cores=driver_cores, - driver_memory=driver_memory, - name_prefix=name_prefix or '' ) def __init__(self, - *, billing_project: str, sync_fs: FS, async_fs: AsyncFS, @@ -271,12 +162,7 @@ def __init__(self, disable_progress_bar: bool, batch_attributes: Dict[str, str], user_local_reference_cache_dir: Path, - remote_tmpdir: str, - flags: Dict[str, str], - jar_spec: JarSpec, - driver_cores: Optional[Union[int, str]], - driver_memory: Optional[Union[int, str]], - name_prefix: str): + remote_tmpdir: str): self.billing_project = billing_project self._sync_fs = sync_fs self._async_fs = async_fs @@ -286,27 +172,6 @@ def __init__(self, self.batch_attributes = batch_attributes self.user_local_reference_cache_dir = user_local_reference_cache_dir self.remote_tmpdir = remote_tmpdir - self.flags = flags - self.jar_spec = jar_spec - self.functions: List[IRFunction] = [] - self.driver_cores = driver_cores - self.driver_memory = driver_memory - self.name_prefix = name_prefix - - if "use_new_shuffle" not in self.flags: - self.flags["use_new_shuffle"] = "1" - - def debug_info(self) -> Dict[str, Any]: - return { - 'jar_spec': str(self.jar_spec), - 'billing_project': self.billing_project, - 'batch_attributes': self.batch_attributes, - 'user_local_reference_cache_dir': str(self.user_local_reference_cache_dir), - 'remote_tmpdir': self.remote_tmpdir, - 'flags': self.flags, - 'driver_cores': self.driver_cores, - 'driver_memory': self.driver_memory - } @property def fs(self) -> FS: @@ -319,7 +184,6 @@ def logger(self): def stop(self): async_to_blocking(self._async_fs.close()) async_to_blocking(self.async_bc.close()) - self.functions = [] def render(self, ir): r = CSERenderer() @@ -328,44 +192,29 @@ def render(self, ir): async def _rpc(self, name: str, - inputs: Callable[[afs.WritableStream, str], Awaitable[None]], - *, - ir: Optional[BaseIR] = None): + inputs: Callable[[afs.WritableStream, str], Awaitable[None]]): timings = Timings() token = secret_alnum_string() iodir = TemporaryDirectory(ensure_exists=False).name # FIXME: actually cleanup with TemporaryDirectory(ensure_exists=False) as _: with timings.step("write input"): async with await self._async_fs.create(iodir + '/in') as infile: - nonnull_flag_count = sum(v is not None for v in self.flags.values()) - await write_int(infile, nonnull_flag_count) - for k, v in self.flags.items(): - if v is not None: - await write_str(infile, k) - await write_str(infile, v) await inputs(infile, token) with 
timings.step("submit batch"): batch_attributes = self.batch_attributes if 'name' not in batch_attributes: - batch_attributes = {**batch_attributes, 'name': self.name_prefix + name} + batch_attributes = {**batch_attributes, 'name': name} bb = self.async_bc.create_batch(token=token, attributes=batch_attributes) - j = bb.create_jvm_job( - jar_spec=self.jar_spec.to_dict(), - argv=[ - ServiceBackend.DRIVER, - batch_attributes['name'], - iodir + '/in', - iodir + '/out' - ], - mount_tokens=True, - resources={ - 'preemptible': False, - 'cpu': str(self.driver_cores), - 'memory': str(self.driver_memory) - } - ) + j = bb.create_jvm_job([ + ServiceBackend.DRIVER, + os.environ['HAIL_SHA'], + os.environ['HAIL_JAR_URL'], + batch_attributes['name'], + iodir + '/in', + iodir + '/out', + ], mount_tokens=True, resources={'preemptible': False, 'memory': 'standard'}) b = await bb.submit(disable_progress_bar=self.disable_progress_bar) with timings.step("wait batch"): @@ -384,38 +233,26 @@ async def _rpc(self, logs = await j.log() for k in logs: logs[k] = yaml_literally_shown_str(logs[k].strip()) - message = {'service_backend_debug_info': self.debug_info(), - 'batch_status': status, + message = {'batch_status': status, 'job_status': job_status, 'log': logs} log.error(yaml.dump(message)) - raise FatalError(message) + raise ValueError(message) with timings.step("read output"): async with await self._async_fs.open(iodir + '/out') as outfile: success = await read_bool(outfile) if success: - result_bytes = await read_bytes(outfile) + json_bytes = await read_bytes(outfile) try: - return token, result_bytes, timings + return token, orjson.loads(json_bytes), timings except orjson.JSONDecodeError as err: - raise FatalError(f'batch id was {b.id}\ncould not decode {result_bytes}') from err + raise ValueError(f'batch id was {b.id}\ncould not decode {json_bytes}') from err else: - short_message = await read_str(outfile) - expanded_message = await read_str(outfile) - error_id = await read_int(outfile) - if error_id == -1: - error_id = None - maybe_batch_id = ServiceBackend.HAIL_BATCH_FAILURE_EXCEPTION_MESSAGE_RE.match(expanded_message) - if error_id is not None: - assert maybe_batch_id is None, str((short_message, expanded_message, error_id)) - assert ir is not None - self._handle_fatal_error_from_backend( - fatal_error_from_java_error_triplet(short_message, expanded_message, error_id), - ir) - if maybe_batch_id is not None: - assert error_id is None, str((short_message, expanded_message, error_id)) - batch_id = maybe_batch_id.groups()[0] + jstacktrace = await read_str(outfile) + maybe_id = ServiceBackend.HAIL_BATCH_FAILURE_EXCEPTION_MESSAGE_RE.match(jstacktrace) + if maybe_id: + batch_id = maybe_id.groups()[0] b2 = await self.async_bc.get_batch(batch_id) b2_status = await b2.status() assert b2_status['state'] != 'success' @@ -438,18 +275,16 @@ async def _rpc(self, }) message = { 'id': b.id, - 'service_backend_debug_info': self.debug_info(), - 'short_message': yaml_literally_shown_str(short_message.strip()), - 'expanded_message': yaml_literally_shown_str(expanded_message.strip()), + 'stacktrace': yaml_literally_shown_str(jstacktrace.strip()), 'cause': {'id': batch_id, 'batch_status': b2_status, 'failed_jobs': failed_jobs}} log.error(yaml.dump(message)) - raise FatalError(orjson.dumps(message).decode('utf-8')) - raise FatalError(f'batch id was {b.id}\n' + short_message + '\n' + expanded_message) + raise ValueError(orjson.dumps(message).decode('utf-8')) + raise FatalError(f'batch id was {b.id}\n' + jstacktrace) - def 
execute(self, ir: BaseIR, timed: bool = False): + def execute(self, ir, timed=False): return async_to_blocking(self._async_execute(ir, timed=timed)) - async def _async_execute(self, ir: BaseIR, timed: bool = False): + async def _async_execute(self, ir, timed=False): async def inputs(infile, token): await write_int(infile, ServiceBackend.EXECUTE) await write_str(infile, tmp_dir()) @@ -457,18 +292,9 @@ async def inputs(infile, token): await write_str(infile, self.remote_tmpdir) await write_str(infile, self.render(ir)) await write_str(infile, token) - await write_int(infile, len(self.functions)) - for fun in self.functions: - await fun.serialize(infile) - await write_str(infile, '{"name":"StreamBufferSpec"}') - - _, resp, timings = await self._rpc('execute(...)', inputs, ir=ir) - typ: HailType = ir.typ - if typ == tvoid: - assert resp == b'', (typ, resp) - converted_value = None - else: - converted_value = ttuple(typ)._from_encoding(resp)[0] + _, resp, timings = await self._rpc('execute(...)', inputs) + typ = dtype(resp['type']) + converted_value = typ._convert_from_json_na(resp['value']) if timed: return converted_value, timings return converted_value @@ -490,7 +316,7 @@ async def inputs(infile, _): await write_str(infile, self.remote_tmpdir) await write_str(infile, self.render(ir)) _, resp, _ = await self._rpc('value_type(...)', inputs) - return dtype(orjson.loads(resp)) + return dtype(resp) def table_type(self, tir): return async_to_blocking(self._async_table_type(tir)) @@ -503,7 +329,7 @@ async def inputs(infile, _): await write_str(infile, self.remote_tmpdir) await write_str(infile, self.render(tir)) _, resp, _ = await self._rpc('table_type(...)', inputs) - return ttable._from_json(orjson.loads(resp)) + return ttable._from_json(resp) def matrix_type(self, mir): return async_to_blocking(self._async_matrix_type(mir)) @@ -516,7 +342,7 @@ async def inputs(infile, _): await write_str(infile, self.remote_tmpdir) await write_str(infile, self.render(mir)) _, resp, _ = await self._rpc('matrix_type(...)', inputs) - return tmatrix._from_json(orjson.loads(resp)) + return tmatrix._from_json(resp) def blockmatrix_type(self, bmir): return async_to_blocking(self._async_blockmatrix_type(bmir)) @@ -529,7 +355,7 @@ async def inputs(infile, _): await write_str(infile, self.remote_tmpdir) await write_str(infile, self.render(bmir)) _, resp, _ = await self._rpc('blockmatrix_type(...)', inputs) - return tblockmatrix._from_json(orjson.loads(resp)) + return tblockmatrix._from_json(resp) def add_reference(self, config): raise NotImplementedError("ServiceBackend does not support 'add_reference'") @@ -560,8 +386,8 @@ async def inputs(infile, _): _, resp, _ = await self._rpc('get_reference(...)', inputs) if name in BUILTIN_REFERENCES: with open(Path(self.user_local_reference_cache_dir, name), 'wb') as f: - f.write(resp) - return orjson.loads(resp) + f.write(orjson.dumps(resp)) + return resp def get_references(self, names): return async_to_blocking(self._async_get_references(names)) @@ -580,7 +406,7 @@ async def inputs(infile, _): await write_str(infile, self.remote_tmpdir) await write_str(infile, path) _, resp, _ = await self._rpc('load_references_from_dataset(...)', inputs) - return orjson.loads(resp) + return resp def add_sequence(self, name, fasta_file, index_file): raise NotImplementedError("ServiceBackend does not support 'add_sequence'") @@ -605,99 +431,16 @@ async def inputs(infile, _): await write_str(infile, self.remote_tmpdir) await write_str(infile, path) _, resp, _ = await 
self._rpc('parse_vcf_metadata(...)', inputs) - return orjson.loads(resp) - - def index_bgen(self, - files: List[str], - index_file_map: Dict[str, str], - referenceGenomeName: Optional[str], - contig_recoding: Dict[str, str], - skip_invalid_loci: bool): - return async_to_blocking(self._async_index_bgen( - files, - index_file_map, - referenceGenomeName, - contig_recoding, - skip_invalid_loci - )) - - async def _async_index_bgen(self, - files: List[str], - index_file_map: Dict[str, str], - referenceGenomeName: Optional[str], - contig_recoding: Dict[str, str], - skip_invalid_loci: bool): - async def inputs(infile, _): - await write_int(infile, ServiceBackend.INDEX_BGEN) - await write_str(infile, tmp_dir()) - await write_str(infile, self.billing_project) - await write_str(infile, self.remote_tmpdir) - await write_int(infile, len(files)) - for fname in files: - await write_str(infile, fname) - await write_int(infile, len(index_file_map)) - for k, v in index_file_map.items(): - await write_str(infile, k) - await write_str(infile, v) - if referenceGenomeName is None: - await write_bool(infile, False) - else: - await write_bool(infile, True) - await write_str(infile, referenceGenomeName) - await write_int(infile, len(contig_recoding)) - for k, v in contig_recoding.items(): - await write_str(infile, k) - await write_str(infile, v) - await write_bool(infile, skip_invalid_loci) - - _, resp, _ = await self._rpc('index_bgen(...)', inputs) - assert resp == b'null' - return None + return resp + + def index_bgen(self, files, index_file_map, rg, contig_recoding, skip_invalid_loci): + raise NotImplementedError("ServiceBackend does not support 'index_bgen'") def import_fam(self, path: str, quant_pheno: bool, delimiter: str, missing: str): - return async_to_blocking(self._async_import_fam(path, quant_pheno, delimiter, missing)) + raise NotImplementedError("ServiceBackend does not support 'import_fam'") - async def _async_import_fam(self, path: str, quant_pheno: bool, delimiter: str, missing: str): - async def inputs(infile, _): - await write_int(infile, ServiceBackend.IMPORT_FAM) - await write_str(infile, tmp_dir()) - await write_str(infile, self.billing_project) - await write_str(infile, self.remote_tmpdir) - await write_str(infile, path) - await write_bool(infile, quant_pheno) - await write_str(infile, delimiter) - await write_str(infile, missing) - _, resp, _ = await self._rpc('import_fam(...)', inputs) - return orjson.loads(resp) - - def register_ir_function(self, - name: str, - type_parameters: Union[Tuple[HailType, ...], List[HailType]], - value_parameter_names: Union[Tuple[str, ...], List[str]], - value_parameter_types: Union[Tuple[HailType, ...], List[HailType]], - return_type: HailType, - body: Expression): - self.functions.append(IRFunction( - name, - type_parameters, - value_parameter_names, - value_parameter_types, - return_type, - body - )) - - def persist_expression(self, expr): - # FIXME: should use context manager to clean up persisted resources - fname = TemporaryFilename().name - write_expression(expr, fname) - return read_expression(fname, _assert_type=expr.dtype) - - def set_flags(self, **flags: str): - self.flags.update(flags) - - def get_flags(self, *flags) -> Mapping[str, str]: - return frozendict(self.flags) + def register_ir_function(self, name, type_parameters, argument_names, argument_types, return_type, body): + raise NotImplementedError("ServiceBackend does not support 'register_ir_function'") - @property - def requires_lowering(self): - return True + def persist_ir(self, ir): + 
raise NotImplementedError("ServiceBackend does not support 'persist_ir'") diff --git a/hail/python/hail/backend/spark_backend.py b/hail/python/hail/backend/spark_backend.py index e8440517f4b..38a08e40781 100644 --- a/hail/python/hail/backend/spark_backend.py +++ b/hail/python/hail/backend/spark_backend.py @@ -17,6 +17,7 @@ from hail.expr.matrix_type import tmatrix from hail.expr.blockmatrix_type import tblockmatrix from hail.ir.renderer import CSERenderer +from hail.ir import JavaIR from hail.table import Table from hail.matrixtable import MatrixTable @@ -352,8 +353,8 @@ def remove_liftover(self, name, dest_reference_genome): def parse_vcf_metadata(self, path): return json.loads(self._jhc.pyParseVCFMetadataJSON(self.fs._jfs, path)) - def index_bgen(self, files, index_file_map, referenceGenomeName, contig_recoding, skip_invalid_loci): - self._jbackend.pyIndexBgen(files, index_file_map, referenceGenomeName, contig_recoding, skip_invalid_loci) + def index_bgen(self, files, index_file_map, rg, contig_recoding, skip_invalid_loci): + self._jbackend.pyIndexBgen(files, index_file_map, rg, contig_recoding, skip_invalid_loci) def import_fam(self, path: str, quant_pheno: bool, delimiter: str, missing: str): return json.loads(self._jbackend.pyImportFam(path, quant_pheno, delimiter, missing)) @@ -371,6 +372,9 @@ def register_ir_function(self, name, type_parameters, argument_names, argument_t return_type._parsable_string(), jbody) + def persist_ir(self, ir): + return JavaIR(self._jhc.backend().executeLiteral(self._to_java_value_ir(ir))) + def read_multiple_matrix_tables(self, paths: 'List[str]', intervals: 'List[hl.Interval]', intervals_type): json_repr = { 'paths': paths, @@ -380,7 +384,3 @@ def read_multiple_matrix_tables(self, paths: 'List[str]', intervals: 'List[hl.In results = self._jhc.backend().pyReadMultipleMatrixTables(json.dumps(json_repr)) return [MatrixTable._from_java(jm) for jm in results] - - @property - def requires_lowering(self): - return False diff --git a/hail/python/hail/context.py b/hail/python/hail/context.py index a02a71a39f7..eef95806426 100644 --- a/hail/python/hail/context.py +++ b/hail/python/hail/context.py @@ -1,5 +1,4 @@ -from typing import Optional, Union -import warnings +from typing import Optional import sys import os from urllib.parse import urlparse, urlunparse @@ -9,9 +8,9 @@ import hail from hail.genetics.reference_genome import ReferenceGenome -from hail.typecheck import nullable, typecheck, typecheck_method, enumeration, dictof, oneof +from hail.typecheck import nullable, typecheck, typecheck_method, enumeration, dictof from hail.utils import get_env_or_default -from hail.utils.java import Env, warning, choose_backend +from hail.utils.java import Env, FatalError, warning from hail.backend import Backend from hailtop.utils import secret_alnum_string from .builtin_references import BUILTIN_REFERENCES @@ -168,10 +167,7 @@ def stop(self): spark_conf=nullable(dictof(str, str)), skip_logging_configuration=bool, local_tmpdir=nullable(str), - _optimizer_iterations=nullable(int), - backend=nullable(str), - driver_cores=nullable(oneof(str, int)), - driver_memory=nullable(str)) + _optimizer_iterations=nullable(int)) def init(sc=None, app_name='Hail', master=None, local='local[*]', log=None, quiet=False, append=False, min_block_size=0, branching_factor=50, tmp_dir=None, @@ -180,37 +176,37 @@ def init(sc=None, app_name='Hail', master=None, local='local[*]', spark_conf=None, skip_logging_configuration=False, local_tmpdir=None, - _optimizer_iterations=None, - *, - backend=None, 
- driver_cores=None, - driver_memory=None): - """Initialize and configure Hail. - - This function will be called with default arguments if any Hail functionality is used. If you - need custom configuration, you must explicitly call this function before using Hail. For - example, to set the default reference genome to GRCh38, import Hail and immediately call - :func:`.init`: + _optimizer_iterations=None): + """Initialize Hail and Spark. + + Examples + -------- + Import and initialize Hail using GRCh38 as the default reference genome: >>> import hail as hl >>> hl.init(default_reference='GRCh38') # doctest: +SKIP - Hail has two backends, ``spark`` and ``batch``. Hail selects a backend by consulting, in order, - these configuration locations: + Notes + ----- + Hail is not only a Python library; most of Hail is written in Java/Scala + and runs together with Apache Spark in the Java Virtual Machine (JVM). + In order to use Hail, a JVM needs to run as well. The :func:`.init` + function is used to initialize Hail and Spark. - 1. The ``backend`` parameter of this function. - 2. The ``HAIL_QUERY_BACKEND`` environment variable. - 3. The value of ``hailctl config get query/backend``. + This function also sets global configuration parameters used for the Hail + session, like the default reference genome and log file location. - If no configuration is found, Hail will select the Spark backend. + This function will be called automatically (with default parameters) if + any Hail functionality requiring the backend (most of the libary!) is used. + To initialize Hail explicitly with non-default arguments, be sure to do so + directly after importing the module, as in the above example. - Examples - -------- - Configure Hail to use the Batch backend: - - >>> import hail as hl - >>> hl.init(backend='batch') # doctest: +SKIP + To facilitate the migration from Spark to the ServiceBackend, this method + calls init_service when the environment variable HAIL_QUERY_BACKEND is set + to "service". + Note + ---- If a :class:`pyspark.SparkContext` is already running, then Hail must be initialized with it as an argument: @@ -223,22 +219,20 @@ def init(sc=None, app_name='Hail', master=None, local='local[*]', Parameters ---------- sc : pyspark.SparkContext, optional - Spark Backend only. Spark context. If not specified, the Spark backend will create a new - Spark context. + Spark context. By default, a Spark context will be created. app_name : :class:`str` - A name for this pipeline. In the Spark backend, this becomes the Spark application name. In - the Batch backend, this is a prefix for the name of every Batch. + Spark application name. master : :class:`str`, optional - Spark Backend only. URL identifying the Spark leader (master) node or `local[N]` for local - clusters. + URL identifying the Spark leader (master) node or `local[N]` for local clusters. local : :class:`str` - Spark Backend only. Local-mode core limit indicator. Must either be `local[N]` where N is a - positive integer or `local[*]`. The latter indicates Spark should use all cores - available. `local[*]` does not respect most containerization CPU limits. This option is only - used if `master` is unset and `spark.master` is not set in the Spark configuration. + Local-mode core limit indicator. Must either be `local[N]` where N is a + positive integer or `local[*]`. The latter indicates Spark should use all + cores available. `local[*]` does not respect most containerization CPU + limits. 
This option is only used if `master` is unset and `spark.master` + is not set in the Spark configuration. log : :class:`str` - Local path for Hail log file. Does not currently support distributed file systems like - Google Storage, S3, or HDFS. + Local path for Hail log file. Does not currently support distributed + file systems like Google Storage, S3, or HDFS. quiet : :obj:`bool` Print fewer log messages. append : :obj:`bool` @@ -258,19 +252,12 @@ def init(sc=None, app_name='Hail', master=None, local='local[*]', global_seed : :obj:`int`, optional Global random seed. spark_conf : :obj:`dict` of :class:`str` to :class`str`, optional - Spark backend only. Spark configuration parameters. + Spark configuration parameters. skip_logging_configuration : :obj:`bool` - Spark Backend only. Skip logging configuration in java and python. + Skip logging configuration in java and python. local_tmpdir : :class:`str`, optional Local temporary directory. Used on driver and executor nodes. Must use the file scheme. Defaults to TMPDIR, or /tmp. - driver_cores : :class:`str` or :class:`int`, optional - Batch backend only. Number of cores to use for the driver process. May be 1 or 8. Default is - 1. - driver_memory : :class:`str`, optional - Batch backend only. Number of cores to use for the driver process. May be standard or - highmem. Default is standard. - """ if Env._hc: if idempotent: @@ -279,93 +266,20 @@ def init(sc=None, app_name='Hail', master=None, local='local[*]', warning('Hail has already been initialized. If this call was intended to change configuration,' ' close the session with hl.stop() first.') - backend = choose_backend(backend) - - if backend == 'service': - warnings.warn( - 'The "service" backend is now called the "batch" backend. Support for "service" will be removed in a ' - 'future release.' - ) - backend = 'batch' - - if backend == 'batch': + if os.environ.get('HAIL_QUERY_BACKEND') == 'service': import asyncio - try: - asyncio.get_running_loop() - raise ValueError( - 'When using Hail Query in async code, initialize the ServiceBackend with `await hl.init_batch()`' - ) - except RuntimeError: # RuntimeError implies there is no running loop, so we may start one - return asyncio.get_event_loop().run_until_complete(init_batch( - log=log, - quiet=quiet, - append=append, - tmpdir=tmp_dir, - local_tmpdir=local_tmpdir, - default_reference=default_reference, - global_seed=global_seed, - driver_cores=driver_cores, - driver_memory=driver_memory, - name_prefix=app_name - )) - if backend == 'spark': - return init_spark( - log=log, - quiet=quiet, - append=append, - tmp_dir=tmp_dir, - local_tmpdir=local_tmpdir, - default_reference=default_reference, - global_seed=global_seed, - skip_logging_configuration=skip_logging_configuration - ) - if backend == 'local': - return init_local( + # NB: do not use warning because that will initialize Env._hc, which we are trying to do right now. 
+ print('When using the query service backend, use `await init_service\'', file=sys.stderr) + return asyncio.get_event_loop().run_until_complete(init_service( log=log, quiet=quiet, append=append, tmpdir=tmp_dir, + local_tmpdir=local_tmpdir, default_reference=default_reference, global_seed=global_seed, - skip_logging_configuration=skip_logging_configuration - ) - raise ValueError(f'unknown Hail Query backend: {backend}') + skip_logging_configuration=skip_logging_configuration)) - -@typecheck(sc=nullable(SparkContext), - app_name=str, - master=nullable(str), - local=str, - log=nullable(str), - quiet=bool, - append=bool, - min_block_size=int, - branching_factor=int, - tmp_dir=nullable(str), - default_reference=enumeration(*BUILTIN_REFERENCES), - idempotent=bool, - global_seed=nullable(int), - spark_conf=nullable(dictof(str, str)), - skip_logging_configuration=bool, - local_tmpdir=nullable(str), - _optimizer_iterations=nullable(int)) -def init_spark(sc=None, - app_name='Hail', - master=None, - local='local[*]', - log=None, - quiet=False, - append=False, - min_block_size=0, - branching_factor=50, - tmp_dir=None, - default_reference='GRCh37', - idempotent=False, - global_seed=6348563392232659379, - spark_conf=None, - skip_logging_configuration=False, - local_tmpdir=None, - _optimizer_iterations=None): from hail.backend.spark_backend import SparkBackend log = _get_log(log) @@ -377,6 +291,7 @@ def init_spark(sc=None, idempotent, sc, spark_conf, app_name, master, local, log, quiet, append, min_block_size, branching_factor, tmpdir, local_tmpdir, skip_logging_configuration, optimizer_iterations) + if not backend.fs.exists(tmpdir): backend.fs.mkdir(tmpdir) @@ -395,35 +310,26 @@ def init_spark(sc=None, local_tmpdir=nullable(str), default_reference=enumeration(*BUILTIN_REFERENCES), global_seed=nullable(int), - disable_progress_bar=bool, - driver_cores=nullable(oneof(str, int)), - driver_memory=nullable(str), - name_prefix=nullable(str) -) -async def init_batch( - *, + skip_logging_configuration=bool, + disable_progress_bar=bool) +async def init_service( billing_project: Optional[str] = None, remote_tmpdir: Optional[str] = None, - log: Optional[str] = None, - quiet: bool = False, - append: bool = False, - tmpdir: Optional[str] = None, - local_tmpdir: Optional[str] = None, - default_reference: str = 'GRCh37', - global_seed: int = 6348563392232659379, - disable_progress_bar: bool = True, - driver_cores: Optional[Union[str, int]] = None, - driver_memory: Optional[str] = None, - name_prefix: Optional[str] = None -): + log=None, + quiet=False, + append=False, + tmpdir=None, + local_tmpdir=None, + default_reference='GRCh37', + global_seed=6348563392232659379, + skip_logging_configuration=False, + *, + disable_progress_bar=True): from hail.backend.service_backend import ServiceBackend - # FIXME: pass local_tmpdir and use on worker and driver backend = await ServiceBackend.create(billing_project=billing_project, remote_tmpdir=remote_tmpdir, - disable_progress_bar=disable_progress_bar, - driver_cores=driver_cores, - driver_memory=driver_memory, - name_prefix=name_prefix) + skip_logging_configuration=skip_logging_configuration, + disable_progress_bar=disable_progress_bar) log = _get_log(log) if tmpdir is None: @@ -473,8 +379,8 @@ def init_local( global_seed, backend) -def version() -> str: - """Get the installed Hail version. +def version(): + """Get the installed hail version. 
Returns ------- @@ -486,19 +392,6 @@ def version() -> str: return hail.__version__ -def revision() -> str: - """Get the installed Hail git revision. - - Returns - ------- - str - """ - if hail.__revision__ is None: - # https://stackoverflow.com/questions/6028000/how-to-read-a-static-file-from-inside-a-python-package - hail.__revision__ = pkg_resources.resource_string(__name__, 'hail_revision').decode().strip() - return hail.__revision__ - - def _hail_cite_url(): v = version() [tag, sha_prefix] = v.split("-") @@ -732,11 +625,20 @@ def set_global_seed(seed): def _set_flags(**flags): - Env.backend().set_flags(**flags) + available = set(Env.backend()._jhc.flags().available()) + invalid = [] + for flag, value in flags.items(): + if flag in available: + Env.backend()._jhc.flags().set(flag, value) + else: + invalid.append(flag) + if len(invalid) != 0: + raise FatalError("Flags {} not valid. Valid flags: \n {}" + .format(', '.join(invalid), '\n '.join(available))) def _get_flags(*flags): - return Env.backend().get_flags(*flags) + return {flag: Env.backend()._jhc.flags().get(flag) for flag in flags} def debug_info(): diff --git a/hail/python/hail/docs/change_log.md b/hail/python/hail/docs/change_log.md index 33ff343abfb..533af14b768 100644 --- a/hail/python/hail/docs/change_log.md +++ b/hail/python/hail/docs/change_log.md @@ -23,39 +23,6 @@ relating to file formats**: this means that it may not be possible to use an earlier version of Hail to read files written in a later version. ---- - -## Version 0.2.93 - -Release 2022-03-27 - -### Beta features - -- Several issues with the beta version of Hail Query on Hail Batch are addressed in this release. - ---- - -## Version 0.2.92 - -Release 2022-03-25 - -### New features - -- (hail#11613) Add `hl.ggplot` support for `scale_fill_hue`, `scale_color_hue`, and `scale_fill_manual`, - `scale_color_manual`. This allows for an infinite number of discrete colors. -- (hail#11608) Add all remaining and all versions of extant public gnomAD datasets to the Hail - Annotation Database and Datasets API. Current as of March 23rd 2022. -- (hail#11662) Add the `weight` aesthetic `geom_bar`. - -### Beta features - -- This version of Hail includes all the necessary client-side infrastructure to execute Hail Query - pipelines on a Hail Batch cluster. This effectively enables a "serverless" version of Hail Query - which is independent of Apache Spark. Broad affiliated users should contact the Hail team for help - using Hail Query on Hail Batch. Unaffiliated users should also contact the Hail team to discuss - the feasibility of running your own Hail Batch cluster. The Hail team is accessible at both - https://hail.zulipchat.com and https://discuss.hail.is . - --- ## Version 0.2.91 diff --git a/hail/python/hail/docs/cloud/query_on_batch.rst b/hail/python/hail/docs/cloud/query_on_batch.rst deleted file mode 100644 index 6365d12fb29..00000000000 --- a/hail/python/hail/docs/cloud/query_on_batch.rst +++ /dev/null @@ -1,65 +0,0 @@ -=================== -Hail Query-on-Batch -=================== - -.. warning:: - - Hail Query-on-Batch (the Batch backend) is currently in beta. This means some functionality is - not yet working. Please `contact us `__ if you would like to use missing - functionality on Query-on-Batch! - - -Hail Query-on-Batch uses Hail Batch instead of Apache Spark to execute jobs. Instead of a Dataproc -cluster, you will need a Hail Batch cluster. For more information on using Hail Batch, see the `Hail -Batch docs `__. 
For more information on deploying a Hail Batch cluster, -please contact the Hail Team at our `discussion forum `__. - -Getting Started ---------------- - -1. Install Hail version 0.2.93 or later: - -.. code-block:: text - - pip install 'hail>=0.2.93' - -2. `Sign up for a Hail Batch account `__ (currently only available to - Broad affiliates). - -3. Authenticate with Hail Batch. - -.. code-block:: text - - hailctl auth login - -3. Specify a bucket for Hail to use for temporary intermediate files. In Google Cloud, we recommend - using a bucket with `automatic deletion after a set period of time - `__. - -.. code-block:: text - - hailctl config set batch/tmp_dir gs://my-auto-delete-bucket/hail-query-temporaries - -4. Specify a Hail Batch billing project (these are different from Google Cloud projects). Every new - user has a trial billing project loaded with 10 USD. The name is available on the `Hail User - account page `__. - -.. code-block:: text - - hailctl config set batch/billing_proejct my-billing-project - -5. Set the default Hail Query backend to ``batch``: - -.. code-block:: text - - hailctl config set query/backend batch - -6. Now you are ready to `try Hail <../install/try.rst>`__! - -.. _vep_query_on_batch: - -Variant Effect Predictor (VEP) ------------------------------- - -More information coming very soon. If you want to use VEP with Hail Query-on-Batch, please contact -the Hail Team at our `discussion forum `__. diff --git a/hail/python/hail/docs/datasets/schemas/gnomad_genome_sites.rst b/hail/python/hail/docs/datasets/schemas/gnomad_genome_sites.rst index f4cb3173eb3..2191bf1b453 100644 --- a/hail/python/hail/docs/datasets/schemas/gnomad_genome_sites.rst +++ b/hail/python/hail/docs/datasets/schemas/gnomad_genome_sites.rst @@ -3,49 +3,63 @@ gnomad_genome_sites =================== -* **Versions:** 2.1.1, 3.1, 3.1.1, 3.1.2 +* **Versions:** 2.1.1, 3.1 * **Reference genome builds:** GRCh37, GRCh38 * **Type:** :class:`hail.Table` -Schema (3.1.2, GRCh38) +Schema (2.1.1, GRCh37) ~~~~~~~~~~~~~~~~~~~~~~ .. 
code-block:: text ---------------------------------------- Global fields: - 'freq_meta': array> - 'freq_index_dict': dict - 'faf_index_dict': dict - 'faf_meta': array> - 'vep_version': str - 'vep_csq_header': str - 'dbsnp_version': str - 'filtering_model': struct { - model_name: str, - score_name: str, - snv_cutoff: struct { - bin: float64, + 'rf': struct { + variants_by_type: dict, + feature_medians: dict, + test_intervals: array>>, + test_results: array, + features_importance: dict, + features: array, + vqsr_training: bool, + no_transmitted_singletons: bool, + adj: bool, + rf_hash: str, + rf_snv_cutoff: struct { + bin: int32, min_score: float64 }, - indel_cutoff: struct { - bin: float64, + rf_indel_cutoff: struct { + bin: int32, min_score: float64 - }, - model_id: str, - snv_training_variables: array, - indel_training_variables: array - } - 'age_distribution': struct { - bin_edges: array, - bin_freq: array, - n_smaller: int32, - n_larger: int32 + } } - 'freq_sample_count': array + 'freq_meta': array> + 'freq_index_dict': dict + 'popmax_index_dict': dict + 'age_index_dict': dict + 'faf_index_dict': dict + 'age_distribution': array ---------------------------------------- Row fields: - 'locus': locus + 'locus': locus 'alleles': array 'freq': array - 'raw_qual_hists': struct { - gq_hist_all: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - dp_hist_all: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - gq_hist_alt: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - dp_hist_alt: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - ab_hist_alt: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - } - } - 'popmax': struct { + 'age_hist_het': array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }> + 'age_hist_hom': array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }> + 'popmax': array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - dp_hist_all: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - gq_hist_alt: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - dp_hist_alt: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - ab_hist_alt: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - } - } + pop: str + }> 'faf': array, faf95: float64, faf99: float64 }> - 'a_index': int32 + 'lcr': bool + 'decoy': bool + 'segdup': bool + 'nonpar': bool + 'variant_type': str + 'allele_type': str + 'n_alt_alleles': int32 + 'was_mixed': bool + 'has_star': bool + 'qd': float64 + 'pab_max': float64 + 'info_MQRankSum': float64 + 'info_SOR': float64 + 'info_InbreedingCoeff': float64 + 'info_ReadPosRankSum': float64 + 'info_FS': float64 + 'info_QD': float64 + 'info_MQ': float64 + 'info_DP': int32 + 'transmitted_singleton': bool + 'fail_hard_filters': bool + 'info_POSITIVE_TRAIN_SITE': bool + 'info_NEGATIVE_TRAIN_SITE': bool + 'omni': bool + 'mills': bool + 'tp': bool + 'rf_train': bool + 'rf_label': str + 'rf_probability': float64 + 'rank': int64 'was_split': bool - 'rsid': set + 'singleton': bool + '_score': float64 + '_singleton': bool + 'biallelic_rank': int64 + 'singleton_rank': int64 + 'n_nonref': int32 + 'score': float64 + 'adj_biallelic_singleton_rank': int64 + 'adj_rank': int64 + 'adj_biallelic_rank': int64 + 
'adj_singleton_rank': int64 + 'biallelic_singleton_rank': int64 'filters': set - 'info': struct { - QUALapprox: int64, - SB: array, - MQ: float64, - MQRankSum: float64, - VarDP: int32, - AS_ReadPosRankSum: float64, - AS_pab_max: float64, - AS_QD: float32, - AS_MQ: float64, - QD: float32, - AS_MQRankSum: float64, - FS: float64, - AS_FS: float64, - ReadPosRankSum: float64, - AS_QUALapprox: int64, - AS_SB_TABLE: array, - AS_VarDP: int32, - AS_SOR: float64, - SOR: float64, - singleton: bool, - transmitted_singleton: bool, - omni: bool, - mills: bool, - monoallelic: bool, - AS_VQSLOD: float64, - InbreedingCoeff: float64 + 'gq_hist_alt': struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 } + 'gq_hist_all': struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + } + 'dp_hist_alt': struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + } + 'dp_hist_all': struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + } + 'ab_hist_alt': struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + } + 'qual': float64 'vep': struct { assembly_name: str, allele_string: str, ancestral: str, + colocated_variants: array, + end: int32, + eas_allele: str, + eas_maf: float64, + ea_allele: str, + ea_maf: float64, + eur_allele: str, + eur_maf: float64, + exac_adj_allele: str, + exac_adj_maf: float64, + exac_allele: str, + exac_afr_allele: str, + exac_afr_maf: float64, + exac_amr_allele: str, + exac_amr_maf: float64, + exac_eas_allele: str, + exac_eas_maf: float64, + exac_fin_allele: str, + exac_fin_maf: float64, + exac_maf: float64, + exac_nfe_allele: str, + exac_nfe_maf: float64, + exac_oth_allele: str, + exac_oth_maf: float64, + exac_sas_allele: str, + exac_sas_maf: float64, + id: str, + minor_allele: str, + minor_allele_freq: float64, + phenotype_or_disease: int32, + pubmed: array, + sas_allele: str, + sas_maf: float64, + somatic: int32, + start: int32, + strand: int32 + }>, context: str, end: int32, id: str, @@ -205,7 +255,6 @@ Schema (3.1.2, GRCh38) transcript_consequences: array, variant_class: str } - 'vqsr': struct { - AS_VQSLOD: float64, - AS_culprit: str, - NEGATIVE_TRAIN_SITE: bool, - POSITIVE_TRAIN_SITE: bool - } - 'region_flag': struct { - lcr: bool, - segdup: bool - } 'allele_info': struct { - variant_type: str, - allele_type: str, - n_alt_alleles: int32, - was_mixed: bool - } - 'age_hist_het': struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - } - 'age_hist_hom': struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - } - 'cadd': struct { - phred: float32, - raw_score: float32, - has_duplicate: bool - } - 'revel': struct { - revel_score: float64, - has_duplicate: bool - } - 'splice_ai': struct { - splice_ai_score: float32, - splice_consequence: str, - has_duplicate: bool - } - 'primate_ai': struct { - primate_ai_score: float32, - has_duplicate: bool + BaseQRankSum: float64, + ClippingRankSum: float64, + DB: bool, + DP: int32, + DS: bool, + END: int32, + FS: float64, + HaplotypeScore: float64, + InbreedingCoeff: float64, + MQ: float64, + MQ0: int32, + MQRankSum: float64, + NEGATIVE_TRAIN_SITE: bool, + POSITIVE_TRAIN_SITE: bool, + QD: float64, + RAW_MQ: float64, + ReadPosRankSum: float64, + SOR: float64, + VQSLOD: float64, + culprit: str } + 'rsid': str ---------------------------------------- Key: ['locus', 'alleles'] ---------------------------------------- + diff --git 
a/hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_callset.rst b/hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_callset.rst new file mode 100644 index 00000000000..c9cea3b61b0 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_callset.rst @@ -0,0 +1,651 @@ +.. _gnomad_hgdp_1kg_callset: + +gnomad_hgdp_1kg_callset +======================= + +* **Versions:** 3.1 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.MatrixTable` + +Schema (3.1, GRCh38) +~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'global_annotation_descriptions': struct { + sex_imputation_ploidy_cutoffs: struct { + Description: str + }, + population_inference_pca_metrics: struct { + Description: str + }, + hard_filter_cutoffs: struct { + Description: str + }, + cohort_freq_meta: struct { + Description: str + }, + gnomad_freq_meta: struct { + Description: str + }, + cohort_freq_index_dict: struct { + Description: str + }, + gnomad_freq_index_dict: struct { + Description: str + }, + gnomad_faf_index_dict: struct { + Description: str + }, + gnomad_faf_meta: struct { + Description: str + }, + vep_version: struct { + Description: str + }, + vep_csq_header: struct { + Description: str + }, + dbsnp_version: struct { + Description: str + }, + filtering_model: struct { + Description: str, + sub_globals: struct { + model_name: struct { + Description: str + }, + score_name: struct { + Description: str + }, + snv_cutoff: struct { + Description: str, + sub_globals: struct { + bin: struct { + Description: str + }, + min_score: struct { + Description: str + } + } + }, + indel_cutoff: struct { + Description: str, + sub_globals: struct { + bin: struct { + Description: str + }, + min_score: struct { + Description: str + } + } + }, + snv_training_variables: struct { + Description: str + }, + indel_training_variables: struct { + Description: str + } + } + }, + inbreeding_coeff_cutoff: struct { + Description: str + } + } + 'sample_annotation_descriptions': struct { + s: struct { + Description: str + }, + bam_metrics: struct { + Description: str, + sub_annotations: struct { + pct_bases_20x: struct { + Description: str + }, + pct_chimeras: struct { + Description: str + }, + freemix: struct { + Description: str + }, + mean_coverage: struct { + Description: str + }, + median_coverage: struct { + Description: str + }, + mean_insert_size: struct { + Description: str + }, + median_insert_size: struct { + Description: str + }, + pct_bases_10x: struct { + Description: str + } + } + }, + subsets: struct { + Description: str, + sub_annotations: struct { + tgp: struct { + Description: str + }, + hgdp: struct { + Description: str + } + } + }, + sex_imputation: struct { + Description: str, + sub_annotations: struct { + f_stat: struct { + Description: str + }, + n_called: struct { + Description: str + }, + expected_homs: struct { + Description: str + }, + observed_homs: struct { + Description: str + }, + chr20_mean_dp: struct { + Description: str + }, + chrX_mean_dp: struct { + Description: str + }, + chrY_mean_dp: struct { + Description: str + }, + chrX_ploidy: struct { + Description: str + }, + chrY_ploidy: struct { + Description: str + }, + X_karyotype: struct { + Description: str + }, + Y_karyotype: struct { + Description: str + }, + sex_karyotype: struct { + Description: str + } + } + }, + sample_qc: struct { + Description: str, + sub_annotations: struct { + n_hom_ref: struct { + Description: str + }, + n_het: struct { + Description: str + }, + 
n_hom_var: struct { + Description: str + }, + n_non_ref: struct { + Description: str + }, + n_snp: struct { + Description: str + }, + n_insertion: struct { + Description: str + }, + n_deletion: struct { + Description: str + }, + n_transition: struct { + Description: str + }, + n_transversion: struct { + Description: str + }, + r_ti_tv: struct { + Description: str + }, + r_het_hom_var: struct { + Description: str + }, + r_insertion_deletion: struct { + Description: str + } + } + }, + population_inference: struct { + Description: str, + sub_annotations: struct { + pca_scores: struct { + Description: str + }, + pop: struct { + Description: str + }, + prob_afr: struct { + Description: str + }, + prob_ami: struct { + Description: str + }, + prob_amr: struct { + Description: str + }, + prob_asj: struct { + Description: str + }, + prob_eas: struct { + Description: str + }, + prob_fin: struct { + Description: str + }, + prob_mid: struct { + Description: str + }, + prob_nfe: struct { + Description: str + }, + prob_oth: struct { + Description: str + }, + prob_sas: struct { + Description: str + } + } + }, + labeled_subpop: struct { + Description: str + }, + gnomad_release: struct { + Description: str + } + } + 'sex_imputation_ploidy_cutoffs': struct { + x_ploidy_cutoffs: struct { + upper_cutoff_X: float64, + lower_cutoff_XX: float64, + upper_cutoff_XX: float64, + lower_cutoff_XXX: float64 + }, + y_ploidy_cutoffs: struct { + lower_cutoff_Y: float64, + upper_cutoff_Y: float64, + lower_cutoff_YY: float64 + }, + f_stat_cutoff: float64 + } + 'population_inference_pca_metrics': struct { + n_pcs: int32, + min_prob: float64 + } + 'hard_filter_cutoffs': struct { + min_cov: int32, + max_n_snp: float64, + min_n_snp: float64, + max_n_singleton: float64, + max_r_het_hom_var: float64, + max_pct_contamination: float64, + max_pct_chimera: float64, + min_median_insert_size: int32 + } + 'cohort_freq_meta': array> + 'cohort_freq_index_dict': dict + 'gnomad_freq_meta': array> + 'gnomad_freq_index_dict': dict + 'gnomad_faf_index_dict': dict + 'gnomad_faf_meta': array> + 'vep_version': str + 'vep_csq_header': str + 'dbsnp_version': str + 'filtering_model': struct { + model_name: str, + score_name: str, + snv_cutoff: struct { + bin: float64, + min_score: float64 + }, + indel_cutoff: struct { + bin: float64, + min_score: float64 + }, + snv_training_variables: array, + indel_training_variables: array + } + 'inbreeding_coeff_cutoff': float64 + ---------------------------------------- + Column fields: + 's': str + 'bam_metrics': struct { + pct_bases_20x: float64, + pct_chimeras: float64, + freemix: float64, + mean_coverage: float64, + median_coverage: float64, + mean_insert_size: float64, + median_insert_size: float64, + pct_bases_10x: float64 + } + 'subsets': struct { + tgp: bool, + hgdp: bool + } + 'sex_imputation': struct { + chr20_mean_dp: float32, + chrX_mean_dp: float32, + chrY_mean_dp: float32, + chrX_ploidy: float32, + chrY_ploidy: float32, + X_karyotype: str, + Y_karyotype: str, + sex_karyotype: str, + impute_sex_stats: struct { + f_stat: float64, + n_called: int64, + expected_homs: float64, + observed_homs: int64 + } + } + 'sample_qc': struct { + n_hom_ref: int64, + n_het: int64, + n_hom_var: int64, + n_non_ref: int64, + n_snp: int64, + n_insertion: int64, + n_deletion: int64, + n_transition: int64, + n_transversion: int64, + r_ti_tv: float64, + r_het_hom_var: float64, + r_insertion_deletion: float64 + } + 'population_inference': struct { + pca_scores: array, + pop: str, + prob_afr: float64, + prob_ami: float64, + 
prob_amr: float64, + prob_asj: float64, + prob_eas: float64, + prob_fin: float64, + prob_mid: float64, + prob_nfe: float64, + prob_oth: float64, + prob_sas: float64 + } + 'labeled_subpop': str + 'gnomad_release': bool + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'rsid': str + 'AS_lowqual': bool + 'telomere_or_centromere': bool + 'cohort_freq': array + 'gnomad_freq': array + 'gnomad_raw_qual_hists': struct { + gq_hist_all: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + dp_hist_all: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + gq_hist_alt: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + dp_hist_alt: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + ab_hist_alt: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + } + } + 'gnomad_popmax': struct { + AC: int32, + AF: float64, + AN: int32, + homozygote_count: int32, + pop: str, + faf95: float64 + } + 'gnomad_qual_hists': struct { + gq_hist_all: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + dp_hist_all: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + gq_hist_alt: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + dp_hist_alt: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + ab_hist_alt: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + } + } + 'gnomad_faf': array + 'filters': set + 'info': struct { + QUALapprox: int64, + SB: array, + MQ: float64, + MQRankSum: float64, + VarDP: int32, + AS_ReadPosRankSum: float64, + AS_pab_max: float64, + AS_QD: float32, + AS_MQ: float64, + QD: float32, + AS_MQRankSum: float64, + FS: float64, + AS_FS: float64, + ReadPosRankSum: float64, + AS_QUALapprox: int64, + AS_SB_TABLE: array, + AS_VarDP: int32, + AS_SOR: float64, + SOR: float64, + transmitted_singleton: bool, + omni: bool, + mills: bool, + monoallelic: bool, + AS_VQSLOD: float64, + InbreedingCoeff: float32 + } + 'vep': struct { + assembly_name: str, + allele_string: str, + ancestral: str, + context: str, + end: int32, + id: str, + input: str, + intergenic_consequences: array, + impact: str, + minimised: int32, + variant_allele: str + }>, + most_severe_consequence: str, + motif_feature_consequences: array, + high_inf_pos: str, + impact: str, + minimised: int32, + motif_feature_id: str, + motif_name: str, + motif_pos: int32, + motif_score_change: float64, + strand: int32, + variant_allele: str + }>, + regulatory_feature_consequences: array, + impact: str, + minimised: int32, + regulatory_feature_id: str, + variant_allele: str + }>, + seq_region_name: str, + start: int32, + strand: int32, + transcript_consequences: array, + distance: int32, + domains: array, + exon: str, + gene_id: str, + gene_pheno: int32, + gene_symbol: str, + gene_symbol_source: str, + hgnc_id: str, + hgvsc: str, + hgvsp: str, + hgvs_offset: int32, + impact: str, + intron: str, + lof: str, + lof_flags: str, + lof_filter: str, + lof_info: str, + minimised: int32, + polyphen_prediction: str, + polyphen_score: float64, + protein_end: int32, + protein_start: int32, + protein_id: str, + sift_prediction: str, + sift_score: float64, + strand: int32, + swissprot: str, + transcript_id: str, + trembl: str, + tsl: int32, + 
uniparc: str, + variant_allele: str + }>, + variant_class: str + } + 'vqsr': struct { + AS_VQSLOD: float64, + AS_culprit: str, + NEGATIVE_TRAIN_SITE: bool, + POSITIVE_TRAIN_SITE: bool + } + 'region_flag': struct { + lcr: bool, + segdup: bool + } + 'allele_info': struct { + variant_type: str, + allele_type: str, + n_alt_alleles: int32, + was_mixed: bool + } + 'cadd': struct { + raw_score: float32, + phred: float32 + } + 'revel': struct { + revel_score: float64, + ref_aa: str, + alt_aa: str + } + 'splice_ai': struct { + splice_ai: array, + max_ds: float32, + splice_consequence: str + } + 'primate_ai': struct { + primate_ai_score: float32 + } + ---------------------------------------- + Entry fields: + 'END': int32 + 'DP': int32 + 'GQ': int32 + 'MIN_DP': int32 + 'PID': str + 'RGQ': int32 + 'SB': array + 'GT': call + 'PGT': call + 'AD': array + 'PL': array + ---------------------------------------- + Column key: ['s'] + Row key: ['locus', 'alleles'] + ---------------------------------------- + diff --git a/hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_dense.rst b/hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_dense.rst deleted file mode 100644 index 0dd1a553f42..00000000000 --- a/hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_dense.rst +++ /dev/null @@ -1,1501 +0,0 @@ -.. _gnomad_hgdp_1kg_subset_dense: - -gnomad_hgdp_1kg_subset_dense -============================ - -* **Versions:** 3.1, 3.1.2 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.MatrixTable` - -Schema (3.1.2, GRCh38) -~~~~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'global_annotation_descriptions': struct { - gnomad_sex_imputation_ploidy_cutoffs: struct { - Description: str - }, - gnomad_population_inference_pca_metrics: struct { - Description: str - }, - sample_hard_filter_cutoffs: struct { - Description: str - }, - gnomad_sample_qc_metric_outlier_cutoffs: struct { - Description: str - }, - gnomad_age_distribution: struct { - Description: str, - sub_globals: struct { - bin_edges: struct { - Description: str - }, - bin_freq: struct { - Description: str - }, - n_smaller: struct { - Description: str - }, - n_larger: struct { - Description: str - } - } - }, - hgdp_tgp_freq_meta: struct { - Description: str - }, - gnomad_freq_meta: struct { - Description: str - }, - hgdp_tgp_freq_index_dict: struct { - Description: str - }, - gnomad_freq_index_dict: struct { - Description: str - }, - gnomad_faf_meta: struct { - Description: str - }, - gnomad_faf_index_dict: struct { - Description: str - }, - variant_filtering_model: struct { - Description: set, - sub_globals: struct { - model_name: struct { - Description: str - }, - score_name: struct { - Description: str - }, - snv_cutoff: struct { - Description: str, - sub_globals: struct { - bin: struct { - Description: str - }, - min_score: struct { - Description: str - } - } - }, - indel_cutoff: struct { - Description: str, - sub_globals: struct { - bin: struct { - Description: str - }, - min_score: struct { - Description: str - } - } - }, - snv_training_variables: struct { - Description: str - }, - indel_training_variables: struct { - Description: str - } - } - }, - variant_inbreeding_coeff_cutoff: struct { - Description: str - }, - vep_version: struct { - Description: str - }, - vep_csq_header: struct { - Description: str - }, - dbsnp_version: struct { - Description: str - } - } - 'sample_annotation_descriptions': struct { - s: struct { - Description: str - }, - 
bam_metrics: struct { - Description: str, - sub_annotations: struct { - pct_bases_20x: struct { - Description: str - }, - pct_chimeras: struct { - Description: str - }, - freemix: struct { - Description: str - }, - mean_coverage: struct { - Description: str - }, - median_coverage: struct { - Description: str - }, - mean_insert_size: struct { - Description: str - }, - median_insert_size: struct { - Description: str - }, - pct_bases_10x: struct { - Description: str - } - } - }, - sample_qc: struct { - Description: str, - sub_annotations: struct { - n_deletion: struct { - Description: str - }, - n_het: struct { - Description: str - }, - n_hom_ref: struct { - Description: str - }, - n_hom_var: struct { - Description: str - }, - n_insertion: struct { - Description: str - }, - n_non_ref: struct { - Description: str - }, - n_snp: struct { - Description: str - }, - n_transition: struct { - Description: str - }, - n_transversion: struct { - Description: str - }, - r_het_hom_var: struct { - Description: str - }, - r_insertion_deletion: struct { - Description: str - }, - r_ti_tv: struct { - Description: str - } - } - }, - gnomad_sex_imputation: struct { - Description: str, - sub_annotations: struct { - chr20_mean_dp: struct { - Description: str - }, - chrX_mean_dp: struct { - Description: str - }, - chrY_mean_dp: struct { - Description: str - }, - chrX_ploidy: struct { - Description: str - }, - chrY_ploidy: struct { - Description: str - }, - X_karyotype: struct { - Description: str - }, - Y_karyotype: struct { - Description: str - }, - sex_karyotype: struct { - Description: str - }, - f_stat: struct { - Description: str - }, - n_called: struct { - Description: str - }, - expected_homs: struct { - Description: str - }, - observed_homs: struct { - Description: str - } - } - }, - gnomad_population_inference: struct { - Description: str, - sub_annotations: struct { - pca_scores: struct { - Description: str - }, - pop: struct { - Description: str - }, - prob_afr: struct { - Description: str - }, - prob_ami: struct { - Description: str - }, - prob_amr: struct { - Description: str - }, - prob_asj: struct { - Description: str - }, - prob_eas: struct { - Description: str - }, - prob_fin: struct { - Description: str - }, - prob_mid: struct { - Description: str - }, - prob_nfe: struct { - Description: str - }, - prob_oth: struct { - Description: str - }, - prob_sas: struct { - Description: str - } - } - }, - gnomad_sample_qc_residuals: struct { - Description: tuple ( - str - ), - sub_annotations: struct { - n_snp_residual: struct { - Description: str - }, - r_ti_tv_residual: struct { - Description: str - }, - r_insertion_deletion_residual: struct { - Description: str - }, - n_insertion_residual: struct { - Description: str - }, - n_deletion_residual: struct { - Description: str - }, - r_het_hom_var_residual: struct { - Description: str - }, - n_transition_residual: struct { - Description: str - }, - n_transversion_residual: struct { - Description: str - } - } - }, - gnomad_sample_filters: struct { - Description: str, - sub_annotations: struct { - hard_filters: struct { - Description: str - }, - hard_filtered: struct { - Description: str - }, - release_related: struct { - Description: str - }, - qc_metrics_filters: struct { - Description: str - } - } - }, - gnomad_high_quality: struct { - Description: str - }, - gnomad_release: struct { - Description: str - }, - relatedness_inference: struct { - Description: str, - sub_annotations: struct { - related_samples: struct { - Description: str, - sub_annotations: 
struct { - s: struct { - Description: str - }, - kin: struct { - Description: str - }, - ibd0: struct { - Description: str - }, - ibd1: struct { - Description: str - }, - ibd2: struct { - Description: str - } - } - }, - related: struct { - Description: str - } - } - }, - hgdp_tgp_meta: struct { - Description: str, - sub_annotations: struct { - project: struct { - Description: str - }, - study_region: struct { - Description: str - }, - population: struct { - Description: str - }, - genetic_region: struct { - Description: str - }, - latitude: struct { - Description: str - }, - longitude: struct { - Description: str - }, - hgdp_technical_meta: struct { - Description: str, - sub_annotations: struct { - source: struct { - Description: str - }, - library_type: struct { - Description: str - } - } - }, - global_pca_scores: struct { - Description: str - }, - subcontinental_pca: struct { - Description: str, - sub_annotations: struct { - pca_scores: struct { - Description: str - }, - pca_scores_outliers_removed: struct { - Description: str - }, - outlier: struct { - Description: str - } - } - }, - gnomad_labeled_subpop: struct { - Description: str - } - } - }, - high_quality: struct { - Description: str - } - } - 'gnomad_sex_imputation_ploidy_cutoffs': struct { - x_ploidy_cutoffs: struct { - upper_cutoff_X: float64, - lower_cutoff_XX: float64, - upper_cutoff_XX: float64, - lower_cutoff_XXX: float64 - }, - y_ploidy_cutoffs: struct { - lower_cutoff_Y: float64, - upper_cutoff_Y: float64, - lower_cutoff_YY: float64 - }, - f_stat_cutoff: float64 - } - 'gnomad_population_inference_pca_metrics': struct { - n_pcs: int32, - min_prob: float64 - } - 'sample_hard_filter_cutoffs': struct { - min_cov: int32, - max_n_snp: float64, - min_n_snp: float64, - max_n_singleton: float64, - max_r_het_hom_var: float64, - max_pct_contamination: float64, - max_pct_chimera: float64, - min_median_insert_size: int32 - } - 'gnomad_sample_qc_metric_outlier_cutoffs': struct { - lms: struct { - n_snp: struct { - beta: array, - standard_error: array, - t_stat: array, - p_value: array, - multiple_standard_error: float64, - multiple_r_squared: float64, - adjusted_r_squared: float64, - f_stat: float64, - multiple_p_value: float64, - n: int32 - }, - n_singleton: struct { - beta: array, - standard_error: array, - t_stat: array, - p_value: array, - multiple_standard_error: float64, - multiple_r_squared: float64, - adjusted_r_squared: float64, - f_stat: float64, - multiple_p_value: float64, - n: int32 - }, - r_ti_tv: struct { - beta: array, - standard_error: array, - t_stat: array, - p_value: array, - multiple_standard_error: float64, - multiple_r_squared: float64, - adjusted_r_squared: float64, - f_stat: float64, - multiple_p_value: float64, - n: int32 - }, - r_insertion_deletion: struct { - beta: array, - standard_error: array, - t_stat: array, - p_value: array, - multiple_standard_error: float64, - multiple_r_squared: float64, - adjusted_r_squared: float64, - f_stat: float64, - multiple_p_value: float64, - n: int32 - }, - n_insertion: struct { - beta: array, - standard_error: array, - t_stat: array, - p_value: array, - multiple_standard_error: float64, - multiple_r_squared: float64, - adjusted_r_squared: float64, - f_stat: float64, - multiple_p_value: float64, - n: int32 - }, - n_deletion: struct { - beta: array, - standard_error: array, - t_stat: array, - p_value: array, - multiple_standard_error: float64, - multiple_r_squared: float64, - adjusted_r_squared: float64, - f_stat: float64, - multiple_p_value: float64, - n: int32 - }, - 
r_het_hom_var: struct { - beta: array, - standard_error: array, - t_stat: array, - p_value: array, - multiple_standard_error: float64, - multiple_r_squared: float64, - adjusted_r_squared: float64, - f_stat: float64, - multiple_p_value: float64, - n: int32 - }, - n_transition: struct { - beta: array, - standard_error: array, - t_stat: array, - p_value: array, - multiple_standard_error: float64, - multiple_r_squared: float64, - adjusted_r_squared: float64, - f_stat: float64, - multiple_p_value: float64, - n: int32 - }, - n_transversion: struct { - beta: array, - standard_error: array, - t_stat: array, - p_value: array, - multiple_standard_error: float64, - multiple_r_squared: float64, - adjusted_r_squared: float64, - f_stat: float64, - multiple_p_value: float64, - n: int32 - } - }, - qc_metrics_stats: struct { - n_snp_residual: struct { - median: float64, - mad: float64, - lower: float64, - upper: float64 - }, - n_singleton_residual: struct { - median: float64, - mad: float64, - lower: float64, - upper: float64 - }, - r_ti_tv_residual: struct { - median: float64, - mad: float64, - lower: float64, - upper: float64 - }, - r_insertion_deletion_residual: struct { - median: float64, - mad: float64, - lower: float64, - upper: float64 - }, - n_insertion_residual: struct { - median: float64, - mad: float64, - lower: float64, - upper: float64 - }, - n_deletion_residual: struct { - median: float64, - mad: float64, - lower: float64, - upper: float64 - }, - r_het_hom_var_residual: struct { - median: float64, - mad: float64, - lower: float64, - upper: float64 - }, - n_transition_residual: struct { - median: float64, - mad: float64, - lower: float64, - upper: float64 - }, - n_transversion_residual: struct { - median: float64, - mad: float64, - lower: float64, - upper: float64 - } - }, - n_pcs: int32, - used_regressed_metrics: bool - } - 'gnomad_age_distribution': struct { - bin_edges: array, - bin_freq: array, - n_smaller: int32, - n_larger: int32 - } - 'variant_annotation_descriptions': struct { - locus: struct { - Description: str - }, - alleles: struct { - Description: str - }, - rsid: struct { - Description: str - }, - a_index: struct { - Description: str - }, - was_split: struct { - Description: str - }, - hgdp_tgp_freq: struct { - Description: str, - sub_annotations: struct { - AC: struct { - Description: str - }, - AF: struct { - Description: str - }, - AN: struct { - Description: str - }, - homozygote_count: struct { - Description: str - } - } - }, - gnomad_freq: struct { - Description: str, - sub_annotations: struct { - AC: struct { - Description: str - }, - AF: struct { - Description: str - }, - AN: struct { - Description: str - }, - homozygote_count: struct { - Description: str - } - } - }, - gnomad_popmax: struct { - Description: str, - sub_annotations: struct { - AC: struct { - Description: str - }, - AF: struct { - Description: str - }, - AN: struct { - Description: str - }, - homozygote_count: struct { - Description: str - }, - pop: struct { - Description: str - }, - faf95: struct { - Description: str - } - } - }, - gnomad_faf: struct { - Description: str, - sub_annotations: struct { - faf95: struct { - Description: str - }, - faf99: struct { - Description: str - } - } - }, - gnomad_qual_hists: struct { - Description: str, - sub_annotations: struct { - gq_hist_all: struct { - Description: str, - sub_annotations: struct { - bin_edges: struct { - Description: str - }, - bin_freq: struct { - Description: str - }, - n_smaller: struct { - Description: str - }, - n_larger: struct { - Description: 
str - } - } - }, - dp_hist_all: struct { - Description: str, - sub_annotations: struct { - bin_edges: struct { - Description: str - }, - bin_freq: struct { - Description: str - }, - n_smaller: struct { - Description: str - }, - n_larger: struct { - Description: str - } - } - }, - gq_hist_alt: struct { - Description: str, - sub_annotations: struct { - bin_edges: struct { - Description: str - }, - bin_freq: struct { - Description: str - }, - n_smaller: struct { - Description: str - }, - n_larger: struct { - Description: str - } - } - }, - dp_hist_alt: struct { - Description: str, - sub_annotations: struct { - bin_edges: struct { - Description: str - }, - bin_freq: struct { - Description: str - }, - n_smaller: struct { - Description: str - }, - n_larger: struct { - Description: str - } - } - }, - ab_hist_alt: struct { - Description: str, - sub_annotations: struct { - bin_edges: struct { - Description: str - }, - bin_freq: struct { - Description: str - }, - n_smaller: struct { - Description: str - }, - n_larger: struct { - Description: str - } - } - } - } - }, - gnomad_raw_qual_hists: struct { - Description: str, - sub_annotations: struct { - gq_hist_all: struct { - Description: str, - sub_annotations: struct { - bin_edges: struct { - Description: str - }, - bin_freq: struct { - Description: str - }, - n_smaller: struct { - Description: str - }, - n_larger: struct { - Description: str - } - } - }, - dp_hist_all: struct { - Description: str, - sub_annotations: struct { - bin_edges: struct { - Description: str - }, - bin_freq: struct { - Description: str - }, - n_smaller: struct { - Description: str - }, - n_larger: struct { - Description: str - } - } - }, - gq_hist_alt: struct { - Description: str, - sub_annotations: struct { - bin_edges: struct { - Description: str - }, - bin_freq: struct { - Description: str - }, - n_smaller: struct { - Description: str - }, - n_larger: struct { - Description: str - } - } - }, - dp_hist_alt: struct { - Description: str, - sub_annotations: struct { - bin_edges: struct { - Description: str - }, - bin_freq: struct { - Description: str - }, - n_smaller: struct { - Description: str - }, - n_larger: struct { - Description: str - } - } - }, - ab_hist_alt: struct { - Description: str, - sub_annotations: struct { - bin_edges: struct { - Description: str - }, - bin_freq: struct { - Description: str - }, - n_smaller: struct { - Description: str - }, - n_larger: struct { - Description: str - } - } - } - } - }, - gnomad_age_hist_het: struct { - Description: str, - sub_annotations: struct { - bin_edges: struct { - Description: str - }, - bin_freq: struct { - Description: str - }, - n_smaller: struct { - Description: str - }, - n_larger: struct { - Description: str - } - } - }, - gnomad_age_hist_hom: struct { - Description: str, - sub_annotations: struct { - bin_edges: struct { - Description: str - }, - bin_freq: struct { - Description: str - }, - n_smaller: struct { - Description: str - }, - n_larger: struct { - Description: str - } - } - }, - filters: struct { - Description: str - }, - info: struct { - Description: str, - sub_annotations: struct { - QUALapprox: struct { - Description: str - }, - SB: struct { - Description: str - }, - MQ: struct { - Description: str - }, - MQRankSum: struct { - Description: str - }, - VarDP: struct { - Description: str - }, - AS_ReadPosRankSum: struct { - Description: str - }, - AS_pab_max: struct { - Description: str - }, - AS_QD: struct { - Description: str - }, - AS_MQ: struct { - Description: str - }, - QD: struct { - Description: str 
- }, - AS_MQRankSum: struct { - Description: str - }, - FS: struct { - Description: str - }, - AS_FS: struct { - Description: str - }, - ReadPosRankSum: struct { - Description: str - }, - AS_QUALapprox: struct { - Description: str - }, - AS_SB_TABLE: struct { - Description: str - }, - AS_VarDP: struct { - Description: str - }, - AS_SOR: struct { - Description: str - }, - SOR: struct { - Description: str - }, - transmitted_singleton: struct { - Description: str - }, - omni: struct { - Description: str - }, - mills: struct { - Description: str - }, - monoallelic: struct { - Description: str - }, - InbreedingCoeff: struct { - Description: str - } - } - }, - vep: struct { - Description: str - }, - vqsr: struct { - Description: str, - sub_annotations: struct { - AS_VQSLOD: struct { - Description: str - }, - AS_culprit: struct { - Description: str - }, - NEGATIVE_TRAIN_SITE: struct { - Description: str - }, - POSITIVE_TRAIN_SITE: struct { - Description: str - } - } - }, - region_flag: struct { - Description: str, - sub_annotations: struct { - lcr: struct { - Description: str - }, - segdup: struct { - Description: str - } - } - }, - allele_info: struct { - Description: str, - sub_annotations: struct { - variant_type: struct { - Description: str - }, - allele_type: struct { - Description: str - }, - n_alt_alleles: struct { - Description: str - } - } - }, - was_mixed: struct { - Description: str - }, - cadd: struct { - sub_annotations: struct { - raw_score: struct { - Description: str - }, - phred: struct { - Description: str - }, - has_duplicate: struct { - Description: str - } - } - }, - revel: struct { - Description: str, - sub_annotations: struct { - revel_score: struct { - Description: str - }, - has_duplicate: struct { - Description: str - } - } - }, - splice_ai: struct { - sub_annotations: struct { - splice_ai: struct { - Description: str - }, - splice_consequence: struct { - Description: str - }, - has_duplicate: struct { - Description: str - } - } - }, - primate_ai: struct { - sub_annotations: struct { - primate_ai_score: struct { - Description: str - }, - has_duplicate: struct { - Description: str - } - } - }, - AS_lowqual: struct { - Description: str - }, - telomere_or_centromere: struct { - Description: str - } - } - 'hgdp_tgp_freq_meta': array> - 'hgdp_tgp_freq_index_dict': dict - 'gnomad_freq_meta': array> - 'gnomad_freq_index_dict': dict - 'gnomad_faf_index_dict': dict - 'gnomad_faf_meta': array> - 'vep_version': str - 'vep_csq_header': str - 'dbsnp_version': str - 'variant_filtering_model': struct { - model_name: str, - score_name: str, - snv_cutoff: struct { - bin: float64, - min_score: float64 - }, - indel_cutoff: struct { - bin: float64, - min_score: float64 - }, - snv_training_variables: array, - indel_training_variables: array - } - 'variant_inbreeding_coeff_cutoff': float64 - ---------------------------------------- - Column fields: - 's': str - 'bam_metrics': struct { - pct_bases_20x: float64, - pct_chimeras: float64, - freemix: float64, - mean_coverage: float64, - median_coverage: float64, - mean_insert_size: float64, - median_insert_size: float64, - pct_bases_10x: float64 - } - 'sample_qc': struct { - n_deletion: int64, - n_het: int64, - n_hom_ref: int64, - n_hom_var: int64, - n_insertion: int64, - n_non_ref: int64, - n_snp: int64, - n_transition: int64, - n_transversion: int64, - r_het_hom_var: float64, - r_insertion_deletion: float64, - r_ti_tv: float64 - } - 'gnomad_sex_imputation': struct { - chr20_mean_dp: float32, - chrX_mean_dp: float32, - chrY_mean_dp: float32, - 
chrX_ploidy: float32, - chrY_ploidy: float32, - X_karyotype: str, - Y_karyotype: str, - sex_karyotype: str, - f_stat: float64, - n_called: int64, - expected_homs: float64, - observed_homs: int64 - } - 'gnomad_population_inference': struct { - pca_scores: array, - pop: str, - prob_afr: float64, - prob_ami: float64, - prob_amr: float64, - prob_asj: float64, - prob_eas: float64, - prob_fin: float64, - prob_mid: float64, - prob_nfe: float64, - prob_oth: float64, - prob_sas: float64 - } - 'gnomad_sample_qc_residuals': struct { - n_snp_residual: float64, - r_ti_tv_residual: float64, - r_insertion_deletion_residual: float64, - n_insertion_residual: float64, - n_deletion_residual: float64, - r_het_hom_var_residual: float64, - n_transition_residual: float64, - n_transversion_residual: float64 - } - 'gnomad_sample_filters': struct { - hard_filters: set, - hard_filtered: bool, - release_related: bool, - qc_metrics_filters: set - } - 'gnomad_high_quality': bool - 'gnomad_release': bool - 'relatedness_inference': struct { - related_samples: set, - related: bool - } - 'hgdp_tgp_meta': struct { - project: str, - study_region: str, - population: str, - genetic_region: str, - latitude: float64, - longitude: float64, - hgdp_technical_meta: struct { - source: str, - library_type: str - }, - global_pca_scores: array, - subcontinental_pca: struct { - pca_scores: array, - pca_scores_outliers_removed: array, - outlier: bool - }, - gnomad_labeled_subpop: str - } - 'high_quality': bool - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'rsid': set - 'a_index': int32 - 'was_split': bool - 'filters': set - 'info': struct { - SB: array, - MQRankSum: float64, - VarDP: int32, - AS_FS: float64, - AS_ReadPosRankSum: float64, - AS_pab_max: float64, - AS_QD: float32, - AS_MQ: float64, - AS_QUALapprox: int64, - QD: float32, - AS_MQRankSum: float64, - FS: float64, - MQ: float64, - ReadPosRankSum: float64, - QUALapprox: int64, - AS_SB_TABLE: array, - AS_VarDP: int32, - AS_SOR: float64, - SOR: float64, - transmitted_singleton: bool, - omni: bool, - mills: bool, - monoallelic: bool, - InbreedingCoeff: float32, - AS_VQSLOD: float64 - } - 'vep': struct { - assembly_name: str, - allele_string: str, - ancestral: str, - context: str, - end: int32, - id: str, - input: str, - intergenic_consequences: array, - impact: str, - minimised: int32, - variant_allele: str - }>, - most_severe_consequence: str, - motif_feature_consequences: array, - high_inf_pos: str, - impact: str, - minimised: int32, - motif_feature_id: str, - motif_name: str, - motif_pos: int32, - motif_score_change: float64, - strand: int32, - variant_allele: str - }>, - regulatory_feature_consequences: array, - impact: str, - minimised: int32, - regulatory_feature_id: str, - variant_allele: str - }>, - seq_region_name: str, - start: int32, - strand: int32, - transcript_consequences: array, - distance: int32, - domains: array, - exon: str, - gene_id: str, - gene_pheno: int32, - gene_symbol: str, - gene_symbol_source: str, - hgnc_id: str, - hgvsc: str, - hgvsp: str, - hgvs_offset: int32, - impact: str, - intron: str, - lof: str, - lof_flags: str, - lof_filter: str, - lof_info: str, - minimised: int32, - polyphen_prediction: str, - polyphen_score: float64, - protein_end: int32, - protein_start: int32, - protein_id: str, - sift_prediction: str, - sift_score: float64, - strand: int32, - swissprot: str, - transcript_id: str, - trembl: str, - tsl: int32, - uniparc: str, - variant_allele: str - }>, - variant_class: str - } - 'vqsr': 
struct { - AS_VQSLOD: float64, - AS_culprit: str, - NEGATIVE_TRAIN_SITE: bool, - POSITIVE_TRAIN_SITE: bool - } - 'region_flag': struct { - lcr: bool, - segdup: bool - } - 'allele_info': struct { - variant_type: str, - allele_type: str, - n_alt_alleles: int32, - was_mixed: bool - } - 'hgdp_tgp_freq': array - 'gnomad_freq': array - 'gnomad_popmax': struct { - AC: int32, - AF: float64, - AN: int32, - homozygote_count: int32, - pop: str, - faf95: float64 - } - 'gnomad_faf': array - 'gnomad_raw_qual_hists': struct { - gq_hist_all: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - dp_hist_all: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - gq_hist_alt: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - dp_hist_alt: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - ab_hist_alt: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - } - } - 'gnomad_qual_hists': struct { - gq_hist_all: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - dp_hist_all: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - gq_hist_alt: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - dp_hist_alt: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - ab_hist_alt: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - } - } - 'gnomad_age_hist_het': struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - } - 'gnomad_age_hist_hom': struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - } - 'cadd': struct { - phred: float32, - raw_score: float32, - has_duplicate: bool - } - 'revel': struct { - revel_score: float64, - has_duplicate: bool - } - 'splice_ai': struct { - splice_ai_score: float32, - splice_consequence: str, - has_duplicate: bool - } - 'primate_ai': struct { - primate_ai_score: float32, - has_duplicate: bool - } - ---------------------------------------- - Entry fields: - 'DP': int32 - 'GQ': int32 - 'MIN_DP': int32 - 'PID': str - 'RGQ': int32 - 'SB': array - 'GT': call - 'PGT': call - 'AD': array - 'PL': array - 'adj': bool - ---------------------------------------- - Column key: ['s'] - Row key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_sample_metadata.rst b/hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_sample_metadata.rst deleted file mode 100644 index 7d5f6ceb559..00000000000 --- a/hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_sample_metadata.rst +++ /dev/null @@ -1,653 +0,0 @@ -.. _gnomad_hgdp_1kg_subset_sample_metadata: - -gnomad_hgdp_1kg_subset_sample_metadata -====================================== - -* **Versions:** 3.1.2 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (3.1.2, GRCh38) -~~~~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: text - - ---------------------------------------- - Global fields: - 'global_annotation_descriptions': struct { - gnomad_sex_imputation_ploidy_cutoffs: struct { - Description: str - }, - gnomad_population_inference_pca_metrics: struct { - Description: str - }, - sample_hard_filter_cutoffs: struct { - Description: str - }, - gnomad_sample_qc_metric_outlier_cutoffs: struct { - Description: str - }, - gnomad_age_distribution: struct { - Description: str, - sub_globals: struct { - bin_edges: struct { - Description: str - }, - bin_freq: struct { - Description: str - }, - n_smaller: struct { - Description: str - }, - n_larger: struct { - Description: str - } - } - } - } - 'sample_annotation_descriptions': struct { - s: struct { - Description: str - }, - bam_metrics: struct { - Description: str, - sub_annotations: struct { - pct_bases_20x: struct { - Description: str - }, - pct_chimeras: struct { - Description: str - }, - freemix: struct { - Description: str - }, - mean_coverage: struct { - Description: str - }, - median_coverage: struct { - Description: str - }, - mean_insert_size: struct { - Description: str - }, - median_insert_size: struct { - Description: str - }, - pct_bases_10x: struct { - Description: str - } - } - }, - sample_qc: struct { - Description: str, - sub_annotations: struct { - n_deletion: struct { - Description: str - }, - n_het: struct { - Description: str - }, - n_hom_ref: struct { - Description: str - }, - n_hom_var: struct { - Description: str - }, - n_insertion: struct { - Description: str - }, - n_non_ref: struct { - Description: str - }, - n_snp: struct { - Description: str - }, - n_transition: struct { - Description: str - }, - n_transversion: struct { - Description: str - }, - r_het_hom_var: struct { - Description: str - }, - r_insertion_deletion: struct { - Description: str - }, - r_ti_tv: struct { - Description: str - } - } - }, - gnomad_sex_imputation: struct { - Description: str, - sub_annotations: struct { - chr20_mean_dp: struct { - Description: str - }, - chrX_mean_dp: struct { - Description: str - }, - chrY_mean_dp: struct { - Description: str - }, - chrX_ploidy: struct { - Description: str - }, - chrY_ploidy: struct { - Description: str - }, - X_karyotype: struct { - Description: str - }, - Y_karyotype: struct { - Description: str - }, - sex_karyotype: struct { - Description: str - }, - f_stat: struct { - Description: str - }, - n_called: struct { - Description: str - }, - expected_homs: struct { - Description: str - }, - observed_homs: struct { - Description: str - } - } - }, - gnomad_population_inference: struct { - Description: str, - sub_annotations: struct { - pca_scores: struct { - Description: str - }, - pop: struct { - Description: str - }, - prob_afr: struct { - Description: str - }, - prob_ami: struct { - Description: str - }, - prob_amr: struct { - Description: str - }, - prob_asj: struct { - Description: str - }, - prob_eas: struct { - Description: str - }, - prob_fin: struct { - Description: str - }, - prob_mid: struct { - Description: str - }, - prob_nfe: struct { - Description: str - }, - prob_oth: struct { - Description: str - }, - prob_sas: struct { - Description: str - } - } - }, - gnomad_sample_qc_residuals: struct { - Description: tuple ( - str - ), - sub_annotations: struct { - n_snp_residual: struct { - Description: str - }, - r_ti_tv_residual: struct { - Description: str - }, - r_insertion_deletion_residual: struct { - Description: str - }, - n_insertion_residual: struct { - Description: str - }, - n_deletion_residual: 
struct { - Description: str - }, - r_het_hom_var_residual: struct { - Description: str - }, - n_transition_residual: struct { - Description: str - }, - n_transversion_residual: struct { - Description: str - } - } - }, - gnomad_sample_filters: struct { - Description: str, - sub_annotations: struct { - hard_filters: struct { - Description: str - }, - hard_filtered: struct { - Description: str - }, - release_related: struct { - Description: str - }, - qc_metrics_filters: struct { - Description: str - } - } - }, - gnomad_high_quality: struct { - Description: str - }, - gnomad_release: struct { - Description: str - }, - relatedness_inference: struct { - Description: str, - sub_annotations: struct { - related_samples: struct { - Description: str, - sub_annotations: struct { - s: struct { - Description: str - }, - kin: struct { - Description: str - }, - ibd0: struct { - Description: str - }, - ibd1: struct { - Description: str - }, - ibd2: struct { - Description: str - } - } - }, - related: struct { - Description: str - } - } - }, - hgdp_tgp_meta: struct { - Description: str, - sub_annotations: struct { - project: struct { - Description: str - }, - study_region: struct { - Description: str - }, - population: struct { - Description: str - }, - genetic_region: struct { - Description: str - }, - latitude: struct { - Description: str - }, - longitude: struct { - Description: str - }, - hgdp_technical_meta: struct { - Description: str, - sub_annotations: struct { - source: struct { - Description: str - }, - library_type: struct { - Description: str - } - } - }, - global_pca_scores: struct { - Description: str - }, - subcontinental_pca: struct { - Description: str, - sub_annotations: struct { - pca_scores: struct { - Description: str - }, - pca_scores_outliers_removed: struct { - Description: str - }, - outlier: struct { - Description: str - } - } - }, - gnomad_labeled_subpop: struct { - Description: str - } - } - }, - high_quality: struct { - Description: str - } - } - 'gnomad_sex_imputation_ploidy_cutoffs': struct { - x_ploidy_cutoffs: struct { - upper_cutoff_X: float64, - lower_cutoff_XX: float64, - upper_cutoff_XX: float64, - lower_cutoff_XXX: float64 - }, - y_ploidy_cutoffs: struct { - lower_cutoff_Y: float64, - upper_cutoff_Y: float64, - lower_cutoff_YY: float64 - }, - f_stat_cutoff: float64 - } - 'gnomad_population_inference_pca_metrics': struct { - n_pcs: int32, - min_prob: float64 - } - 'sample_hard_filter_cutoffs': struct { - min_cov: int32, - max_n_snp: float64, - min_n_snp: float64, - max_n_singleton: float64, - max_r_het_hom_var: float64, - max_pct_contamination: float64, - max_pct_chimera: float64, - min_median_insert_size: int32 - } - 'gnomad_sample_qc_metric_outlier_cutoffs': struct { - lms: struct { - n_snp: struct { - beta: array, - standard_error: array, - t_stat: array, - p_value: array, - multiple_standard_error: float64, - multiple_r_squared: float64, - adjusted_r_squared: float64, - f_stat: float64, - multiple_p_value: float64, - n: int32 - }, - n_singleton: struct { - beta: array, - standard_error: array, - t_stat: array, - p_value: array, - multiple_standard_error: float64, - multiple_r_squared: float64, - adjusted_r_squared: float64, - f_stat: float64, - multiple_p_value: float64, - n: int32 - }, - r_ti_tv: struct { - beta: array, - standard_error: array, - t_stat: array, - p_value: array, - multiple_standard_error: float64, - multiple_r_squared: float64, - adjusted_r_squared: float64, - f_stat: float64, - multiple_p_value: float64, - n: int32 - }, - r_insertion_deletion: 
struct { - beta: array, - standard_error: array, - t_stat: array, - p_value: array, - multiple_standard_error: float64, - multiple_r_squared: float64, - adjusted_r_squared: float64, - f_stat: float64, - multiple_p_value: float64, - n: int32 - }, - n_insertion: struct { - beta: array, - standard_error: array, - t_stat: array, - p_value: array, - multiple_standard_error: float64, - multiple_r_squared: float64, - adjusted_r_squared: float64, - f_stat: float64, - multiple_p_value: float64, - n: int32 - }, - n_deletion: struct { - beta: array, - standard_error: array, - t_stat: array, - p_value: array, - multiple_standard_error: float64, - multiple_r_squared: float64, - adjusted_r_squared: float64, - f_stat: float64, - multiple_p_value: float64, - n: int32 - }, - r_het_hom_var: struct { - beta: array, - standard_error: array, - t_stat: array, - p_value: array, - multiple_standard_error: float64, - multiple_r_squared: float64, - adjusted_r_squared: float64, - f_stat: float64, - multiple_p_value: float64, - n: int32 - }, - n_transition: struct { - beta: array, - standard_error: array, - t_stat: array, - p_value: array, - multiple_standard_error: float64, - multiple_r_squared: float64, - adjusted_r_squared: float64, - f_stat: float64, - multiple_p_value: float64, - n: int32 - }, - n_transversion: struct { - beta: array, - standard_error: array, - t_stat: array, - p_value: array, - multiple_standard_error: float64, - multiple_r_squared: float64, - adjusted_r_squared: float64, - f_stat: float64, - multiple_p_value: float64, - n: int32 - } - }, - qc_metrics_stats: struct { - n_snp_residual: struct { - median: float64, - mad: float64, - lower: float64, - upper: float64 - }, - n_singleton_residual: struct { - median: float64, - mad: float64, - lower: float64, - upper: float64 - }, - r_ti_tv_residual: struct { - median: float64, - mad: float64, - lower: float64, - upper: float64 - }, - r_insertion_deletion_residual: struct { - median: float64, - mad: float64, - lower: float64, - upper: float64 - }, - n_insertion_residual: struct { - median: float64, - mad: float64, - lower: float64, - upper: float64 - }, - n_deletion_residual: struct { - median: float64, - mad: float64, - lower: float64, - upper: float64 - }, - r_het_hom_var_residual: struct { - median: float64, - mad: float64, - lower: float64, - upper: float64 - }, - n_transition_residual: struct { - median: float64, - mad: float64, - lower: float64, - upper: float64 - }, - n_transversion_residual: struct { - median: float64, - mad: float64, - lower: float64, - upper: float64 - } - }, - n_pcs: int32, - used_regressed_metrics: bool - } - 'gnomad_age_distribution': struct { - bin_edges: array, - bin_freq: array, - n_smaller: int32, - n_larger: int32 - } - ---------------------------------------- - Row fields: - 's': str - 'bam_metrics': struct { - pct_bases_20x: float64, - pct_chimeras: float64, - freemix: float64, - mean_coverage: float64, - median_coverage: float64, - mean_insert_size: float64, - median_insert_size: float64, - pct_bases_10x: float64 - } - 'sample_qc': struct { - n_deletion: int64, - n_het: int64, - n_hom_ref: int64, - n_hom_var: int64, - n_insertion: int64, - n_non_ref: int64, - n_snp: int64, - n_transition: int64, - n_transversion: int64, - r_het_hom_var: float64, - r_insertion_deletion: float64, - r_ti_tv: float64 - } - 'gnomad_sex_imputation': struct { - chr20_mean_dp: float32, - chrX_mean_dp: float32, - chrY_mean_dp: float32, - chrX_ploidy: float32, - chrY_ploidy: float32, - X_karyotype: str, - Y_karyotype: str, - sex_karyotype: 
str, - f_stat: float64, - n_called: int64, - expected_homs: float64, - observed_homs: int64 - } - 'gnomad_population_inference': struct { - pca_scores: array, - pop: str, - prob_afr: float64, - prob_ami: float64, - prob_amr: float64, - prob_asj: float64, - prob_eas: float64, - prob_fin: float64, - prob_mid: float64, - prob_nfe: float64, - prob_oth: float64, - prob_sas: float64 - } - 'gnomad_sample_qc_residuals': struct { - n_snp_residual: float64, - r_ti_tv_residual: float64, - r_insertion_deletion_residual: float64, - n_insertion_residual: float64, - n_deletion_residual: float64, - r_het_hom_var_residual: float64, - n_transition_residual: float64, - n_transversion_residual: float64 - } - 'gnomad_sample_filters': struct { - hard_filters: set, - hard_filtered: bool, - release_related: bool, - qc_metrics_filters: set - } - 'gnomad_high_quality': bool - 'gnomad_release': bool - 'relatedness_inference': struct { - related_samples: set, - related: bool - } - 'hgdp_tgp_meta': struct { - project: str, - study_region: str, - population: str, - genetic_region: str, - latitude: float64, - longitude: float64, - hgdp_technical_meta: struct { - source: str, - library_type: str - }, - global_pca_scores: array, - subcontinental_pca: struct { - pca_scores: array, - pca_scores_outliers_removed: array, - outlier: bool - }, - gnomad_labeled_subpop: str - } - 'high_quality': bool - ---------------------------------------- - Key: ['s'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_sparse.rst b/hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_sparse.rst deleted file mode 100644 index da60d664525..00000000000 --- a/hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_sparse.rst +++ /dev/null @@ -1,54 +0,0 @@ -.. _gnomad_hgdp_1kg_subset_sparse: - -gnomad_hgdp_1kg_subset_sparse -============================= - -* **Versions:** 3.1.2 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.MatrixTable` - -Schema (3.1.2, GRCh38) -~~~~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - None - ---------------------------------------- - Column fields: - 's': str - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'rsid': str - ---------------------------------------- - Entry fields: - 'DP': int32 - 'END': int32 - 'GQ': int32 - 'LA': array - 'LAD': array - 'LGT': call - 'LPGT': call - 'LPL': array - 'MIN_DP': int32 - 'PID': str - 'RGQ': int32 - 'SB': array - 'gvcf_info': struct { - ClippingRankSum: float64, - BaseQRankSum: float64, - MQ: float64, - MQRankSum: float64, - MQ_DP: int32, - QUALapprox: int32, - RAW_MQ: float64, - ReadPosRankSum: float64, - VarDP: int32 - } - ---------------------------------------- - Column key: ['s'] - Row key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_variant_annotations.rst b/hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_variant_annotations.rst deleted file mode 100644 index 9d7f900b1ab..00000000000 --- a/hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_variant_annotations.rst +++ /dev/null @@ -1,857 +0,0 @@ -.. 
_gnomad_hgdp_1kg_subset_variant_annotations: - -gnomad_hgdp_1kg_subset_variant_annotations -========================================== - -* **Versions:** 3.1.2 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.Table` - -Schema (3.1.2, GRCh38) -~~~~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'global_annotation_descriptions': struct { - hgdp_tgp_freq_meta: struct { - Description: str - }, - gnomad_freq_meta: struct { - Description: str - }, - hgdp_tgp_freq_index_dict: struct { - Description: str - }, - gnomad_freq_index_dict: struct { - Description: str - }, - gnomad_faf_meta: struct { - Description: str - }, - gnomad_faf_index_dict: struct { - Description: str - }, - variant_filtering_model: struct { - Description: set, - sub_globals: struct { - model_name: struct { - Description: str - }, - score_name: struct { - Description: str - }, - snv_cutoff: struct { - Description: str, - sub_globals: struct { - bin: struct { - Description: str - }, - min_score: struct { - Description: str - } - } - }, - indel_cutoff: struct { - Description: str, - sub_globals: struct { - bin: struct { - Description: str - }, - min_score: struct { - Description: str - } - } - }, - snv_training_variables: struct { - Description: str - }, - indel_training_variables: struct { - Description: str - } - } - }, - variant_inbreeding_coeff_cutoff: struct { - Description: str - }, - vep_version: struct { - Description: str - }, - vep_csq_header: struct { - Description: str - }, - dbsnp_version: struct { - Description: str - } - } - 'variant_annotation_descriptions': struct { - locus: struct { - Description: str - }, - alleles: struct { - Description: str - }, - rsid: struct { - Description: str - }, - a_index: struct { - Description: str - }, - was_split: struct { - Description: str - }, - hgdp_tgp_freq: struct { - Description: str, - sub_annotations: struct { - AC: struct { - Description: str - }, - AF: struct { - Description: str - }, - AN: struct { - Description: str - }, - homozygote_count: struct { - Description: str - } - } - }, - gnomad_freq: struct { - Description: str, - sub_annotations: struct { - AC: struct { - Description: str - }, - AF: struct { - Description: str - }, - AN: struct { - Description: str - }, - homozygote_count: struct { - Description: str - } - } - }, - gnomad_popmax: struct { - Description: str, - sub_annotations: struct { - AC: struct { - Description: str - }, - AF: struct { - Description: str - }, - AN: struct { - Description: str - }, - homozygote_count: struct { - Description: str - }, - pop: struct { - Description: str - }, - faf95: struct { - Description: str - } - } - }, - gnomad_faf: struct { - Description: str, - sub_annotations: struct { - faf95: struct { - Description: str - }, - faf99: struct { - Description: str - } - } - }, - gnomad_qual_hists: struct { - Description: str, - sub_annotations: struct { - gq_hist_all: struct { - Description: str, - sub_annotations: struct { - bin_edges: struct { - Description: str - }, - bin_freq: struct { - Description: str - }, - n_smaller: struct { - Description: str - }, - n_larger: struct { - Description: str - } - } - }, - dp_hist_all: struct { - Description: str, - sub_annotations: struct { - bin_edges: struct { - Description: str - }, - bin_freq: struct { - Description: str - }, - n_smaller: struct { - Description: str - }, - n_larger: struct { - Description: str - } - } - }, - gq_hist_alt: struct { - Description: str, - sub_annotations: struct { - bin_edges: struct { 
- Description: str - }, - bin_freq: struct { - Description: str - }, - n_smaller: struct { - Description: str - }, - n_larger: struct { - Description: str - } - } - }, - dp_hist_alt: struct { - Description: str, - sub_annotations: struct { - bin_edges: struct { - Description: str - }, - bin_freq: struct { - Description: str - }, - n_smaller: struct { - Description: str - }, - n_larger: struct { - Description: str - } - } - }, - ab_hist_alt: struct { - Description: str, - sub_annotations: struct { - bin_edges: struct { - Description: str - }, - bin_freq: struct { - Description: str - }, - n_smaller: struct { - Description: str - }, - n_larger: struct { - Description: str - } - } - } - } - }, - gnomad_raw_qual_hists: struct { - Description: str, - sub_annotations: struct { - gq_hist_all: struct { - Description: str, - sub_annotations: struct { - bin_edges: struct { - Description: str - }, - bin_freq: struct { - Description: str - }, - n_smaller: struct { - Description: str - }, - n_larger: struct { - Description: str - } - } - }, - dp_hist_all: struct { - Description: str, - sub_annotations: struct { - bin_edges: struct { - Description: str - }, - bin_freq: struct { - Description: str - }, - n_smaller: struct { - Description: str - }, - n_larger: struct { - Description: str - } - } - }, - gq_hist_alt: struct { - Description: str, - sub_annotations: struct { - bin_edges: struct { - Description: str - }, - bin_freq: struct { - Description: str - }, - n_smaller: struct { - Description: str - }, - n_larger: struct { - Description: str - } - } - }, - dp_hist_alt: struct { - Description: str, - sub_annotations: struct { - bin_edges: struct { - Description: str - }, - bin_freq: struct { - Description: str - }, - n_smaller: struct { - Description: str - }, - n_larger: struct { - Description: str - } - } - }, - ab_hist_alt: struct { - Description: str, - sub_annotations: struct { - bin_edges: struct { - Description: str - }, - bin_freq: struct { - Description: str - }, - n_smaller: struct { - Description: str - }, - n_larger: struct { - Description: str - } - } - } - } - }, - gnomad_age_hist_het: struct { - Description: str, - sub_annotations: struct { - bin_edges: struct { - Description: str - }, - bin_freq: struct { - Description: str - }, - n_smaller: struct { - Description: str - }, - n_larger: struct { - Description: str - } - } - }, - gnomad_age_hist_hom: struct { - Description: str, - sub_annotations: struct { - bin_edges: struct { - Description: str - }, - bin_freq: struct { - Description: str - }, - n_smaller: struct { - Description: str - }, - n_larger: struct { - Description: str - } - } - }, - filters: struct { - Description: str - }, - info: struct { - Description: str, - sub_annotations: struct { - QUALapprox: struct { - Description: str - }, - SB: struct { - Description: str - }, - MQ: struct { - Description: str - }, - MQRankSum: struct { - Description: str - }, - VarDP: struct { - Description: str - }, - AS_ReadPosRankSum: struct { - Description: str - }, - AS_pab_max: struct { - Description: str - }, - AS_QD: struct { - Description: str - }, - AS_MQ: struct { - Description: str - }, - QD: struct { - Description: str - }, - AS_MQRankSum: struct { - Description: str - }, - FS: struct { - Description: str - }, - AS_FS: struct { - Description: str - }, - ReadPosRankSum: struct { - Description: str - }, - AS_QUALapprox: struct { - Description: str - }, - AS_SB_TABLE: struct { - Description: str - }, - AS_VarDP: struct { - Description: str - }, - AS_SOR: struct { - Description: str - }, - 
SOR: struct { - Description: str - }, - transmitted_singleton: struct { - Description: str - }, - omni: struct { - Description: str - }, - mills: struct { - Description: str - }, - monoallelic: struct { - Description: str - }, - InbreedingCoeff: struct { - Description: str - } - } - }, - vep: struct { - Description: str - }, - vqsr: struct { - Description: str, - sub_annotations: struct { - AS_VQSLOD: struct { - Description: str - }, - AS_culprit: struct { - Description: str - }, - NEGATIVE_TRAIN_SITE: struct { - Description: str - }, - POSITIVE_TRAIN_SITE: struct { - Description: str - } - } - }, - region_flag: struct { - Description: str, - sub_annotations: struct { - lcr: struct { - Description: str - }, - segdup: struct { - Description: str - } - } - }, - allele_info: struct { - Description: str, - sub_annotations: struct { - variant_type: struct { - Description: str - }, - allele_type: struct { - Description: str - }, - n_alt_alleles: struct { - Description: str - } - } - }, - was_mixed: struct { - Description: str - }, - cadd: struct { - sub_annotations: struct { - raw_score: struct { - Description: str - }, - phred: struct { - Description: str - }, - has_duplicate: struct { - Description: str - } - } - }, - revel: struct { - Description: str, - sub_annotations: struct { - revel_score: struct { - Description: str - }, - has_duplicate: struct { - Description: str - } - } - }, - splice_ai: struct { - sub_annotations: struct { - splice_ai: struct { - Description: str - }, - splice_consequence: struct { - Description: str - }, - has_duplicate: struct { - Description: str - } - } - }, - primate_ai: struct { - sub_annotations: struct { - primate_ai_score: struct { - Description: str - }, - has_duplicate: struct { - Description: str - } - } - }, - AS_lowqual: struct { - Description: str - }, - telomere_or_centromere: struct { - Description: str - } - } - 'hgdp_tgp_freq_meta': array> - 'hgdp_tgp_freq_index_dict': dict - 'gnomad_freq_meta': array> - 'gnomad_freq_index_dict': dict - 'gnomad_faf_index_dict': dict - 'gnomad_faf_meta': array> - 'vep_version': str - 'vep_csq_header': str - 'dbsnp_version': str - 'variant_filtering_model': struct { - model_name: str, - score_name: str, - snv_cutoff: struct { - bin: float64, - min_score: float64 - }, - indel_cutoff: struct { - bin: float64, - min_score: float64 - }, - snv_training_variables: array, - indel_training_variables: array - } - 'variant_inbreeding_coeff_cutoff': float64 - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'a_index': int32 - 'was_split': bool - 'rsid': set - 'filters': set - 'info': struct { - SB: array, - MQRankSum: float64, - VarDP: int32, - AS_FS: float64, - AS_ReadPosRankSum: float64, - AS_pab_max: float64, - AS_QD: float32, - AS_MQ: float64, - AS_QUALapprox: int64, - QD: float32, - AS_MQRankSum: float64, - FS: float64, - MQ: float64, - ReadPosRankSum: float64, - QUALapprox: int64, - AS_SB_TABLE: array, - AS_VarDP: int32, - AS_SOR: float64, - SOR: float64, - transmitted_singleton: bool, - omni: bool, - mills: bool, - monoallelic: bool, - InbreedingCoeff: float32, - AS_VQSLOD: float64 - } - 'vep': struct { - assembly_name: str, - allele_string: str, - ancestral: str, - context: str, - end: int32, - id: str, - input: str, - intergenic_consequences: array, - impact: str, - minimised: int32, - variant_allele: str - }>, - most_severe_consequence: str, - motif_feature_consequences: array, - high_inf_pos: str, - impact: str, - minimised: int32, - motif_feature_id: str, - motif_name: 
str, - motif_pos: int32, - motif_score_change: float64, - strand: int32, - variant_allele: str - }>, - regulatory_feature_consequences: array, - impact: str, - minimised: int32, - regulatory_feature_id: str, - variant_allele: str - }>, - seq_region_name: str, - start: int32, - strand: int32, - transcript_consequences: array, - distance: int32, - domains: array, - exon: str, - gene_id: str, - gene_pheno: int32, - gene_symbol: str, - gene_symbol_source: str, - hgnc_id: str, - hgvsc: str, - hgvsp: str, - hgvs_offset: int32, - impact: str, - intron: str, - lof: str, - lof_flags: str, - lof_filter: str, - lof_info: str, - minimised: int32, - polyphen_prediction: str, - polyphen_score: float64, - protein_end: int32, - protein_start: int32, - protein_id: str, - sift_prediction: str, - sift_score: float64, - strand: int32, - swissprot: str, - transcript_id: str, - trembl: str, - tsl: int32, - uniparc: str, - variant_allele: str - }>, - variant_class: str - } - 'vqsr': struct { - AS_VQSLOD: float64, - AS_culprit: str, - NEGATIVE_TRAIN_SITE: bool, - POSITIVE_TRAIN_SITE: bool - } - 'region_flag': struct { - lcr: bool, - segdup: bool - } - 'allele_info': struct { - variant_type: str, - allele_type: str, - n_alt_alleles: int32, - was_mixed: bool - } - 'hgdp_tgp_freq': array - 'gnomad_freq': array - 'gnomad_popmax': struct { - AC: int32, - AF: float64, - AN: int32, - homozygote_count: int32, - pop: str, - faf95: float64 - } - 'gnomad_faf': array - 'gnomad_raw_qual_hists': struct { - gq_hist_all: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - dp_hist_all: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - gq_hist_alt: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - dp_hist_alt: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - ab_hist_alt: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - } - } - 'gnomad_qual_hists': struct { - gq_hist_all: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - dp_hist_all: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - gq_hist_alt: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - dp_hist_alt: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - ab_hist_alt: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - } - } - 'gnomad_age_hist_het': struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - } - 'gnomad_age_hist_hom': struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - } - 'cadd': struct { - phred: float32, - raw_score: float32, - has_duplicate: bool - } - 'revel': struct { - revel_score: float64, - has_duplicate: bool - } - 'splice_ai': struct { - splice_ai_score: float32, - splice_consequence: str, - has_duplicate: bool - } - 'primate_ai': struct { - primate_ai_score: float32, - has_duplicate: bool - } - 'AS_lowqual': bool - 'telomere_or_centromere': bool - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/gnomad_pca_variant_loadings.rst b/hail/python/hail/docs/datasets/schemas/gnomad_pca_variant_loadings.rst deleted file mode 100644 index 9e74ff1577f..00000000000 --- 
a/hail/python/hail/docs/datasets/schemas/gnomad_pca_variant_loadings.rst +++ /dev/null @@ -1,26 +0,0 @@ -.. _gnomad_pca_variant_loadings: - -gnomad_pca_variant_loadings -=========================== - -* **Versions:** 2.1, 3.1 -* **Reference genome builds:** GRCh37, GRCh38 -* **Type:** :class:`hail.Table` - -Schema (3.1, GRCh38) -~~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - None - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'loadings': array - 'pca_af': float64 - ---------------------------------------- - Key: ['locus', 'alleles'] - ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/gnomad_variant_co-occurrence.rst b/hail/python/hail/docs/datasets/schemas/gnomad_variant_co-occurrence.rst deleted file mode 100644 index 62284380179..00000000000 --- a/hail/python/hail/docs/datasets/schemas/gnomad_variant_co-occurrence.rst +++ /dev/null @@ -1,60 +0,0 @@ -.. _gnomad_variant_co-occurrence: - -gnomad_variant_co-occurrence -============================ - -* **Versions:** 2.1.1 -* **Reference genome builds:** GRCh37 -* **Type:** :class:`hail.Table` - -Schema (2.1.1, GRCh37) -~~~~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'max_freq': float64 - 'least_consequence': str - 'same_haplotype_em_probability_cutoff': float64 - 'different_haplotypes_em_probability_cutoff': float64 - 'global_annotation_descriptions': struct { - max_freq: str, - least_consequence: str, - same_haplotype_em_probability_cutoff: str, - different_haplotypes_em_probability_cutoff: str - } - 'row_annotation_descriptions': struct { - locus1: str, - alleles1: str, - locus2: str, - alleles2: str, - phase_info: struct { - description: str, - gt_counts: str, - em: struct { - hap_counts: str, - p_chet: str, - same_haplotype: str, - different_haplotype: str - } - } - } - ---------------------------------------- - Row fields: - 'locus1': locus - 'alleles1': array - 'locus2': locus - 'alleles2': array - 'phase_info': dict, - em: struct { - hap_counts: array, - p_chet: float64, - same_haplotype: bool, - different_haplotype: bool - } - }> - ---------------------------------------- - Key: ['locus1', 'alleles1', 'locus2', 'alleles2'] - ---------------------------------------- diff --git a/hail/python/hail/docs/ggplot/index.rst b/hail/python/hail/docs/ggplot/index.rst index cd4cb2927b7..62c50439913 100644 --- a/hail/python/hail/docs/ggplot/index.rst +++ b/hail/python/hail/docs/ggplot/index.rst @@ -73,13 +73,9 @@ currently uses plotly to generate plots, as opposed to ``hl.plot``, which uses b scale_y_reverse scale_color_continuous scale_color_discrete - scale_color_hue - scale_color_manual scale_color_identity scale_fill_continuous scale_fill_discrete - scale_fill_hue - scale_fill_manual scale_fill_identity .. autofunction:: scale_x_continuous @@ -93,13 +89,9 @@ currently uses plotly to generate plots, as opposed to ``hl.plot``, which uses b .. autofunction:: scale_y_reverse .. autofunction:: scale_color_continuous .. autofunction:: scale_color_discrete -.. autofunction:: scale_color_hue -.. autofunction:: scale_color_manual .. autofunction:: scale_color_identity .. autofunction:: scale_fill_continuous .. autofunction:: scale_fill_discrete -.. autofunction:: scale_fill_hue -.. autofunction:: scale_fill_manual .. autofunction:: scale_fill_identity .. 
rubric:: Labels diff --git a/hail/python/hail/docs/hail_on_the_cloud.rst b/hail/python/hail/docs/hail_on_the_cloud.rst index 16f97e25bb1..d60492e652b 100644 --- a/hail/python/hail/docs/hail_on_the_cloud.rst +++ b/hail/python/hail/docs/hail_on_the_cloud.rst @@ -4,15 +4,15 @@ Hail on the Cloud ================= -Public clouds are a natural place to run Hail, offering the ability to run on-demand workloads with -high elasticity. Microsoft Azure, Google Cloud Platform, Databricks and Amazon Web Services make it -possible to rent Spark clusters with thousands of cores on-demand, providing for the elastic compute -requirements of scientific research without an up-front capital investment in hardware. +Public clouds are a natural place to run Hail, offering the ability to run +on-demand workloads with high elasticity. Microsoft Azure, Google Cloud Platform, Databricks and Amazon Web Services make it +possible to rent Spark clusters with thousands of cores on-demand, +providing for the elastic compute requirements of scientific research without +an up-front capital investment in hardware. .. toctree:: General Advice - Query-on-Batch Google Cloud Microsoft Azure Other Cloud Providers diff --git a/hail/python/hail/experimental/datasets.json b/hail/python/hail/experimental/datasets.json index 62f6a2a3dc5..be573be2d6f 100644 --- a/hail/python/hail/experimental/datasets.json +++ b/hail/python/hail/experimental/datasets.json @@ -3780,119 +3780,24 @@ } }, "version": "3.1" - }, - { - "reference_genome": "GRCh38", - "url": { - "aws": { - "us": "s3://gnomad-public-us-east-1/release/3.1.1/ht/genomes/gnomad.genomes.v3.1.1.sites.ht" - }, - "gcp": { - "us": "gs://gcp-public-data--gnomad/release/3.1.1/ht/genomes/gnomad.genomes.v3.1.1.sites.ht" - } - }, - "version": "3.1.1" - }, - { - "reference_genome": "GRCh38", - "url": { - "aws": { - "us": "s3://gnomad-public-us-east-1/release/3.1.2/ht/genomes/gnomad.genomes.v3.1.2.sites.ht" - }, - "gcp": { - "us": "gs://gcp-public-data--gnomad/release/3.1.2/ht/genomes/gnomad.genomes.v3.1.2.sites.ht" - } - }, - "version": "3.1.2" } ] }, - "gnomad_hgdp_1kg_subset_dense": { - "description": "gnomAD: 1KG + HGDP subset, formatted as a dense MatrixTable. 
Samples included in this subset are drawn from the 1000 Genomes Project (n=2,435) and the Human Genome Diversity Project (n=780), which contain some of the most genetically diverse populations present in gnomAD.", + "gnomad_hgdp_1kg_callset": { + "description": "gnomAD: samples included in this subset are drawn from the 1000 Genomes Project (n=2,435) and the Human Genome Diversity Project (n=780), which contain some of the most genetically diverse populations present in gnomAD.", "url": "https://gnomad.broadinstitute.org/", "versions": [ { "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://gnomad-public-us-east-1/release/3.1/mt/genomes/gnomad.genomes.v3.1.hgdp_1kg_subset_dense.mt" + "us": "s3://gnomad-public-us-east-1/release/3.1/mt/genomes/gnomad.genomes.v3.1.hgdp_1kg_subset.mt" }, "gcp": { - "us": "gs://gcp-public-data--gnomad/release/3.1/mt/genomes/gnomad.genomes.v3.1.hgdp_1kg_subset_dense.mt" + "us": "gs://gcp-public-data--gnomad/release/3.1/mt/genomes/gnomad.genomes.v3.1.hgdp_1kg_subset.mt" } }, "version": "3.1" - }, - { - "reference_genome": "GRCh38", - "url": { - "aws": { - "us": "s3://gnomad-public-us-east-1/release/3.1.2/mt/genomes/gnomad.genomes.v3.1.2.hgdp_1kg_subset_dense.mt" - }, - "gcp": { - "us": "gs://gcp-public-data--gnomad/release/3.1.2/mt/genomes/gnomad.genomes.v3.1.2.hgdp_1kg_subset_dense.mt" - } - }, - "version": "3.1.2" - } - ] - }, - "gnomad_hgdp_1kg_subset_sparse": { - "description": "gnomAD: 1KG + HGDP subset, formatted as a sparse MatrixTable. Samples included in this subset are drawn from the 1000 Genomes Project (n=2,435) and the Human Genome Diversity Project (n=780), which contain some of the most genetically diverse populations present in gnomAD. Note: this version is formatted as a sparse MatrixTable.", - "url": "https://gnomad.broadinstitute.org/", - "versions": [ - { - "reference_genome": "GRCh38", - "url": { - "aws": { - "us": "s3://gnomad-public-us-east-1/release/3.1.2/mt/genomes/gnomad.genomes.v3.1.2.hgdp_1kg_subset_sparse.mt" - }, - "gcp": { - "us": "gs://gcp-public-data--gnomad/release/3.1.2/mt/genomes/gnomad.genomes.v3.1.2.hgdp_1kg_subset_sparse.mt" - } - }, - "version": "3.1.2" - } - ] - }, - "gnomad_hgdp_1kg_subset_sample_metadata": { - "description": "gnomAD: Hail Table containing HGDP + 1KG gnomAD sample QC metrics, as well as additional sample metadata.", - "url": "https://gnomad.broadinstitute.org/", - "versions": [ - { - "reference_genome": "GRCh38", - "url": { - "aws": { - "us": "s3://gnomad-public-us-east-1/release/3.1.2/ht/genomes/gnomad.genomes.v3.1.2.hgdp_1kg_subset_sample_meta.ht" - }, - "gcp": { - "us": "gs://gcp-public-data--gnomad/release/3.1.2/ht/genomes/gnomad.genomes.v3.1.2.hgdp_1kg_subset_sample_meta.ht" - } - }, - "version": "3.1.2" - } - ] - }, - "gnomad_hgdp_1kg_subset_variant_annotations": { - "annotation_db": { - "key_properties": [ - "unique" - ] - }, - "description": "gnomAD: Hail Table containing HGDP + 1KG variant annotations. 
Note that this Hail Table splits multi-allelic variants, so users who would like to annotate the sparse, unsplit MatrixTable with this Table will need to split the sparse MatrixTable first.", - "url": "https://gnomad.broadinstitute.org/", - "versions": [ - { - "reference_genome": "GRCh38", - "url": { - "aws": { - "us": "s3://gnomad-public-us-east-1/release/3.1.2/ht/genomes/gnomad.genomes.v3.1.2.hgdp_1kg_subset_variant_annotations.ht" - }, - "gcp": { - "us": "gs://gcp-public-data--gnomad/release/3.1.2/ht/genomes/gnomad.genomes.v3.1.2.hgdp_1kg_subset_variant_annotations.ht" - } - }, - "version": "3.1.2" } ] }, @@ -4702,59 +4607,6 @@ } ] }, - "gnomad_variant_co-occurrence": { - "description": "gnomAD: Hail Table containing variant co-occurrence (inferred phasing) data for all pairs of variants within a gene where both variants have a global allele frequency in gnomAD exomes <5% and are either coding, flanking intronic (from position -1 to -3 in acceptor sites, and +1 to +8 in donor sites) or in the 5’/3’ UTRs. This encompasses 20,921,100 pairs of variants across 19,685 genes.", - "url": "https://gnomad.broadinstitute.org/", - "versions": [ - { - "reference_genome": "GRCh37", - "url": { - "aws": { - "us": "s3://gnomad-public-us-east-1/release/2.1.1/ht/exomes_phased_counts_0.05_3_prime_UTR_variant_vp.ht" - }, - "gcp": { - "us": "gs://gcp-public-data--gnomad/release/2.1.1/ht/exomes_phased_counts_0.05_3_prime_UTR_variant_vp.ht" - } - }, - "version": "2.1.1" - } - ] - }, - "gnomad_pca_variant_loadings": { - "annotation_db": { - "key_properties": [ - "unique" - ] - }, - "description": "gnomAD: ancestry principal component analysis (PCA) variant loadings.", - "url": "https://gnomad.broadinstitute.org/", - "versions": [ - { - "reference_genome": "GRCh37", - "url": { - "aws": { - "us": "s3://gnomad-public-us-east-1/release/2.1/pca/gnomad.r2.1.pca_loadings.ht" - }, - "gcp": { - "us": "gs://gcp-public-data--gnomad/release/2.1/pca/gnomad.r2.1.pca_loadings.ht" - } - }, - "version": "2.1" - }, - { - "reference_genome": "GRCh38", - "url": { - "aws": { - "us": "s3://gnomad-public-us-east-1/release/3.1/pca/gnomad.v3.1.pca_loadings.ht" - }, - "gcp": { - "us": "gs://gcp-public-data--gnomad/release/3.1/pca/gnomad.v3.1.pca_loadings.ht" - } - }, - "version": "3.1" - } - ] - }, "gnomad_plof_metrics_gene": { "annotation_db": { "key_properties": [ diff --git a/hail/python/hail/experimental/datasets.py b/hail/python/hail/experimental/datasets.py index 26dbddb4d1e..e7182bef66a 100644 --- a/hail/python/hail/experimental/datasets.py +++ b/hail/python/hail/experimental/datasets.py @@ -25,9 +25,9 @@ def load_dataset(name: str, Example ------- - >>> # Load the gnomAD "HGDP + 1000 Genomes" dense MatrixTable with GRCh38 coordinates. - >>> mt = hl.experimental.load_dataset(name='gnomad_hgdp_1kg_subset_dense', - ... version='3.1.2', + >>> # Load the gnomAD "HGDP + 1000 Genomes" MatrixTable with GRCh38 coordinates. + >>> mt = hl.experimental.load_dataset(name='gnomad_hgdp_1kg_callset', + ... version='3.1', ... reference_genome='GRCh38', ... region='us', ... 
cloud='gcp') diff --git a/hail/python/hail/experimental/expressions.py b/hail/python/hail/experimental/expressions.py index 5704ef3de69..c6feeda7ad7 100644 --- a/hail/python/hail/experimental/expressions.py +++ b/hail/python/hail/experimental/expressions.py @@ -1,8 +1,6 @@ import hail as hl from hail.expr.expressions import expr_any, analyze -from hail.expr.types import hail_type -from hail.expr.table_type import ttable -from hail.typecheck import typecheck, nullable +from hail.typecheck import typecheck @typecheck(expr=expr_any, path=str, overwrite=bool) @@ -41,11 +39,11 @@ def write_expression(expr, path, overwrite=False): analyze('write_expression.expr', expr, source._global_indices) source = source.select_globals(__expr=expr) expr = source.index_globals().__expr - hl.utils.range_table(1).filter(False).key_by().drop('idx').annotate_globals(expr=expr).write(path, overwrite=overwrite) + hl.utils.range_table(1).filter(False).annotate_globals(expr=expr).write(path, overwrite=overwrite) -@typecheck(path=str, _assert_type=nullable(hail_type)) -def read_expression(path, _assert_type=None): +@typecheck(path=str) +def read_expression(path): """Read an :class:`~.Expression` written with :func:`.experimental.write_expression`. Example @@ -64,9 +62,4 @@ def read_expression(path, _assert_type=None): ------- :class:`~.Expression` """ - _assert_table_type = None - _load_refs = True - if _assert_type: - _assert_table_type = ttable(hl.tstruct(expr=_assert_type), row_type=hl.tstruct(), row_key=[]) - _load_refs = False - return hl.read_table(path, _assert_type=_assert_table_type, _load_refs=_load_refs).index_globals().expr + return hl.read_table(path).index_globals().expr diff --git a/hail/python/hail/experimental/vcf_combiner/vcf_combiner.py b/hail/python/hail/experimental/vcf_combiner/vcf_combiner.py index 7c0bf504323..b2c31a296b9 100644 --- a/hail/python/hail/experimental/vcf_combiner/vcf_combiner.py +++ b/hail/python/hail/experimental/vcf_combiner/vcf_combiner.py @@ -607,7 +607,6 @@ def run_combiner(sample_paths: List[str], None """ - hl.utils.no_service_backend('vcf_combiner') flagname = 'no_ir_logging' prev_flag_value = hl._get_flags(flagname).get(flagname) hl._set_flags(**{flagname: '1'}) diff --git a/hail/python/hail/expr/expressions/base_expression.py b/hail/python/hail/expr/expressions/base_expression.py index 6c8a6c71df5..5ced2e5f5e1 100644 --- a/hail/python/hail/expr/expressions/base_expression.py +++ b/hail/python/hail/expr/expressions/base_expression.py @@ -1219,6 +1219,5 @@ def _persist(self): src = self._indices.source if src is not None: raise ValueError("Can only persist a scalar (no Table/MatrixTable source)") - expr = Env.backend().persist_expression(self) - assert expr.dtype == self.dtype - return expr + executed_jir = Env.backend().persist_ir(self._ir) + return expressions.construct_expr(executed_jir, self.dtype) diff --git a/hail/python/hail/expr/expressions/typed_expressions.py b/hail/python/hail/expr/expressions/typed_expressions.py index 7eb937bd071..65bc63cb689 100644 --- a/hail/python/hail/expr/expressions/typed_expressions.py +++ b/hail/python/hail/expr/expressions/typed_expressions.py @@ -2916,14 +2916,6 @@ def split(self, delim, n=None): else: return self._method("split", tarray(tstr), delim, n) - @typecheck_method(delim=expr_str, missing=expr_array(), quote=nullable(expr_str), regex=bool) - def _split_line(self, delim, missing, quote, regex): - regex_str = 'Regex' if regex else 'Char' - if quote is None: - return self._method(f"split{regex_str}", tarray(tstr), delim, 
missing) - else: - return self._method(f"splitQuoted{regex_str}", tarray(tstr), delim, missing, quote) - def lower(self): """Returns a copy of the string, but with upper case letters converted to lower case. @@ -3103,41 +3095,25 @@ def translate(self, mapping): """ return self._method('translate', tstr, mapping) - @typecheck_method(regex=expr_str, full_match=nullable(bool)) - def matches(self, regex, full_match=False): - """Returns ``True`` if the string contains any match for the given regex if - `full_match` is false. Returns ``True`` if the whole string matches the - given regex if `full_match` is true. + @typecheck_method(regex=expr_str) + def matches(self, regex): + """Returns ``True`` if the string contains any match for the given regex. Examples -------- - The `regex` parameter does not need to match the entire string if `full_match` is ``False``: - >>> string = hl.literal('NA12878') - >>> hl.eval(string.matches('12')) - True - The `regex` parameter needs to match the entire string if `full_match` is ``True``: + The `regex` parameter does not need to match the entire string: - >>> string = hl.literal('NA12878') - >>> hl.eval(string.matches('12', True)) - False - - >>> string = hl.literal('3412878') - >>> hl.eval(string.matches('^[0-9]*$')) + >>> hl.eval(string.matches('12')) True Regex motifs can be used to match sequences of characters: - >>> string = hl.literal('NA12878') >>> hl.eval(string.matches(r'NA\\d+')) True - >>> string = hl.literal('3412878') - >>> hl.eval(string.matches('^[0-9]*$')) - True - Notes ----- The `regex` argument is a @@ -3149,20 +3125,13 @@ def matches(self, regex, full_match=False): ---------- regex: :class:`.StringExpression` Pattern to match. - full_match: :obj: `bool` - If ``True``, the function considers whether the whole string matches the regex. - If ``False``, the function considers whether the string has a partial match for that regex Returns ------- :class:`.BooleanExpression` - If `full_match` is ``False``,``True`` if the string contains any match for the regex, otherwise ``False``. - If `full_match` is ``True``,``True`` if the whole string matches the regex, otherwise ``False``. + ``True`` if the string contains any match for the regex, otherwise ``False``. """ - if full_match is False: - return regex._method("regexMatch", tbool, self) - else: - return regex._method("regexFullMatch", tbool, self) + return regex._method("regexMatch", tbool, self) def reverse(self): """Returns the reversed value. diff --git a/hail/python/hail/expr/types.py b/hail/python/hail/expr/types.py index 32c2f6026b4..6c514109be4 100644 --- a/hail/python/hail/expr/types.py +++ b/hail/python/hail/expr/types.py @@ -70,7 +70,7 @@ def summary_type(t): return str(t) -def dtype(type_str) -> 'HailType': +def dtype(type_str): r"""Parse a type from its string representation. 
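Editorial aside, not part of the patch: the hunk above only drops the return-type annotation; `hl.dtype` still parses Hail's type strings the same way. A minimal sanity check, assuming nothing beyond an imported `hail`:

    >>> import hail as hl
    >>> hl.dtype('array<int32>') == hl.tarray(hl.tint32)
    True
    >>> hl.dtype('struct{x: int32, s: str}') == hl.tstruct(x=hl.tint32, s=hl.tstr)
    True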
Examples @@ -216,7 +216,7 @@ def _pretty(self, b, indent, increment): b.append(str(self)) @abc.abstractmethod - def _parsable_string(self) -> str: + def _parsable_string(self): pass def typecheck(self, value): diff --git a/hail/python/hail/fs/fs.py b/hail/python/hail/fs/fs.py index fd289bb354f..fd89afa9ebe 100644 --- a/hail/python/hail/fs/fs.py +++ b/hail/python/hail/fs/fs.py @@ -73,7 +73,3 @@ def copy_log(self, path: str) -> None: self.copy(local_path_uri(Env.hc()._log), path) except Exception as e: sys.stderr.write(f'Could not copy log: encountered error:\n {e}') - - @abc.abstractmethod - def canonicalize_path(self, path: str) -> str: - pass diff --git a/hail/python/hail/fs/google_fs.py b/hail/python/hail/fs/google_fs.py new file mode 100644 index 00000000000..9b2dd4a6f99 --- /dev/null +++ b/hail/python/hail/fs/google_fs.py @@ -0,0 +1,148 @@ +import os +import time + +from stat import S_ISREG, S_ISDIR +from typing import Dict, List, Optional +from shutil import copy2, rmtree + +import dateutil +import gcsfs + +from hailtop.utils import sync_retry_transient_errors + +from .fs import FS +from .stat_result import FileType, StatResult + + +class GoogleCloudStorageFS(FS): + def __init__(self): + self.client = gcsfs.core.GCSFileSystem(secure_serialize=True) + + def _is_local(self, path: str): + if path.startswith("gs://"): + return False + return True + + def _add_gs_path_prefix(self, path: str) -> str: + first_idx = 0 + + for char in path: + if char != "/": + break + first_idx += 1 + + return "gs://" + path[first_idx:] + + def open(self, path: str, mode: str = 'r', buffer_size: int = 2**18): + if self._is_local(path): + if mode.startswith('w') and not os.path.exists(path): + parts = os.path.split(path) + if not os.path.exists(parts[0]): + os.makedirs(parts[0]) + + return open(path, mode, buffer_size) + + return self.client.open(path, mode, buffer_size) + + def copy(self, src: str, dest: str): + src_is_remote = src.startswith('gs://') + dest_is_remote = dest.startswith('gs://') + + if src_is_remote and dest_is_remote: + self.client.copy(src, dest) + elif src_is_remote: + self.client.get(src, dest) + elif dest_is_remote: + self.client.put(src, dest) + else: + dst_w_file = dest + if os.path.isdir(dst_w_file): + dst_w_file = os.path.join(dest, os.path.basename(src)) + + copy2(src, dst_w_file) + stats = os.stat(src) + + os.chown(dst_w_file, stats.st_uid, stats.st_gid) + + def exists(self, path: str) -> bool: + if self._is_local(path): + return os.path.exists(path) + + return self.client.exists(path) + + def is_file(self, path: str) -> bool: + try: + if self._is_local(path): + return S_ISREG(os.stat(path).st_mode) + return not self._stat_is_gs_dir(self.client.info(path)) + except FileNotFoundError: + return False + + def is_dir(self, path: str) -> bool: + try: + if self._is_local(path): + return S_ISDIR(os.stat(path).st_mode) + return self._stat_is_gs_dir(self.client.info(path)) + except FileNotFoundError: + return False + + def stat(self, path: str) -> Dict: + if self._is_local(path): + return StatResult.from_os_stat_result(path, os.stat(path)) + + return self._format_stat_gs_file(self.client.info(path), path) + + def _format_stat_gs_file(self, stats: Dict, path: Optional[str] = None) -> StatResult: + path_from_stats = stats.get('name') + if path_from_stats is not None: + path_from_stats = self._add_gs_path_prefix(path_from_stats) + else: + assert path is not None + path_from_stats = path + + modification_time = stats.get('updated') + if modification_time is not None: + dt = 
dateutil.parser.isoparse(modification_time) + modification_time = time.mktime(dt.timetuple()) + + typ = FileType.DIRECTORY if self._stat_is_gs_dir(stats) else FileType.FILE + + return StatResult( + path=path_from_stats, + size=stats['size'], + owner=stats['bucket'], + typ=typ, + modification_time=modification_time) + + def _stat_is_gs_dir(self, stats: Dict) -> bool: + return stats['storageClass'] == 'DIRECTORY' or stats['name'].endswith('/') + + def ls(self, path: str) -> List[StatResult]: + if self._is_local(path): + return [StatResult.from_os_stat_result(file, os.stat(file)) + for file in os.listdir(path)] + + return [self._format_stat_gs_file(file) + for file in self.client.ls(path, detail=True)] + + def mkdir(self, path: str): + pass + + def remove(self, path: str): + if self._is_local(path): + os.remove(path) + self.client.rm(path) + + def rmtree(self, path: str): + if self._is_local(path): + rmtree(path) + + def rm_not_exist_ok(): + try: + self.client.rm(path, recursive=True) + except FileNotFoundError: + pass + sync_retry_transient_errors(rm_not_exist_ok) + + def supports_scheme(self, scheme: str) -> bool: + return scheme in ("gs", "") diff --git a/hail/python/hail/fs/hadoop_fs.py b/hail/python/hail/fs/hadoop_fs.py index e172a06c07b..9642942d089 100644 --- a/hail/python/hail/fs/hadoop_fs.py +++ b/hail/python/hail/fs/hadoop_fs.py @@ -1,7 +1,7 @@ import io import json import time -from typing import Dict, List, Union, Any +from typing import Dict, List import dateutil @@ -9,7 +9,7 @@ from .stat_result import FileType, StatResult -def _stat_dict_to_stat_result(stat: Dict[str, Any]) -> StatResult: +def _stat_dict_to_stat_result(stat: Dict) -> StatResult: dt = dateutil.parser.isoparse(stat['modification_time']) mtime = time.mktime(dt.timetuple()) if stat['is_dir']: @@ -35,7 +35,6 @@ def legacy_open(self, path: str, mode: str = 'r', buffer_size: int = 8192): return self._open(path, mode, buffer_size, use_codec=True) def _open(self, path: str, mode: str = 'r', buffer_size: int = 8192, use_codec: bool = False): - handle: Union[io.BufferedReader, io.BufferedWriter] if 'r' in mode: handle = io.BufferedReader(HadoopReader(self, path, buffer_size, use_codec=use_codec), buffer_size=buffer_size) elif 'w' in mode: @@ -80,9 +79,6 @@ def rmtree(self, path: str): def supports_scheme(self, scheme: str) -> bool: return self._jfs.supportsScheme(scheme) - def canonicalize_path(self, path: str) -> str: - return self._jfs.makeQualified(path) - class HadoopReader(io.RawIOBase): def __init__(self, hfs, path, buffer_size, use_codec=False): diff --git a/hail/python/hail/fs/local_fs.py b/hail/python/hail/fs/local_fs.py index a957df747b7..1740ce5d514 100644 --- a/hail/python/hail/fs/local_fs.py +++ b/hail/python/hail/fs/local_fs.py @@ -1,7 +1,8 @@ +from typing import List, BinaryIO +import gzip +import io import os -from typing import List from shutil import copy2, rmtree -import glob from .fs import FS from .stat_result import StatResult @@ -11,14 +12,22 @@ class LocalFS(FS): def __init__(self): pass - def open(self, path: str, mode: str = 'r', buffer_size: int = -1): - if 'w' in mode: - try: - return open(path, mode, buffering=buffer_size) - except FileNotFoundError: - os.makedirs(os.path.dirname(path)) - return open(path, mode, buffering=buffer_size) - return open(path, mode, buffering=buffer_size) + def open(self, path: str, mode: str = 'r', buffer_size: int = 0): + if mode not in ('r', 'rb', 'w', 'wb'): + raise ValueError(f'Unsupported mode: {repr(mode)}') + + strm: BinaryIO + if mode[0] == 'r': + strm = 
open(path, 'rb') + else: + assert mode[0] == 'w' + strm = open(path, 'wb') + + if path[-3:] == '.gz' or path[-4:] == '.bgz': + strm = gzip.GzipFile(fileobj=strm, mode=mode) # type: ignore # GzipFile should be a BinaryIO + if 'b' not in mode: + strm = io.TextIOWrapper(strm, encoding='utf-8') # type: ignore # TextIOWrapper should be a BinaryIO + return strm def copy(self, src: str, dest: str): dst_w_file = dest @@ -43,19 +52,8 @@ def stat(self, path: str) -> StatResult: return StatResult.from_os_stat_result(path, os.stat(path)) def ls(self, path: str) -> List[StatResult]: - if glob.escape(path) == path: - return self._ls_no_glob(path) - return [ - result_path - for globbed_path in glob.glob(path) - for result_path in self._ls_no_glob(globbed_path) - ] - - def _ls_no_glob(self, path: str) -> List[StatResult]: - if os.path.isdir(path): - return [self.stat(os.path.join(path, file)) - for file in os.listdir(path)] - return [self.stat(path)] + return [self.stat(os.path.join(path, file)) + for file in os.listdir(path)] def mkdir(self, path: str): os.mkdir(path) @@ -68,8 +66,3 @@ def rmtree(self, path: str): def supports_scheme(self, scheme: str) -> bool: return scheme == "" - - def canonicalize_path(self, path: str) -> str: - if path.startswith('file:'): - return 'file:' + os.path.realpath(path[5:]) - return 'file:' + os.path.realpath(path) diff --git a/hail/python/hail/fs/router_fs.py b/hail/python/hail/fs/router_fs.py index 8f0dfa6a576..f463a2c8799 100644 --- a/hail/python/hail/fs/router_fs.py +++ b/hail/python/hail/fs/router_fs.py @@ -1,13 +1,12 @@ from typing import List, AsyncContextManager, BinaryIO import asyncio +import gzip import io import nest_asyncio -import os -from hailtop.aiotools.fs import Copier, Transfer, FileListEntry, ReadableStream, WritableStream -from hailtop.aiotools.local_fs import LocalAsyncFS from hailtop.aiotools.router_fs import RouterAsyncFS -from hailtop.utils import OnlineBoundedGather2, async_to_blocking +from hailtop.aiotools.fs import Copier, Transfer, FileListEntry, ReadableStream, WritableStream +from hailtop.utils import async_to_blocking, OnlineBoundedGather2 from .fs import FS from .stat_result import FileType, StatResult @@ -176,6 +175,8 @@ def open(self, path: str, mode: str = 'r', buffer_size: int = 8192) -> io.IOBase assert mode[0] == 'w' strm = SyncWritableStream(async_to_blocking(self.afs.create(path)), path) + if path[-3:] == '.gz' or path[-4:] == '.bgz': + strm = gzip.GzipFile(fileobj=strm, mode=mode) if 'b' not in mode: strm = io.TextIOWrapper(strm, encoding='utf-8') # type: ignore # typeshed is wrong, this *is* an IOBase return strm @@ -185,18 +186,12 @@ def copy(self, src: str, dest: str, *, max_simultaneous_transfers=75): async def _copy(): sema = asyncio.Semaphore(max_simultaneous_transfers) - await Copier.copy(self.afs, sema, transfer) + async with sema: + await Copier.copy(self.afs, asyncio.Semaphore, transfer) return async_to_blocking(_copy()) def exists(self, path: str) -> bool: - async def _exists(): - dir_path = path - if dir_path[-1] != '/': - dir_path = dir_path + '/' - return any(await asyncio.gather( - self.afs.isfile(path), - self.afs.isdir(dir_path))) - return async_to_blocking(_exists()) + return async_to_blocking(self.afs.exists(path)) def is_file(self, path: str) -> bool: return async_to_blocking(self.afs.isfile(path)) @@ -237,24 +232,17 @@ async def _ls(): async with OnlineBoundedGather2(asyncio.Semaphore(_max_simultaneous_files)) as pool: tasks = [pool.call(self._fle_to_dict, fle) async for fle in await self.afs.listfiles(path)] 
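# Editorial aside, not part of the patch: the change just below swaps
# `asyncio.gather(*tasks)` for `[await t for t in tasks]`. For tasks that are
# already scheduled on the loop, both return results in task order; a
# standalone sketch of that equivalence:
import asyncio

async def _sketch():
    async def work(i):
        return i * i
    tasks = [asyncio.create_task(work(i)) for i in range(4)]
    gathered = await asyncio.gather(*tasks)
    one_by_one = [await t for t in tasks]  # awaiting a finished task returns its result
    assert gathered == one_by_one == [0, 1, 4, 9]

asyncio.run(_sketch())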
- return list(await asyncio.gather(*tasks)) + return [await t for t in tasks] return async_to_blocking(_ls()) def mkdir(self, path: str): return async_to_blocking(self.afs.mkdir(path)) def remove(self, path: str): - return async_to_blocking(self.afs.remove(path)) + return async_to_blocking(self.remove(path)) def rmtree(self, path: str): return async_to_blocking(self.afs.rmtree(None, path)) def supports_scheme(self, scheme: str) -> bool: return scheme in self.afs.schemes - - def canonicalize_path(self, path: str) -> str: - if isinstance(self.afs._get_fs(path), LocalAsyncFS): - if path.startswith('file:'): - return 'file:' + os.path.realpath(path[5:]) - return 'file:' + os.path.realpath(path) - return path diff --git a/hail/python/hail/fs/stat_result.py b/hail/python/hail/fs/stat_result.py index bf64f309804..cb49d60c07a 100644 --- a/hail/python/hail/fs/stat_result.py +++ b/hail/python/hail/fs/stat_result.py @@ -2,7 +2,7 @@ import stat from enum import Enum, auto -from typing import Dict, NamedTuple, Optional, Union, Any +from typing import Dict, NamedTuple, Optional, Union import hurry.filesize @@ -35,6 +35,6 @@ def from_os_stat_result(path: str, sb: os.stat_result) -> 'StatResult': return StatResult(path=path, owner=sb.st_uid, size=sb.st_size, typ=typ, modification_time=sb.st_mtime) - def to_legacy_dict(self) -> Dict[str, Any]: + def to_legacy_dict(self) -> Dict: return dict(path=self.path, owner=self.owner, is_dir=self.is_dir(), size_bytes=self.size, size=hurry.filesize.size(self.size), modification_time=self.modification_time) diff --git a/hail/python/hail/ggplot/__init__.py b/hail/python/hail/ggplot/__init__.py index 2c9b0d5d9c9..1fdcd4844b3 100644 --- a/hail/python/hail/ggplot/__init__.py +++ b/hail/python/hail/ggplot/__init__.py @@ -5,9 +5,8 @@ geom_hline, geom_vline, geom_tile, geom_col, geom_area, geom_ribbon # noqa F401 from .labels import ggtitle, xlab, ylab from .scale import scale_x_continuous, scale_y_continuous, scale_x_discrete, scale_y_discrete, scale_x_genomic, \ - scale_x_log10, scale_y_log10, scale_x_reverse, scale_y_reverse, scale_color_discrete, scale_color_hue, scale_color_identity,\ - scale_color_manual, scale_color_continuous, scale_fill_discrete, scale_fill_hue, scale_fill_identity, scale_fill_continuous,\ - scale_fill_manual + scale_x_log10, scale_y_log10, scale_x_reverse, scale_y_reverse, scale_color_discrete, scale_color_identity,\ + scale_color_continuous, scale_fill_discrete, scale_fill_identity, scale_fill_continuous __all__ = [ "aes", @@ -40,11 +39,7 @@ "scale_color_continuous", "scale_color_identity", "scale_color_discrete", - "scale_color_hue", - "scale_color_manual", "scale_fill_continuous", "scale_fill_identity", "scale_fill_discrete", - "scale_fill_hue", - "scale_fill_manual" ] diff --git a/hail/python/hail/ggplot/geoms.py b/hail/python/hail/ggplot/geoms.py index b7a6be5f5ec..ab8f181e064 100644 --- a/hail/python/hail/ggplot/geoms.py +++ b/hail/python/hail/ggplot/geoms.py @@ -255,7 +255,7 @@ def get_stat(self): def geom_bar(mapping=aes(), *, fill=None, color=None, alpha=None, position="stack", size=None): """Create a bar chart that counts occurrences of the various values of the ``x`` aesthetic. 
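Editorial aside, not part of the patch: a minimal `geom_bar` call against a range table; the `group` field below is illustrative and assumes an initialized Hail session:

    >>> from hail.ggplot import ggplot, aes, geom_bar
    >>> ht = hl.utils.range_table(90)
    >>> ht = ht.annotate(group=hl.str(ht.idx % 3))
    >>> fig = ggplot(ht, aes(x=ht.group)) + geom_bar()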
- Supported aesthetics: ``x``, ``color``, ``fill``, ``weight`` + Supported aesthetics: ``x``, ``color``, ``fill`` Returns ------- @@ -496,14 +496,9 @@ def plot_group(df): "y0": y_center - height / 2, "x1": x_center + width / 2, "y1": y_center + height / 2, + "fillcolor": "black" if "fill" not in df.attrs else df.attrs["fill"], "opacity": row.get('alpha', 1.0) } - if "fill" in df.attrs: - shape_args["fillcolor"] = df.attrs["fill"] - elif "fill" in row: - shape_args["fillcolor"] = row["fill"] - else: - shape_args["fillcolor"] = "black" fig_so_far.add_shape(**shape_args) for group_df in grouped_data: diff --git a/hail/python/hail/ggplot/ggplot.py b/hail/python/hail/ggplot/ggplot.py index eb251f81e5c..72ef33af3e5 100644 --- a/hail/python/hail/ggplot/ggplot.py +++ b/hail/python/hail/ggplot/ggplot.py @@ -1,3 +1,4 @@ +import plotly import plotly.graph_objects as go from pprint import pprint @@ -24,7 +25,8 @@ class GGPlot: .. automethod:: write_image """ - def __init__(self, ht, aes, geoms=[], labels=Labels(), coord_cartesian=None, scales=None): + def __init__(self, ht, aes, geoms=[], labels=Labels(), coord_cartesian=None, scales=None, + discrete_color_scale=plotly.colors.qualitative.D3, continuous_color_scale=plotly.colors.sequential.Viridis): if scales is None: scales = {} @@ -34,6 +36,10 @@ def __init__(self, ht, aes, geoms=[], labels=Labels(), coord_cartesian=None, sca self.labels = labels self.coord_cartesian = coord_cartesian self.scales = scales + self.discrete_color_scale = discrete_color_scale + self.discrete_color_dict = {} + self.discrete_color_idx = 0 + self.continuous_color_scale = continuous_color_scale self.add_default_scales(aes) @@ -93,7 +99,8 @@ def add_default_scales(self, aesthetic): self.scales[aesthetic_str] = ScaleDiscrete(aesthetic_str) def copy(self): - return GGPlot(self.ht, self.aes, self.geoms[:], self.labels, self.coord_cartesian, self.scales) + return GGPlot(self.ht, self.aes, self.geoms[:], self.labels, self.coord_cartesian, self.scales, + self.discrete_color_scale, self.continuous_color_scale) def verify_scales(self): for geom_idx, geom in enumerate(self.geoms): @@ -168,7 +175,7 @@ def get_aggregation_result(selected, mapping_per_geom, precomputed): # Create scaling functions based on all the data: transformers = {} for scale in self.scales.values(): - transformers[scale.aesthetic_name] = scale.create_local_transformer([x for _, _, x in geoms_and_grouped_dfs]) + transformers[scale.aesthetic_name] = scale.create_local_transformer([x for _, _, x in geoms_and_grouped_dfs], self) for geom, geom_label, grouped_dfs in geoms_and_grouped_dfs: scaled_grouped_dfs = [] diff --git a/hail/python/hail/ggplot/scale.py b/hail/python/hail/ggplot/scale.py index 19aa81289a6..55d692d76c9 100644 --- a/hail/python/hail/ggplot/scale.py +++ b/hail/python/hail/ggplot/scale.py @@ -5,9 +5,6 @@ from .utils import categorical_strings_to_colors, continuous_nums_to_colors -import plotly.express as px -import plotly - class Scale(FigureAttribute): def __init__(self, aesthetic_name): @@ -17,7 +14,7 @@ def __init__(self, aesthetic_name): def transform_data(self, field_expr): pass - def create_local_transformer(self, groups_of_dfs): + def create_local_transformer(self, groups_of_dfs, parent): return lambda x: x @abc.abstractmethod @@ -150,20 +147,16 @@ def is_continuous(self): return False -class ScaleColorManual(ScaleDiscrete): - - def __init__(self, aesthetic_name, values): - super().__init__(aesthetic_name) - self.values = values +class ScaleColorDiscrete(ScaleDiscrete): - def 
create_local_transformer(self, groups_of_dfs): + def create_local_transformer(self, groups_of_dfs, parent): categorical_strings = set() for group_of_dfs in groups_of_dfs: for df in group_of_dfs: if self.aesthetic_name in df.attrs: categorical_strings.add(df.attrs[self.aesthetic_name]) - unique_color_mapping = categorical_strings_to_colors(categorical_strings, self.values) + unique_color_mapping = categorical_strings_to_colors(categorical_strings, parent) def transform(df): df.attrs[f"{self.aesthetic_name}_legend"] = df.attrs[self.aesthetic_name] @@ -175,7 +168,7 @@ def transform(df): class ScaleColorContinuous(ScaleContinuous): - def create_local_transformer(self, groups_of_dfs): + def create_local_transformer(self, groups_of_dfs, parent): overall_min = None overall_max = None for group_of_dfs in groups_of_dfs: @@ -194,7 +187,7 @@ def create_local_transformer(self, groups_of_dfs): else: overall_max = max(series_max, overall_max) - color_mapping = continuous_nums_to_colors(overall_min, overall_max, plotly.colors.sequential.Viridis) + color_mapping = continuous_nums_to_colors(overall_min, overall_max, parent.continuous_color_scale) def transform(df): df[self.aesthetic_name] = df[self.aesthetic_name].map(lambda i: color_mapping(i)) @@ -203,29 +196,6 @@ def transform(df): return transform -class ScaleColorHue(ScaleDiscrete): - def create_local_transformer(self, groups_of_dfs): - categorical_strings = set() - for group_of_dfs in groups_of_dfs: - for df in group_of_dfs: - if self.aesthetic_name in df.attrs: - categorical_strings.add(df.attrs[self.aesthetic_name]) - - num_categories = len(categorical_strings) - step = 1.0 / num_categories - interpolation_values = [step * i for i in range(num_categories)] - hsv_scale = px.colors.get_colorscale("HSV") - colors = px.colors.sample_colorscale(hsv_scale, interpolation_values) - unique_color_mapping = dict(zip(categorical_strings, colors)) - - def transform(df): - df.attrs[f"{self.aesthetic_name}_legend"] = df.attrs[self.aesthetic_name] - df.attrs[self.aesthetic_name] = unique_color_mapping[df.attrs[self.aesthetic_name]] - return df - - return transform - - # Legend names messed up for scale color identity class ScaleColorDiscreteIdentity(ScaleDiscrete): pass @@ -398,26 +368,14 @@ def scale_x_genomic(reference_genome, name=None): def scale_color_discrete(): - """The default discrete color scale. This maps each discrete value to a color. Equivalent to scale_color_hue. + """The default discrete color scale. This maps each discrete value to a color. Returns ------- :class:`.FigureAttribute` The scale to be applied. """ - return scale_color_hue() - - -def scale_color_hue(): - """Map discrete colors to evenly placed positions around the color wheel. - - Returns - ------- - :class:`.FigureAttribute` - The scale to be applied. - - """ - return ScaleColorHue("color") + return ScaleColorDiscrete("color") def scale_color_continuous(): @@ -442,23 +400,6 @@ def scale_color_identity(): return ScaleColorDiscreteIdentity("color") -def scale_color_manual(*, values): - """A color scale that assigns strings to colors using the pool of colors specified as `values`. - - - Parameters - ---------- - values: :class:`list` of :class:`str` - The colors to choose when assigning values to colors. - - Returns - ------- - :class:`.FigureAttribute` - The scale to be applied. - """ - return ScaleColorManual("color", values=values) - - def scale_fill_discrete(): """The default discrete fill scale. This maps each discrete value to a fill color. 
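Editorial aside, not part of the patch: the discrete color handling these hunks switch to assigns colors by cycling a plot-level palette with a running index (see `categorical_strings_to_colors` further below). A standalone sketch of that assignment logic, with an illustrative three-color palette standing in for `plotly.colors.qualitative.D3`:

    palette = ['#1f77b4', '#ff7f0e', '#2ca02c']
    color_dict, next_idx = {}, 0
    for group in ['exome', 'genome', 'array', 'panel']:
        if group not in color_dict:
            color_dict[group] = palette[next_idx % len(palette)]
            next_idx += 1
    assert color_dict['panel'] == palette[0]  # the fourth group wraps around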
@@ -467,7 +408,7 @@ def scale_fill_discrete(): :class:`.FigureAttribute` The scale to be applied. """ - return scale_fill_hue() + return ScaleColorDiscrete("fill") def scale_fill_continuous(): @@ -490,32 +431,3 @@ def scale_fill_identity(): The scale to be applied. """ return ScaleColorDiscreteIdentity("fill") - - -def scale_fill_hue(): - """Map discrete fill colors to evenly placed positions around the color wheel. - - Returns - ------- - :class:`.FigureAttribute` - The scale to be applied. - - """ - return ScaleColorHue("fill") - - -def scale_fill_manual(*, values): - """A color scale that assigns strings to fill colors using the pool of colors specified as `values`. - - - Parameters - ---------- - values: :class:`list` of :class:`str` - The colors to choose when assigning values to colors. - - Returns - ------- - :class:`.FigureAttribute` - The scale to be applied. - """ - return ScaleColorManual("fill", values=values) diff --git a/hail/python/hail/ggplot/stats.py b/hail/python/hail/ggplot/stats.py index 5c58b790918..45babdf934b 100644 --- a/hail/python/hail/ggplot/stats.py +++ b/hail/python/hail/ggplot/stats.py @@ -67,8 +67,6 @@ class StatCount(Stat): def make_agg(self, mapping, precomputed): grouping_variables = {aes_key: mapping[aes_key] for aes_key in mapping.keys() if should_use_for_grouping(aes_key, mapping[aes_key].dtype)} - if "weight" in mapping: - return hl.agg.group_by(hl.struct(**grouping_variables), hl.agg.counter(mapping["x"], weight=mapping["weight"])) return hl.agg.group_by(hl.struct(**grouping_variables), hl.agg.group_by(mapping["x"], hl.agg.count())) def listify(self, agg_result): diff --git a/hail/python/hail/ggplot/utils.py b/hail/python/hail/ggplot/utils.py index 51135d87ca8..afd92b91228 100644 --- a/hail/python/hail/ggplot/utils.py +++ b/hail/python/hail/ggplot/utils.py @@ -34,20 +34,16 @@ def should_use_scale_for_grouping(scale): # Map strings to numbers that will index into a color scale. -def categorical_strings_to_colors(string_set, color_values): +def categorical_strings_to_colors(string_set, parent_plot): - if isinstance(color_values, list): - if len(string_set) > len(color_values): - print(f"Not enough colors specified. 
Found {len(string_set)} distinct values of color aesthetic and only {len(color_values)} colors were provided.") - color_dict = {} - for idx, element in enumerate(string_set): - if element not in color_dict: - color_dict[element] = color_values[idx] + color_dict = parent_plot.discrete_color_dict - else: - color_dict = color_values + for element in string_set: + if element not in color_dict: + color_dict[element] = parent_plot.discrete_color_scale[parent_plot.discrete_color_idx % len(parent_plot.discrete_color_scale)] + parent_plot.discrete_color_idx += 1 - return color_dict + return parent_plot.discrete_color_dict def continuous_nums_to_colors(min_color, max_color, continuous_color_scale): diff --git a/hail/python/hail/ir/__init__.py b/hail/python/hail/ir/__init__.py index e6f55c52a90..10bd65a94fa 100644 --- a/hail/python/hail/ir/__init__.py +++ b/hail/python/hail/ir/__init__.py @@ -47,7 +47,7 @@ tensor_shape_to_matrix_shape from .utils import filter_predicate_with_keep, make_filter_and_replace from .matrix_reader import MatrixReader, MatrixNativeReader, MatrixRangeReader, \ - MatrixVCFReader, MatrixBGENReader, MatrixPLINKReader + MatrixVCFReader, MatrixBGENReader, TextMatrixReader, MatrixPLINKReader from .table_reader import AvroTableReader, TableReader, TableNativeReader, \ TextTableReader, TableFromBlockMatrixNativeReader, StringTableReader from .blockmatrix_reader import BlockMatrixReader, BlockMatrixNativeReader, \ @@ -254,6 +254,7 @@ 'MatrixRangeReader', 'MatrixVCFReader', 'MatrixBGENReader', + 'TextMatrixReader', 'MatrixPLINKReader', 'MatrixWriter', 'MatrixNativeWriter', diff --git a/hail/python/hail/ir/base_ir.py b/hail/python/hail/ir/base_ir.py index 5258bb5e83c..7a594b2f9ae 100644 --- a/hail/python/hail/ir/base_ir.py +++ b/hail/python/hail/ir/base_ir.py @@ -68,7 +68,7 @@ def head_str(self): @property @abc.abstractmethod def typ(self): - raise NotImplementedError + return def __eq__(self, other): return isinstance(other, self.__class__) and self.children == other.children and self._eq(other) diff --git a/hail/python/hail/ir/blockmatrix_ir.py b/hail/python/hail/ir/blockmatrix_ir.py index 9fa242b5d6f..9de543cd366 100644 --- a/hail/python/hail/ir/blockmatrix_ir.py +++ b/hail/python/hail/ir/blockmatrix_ir.py @@ -3,18 +3,17 @@ from hail.expr.types import tarray from .blockmatrix_reader import BlockMatrixReader from .base_ir import BlockMatrixIR, IR -from hail.typecheck import typecheck_method, sequenceof, nullable +from hail.typecheck import typecheck_method, sequenceof from hail.utils.misc import escape_id from hail.utils.java import Env class BlockMatrixRead(BlockMatrixIR): - @typecheck_method(reader=BlockMatrixReader, _assert_type=nullable(tblockmatrix)) - def __init__(self, reader, _assert_type=None): + @typecheck_method(reader=BlockMatrixReader) + def __init__(self, reader): super().__init__() self.reader = reader - self._type = _assert_type def head_str(self): return f'"{self.reader.render()}"' @@ -23,8 +22,7 @@ def _eq(self, other): return self.reader == other.reader def _compute_type(self): - if self._type is None: - self._type = Env.backend().blockmatrix_type(self) + self._type = Env.backend().blockmatrix_type(self) class BlockMatrixMap(BlockMatrixIR): diff --git a/hail/python/hail/ir/blockmatrix_writer.py b/hail/python/hail/ir/blockmatrix_writer.py index 9f41f17f593..2775640cf39 100644 --- a/hail/python/hail/ir/blockmatrix_writer.py +++ b/hail/python/hail/ir/blockmatrix_writer.py @@ -2,7 +2,6 @@ import json from ..typecheck import typecheck_method, sequenceof, nullable, 
enumeration -from ..expr.types import tvoid, tstr from ..utils.misc import escape_str @@ -11,10 +10,6 @@ class BlockMatrixWriter(object): def render(self): pass - @abc.abstractmethod - def _type(self): - pass - @abc.abstractmethod def __eq__(self, other): pass @@ -36,9 +31,6 @@ def render(self): 'stageLocally': self.stage_locally} return escape_str(json.dumps(writer)) - def _type(self): - return tvoid - def __eq__(self, other): return isinstance(other, BlockMatrixNativeWriter) and \ self.path == other.path and \ @@ -57,9 +49,6 @@ def render(self): 'path': self.path} return escape_str(json.dumps(writer)) - def _type(self): - return tstr - def __eq__(self, other): return isinstance(other, BlockMatrixBinaryWriter) and \ self.path == other.path @@ -84,9 +73,6 @@ def render(self): 'binary': self.binary} return escape_str(json.dumps(writer)) - def _type(self): - return tvoid - def __eq__(self, other): return isinstance(other, BlockMatrixRectanglesWriter) and \ self.path == other.path and \ @@ -100,9 +86,6 @@ class BlockMatrixMultiWriter(object): def render(self): pass - def _type(self): - return tvoid - @abc.abstractmethod def __eq__(self, other): pass @@ -120,9 +103,6 @@ def render(self): 'overwrite': self.overwrite} return escape_str(json.dumps(writer)) - def _type(self): - return tvoid - def __eq__(self, other): return isinstance(other, BlockMatrixBinaryMultiWriter) and \ self.prefix == other.prefix and \ @@ -152,9 +132,6 @@ def render(self): 'customFilenames': self.custom_filenames} return escape_str(json.dumps(writer)) - def _type(self): - return tvoid - def __eq__(self, other): return isinstance(other, BlockMatrixTextMultiWriter) and \ self.prefix == other.prefix and \ @@ -178,9 +155,6 @@ def render(self): 'storageLevel': self.storage_level} return escape_str(json.dumps(writer)) - def _type(self): - return tvoid - def __eq__(self, other): return isinstance(other, BlockMatrixPersistWriter) and \ self.id == other.id and \ @@ -203,9 +177,6 @@ def render(self): 'stageLocally': self.stage_locally} return escape_str(json.dumps(writer)) - def _type(self): - return tvoid - def __eq__(self, other): return isinstance(other, BlockMatrixNativeMultiWriter) and \ self.prefix == other.prefix and \ diff --git a/hail/python/hail/ir/ir.py b/hail/python/hail/ir/ir.py index 811c39297c7..015e032f39b 100644 --- a/hail/python/hail/ir/ir.py +++ b/hail/python/hail/ir/ir.py @@ -10,7 +10,7 @@ from hail.ir.blockmatrix_writer import BlockMatrixWriter, BlockMatrixMultiWriter from hail.typecheck import typecheck, typecheck_method, sequenceof, numeric, \ sized_tupleof, nullable, tupleof, anytype, func_spec -from hail.utils.java import Env, HailUserError, warning +from hail.utils.java import Env, HailUserError from hail.utils.misc import escape_str, dump_json, parsable_strings, escape_id from .base_ir import BaseIR, IR, TableIR, MatrixIR, BlockMatrixIR, _env_bind from .matrix_writer import MatrixWriter, MatrixNativeMultiWriter @@ -2284,9 +2284,6 @@ def _compute_type(self, env, agg_env): class ApplySeeded(IR): @typecheck_method(function=str, seed=int, return_type=hail_type, args=IR) def __init__(self, function, seed, return_type, *args): - if hail.current_backend().requires_lowering: - warning("Seeded randomness is currently unreliable on the service. " - "You may observe some unexpected behavior. 
Don't use for real work yet.") super().__init__(*args) self.function = function self.args = args @@ -2548,7 +2545,7 @@ def _eq(self, other): def _compute_type(self, env, agg_env): self.child._compute_type() - self._type = self.writer._type() + self._type = tvoid @staticmethod def is_effectful() -> bool: diff --git a/hail/python/hail/ir/matrix_ir.py b/hail/python/hail/ir/matrix_ir.py index 6c45d99062e..7fd117b534c 100644 --- a/hail/python/hail/ir/matrix_ir.py +++ b/hail/python/hail/ir/matrix_ir.py @@ -1,6 +1,4 @@ -from typing import Optional import hail as hl -from hail.expr.types import HailType from hail.ir.base_ir import BaseIR, MatrixIR from hail.utils.misc import escape_str, parsable_strings, dump_json, escape_id from hail.utils.java import Env @@ -47,17 +45,11 @@ def renderable_agg_bindings(self, i, default_value=None): class MatrixRead(MatrixIR): - def __init__(self, - reader, - drop_cols: bool = False, - drop_rows: bool = False, - *, - _assert_type: Optional[HailType] = None): + def __init__(self, reader, drop_cols=False, drop_rows=False): super().__init__() self.reader = reader self.drop_cols = drop_cols self.drop_rows = drop_rows - self._type: Optional[HailType] = _assert_type def render_head(self, r): return f'(MatrixRead None {self.drop_cols} {self.drop_rows} "{self.reader.render(r)}"' @@ -66,8 +58,7 @@ def _eq(self, other): return self.reader == other.reader and self.drop_cols == other.drop_cols and self.drop_rows == other.drop_rows def _compute_type(self): - if self._type is None: - self._type = Env.backend().matrix_type(self) + self._type = Env.backend().matrix_type(self) class MatrixFilterRows(MatrixIR): diff --git a/hail/python/hail/ir/matrix_reader.py b/hail/python/hail/ir/matrix_reader.py index 405813ae91f..d62c86792c8 100644 --- a/hail/python/hail/ir/matrix_reader.py +++ b/hail/python/hail/ir/matrix_reader.py @@ -1,11 +1,13 @@ import abc import json -from .utils import make_filter_and_replace, impute_type_of_partition_interval_array -from ..expr.types import tfloat32, tfloat64 +import hail as hl + +from .utils import make_filter_and_replace +from ..expr.types import tfloat32, tfloat64, hail_type, tint32, tint64, tstr from ..genetics.reference_genome import reference_genome_type -from ..typecheck import (typecheck_method, sequenceof, nullable, enumeration, anytype, oneof, - dictof, sized_tupleof) +from ..typecheck import typecheck_method, sequenceof, nullable, enumeration, \ + anytype, oneof, dictof, sized_tupleof from ..utils import wrap_to_list from ..utils.misc import escape_str @@ -25,9 +27,25 @@ class MatrixNativeReader(MatrixReader): intervals=nullable(sequenceof(anytype)), filter_intervals=bool) def __init__(self, path, intervals, filter_intervals): + if intervals is not None: + t = hl.expr.impute_type(intervals) + if not isinstance(t, hl.tarray) and not isinstance(t.element_type, hl.tinterval): + raise TypeError("'intervals' must be an array of tintervals") + pt = t.element_type.point_type + if isinstance(pt, hl.tstruct): + self._interval_type = t + else: + self._interval_type = hl.tarray(hl.tinterval(hl.tstruct(__point=pt))) + self.path = path self.filter_intervals = filter_intervals - self.intervals, self._interval_type = impute_type_of_partition_interval_array(intervals) + if intervals is not None and t != self._interval_type: + self.intervals = [hl.Interval(hl.Struct(__point=i.start), + hl.Struct(__point=i.end), + i.includes_start, + i.includes_end) for i in intervals] + else: + self.intervals = intervals def render(self, r): reader = {'name': 
'MatrixNativeReader', @@ -199,6 +217,68 @@ def __eq__(self, other): other.included_variants == self.included_variants +class TextMatrixReader(MatrixReader): + @typecheck_method(paths=oneof(str, sequenceof(str)), + n_partitions=nullable(int), + row_fields=dictof(str, hail_type), + entry_type=enumeration(tint32, tint64, tfloat32, tfloat64, tstr), + missing_value=str, + has_header=bool, + separator=str, + gzip_as_bgzip=bool, + add_row_id=bool, + comment=sequenceof(str)) + def __init__(self, + paths, + n_partitions, + row_fields, + entry_type, + missing_value, + has_header, + separator, + gzip_as_bgzip, + add_row_id, + comment): + self.paths = wrap_to_list(paths) + self.n_partitions = n_partitions + self.row_fields = row_fields + self.entry_type = entry_type + self.missing_value = missing_value + self.has_header = has_header + self.separator = separator + self.gzip_as_bgzip = gzip_as_bgzip + self.add_row_id = add_row_id + self.comment = comment + + def render(self, r): + reader = {'name': 'TextMatrixReader', + 'paths': self.paths, + 'nPartitions': self.n_partitions, + 'rowFieldsStr': {k: v._parsable_string() + for k, v in self.row_fields.items()}, + 'entryTypeStr': self.entry_type._parsable_string(), + 'missingValue': self.missing_value, + 'hasHeader': self.has_header, + 'separatorStr': self.separator, + 'gzipAsBGZip': self.gzip_as_bgzip, + 'addRowId': self.add_row_id, + 'comment': self.comment} + return escape_str(json.dumps(reader)) + + def __eq__(self, other): + return isinstance(other, TextMatrixReader) and \ + self.paths == other.paths and \ + self.n_partitions == other.n_partitions and \ + self.row_fields == other.row_fields and \ + self.entry_type == other.entry_type and \ + self.missing_value == other.missing_value and \ + self.has_header == other.has_header and \ + self.separator == other.separator and \ + self.gzip_as_bgzip == other.gzip_as_bgzip and \ + self.add_row_id == other.add_row_id and \ + self.comment == other.comment + + class MatrixPLINKReader(MatrixReader): @typecheck_method(bed=str, bim=str, fam=str, n_partitions=nullable(int), block_size=nullable(int), min_partitions=nullable(int), diff --git a/hail/python/hail/ir/register_functions.py b/hail/python/hail/ir/register_functions.py index 061180d0d42..e62e9f50839 100644 --- a/hail/python/hail/ir/register_functions.py +++ b/hail/python/hail/ir/register_functions.py @@ -44,6 +44,7 @@ def ndarray_floating_point_divide(arg_type, ret_type): ndarray_floating_point_divide(tint64, tfloat32) ndarray_floating_point_divide(tfloat32, tfloat32) ndarray_floating_point_divide(tfloat64, tfloat64) + register_function("values", (dtype("dict"),), dtype("array")) register_function("sliceRight", (dtype("str"), dtype("int32"),), dtype("str")) register_function("get", (dtype("dict"), dtype("?key"),), dtype("?value")) @@ -210,12 +211,6 @@ def ndarray_floating_point_divide(arg_type, ret_type): register_function("slice", (dtype("str"), dtype("int32"), dtype("int32"),), dtype("str")) register_function("split", (dtype("str"), dtype("str"), dtype("int32"),), dtype("array")) register_function("split", (dtype("str"), dtype("str"),), dtype("array")) - register_function("splitQuotedChar", (dtype("str"), dtype("str"), dtype("array"), dtype("str"),), - dtype("array")) - register_function("splitQuotedRegex", (dtype("str"), dtype("str"), dtype("array"), dtype("str"),), - dtype("array")) - register_function("splitChar", (dtype("str"), dtype("str"), dtype("array"),), dtype("array")) - register_function("splitRegex", (dtype("str"), dtype("str"), dtype("array"),), 
dtype("array")) register_seeded_function("rand_gamma", (dtype("float64"), dtype("float64"),), dtype("float64")) register_function("UnphasedDiploidGtIndexCall", (dtype("int32"),), dtype("call")) register_function("lgt_to_gt", (dtype("call"), dtype("array"),), dtype("call")) diff --git a/hail/python/hail/ir/table_ir.py b/hail/python/hail/ir/table_ir.py index 13b8a0ca52d..6d8da139926 100644 --- a/hail/python/hail/ir/table_ir.py +++ b/hail/python/hail/ir/table_ir.py @@ -1,6 +1,5 @@ -from typing import Optional import hail as hl -from hail.expr.types import dtype, HailType +from hail.expr.types import dtype from hail.ir.base_ir import BaseIR, TableIR from hail.utils.java import Env from hail.utils.misc import escape_str, parsable_strings, dump_json, escape_id @@ -234,15 +233,10 @@ def _eq(self, other): class TableRead(TableIR): - def __init__(self, - reader, - drop_rows: bool = False, - *, - _assert_type: Optional[HailType] = None): + def __init__(self, reader, drop_rows=False): super().__init__() self.reader = reader self.drop_rows = drop_rows - self._type = _assert_type def head_str(self): return f'None {self.drop_rows} "{self.reader.render()}"' @@ -251,8 +245,7 @@ def _eq(self, other): return self.reader == other.reader and self.drop_rows == other.drop_rows def _compute_type(self): - if self._type is None: - self._type = Env.backend().table_type(self) + self._type = Env.backend().table_type(self) class MatrixEntriesTable(TableIR): diff --git a/hail/python/hail/ir/table_reader.py b/hail/python/hail/ir/table_reader.py index 441192f4c9d..d519d4b9507 100644 --- a/hail/python/hail/ir/table_reader.py +++ b/hail/python/hail/ir/table_reader.py @@ -9,8 +9,6 @@ from hail.typecheck import typecheck_method, sequenceof, nullable, anytype, oneof from hail.utils.misc import escape_str -from .utils import impute_type_of_partition_interval_array - class TableReader(object): @abc.abstractmethod @@ -27,9 +25,25 @@ class TableNativeReader(TableReader): intervals=nullable(sequenceof(anytype)), filter_intervals=bool) def __init__(self, path, intervals, filter_intervals): + if intervals is not None: + t = hl.expr.impute_type(intervals) + if not isinstance(t, hl.tarray) and not isinstance(t.element_type, hl.tinterval): + raise TypeError("'intervals' must be an array of tintervals") + pt = t.element_type.point_type + if isinstance(pt, hl.tstruct): + self._interval_type = t + else: + self._interval_type = hl.tarray(hl.tinterval(hl.tstruct(__point=pt))) + self.path = path self.filter_intervals = filter_intervals - self.intervals, self._interval_type = impute_type_of_partition_interval_array(intervals) + if intervals is not None and t != self._interval_type: + self.intervals = [hl.Interval(hl.Struct(__point=i.start), + hl.Struct(__point=i.end), + i.includes_start, + i.includes_end) for i in intervals] + else: + self.intervals = intervals def render(self): reader = {'name': 'TableNativeReader', @@ -83,32 +97,21 @@ def __eq__(self, other): class StringTableReader(TableReader): - @typecheck_method(paths=oneof(str, sequenceof(str)), min_partitions=nullable(int), force_bgz=bool, - force=bool, file_per_partition=bool) - def __init__(self, paths, min_partitions, force_bgz, force, file_per_partition): + @typecheck_method(paths=oneof(str, sequenceof(str)), min_partitions=nullable(int)) + def __init__(self, paths, min_partitions): self.paths = paths self.min_partitions = min_partitions - self.force_bgz = force_bgz - self.force = force - self.file_per_partition = file_per_partition def render(self): reader = {'name': 
'StringTableReader', 'files': self.paths, - 'minPartitions': self.min_partitions, - 'forceBGZ': self.force_bgz, - 'forceGZ': self.force, - 'filePerPartition': self.file_per_partition} - + 'minPartitions': self.min_partitions} return escape_str(json.dumps(reader)) def __eq__(self, other): return isinstance(other, StringTableReader) and \ other.path == self.path and \ - other.min_partitions == self.min_partitions and \ - other.force_bgz == self.force_bgz and \ - other.force == self.force and \ - other.file_per_partition == self.file_per_partition + other.min_partitions == self.min_partitions class TableFromBlockMatrixNativeReader(TableReader): diff --git a/hail/python/hail/ir/utils.py b/hail/python/hail/ir/utils.py index db12aae97c4..0c445beacb6 100644 --- a/hail/python/hail/ir/utils.py +++ b/hail/python/hail/ir/utils.py @@ -1,33 +1,4 @@ -from typing import Optional, List, Any, Tuple from .ir import Coalesce, ApplyUnaryPrimOp, FalseIR -import hail as hl - - -def impute_type_of_partition_interval_array( - intervals: Optional[List[Any]] -) -> Tuple[Optional[List[Any]], Any]: - if intervals is None: - return None, None - if len(intervals) == 0: - return [], hl.tarray(hl.tinterval(hl.tstruct())) - - t = hl.expr.impute_type(intervals) - if not isinstance(t, hl.tarray) or not isinstance(t.element_type, hl.tinterval): - raise TypeError("'intervals' must be an array of tintervals") - pt = t.element_type.point_type - - if isinstance(pt, hl.tstruct): - return intervals, t - - struct_intervals = [ - hl.Interval(hl.Struct(__point=i.start), - hl.Struct(__point=i.end), - i.includes_start, - i.includes_end) - for i in intervals - ] - struct_intervals_type = hl.tarray(hl.tinterval(hl.tstruct(__point=pt))) - return struct_intervals, struct_intervals_type def filter_predicate_with_keep(ir_pred, keep): @@ -45,26 +16,3 @@ def make_filter_and_replace(filter, find_replace): 'findPattern': find, 'replacePattern': replace } - - -def parse_type(string_expr, ttype): - if ttype == hl.tstr: - return string_expr - elif ttype == hl.tint32: - return hl.int32(string_expr) - elif ttype == hl.tint64: - return hl.int64(string_expr) - elif ttype == hl.tfloat32: - return hl.float32(string_expr) - elif ttype == hl.tfloat64: - return hl.float64(string_expr) - elif ttype == hl.tbool: - return hl.bool(string_expr) - elif ttype == hl.tcall: - return hl.parse_call(string_expr) - elif isinstance(ttype, hl.tlocus): - return hl.parse_locus(string_expr, ttype.reference_genome) - elif isinstance(ttype, hl.tinterval) and isinstance(ttype.point_type, hl.tlocus): - return hl.parse_locus_interval(string_expr, ttype.point_type.reference_genome) - else: - return hl.parse_json(string_expr, ttype) diff --git a/hail/python/hail/linalg/blockmatrix.py b/hail/python/hail/linalg/blockmatrix.py index 4df6eef2ce0..27e5ac3e52f 100644 --- a/hail/python/hail/linalg/blockmatrix.py +++ b/hail/python/hail/linalg/blockmatrix.py @@ -9,7 +9,6 @@ import hail as hl import hail.expr.aggregators as agg from hail.expr import construct_expr, construct_variable -from hail.expr.blockmatrix_type import tblockmatrix from hail.expr.expressions import (expr_float64, matrix_table_source, expr_ndarray, check_entry_indexed, expr_tuple, expr_array, expr_int32, expr_int64) from hail.ir import (BlockMatrixWrite, BlockMatrixMap2, ApplyBinaryPrimOp, F64, @@ -29,8 +28,8 @@ from hail.typecheck import (typecheck, typecheck_method, nullable, oneof, sliceof, sequenceof, lazy, enumeration, numeric, tupleof, func_spec, sized_tupleof) -from hail.utils import (new_temp_file, local_path_uri, 
storage_level, with_local_temp_file, - new_local_temp_file) +from hail.utils import (new_temp_file, new_local_temp_file, local_path_uri, + storage_level, with_local_temp_file) from hail.utils.java import Env block_matrix_type = lazy() @@ -228,8 +227,8 @@ def __init__(self, bmir): self._bmir = bmir @classmethod - @typecheck_method(path=str, _assert_type=nullable(tblockmatrix)) - def read(cls, path, *, _assert_type=None): + @typecheck_method(path=str) + def read(cls, path): """Reads a block matrix. Parameters @@ -241,15 +240,14 @@ def read(cls, path, *, _assert_type=None): ------- :class:`.BlockMatrix` """ - return cls(BlockMatrixRead(BlockMatrixNativeReader(path), _assert_type=_assert_type)) + return cls(BlockMatrixRead(BlockMatrixNativeReader(path))) @classmethod @typecheck_method(uri=str, n_rows=int, n_cols=int, - block_size=nullable(int), - _assert_type=nullable(tblockmatrix)) - def fromfile(cls, uri, n_rows, n_cols, block_size=None, *, _assert_type=None): + block_size=nullable(int)) + def fromfile(cls, uri, n_rows, n_cols, block_size=None): """Creates a block matrix from a binary file. Examples @@ -303,7 +301,7 @@ def fromfile(cls, uri, n_rows, n_cols, block_size=None, *, _assert_type=None): if not block_size: block_size = BlockMatrix.default_block_size() - return cls(BlockMatrixRead(BlockMatrixBinaryReader(uri, [n_rows, n_cols], block_size), _assert_type=_assert_type)) + return cls(BlockMatrixRead(BlockMatrixBinaryReader(uri, [n_rows, n_cols], block_size))) @classmethod @typecheck_method(ndarray=np.ndarray, @@ -337,8 +335,6 @@ def from_numpy(cls, ndarray, block_size=None): ------- :class:`.BlockMatrix` """ - from hail.backend.service_backend import ServiceBackend - if not block_size: block_size = BlockMatrix.default_block_size() @@ -349,14 +345,9 @@ def from_numpy(cls, ndarray, block_size=None): nd = _ndarray_as_float64(nd) n_rows, n_cols = nd.shape - if isinstance(hl.current_backend(), ServiceBackend): - path = hl.TemporaryFilename().name - hl.current_backend().fs.open(path, mode='wb').write(nd.tobytes()) - uri = path - else: - path = new_local_temp_file() - nd.tofile(path) - uri = local_path_uri(path) + path = new_local_temp_file() + uri = local_path_uri(path) + nd.tofile(path) return cls.fromfile(uri, n_rows, n_cols, block_size) @classmethod @@ -647,7 +638,7 @@ def checkpoint(self, path, overwrite=False, force_row_major=False, stage_locally before being copied to ``output``. 
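# NOTE (editor's illustration, not part of the patch): a minimal sketch of what the
# reverted checkpoint path does — an explicit write followed by a plain read, with no
# type assertion threaded through. Uses only helpers visible in this diff
# (from_numpy, write, read, to_numpy, new_temp_file); the array contents are arbitrary.
import numpy as np
import hail as hl
from hail.linalg import BlockMatrix
from hail.utils import new_temp_file

bm = BlockMatrix.from_numpy(np.arange(16, dtype=np.float64).reshape(4, 4))
path = new_temp_file()
bm.write(path, overwrite=True, force_row_major=False)
bm2 = BlockMatrix.read(path)   # what checkpoint(path, overwrite=True) now returns
assert np.array_equal(bm2.to_numpy(), bm.to_numpy())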
""" self.write(path, overwrite, force_row_major, stage_locally) - return BlockMatrix.read(path, _assert_type=self._bmir._type) + return BlockMatrix.read(path) @staticmethod @typecheck(entry_expr=expr_float64, @@ -1212,20 +1203,12 @@ def to_numpy(self, _force_blocking=False): ------- :class:`numpy.ndarray` """ - from hail.backend.service_backend import ServiceBackend if self.n_rows * self.n_cols > 1 << 31 or _force_blocking: path = new_temp_file() self.export_blocks(path, binary=True) return BlockMatrix.rectangles_to_numpy(path, binary=True) - if isinstance(hl.current_backend(), ServiceBackend): - with hl.TemporaryFilename() as path: - self.tofile(path) - return np.frombuffer( - hl.current_backend().fs.open(path, mode='rb').read() - ).reshape((self.n_rows, self.n_cols)) - with with_local_temp_file() as path: uri = local_path_uri(path) self.tofile(uri) @@ -1332,7 +1315,7 @@ def persist(self, storage_level='MEMORY_AND_DISK'): """ id = Env.get_uid() Env.backend().execute(BlockMatrixWrite(self._bmir, BlockMatrixPersistWriter(id, storage_level))) - return BlockMatrix(BlockMatrixRead(BlockMatrixPersistReader(id, self._bmir), _assert_type=self._bmir._type)) + return BlockMatrix(BlockMatrixRead(BlockMatrixPersistReader(id, self._bmir))) def unpersist(self): """Unpersists this block matrix from memory/disk. diff --git a/hail/python/hail/matrixtable.py b/hail/python/hail/matrixtable.py index 4a14ac56c16..a3fcac35a26 100644 --- a/hail/python/hail/matrixtable.py +++ b/hail/python/hail/matrixtable.py @@ -2500,20 +2500,8 @@ def checkpoint(self, output: str, overwrite: bool = False, stage_locally: bool = }""" if not _read_if_exists or not hl.hadoop_exists(f'{output}/_SUCCESS'): self.write(output=output, overwrite=overwrite, stage_locally=stage_locally, _codec_spec=_codec_spec) - _assert_type = self._type - _load_refs = False - else: - _assert_type = None - _load_refs = True - return hl.read_matrix_table( - output, - _intervals=_intervals, - _filter_intervals=_filter_intervals, - _drop_cols=_drop_cols, - _drop_rows=_drop_rows, - _assert_type=_assert_type, - _load_refs=_load_refs - ) + return hl.read_matrix_table(output, _intervals=_intervals, _filter_intervals=_filter_intervals, + _drop_cols=_drop_cols, _drop_rows=_drop_rows) @typecheck_method(output=str, overwrite=bool, @@ -3338,20 +3326,6 @@ def repartition(self, n_partitions: int, shuffle: bool = True) -> 'MatrixTable': :class:`.MatrixTable` Repartitioned dataset. """ - if hl.current_backend().requires_lowering: - tmp = hl.utils.new_temp_file() - - if len(self.row_key) == 0: - uid = Env.get_uid() - tmp2 = hl.utils.new_temp_file() - self.checkpoint(tmp2) - ht = hl.read_matrix_table(tmp2).add_row_index(uid).key_rows_by(uid) - ht.checkpoint(tmp) - return hl.read_matrix_table(tmp, _n_partitions=n_partitions).drop(uid) - else: - # checkpoint rather than write to use fast codec - self.checkpoint(tmp) - return hl.read_matrix_table(tmp, _n_partitions=n_partitions) return MatrixTable(ir.MatrixRepartition( self._mir, n_partitions, @@ -3387,9 +3361,6 @@ def naive_coalesce(self, max_partitions: int) -> 'MatrixTable': Matrix table with at most `max_partitions` partitions. 
""" - if hl.current_backend().requires_lowering: - return self.repartition(max_partitions) - return MatrixTable(ir.MatrixRepartition( self._mir, max_partitions, ir.RepartitionStrategy.NAIVE_COALESCE)) diff --git a/hail/python/hail/methods/impex.py b/hail/python/hail/methods/impex.py index fdbc3dbd856..32bd2a73c72 100644 --- a/hail/python/hail/methods/impex.py +++ b/hail/python/hail/methods/impex.py @@ -6,6 +6,7 @@ import avro.schema from avro.datafile import DataFileReader from avro.io import DatumReader + import hail as hl from hail import ir from hail.expr import StructExpression, LocusExpression, \ @@ -15,13 +16,12 @@ from hail.expr.types import hail_type, tarray, tfloat64, tstr, tint32, tstruct, \ tcall, tbool, tint64, tfloat32 from hail.genetics.reference_genome import reference_genome_type -from hail.ir.utils import parse_type from hail.matrixtable import MatrixTable from hail.methods.misc import require_biallelic, require_row_key_variant, require_col_key_str from hail.table import Table from hail.typecheck import typecheck, nullable, oneof, dictof, anytype, \ sequenceof, enumeration, sized_tupleof, numeric, table_key_type, char -from hail.utils.misc import wrap_to_list +from hail.utils import wrap_to_list from hail.utils.java import Env, FatalError, jindexed_seq_args, warning @@ -258,6 +258,7 @@ def export_bgen(mt, output, gp=None, varid=None, rsid=None, parallel=None): def export_plink(dataset, output, call=None, fam_id=None, ind_id=None, pat_id=None, mat_id=None, is_female=None, pheno=None, varid=None, cm_position=None): + """Export a :class:`.MatrixTable` as `PLINK2 `__ BED, BIM and FAM files. @@ -522,8 +523,7 @@ def export_vcf(dataset, output, append_to_header=None, parallel=None, metadata=N invalid_info_fields = [f for f in info_fields if not re.fullmatch(r"^([A-Za-z_][0-9A-Za-z_.]*|1000G)", f)] if invalid_info_fields: invalid_info_str = ''.join(f'\n {f!r}' for f in invalid_info_fields) - warning( - 'export_vcf: the following info field names are invalid in VCF 4.3 and may not work with some tools: ' + invalid_info_str) + warning('export_vcf: the following info field names are invalid in VCF 4.3 and may not work with some tools: ' + invalid_info_str) row_fields_used = {'rsid', 'info', 'filters', 'qual'} @@ -833,8 +833,7 @@ def recode_contig(x): reference_genome, skip_invalid_intervals)) - elif len(t.row) >= 4 and tstruct(**dict([(n, typ) for n, typ in t.row.dtype._field_types.items()][:4])) == tstruct( - f0=tstr, f1=tint32, f2=tint32, f3=tstr): + elif len(t.row) >= 4 and tstruct(**dict([(n, typ) for n, typ in t.row.dtype._field_types.items()][:4])) == tstruct(f0=tstr, f1=tint32, f2=tint32, f3=tstr): t = t.select(interval=locus_interval_expr(recode_contig(t['f0']), t['f1'] + 1, t['f2'] + 1, @@ -1160,23 +1159,20 @@ def import_bgen(path, variants = hl.struct(locus=variants) if len(variants.dtype) == 0 or not variants.dtype._is_prefix_of(expected_vtype): - raise TypeError( - "'import_bgen' requires the expression type for 'variants' is a non-empty prefix of the BGEN key type: \n" - + f"\tFound: {repr(variants.dtype)}\n" - + f"\tExpected: {repr(expected_vtype)}\n") + raise TypeError("'import_bgen' requires the expression type for 'variants' is a non-empty prefix of the BGEN key type: \n" + + f"\tFound: {repr(variants.dtype)}\n" + + f"\tExpected: {repr(expected_vtype)}\n") uid = Env.get_uid() fnames = list(variants.dtype) - name, variants = variants._to_table( - uid) # This will add back the other key fields of the source, which we don't want + name, variants = 
variants._to_table(uid) # This will add back the other key fields of the source, which we don't want variants = variants.key_by(**{fname: variants[name][fname] for fname in fnames}) variants = variants.select() elif isinstance(variants, Table): if len(variants.key) == 0 or not variants.key.dtype._is_prefix_of(expected_vtype): - raise TypeError( - "'import_bgen' requires the row key type for 'variants' is a non-empty prefix of the BGEN key type: \n" - + f"\tFound: {repr(variants.key.dtype)}\n" - + f"\tExpected: {repr(expected_vtype)}\n") + raise TypeError("'import_bgen' requires the row key type for 'variants' is a non-empty prefix of the BGEN key type: \n" + + f"\tFound: {repr(variants.key.dtype)}\n" + + f"\tExpected: {repr(expected_vtype)}\n") variants = variants.select() else: assert isinstance(variants, list) @@ -1202,8 +1198,7 @@ def import_bgen(path, schema=expected_vtype, key=['locus', 'alleles']) except Exception: - raise TypeError( - f"'import_bgen' requires all elements in 'variants' are a non-empty prefix of the BGEN key type: {repr(expected_vtype)}") + raise TypeError(f"'import_bgen' requires all elements in 'variants' are a non-empty prefix of the BGEN key type: {repr(expected_vtype)}") reader = ir.MatrixBGENReader(path, sample_file, index_file_map, n_partitions, block_size, variants) @@ -1498,9 +1493,9 @@ def import_table(paths, .. code-block: text $cat data/table_with_json.tsv - id json_field - 1 {"foo": "bar", "x": 7} - 4 {"foo": "baz", "x": 100} + id json_field + 1 {"foo": "bar", "x": 7} + 4 {"foo": "baz", "x": 100} To import, we need to specify the types argument. @@ -1607,98 +1602,26 @@ def import_table(paths, ------- :class:`.Table` """ - if len(delimiter) < 1: - raise ValueError('import_table: empty delimiter is not supported') - - def split_lines(row, fields): - split_array = row.text._split_line(delimiter, missing=missing, quote=quote, regex=len(delimiter) > 1) - return hl.case().when(hl.len(split_array) == len(fields), split_array)\ - .or_error(hl.str("error in number of fields found: in file ") + hl.str(row.file) - + hl.str(f"\nExpected {len(fields)} {'fields' if len(fields) > 1 else 'field' }, found ") - + hl.str(hl.len(split_array)) + hl.if_else(hl.len(split_array) > 1, hl.str(" fields"), - hl.str(" field")) + hl.str("\nfor line consisting of '") + hl.str(row.text) + "'") - - def should_filter_line(hl_str): - to_filter = hl_str.matches(filter) if filter is not None else hl.bool(False) - if len(comment) > 0: - hl_comment = hl.array(comment) - filter_comment = hl_comment.any(lambda com: hl.if_else(hl.len(com) == 1, - hl_str.startswith(com), - hl_str.matches(com, True))) - else: - filter_comment = hl.bool(False) - filter_blank_line = hl.len(hl_str) == 0 if skip_blank_lines else hl.bool(False) - return hl.array([to_filter, filter_comment, filter_blank_line]).any(lambda filt: filt) - - def check_fields_for_duplicates(fields_to_check): - changed_fields = [] - unique_fields = {} - for field_idx, field_to_check in enumerate(fields_to_check): - field_copy = field_to_check - suffix = 1 - while unique_fields.get(field_copy) is not None: - field_copy = field_to_check + str(suffix) - suffix += 1 - if field_copy is not field_to_check: - changed_fields.append((field_copy, field_to_check)) - unique_fields[field_copy] = field_idx - for new_field_name in changed_fields: - fields_to_check[unique_fields[new_field_name[0]]] = new_field_name[0] - if len(changed_fields) > 0: - from itertools import starmap - print_changed_fields = list(starmap(lambda post, pre: f"{pre} -> {post}", 
changed_fields)) - hl.utils.warning(f"Found {len(changed_fields)} duplicate" - f" {'row field' if len(changed_fields) == 1 else 'row fields'}. Changed row fields as " - f"follows:\n" + "\n".join(print_changed_fields)) - return fields_to_check - - if len(delimiter) == 0: - raise ValueError("Hail does not currently support 0-character separators") - paths = wrap_to_list(paths) comment = wrap_to_list(comment) missing = wrap_to_list(missing) - ht = hl.import_lines(paths, min_partitions, force_bgz, force) - if skip_blank_lines is not None or len(comment) > 0 or filter is not None: - ht = ht.filter(should_filter_line(ht.text), keep=False) - - if find_replace is not None: - ht = ht.annotate(text=ht['text'].replace(*find_replace)) - - first_row = ht.head(1) - first_row_value = first_row.annotate( - header=first_row.text._split_line(delimiter, missing=hl.empty_array(hl.tstr), quote=quote, regex=len(delimiter) > 1)).collect()[0] - - if first_row_value is None: - raise ValueError(f"Invalid file: no lines remaining after filters\n Offending file: {first_row.file}") + tr = ir.TextTableReader(paths, min_partitions, types, comment, + delimiter, missing, no_header, quote, + skip_blank_lines, force_bgz, filter, find_replace, + force, source_file_field) + ht = Table(ir.TableRead(tr)) - if not no_header: - unchecked_fields = first_row_value.header - fields = check_fields_for_duplicates(unchecked_fields) - ht = ht.filter(ht.text == first_row_value.text, keep=False) - else: - num_of_fields = list(range(0, len(first_row_value.header))) - fields = list(map(lambda f_num: "f" + str(f_num), num_of_fields)) - - ht = ht.annotate(split_text=hl.case().when(hl.len(ht.text) > 0, split_lines(ht, fields)) - .or_error(hl.str("Blank line found in file ") + ht.file)).drop('text') - - fields_to_value = {} strs = [] + if impute: - fields_to_impute_idx = [] - fields_to_guess = [] - for idx, field in enumerate(fields): - if types.get(field) is None: - fields_to_impute_idx.append(idx) - fields_to_guess.append(field) + fields_to_guess = [f for f in ht.row if f not in types] hl.utils.info('Reading table to impute column types') - guessed = ht.aggregate(hl.agg.array_agg(lambda x: hl.agg._impute_type(x), - [ht.split_text[i] for i in fields_to_impute_idx])) + guessed = ht.aggregate(hl.agg.array_agg(lambda x: hl.agg._impute_type(x), [ht[f] for f in fields_to_guess])) reasons = {f: 'user-supplied type' for f in types} + imputed_types = dict() for field, s in zip(fields_to_guess, guessed): if not s['anyNonMissing']: @@ -1720,31 +1643,28 @@ def check_fields_for_duplicates(fields_to_check): strs.append('Finished type imputation') all_types = dict(**types, **imputed_types) - - for f_idx, field in enumerate(fields): + for field in ht.row: strs.append(f' Loading field {field!r} as type {all_types[field]} ({reasons[field]})') - fields_to_value[field] = parse_type(ht.split_text[f_idx], all_types[field]) + + tr = ir.TextTableReader(paths, min_partitions, all_types, comment, + delimiter, missing, no_header, quote, + skip_blank_lines, force_bgz, filter, find_replace, + force, source_file_field) + ht = Table(ir.TableRead(tr)) else: strs.append('Reading table without type imputation') - for f_idx, field in enumerate(fields): + for field in ht.row: reason = 'user-supplied' if field in types else 'not specified' t = types.get(field, hl.tstr) - fields_to_value[field] = parse_type(ht.split_text[f_idx], t) strs.append(f' Loading field {field!r} as type {t} ({reason})') - ht = ht.annotate(**fields_to_value).drop('split_text') - if source_file_field is not 
None: - source_file = {source_file_field: ht.file} - ht = ht.annotate(**source_file) - ht = ht.drop('file') - - if len(fields) < 30: + if len(ht.row) < 30: hl.utils.info('\n'.join(strs)) else: from collections import Counter - strs2 = [f'Loading {ht.row} fields. Counts by type:'] - for name, count in Counter(ht[f].dtype for f in fields).most_common(): + strs2 = [f'Loading {len(ht.row)} fields. Counts by type:'] + for name, count in Counter(ht[f].dtype for f in ht.row).most_common(): strs2.append(f' {name}: {count}') hl.utils.info('\n'.join(strs2)) @@ -1754,9 +1674,8 @@ def check_fields_for_duplicates(fields_to_check): return ht -@typecheck(paths=oneof(str, sequenceof(str)), min_partitions=nullable(int), force_bgz=bool, - force=bool, file_per_partition=bool) -def import_lines(paths, min_partitions=None, force_bgz=False, force=False, file_per_partition=False) -> Table: +@typecheck(paths=oneof(str, sequenceof(str)), min_partitions=nullable(int)) +def import_lines(paths, min_partitions=None) -> Table: """Import lines of file(s) as a :class:`.Table` of strings. Examples @@ -1783,35 +1702,15 @@ def import_lines(paths, min_partitions=None, force_bgz=False, force=False, file_ Files to import. min_partitions: :obj:`int` or :obj:`None` Minimum number of partitions. - force_bgz : :obj:`bool` - If ``True``, load files as blocked gzip files, assuming - that they were actually compressed using the BGZ codec. This option is - useful when the file extension is not ``'.bgz'``, but the file is - blocked gzip, so that the file can be read in parallel and not on a - single node. - force : :obj:`bool` - If ``True``, load gzipped files serially on one core. This should - be used only when absolutely necessary, as processing time will be - increased due to lack of parallelism. - file_per_partition : :obj:`bool` - If ``True``, each file will be in a seperate partition. Not recommended - for most uses. Error thrown if ``True`` and `min_partitions` is less than - the number of files Returns ------- :class:`.Table` Table constructed from imported data. """ - paths = wrap_to_list(paths) - if file_per_partition and min_partitions is not None: - if min_partitions > len(paths): - raise FatalError(f'file_per_partition is True while min partitions is {min_partitions} ,which is greater' - f' than the number of files, {len(paths)}') - - st_reader = ir.StringTableReader(paths, min_partitions, force_bgz, force, file_per_partition) + st_reader = ir.StringTableReader(paths, min_partitions) string_table = Table(ir.TableRead(st_reader)) return string_table @@ -1994,110 +1893,6 @@ def import_matrix_table(paths, :class:`.MatrixTable` MatrixTable constructed from imported data. 
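# NOTE (editor's illustration, not part of the patch): a hedged usage sketch of the
# reader-backed import_matrix_table restored above. The input path, the 'gene' row
# field, and the tab delimiter are hypothetical; the parameter names come from the
# signature shown in this diff.
import hail as hl

mt = hl.import_matrix_table('data/counts.tsv',             # hypothetical input file
                            row_fields={'gene': hl.tstr},  # typed row fields from the header
                            row_key=['gene'],
                            entry_type=hl.tint32,
                            delimiter='\t')
mt.describe()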
""" - row_key = wrap_to_list(row_key) - comment = wrap_to_list(comment) - paths = [hl.current_backend().fs.canonicalize_path(p) for p in wrap_to_list(paths)] - missing_list = wrap_to_list(missing) - - def comment_filter(table): - return hl.rbind(hl.array(comment), - lambda hl_comment: hl_comment.any(lambda com: hl.if_else(hl.len(com) == 1, - table.text.startswith(com), - table.text.matches(com, False)))) \ - if len(comment) > 0 else False - - def truncate(string_array, delim=", "): - if len(string_array) > 10: - string_array = string_array[:10] - string_array.append("...") - return delim.join(string_array) - - path_to_index = {path: idx for idx, path in enumerate(paths)} - - def format_file(file_name, hl_value=False): - if hl_value: - return hl.rbind(file_name.split('/'), lambda split_file: - hl.if_else(hl.len(split_file) <= 4, hl.str("/").join(file_name.split('/')[-4:]), - hl.str("/") + hl.str("/").join(file_name.split('/')[-4:]))) - else: - return "/".join(file_name.split('/')[-3:]) if len(file_name) <= 4 else \ - "/" + "/".join(file_name.split('/')[-3:]) - - def get_file_start(row): - first_lines = first_lines_table.collect() - if first_lines: - file_start_array = hl.array(list(map(lambda line: (line.file, line.idx), first_lines))) - match_file_idx = file_start_array.index(lambda line_tuple: line_tuple[0] == row.file) - return file_start_array[match_file_idx][1] - else: - return 0 - - def validate_row_fields(): - unique_fields = {} - duplicates = [] - header_idx = 0 - for header_rowf in header_dict['row_fields']: - rowf_type = row_fields.get(header_rowf) - if rowf_type is None: - import itertools as it - row_fields_string = '\n'.join(list(it.starmap( - lambda row_field, row_type: f" '{row_field}': {str(row_type)}", row_fields.items()))) - header_fields_string = "\n ".join(map(lambda field: f"'{field}'", header_dict['row_fields'])) - raise FatalError(f"in file {format_file(header_dict['path'])} found row field '{header_rowf}' that's" - f" not in 'row fields'\nrow fields found in file:\n {header_fields_string}" - f"\n'row fields':\n{row_fields_string}") - if header_rowf in unique_fields: - duplicates.append(header_rowf) - else: - unique_fields[header_rowf] = True - header_idx += 1 - if len(duplicates) > 0: - raise FatalError("Found following duplicate row fields in header:\n" + '\n'.join(duplicates)) - - def parse_entries(row): - return hl.range(num_of_row_fields, len(header_dict['column_ids']) + num_of_row_fields).map( - lambda entry_idx: parse_type_or_error(entry_type, row, entry_idx, not_entries=False)) - - def parse_rows(row): - rows_list = list(row_fields.items()) - return {rows_list[idx][0]: - parse_type_or_error(rows_list[idx][1], row, idx) for idx in range(num_of_row_fields)} - - def error_msg(row, idx, msg): - return (hl.str("in file ") + hl.str(format_file(row.file, True)) - + hl.str(" on line ") + hl.str(row.row_id - get_file_start(row) + 1) - + hl.str(" at value '") + hl.str(row.split_array[idx]) + hl.str("':\n") + hl.str(msg)) - - def parse_type_or_error(hail_type, row, idx, not_entries=True): - value = row.split_array[idx] - if hail_type == hl.tint32: - parsed_type = hl.parse_int32(value) - elif hail_type == hl.tint64: - parsed_type = hl.parse_int64(value) - elif hail_type == hl.tfloat32: - parsed_type = hl.parse_float32(value) - elif hail_type == hl.tfloat64: - parsed_type = hl.parse_float64(value) - else: - parsed_type = value - - if not_entries: - error_clarify_msg = hl.str(" at row field '") + hl.str(hl_row_fields[idx]) + hl.str("'") - else: - error_clarify_msg = 
(hl.str(" at column id '") + hl.str(hl_columns[idx - num_of_row_fields]) - + hl.str("' for entry field 'x' ")) - - return hl.if_else(hl.is_missing(value), hl.missing(hail_type), - hl.case().when(~hl.is_missing(parsed_type), parsed_type) - .or_error( - error_msg(row, idx, f"error parsing value into {str(hail_type)}" + error_clarify_msg))) - - num_of_row_fields = len(row_fields.keys()) - add_row_id = False - if len(row_key) == 0: - add_row_id = True - row_key = ['row_id'] - if sep is not None: if delimiter is not None: raise ValueError( @@ -2111,173 +1906,37 @@ def parse_type_or_error(hail_type, row, idx, not_entries=True): if len(delimiter) != 1: raise FatalError('delimiter or sep must be a single character') - if add_row_id: - if 'row_id' in row_fields: - raise FatalError( - "import_matrix_table reserves the field name 'row_id' for" - 'its own use, please use a different name') + add_row_id = False + if isinstance(row_key, list) and len(row_key) == 0: + add_row_id = True + row_key = ['row_id'] + + if 'row_id' in row_fields and add_row_id: + raise FatalError( + "import_matrix_table reserves the field name 'row_id' for" + 'its own use, please use a different name') for k, v in row_fields.items(): if v not in {tint32, tint64, tfloat32, tfloat64, tstr}: raise FatalError( f'import_matrix_table expects field types to be one of:' f"'int32', 'int64', 'float32', 'float64', 'str': field {repr(k)} had type '{v}'") - if entry_type not in {tint32, tint64, tfloat32, tfloat64, tstr}: raise FatalError("""import_matrix_table expects entry types to be one of: 'int32', 'int64', 'float32', 'float64', 'str': found '{}'""".format(entry_type)) - if missing in delimiter: - raise FatalError(f"Missing value {missing} contains delimiter {delimiter}") - - ht = import_lines(paths, min_partitions, force_bgz=force_bgz).add_index(name='row_id') - # for checking every header matches - file_per_partition = import_lines(paths, force_bgz=force_bgz, file_per_partition=True) - file_per_partition = file_per_partition.filter(hl.bool(hl.len(file_per_partition.text) == 0) - | comment_filter(file_per_partition), False) - first_lines_table = file_per_partition._map_partitions(lambda rows: rows[:1]) - first_lines_table = first_lines_table.annotate(split_array=first_lines_table.text.split(delimiter)).add_index() - - if not no_header: - def validate_header_get_info_dict(): - two_first_lines = file_per_partition.head(2) - two_first_lines = two_first_lines.annotate(split_array=two_first_lines.text.split(delimiter)).collect() - header_line = two_first_lines[0] if two_first_lines else None - first_data_line = two_first_lines[1] if len(two_first_lines) > 1 else None - num_of_data_line_values = len(first_data_line.split_array) if len(two_first_lines) > 1 else 0 - num_of_header_values = len(header_line.split_array) if two_first_lines else 0 - if header_line is None or path_to_index[header_line.file] != 0: - raise ValueError(f"Expected header in every file but found empty file: {format_file(paths[0])}") - elif not first_data_line or first_data_line.file != header_line.file: - hl.utils.warning(f"File {format_file(header_line.file)} contains a header, but no lines of data") - if num_of_header_values < num_of_data_line_values: - raise ValueError(f"File {format_file(header_line.file)} contains one line assumed to be the header." 
- f"The header had a length of {num_of_header_values} while the number" - f"of row fields is {num_of_row_fields}") - user_row_fields = header_line.split_array[:num_of_row_fields] - column_ids = header_line.split_array[num_of_row_fields:] - elif num_of_data_line_values != num_of_header_values: - if num_of_data_line_values == num_of_header_values + num_of_row_fields: - user_row_fields = ["f" + str(f_idx) for f_idx in list(range(0, num_of_row_fields))] - column_ids = header_line.split_array - else: - raise ValueError( - f"In file {format_file(header_line.file)}, expected the header line to match either:\n" - f"rowField0 rowField1 ... rowField${num_of_row_fields} colId0 colId1 ...\nor\n" - f" colId0 colId1 ...\nInstead the first two lines were:\nInstead the first two lin" - f"es were:\n{header_line.text}\n{first_data_line.text}\nThe first line contained" - f" {num_of_header_values} separated values and the second line" - f" contained {num_of_data_line_values}") - else: - user_row_fields = header_line.split_array[:num_of_row_fields] - column_ids = header_line.split_array[num_of_row_fields:] - return {'text': header_line.text, 'header_values': header_line.split_array, 'path': header_line.file, - 'row_fields': user_row_fields, 'column_ids': column_ids} - - def warn_if_duplicate_col_ids(): - time_col_id_encountered_dict = {} - duplicate_cols = [] - for item in header_dict['column_ids']: - if time_col_id_encountered_dict.get(item) is not None: - duplicate_cols.append(item) - time_col_id_encountered_dict[item] = time_col_id_encountered_dict[item] + 1 - time_col_id_encountered_dict[item] = 1 - if len(duplicate_cols) == 0: - return - - import itertools as it - duplicates_to_print = sorted( - [('"' + dup_field + '"', '(' + str(time_col_id_encountered_dict[dup_field]) + ')') - for dup_field in duplicate_cols], key=lambda dup_values: dup_values[1]) - - duplicates_to_print = truncate(duplicates_to_print) - duplicates_to_print_formatted = it.starmap(lambda dup, time_found: time_found - + " " + dup, duplicates_to_print) - ht.utils.warning(f"Found {len(duplicate_cols)} duplicate column id" - + f"{'s' if len(duplicate_cols) > 1 else ''}\n" + '\n'.join(duplicates_to_print_formatted)) - - def validate_all_headers(): - all_headers = first_lines_table.collect() - for header in all_headers: - if header_dict['text'] != header.text: - if len(header_dict['header_values']) == len(header.split_array): - zipped_headers = list(zip(header_dict['header_values'], header.split_array)) - for header_idx, header_values in enumerate(zipped_headers): - main_header_value = header_values[0] - error_header_value = header_values[1] - if main_header_value != error_header_value: - raise ValueError("invalid header: expected elements to be identical for all input paths" - f". Found different elements at position {header_idx + 1}" - f"\n in file {format_file(header.file)} with value " - f"'{error_header_value}' when expecting value '{main_header_value}'") - else: - raise ValueError(f"invalid header: lengths of headers differ. 
\n" - f"{len(header_dict['header_values'])} elements in " - f"{format_file(header_dict['path'])}:\n" - + truncate(["'{}'".format(value) for value in header_dict['header_values']]) - + f" {len(header.split_array)} elements in {format_file(header.file)}:\n" - + truncate(["'{}'".format(value) for value in header.split_array])) - - header_dict = validate_header_get_info_dict() - warn_if_duplicate_col_ids() - validate_all_headers() - - else: - first_line = first_lines_table.head(1).collect() - if not first_line or path_to_index[first_line[0].file] != 0: - hl.utils.warning( - f"File {format_file(paths[0])} is empty and has no header, so we assume no columns") - header_dict = {'header_values': [], - 'row_fields': ["f" + str(f_idx) for f_idx in list(range(0, num_of_row_fields))], - 'column_ids': [] - } - else: - first_line = first_line[0] - header_dict = {'header_values': [], - 'row_fields': ["f" + str(f_idx) for f_idx in list(range(0, num_of_row_fields))], - 'column_ids': - [col_id for col_id in list(range(0, len(first_line.split_array) - num_of_row_fields))] - } - - validate_row_fields() - header_filter = ht.text == header_dict['text'] if not no_header else False - - ht = ht.filter(hl.bool(hl.len(ht.text) == 0) | comment_filter(ht) | header_filter, False) - - hl_columns = hl.array(header_dict['column_ids']) if len(header_dict['column_ids']) > 0 else hl.empty_array(hl.tstr) - hl_row_fields = hl.array(header_dict['row_fields']) if len(header_dict['row_fields']) > 0 \ - else hl.empty_array(hl.tstr) - ht = ht.annotate(split_array=ht.text._split_line(delimiter, missing_list, quote=None, regex=False)).add_index( - 'row_id') - - ht = ht.annotate(split_array=hl.case().when(hl.len(ht.split_array) >= num_of_row_fields, ht.split_array) - .or_error(error_msg(ht, hl.len(ht.split_array) - 1, - " unexpected end of line while reading row field"))) - - n_column_ids = len(header_dict['column_ids']) - n_in_split_array = hl.len(ht.split_array[num_of_row_fields:(num_of_row_fields + n_column_ids)]) - ht = ht.annotate(split_array=hl.case().when( - n_column_ids <= n_in_split_array, - ht.split_array - ).or_error( - error_msg( - ht, - hl.len(ht.split_array) - 1, - " unexpected end of line while reading entries" - ) - )) - - ht = ht.annotate(**parse_rows(ht), entries=parse_entries(ht).map(lambda entry: hl.struct(x=entry)))\ - .drop('text', 'split_array', 'file') - - ht = ht.annotate_globals(cols=hl.range(0, len(header_dict['column_ids'])) - .map(lambda col_idx: hl.struct(col_id=hl_columns[col_idx]))) - - if not add_row_id: - ht = ht.drop('row_id') - - mt = ht._unlocalize_entries('entries', 'cols', ['col_id']) - mt = mt.key_rows_by(*row_key) + reader = ir.TextMatrixReader(paths, + min_partitions, + row_fields, + entry_type, + missing, + not no_header, + delimiter, + force_bgz, + add_row_id, + wrap_to_list(comment)) + + mt = MatrixTable(ir.MatrixRead(reader)).key_rows_by(*wrap_to_list(row_key)) return mt @@ -2450,11 +2109,9 @@ def import_plink(bed, bim, fam, _filter_intervals=bool, _drop_cols=bool, _drop_rows=bool, - _n_partitions=nullable(int), - _assert_type=nullable(hl.tmatrix), - _load_refs=bool) + _n_partitions=nullable(int)) def read_matrix_table(path, *, _intervals=None, _filter_intervals=False, _drop_cols=False, - _drop_rows=False, _n_partitions=None, _assert_type=None, _load_refs=True) -> MatrixTable: + _drop_rows=False, _n_partitions=None) -> MatrixTable: """Read in a :class:`.MatrixTable` written with :meth:`.MatrixTable.write`. 
Parameters @@ -2466,27 +2123,17 @@ def read_matrix_table(path, *, _intervals=None, _filter_intervals=False, _drop_c ------- :class:`.MatrixTable` """ - if _load_refs: - for rg_config in Env.backend().load_references_from_dataset(path): - hl.ReferenceGenome._from_config(rg_config) + for rg_config in Env.backend().load_references_from_dataset(path): + hl.ReferenceGenome._from_config(rg_config) if _intervals is not None and _n_partitions is not None: raise ValueError("'read_matrix_table' does not support both _intervals and _n_partitions") mt = MatrixTable(ir.MatrixRead(ir.MatrixNativeReader(path, _intervals, _filter_intervals), - _drop_cols, - _drop_rows, - _assert_type=_assert_type)) + _drop_cols, _drop_rows)) if _n_partitions: intervals = mt._calculate_new_partitions(_n_partitions) - return read_matrix_table( - path, - _drop_rows=_drop_rows, - _drop_cols=_drop_cols, - _intervals=intervals, - _assert_type=_assert_type, - _load_refs=_load_refs - ) + return read_matrix_table(path, _drop_rows=_drop_rows, _drop_cols=_drop_cols, _intervals=intervals) return mt @@ -2789,8 +2436,7 @@ def import_gvcfs(path, rg = reference_genome.name if reference_genome else None if partitions is not None: - partitions, partitions_type = hl.utils._dumps_partitions(partitions, hl.tstruct(locus=hl.tlocus(rg), - alleles=hl.tarray(hl.tstr))) + partitions, partitions_type = hl.utils._dumps_partitions(partitions, hl.tstruct(locus=hl.tlocus(rg), alleles=hl.tarray(hl.tstr))) else: partitions_type = None @@ -2908,16 +2554,8 @@ def index_bgen(path, @typecheck(path=str, _intervals=nullable(sequenceof(anytype)), _filter_intervals=bool, - _n_partitions=nullable(int), - _assert_type=nullable(hl.ttable), - _load_refs=bool) -def read_table(path, - *, - _intervals=None, - _filter_intervals=False, - _n_partitions=None, - _assert_type=None, - _load_refs=True) -> Table: + _n_partitions=nullable(int)) +def read_table(path, *, _intervals=None, _filter_intervals=False, _n_partitions=None) -> Table: """Read in a :class:`.Table` written with :meth:`.Table.write`. 
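# NOTE (editor's illustration, not part of the patch): the same revert applies to
# read_table — write a table, read it back plainly, optionally with a new partition
# count. A minimal sketch using a throwaway range table and temp path.
import hail as hl
from hail.utils import new_temp_file

ht = hl.utils.range_table(1000, n_partitions=8)
path = new_temp_file()
ht.write(path, overwrite=True)
ht4 = hl.read_table(path, _n_partitions=4)
print(ht4.n_partitions())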
Parameters @@ -2929,18 +2567,17 @@ def read_table(path, ------- :class:`.Table` """ - if _load_refs: - for rg_config in Env.backend().load_references_from_dataset(path): - hl.ReferenceGenome._from_config(rg_config) + for rg_config in Env.backend().load_references_from_dataset(path): + hl.ReferenceGenome._from_config(rg_config) if _intervals is not None and _n_partitions is not None: raise ValueError("'read_table' does not support both _intervals and _n_partitions") tr = ir.TableNativeReader(path, _intervals, _filter_intervals) - ht = Table(ir.TableRead(tr, False, _assert_type=_assert_type)) + ht = Table(ir.TableRead(tr, False)) if _n_partitions: intervals = ht._calculate_new_partitions(_n_partitions) - return read_table(path, _intervals=intervals, _assert_type=_assert_type, _load_refs=_load_refs) + return read_table(path, _intervals=intervals) return ht diff --git a/hail/python/hail/methods/statgen.py b/hail/python/hail/methods/statgen.py index 6b32a452a09..8ff5d520e49 100644 --- a/hail/python/hail/methods/statgen.py +++ b/hail/python/hail/methods/statgen.py @@ -821,9 +821,6 @@ def logistic_regression_rows(test, y, x, covariates, pass_through=()) -> hail.Ta ------- :class:`.Table` """ - if not isinstance(Env.backend(), SparkBackend): - return _logistic_regression_rows_nd(test, y, x, covariates, pass_through) - if len(covariates) == 0: raise ValueError('logistic regression requires at least one covariate expression') @@ -3495,7 +3492,6 @@ def ld_prune(call_expr, r2=0.2, bp_window_size=1000000, memory_per_core=256, kee :class:`.Table` Table of a maximal independent set of variants. """ - hl.utils.no_service_backend('ld_prune') if block_size is None: block_size = BlockMatrix.default_block_size() diff --git a/hail/python/hail/stats/linear_mixed_model.py b/hail/python/hail/stats/linear_mixed_model.py index 62e03b864a5..0368dc4501c 100644 --- a/hail/python/hail/stats/linear_mixed_model.py +++ b/hail/python/hail/stats/linear_mixed_model.py @@ -282,7 +282,6 @@ class LinearMixedModel(object): x=nullable(np.ndarray), p_path=nullable(str)) def __init__(self, py, px, s, y=None, x=None, p_path=None): - hl.utils.no_service_backend('linear_mixed_model') if y is None and x is None: low_rank = False elif y is not None and x is not None: diff --git a/hail/python/hail/table.py b/hail/python/hail/table.py index 842516daf8d..3b99b73c841 100644 --- a/hail/python/hail/table.py +++ b/hail/python/hail/table.py @@ -834,7 +834,7 @@ def annotate(self, **named_exprs) -> 'Table': @typecheck_method(expr=expr_bool, keep=bool) - def filter(self, expr, keep: bool = True) -> 'Table': + def filter(self, expr, keep=True) -> 'Table': """Filter rows. 
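# NOTE (editor's illustration, not part of the patch; the change above only adds a
# type annotation to `keep`). A minimal sketch of filter semantics: keep=True retains
# matching rows, keep=False drops them.
import hail as hl

ht = hl.utils.range_table(10)
evens = ht.filter(ht.idx % 2 == 0)              # keep rows with even idx
small_dropped = ht.filter(ht.idx < 5, keep=False)
print(evens.count(), small_dropped.count())     # 5 and 5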
Examples @@ -1289,18 +1289,7 @@ def checkpoint(self, output: str, overwrite: bool = False, stage_locally: bool = if not _read_if_exists or not hl.hadoop_exists(f'{output}/_SUCCESS'): self.write(output=output, overwrite=overwrite, stage_locally=stage_locally, _codec_spec=_codec_spec) - _assert_type = self._type - _load_refs = False - else: - _assert_type = None - _load_refs = True - return hl.read_table( - output, - _intervals=_intervals, - _filter_intervals=_filter_intervals, - _assert_type=_assert_type, - _load_refs=_load_refs - ) + return hl.read_table(output, _intervals=_intervals, _filter_intervals=_filter_intervals) @typecheck_method(output=str, overwrite=bool, @@ -1756,8 +1745,6 @@ def rekey_f(t): return t if is_interval: - if all_matches: - hl.utils.no_service_backend('interval join with all_matches=True') left = Table(ir.TableIntervalJoin(left._tir, self._tir, uid, all_matches)) else: left = Table(ir.TableLeftJoinRightDistinct(left._tir, self._tir, uid)) @@ -2328,20 +2315,6 @@ def repartition(self, n, shuffle=True) -> 'Table': :class:`.Table` Repartitioned table. """ - if hl.current_backend().requires_lowering: - tmp = hl.utils.new_temp_file() - - if len(self.key) == 0: - uid = Env.get_uid() - tmp2 = hl.utils.new_temp_file() - self.checkpoint(tmp2) - ht = hl.read_table(tmp2).add_index(uid).key_by(uid) - ht.checkpoint(tmp) - return hl.read_table(tmp, _n_partitions=n).key_by().drop(uid) - else: - # checkpoint rather than write to use fast codec - self.checkpoint(tmp) - return hl.read_table(tmp, _n_partitions=n) return Table(ir.TableRepartition( self._tir, n, ir.RepartitionStrategy.SHUFFLE if shuffle else ir.RepartitionStrategy.COALESCE)) @@ -2375,8 +2348,6 @@ def naive_coalesce(self, max_partitions: int) -> 'Table': :class:`.Table` Table with at most `max_partitions` partitions. """ - if hl.current_backend().requires_lowering: - return self.repartition(max_partitions) return Table(ir.TableRepartition( self._tir, max_partitions, ir.RepartitionStrategy.NAIVE_COALESCE)) diff --git a/hail/python/hail/utils/__init__.py b/hail/python/hail/utils/__init__.py index db24c963fc7..8db153d0b31 100644 --- a/hail/python/hail/utils/__init__.py +++ b/hail/python/hail/utils/__init__.py @@ -1,7 +1,7 @@ from .misc import (wrap_to_list, get_env_or_default, uri_path, local_path_uri, new_temp_file, new_local_temp_dir, new_local_temp_file, with_local_temp_file, storage_level, range_matrix_table, range_table, run_command, HailSeedGenerator, timestamp_path, - _dumps_partitions, default_handler, guess_cloud_spark_provider, no_service_backend) + _dumps_partitions, default_handler, guess_cloud_spark_provider) from .hadoop_utils import (hadoop_copy, hadoop_open, hadoop_exists, hadoop_is_dir, hadoop_is_file, hadoop_ls, hadoop_scheme_supported, hadoop_stat, copy_log) from .struct import Struct @@ -52,6 +52,5 @@ 'deduplicate', 'with_local_temp_file', 'guess_cloud_spark_provider', - 'no_service_backend', 'JSONEncoder', ] diff --git a/hail/python/hail/utils/hadoop_utils.py b/hail/python/hail/utils/hadoop_utils.py index 6ea5cac3ad0..8e8c1a7ea3c 100644 --- a/hail/python/hail/utils/hadoop_utils.py +++ b/hail/python/hail/utils/hadoop_utils.py @@ -1,8 +1,4 @@ -import gzip -import io -import os.path -from typing import Any, Dict, List - +from typing import Dict, List from hail.fs.hadoop_fs import HadoopFS from hail.utils.java import Env from hail.typecheck import typecheck, enumeration @@ -81,20 +77,11 @@ def hadoop_open(path: str, mode: str = 'r', buffer_size: int = 8192): ------- Readable or writable file handle. 
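# NOTE (editor's illustration, not part of the patch): with the extension-based gzip
# wrapping removed below, a caller that still needs transparent decompression outside
# the HadoopFS legacy path could wrap the handle explicitly. The path is hypothetical.
import gzip
import io
import hail as hl

with hl.hadoop_open('data/example.txt.gz', 'rb') as raw:
    with io.TextIOWrapper(gzip.GzipFile(fileobj=raw), encoding='utf-8') as f:
        header = f.readline()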
""" - # pile of hacks to preserve some legacy behavior, like auto gzip + # legacy hack fs = Env.fs() if isinstance(fs, HadoopFS): return fs.legacy_open(path, mode, buffer_size) - _, ext = os.path.splitext(path) - if ext in ('.gz', '.bgz'): - binary_mode = 'wb' if mode[0] == 'w' else 'rb' - file = fs.open(path, binary_mode, buffer_size) - file = gzip.GzipFile(fileobj=file, mode=mode) - if 'b' not in mode: - file = io.TextIOWrapper(file, encoding='utf-8') - else: - file = fs.open(path, mode, buffer_size) - return file + return fs.open(path, mode, buffer_size) @typecheck(src=str, @@ -174,7 +161,7 @@ def hadoop_is_dir(path: str) -> bool: return Env.fs().is_dir(path) -def hadoop_stat(path: str) -> Dict[str, Any]: +def hadoop_stat(path: str) -> Dict: """Returns information about the file or directory at a given path. Notes @@ -201,7 +188,7 @@ def hadoop_stat(path: str) -> Dict[str, Any]: return Env.fs().stat(path).to_legacy_dict() -def hadoop_ls(path: str) -> List[Dict[str, Any]]: +def hadoop_ls(path: str) -> List[Dict]: """Returns information about files at `path`. Notes diff --git a/hail/python/hail/utils/java.py b/hail/python/hail/utils/java.py index 4f20829d0f9..6175d0f2104 100644 --- a/hail/python/hail/utils/java.py +++ b/hail/python/hail/utils/java.py @@ -1,19 +1,9 @@ -from typing import Optional +import warnings import os import sys import re import hail -from hailtop.config import get_user_config - - -def choose_backend(backend: Optional[str] = None) -> str: - return ( - backend - or os.environ.get('HAIL_QUERY_BACKEND', None) - or get_user_config().get('query', 'backend', fallback=None) - or 'spark' - ) class FatalError(Exception): @@ -56,8 +46,21 @@ def hc() -> 'hail.context.HailContext': if not Env._hc: sys.stderr.write("Initializing Hail with default parameters...\n") sys.stderr.flush() - from ..context import init - init() + + backend_name = os.environ.get('HAIL_QUERY_BACKEND', 'spark') + if backend_name == 'service': + from hail.context import init_service + import asyncio + warnings.warn('When using the query service backend, use `await Env._async_hc()\'') + asyncio.get_event_loop().run_until_complete(init_service()) + elif backend_name == 'spark': + from hail.context import init + init() + elif backend_name == 'local': + from hail.context import init_local + init_local() + else: + raise ValueError(f'unknown Hail Query backend: {backend_name}') assert Env._hc is not None return Env._hc @@ -68,10 +71,10 @@ async def _async_hc() -> 'hail.context.HailContext': sys.stderr.write("Initializing Hail with default parameters...\n") sys.stderr.flush() - backend_name = choose_backend() + backend_name = os.environ.get('HAIL_QUERY_BACKEND', 'spark') if backend_name == 'service': - from hail.context import init_batch - await init_batch() + from hail.context import init_service + await init_service() else: return Env.hc() assert Env._hc is not None diff --git a/hail/python/hail/utils/misc.py b/hail/python/hail/utils/misc.py index cdfbbf72d67..e8a00051478 100644 --- a/hail/python/hail/utils/misc.py +++ b/hail/python/hail/utils/misc.py @@ -1,21 +1,20 @@ +from typing import Optional +from typing_extensions import Literal +import os import atexit import datetime +import string import difflib -import json -import os -import re -import secrets import shutil -import string import tempfile +import secrets from collections import defaultdict, Counter -from contextlib import contextmanager -from io import StringIO from random import Random -from typing import Optional +import json +import re from 
urllib.parse import urlparse - -from typing_extensions import Literal +from io import StringIO +from contextlib import contextmanager import hail import hail as hl @@ -67,17 +66,7 @@ def range_matrix_table(n_rows, n_cols, n_partitions=None) -> 'hail.MatrixTable': check_nonnegative_and_in_range('range_matrix_table', 'n_cols', n_cols) if n_partitions is not None: check_positive_and_in_range('range_matrix_table', 'n_partitions', n_partitions) - return hail.MatrixTable(hail.ir.MatrixRead( - hail.ir.MatrixRangeReader(n_rows, n_cols, n_partitions), - _assert_type=hl.tmatrix( - hl.tstruct(), - hl.tstruct(col_idx=hl.tint32), - ['col_idx'], - hl.tstruct(row_idx=hl.tint32), - ['row_idx'], - hl.tstruct() - ) - )) + return hail.MatrixTable(hail.ir.MatrixRead(hail.ir.MatrixRangeReader(n_rows, n_cols, n_partitions))) @typecheck(n=int, n_partitions=nullable(int)) @@ -639,12 +628,3 @@ def guess_cloud_spark_provider() -> Optional[Literal['dataproc', 'hdinsight']]: if 'AZURE_SPARK' in os.environ or 'hdinsight' in os.getenv('CLASSPATH', ''): return 'hdinsight' return None - - -def no_service_backend(unsupported_feature): - from hail import current_backend - from hail.backend.service_backend import ServiceBackend - if isinstance(current_backend(), ServiceBackend): - raise NotImplementedError(f'{unsupported_feature!r} is not yet supported on the service backend.' - f'\n If this is a pressing need, please alert the team on the discussion' - f'\n forum to aid in prioritization: https://discuss.hail.is') diff --git a/hail/python/hail/vds/combiner/variant_dataset_combiner.py b/hail/python/hail/vds/combiner/variant_dataset_combiner.py index 388c21dd510..6708af3bd3f 100644 --- a/hail/python/hail/vds/combiner/variant_dataset_combiner.py +++ b/hail/python/hail/vds/combiner/variant_dataset_combiner.py @@ -100,7 +100,6 @@ def __init__(self, gvcf_info_to_keep: Optional[Collection[str]] = None, gvcf_reference_entry_fields_to_keep: Optional[Collection[str]] = None, ): - hl.utils.no_service_backend('VariantDatasetCombiner') if not (vdses or gvcfs): raise ValueError("one of 'vdses' or 'gvcfs' must be nonempty") if not gvcf_import_intervals: diff --git a/hail/python/hail/vds/variant_dataset.py b/hail/python/hail/vds/variant_dataset.py index b6d13df69f2..62981c87902 100644 --- a/hail/python/hail/vds/variant_dataset.py +++ b/hail/python/hail/vds/variant_dataset.py @@ -25,7 +25,6 @@ def read_vds(path, *, intervals=None, n_partitions=None) -> 'VariantDataset': assert n_partitions is not None reference_data = hl.read_matrix_table(VariantDataset._reference_path(path)) intervals = reference_data._calculate_new_partitions(n_partitions) - assert len(intervals) > 0 reference_data = hl.read_matrix_table(VariantDataset._reference_path(path), _intervals=intervals) variant_data = hl.read_matrix_table(VariantDataset._variants_path(path), _intervals=intervals) return VariantDataset(reference_data, variant_data) diff --git a/hail/python/hailtop/aiotools/copy.py b/hail/python/hailtop/aiotools/copy.py index 9c47445f723..fcf29708f9c 100644 --- a/hail/python/hailtop/aiotools/copy.py +++ b/hail/python/hailtop/aiotools/copy.py @@ -1,9 +1,8 @@ from typing import List, Optional, Dict -import argparse -import asyncio import json +import asyncio import logging -import sys +import argparse import uvloop from concurrent.futures import ThreadPoolExecutor @@ -19,7 +18,7 @@ async def copy(*, gcs_kwargs: Optional[dict] = None, azure_kwargs: Optional[dict] = None, s3_kwargs: Optional[dict] = None, - transfers: List[Transfer], + transfers: List[Transfer] ) -> 
None: with ThreadPoolExecutor() as thread_pool: if max_simultaneous_transfers is None: @@ -82,8 +81,8 @@ async def main() -> None: parser = argparse.ArgumentParser(description='Hail copy tool') parser.add_argument('requester_pays_project', type=str, help='a JSON string indicating the Google project to which to charge egress costs') - parser.add_argument('files', type=str, nargs='?', - help='a JSON array of JSON objects indicating from where and to where to copy files. If empty or "-", read the array from standard input instead') + parser.add_argument('files', type=str, + help='a JSON array of JSON objects indicating from where and to where to copy files') parser.add_argument('--max-simultaneous-transfers', type=int, help='The limit on the number of simultaneous transfers. Large files are uploaded as multiple transfers. This parameter sets an upper bound on the number of open source and destination files.') parser.add_argument('-v', '--verbose', action='store_const', @@ -96,8 +95,6 @@ async def main() -> None: logging.root.setLevel(logging.INFO) requester_pays_project = json.loads(args.requester_pays_project) - if args.files is None or args.files == '-': - args.files = sys.stdin.read() files = json.loads(args.files) gcs_kwargs = {'project': requester_pays_project} diff --git a/hail/python/hailtop/batch_client/aioclient.py b/hail/python/hailtop/batch_client/aioclient.py index f3611aa1637..9fb1d9e0aa0 100644 --- a/hail/python/hailtop/batch_client/aioclient.py +++ b/hail/python/hailtop/batch_client/aioclient.py @@ -431,8 +431,8 @@ def create_job(self, image: str, command: List[str], *, mount_docker_socket: boo {'command': command, 'image': image, 'mount_docker_socket': mount_docker_socket, 'type': 'docker'}, **kwargs ) - def create_jvm_job(self, jar_spec: Dict[str, str], argv: List[str], **kwargs): - return self._create_job({'type': 'jvm', 'jar_spec': jar_spec, 'command': argv}, **kwargs) + def create_jvm_job(self, command: List[str], **kwargs): + return self._create_job({'command': command, 'type': 'jvm'}, **kwargs) def _create_job(self, process: dict, diff --git a/hail/python/hailtop/config/user_config.py b/hail/python/hailtop/config/user_config.py index 809d580a3d5..a7d00ec33cd 100644 --- a/hail/python/hailtop/config/user_config.py +++ b/hail/python/hailtop/config/user_config.py @@ -31,12 +31,15 @@ def get_user_config() -> configparser.ConfigParser: if user_config is None: user_config = configparser.ConfigParser() config_file = get_user_config_path() + os.makedirs(config_file.parent, exist_ok=True) # in older versions, the config file was accidentally named # config.yaml, if the new config does not exist, and the old # one does, silently rename it old_path = config_file.with_name('config.yaml') if old_path.exists() and not config_file.exists(): old_path.rename(config_file) + else: + config_file.touch(exist_ok=True) user_config.read(config_file) return user_config diff --git a/hail/python/hailtop/hailctl/config/cli.py b/hail/python/hailtop/hailctl/config/cli.py index fb209a7f831..67d35ea5af9 100644 --- a/hail/python/hailtop/hailctl/config/cli.py +++ b/hail/python/hailtop/hailctl/config/cli.py @@ -1,4 +1,3 @@ -import os import sys import argparse import re @@ -126,12 +125,7 @@ def main(args): if section not in config: config[section] = {} config[section][key] = args.value - try: - f = open(config_file, 'w', encoding='utf-8') - except FileNotFoundError: - os.makedirs(config_file.parent, exist_ok=True) - f = open(config_file, 'w', encoding='utf-8') - with f: + with open(config_file, 'w', 
encoding='utf-8') as f: config.write(f) sys.exit(0) if args.module == 'unset': diff --git a/hail/python/hailtop/utils/__init__.py b/hail/python/hailtop/utils/__init__.py index fb31c3cd644..2906ee619a9 100644 --- a/hail/python/hailtop/utils/__init__.py +++ b/hail/python/hailtop/utils/__init__.py @@ -1,6 +1,4 @@ -from .time import ( - time_msecs, time_msecs_str, humanize_timedelta_msecs, parse_timestamp_msecs, - time_ns) +from .time import time_msecs, time_msecs_str, humanize_timedelta_msecs, parse_timestamp_msecs from .utils import ( unzip, async_to_blocking, blocking_to_async, AsyncWorkerPool, bounded_gather, grouped, sync_sleep_and_backoff, sleep_and_backoff, is_transient_error, @@ -91,5 +89,4 @@ 'retry_all_errors_n_times', 'parse_timestamp_msecs', 'Timings', - 'time_ns', ] diff --git a/hail/python/hailtop/utils/time.py b/hail/python/hailtop/utils/time.py index 8a1ca5b3118..56a2448f6ce 100644 --- a/hail/python/hailtop/utils/time.py +++ b/hail/python/hailtop/utils/time.py @@ -7,10 +7,6 @@ def time_msecs() -> int: return int(time.time() * 1000 + 0.5) -def time_ns() -> int: - return time.monotonic_ns() - - def time_msecs_str(t) -> str: return datetime.datetime.utcfromtimestamp(t / 1000).strftime( '%Y-%m-%dT%H:%M:%SZ') diff --git a/hail/python/hailtop/utils/utils.py b/hail/python/hailtop/utils/utils.py index 834c7a52681..29d603090e3 100644 --- a/hail/python/hailtop/utils/utils.py +++ b/hail/python/hailtop/utils/utils.py @@ -388,10 +388,9 @@ def call(self, f, *args, **kwargs) -> asyncio.Task: self._counter += 1 async def run_and_cleanup(): - retval = None try: async with self._sema: - retval = await f(*args, **kwargs) + await f(*args, **kwargs) except asyncio.CancelledError: pass except: @@ -403,11 +402,10 @@ async def run_and_cleanup(): log.info('discarding exception', exc_info=True) if self._pending is None: - return retval + return del self._pending[id] if not self._pending: self._done_event.set() - return retval t = asyncio.create_task(run_and_cleanup()) self._pending[id] = t @@ -839,13 +837,6 @@ async def run_if_changed(changed, f, *args, **kwargs): while True: changed.clear() should_wait = await f(*args, **kwargs) - # 0.5 is arbitrary, but should be short enough not to greatly - # increase latency and long enough to reduce the impact of - # wasteful spinning when `should_wait` is always true and the - # event is constantly being set. This was instated to - # avoid wasteful repetition of scheduling loops, but - # might not always be desirable, especially in very low-latency batches. 
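# NOTE (editor's illustration, not part of the patch): the loop edited here is an
# event-driven retry loop — call f, and if it reports nothing left to do, park on an
# asyncio.Event until another task signals a change. A standalone sketch of that shape,
# without the 0.5 s throttle that the hunk below removes.
import asyncio

async def run_if_changed_sketch(changed: asyncio.Event, f, *args, **kwargs):
    while True:
        changed.clear()
        should_wait = await f(*args, **kwargs)
        if should_wait:
            await changed.wait()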
- await asyncio.sleep(0.5) if should_wait: await changed.wait() diff --git a/hail/python/requirements.txt b/hail/python/requirements.txt index 67ccbee0986..da378786689 100644 --- a/hail/python/requirements.txt +++ b/hail/python/requirements.txt @@ -10,12 +10,12 @@ botocore>=1.20,<2.0 decorator<5 Deprecated>=1.2.10,<1.3 dill>=0.3.1.1,<0.4 +gcsfs==2021.* google-auth==1.27.0 google-cloud-storage==1.25.* humanize==1.0.0 hurry.filesize==0.9 janus>=0.6,<1.1 -Jinja2==3.0.3 nest_asyncio==1.5.4 numpy<2 orjson==3.6.4 diff --git a/hail/python/setup.py b/hail/python/setup.py index a00cc756e64..527cae199f1 100755 --- a/hail/python/setup.py +++ b/hail/python/setup.py @@ -47,7 +47,6 @@ package_data={ 'hail': ['hail_pip_version', 'hail_version', - 'hail_revision', 'experimental/datasets.json'], 'hail.backend': ['hail-all-spark.jar'], 'hailtop': ['hail_version', 'py.typed'], diff --git a/hail/python/test/hail/backend/__init__.py b/hail/python/test/hail/backend/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/hail/python/test/hail/backend/test_service_backend.py b/hail/python/test/hail/backend/test_service_backend.py deleted file mode 100644 index ecc3b50e69d..00000000000 --- a/hail/python/test/hail/backend/test_service_backend.py +++ /dev/null @@ -1,28 +0,0 @@ -import hail as hl - -from ..helpers import skip_unless_service_backend - -@skip_unless_service_backend() -def test_tiny_driver_has_tiny_memory(): - try: - hl.utils.range_table(100_000_000, 50).to_pandas() - except Exception as exc: - assert 'java.lang.OutOfMemoryError: Java heap space' in exc.args[0] - else: - assert Fail - -@skip_unless_service_backend() -def test_big_driver_has_big_memory(): - old_driver_cores = hl.current_backend().driver_cores - old_driver_memory = hl.current_backend().driver_memory - try: - hl.current_backend().driver_cores = 8 - hl.current_backend().driver_memory = 'highmem' - t = hl.utils.range_table(100_000_000, 50) - # The pytest (client-side) worker dies if we try to realize all 100M rows in memory. - # Instead, we realize the 100M rows in memory on the driver and then take just the first 10M - # rows back to the client. 
- hl.eval(t.aggregate(hl.agg.collect(t.idx), _localize=False)[:10_000_000]) - finally: - hl.current_backend().driver_cores = old_driver_cores - hl.current_backend().driver_memory = old_driver_memory diff --git a/hail/python/test/hail/experimental/test_experimental.py b/hail/python/test/hail/experimental/test_experimental.py index 8995bd71978..b8b45aa9c6c 100644 --- a/hail/python/test/hail/experimental/test_experimental.py +++ b/hail/python/test/hail/experimental/test_experimental.py @@ -11,7 +11,6 @@ class Tests(unittest.TestCase): @fails_service_backend() - @fails_local_backend def test_ld_score(self): ht = hl.import_table(doctest_resource('ldsc.annot'), @@ -118,6 +117,7 @@ def test_import_keyby_count_ldsc_lowered_shuffle(self): @pytest.mark.unchecked_allocator + @skip_when_service_backend('hangs >5 minutes; last message is "all results compelte" in ServiceBackend.parallelizeAndComputeWithIndex') def test_ld_score_regression(self): ht_scores = hl.import_table( @@ -294,6 +294,7 @@ def test_sparse(self): .drop('a_index', 'was_split').select_entries(*expected_split_mt.entry.keys())) assert mt._same(expected_split_mt) + @fails_service_backend() def test_define_function(self): f1 = hl.experimental.define_function( lambda a, b: (a + 7) * b, hl.tint32, hl.tint32) diff --git a/hail/python/test/hail/expr/test_expr.py b/hail/python/test/hail/expr/test_expr.py index accb1b17325..d726eefe197 100644 --- a/hail/python/test/hail/expr/test_expr.py +++ b/hail/python/test/hail/expr/test_expr.py @@ -64,6 +64,7 @@ def test_random_function(rand_f): test_random_function(lambda: hl.rand_cat(hl.array([1, 1, 1, 1]))) test_random_function(lambda: hl.rand_dirichlet(hl.array([1, 1, 1, 1]))) + @fails_service_backend(reason='need to convert errors to HailUserError') def test_range(self): def same_as_python(*args): self.assertEqual(hl.eval(hl.range(*args)), list(range(*args))) @@ -198,6 +199,7 @@ def test_operators(self): else: self.assertEqual(v, result[k], msg=k) + @fails_service_backend(reason='need to convert errors to HailUserError') def test_array_slicing(self): schema = hl.tstruct(a=hl.tarray(hl.tint32)) rows = [{'a': [1, 2, 3, 4, 5]}] @@ -265,6 +267,7 @@ def test_dict_methods(self): self.assertDictEqual(result, expected) + @fails_service_backend(reason='need to convert errors to HailUserError') def test_dict_missing_error(self): d = hl.dict({'a': 2, 'b': 3}) with pytest.raises(hl.utils.HailUserError, match='Key NA not found in dictionary'): @@ -665,6 +668,7 @@ def test_agg_densify(self): None]), ] + @fails_service_backend(reason='service backend needs to support flags') @with_flags('distributed_scan_comb_op') def test_densify_table(self): ht = hl.utils.range_table(100, n_partitions=33) @@ -1377,11 +1381,13 @@ def test_aggregators_sum_product(self): self.assertTrue(r.sum_x == -15 and r.sum_y == 10 and r.sum_empty == 0 and r.prod_x == -120 and r.prod_y == 0 and r.prod_empty == 1) + @fails_service_backend def test_aggregators_hist(self): table = hl.utils.range_table(11) r = table.aggregate(hl.agg.hist(table.idx - 1, 0, 8, 4)) self.assertTrue(r.bin_edges == [0, 2, 4, 6, 8] and r.bin_freq == [2, 2, 2, 3] and r.n_smaller == 1 and r.n_larger == 1) + @fails_service_backend() def test_aggregators_hist_neg0(self): table = hl.utils.range_table(32) table = table.annotate(d=hl.if_else(table.idx == 11, -0.0, table.idx / 3)) @@ -1391,6 +1397,7 @@ def test_aggregators_hist_neg0(self): self.assertEqual(r.n_smaller, 0) self.assertEqual(r.n_larger, 1) + @fails_service_backend() def test_aggregators_hist_nan(self): ht = 
hl.utils.range_table(3).annotate(x=hl.float('nan')) r = ht.aggregate(hl.agg.hist(ht.x, 0, 10, 2)) @@ -1426,6 +1433,7 @@ def test_aggregator_cse(self): # r2adj = sumfit$adj.r.squared # f = sumfit$fstatistic # p = pf(f[1],f[2],f[3],lower.tail=F) + @fails_service_backend() def test_aggregators_linreg(self): t = hl.Table.parallelize([ {"y": None, "x": 1.0}, @@ -1483,6 +1491,7 @@ def test_aggregators_linreg(self): self.assertAlmostEqual(r.multiple_p_value, 0.56671386) self.assertAlmostEqual(r.n, 5) + @fails_service_backend() def test_linreg_no_data(self): ht = hl.utils.range_table(1).filter(False) r = ht.aggregate(hl.agg.linreg(ht.idx, 0)) @@ -1598,6 +1607,7 @@ def test_joins_inside_aggregators(self): table2 = hl.utils.range_table(10) self.assertEqual(table.aggregate(hl.agg.count_where(hl.is_defined(table2[table.idx]))), 10) + @fails_service_backend() def test_switch(self): x = hl.literal('1') na = hl.missing(tint32) @@ -1642,6 +1652,7 @@ def test_switch(self): hl.eval(hl.switch(x).when('0', 0).or_error("foo")) assert '.or_error("foo")' in str(exc.value) + @fails_service_backend() def test_case(self): def make_case(x): x = hl.literal(x) @@ -2554,6 +2565,7 @@ def test_int_typecheck(self): (hl.literal(None, dtype='int32'), None), (hl.literal(None, dtype='int64'), None)]) + @fails_service_backend() def test_is_transition(self): _test_many_equal([ (hl.is_transition("A", "G"), True), @@ -2563,6 +2575,7 @@ def test_is_transition(self): (hl.is_transition("ACA", "AGA"), False), (hl.is_transition("A", "T"), False)]) + @fails_service_backend() def test_is_transversion(self): _test_many_equal([ (hl.is_transversion("A", "T"), True), @@ -2571,6 +2584,7 @@ def test_is_transversion(self): (hl.is_transversion("AA", "T"), False), (hl.is_transversion("ACCC", "ACCT"), False)]) + @fails_service_backend() def test_is_snp(self): _test_many_equal([ (hl.is_snp("A", "T"), True), @@ -2580,30 +2594,36 @@ def test_is_snp(self): (hl.is_snp("AT", "AG"), True), (hl.is_snp("ATCCC", "AGCCC"), True)]) + @fails_service_backend() def test_is_mnp(self): _test_many_equal([ (hl.is_mnp("ACTGAC", "ATTGTT"), True), (hl.is_mnp("CA", "TT"), True)]) + @fails_service_backend() def test_is_insertion(self): _test_many_equal([ (hl.is_insertion("A", "ATGC"), True), (hl.is_insertion("ATT", "ATGCTT"), True)]) + @fails_service_backend() def test_is_deletion(self): self.assertTrue(hl.eval(hl.is_deletion("ATGC", "A"))) self.assertTrue(hl.eval(hl.is_deletion("GTGTA", "GTA"))) + @fails_service_backend() def test_is_indel(self): self.assertTrue(hl.eval(hl.is_indel("A", "ATGC"))) self.assertTrue(hl.eval(hl.is_indel("ATT", "ATGCTT"))) self.assertTrue(hl.eval(hl.is_indel("ATGC", "A"))) self.assertTrue(hl.eval(hl.is_indel("GTGTA", "GTA"))) + @fails_service_backend() def test_is_complex(self): self.assertTrue(hl.eval(hl.is_complex("CTA", "ATTT"))) self.assertTrue(hl.eval(hl.is_complex("A", "TATGC"))) + @fails_service_backend() def test_is_star(self): self.assertTrue(hl.eval(hl.is_star("ATC", "*"))) self.assertTrue(hl.eval(hl.is_star("A", "*"))) @@ -2612,6 +2632,7 @@ def test_is_strand_ambiguous(self): self.assertTrue(hl.eval(hl.is_strand_ambiguous("A", "T"))) self.assertFalse(hl.eval(hl.is_strand_ambiguous("G", "T"))) + @fails_service_backend() def test_allele_type(self): self.assertEqual( hl.eval(hl.tuple(( @@ -3161,6 +3182,8 @@ def test_show_expression(self): +---------+ ''' + @fails_service_backend() + @fails_local_backend() def test_export(self): for delimiter in ['\t', ',', '@']: for missing in ['NA', 'null']: @@ -3628,6 +3651,7 @@ def 
test_set_operators(self): self.assert_evals_to(hl.set([1, 2, 3]) ^ set([3, 4, 5]), set([1, 2, 4, 5])) self.assert_evals_to(set([1, 2, 3]) ^ hl.set([3, 4, 5]), set([1, 2, 4, 5])) + @fails_service_backend() def test_uniroot(self): tol = 1.220703e-4 @@ -4105,6 +4129,7 @@ def test_bit_shift_edge_cases(self): assert hl.eval(hl.bit_rshift(hl.int64(-1), 64)) == -1 assert hl.eval(hl.bit_rshift(hl.int64(-11), 64, logical=True)) == 0 + @fails_service_backend() def test_bit_shift_errors(self): with pytest.raises(hl.utils.HailUserError): hl.eval(hl.bit_lshift(1, -1)) @@ -4248,6 +4273,7 @@ def test_parse_json(self): ] assert hl.eval(hl._compare(hl.tuple(values), hl.tuple(hl.parse_json(hl.json(v), v.dtype) for v in values)) == 0) + @fails_service_backend() def test_expr_persist(self): # need to test laziness, so we will overwrite a file ht2 = hl.utils.range_table(100) @@ -4309,14 +4335,3 @@ def test_enumerate(self): [('foo', 10), ('bar', 11), ('baz', 12)], [] ) - - def test_split_line(self): - s1 = '1 2 3 4 5 6 7' - s2 = '1 2 "3 4" "a b c d"' - s3 = '"1" "2"' - - assert hl.eval(hl.str(s1)._split_line(' ', ['NA'], quote=None, regex=False)) == s1.split(' ') - assert hl.eval(hl.str(s1)._split_line(r'\s+', ['NA'], quote=None, regex=True)) == s1.split(' ') - assert hl.eval(hl.str(s3)._split_line(' ', ['1'], quote='"', regex=False)) == [None, '2'] - assert hl.eval(hl.str(s2)._split_line(' ', ['1', '2'], quote='"', regex=False)) == [None, None, '3 4', 'a b c d'] - assert hl.eval(hl.str(s2)._split_line(r'\s+', ['1', '2'], quote='"', regex=True)) == [None, None, '3 4', 'a b c d'] diff --git a/hail/python/test/hail/expr/test_ndarrays.py b/hail/python/test/hail/expr/test_ndarrays.py index 3fd572cfa39..cef5d401a04 100644 --- a/hail/python/test/hail/expr/test_ndarrays.py +++ b/hail/python/test/hail/expr/test_ndarrays.py @@ -26,6 +26,7 @@ def assert_ndarrays_almost_eq(*expr_and_expected): assert_ndarrays(np.allclose, expr_and_expected) +@fails_service_backend() def test_ndarray_ref(): scalar = 5.0 @@ -61,6 +62,7 @@ def test_ndarray_ref(): assert "Index 4 is out of bounds for axis 0 with size 3" in str(exc.value) +@skip_when_service_backend('slow >800s') def test_ndarray_slice(): np_rect_prism = np.arange(24).reshape((2, 3, 4)) rect_prism = hl.nd.array(np_rect_prism) @@ -202,6 +204,7 @@ def test_ndarray_transposed_slice(): ) +@fails_service_backend() def test_ndarray_eval(): data_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] mishapen_data_list1 = [[4], [1, 2, 3]] @@ -285,6 +288,7 @@ def test_ndarray_shape(): ) +@fails_service_backend(reason='need to convert errors to HailUserError') def test_ndarray_reshape(): np_single = np.array([8]) single = hl.nd.array([8]) @@ -551,6 +555,7 @@ def test_ndarray_transpose(): cube.transpose((1, 1, 1)) assert "Axes cannot contain duplicates" in str(exc.value) +@fails_service_backend(reason='need to convert errors to HailUserError') def test_ndarray_matmul(): np_v = np.array([1, 2]) np_y = np.array([1, 1, 1]) @@ -676,6 +681,7 @@ def test_ndarray_full(): assert hl.eval(hl.nd.full((5, 6, 7), hl.int32(3), dtype=hl.tfloat64)).dtype, np.float64 +@fails_service_backend(reason='need to convert errors to HailUserError') def test_ndarray_arange(): assert_ndarrays_eq( (hl.nd.arange(40), np.arange(40)), @@ -717,6 +723,7 @@ def test_ndarray_diagonal(): assert "2 dimensional" in str(exc.value) +@fails_service_backend(reason='need to convert errors to HailUserError') def test_ndarray_solve_triangular(): a = hl.nd.array([[1, 1], [0, 1]]) b = hl.nd.array([2, 1]) @@ -735,6 +742,7 @@ def 
test_ndarray_solve_triangular(): hl.eval(hl.nd.solve_triangular(a_sing, b_sing)) assert "singular" in str(exc.value), str(exc.value) +@fails_service_backend(reason='need to convert errors to HailUserError') def test_ndarray_solve(): a = hl.nd.array([[1, 2], [3, 5]]) b = hl.nd.array([1, 2]) @@ -1193,4 +1201,4 @@ def test_ndarray_broadcasting_with_decorator(): nd = hl.nd.array([[1.2, 2.3, 3.3], [4.3, 5.3, 6.3]]) nd_floor = hl.eval(hl.nd.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])) nd = hl.eval(hl.floor(nd)) - assert(np.array_equal(nd, nd_floor)) + assert(np.array_equal(nd, nd_floor)) \ No newline at end of file diff --git a/hail/python/test/hail/genetics/test_pedigree.py b/hail/python/test/hail/genetics/test_pedigree.py index c03297cbe35..ca592a8ac9a 100644 --- a/hail/python/test/hail/genetics/test_pedigree.py +++ b/hail/python/test/hail/genetics/test_pedigree.py @@ -29,7 +29,7 @@ def test_trios(self): self.assertEqual(t1.is_female, True) self.assertEqual(t1.is_female, True) self.assertEqual(t1.is_male, False) - + self.assertEqual(t1.is_complete(), False) self.assertEqual(t4.is_complete(), True) self.assertEqual(t5.is_complete(), False) diff --git a/hail/python/test/hail/ggplot/test_ggplot.py b/hail/python/test/hail/ggplot/test_ggplot.py index b568f3fc5f7..9478d9d3b73 100644 --- a/hail/python/test/hail/ggplot/test_ggplot.py +++ b/hail/python/test/hail/ggplot/test_ggplot.py @@ -1,3 +1,4 @@ +# These tests only check that the functions don't error out, they don't check what the output plot looks like. import hail as hl from hail.ggplot import * import numpy as np @@ -33,6 +34,7 @@ def test_manhattan_plot(): expected_ticks = ('1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', 'X', 'Y') assert pfig.layout.xaxis.ticktext == expected_ticks +@fails_service_backend() def test_histogram(): num_rows = 101 num_groups = 5 @@ -66,37 +68,3 @@ def test_geom_ribbon(): ht = hl.utils.range_table(20) fig = ggplot(ht, aes(x=ht.idx, ymin=ht.idx * 2, ymax=ht.idx * 3)) + geom_ribbon() fig.to_plotly() - - -def test_default_scale_no_repeat_colors(): - num_rows = 20 - ht = hl.utils.range_table(num_rows) - fig = ggplot(ht, aes(x=ht.idx, y=ht.idx, color=hl.str(ht.idx))) + geom_point() - pfig = fig.to_plotly() - - scatter_colors = [scatter['marker']['color'] for scatter in pfig['data']] - num_unique_colors = len(set(scatter_colors)) - assert num_unique_colors == num_rows - - -def test_scale_color_manual(): - num_rows = 4 - colors = set(["red", "blue"]) - ht = hl.utils.range_table(num_rows) - fig = ggplot(ht, aes(x=ht.idx, y=ht.idx, color=hl.str(ht.idx % 2))) + geom_point() + scale_color_manual(values=list(colors)) - pfig = fig.to_plotly() - - assert set([scatter.marker.color for scatter in pfig.data]) == colors - - -def test_weighted_bar(): - x = hl.array([2, 3, 3, 3, 4, 5, 2]) - w = hl.array([1, 2, 3, 4, 5, 6, 7]) - ht = hl.utils.range_table(7) - ht = ht.annotate(x=x[ht.idx], w=w[ht.idx]) - fig = ggplot(ht) + geom_bar(aes(x=ht.x, weight=ht.w)) - - result = [8, 9, 5, 6] - for idx, y in enumerate(fig.to_plotly().data[0].y): - assert(y == result[idx]) - diff --git a/hail/python/test/hail/helpers.py b/hail/python/test/hail/helpers.py index 3b4508ff173..906c2dc1df3 100644 --- a/hail/python/test/hail/helpers.py +++ b/hail/python/test/hail/helpers.py @@ -1,11 +1,13 @@ +import asyncio import os from timeit import default_timer as timer import unittest import pytest from decorator import decorator -from hail.utils.java import Env, choose_backend +from 
hail.utils.java import Env import hail as hl +from hail.backend.local_backend import LocalBackend _initialized = False @@ -13,7 +15,7 @@ def startTestHailContext(): global _initialized if not _initialized: - backend_name = choose_backend() + backend_name = os.environ.get('HAIL_QUERY_BACKEND', 'spark') if backend_name == 'spark': hl.init(master='local[2]', min_block_size=0, quiet=True) else: @@ -140,32 +142,23 @@ def wrapper(func, *args, **kwargs): return wrapper -def skip_unless_service_backend(message='only relevant to service backend'): - from hail.backend.service_backend import ServiceBackend - @decorator - def wrapper(func, *args, **kwargs): - if not isinstance(hl.utils.java.Env.backend(), ServiceBackend): - raise unittest.SkipTest(message) - else: - return func(*args, **kwargs) - - return wrapper - - fails_local_backend = pytest.mark.xfail( - choose_backend() == 'local', + os.environ.get('HAIL_QUERY_BACKEND') == 'local', reason="doesn't yet work on local backend", strict=True) fails_service_backend = pytest.mark.xfail( - choose_backend() == 'batch', + os.environ.get('HAIL_QUERY_BACKEND') == 'service', reason="doesn't yet work on service backend", strict=True) +def check_spark(): + backend_name = os.environ.get('HAIL_QUERY_BACKEND', 'spark') + return backend_name == 'spark' fails_spark_backend = pytest.mark.xfail( - choose_backend() == 'spark', + check_spark(), reason="doesn't yet work on spark backend", strict=True) @@ -198,7 +191,7 @@ def wrapper(func, *args, **kwargs): def lower_only(): @decorator def wrapper(func, *args, **kwargs): - flags = hl._get_flags('lower', 'lower_only') + flags = hl._get_flags() prev_lower = flags.get('lower') prev_lower_only = flags.get('lower_only') diff --git a/hail/python/test/hail/linalg/test_linalg.py b/hail/python/test/hail/linalg/test_linalg.py index 98b235d4645..369916c64ae 100644 --- a/hail/python/test/hail/linalg/test_linalg.py +++ b/hail/python/test/hail/linalg/test_linalg.py @@ -164,12 +164,9 @@ def test_random_uniform(self): for entry in row: assert entry > 0 - def test_bm_to_numpy(self): - bm = BlockMatrix.from_ndarray(hl.nd.arange(20).map(lambda x: hl.float64(x)).reshape((4, 5))) - np_bm = bm.to_numpy() - self._assert_eq(np_bm, np.arange(20, dtype=np.float64).reshape((4, 5))) - - def test_numpy_round_trip(self): + @fails_service_backend() + @fails_local_backend() + def test_to_from_numpy(self): n_rows = 10 n_cols = 11 data = np.random.rand(n_rows * n_cols) @@ -179,7 +176,7 @@ def test_numpy_round_trip(self): with hl.TemporaryFilename() as bm_f, hl.TemporaryFilename() as a_f: bm.tofile(bm_f) - hl.current_backend().fs.open(a_f, mode='wb').write(a.tobytes()) + a.tofile(a_f) a1 = bm.to_numpy() a2 = BlockMatrix.from_numpy(a, block_size=5).to_numpy() @@ -200,7 +197,7 @@ def test_numpy_round_trip(self): with hl.TemporaryFilename() as bmt_f, hl.TemporaryFilename() as at_f: bmt.tofile(bmt_f) - hl.current_backend().fs.open(at_f, mode='wb').write(at.tobytes()) + at.tofile(at_f) at1 = bmt.to_numpy() at2 = BlockMatrix.from_numpy(at).to_numpy() @@ -216,15 +213,6 @@ def test_numpy_round_trip(self): self._assert_eq(at4, at) self._assert_eq(at5, at) - @fails_service_backend() - @fails_local_backend() - def test_numpy_round_trip_force_blocking(self): - n_rows = 10 - n_cols = 11 - data = np.random.rand(n_rows * n_cols) - a = data.reshape((n_rows, n_cols)) - - bm = BlockMatrix._create(n_rows, n_cols, data.tolist(), block_size=4) self._assert_eq(bm.to_numpy(_force_blocking=True), a) @fails_service_backend() @@ -959,7 +947,6 @@ def test_export_blocks(self): 
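[editor's sketch, not part of the patch] The helpers.py hunk above replaces choose_backend() with direct reads of the HAIL_QUERY_BACKEND environment variable when building the expected-failure markers. A minimal standalone Python sketch of that pattern, with names copied from the diff (the 'spark' default mirrors check_spark(); this is illustrative only):

import os
import pytest

# Markers are evaluated once at import time from HAIL_QUERY_BACKEND,
# mirroring the module-level markers defined in helpers.py.
fails_local_backend = pytest.mark.xfail(
    os.environ.get('HAIL_QUERY_BACKEND') == 'local',
    reason="doesn't yet work on local backend",
    strict=True)

fails_service_backend = pytest.mark.xfail(
    os.environ.get('HAIL_QUERY_BACKEND') == 'service',
    reason="doesn't yet work on service backend",
    strict=True)

fails_spark_backend = pytest.mark.xfail(
    os.environ.get('HAIL_QUERY_BACKEND', 'spark') == 'spark',
    reason="doesn't yet work on spark backend",
    strict=True)

Tests decorated with these markers are reported as expected failures on the matching backend and, because strict=True, fail the run if they unexpectedly pass.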
self._assert_eq(nd, actual) @fails_service_backend() - @fails_local_backend() def test_rectangles_to_numpy(self): nd = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], @@ -1067,6 +1054,7 @@ def test_locus_windows_per_contig(self): f = hl._locus_windows_per_contig([[1.0, 3.0, 4.0], [2.0, 2.0], [5.0]], 1.0) assert hl.eval(f) == ([0, 1, 1, 3, 3, 5], [1, 3, 3, 5, 5, 6]) + @fails_service_backend() def test_locus_windows(self): def assert_eq(a, b): assert np.array_equal(a, np.array(b)), f"a={a}, b={b}" @@ -1146,6 +1134,8 @@ def assert_eq(a, b): hl.linalg.utils.locus_windows(ht.locus, 1.0, coord_expr=ht.cm) assert "missing value for 'coord_expr'" in str(cm.exception) + @fails_service_backend() + @fails_local_backend() def test_write_overwrite(self): with hl.TemporaryDirectory(ensure_exists=False) as path: bm = BlockMatrix.from_numpy(np.array([[0]])) @@ -1225,6 +1215,8 @@ def assert_same_columns_up_to_sign(a, b): s = x.svd(compute_uv=False, complexity_bound=0) assert np.all(s >= 0) + @fails_service_backend() + @fails_local_backend() def test_filtering(self): np_square = np.arange(16, dtype=np.float64).reshape((4, 4)) bm = BlockMatrix.from_numpy(np_square) @@ -1302,6 +1294,8 @@ def test_sparse_transposition(self): sparse_np = sparsify_numpy(np_square, block_size, block_list).T assert np.array_equal(sparse_bm.to_numpy(), sparse_np) + + @fails_service_backend() def test_row_blockmatrix_sum(self): row = BlockMatrix.from_numpy(np.arange(10)) diff --git a/hail/python/test/hail/matrixtable/test_file_formats.py b/hail/python/test/hail/matrixtable/test_file_formats.py index 9d1922524d7..a86e0c21fa0 100644 --- a/hail/python/test/hail/matrixtable/test_file_formats.py +++ b/hail/python/test/hail/matrixtable/test_file_formats.py @@ -37,7 +37,26 @@ class Tests(unittest.TestCase): def test_write(self): create_backward_compatibility_files() - @fails_service_backend() + @skip_when_service_backend('''intermittent worker failure: +> assert backward_compatible_same(all_values_table, ds) + +Caused by: java.lang.AssertionError: assertion failed + at scala.Predef$.assert(Predef.scala:208) + at is.hail.io.BlockingInputBuffer.ensure(InputBuffers.scala:389) + at is.hail.io.BlockingInputBuffer.readInt(InputBuffers.scala:412) + at __C1210collect_distributed_array.__m1218INPLACE_DECODE_r_binary_TO_r_binary(Unknown Source) + at __C1210collect_distributed_array.__m1217INPLACE_DECODE_r_struct_of_r_binaryEND_TO_r_tuple_of_r_binaryEND(Unknown Source) + at __C1210collect_distributed_array.__m1216INPLACE_DECODE_r_struct_of_r_struct_of_r_binaryENDEND_TO_r_struct_of_r_tuple_of_r_binaryENDEND(Unknown Source) + at __C1210collect_distributed_array.__m1215DECODE_r_struct_of_r_struct_of_r_struct_of_r_binaryENDENDEND_TO_SBaseStructPointer(Unknown Source) + at __C1210collect_distributed_array.apply(Unknown Source) + at __C1210collect_distributed_array.apply(Unknown Source) + at is.hail.backend.BackendUtils.$anonfun$collectDArray$2(BackendUtils.scala:31) + at is.hail.utils.package$.using(package.scala:627) + at is.hail.annotations.RegionPool.scopedRegion(RegionPool.scala:144) + at is.hail.backend.BackendUtils.$anonfun$collectDArray$1(BackendUtils.scala:30) + at is.hail.backend.service.Worker$.main(Worker.scala:120) + at is.hail.backend.service.Worker.main(Worker.scala) + ... 
11 more''') def test_backward_compatability(self): import os @@ -80,4 +99,4 @@ def backward_compatible_same(current, old): f = os.path.join(matrix_table_dir, '{}.hmt'.format(i)) n += 1 - assert n == 88, f'{resource_dir!r} {versions!r}' + assert n == 88 diff --git a/hail/python/test/hail/matrixtable/test_matrix_table.py b/hail/python/test/hail/matrixtable/test_matrix_table.py index d9cd1d87586..7019477ad7b 100644 --- a/hail/python/test/hail/matrixtable/test_matrix_table.py +++ b/hail/python/test/hail/matrixtable/test_matrix_table.py @@ -639,6 +639,7 @@ def test_table_product_join(self): rows = left.rows() self.assertTrue(rows.all(rows.matches.map(lambda x: x.idx) == hl.range(0, rows.row_idx))) + @fails_service_backend() def test_naive_coalesce(self): mt = self.get_mt(min_partitions=8) self.assertEqual(mt.n_partitions(), 8) @@ -1005,6 +1006,8 @@ def test_field_groups(self): (df.GT == df.entry_struct.GT)) & (df.AD == df.entry_struct.AD)))) + @fails_service_backend() + @fails_local_backend() def test_filter_partitions(self): ds = self.get_mt(min_partitions=8) self.assertEqual(ds.n_partitions(), 8) @@ -1954,6 +1957,8 @@ def test_read_write_balding_nichols_model(): mt.write(tmp_file) assert hl.read_matrix_table(tmp_file)._same(mt) +@fails_service_backend() +@fails_local_backend() def test_read_partitions(): ht = hl.utils.range_matrix_table(n_rows=100, n_cols=10, n_partitions=3) path = new_temp_file() diff --git a/hail/python/test/hail/methods/relatedness/test_pc_relate.py b/hail/python/test/hail/methods/relatedness/test_pc_relate.py index 3148a41e412..dbc8d4cd350 100644 --- a/hail/python/test/hail/methods/relatedness/test_pc_relate.py +++ b/hail/python/test/hail/methods/relatedness/test_pc_relate.py @@ -28,7 +28,6 @@ def test_pc_relate_against_R_truth(): @fails_service_backend() -@fails_local_backend() def test_pc_relate_simple_example(): gs = hl.literal( [[0, 0, 0, 0, 1, 1, 1, 1], diff --git a/hail/python/test/hail/methods/test_family_methods.py b/hail/python/test/hail/methods/test_family_methods.py index db19d0706a0..13bd829b340 100644 --- a/hail/python/test/hail/methods/test_family_methods.py +++ b/hail/python/test/hail/methods/test_family_methods.py @@ -8,6 +8,7 @@ class Tests(unittest.TestCase): + @fails_service_backend() def test_trio_matrix(self): """ This test depends on certain properties of the trio matrix VCF and @@ -80,6 +81,7 @@ def test_trio_matrix(self): self.assertEqual(e_cols.row.dtype, t_cols.row.dtype) self.assertTrue(e_cols._same(t_cols)) + @fails_service_backend() def test_trio_matrix_null_keys(self): ped = hl.Pedigree.read(resource('triomatrix.fam')) ht = hl.import_fam(resource('triomatrix.fam')) @@ -99,6 +101,7 @@ def test_trio_matrix_incomplete_trios(self): hl.trio_matrix(mt, ped, complete_trios=False) + @fails_service_backend() def test_mendel_errors(self): mt = hl.import_vcf(resource('mendel.vcf')) ped = hl.Pedigree.read(resource('mendel.fam')) @@ -214,6 +217,7 @@ def test_tdt(self): bad.order_by(hl.asc(bad.v)).show() self.fail('Found rows in violation of the predicate (see show output)') + @fails_service_backend() def test_de_novo(self): mt = hl.import_vcf(resource('denovo.vcf')) mt = mt.filter_rows(mt.locus.in_y_par(), keep=False) # de_novo_finder doesn't know about y PAR diff --git a/hail/python/test/hail/methods/test_impex.py b/hail/python/test/hail/methods/test_impex.py index 5b27d3b360e..8d2b7cc607f 100644 --- a/hail/python/test/hail/methods/test_impex.py +++ b/hail/python/test/hail/methods/test_impex.py @@ -2,7 +2,6 @@ import os import shutil import unittest - 
from unittest import mock from avro.datafile import DataFileReader @@ -12,7 +11,7 @@ import pytest import hail as hl from ..helpers import * -from hail.utils import new_temp_file, FatalError, run_command, uri_path, HailUserError +from hail.utils import new_temp_file, FatalError, run_command, uri_path setUpModule = startTestHailContext tearDownModule = stopTestHailContext @@ -40,6 +39,7 @@ class VCFTests(unittest.TestCase): def test_info_char(self): self.assertEqual(hl.import_vcf(resource('infochar.vcf')).count_rows(), 1) + @fails_service_backend() def test_import_export_same(self): for i in range(10): mt = hl.import_vcf(resource(f'random_vcfs/{i}.vcf.bgz')) @@ -80,11 +80,14 @@ def test_undeclared_info(self): def test_can_import_bad_number_flag(self): hl.import_vcf(resource('bad_flag_number.vcf')).rows()._force_count() + @fails_service_backend() def test_malformed(self): with self.assertRaisesRegex(FatalError, "invalid character"): mt = hl.import_vcf(resource('malformed.vcf')) mt._force_count_rows() + @fails_service_backend() + @fails_local_backend() def test_not_identical_headers(self): t = new_temp_file(extension='vcf') mt = hl.import_vcf(resource('sample.vcf')) @@ -225,6 +228,7 @@ def test_vcf_unsorted_alleles(self): mt = hl.import_vcf(resource('sample.pksorted.vcf'), n_partitions=4) mt.rows()._force_count() + @fails_service_backend() def test_import_vcf_skip_invalid_loci(self): mt = hl.import_vcf(resource('skip_invalid_loci.vcf'), reference_genome='GRCh37', skip_invalid_loci=True) @@ -258,6 +262,8 @@ def test_import_vcf_invalid_float_type(self): with self.assertRaises(TypeError): mt = hl.import_vcf(resource('small-ds.vcf'), entry_float_type=hl.tint64) + @fails_service_backend() + @fails_local_backend() def test_export_vcf(self): dataset = hl.import_vcf(resource('sample.vcf.bgz')) vcf_metadata = hl.get_vcf_metadata(resource('sample.vcf.bgz')) @@ -276,6 +282,8 @@ def test_export_vcf(self): # are py4 JavaMaps, not dicts, so can't use assertDictEqual self.assertEqual(vcf_metadata, metadata_imported) + @fails_service_backend() + @fails_local_backend() def test_export_vcf_empty_format(self): mt = hl.import_vcf(resource('sample.vcf.bgz')).select_entries() tmp = new_temp_file(extension="vcf") @@ -283,6 +291,8 @@ def test_export_vcf_empty_format(self): assert hl.import_vcf(tmp)._same(mt) + @fails_service_backend() + @fails_local_backend() def test_export_vcf_no_gt(self): mt = hl.import_vcf(resource('sample.vcf.bgz')).drop('GT') tmp = new_temp_file(extension="vcf") @@ -290,6 +300,7 @@ def test_export_vcf_no_gt(self): assert hl.import_vcf(tmp)._same(mt) + @fails_service_backend() def test_export_vcf_no_alt_alleles(self): mt = hl.import_vcf(resource('gvcfs/HG0096_excerpt.g.vcf'), reference_genome='GRCh38') self.assertEqual(mt.filter_rows(hl.len(mt.alleles) == 1).count_rows(), 5) @@ -299,6 +310,8 @@ def test_export_vcf_no_alt_alleles(self): mt2 = hl.import_vcf(tmp, reference_genome='GRCh38') self.assertTrue(mt._same(mt2)) + @fails_service_backend() + @fails_local_backend() def test_export_sites_only_from_table(self): mt = hl.import_vcf(resource('sample.vcf.bgz'))\ .select_entries()\ @@ -348,7 +361,6 @@ def test_tabix_export(self): self.import_gvcfs_sample_vcf(tmp) @fails_service_backend() - @fails_local_backend() def test_import_gvcfs(self): path = resource('sample.vcf.bgz') self.import_gvcfs_sample_vcf(path) @@ -460,6 +472,7 @@ def test_combiner_works(self): self.assertEqual(len(parts), comb.n_partitions()) comb._force_count_rows() + @fails_service_backend() def test_haploid_combiner_ok(self): from 
hail.experimental.vcf_combiner.vcf_combiner import transform_gvcf # make a combiner table @@ -527,33 +540,39 @@ def test_missing_float_entries(self): assert gl_gp == [hl.Struct(GL=[None, None, None], GP=[0.22, 0.5, 0.27]), hl.Struct(GL=[None, None, None], GP=[None, None, None])] + @fails_service_backend() + @fails_local_backend() def test_same_bgzip(self): mt = hl.import_vcf(resource('sample.vcf'), min_partitions=4) f = new_temp_file(extension='vcf.bgz') hl.export_vcf(mt, f) assert hl.import_vcf(f)._same(mt) - def test_vcf_parallel_separate_header_export(self): - fs = hl.current_backend().fs + @fails_service_backend() + @fails_local_backend() + def test_vcf_parallel_export(self): + import glob def concat_files(outpath, inpaths): - with fs.open(outpath, 'wb') as outfile: + with open(outpath, 'wb') as outfile: for path in inpaths: - with fs.open(path, 'rb') as infile: + with open(path, 'rb') as infile: shutil.copyfileobj(infile, outfile) mt = hl.import_vcf(resource('sample.vcf'), min_partitions=4) f = new_temp_file(extension='vcf.bgz') hl.export_vcf(mt, f, parallel='separate_header') - stat = fs.stat(f) - assert stat - assert stat.is_dir() - shard_paths = [info.path for info in fs.ls(f) - if os.path.splitext(info.path)[-1] == '.bgz'] - assert shard_paths + shard_paths = glob.glob(f + "/*.bgz") shard_paths.sort() nf = new_temp_file(extension='vcf.bgz') concat_files(nf, shard_paths) + assert hl.import_vcf(nf)._same(mt) + f = new_temp_file(extension='vcf.bgz') + hl.export_vcf(mt, f, parallel='composable') + shard_paths = glob.glob(f + "/*.bgz") + shard_paths.sort() + nf = new_temp_file(extension='vcf.bgz') + concat_files(nf, shard_paths) assert hl.import_vcf(nf)._same(mt) @fails_service_backend() @@ -562,6 +581,8 @@ def test_custom_rg_import(self): mt = hl.import_vcf(resource('custom_rg.vcf'), reference_genome=rg) assert mt.locus.collect() == [hl.Locus('D', 123, reference_genome=rg)] + @fails_service_backend() + @fails_local_backend() def test_sorted(self): mt = hl.utils.range_matrix_table(10, 10, n_partitions=4).filter_cols(False) mt = mt.key_cols_by(s='dummy') @@ -571,7 +592,7 @@ def test_sorted(self): hl.export_vcf(mt, f) last = 0 - with hl.current_backend().fs.open(f, 'r') as i: + with open(uri_path(f), 'r') as i: for line in i: if line.startswith('#'): continue @@ -580,6 +601,8 @@ def test_sorted(self): assert pos >= last last = pos + @fails_service_backend() + @fails_local_backend() def test_empty_read_write(self): mt = hl.import_vcf(resource('sample.vcf'), min_partitions=4).filter_rows(False) @@ -589,8 +612,8 @@ def test_empty_read_write(self): hl.export_vcf(mt, out1) hl.export_vcf(mt, out2) - assert hl.current_backend().fs.stat(out1).size > 0 - assert hl.current_backend().fs.stat(out2).size > 0 + assert os.stat(uri_path(out1)).st_size > 0 + assert os.stat(uri_path(out2)).st_size > 0 assert hl.import_vcf(out1)._same(mt) assert hl.import_vcf(out2)._same(mt) @@ -601,6 +624,8 @@ def test_empty_import_vcf_group_by_collect(self): groups = ht.group_by(the_key=ht.key).aggregate(values=hl.agg.collect(ht.row_value)).collect() assert not groups + @fails_service_backend() + @fails_local_backend() def test_format_header(self): mt = hl.import_vcf(resource('sample2.vcf')) metadata = hl.get_vcf_metadata(resource('sample2.vcf')) @@ -608,7 +633,7 @@ def test_format_header(self): hl.export_vcf(mt, f, metadata=metadata) s = set() - with hl.current_backend().fs.open(f, 'r') as i: + with open(uri_path(f), 'r') as i: for line in i: if line.startswith('##FORMAT'): s.add(line.strip()) @@ -621,11 +646,13 @@ def 
test_format_header(self): '##FORMAT=', } + @fails_service_backend() + @fails_local_backend() def test_format_genotypes(self): mt = hl.import_vcf(resource('sample.vcf')) f = new_temp_file(extension='vcf') hl.export_vcf(mt, f) - with hl.current_backend().fs.open(f, 'r') as i: + with open(uri_path(f), 'r') as i: for line in i: if line.startswith('20\t13029920'): expected = "GT:AD:DP:GQ:PL\t1/1:0,6:6:18:234,18,0\t1/1:0,4:4:12:159,12,0\t" \ @@ -638,11 +665,13 @@ def test_format_genotypes(self): else: assert False, 'expected pattern not found' + @fails_service_backend() + @fails_local_backend() def test_contigs_header(self): mt = hl.import_vcf(resource('sample.vcf')).filter_cols(False) f = new_temp_file(extension='vcf') hl.export_vcf(mt, f) - with hl.current_backend().fs.open(f, 'r') as i: + with open(uri_path(f), 'r') as i: for line in i: if line.startswith('##contig=' @@ -650,6 +679,8 @@ def test_contigs_header(self): else: assert False, 'expected pattern not found' + @fails_service_backend() + @fails_local_backend() def test_metadata_argument(self): mt = hl.import_vcf(resource('multipleChromosomes.vcf')) f = new_temp_file(extension='vcf') @@ -662,7 +693,7 @@ def test_metadata_argument(self): saw_gt = False saw_lq = False - with hl.current_backend().fs.open(f, 'r') as f: + with open(uri_path(f), 'r') as f: for line in f: print(line[:25]) if line.startswith('##FORMAT==3.6,<3.7|aiohttp_session>=2.7,<2.8|asyncinit>=0.2.4,<0.3|bokeh>1.1,<1.3|decorator<5|humanize==1.0.0|hurry.filesize==0.9|nest_asyncio|numpy<2|pandas>0.24,<0.26|parsimonious<0.9|PyJWT|python-json-logger==0.1.11|requests>=2.21.0,<2.21.1|scipy>1.2,<1.4|tabulate==0.8.9|tqdm==4.42.1|", + "pip_dependencies": "aiohttp>=3.6,<3.7|aiohttp_session>=2.7,<2.8|asyncinit>=0.2.4,<0.3|bokeh>1.1,<1.3|decorator<5|gcsfs==0.2.1|humanize==1.0.0|hurry.filesize==0.9|nest_asyncio|numpy<2|pandas>0.24,<0.26|parsimonious<0.9|PyJWT|python-json-logger==0.1.11|requests>=2.21.0,<2.21.1|scipy>1.2,<1.4|tabulate==0.8.9|tqdm==4.42.1|", "vep-GRCh37.sh": "gs://hail-common/hailctl/dataproc/test-version/vep-GRCh37.sh", "vep-GRCh38.sh": "gs://hail-common/hailctl/dataproc/test-version/vep-GRCh38.sh", } diff --git a/hail/src/main/scala/is/hail/HailContext.scala b/hail/src/main/scala/is/hail/HailContext.scala index e2be3c864c0..01301a6b8a9 100644 --- a/hail/src/main/scala/is/hail/HailContext.scala +++ b/hail/src/main/scala/is/hail/HailContext.scala @@ -50,6 +50,10 @@ object HailContext { def backend: Backend = get.backend + def getFlag(flag: String): String = get.flags.get(flag) + + def setFlag(flag: String, value: String): Unit = get.flags.set(flag, value) + def sparkBackend(op: String): SparkBackend = get.sparkBackend(op) def configureLogging(logFile: String, quiet: Boolean, append: Boolean, skipLoggingConfiguration: Boolean): Unit = { @@ -403,6 +407,8 @@ class HailContext private( def sparkBackend(op: String): SparkBackend = backend.asSpark(op) + val flags: HailFeatureFlags = new HailFeatureFlags() + var checkRVDKeys: Boolean = false private var nextVectorId: Int = 0 @@ -458,3 +464,54 @@ class HailContext private( JsonMethods.compact(Extraction.decompose(metadata)) } } + +object HailFeatureFlags { + val defaults: Map[String, (String, String)] = Map[String, (String, String)]( + ("no_whole_stage_codegen", ("HAIL_DEV_NO_WHOLE_STAGE_CODEGEN" -> null)), + ("no_ir_logging", ("HAIL_DEV_NO_IR_LOG" -> null)), + ("lower", ("HAIL_DEV_LOWER" -> null)), + ("lower_only", ("HAIL_DEV_LOWER_ONLY" -> null)), + ("lower_bm", ("HAIL_DEV_LOWER_BM" -> null)), + ("max_leader_scans", 
("HAIL_DEV_MAX_LEADER_SCANS" -> "1000")), + ("distributed_scan_comb_op", ("HAIL_DEV_DISTRIBUTED_SCAN_COMB_OP" -> null)), + ("jvm_bytecode_dump", ("HAIL_DEV_JVM_BYTECODE_DUMP" -> null)), + ("use_packed_int_encoding", ("HAIL_DEV_USE_PACKED_INT_ENCODING" -> null)), + ("use_column_encoding", ("HAIL_DEV_USE_COLUMN_ENCODING" -> null)), + ("use_spicy_ptypes", ("HAIL_USE_SPICY_PTYPES" -> null)), + ("log_service_timing", ("HAIL_DEV_LOG_SERVICE_TIMING" -> null)), + ("cache_service_input", ("HAIL_DEV_CACHE_SERVICE_INPUT" -> null)), + ("write_ir_files", ("HAIL_WRITE_IR_FILES" -> null)), + ("method_split_ir_limit", ("HAIL_DEV_METHOD_SPLIT_LIMIT" -> "16")), + ("use_new_shuffle", ("HAIL_USE_NEW_SHUFFLE" -> null)), + ("shuffle_cutoff_to_local_sort", ("HAIL_SHUFFLE_CUTOFF" -> null)), + ("grouped_aggregate_buffer_size", ("HAIL_GROUPED_AGGREGATE_BUFFER_SIZE" -> "50")), + ("use_ssa_logs", "HAIL_USE_SSA_LOGS" -> null) + ) +} + +class HailFeatureFlags { + private[this] val flags: mutable.Map[String, String] = + mutable.Map[String, String](HailFeatureFlags.defaults.mapValues { case (env, default) => + sys.env.getOrElse(env, default) + }.toFastSeq: _*) + + val available: java.util.ArrayList[String] = + new java.util.ArrayList[String](java.util.Arrays.asList[String](flags.keys.toSeq: _*)) + + def set(flag: String, value: String): Unit = { + flags.update(flag, value) + } + + def get(flag: String): String = flags(flag) + + def exists(flag: String): Boolean = flags.contains(flag) + + def toJSONEnv: JArray = + JArray(flags.filter { case (_, v) => + v != null + }.map{ case (name, v) => + JObject( + "name" -> JString(HailFeatureFlags.defaults(name)._1), + "value" -> JString(v)) + }.toList) +} diff --git a/hail/src/main/scala/is/hail/HailFeatureFlags.scala b/hail/src/main/scala/is/hail/HailFeatureFlags.scala deleted file mode 100644 index 70c0a896289..00000000000 --- a/hail/src/main/scala/is/hail/HailFeatureFlags.scala +++ /dev/null @@ -1,73 +0,0 @@ -package is.hail - -import is.hail.utils._ -import org.json4s.JsonAST.{JArray, JObject, JString} - -import scala.collection.mutable - -object HailFeatureFlags { - val defaults: Map[String, (String, String)] = Map[String, (String, String)]( - ("no_whole_stage_codegen", ("HAIL_DEV_NO_WHOLE_STAGE_CODEGEN" -> null)), - ("no_ir_logging", ("HAIL_DEV_NO_IR_LOG" -> null)), - ("lower", ("HAIL_DEV_LOWER" -> null)), - ("lower_only", ("HAIL_DEV_LOWER_ONLY" -> null)), - ("lower_bm", ("HAIL_DEV_LOWER_BM" -> null)), - ("max_leader_scans", ("HAIL_DEV_MAX_LEADER_SCANS" -> "1000")), - ("distributed_scan_comb_op", ("HAIL_DEV_DISTRIBUTED_SCAN_COMB_OP" -> null)), - ("jvm_bytecode_dump", ("HAIL_DEV_JVM_BYTECODE_DUMP" -> null)), - ("use_packed_int_encoding", ("HAIL_DEV_USE_PACKED_INT_ENCODING" -> null)), - ("use_column_encoding", ("HAIL_DEV_USE_COLUMN_ENCODING" -> null)), - ("use_spicy_ptypes", ("HAIL_USE_SPICY_PTYPES" -> null)), - ("log_service_timing", ("HAIL_DEV_LOG_SERVICE_TIMING" -> null)), - ("cache_service_input", ("HAIL_DEV_CACHE_SERVICE_INPUT" -> null)), - ("write_ir_files", ("HAIL_WRITE_IR_FILES" -> null)), - ("method_split_ir_limit", ("HAIL_DEV_METHOD_SPLIT_LIMIT" -> "16")), - ("use_new_shuffle", ("HAIL_USE_NEW_SHUFFLE" -> null)), - ("shuffle_max_branch_factor", ("HAIL_SHUFFLE_MAX_BRANCH" -> "64")), - ("shuffle_cutoff_to_local_sort", ("HAIL_SHUFFLE_CUTOFF" -> "32000000")), // This is in bytes - ("grouped_aggregate_buffer_size", ("HAIL_GROUPED_AGGREGATE_BUFFER_SIZE" -> "50")), - ("use_ssa_logs", "HAIL_USE_SSA_LOGS" -> null) - ) - - def fromEnv(): HailFeatureFlags = - new HailFeatureFlags( 
- mutable.Map( - HailFeatureFlags.defaults.mapValues { case (env, default) => - sys.env.getOrElse(env, default) - }.toFastSeq: _* - ) - ) - - def fromMap(m: mutable.Map[String, String]): HailFeatureFlags = - new HailFeatureFlags( - mutable.Map( - HailFeatureFlags.defaults.map { - case (flagName, (_, default)) => (flagName, m.getOrElse(flagName, default)) - }.toFastSeq: _* - ) - ) -} - -class HailFeatureFlags( - private[this] val flags: mutable.Map[String, String] -) { - val available: java.util.ArrayList[String] = - new java.util.ArrayList[String](java.util.Arrays.asList[String](flags.keys.toSeq: _*)) - - def set(flag: String, value: String): Unit = { - flags.update(flag, value) - } - - def get(flag: String): String = flags(flag) - - def exists(flag: String): Boolean = flags.contains(flag) - - def toJSONEnv: JArray = - JArray(flags.filter { case (_, v) => - v != null - }.map{ case (name, v) => - JObject( - "name" -> JString(HailFeatureFlags.defaults(name)._1), - "value" -> JString(v)) - }.toList) -} diff --git a/hail/src/main/scala/is/hail/asm4s/ClassBuilder.scala b/hail/src/main/scala/is/hail/asm4s/ClassBuilder.scala index 22a2c10253a..59c11ad9d87 100644 --- a/hail/src/main/scala/is/hail/asm4s/ClassBuilder.scala +++ b/hail/src/main/scala/is/hail/asm4s/ClassBuilder.scala @@ -95,7 +95,7 @@ trait WrappedModuleBuilder { def genClass[C](baseName: String)(implicit cti: TypeInfo[C]): ClassBuilder[C] = modb.genClass[C](baseName) - def classesBytes(writeIRs: Boolean, print: Option[PrintWriter] = None): ClassesBytes = modb.classesBytes(writeIRs, print) + def classesBytes(print: Option[PrintWriter] = None): ClassesBytes = modb.classesBytes(print) } class ModuleBuilder() { @@ -143,12 +143,12 @@ class ModuleBuilder() { var classesBytes: ClassesBytes = _ - def classesBytes(writeIRs: Boolean, print: Option[PrintWriter] = None): ClassesBytes = { + def classesBytes(print: Option[PrintWriter] = None): ClassesBytes = { if (classesBytes == null) { classesBytes = new ClassesBytes( classes .iterator - .flatMap(c => c.classBytes(writeIRs, print)) + .flatMap(c => c.classBytes(print)) .toArray) } @@ -203,7 +203,7 @@ trait WrappedClassBuilder[C] extends WrappedModuleBuilder { )(body: MethodBuilder[C] => Unit): MethodBuilder[C] = cb.getOrGenMethod(baseName, key, argsInfo, returnInfo)(body) - def result(writeIRs: Boolean, print: Option[PrintWriter] = None): (HailClassLoader) => C = cb.result(writeIRs, print) + def result(print: Option[PrintWriter] = None): (HailClassLoader) => C = cb.result(print) def _this: Value[C] = cb._this @@ -241,8 +241,7 @@ class ClassBuilder[C]( val lazyFieldMemo: mutable.Map[Any, Value[_]] = mutable.Map.empty - val lInitBuilder = new MethodBuilder[C](this, "", FastIndexedSeq(), UnitInfo) - val lInit = lInitBuilder.lmethod + val lInit = lclass.newMethod("", FastIndexedSeq(), UnitInfo) var initBody: Code[Unit] = { val L = new lir.Block() @@ -266,11 +265,6 @@ class ClassBuilder[C]( initBody = Code(initBody, c) } - def emitInitI(f: CodeBuilder => Unit): Unit = { - val body = CodeBuilder.scopedVoid(lInitBuilder)(f) - emitInit(body) - } - def emitClinit(c: Code[Unit]): Unit = { clinitBody match { case None => @@ -340,7 +334,7 @@ class ClassBuilder[C]( } } - def classBytes(writeIRs: Boolean, print: Option[PrintWriter] = None): Array[(String, Array[Byte])] = { + def classBytes(print: Option[PrintWriter] = None): Array[(String, Array[Byte])] = { assert(initBody.start != null) lInit.setEntry(initBody.start) @@ -354,12 +348,12 @@ class ClassBuilder[C]( lClinit.setEntry(nbody.start) } - 
lclass.asBytes(writeIRs, print) + lclass.asBytes(print) } - def result(writeIRs: Boolean, print: Option[PrintWriter] = None): (HailClassLoader) => C = { + def result(print: Option[PrintWriter] = None): (HailClassLoader) => C = { val n = className.replace("/", ".") - val classesBytes = modb.classesBytes(writeIRs) + val classesBytes = modb.classesBytes() assert(TaskContext.get() == null, "FunctionBuilder emission should happen on master, but happened on worker") diff --git a/hail/src/main/scala/is/hail/asm4s/Code.scala b/hail/src/main/scala/is/hail/asm4s/Code.scala index a1808d9b902..ed91a971e92 100644 --- a/hail/src/main/scala/is/hail/asm4s/Code.scala +++ b/hail/src/main/scala/is/hail/asm4s/Code.scala @@ -1073,9 +1073,9 @@ class CodeChar(val lhs: Code[Char]) extends AnyVal { def <=(rhs: Code[Int]): Code[Boolean] = lhs.toI <= rhs - def ceq(rhs: Code[Char]): Code[Boolean] = lhs.toI.ceq(rhs.toI) + def ceq(rhs: Code[Int]): Code[Boolean] = lhs.toI.ceq(rhs) - def cne(rhs: Code[Char]): Code[Boolean] = lhs.toI.cne(rhs.toI) + def cne(rhs: Code[Int]): Code[Boolean] = lhs.toI.cne(rhs) def toI: Code[Int] = lhs.asInstanceOf[Code[Int]] diff --git a/hail/src/main/scala/is/hail/backend/ExecuteContext.scala b/hail/src/main/scala/is/hail/backend/ExecuteContext.scala index 1f663230e78..92bac782c8c 100644 --- a/hail/src/main/scala/is/hail/backend/ExecuteContext.scala +++ b/hail/src/main/scala/is/hail/backend/ExecuteContext.scala @@ -1,7 +1,7 @@ package is.hail.backend import is.hail.asm4s.HailClassLoader -import is.hail.{HailContext, HailFeatureFlags} +import is.hail.HailContext import is.hail.annotations.{Region, RegionPool} import is.hail.io.fs.FS import is.hail.utils.{ExecutionTimer, using} @@ -43,30 +43,9 @@ object ExecuteContext { result } - def scoped[T]( - tmpdir: String, - localTmpdir: String, - backend: Backend, - fs: FS, - timer: ExecutionTimer, - tempFileManager: TempFileManager, - theHailClassLoader: HailClassLoader, - flags: HailFeatureFlags, - )( - f: ExecuteContext => T - ): T = { + def scoped[T](tmpdir: String, localTmpdir: String, backend: Backend, fs: FS, timer: ExecutionTimer, tempFileManager: TempFileManager, theHailClassLoader: HailClassLoader)(f: ExecuteContext => T): T = { RegionPool.scoped { pool => - using(new ExecuteContext( - tmpdir, - localTmpdir, - backend, - fs, - Region(pool = pool), - timer, - tempFileManager, - theHailClassLoader, - flags - ))(f(_)) + using(new ExecuteContext(tmpdir, localTmpdir, backend, fs, Region(pool = pool), timer, tempFileManager, theHailClassLoader))(f(_)) } } @@ -101,11 +80,12 @@ class ExecuteContext( var r: Region, val timer: ExecutionTimer, _tempFileManager: TempFileManager, - val theHailClassLoader: HailClassLoader, - private[this] val flags: HailFeatureFlags + val theHailClassLoader: HailClassLoader ) extends Closeable { var backendContext: BackendContext = _ + val printIRs: Boolean = HailContext.getFlag("no_ir_logging") == null + private val tempFileManager: TempFileManager = if (_tempFileManager != null) _tempFileManager else @@ -133,14 +113,6 @@ class ExecuteContext( cleanupFunctions += cleanupFunction } - def getFlag(name: String): String = flags.get(name) - - def shouldWriteIRFiles(): Boolean = getFlag("write_ir_files") != null - - def shouldNotLogIR(): Boolean = flags.get("no_ir_logging") != null - - def shouldLogIR(): Boolean = !shouldNotLogIR() - def close(): Unit = { tempFileManager.cleanup() diff --git a/hail/src/main/scala/is/hail/backend/local/LocalBackend.scala b/hail/src/main/scala/is/hail/backend/local/LocalBackend.scala index 
5fefd5b2854..3e434c60493 100644 --- a/hail/src/main/scala/is/hail/backend/local/LocalBackend.scala +++ b/hail/src/main/scala/is/hail/backend/local/LocalBackend.scala @@ -1,6 +1,6 @@ package is.hail.backend.local -import is.hail.{HailContext, HailFeatureFlags} +import is.hail.HailContext import is.hail.annotations.{Region, SafeRow, UnsafeRow} import is.hail.asm4s._ import is.hail.backend._ @@ -62,19 +62,12 @@ class LocalBackend( + "is.hail.io.compress.BGzipCodecTbi," + "org.apache.hadoop.io.compress.GzipCodec") - private[this] val flags = HailFeatureFlags.fromEnv() private[this] val theHailClassLoader = new HailClassLoader(getClass().getClassLoader()) - def getFlag(name: String): String = flags.get(name) - - def setFlag(name: String, value: String) = flags.set(name, value) - - val availableFlags: java.util.ArrayList[String] = flags.available - val fs: FS = new HadoopFS(new SerializableHadoopConfiguration(hadoopConf)) def withExecuteContext[T](timer: ExecutionTimer)(f: ExecuteContext => T): T = { - ExecuteContext.scoped(tmpdir, tmpdir, this, fs, timer, null, theHailClassLoader, flags)(f) + ExecuteContext.scoped(tmpdir, tmpdir, this, fs, timer, null, theHailClassLoader)(f) } def broadcast[T: ClassTag](value: T): BroadcastValue[T] = new LocalBroadcastValue[T](value) @@ -105,7 +98,7 @@ class LocalBackend( val ir = LoweringPipeline.darrayLowerer(true)(DArrayLowering.All).apply(ctx, ir0).asInstanceOf[IR] if (!Compilable(ir)) - throw new LowererUnsupportedOperation(s"lowered to uncompilable IR: ${ Pretty(ctx, ir) }") + throw new LowererUnsupportedOperation(s"lowered to uncompilable IR: ${ Pretty(ir) }") if (ir.typ == TVoid) { val (pt, f) = ctx.timer.time("Compile") { @@ -136,7 +129,7 @@ class LocalBackend( } private[this] def _execute(ctx: ExecuteContext, ir: IR): (Option[SingleCodeType], Long) = { - TypeCheck(ctx, ir) + TypeCheck(ir) Validate(ir) val queryID = Backend.nextID() log.info(s"starting execution of query $queryID of initial size ${ IRSize(ir) }") @@ -291,7 +284,7 @@ class LocalBackend( rowTypeRequiredness: RStruct ): TableStage = { - if (getFlag("use_new_shuffle") != null) { + if (HailContext.getFlag("shuffle_cutoff_to_local_sort") != null) { LowerDistributedSort.distributedSort(ctx, stage, sortFields, relationalLetsAbove, rowTypeRequiredness) } else { LowerDistributedSort.localSort(ctx, stage, sortFields, relationalLetsAbove) diff --git a/hail/src/main/scala/is/hail/backend/service/Main.scala b/hail/src/main/scala/is/hail/backend/service/Main.scala index 94db679c2bc..daf63fa3a82 100644 --- a/hail/src/main/scala/is/hail/backend/service/Main.scala +++ b/hail/src/main/scala/is/hail/backend/service/Main.scala @@ -28,7 +28,7 @@ object Main { val logFile = argv(1) configureLogging(logFile) - argv(3) match { + argv(2) match { case WORKER => Worker.main(argv) case DRIVER => ServiceBackendSocketAPI2.main(argv) case kind => throw new RuntimeException(s"unknown kind: ${kind}") diff --git a/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala b/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala index 6533df64040..fd5003eb666 100644 --- a/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala +++ b/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala @@ -6,20 +6,14 @@ import java.net._ import java.nio.charset.StandardCharsets import java.util.concurrent._ -import is.hail.{HAIL_REVISION, HailContext, HailFeatureFlags} +import is.hail.{HAIL_REVISION, HailContext} import is.hail.annotations._ import is.hail.asm4s._ import is.hail.backend.{Backend, 
BackendContext, BroadcastValue, ExecuteContext, HailTaskContext} import is.hail.expr.JSONAnnotationImpex import is.hail.expr.ir.lowering._ import is.hail.expr.ir.{Compile, IR, IRParser, MakeTuple, SortField} -import is.hail.expr.ir.functions.IRFunctionRegistry -import is.hail.io.{BufferSpec, TypedCodecSpec} -import is.hail.io.bgen.IndexBgen import is.hail.io.fs._ -import is.hail.io.bgen.IndexBgen -import is.hail.io.plink.LoadPlink -import is.hail.io.vcf.LoadVCF import is.hail.linalg.BlockMatrix import is.hail.services._ import is.hail.services.batch_client.BatchClient @@ -27,21 +21,24 @@ import is.hail.types._ import is.hail.types.physical._ import is.hail.types.physical.stypes.PTypeReferenceSingleCodeType import is.hail.types.virtual._ -import is.hail.types.encoded._ import is.hail.utils._ import is.hail.variant.ReferenceGenome import org.apache.commons.io.IOUtils import org.apache.log4j.Logger -import org.json4s.Extraction import org.json4s.JsonAST._ import org.json4s.jackson.JsonMethods import org.json4s.{DefaultFormats, Formats} import org.newsclub.net.unix.{AFUNIXServerSocket, AFUNIXSocketAddress} +import java.io._ +import java.net._ +import java.nio.charset.StandardCharsets +import java.util.concurrent._ import scala.annotation.switch import scala.reflect.ClassTag import scala.{concurrent => scalaConcurrent} -import scala.collection.mutable +import is.hail.io.vcf.LoadVCF +import org.json4s.Extraction class ServiceBackendContext( @@ -57,7 +54,13 @@ object ServiceBackend { private val log = Logger.getLogger(getClass.getName()) } +class User( + val username: String, + val tmpdir: String, + val fs: GoogleStorageFS) + class ServiceBackend( + val revision: String, val jarLocation: String, var name: String, val theHailClassLoader: HailClassLoader, @@ -66,10 +69,20 @@ class ServiceBackend( import ServiceBackend.log private[this] var batchCount = 0 + private[this] val users = new ConcurrentHashMap[String, User]() private[this] implicit val ec = scalaConcurrent.ExecutionContext.fromExecutorService( Executors.newCachedThreadPool()) - private[this] val MAX_AVAILABLE_GCS_CONNECTIONS = 100 - private[this] val availableGCSConnections = new Semaphore(MAX_AVAILABLE_GCS_CONNECTIONS, true) + + def addUser(username: String, key: String): Unit = synchronized { + val previous = users.put(username, new User(username, "/tmp", new GoogleStorageFS(Some(key)))) + assert(previous == null) + } + + def userContext[T](username: String, timer: ExecutionTimer, theHailClassLoader: HailClassLoader)(f: (ExecuteContext) => T): T = { + val user = users.get(username) + assert(user != null, username) + ExecuteContext.scoped(user.tmpdir, "file:///tmp", this, user.fs, timer, null, theHailClassLoader)(f) + } def defaultParallelism: Int = 10 @@ -87,13 +100,6 @@ class ServiceBackend( } } - private[this] def readString(in: DataInputStream): String = { - val n = in.readInt() - val bytes = new Array[Byte](n) - in.read(bytes) - new String(bytes, StandardCharsets.UTF_8) - } - def parallelizeAndComputeWithIndex( _backendContext: BackendContext, _fs: FS, @@ -107,19 +113,12 @@ class ServiceBackend( val token = tokenUrlSafe(32) val root = s"${ backendContext.remoteTmpDir }parallelizeAndComputeWithIndex/$token" - // FIXME: HACK - val (open, create) = if (n <= 50) { - (fs.openCachedNoCompression _, fs.createCachedNoCompression _) - } else { - (fs.openNoCompression _, fs.createNoCompression _) - } - log.info(s"parallelizeAndComputeWithIndex: $token: nPartitions $n") log.info(s"parallelizeAndComputeWithIndex: $token: writing f and 
contexts") val uploadFunction = scalaConcurrent.Future { retryTransientErrors { - using(new ObjectOutputStream(create(s"$root/f"))) { os => + using(new ObjectOutputStream(fs.createCachedNoCompression(s"$root/f"))) { os => os.writeObject(f) } } @@ -127,7 +126,7 @@ class ServiceBackend( val uploadContexts = scalaConcurrent.Future { retryTransientErrors { - using(create(s"$root/contexts")) { os => + using(fs.createCachedNoCompression(s"$root/contexts")) { os => var o = 12L * n var i = 0 while (i < n) { @@ -157,15 +156,12 @@ class ServiceBackend( "job_id" -> JInt(i + 1), "parent_ids" -> JArray(List()), "process" -> JObject( - "jar_spec" -> JObject( - "type" -> JString("jar_url"), - "value" -> JString(jarLocation) - ), "command" -> JArray(List( JString(Main.WORKER), + JString(revision), + JString(jarLocation), JString(root), - JString(s"$i"), - JString(s"$n"))), + JString(s"$i"))), "type" -> JString("jvm")), "mount_tokens" -> JBool(true), "resources" -> JObject("preemptible" -> JBool(true)) @@ -196,30 +192,13 @@ class ServiceBackend( val r = new Array[Array[Byte]](n) - def resultOrHailException(is: DataInputStream): Array[Byte] = { - val success = is.readBoolean() - if (success) { - IOUtils.toByteArray(is) - } else { - val shortMessage = readString(is) - val expandedMessage = readString(is) - val errorId = is.readInt() - throw new HailWorkerException(shortMessage, expandedMessage, errorId) - } - } - def readResult(i: Int): scalaConcurrent.Future[Unit] = scalaConcurrent.Future { - availableGCSConnections.acquire() - try { - r(i) = retryTransientErrors { - using(open(s"$root/result.$i")) { is => - resultOrHailException(new DataInputStream(is)) - } + r(i) = retryTransientErrors { + using(fs.openCachedNoCompression(s"$root/result.$i")) { is => + IOUtils.toByteArray(is) } - log.info(s"result $i complete") - } finally { - availableGCSConnections.release() } + log.info(s"result $i complete") } scalaConcurrent.Await.result( @@ -234,17 +213,35 @@ class ServiceBackend( def stop(): Unit = () def valueType( - ctx: ExecuteContext, + tmpdir: String, + sessionId: String, + billingProject: String, + remoteTmpDir: String, s: String - ): String = { + ): String = serviceBackendExecuteContext( + "ServiceBackend.valueType", + tmpdir, + sessionId, + billingProject, + remoteTmpDir + ) { ctx => val x = IRParser.parse_value_ir(ctx, s) x.typ.toString } def tableType( - ctx: ExecuteContext, + tmpdir: String, + sessionId: String, + billingProject: String, + remoteTmpDir: String, s: String - ): String = { + ): String = serviceBackendExecuteContext( + "ServiceBackend.tableType", + tmpdir, + sessionId, + billingProject, + remoteTmpDir + ) { ctx => val x = IRParser.parse_table_ir(ctx, s) val t = x.typ val jv = JObject("global" -> JString(t.globalType.toString), @@ -254,9 +251,18 @@ class ServiceBackend( } def matrixTableType( - ctx: ExecuteContext, + tmpdir: String, + sessionId: String, + billingProject: String, + remoteTmpDir: String, s: String - ): String = { + ): String = serviceBackendExecuteContext( + "ServiceBackend.matrixTableType", + tmpdir, + sessionId, + billingProject, + remoteTmpDir + ) { ctx => val x = IRParser.parse_matrix_ir(ctx, s) val t = x.typ val jv = JObject("global" -> JString(t.globalType.toString), @@ -269,9 +275,18 @@ class ServiceBackend( } def blockMatrixType( - ctx: ExecuteContext, + tmpdir: String, + sessionId: String, + billingProject: String, + remoteTmpDir: String, s: String - ): String = { + ): String = serviceBackendExecuteContext( + "ServiceBackend.blockMatrixType", + tmpdir, + sessionId, + 
billingProject, + remoteTmpDir + ) { ctx => val x = IRParser.parse_blockmatrix_ir(ctx, s) val t = x.typ val jv = JObject("element_type" -> JString(t.elementType.toString), @@ -282,14 +297,22 @@ class ServiceBackend( } def referenceGenome( - ctx: ExecuteContext, + tmpdir: String, + sessionId: String, + billingProject: String, + remoteTmpDir: String, name: String - ): String = { + ): String = serviceBackendExecuteContext( + "ServiceBackend.referenceGenome", + tmpdir, + sessionId, + billingProject, + remoteTmpDir + ) { ctx => ReferenceGenome.getReference(name).toJSONString } - private[this] def execute(ctx: ExecuteContext, _x: IR, bufferSpecString: String): Array[Byte] = { - // FIXME: do we need Validate(_x)? + private[this] def execute(ctx: ExecuteContext, _x: IR): Option[(Annotation, PType)] = { val x = LoweringPipeline.darrayLowerer(true)(DArrayLowering.All).apply(ctx, _x) .asInstanceOf[IR] if (x.typ == TVoid) { @@ -300,33 +323,45 @@ class ServiceBackend( optimize = true) f(ctx.theHailClassLoader, ctx.fs, 0, ctx.r)(ctx.r) - Array() + None } else { val (Some(PTypeReferenceSingleCodeType(pt)), f) = Compile[AsmFunction1RegionLong](ctx, FastIndexedSeq(), FastIndexedSeq[TypeInfo[_]](classInfo[Region]), LongInfo, MakeTuple.ordered(FastIndexedSeq(x)), optimize = true) + + val a = f(ctx.theHailClassLoader, ctx.fs, 0, ctx.r)(ctx.r) val retPType = pt.asInstanceOf[PBaseStruct] - val off = f(ctx.theHailClassLoader, ctx.fs, 0, ctx.r)(ctx.r) - val codec = TypedCodecSpec( - EType.fromTypeAllOptional(retPType.virtualType), - retPType.virtualType, - BufferSpec.parseOrDefault(bufferSpecString) - ) - codec.encode(ctx, retPType, off) + Some((new UnsafeRow(retPType, ctx.r, a).get(0), retPType.types(0))) } } def execute( - ctx: ExecuteContext, + tmpdir: String, + sessionId: String, + billingProject: String, + remoteTmpDir: String, code: String, - token: String, - bufferSpecString: String - ): Array[Byte] = { + token: String + ): String = serviceBackendExecuteContext( + "ServiceBackend.execute", + tmpdir, + sessionId, + billingProject, + remoteTmpDir + ) { ctx => log.info(s"executing: ${token}") - execute(ctx, IRParser.parse_value_ir(ctx, code), bufferSpecString) + execute(ctx, IRParser.parse_value_ir(ctx, code)) match { + case Some((v, t)) => + JsonMethods.compact( + JObject(List("value" -> JSONAnnotationImpex.exportAnnotation(v, t.virtualType), + "type" -> JString(t.virtualType.toString)))) + case None => + JsonMethods.compact( + JObject(List("value" -> null, "type" -> JString(TVoid.toString)))) + } } def lowerDistributedSort( @@ -336,11 +371,7 @@ class ServiceBackend( relationalLetsAbove: Map[String, IR], rowTypeRequiredness: RStruct ): TableStage = { - if (ctx.getFlag("use_new_shuffle") != null) { - LowerDistributedSort.distributedSort(ctx, stage, sortFields, relationalLetsAbove, rowTypeRequiredness) - } else { - LowerDistributedSort.localSort(ctx, stage, sortFields, relationalLetsAbove) - } + LowerDistributedSort.localSort(ctx, stage, sortFields, relationalLetsAbove) } def persist(backendContext: BackendContext, id: String, value: BlockMatrix, storageLevel: String): Unit = ??? @@ -352,40 +383,55 @@ class ServiceBackend( def getPersistedBlockMatrixType(backendContext: BackendContext, id: String): BlockMatrixType = ??? 
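[editor's sketch, not part of the patch] For reference, a rough Python rendering of the per-partition job payload assembled in parallelizeAndComputeWithIndex above, using only the field names visible on the '+' side of the diff; the "worker" literal stands in for Main.WORKER (its value is not shown here) and the example arguments are placeholders:

import json

def worker_job_spec(i: int, revision: str, jar_location: str, root: str) -> dict:
    # Field names copied from the JObject built in the diff; values are illustrative.
    return {
        "job_id": i + 1,
        "parent_ids": [],
        "process": {
            "command": ["worker", revision, jar_location, root, str(i)],
            "type": "jvm",
        },
        "mount_tokens": True,
        "resources": {"preemptible": True},
    }

# Example: the spec submitted for partition 0 of a batch.
print(json.dumps(worker_job_spec(0, "deadbeef", "gs://bucket/hail.jar",
                                 "gs://tmp/parallelizeAndComputeWithIndex/token/"),
                 indent=2))

One spec per partition is collected into the bulk job-creation request sent via BatchClient, and the driver then polls the per-partition result objects written under $root.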
def loadReferencesFromDataset( - ctx: ExecuteContext, + tmpdir: String, + sessionId: String, + billingProject: String, + remoteTmpDir: String, path: String - ): String = ReferenceGenome.fromHailDataset(ctx.fs, path) + ): String = serviceBackendExecuteContext( + "ServiceBackend.loadReferencesFromDataset", + tmpdir, + sessionId, + billingProject, + remoteTmpDir + ) { ctx => + ReferenceGenome.fromHailDataset(ctx.fs, path) + } def parseVCFMetadata( - ctx: ExecuteContext, - path: String - ): String = { + tmpdir: String, + sessionId: String, + billingProject: String, + remoteTmpDir: String, + path: String, + ): String = serviceBackendExecuteContext( + "ServiceBackend.parseVCFMetadata", + tmpdir, + sessionId, + billingProject, + remoteTmpDir + ) { ctx => val metadata = LoadVCF.parseHeaderMetadata(ctx.fs, Set.empty, TFloat64, path) implicit val formats = defaultJSONFormats JsonMethods.compact(Extraction.decompose(metadata)) } - def importFam( - ctx: ExecuteContext, - path: String, - quantPheno: Boolean, - delimiter: String, - missing: String - ): String = { - LoadPlink.importFamJSON(ctx.fs, path, quantPheno, delimiter, missing) - } - - def indexBgen( - ctx: ExecuteContext, - files: Array[String], - indexFileMap: Map[String, String], - referenceGenomeName: Option[String], - contigRecoding: Map[String, String], - skipInvalidLoci: Boolean - ): String = { - IndexBgen(ctx, files, indexFileMap, referenceGenomeName, contigRecoding, skipInvalidLoci) - info(s"Number of BGEN files indexed: ${ files.size }") - "null" + private[this] def serviceBackendExecuteContext[T]( + methodName: String, + tmpdir: String, + sessionId: String, + billingProject: String, + remoteTmpDir: String + )(body: ExecuteContext => T): T = ExecutionTimer.logTime(methodName) { timer => + val fs = retryTransientErrors { + using(new FileInputStream(s"$scratchDir/secrets/gsa-key/key.json")) { is => + new GoogleStorageFS(Some(IOUtils.toString(is, Charset.defaultCharset().toString()))).asCacheable() + } + } + ExecuteContext.scoped(tmpdir, "file:///tmp", this, fs, timer, null, theHailClassLoader) { ctx => + ctx.backendContext = new ServiceBackendContext(sessionId, billingProject, remoteTmpDir) + body(ctx) + } } } @@ -394,20 +440,21 @@ class HailBatchFailure(message: String) extends RuntimeException(message) object ServiceBackendSocketAPI2 { def main(argv: Array[String]): Unit = { - assert(argv.length == 7, argv.toFastIndexedSeq) + assert(argv.length == 8, argv.toFastIndexedSeq) val scratchDir = argv(0) val logFile = argv(1) - val jarLocation = argv(2) - val kind = argv(3) + val kind = argv(2) assert(kind == Main.DRIVER) - val name = argv(4) - val input = argv(5) - val output = argv(6) + val revision = argv(3) + val jarLocation = argv(4) + val name = argv(5) + val input = argv(6) + val output = argv(7) // FIXME: when can the classloader be shared? (optimizer benefits!) 
val backend = new ServiceBackend( - jarLocation, name, new HailClassLoader(getClass().getClassLoader()), scratchDir) + revision, jarLocation, name, new HailClassLoader(getClass().getClassLoader()), scratchDir) if (HailContext.isInitialized) { HailContext.get.backend = backend } else { @@ -445,6 +492,8 @@ class ServiceBackendSocketAPI2( private[this] val out: OutputStream, private[this] val sessionId: String ) extends Thread { + import ServiceBackendSocketAPI2._ + private[this] val LOAD_REFERENCES_FROM_DATASET = 1 private[this] val VALUE_TYPE = 2 private[this] val TABLE_TYPE = 3 @@ -455,6 +504,7 @@ class ServiceBackendSocketAPI2( private[this] val PARSE_VCF_METADATA = 8 private[this] val INDEX_BGEN = 9 private[this] val IMPORT_FAM = 10 + private[this] val GOODBYE = 254 private[this] val dummy = new Array[Byte](8) @@ -471,11 +521,6 @@ class ServiceBackendSocketAPI2( } } - def readBool(): Boolean = { - read(dummy, 0, 1) - Memory.loadByte(dummy, 0) != 0.toByte - } - def readInt(): Int = { read(dummy, 0, 4) Memory.loadInt(dummy, 0) @@ -517,216 +562,132 @@ class ServiceBackendSocketAPI2( def writeString(s: String): Unit = writeBytes(s.getBytes(StandardCharsets.UTF_8)) def executeOneCommand(): Unit = { - var nFlagsRemaining = readInt() - val flags = mutable.Map[String, String]() - while (nFlagsRemaining > 0) { - val flagName = readString() - val flagValue = readString() - flags.update(flagName, flagValue) - nFlagsRemaining -= 1 - } - val cmd = readInt() - val tmpdir = readString() - val billingProject = readString() - val remoteTmpDir = readString() - - def withExecuteContext(methodName: String, method: ExecuteContext => Array[Byte]): Array[Byte] = ExecutionTimer.logTime(methodName) { timer => - val fs = retryTransientErrors { - using(new FileInputStream(s"${backend.scratchDir}/secrets/gsa-key/key.json")) { is => - new GoogleStorageFS(Some(IOUtils.toString(is, Charset.defaultCharset().toString()))).asCacheable() + (cmd: @switch) match { + case LOAD_REFERENCES_FROM_DATASET => + val tmpdir = readString() + val billingProject = readString() + val remoteTmpDir = readString() + val path = readString() + try { + val result = backend.loadReferencesFromDataset(tmpdir, sessionId, billingProject, remoteTmpDir, path) + writeBool(true) + writeString(result) + } catch { + case t: Throwable => + writeBool(false) + writeString(formatException(t)) } - } - ExecuteContext.scoped( - tmpdir, - "file:///tmp", - backend, - fs, - timer, - null, - backend.theHailClassLoader, - HailFeatureFlags.fromMap(flags) - ) { ctx => - ctx.backendContext = new ServiceBackendContext(sessionId, billingProject, remoteTmpDir) - method(ctx) - } - } - try { - val result = (cmd: @switch) match { - case LOAD_REFERENCES_FROM_DATASET => - val path = readString() - withExecuteContext( - "ServiceBackend.loadReferencesFromDataset", - backend.loadReferencesFromDataset(_, path).getBytes(StandardCharsets.UTF_8) - ) - case VALUE_TYPE => - val s = readString() - withExecuteContext( - "ServiceBackend.valueType", - backend.valueType(_, s).getBytes(StandardCharsets.UTF_8) - ) - case TABLE_TYPE => - val s = readString() - withExecuteContext( - "ServiceBackend.tableType", - backend.tableType(_, s).getBytes(StandardCharsets.UTF_8) - ) - case MATRIX_TABLE_TYPE => - val s = readString() - withExecuteContext( - "ServiceBackend.matrixTableType", - backend.matrixTableType(_, s).getBytes(StandardCharsets.UTF_8) - ) - case BLOCK_MATRIX_TYPE => - val s = readString() - withExecuteContext( - "ServiceBackend.blockMatrixType", - backend.blockMatrixType(_, 
s).getBytes(StandardCharsets.UTF_8) - ) - case REFERENCE_GENOME => - val name = readString() - withExecuteContext( - "ServiceBackend.referenceGenome", - backend.referenceGenome(_, name).getBytes(StandardCharsets.UTF_8) - ) - case EXECUTE => - val code = readString() - val token = readString() - withExecuteContext( - "ServiceBackend.execute", - { ctx => - withIRFunctionsReadFromInput(ctx) { () => - val bufferSpecString = readString() - backend.execute(ctx, code, token, bufferSpecString) - } - } - ) - case PARSE_VCF_METADATA => - val path = readString() - withExecuteContext( - "ServiceBackend.parseVCFMetadata", - backend.parseVCFMetadata(_, path).getBytes(StandardCharsets.UTF_8) - ) - case IMPORT_FAM => - val path = readString() - val quantPheno = readBool() - val delimiter = readString() - val missing = readString() - withExecuteContext( - "ServiceBackend.importFam", - backend.importFam(_, path, quantPheno, delimiter, missing).getBytes(StandardCharsets.UTF_8) - ) - case INDEX_BGEN => - val nFiles = readInt() - val files = new Array[String](nFiles) - var i = 0 - while (i < nFiles) { - files(i) = readString() - i += 1 - } - val nIndexFiles = readInt() - val indexFileMap = mutable.Map[String, String]() - i = 0 - while (i < nIndexFiles) { - val k = readString() - val v = readString() - indexFileMap(k) = v - i += 1 - } - val hasReferenceGenome = readBool() - val referenceGenomeName = hasReferenceGenome match { - case true => Some(readString()) - case false => None - } - val nContigRecoding = readInt() - val contigRecoding = mutable.Map[String, String]() - i = 0 - while (i < nContigRecoding) { - val k = readString() - val v = readString() - contigRecoding(k) = v - i += 1 - } - val skipInvalidLoci = readBool() - withExecuteContext( - "ServiceBackend.indexBgen", - backend.indexBgen( - _, - files, - indexFileMap.toMap, - referenceGenomeName, - contigRecoding.toMap, - skipInvalidLoci - ).getBytes(StandardCharsets.UTF_8) - ) - } - writeBool(true) - writeBytes(result) - } catch { - case exc: HailWorkerException => - writeBool(false) - writeString(exc.shortMessage) - writeString(exc.expandedMessage) - writeInt(exc.errorId) - case t: Throwable => - val (shortMessage, expandedMessage, errorId) = handleForPython(t) - writeBool(false) - writeString(shortMessage) - writeString(expandedMessage) - writeInt(errorId) - } - } + case VALUE_TYPE => + val tmpdir = readString() + val billingProject = readString() + val remoteTmpDir = readString() + val s = readString() + try { + val result = backend.valueType(tmpdir, sessionId, billingProject, remoteTmpDir, s) + writeBool(true) + writeString(result) + } catch { + case t: Throwable => + writeBool(false) + writeString(formatException(t)) + } - def withIRFunctionsReadFromInput(ctx: ExecuteContext)(body: () => Array[Byte]): Array[Byte] = { - try { - var nFunctionsRemaining = readInt() - while (nFunctionsRemaining > 0) { - val name = readString() + case TABLE_TYPE => + val tmpdir = readString() + val billingProject = readString() + val remoteTmpDir = readString() + val s = readString() + try { + val result = backend.tableType(tmpdir, sessionId, billingProject, remoteTmpDir, s) + writeBool(true) + writeString(result) + } catch { + case t: Throwable => + writeBool(false) + writeString(formatException(t)) + } - val nTypeParametersRemaining = readInt() - val typeParameters = new Array[String](nTypeParametersRemaining) - var i = 0 - while (i < nTypeParametersRemaining) { - typeParameters(i) = readString() - i += 1 + case MATRIX_TABLE_TYPE => + val tmpdir = readString() + val 
billingProject = readString() + val remoteTmpDir = readString() + val s = readString() + try { + val result = backend.matrixTableType(tmpdir, sessionId, billingProject, remoteTmpDir, s) + writeBool(true) + writeString(result) + } catch { + case t: Throwable => + writeBool(false) + writeString(formatException(t)) } - val nValueParameterNamesRemaining = readInt() - val valueParameterNames = new Array[String](nValueParameterNamesRemaining) - i = 0 - while (i < nValueParameterNamesRemaining) { - valueParameterNames(i) = readString() - i += 1 + case BLOCK_MATRIX_TYPE => + val tmpdir = readString() + val billingProject = readString() + val remoteTmpDir = readString() + val s = readString() + try { + val result = backend.blockMatrixType(tmpdir, sessionId, billingProject, remoteTmpDir, s) + writeBool(true) + writeString(result) + } catch { + case t: Throwable => + writeBool(false) + writeString(formatException(t)) } - val nValueParameterTypesRemaining = readInt() - val valueParameterTypes = new Array[String](nValueParameterTypesRemaining) - i = 0 - while (i < nValueParameterTypesRemaining) { - valueParameterTypes(i) = readString() - i += 1 + case REFERENCE_GENOME => + val tmpdir = readString() + val billingProject = readString() + val remoteTmpDir = readString() + val name = readString() + try { + val result = backend.referenceGenome(tmpdir, sessionId, billingProject, remoteTmpDir, name) + writeBool(true) + writeString(result) + } catch { + case t: Throwable => + writeBool(false) + writeString(formatException(t)) } - val returnType = readString() + case EXECUTE => + val tmpdir = readString() + val billingProject = readString() + val remoteTmpDir = readString() + val code = readString() + val token = readString() + try { + val result = backend.execute(tmpdir, sessionId, billingProject, remoteTmpDir, code, token) + writeBool(true) + writeString(result) + } catch { + case t: Throwable => + writeBool(false) + writeString(formatException(t)) + } - val renderedBody = readString() + case PARSE_VCF_METADATA => + val tmpdir = readString() + val billingProject = readString() + val remoteTmpDir = readString() + val path = readString() + try { + val result = backend.parseVCFMetadata(tmpdir, sessionId, billingProject, remoteTmpDir, path) + writeBool(true) + writeString(result) + } catch { + case t: Throwable => + writeBool(false) + writeString(formatException(t)) + } - IRFunctionRegistry.pyRegisterIRForServiceBackend( - ctx, - name, - typeParameters, - valueParameterNames, - valueParameterTypes, - returnType, - renderedBody - ) - nFunctionsRemaining -= 1 - } - body() - } finally { - IRFunctionRegistry.clearUserFunctions() + case GOODBYE => + writeInt(GOODBYE) } } } diff --git a/hail/src/main/scala/is/hail/backend/service/Worker.scala b/hail/src/main/scala/is/hail/backend/service/Worker.scala index 7edbc55d2b8..93693561ec3 100644 --- a/hail/src/main/scala/is/hail/backend/service/Worker.scala +++ b/hail/src/main/scala/is/hail/backend/service/Worker.scala @@ -51,12 +51,6 @@ object Worker { private[this] implicit val ec = ExecutionContext.fromExecutorService( javaConcurrent.Executors.newCachedThreadPool()) - private[this] def writeString(out: DataOutputStream, s: String): Unit = { - val bytes = s.getBytes(StandardCharsets.UTF_8) - out.writeInt(bytes.length) - out.write(bytes) - } - def main(argv: Array[String]): Unit = { val theHailClassLoader = new HailClassLoader(getClass().getClassLoader()) @@ -65,12 +59,12 @@ object Worker { } val scratchDir = argv(0) val logFile = argv(1) - var jarLocation = argv(2) - val 
kind = argv(3) + val kind = argv(2) assert(kind == Main.WORKER) - val root = argv(4) - val i = argv(5).toInt - val n = argv(6).toInt + val revision = argv(3) + val jarGCSPath = argv(4) + val root = argv(5) + val i = argv(6).toInt val timer = new WorkerTimer() val deployConfig = DeployConfig.fromConfigFile( @@ -81,9 +75,9 @@ object Worker { tls.setSSLConfigFromDir(s"$scratchDir/secrets/ssl-config") log.info(s"is.hail.backend.service.Worker $myRevision") - log.info(s"running job $i/$n at root $root with scratch directory '$scratchDir'") + log.info(s"running job $i at root $root with scratch directory '$scratchDir'") - timer.start(s"Job $i/$n") + timer.start(s"Job $i") timer.start("readInputs") val fs = retryTransientErrors { @@ -92,16 +86,9 @@ object Worker { } } - // FIXME: HACK - val (open, create) = if (n <= 50) { - (fs.openCachedNoCompression _, fs.createCachedNoCompression _) - } else { - (fs.openNoCompression _, fs.createNoCompression _) - } - val fFuture = Future { retryTransientErrors { - using(new ObjectInputStream(open(s"$root/f"))) { is => + using(new ObjectInputStream(fs.openCachedNoCompression(s"$root/f"))) { is => is.readObject().asInstanceOf[(Array[Byte], HailTaskContext, HailClassLoader, FS) => Array[Byte]] } } @@ -109,7 +96,7 @@ object Worker { val contextFuture = Future { retryTransientErrors { - using(open(s"$root/contexts")) { is => + using(fs.openCachedNoCompression(s"$root/contexts")) { is => is.seek(i * 12) val offset = is.readLong() val length = is.readInt() @@ -128,41 +115,21 @@ object Worker { timer.start("executeFunction") if (HailContext.isInitialized) { - HailContext.get.backend = new ServiceBackend(null, null, new HailClassLoader(getClass().getClassLoader())) + HailContext.get.backend = new ServiceBackend(null, null, null, new HailClassLoader(getClass().getClassLoader())) } else { HailContext( // FIXME: workers should not have backends, but some things do need hail contexts - new ServiceBackend(null, null, new HailClassLoader(getClass().getClassLoader())), skipLoggingConfiguration = true, quiet = true) + new ServiceBackend(null, null, null, new HailClassLoader(getClass().getClassLoader())), skipLoggingConfiguration = true, quiet = true) } val htc = new ServiceTaskContext(i) - var result: Array[Byte] = null - var userError: HailException = null - try { - result = f(context, htc, theHailClassLoader, fs) - } catch { - case err: HailException => userError = err - } + val result = f(context, htc, theHailClassLoader, fs) htc.finish() timer.end("executeFunction") timer.start("writeOutputs") - using(create(s"$root/result.$i")) { os => - val dos = new DataOutputStream(os) - if (result != null) { - assert(userError == null) - - dos.writeBoolean(true) - dos.write(result) - } else { - assert(userError != null) - val (shortMessage, expandedMessage, errorId) = handleForPython(userError) - - dos.writeBoolean(false) - writeString(dos, shortMessage) - writeString(dos, expandedMessage) - dos.writeInt(errorId) - } + using(fs.createCachedNoCompression(s"$root/result.$i")) { os => + os.write(result) } timer.end("writeOutputs") timer.end(s"Job $i") diff --git a/hail/src/main/scala/is/hail/backend/spark/SparkBackend.scala b/hail/src/main/scala/is/hail/backend/spark/SparkBackend.scala index 62b92c30766..5ac8826bfae 100644 --- a/hail/src/main/scala/is/hail/backend/spark/SparkBackend.scala +++ b/hail/src/main/scala/is/hail/backend/spark/SparkBackend.scala @@ -5,7 +5,7 @@ import is.hail.asm4s._ import is.hail.expr.ir.IRParser import is.hail.types.encoded.EType import 
is.hail.io.{BufferSpec, StreamBufferSpec, TypedCodecSpec} -import is.hail.{HailContext, HailFeatureFlags} +import is.hail.HailContext import is.hail.expr.{JSONAnnotationImpex, SparkAnnotationImpex, Validate} import is.hail.expr.ir.lowering._ import is.hail.expr.ir._ @@ -261,14 +261,6 @@ class SparkBackend( val bmCache: SparkBlockMatrixCache = SparkBlockMatrixCache() - private[this] val flags = HailFeatureFlags.fromEnv() - - def getFlag(name: String): String = flags.get(name) - - def setFlag(name: String, value: String) = flags.set(name, value) - - val availableFlags: java.util.ArrayList[String] = flags.available - def persist(backendContext: BackendContext, id: String, value: BlockMatrix, storageLevel: String): Unit = bmCache.persistBlockMatrix(id, value, storageLevel) def unpersist(backendContext: BackendContext, id: String): Unit = unpersist(id) @@ -279,33 +271,10 @@ class SparkBackend( def unpersist(id: String): Unit = bmCache.unpersistBlockMatrix(id) - def createExecuteContextForTests( - timer: ExecutionTimer, - region: Region, - selfContainedExecution: Boolean = true - ): ExecuteContext = new ExecuteContext( - tmpdir, - localTmpdir, - this, - fs, - region, - timer, - if (selfContainedExecution) null else new NonOwningTempFileManager(longLifeTempFileManager), - theHailClassLoader, - flags - ) - def withExecuteContext[T](timer: ExecutionTimer, selfContainedExecution: Boolean = true)(f: ExecuteContext => T): T = { - ExecuteContext.scoped( - tmpdir, - localTmpdir, - this, - fs, - timer, + ExecuteContext.scoped(tmpdir, localTmpdir, this, fs, timer, if (selfContainedExecution) null else new NonOwningTempFileManager(longLifeTempFileManager), - theHailClassLoader, - flags - )(f) + theHailClassLoader)(f) } def broadcast[T : ClassTag](value: T): BroadcastValue[T] = new SparkBroadcastValue[T](sc.broadcast(value)) @@ -364,7 +333,7 @@ class SparkBackend( val ir = LoweringPipeline.darrayLowerer(optimize)(typesToLower).apply(ctx, ir0).asInstanceOf[IR] if (!Compilable(ir)) - throw new LowererUnsupportedOperation(s"lowered to uncompilable IR: ${ Pretty(ctx, ir) }") + throw new LowererUnsupportedOperation(s"lowered to uncompilable IR: ${ Pretty(ir) }") val res = ir.typ match { case TVoid => @@ -402,14 +371,14 @@ class SparkBackend( } private[this] def _execute(ctx: ExecuteContext, ir: IR, optimize: Boolean): Either[Unit, (PTuple, Long)] = { - TypeCheck(ctx, ir) + TypeCheck(ir) Validate(ir) try { - val lowerTable = getFlag("lower") != null - val lowerBM = getFlag("lower_bm") != null + val lowerTable = HailContext.getFlag("lower") != null + val lowerBM = HailContext.getFlag("lower_bm") != null _jvmLowerAndExecute(ctx, ir, optimize, lowerTable, lowerBM) } catch { - case e: LowererUnsupportedOperation if getFlag("lower_only") != null => throw e + case e: LowererUnsupportedOperation if HailContext.getFlag("lower_only") != null => throw e case _: LowererUnsupportedOperation => CompileAndEvaluate._apply(ctx, ir, optimize = optimize) } diff --git a/hail/src/main/scala/is/hail/expr/ir/BlockMatrixIR.scala b/hail/src/main/scala/is/hail/expr/ir/BlockMatrixIR.scala index e5ecff4b9dd..23efc2a2bec 100644 --- a/hail/src/main/scala/is/hail/expr/ir/BlockMatrixIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/BlockMatrixIR.scala @@ -11,9 +11,9 @@ import is.hail.annotations.{NDArray, Region} import is.hail.backend.{BackendContext, ExecuteContext} import is.hail.expr.Nat import is.hail.expr.ir.lowering.{BlockMatrixStage, LowererUnsupportedOperation} -import is.hail.io.{StreamBufferSpec, TypedCodecSpec} +import 
is.hail.io.TypedCodecSpec import is.hail.io.fs.FS -import is.hail.types.encoded.{EBlockMatrixNDArray, EFloat64, ENumpyBinaryNDArray} +import is.hail.types.encoded.{EBlockMatrixNDArray, EFloat64} import scala.collection.mutable.ArrayBuffer import is.hail.utils.richUtils.RichDenseMatrixDouble @@ -64,7 +64,7 @@ abstract sealed class BlockMatrixIR extends BaseIR { def typ: BlockMatrixType protected[ir] def execute(ctx: ExecuteContext): BlockMatrix = - fatal("tried to execute unexecutable IR:\n" + Pretty(ctx, this)) + fatal("tried to execute unexecutable IR:\n" + Pretty(this)) def copy(newChildren: IndexedSeq[BaseIR]): BlockMatrixIR @@ -165,7 +165,7 @@ class BlockMatrixNativeReader( val spec = TypedCodecSpec(EBlockMatrixNDArray(EFloat64(required = true), required = true), vType, BlockMatrix.bufferSpec) - new BlockMatrixStage(IndexedSeq(), Array(), TString) { + new BlockMatrixStage(Array(), TString) { def blockContext(idx: (Int, Int)): IR = { if (!fullType.hasBlock(idx)) fatal(s"trying to read nonexistent block $idx from path ${ params.path }") @@ -203,24 +203,6 @@ case class BlockMatrixBinaryReader(path: String, shape: IndexedSeq[Long], blockS val breezeMatrix = RichDenseMatrixDouble.importFromDoubles(ctx.fs, path, nRows.toInt, nCols.toInt, rowMajor = true) BlockMatrix.fromBreezeMatrix(breezeMatrix, blockSize) } - - override def lower(ctx: ExecuteContext): BlockMatrixStage = { - val readFromNumpyEType = ENumpyBinaryNDArray(nRows, nCols, true) - val readFromNumpySpec = TypedCodecSpec(readFromNumpyEType, TNDArray(TFloat64, Nat(2)), new StreamBufferSpec()) - val nd = ReadValue(Str(path), readFromNumpySpec, TNDArray(TFloat64, nDimsBase = Nat(2))) - val ndRef = Ref(genUID(), nd.typ) - - new BlockMatrixStage(IndexedSeq(ndRef.name -> nd), Array(), nd.typ) { - def blockContext(idx: (Int, Int)): IR = { - val (r, c) = idx - NDArraySlice(ndRef, MakeTuple.ordered(FastSeq( - MakeTuple.ordered(FastSeq(I64(r.toLong * blockSize), I64(java.lang.Math.min((r.toLong + 1) * blockSize, nRows)), I64(1))), - MakeTuple.ordered(FastSeq(I64(c.toLong * blockSize), I64(java.lang.Math.min((c.toLong + 1) * blockSize, nCols)), I64(1)))))) - } - - def blockBody(ctxRef: Ref): IR = ctxRef - } - } } case class BlockMatrixNativePersistParameters(id: String) @@ -310,7 +292,7 @@ case class BlockMatrixMap(child: BlockMatrixIR, eltName: String, f: IR, needsDen val i = evalIR(ctx, l) ("/", binaryOp(evalIR(ctx, l), BlockMatrix.reverseScalarDiv)) - case _ => fatal(s"Unsupported operation on BlockMatrices: ${Pretty(ctx, f)}") + case _ => fatal(s"Unsupported operation on BlockMatrices: ${Pretty(f)}") } prev.blockMap(breezeF, name, reqDense = needsDense) @@ -980,4 +962,4 @@ case class RelationalLetBlockMatrix(name: String, value: IR, body: BlockMatrixIR val IndexedSeq(newValue: IR, newBody: BlockMatrixIR) = newChildren RelationalLetBlockMatrix(name, newValue, newBody) } -} +} \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/expr/ir/BlockMatrixWriter.scala b/hail/src/main/scala/is/hail/expr/ir/BlockMatrixWriter.scala index 3459f6c6983..511585b8d86 100644 --- a/hail/src/main/scala/is/hail/expr/ir/BlockMatrixWriter.scala +++ b/hail/src/main/scala/is/hail/expr/ir/BlockMatrixWriter.scala @@ -6,11 +6,11 @@ import is.hail.asm4s._ import is.hail.backend.ExecuteContext import is.hail.expr.Nat import is.hail.expr.ir.lowering.{BlockMatrixStage, LowererUnsupportedOperation} -import is.hail.io.{StreamBufferSpec, TypedCodecSpec} +import is.hail.io.TypedCodecSpec import is.hail.io.fs.FS import is.hail.linalg.{BlockMatrix, 
BlockMatrixMetadata} -import is.hail.types.encoded.{EBlockMatrixNDArray, ENumpyBinaryNDArray, EType} -import is.hail.types.virtual.{TArray, TNDArray, TString, Type, TVoid} +import is.hail.types.encoded.{EBlockMatrixNDArray, EType} +import is.hail.types.virtual.{TArray, TNDArray, TString, Type} import is.hail.types.{BlockMatrixType, TypeWithRequiredness} import is.hail.utils._ import is.hail.utils.richUtils.RichDenseMatrixDouble @@ -30,8 +30,8 @@ object BlockMatrixWriter { abstract class BlockMatrixWriter { def pathOpt: Option[String] - def apply(ctx: ExecuteContext, bm: BlockMatrix): Any - def loweredTyp: Type + def apply(ctx: ExecuteContext, bm: BlockMatrix): Unit + def lower(ctx: ExecuteContext, s: BlockMatrixStage, bm: BlockMatrixIR, relationalBindings: Map[String, IR], eltR: TypeWithRequiredness): IR = throw new LowererUnsupportedOperation(s"unimplemented writer: \n${ this.getClass }") } @@ -45,8 +45,6 @@ case class BlockMatrixNativeWriter( def apply(ctx: ExecuteContext, bm: BlockMatrix): Unit = bm.write(ctx, path, overwrite, forceRowMajor, stageLocally) - def loweredTyp: Type = TVoid - override def lower(ctx: ExecuteContext, s: BlockMatrixStage, bm: BlockMatrixIR, relationalBindings: Map[String, IR], eltR: TypeWithRequiredness): IR = { if (stageLocally) throw new LowererUnsupportedOperation(s"stageLocally not supported in BlockMatrixWrite lowering") @@ -107,27 +105,14 @@ case class BlockMatrixNativeMetadataWriter(path: String, stageLocally: Boolean, }) cb += cb.emb.getObject(metaHelper).invoke[FS, Array[String], Unit]("write", cb.emb.getFS, partFiles) } - - def loweredTyp: Type = TVoid } case class BlockMatrixBinaryWriter(path: String) extends BlockMatrixWriter { def pathOpt: Option[String] = Some(path) - def apply(ctx: ExecuteContext, bm: BlockMatrix): String = { + def apply(ctx: ExecuteContext, bm: BlockMatrix): Unit = { RichDenseMatrixDouble.exportToDoubles(ctx.fs, path, bm.toBreezeMatrix(), forceRowMajor = true) - path - } - - def loweredTyp: Type = TString - - override def lower(ctx: ExecuteContext, s: BlockMatrixStage, bm: BlockMatrixIR, relationalBindings: Map[String, IR], eltR: TypeWithRequiredness): IR = { - val nd = s.collectLocal(relationalBindings, bm.typ) - - val etype = ENumpyBinaryNDArray(bm.typ.nRows, bm.typ.nCols, true) - val spec = TypedCodecSpec(etype, TNDArray(bm.typ.elementType, Nat(2)), new StreamBufferSpec()) - WriteValue(nd, Str(path), spec) } } @@ -135,7 +120,6 @@ case class BlockMatrixPersistWriter(id: String, storageLevel: String) extends Bl def pathOpt: Option[String] = None def apply(ctx: ExecuteContext, bm: BlockMatrix): Unit = HailContext.backend.persist(ctx.backendContext, id, bm, storageLevel) - def loweredTyp: Type = TVoid } case class BlockMatrixRectanglesWriter( @@ -149,8 +133,6 @@ case class BlockMatrixRectanglesWriter( def apply(ctx: ExecuteContext, bm: BlockMatrix): Unit = { bm.exportRectangles(ctx, path, rectangles, delimiter, binary) } - - def loweredTyp: Type = TVoid } abstract class BlockMatrixMultiWriter { @@ -163,8 +145,6 @@ case class BlockMatrixBinaryMultiWriter( def apply(ctx: ExecuteContext, bms: IndexedSeq[BlockMatrix]): Unit = BlockMatrix.binaryWriteBlockMatrices(ctx.fs, bms, prefix, overwrite) - - def loweredTyp: Type = TVoid } case class BlockMatrixTextMultiWriter( @@ -178,8 +158,6 @@ case class BlockMatrixTextMultiWriter( def apply(ctx: ExecuteContext, bms: IndexedSeq[BlockMatrix]): Unit = BlockMatrix.exportBlockMatrices(ctx.fs, bms, prefix, overwrite, delimiter, header, addIndex, compression, customFilenames) - - def loweredTyp: 
Type = TVoid } case class BlockMatrixNativeMultiWriter( @@ -190,6 +168,4 @@ case class BlockMatrixNativeMultiWriter( def apply(ctx: ExecuteContext, bms: IndexedSeq[BlockMatrix]): Unit = { BlockMatrix.writeBlockMatrices(ctx, bms, prefix, overwrite, forceRowMajor) } - - def loweredTyp: Type = TVoid } diff --git a/hail/src/main/scala/is/hail/expr/ir/Compile.scala b/hail/src/main/scala/is/hail/expr/ir/Compile.scala index 6600a5cc81c..5dcdd9f0a32 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Compile.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Compile.scala @@ -29,7 +29,6 @@ object Compile { expectedCodeParamTypes: IndexedSeq[TypeInfo[_]], expectedCodeReturnType: TypeInfo[_], body: IR, optimize: Boolean = true, - writeIRs: Boolean = false, print: Option[PrintWriter] = None ): (Option[SingleCodeType], (HailClassLoader, FS, Int, Region) => F) = { @@ -49,7 +48,7 @@ object Compile { .foldLeft(Env.empty[IR]) { case (e, ((n, t), i)) => e.bind(n, In(i, t)) })) ir = LoweringPipeline.compileLowerer(optimize).apply(ctx, ir).asInstanceOf[IR].noSharing - TypeCheck(ctx, ir, BindingEnv.empty) + TypeCheck(ir, BindingEnv.empty) val returnParam = CodeParamType(SingleCodeType.typeInfoFromType(ir.typ)) @@ -77,7 +76,7 @@ object Compile { val emitContext = EmitContext.analyze(ctx, ir) val rt = Emit(emitContext, ir, fb, expectedCodeReturnType, params.length) - val f = fb.resultWithIndex(writeIRs, print) + val f = fb.resultWithIndex(print) codeCache += k -> CodeCacheValue(rt, f) (rt, f) @@ -111,7 +110,7 @@ object CompileWithAggregators { .foldLeft(Env.empty[IR]) { case (e, ((n, t), i)) => e.bind(n, In(i, t)) })) ir = LoweringPipeline.compileLowerer(optimize).apply(ctx, ir).asInstanceOf[IR].noSharing - TypeCheck(ctx, ir, BindingEnv(Env.fromSeq[Type](params.map { case (name, t) => name -> t.virtualType }))) + TypeCheck(ir, BindingEnv(Env.fromSeq[Type](params.map { case (name, t) => name -> t.virtualType }))) val fb = EmitFunctionBuilder[F](ctx, "CompiledWithAggs", CodeParamType(typeInfo[Region]) +: params.map { case (_, pt) => pt }, @@ -184,7 +183,6 @@ object CompileIterator { ctx: ExecuteContext, body: IR, argTypeInfo: Array[ParamType], - writeIRs: Boolean, printWriter: Option[PrintWriter] ): (PType, (HailClassLoader, FS, Int, Region) => F) = { @@ -200,7 +198,7 @@ object CompileIterator { val outerRegion = outerRegionField val ir = LoweringPipeline.compileLowerer(true)(ctx, body).asInstanceOf[IR].noSharing - TypeCheck(ctx, ir) + TypeCheck(ir) var elementAddress: Settable[Long] = null var returnType: PType = null @@ -263,7 +261,7 @@ object CompileIterator { val getMB = fb.newEmitMethod("loadAddress", FastIndexedSeq(), LongInfo) getMB.emit(elementAddress.load()) - (returnType, fb.resultWithIndex(writeIRs, printWriter)) + (returnType, fb.resultWithIndex(printWriter)) } def forTableMapPartitions( @@ -279,7 +277,6 @@ object CompileIterator { CodeParamType(typeInfo[Object]), SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(typ0)), SingleCodeEmitParamType(true, StreamSingleCodeType(true, streamElementType))), - false, None) (eltPType, (theHailClassLoader, fs, idx, consumerCtx, v0, part) => { val stepper = makeStepper(theHailClassLoader, fs, idx, consumerCtx.partitionRegion) @@ -305,7 +302,6 @@ object CompileIterator { CodeParamType(typeInfo[Object]), SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(ctxType)), SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(bcValsType))), - false, None) (eltPType, (theHailClassLoader, fs, idx, consumerCtx, v0, v1) => { val stepper = 
makeStepper(theHailClassLoader, fs, idx, consumerCtx.partitionRegion) diff --git a/hail/src/main/scala/is/hail/expr/ir/Emit.scala b/hail/src/main/scala/is/hail/expr/ir/Emit.scala index 86d1314e6f5..20747e51aa8 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Emit.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Emit.scala @@ -32,7 +32,7 @@ object EmitContext { val usesAndDefs = ComputeUsesAndDefs(ir, errorIfFreeVariables = false) val requiredness = Requiredness.apply(ir, usesAndDefs, null, pTypeEnv) val inLoopCriticalPath = ControlFlowPreventsSplit(ir, ParentPointers(ir), usesAndDefs) - val methodSplits = ComputeMethodSplits(ctx, ir, inLoopCriticalPath) + val methodSplits = ComputeMethodSplits(ir,inLoopCriticalPath) new EmitContext(ctx, requiredness, usesAndDefs, methodSplits, inLoopCriticalPath, Memo.empty[Unit]) } } @@ -55,7 +55,7 @@ case class EmitEnv(bindings: Env[EmitValue], inputValues: IndexedSeq[(EmitCodeBu object Emit { def apply[C](ctx: EmitContext, ir: IR, fb: EmitFunctionBuilder[C], rti: TypeInfo[_], nParams: Int, aggs: Option[Array[AggStateSig]] = None): Option[SingleCodeType] = { - TypeCheck(ctx.executeContext, ir) + TypeCheck(ir) val mb = fb.apply_method val container = aggs.map { a => @@ -614,9 +614,6 @@ class Emit[C]( this.emitI(ir, cb, region, env, container, loopEnv) (ir: @unchecked) match { - case Literal(TVoid, ()) => - Code._empty - case Void() => Code._empty @@ -2405,7 +2402,7 @@ class Emit[C]( ctx.req.lookupOpt(ir) match { case Some(r) => if (result.required != r.required) { - throw new RuntimeException(s"requiredness mismatch: EC=${ result.required } / Analysis=${ r.required }\n${ result.st }\n${ Pretty(ctx.executeContext, ir) }") + throw new RuntimeException(s"requiredness mismatch: EC=${ result.required } / Analysis=${ r.required }\n${ result.st }\n${ Pretty(ir) }") } case _ => @@ -2413,7 +2410,7 @@ class Emit[C]( } if (result.st.virtualType != ir.typ) - throw new RuntimeException(s"type mismatch:\n EC=${ result.st.virtualType }\n IR=${ ir.typ }\n node: ${ Pretty(ctx.executeContext, ir).take(50) }") + throw new RuntimeException(s"type mismatch:\n EC=${ result.st.virtualType }\n IR=${ ir.typ }\n node: ${ Pretty(ir).take(50) }") result } @@ -2568,7 +2565,7 @@ class Emit[C]( ctx.req.lookupOpt(ir) match { case Some(r) => if (result.required != r.required) { - throw new RuntimeException(s"requiredness mismatch: EC=${ result.required } / Analysis=${ r.required }\n${ result.emitType }\n${ Pretty(ctx.executeContext, ir) }") + throw new RuntimeException(s"requiredness mismatch: EC=${ result.required } / Analysis=${ r.required }\n${ result.emitType }\n${ Pretty(ir) }") } case _ => diff --git a/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala b/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala index 1a5538e1c4e..ee54b73647b 100644 --- a/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala +++ b/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala @@ -116,10 +116,7 @@ trait WrappedEmitClassBuilder[C] extends WrappedEmitModuleBuilder { def fieldBuilder: SettableBuilder = cb.fieldBuilder - def result( - ctx: ExecuteContext, - print: Option[PrintWriter] = None - ): (HailClassLoader) => C = cb.result(ctx.shouldWriteIRFiles(), print) + def result(print: Option[PrintWriter] = None): (HailClassLoader) => C = cb.result(print) def getHailClassLoader: Code[HailClassLoader] = ecb.getHailClassLoader @@ -144,9 +141,9 @@ trait WrappedEmitClassBuilder[C] extends WrappedEmitModuleBuilder { def addEncodedLiteral(encodedLiteral: EncodedLiteral) = 
ecb.addEncodedLiteral(encodedLiteral) - def getPType[T <: PType : TypeInfo](t: T): Code[T] = ecb.getPType(t) + def getPType(t: PType): Code[PType] = ecb.getPType(t) - def getType[T <: Type : TypeInfo](t: T): Code[T] = ecb.getType(t) + def getType(t: Type): Code[Type] = ecb.getType(t) def newEmitMethod(name: String, argsInfo: IndexedSeq[ParamType], returnInfo: ParamType): EmitMethodBuilder[C] = ecb.newEmitMethod(name, argsInfo, returnInfo) @@ -167,7 +164,7 @@ trait WrappedEmitClassBuilder[C] extends WrappedEmitModuleBuilder { def newRNG(seed: Long): Value[IRRandomness] = ecb.newRNG(seed) - def resultWithIndex(writeIRs: Boolean = false, print: Option[PrintWriter] = None): (HailClassLoader, FS, Int, Region) => C = ecb.resultWithIndex(writeIRs, print) + def resultWithIndex(print: Option[PrintWriter] = None): (HailClassLoader, FS, Int, Region) => C = ecb.resultWithIndex(print) def getOrGenEmitMethod( baseName: String, key: Any, argsInfo: IndexedSeq[ParamType], returnInfo: ParamType @@ -221,7 +218,7 @@ class EmitClassBuilder[C]( def fieldBuilder: SettableBuilder = cb.fieldBuilder - def result(writeIRs: Boolean, print: Option[PrintWriter] = None): (HailClassLoader) => C = cb.result(writeIRs, print) + def result(print: Option[PrintWriter] = None): (HailClassLoader) => C = cb.result(print) // EmitClassBuilder methods @@ -664,10 +661,7 @@ class EmitClassBuilder[C]( rng } - def resultWithIndex( - writeIRs: Boolean, - print: Option[PrintWriter] = None - ): (HailClassLoader, FS, Int, Region) => C = { + def resultWithIndex(print: Option[PrintWriter] = None): (HailClassLoader, FS, Int, Region) => C = { makeRNGs() makeAddPartitionRegion() makeAddHailClassLoader() @@ -705,7 +699,7 @@ class EmitClassBuilder[C]( "FunctionBuilder emission should happen on master, but happened on worker") val n = cb.className.replace("/", ".") - val classesBytes = modb.classesBytes(writeIRs, print) + val classesBytes = modb.classesBytes(print) new ((HailClassLoader, FS, Int, Region) => C) with java.io.Serializable { @transient @volatile private var theClass: Class[_] = null diff --git a/hail/src/main/scala/is/hail/expr/ir/ExtractIntervalFilters.scala b/hail/src/main/scala/is/hail/expr/ir/ExtractIntervalFilters.scala index 40598f7ab34..01b45fd3469 100644 --- a/hail/src/main/scala/is/hail/expr/ir/ExtractIntervalFilters.scala +++ b/hail/src/main/scala/is/hail/expr/ir/ExtractIntervalFilters.scala @@ -1,6 +1,5 @@ package is.hail.expr.ir -import is.hail.backend.ExecuteContext import is.hail.annotations.IntervalEndpointOrdering import is.hail.types.virtual._ import is.hail.utils.{FastSeq, Interval, IntervalEndpoint, _} @@ -274,7 +273,7 @@ object ExtractIntervalFilters { extractAndRewrite(cond, ExtractionState(ref, key)) } - def apply(ctx: ExecuteContext, ir0: BaseIR): BaseIR = { + def apply(ir0: BaseIR): BaseIR = { MapIR.mapBaseIR(ir0, (ir: BaseIR) => { (ir match { case TableFilter(child, pred) => @@ -282,8 +281,8 @@ object ExtractIntervalFilters { .map { case (newCond, intervals) => log.info(s"generated TableFilterIntervals node with ${ intervals.length } intervals:\n " + s"Intervals: ${ intervals.mkString(", ") }\n " + - s"Predicate: ${ Pretty(ctx, pred) }\n " + - s"Post: ${ Pretty(ctx, newCond) }") + s"Predicate: ${ Pretty(pred) }\n " + + s"Post: ${ Pretty(newCond) }") TableFilter( TableFilterIntervals(child, intervals, keep = true), newCond) @@ -293,8 +292,8 @@ object ExtractIntervalFilters { .map { case (newCond, intervals) => log.info(s"generated MatrixFilterIntervals node with ${ intervals.length } intervals:\n " + 
s"Intervals: ${ intervals.mkString(", ") }\n " + - s"Predicate: ${ Pretty(ctx, pred) }\n " + - s"Post: ${ Pretty(ctx, newCond) }") + s"Predicate: ${ Pretty(pred) }\n " + + s"Post: ${ Pretty(newCond) }") MatrixFilterRows( MatrixFilterIntervals(child, intervals, keep = true), newCond) @@ -304,4 +303,4 @@ object ExtractIntervalFilters { }).getOrElse(ir) }) } -} +} \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/expr/ir/GenericLines.scala b/hail/src/main/scala/is/hail/expr/ir/GenericLines.scala index 15cd8a39b57..4a0cfcb64ba 100644 --- a/hail/src/main/scala/is/hail/expr/ir/GenericLines.scala +++ b/hail/src/main/scala/is/hail/expr/ir/GenericLines.scala @@ -16,7 +16,7 @@ import scala.annotation.meta.param trait CloseableIterator[T] extends Iterator[T] with AutoCloseable object GenericLines { - def read(fs: FS, contexts: IndexedSeq[Any], gzAsBGZ: Boolean, filePerPartition: Boolean): GenericLines = { + def read(fs: FS, contexts: IndexedSeq[Any], gzAsBGZ: Boolean): GenericLines = { val body: (FS, Any) => CloseableIterator[GenericLine] = { (fs: FS, context: Any) => val contextRow = context.asInstanceOf[Row] @@ -32,18 +32,18 @@ object GenericLines { val rawIS = fs.openNoCompression(file) val codec = fs.getCodecFromPath(file, gzAsBGZ) if (codec == null) { - assert(split || filePerPartition) + assert(split) rawIS.seek(start) rawIS } else if (codec == BGZipCompressionCodec) { - assert(split || filePerPartition) + assert(split) splitCompressed = true val bgzIS = new BGzipInputStream(rawIS, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK) new ProxyInputStream(bgzIS) with Positioned { def getPosition: Long = bgzIS.getVirtualOffset } } else { - assert(!split || filePerPartition) + assert(!split) new CountingInputStream(codec.makeInputStream(rawIS)) with Positioned { def getPosition: Long = getByteCount } @@ -244,8 +244,7 @@ object GenericLines { blockSizeInMB: Option[Int], minPartitions: Option[Int], gzAsBGZ: Boolean, - allowSerialRead: Boolean, - filePerPartition: Boolean = false + allowSerialRead: Boolean ): GenericLines = { val fileStatuses = fileStatuses0.filter(_.getLen > 0) val totalSize = fileStatuses.map(_.getLen).sum @@ -268,7 +267,7 @@ object GenericLines { val codec = fs.getCodecFromPath(status.getPath, gzAsBGZ) val splittable = codec == null || codec == BGZipCompressionCodec - if (splittable && !filePerPartition) { + if (splittable) { var fileNParts = ((totalPartitions.toDouble * size) / totalSize + 0.5).toInt if (fileNParts == 0) fileNParts = 1 @@ -284,7 +283,7 @@ object GenericLines { Row(i, status.getPath, start, end, true) } } else { - if (!allowSerialRead && !filePerPartition) + if (!allowSerialRead) fatal(s"Cowardly refusing to read file serially: ${ status.getPath }.") Iterator.single { @@ -293,7 +292,7 @@ object GenericLines { } } - GenericLines.read(fs, contexts, gzAsBGZ, filePerPartition) + GenericLines.read(fs, contexts, gzAsBGZ) } def collect(fs: FS, lines: GenericLines): IndexedSeq[String] = { diff --git a/hail/src/main/scala/is/hail/expr/ir/IR.scala b/hail/src/main/scala/is/hail/expr/ir/IR.scala index 10132aacba7..16cc84e607f 100644 --- a/hail/src/main/scala/is/hail/expr/ir/IR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/IR.scala @@ -196,7 +196,7 @@ object MakeArray { MakeArray(args, TArray(args.head.typ)) } - def unify(ctx: ExecuteContext, args: Seq[IR], requestedType: TArray = null): MakeArray = { + def unify(args: Seq[IR], requestedType: TArray = null): MakeArray = { assert(requestedType != null || args.nonEmpty) if(args.nonEmpty) @@ -204,7 
+204,7 @@ object MakeArray { return MakeArray(args, TArray(args.head.typ)) MakeArray(args.map { arg => - val upcast = PruneDeadFields.upcast(ctx, arg, requestedType.elementType) + val upcast = PruneDeadFields.upcast(arg, requestedType.elementType) assert(upcast.typ == requestedType.elementType) upcast }, requestedType) @@ -214,7 +214,7 @@ object MakeArray { final case class MakeArray(args: Seq[IR], _typ: TArray) extends IR object MakeStream { - def unify(ctx: ExecuteContext, args: Seq[IR], requiresMemoryManagementPerElement: Boolean = false, requestedType: TStream = null): MakeStream = { + def unify(args: Seq[IR], requiresMemoryManagementPerElement: Boolean = false, requestedType: TStream = null): MakeStream = { assert(requestedType != null || args.nonEmpty) if (args.nonEmpty) @@ -222,7 +222,7 @@ object MakeStream { return MakeStream(args, TStream(args.head.typ), requiresMemoryManagementPerElement) MakeStream(args.map { arg => - val upcast = PruneDeadFields.upcast(ctx, arg, requestedType.elementType) + val upcast = PruneDeadFields.upcast(arg, requestedType.elementType) assert(upcast.typ == requestedType.elementType) upcast }, requestedType, requiresMemoryManagementPerElement) @@ -747,7 +747,6 @@ object PartitionWriter { override val typeHints = ShortTypeHints(List( classOf[PartitionNativeWriter], classOf[TableTextPartitionWriter], - classOf[VCFPartitionWriter], classOf[AbstractTypedCodecSpec], classOf[TypedCodecSpec]), typeHintFieldName = "name" ) + BufferSpec.shortTypeHints @@ -766,7 +765,6 @@ object MetadataWriter { classOf[TableSpecWriter], classOf[RelationalWriter], classOf[TableTextFinalizer], - classOf[VCFExportFinalizer], classOf[RVDSpecMaker], classOf[AbstractTypedCodecSpec], classOf[TypedCodecSpec]), diff --git a/hail/src/main/scala/is/hail/expr/ir/InferType.scala b/hail/src/main/scala/is/hail/expr/ir/InferType.scala index 0a74106c0e3..27f633cae7f 100644 --- a/hail/src/main/scala/is/hail/expr/ir/InferType.scala +++ b/hail/src/main/scala/is/hail/expr/ir/InferType.scala @@ -260,7 +260,7 @@ object InferType { case _: MatrixWrite => TVoid case _: MatrixMultiWrite => TVoid case _: BlockMatrixCollect => TNDArray(TFloat64, Nat(2)) - case BlockMatrixWrite(_, writer) => writer.loweredTyp + case _: BlockMatrixWrite => TVoid case _: BlockMatrixMultiWrite => TVoid case TableGetGlobals(child) => child.typ.globalType case TableCollect(child) => TStruct("rows" -> TArray(child.typ.rowType), "global" -> child.typ.globalType) diff --git a/hail/src/main/scala/is/hail/expr/ir/LowerMatrixIR.scala b/hail/src/main/scala/is/hail/expr/ir/LowerMatrixIR.scala index ddda5d12698..21103cfe72e 100644 --- a/hail/src/main/scala/is/hail/expr/ir/LowerMatrixIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/LowerMatrixIR.scala @@ -1,6 +1,5 @@ package is.hail.expr.ir -import is.hail.backend.ExecuteContext import is.hail.expr.ir.functions.{WrappedMatrixToTableFunction, WrappedMatrixToValueFunction} import is.hail.expr.ir._ import is.hail.types._ @@ -13,44 +12,40 @@ object LowerMatrixIR { val colsField: Symbol = Symbol(colsFieldName) val entriesField: Symbol = Symbol(entriesFieldName) - def apply(ctx: ExecuteContext, ir: IR): IR = { + def apply(ir: IR): IR = { val ab = new BoxedArrayBuilder[(String, IR)] - val l1 = lower(ctx, ir, ab) + val l1 = lower(ir, ab) ab.result().foldRight[IR](l1) { case ((ident, value), body) => RelationalLet(ident, value, body) } } - def apply(ctx: ExecuteContext, tir: TableIR): TableIR = { + def apply(tir: TableIR): TableIR = { val ab = new BoxedArrayBuilder[(String, IR)] - val l1 = 
lower(ctx, tir, ab) + val l1 = lower(tir, ab) ab.result().foldRight[TableIR](l1) { case ((ident, value), body) => RelationalLetTable(ident, value, body) } } - def apply(ctx: ExecuteContext, mir: MatrixIR): TableIR = { + def apply(mir: MatrixIR): TableIR = { val ab = new BoxedArrayBuilder[(String, IR)] - val l1 = lower(ctx, mir, ab) + val l1 = lower(mir, ab) ab.result().foldRight[TableIR](l1) { case ((ident, value), body) => RelationalLetTable(ident, value, body) } } - def apply(ctx: ExecuteContext, bmir: BlockMatrixIR): BlockMatrixIR = { + def apply(bmir: BlockMatrixIR): BlockMatrixIR = { val ab = new BoxedArrayBuilder[(String, IR)] - val l1 = lower(ctx, bmir, ab) + val l1 = lower(bmir, ab) ab.result().foldRight[BlockMatrixIR](l1) { case ((ident, value), body) => RelationalLetBlockMatrix(ident, value, body) } } - private[this] def lowerChildren( - ctx: ExecuteContext, - ir: BaseIR, - ab: BoxedArrayBuilder[(String, IR)] - ): BaseIR = { + private[this] def lowerChildren(ir: BaseIR, ab: BoxedArrayBuilder[(String, IR)]): BaseIR = { val loweredChildren = ir.children.map { - case tir: TableIR => lower(ctx, tir, ab) + case tir: TableIR => lower(tir, ab) case mir: MatrixIR => throw new RuntimeException(s"expect specialized lowering rule for " + s"${ ir.getClass.getName }\n Found MatrixIR child $mir") - case bmir: BlockMatrixIR => lower(ctx, bmir, ab) - case vir: IR => lower(ctx, vir, ab) + case bmir: BlockMatrixIR => lower(bmir, ab) + case vir: IR => lower(vir, ab) } if ((ir.children, loweredChildren).zipped.forall(_ eq _)) ir @@ -91,17 +86,13 @@ object LowerMatrixIR { } - private[this] def lower( - ctx: ExecuteContext, - mir: MatrixIR, - ab: BoxedArrayBuilder[(String, IR)] - ): TableIR = { + private[this] def lower(mir: MatrixIR, ab: BoxedArrayBuilder[(String, IR)]): TableIR = { val lowered = mir match { case RelationalLetMatrixTable(name, value, body) => - RelationalLetTable(name, lower(ctx, value, ab), lower(ctx, body, ab)) + RelationalLetTable(name, lower(value, ab), lower(body, ab)) case CastTableToMatrix(child, entries, cols, colKey) => - val lc = lower(ctx, child, ab) + val lc = lower(child, ab) lc.mapRows( irIf('row (Symbol(entries)).isNA) { irDie("missing entry array unsupported in 'to_matrix_table_row_major'", lc.typ.rowType) @@ -115,11 +106,11 @@ object LowerMatrixIR { ).rename(Map(entries -> entriesFieldName), Map(cols -> colsFieldName)) case MatrixToMatrixApply(child, function) => - val loweredChild = lower(ctx, child, ab) + val loweredChild = lower(child, ab) TableToTableApply(loweredChild, function.lower()) case MatrixRename(child, globalMap, colMap, rowMap, entryMap) => - var t = lower(ctx, child, ab).rename(rowMap, globalMap) + var t = lower(child, ab).rename(rowMap, globalMap) if (colMap.nonEmpty) { val newColsType = TArray(child.typ.colType.rename(colMap)) @@ -134,19 +125,19 @@ object LowerMatrixIR { t case MatrixKeyRowsBy(child, keys, isSorted) => - lower(ctx, child, ab).keyBy(keys, isSorted) + lower(child, ab).keyBy(keys, isSorted) case MatrixFilterRows(child, pred) => - lower(ctx, child, ab) - .filter(subst(lower(ctx, pred, ab), matrixSubstEnv(child))) + lower(child, ab) + .filter(subst(lower(pred, ab), matrixSubstEnv(child))) case MatrixFilterCols(child, pred) => - lower(ctx, child, ab) + lower(child, ab) .mapGlobals('global.insertFields('newColIdx -> irRange(0, 'global (colsField).len) .filter('i ~> (let(sa = 'global (colsField)('i)) - in subst(lower(ctx, pred, ab), matrixGlobalSubstEnv(child)))))) + in subst(lower(pred, ab), matrixGlobalSubstEnv(child)))))) 
.mapRows('row.insertFields(entriesField -> 'global ('newColIdx).map('i ~> 'row (entriesField)('i)))) .mapGlobals('global .insertFields(colsField -> @@ -156,12 +147,12 @@ object LowerMatrixIR { case MatrixAnnotateRowsTable(child, table, root, product) => val kt = table.typ.keyType if (kt.size == 1 && kt.types(0) == TInterval(child.typ.rowKeyStruct.types(0))) - TableIntervalJoin(lower(ctx, child, ab), lower(ctx, table, ab), root, product) + TableIntervalJoin(lower(child, ab), lower(table, ab), root, product) else - TableLeftJoinRightDistinct(lower(ctx, child, ab), lower(ctx, table, ab), root) + TableLeftJoinRightDistinct(lower(child, ab), lower(table, ab), root) case MatrixChooseCols(child, oldIndices) => - lower(ctx, child, ab) + lower(child, ab) .mapGlobals('global.insertFields('newColIdx -> oldIndices.map(I32))) .mapRows('row.insertFields(entriesField -> 'global ('newColIdx).map('i ~> 'row (entriesField)('i)))) .mapGlobals('global @@ -171,8 +162,8 @@ object LowerMatrixIR { case MatrixAnnotateColsTable(child, table, root) => val col = Symbol(genUID()) val colKey = makeStruct(table.typ.key.zip(child.typ.colKey).map { case (tk, mck) => Symbol(tk) -> col(Symbol(mck)) }: _*) - lower(ctx, child, ab) - .mapGlobals(let(__dictfield = lower(ctx, table, ab) + lower(child, ab) + .mapGlobals(let(__dictfield = lower(table, ab) .keyBy(FastIndexedSeq()) .collect() .apply('rows) @@ -182,9 +173,9 @@ object LowerMatrixIR { }) case MatrixMapGlobals(child, newGlobals) => - lower(ctx, child, ab) + lower(child, ab) .mapGlobals( - subst(lower(ctx, newGlobals, ab), BindingEnv(Env[IRProxy]( + subst(lower(newGlobals, ab), BindingEnv(Env[IRProxy]( "global" -> 'global.selectFields(child.typ.globalType.fieldNames: _*)))) .insertFields(colsField -> 'global (colsField))) @@ -286,14 +277,14 @@ object LowerMatrixIR { } - val lc = lower(ctx, child, ab) + val lc = lower(child, ab) lc.mapRows(let(n_cols = 'global(colsField).len) { - liftScans(Subst(lower(ctx, newRow, ab), matrixSubstEnvIR(child, lc))) + liftScans(Subst(lower(newRow, ab), matrixSubstEnvIR(child, lc))) .insertFields(entriesField -> 'row(entriesField)) }) case MatrixMapCols(child, newCol, _) => - val loweredChild = lower(ctx, child, ab) + val loweredChild = lower(child, ab) def lift(ir: IR, scanBindings: BoxedArrayBuilder[(String, IR)], aggBindings: BoxedArrayBuilder[(String, IR)]): IR = ir match { case a: ApplyScanOp => @@ -395,7 +386,7 @@ object LowerMatrixIR { val scanBuilder = new BoxedArrayBuilder[(String, IR)] val aggBuilder = new BoxedArrayBuilder[(String, IR)] - val b0 = lift(Subst(lower(ctx, newCol, ab), matrixSubstEnvIR(child, loweredChild)), scanBuilder, aggBuilder) + val b0 = lift(Subst(lower(newCol, ab), matrixSubstEnvIR(child, loweredChild)), scanBuilder, aggBuilder) val aggs = aggBuilder.result() val scans = scanBuilder.result() @@ -471,13 +462,13 @@ object LowerMatrixIR { )) case MatrixFilterEntries(child, pred) => - val lc = lower(ctx, child, ab) + val lc = lower(child, ab) lc.mapRows('row.insertFields(entriesField -> irRange(0, 'global (colsField).len).map { 'i ~> let(g = 'row (entriesField)('i)) { irIf(let(sa = 'global (colsField)('i)) - in !subst(lower(ctx, pred, ab), matrixSubstEnv(child))) { + in !subst(lower(pred, ab), matrixSubstEnv(child))) { NA(child.typ.entryType) } { 'g @@ -488,7 +479,7 @@ object LowerMatrixIR { case MatrixUnionCols(left, right, joinType) => val rightEntries = genUID() val rightCols = genUID() - val ll = lower(ctx, left, ab).distinct() + val ll = lower(left, ab).distinct() def handleMissingEntriesArray(entries: 
Symbol, cols: Symbol): IRProxy = if (joinType == "inner") 'row(entries) @@ -501,7 +492,7 @@ object LowerMatrixIR { } TableJoin( ll, - lower(ctx, right, ab).distinct() + lower(right, ab).distinct() .mapRows('row .insertFields(Symbol(rightEntries) -> 'row(entriesField)) .selectFields(right.typ.rowKey :+ rightEntries: _*)) @@ -523,7 +514,7 @@ object LowerMatrixIR { .dropFields(Symbol(rightCols))) case MatrixMapEntries(child, newEntries) => - val loweredChild = lower(ctx, child, ab) + val loweredChild = lower(child, ab) val rt = loweredChild.typ.rowType val gt = loweredChild.typ.globalType TableMapRows( @@ -535,39 +526,39 @@ object LowerMatrixIR { ToStream(GetField(Ref("row", rt), entriesFieldName)), ToStream(GetField(Ref("global", gt), colsFieldName))), FastIndexedSeq("g", "sa"), - Subst(lower(ctx, newEntries, ab), BindingEnv(Env( + Subst(lower(newEntries, ab), BindingEnv(Env( "global" -> SelectFields(Ref("global", gt), child.typ.globalType.fieldNames), "va" -> SelectFields(Ref("row", rt), child.typ.rowType.fieldNames)))), ArrayZipBehavior.AssumeSameLength ))))) ) - case MatrixRepartition(child, n, shuffle) => TableRepartition(lower(ctx, child, ab), n, shuffle) + case MatrixRepartition(child, n, shuffle) => TableRepartition(lower(child, ab), n, shuffle) - case MatrixFilterIntervals(child, intervals, keep) => TableFilterIntervals(lower(ctx, child, ab), intervals, keep) + case MatrixFilterIntervals(child, intervals, keep) => TableFilterIntervals(lower(child, ab), intervals, keep) case MatrixUnionRows(children) => // FIXME: this should check that all children have the same column keys. - val first = lower(ctx, children.head, ab) + val first = lower(children.head, ab) TableUnion(FastIndexedSeq(first) ++ - children.tail.map(lower(ctx, _, ab) + children.tail.map(lower(_, ab) .mapRows('row.selectFields(first.typ.rowType.fieldNames: _*)))) - case MatrixDistinctByRow(child) => TableDistinct(lower(ctx, child, ab)) + case MatrixDistinctByRow(child) => TableDistinct(lower(child, ab)) - case MatrixRowsHead(child, n) => TableHead(lower(ctx, child, ab), n) - case MatrixRowsTail(child, n) => TableTail(lower(ctx, child, ab), n) + case MatrixRowsHead(child, n) => TableHead(lower(child, ab), n) + case MatrixRowsTail(child, n) => TableTail(lower(child, ab), n) - case MatrixColsHead(child, n) => lower(ctx, child, ab) + case MatrixColsHead(child, n) => lower(child, ab) .mapGlobals('global.insertFields(colsField -> 'global (colsField).arraySlice(0, Some(n), 1))) .mapRows('row.insertFields(entriesField -> 'row (entriesField).arraySlice(0, Some(n), 1))) - case MatrixColsTail(child, n) => lower(ctx, child, ab) + case MatrixColsTail(child, n) => lower(child, ab) .mapGlobals('global.insertFields(colsField -> 'global (colsField).arraySlice(-n, None, 1))) .mapRows('row.insertFields(entriesField -> 'row (entriesField).arraySlice(-n, None, 1))) case MatrixExplodeCols(child, path) => - val loweredChild = lower(ctx, child, ab) + val loweredChild = lower(child, ab) val lengths = Symbol(genUID()) val colIdx = Symbol(genUID()) val nestedIdx = Symbol(genUID()) @@ -607,9 +598,9 @@ object LowerMatrixIR { case MatrixAggregateRowsByKey(child, entryExpr, rowExpr) => val substEnv = matrixSubstEnv(child) - val eeSub = subst(lower(ctx, entryExpr, ab), substEnv) - val reSub = subst(lower(ctx, rowExpr, ab), substEnv) - lower(ctx, child, ab) + val eeSub = subst(lower(entryExpr, ab), substEnv) + val reSub = subst(lower(rowExpr, ab), substEnv) + lower(child, ab) .aggregateByKey( reSub.insertFields(entriesField -> irRange(0, 'global 
(colsField).len) .aggElements('__element_idx, '__result_idx, Some('global (colsField).len))( @@ -621,7 +612,7 @@ object LowerMatrixIR { }))) case MatrixCollectColsByKey(child) => - lower(ctx, child, ab) + lower(child, ab) .mapGlobals('global.insertFields('newColIdx -> irRange(0, 'global (colsField).len).map { 'i ~> @@ -649,7 +640,7 @@ object LowerMatrixIR { .dropFields('newColIdx) ) - case MatrixExplodeRows(child, path) => TableExplode(lower(ctx, child, ab), path) + case MatrixExplodeRows(child, path) => TableExplode(lower(child, ab), path) case mr: MatrixRead => mr.lower() @@ -664,11 +655,11 @@ object LowerMatrixIR { val aggElementIdx = Symbol(genUID()) val substEnv = matrixSubstEnv(child) - val ceSub = subst(lower(ctx, colExpr, ab), substEnv) + val ceSub = subst(lower(colExpr, ab), substEnv) val vaBinding = 'row.selectFields(child.typ.rowType.fieldNames: _*) - val eeSub = subst(lower(ctx, entryExpr, ab), substEnv.bindEval("va", vaBinding).bindAgg("va", vaBinding)) + val eeSub = subst(lower(entryExpr, ab), substEnv.bindEval("va", vaBinding).bindAgg("va", vaBinding)) - lower(ctx, child, ab) + lower(child, ab) .mapGlobals('global.insertFields(keyMap -> let(__cols_field = 'global (colsField)) { irRange(0, '__cols_field.len) @@ -707,24 +698,20 @@ object LowerMatrixIR { } if (!mir.typ.isCompatibleWith(lowered.typ)) - throw new RuntimeException(s"Lowering changed type:\n BEFORE: ${ Pretty(ctx, mir) }\n ${ mir.typ }\n ${ mir.typ.canonicalTableType}\n AFTER: ${ Pretty(ctx, lowered) }\n ${ lowered.typ }") + throw new RuntimeException(s"Lowering changed type:\n BEFORE: ${ Pretty(mir) }\n ${ mir.typ }\n ${ mir.typ.canonicalTableType}\n AFTER: ${ Pretty(lowered) }\n ${ lowered.typ }") lowered } - private[this] def lower( - ctx: ExecuteContext, - tir: TableIR, - ab: BoxedArrayBuilder[(String, IR)] - ): TableIR = { + private[this] def lower(tir: TableIR, ab: BoxedArrayBuilder[(String, IR)]): TableIR = { val lowered = tir match { case CastMatrixToTable(child, entries, cols) => - lower(ctx, child, ab) + lower(child, ab) .mapRows('row.selectFields(child.typ.rowType.fieldNames ++ Array(entriesFieldName): _*)) .rename(Map(entriesFieldName -> entries), Map(colsFieldName -> cols)) case x@MatrixEntriesTable(child) => - val lc = lower(ctx, child, ab) + val lc = lower(child, ab) if (child.typ.rowKey.nonEmpty && child.typ.colKey.nonEmpty) { val oldColIdx = Symbol(genUID()) @@ -783,19 +770,19 @@ object LowerMatrixIR { } case MatrixToTableApply(child, function) => - val loweredChild = lower(ctx, child, ab) + val loweredChild = lower(child, ab) TableToTableApply(loweredChild, function.lower() .getOrElse(WrappedMatrixToTableFunction(function, colsFieldName, entriesFieldName, child.typ.colKey))) case MatrixRowsTable(child) => - lower(ctx, child, ab) + lower(child, ab) .mapGlobals('global.dropFields(colsField)) .mapRows('row.dropFields(entriesField)) case MatrixColsTable(child) => val colKey = child.typ.colKey - let(__cols_and_globals = lower(ctx, child, ab).getGlobals) { + let(__cols_and_globals = lower(child, ab).getGlobals) { val sortedCols = if (colKey.isEmpty) '__cols_and_globals (colsField) else @@ -811,42 +798,34 @@ object LowerMatrixIR { makeStruct('rows -> sortedCols, 'global -> '__cols_and_globals.dropFields(colsField)) }.parallelize(None).keyBy(child.typ.colKey) - case table => lowerChildren(ctx, table, ab).asInstanceOf[TableIR] + case table => lowerChildren(table, ab).asInstanceOf[TableIR] } assertTypeUnchanged(tir, lowered) lowered } - private[this] def lower( - ctx: ExecuteContext, - bmir: BlockMatrixIR, 
- ab: BoxedArrayBuilder[(String, IR)] - ): BlockMatrixIR = { + private[this] def lower(bmir: BlockMatrixIR, ab: BoxedArrayBuilder[(String, IR)]): BlockMatrixIR = { val lowered = bmir match { - case noMatrixChildren => lowerChildren(ctx, noMatrixChildren, ab).asInstanceOf[BlockMatrixIR] + case noMatrixChildren => lowerChildren(noMatrixChildren, ab).asInstanceOf[BlockMatrixIR] } assertTypeUnchanged(bmir, lowered) lowered } - private[this] def lower( - ctx: ExecuteContext, - ir: IR, - ab: BoxedArrayBuilder[(String, IR)] - ): IR = { + private[this] def lower(ir: IR, ab: BoxedArrayBuilder[(String, IR)]): IR = { val lowered = ir match { - case MatrixToValueApply(child, function) => TableToValueApply(lower(ctx, child, ab), function.lower() + case MatrixToValueApply(child, function) => TableToValueApply(lower(child, ab), function.lower() .getOrElse(WrappedMatrixToValueFunction(function, colsFieldName, entriesFieldName, child.typ.colKey))) case MatrixWrite(child, writer) => - TableWrite(lower(ctx, child, ab), WrappedMatrixWriter(writer, colsFieldName, entriesFieldName, child.typ.colKey)) + TableWrite(lower(child, ab), WrappedMatrixWriter(writer, colsFieldName, entriesFieldName, child.typ.colKey)) case MatrixMultiWrite(children, writer) => - TableMultiWrite(children.map(lower(ctx, _, ab)), WrappedMatrixNativeMultiWriter(writer, children.head.typ.colKey)) + TableMultiWrite(children.map(lower(_, ab)), WrappedMatrixNativeMultiWriter(writer, children.head.typ.colKey)) case MatrixCount(child) => - lower(ctx, child, ab) + lower(child, ab) .aggregate(makeTuple(applyAggOp(Count(), FastIndexedSeq(), FastIndexedSeq()), 'global(colsField).len)) case MatrixAggregate(child, query) => - val lc = lower(ctx, child, ab) + val lc = lower(child, ab) val idx = Symbol(genUID()) TableAggregate(lc, aggExplodeIR( @@ -865,7 +844,7 @@ object LowerMatrixIR { isScan = false), isScan = false) }) - case _ => lowerChildren(ctx, ir, ab).asInstanceOf[IR] + case _ => lowerChildren(ir, ab).asInstanceOf[IR] } assertTypeUnchanged(ir, lowered) lowered diff --git a/hail/src/main/scala/is/hail/expr/ir/LowerOrInterpretNonCompilable.scala b/hail/src/main/scala/is/hail/expr/ir/LowerOrInterpretNonCompilable.scala index 4660d35d0f0..c257396b4d8 100644 --- a/hail/src/main/scala/is/hail/expr/ir/LowerOrInterpretNonCompilable.scala +++ b/hail/src/main/scala/is/hail/expr/ir/LowerOrInterpretNonCompilable.scala @@ -13,7 +13,7 @@ object LowerOrInterpretNonCompilable { def evaluate(value: IR): IR = { val preTime = System.nanoTime() - val result = CanLowerEfficiently(ctx, value) match { + val result = CanLowerEfficiently(value) match { case Some(failReason) => log.info(s"LowerOrInterpretNonCompilable: cannot efficiently lower query: $failReason") log.info(s"interpreting non-compilable result: ${ value.getClass.getSimpleName }") diff --git a/hail/src/main/scala/is/hail/expr/ir/MatrixIR.scala b/hail/src/main/scala/is/hail/expr/ir/MatrixIR.scala index 72ad21f70ad..51377192da8 100644 --- a/hail/src/main/scala/is/hail/expr/ir/MatrixIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/MatrixIR.scala @@ -8,6 +8,7 @@ import is.hail.expr.ir.IRBuilder._ import is.hail.expr.ir.functions.MatrixToMatrixFunction import is.hail.types._ import is.hail.types.virtual._ +import is.hail.io.TextMatrixReader import is.hail.io.bgen.MatrixBGENReader import is.hail.io.fs.FS import is.hail.io.plink.MatrixPLINKReader @@ -85,6 +86,7 @@ object MatrixReader { case "MatrixRangeReader" => MatrixRangeReader.fromJValue(env.ctx, jv) case "MatrixNativeReader" => 
MatrixNativeReader.fromJValue(env.ctx.fs, jv) case "MatrixBGENReader" => MatrixBGENReader.fromJValue(env, jv) + case "TextMatrixReader" => TextMatrixReader.fromJValue(env.ctx, jv) case "MatrixPLINKReader" => MatrixPLINKReader.fromJValue(env.ctx, jv) case "MatrixVCFReader" => MatrixVCFReader.fromJValue(env.ctx, jv) } diff --git a/hail/src/main/scala/is/hail/expr/ir/MatrixWriter.scala b/hail/src/main/scala/is/hail/expr/ir/MatrixWriter.scala index 58ea87d42bb..8d32374aac6 100644 --- a/hail/src/main/scala/is/hail/expr/ir/MatrixWriter.scala +++ b/hail/src/main/scala/is/hail/expr/ir/MatrixWriter.scala @@ -5,7 +5,7 @@ import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.backend.ExecuteContext import is.hail.expr.ir.functions.MatrixWriteBlockMatrix -import is.hail.expr.ir.lowering.{LowererUnsupportedOperation, RVDToTableStage, TableStage} +import is.hail.expr.ir.lowering.{LowererUnsupportedOperation, TableStage} import is.hail.expr.ir.streams.StreamProducer import is.hail.expr.{JSONAnnotationImpex, Nat} import is.hail.io._ @@ -13,22 +13,19 @@ import is.hail.io.fs.FS import is.hail.io.gen.{ExportBGEN, ExportGen} import is.hail.io.index.StagedIndexWriter import is.hail.io.plink.ExportPlink -import is.hail.io.vcf.{ExportVCF, TabixVCF} +import is.hail.io.vcf.ExportVCF import is.hail.linalg.BlockMatrix import is.hail.rvd.{IndexSpec, RVDPartitioner, RVDSpecMaker} import is.hail.types.encoded.{EBaseStruct, EBlockMatrixNDArray, EType} -import is.hail.types.physical.stypes.{EmitType, SValue} +import is.hail.types.physical.stypes.{EmitType, SCode} import is.hail.types.physical.stypes.interfaces._ -import is.hail.types.physical.stypes.primitives._ import is.hail.types.physical.{PBooleanRequired, PCanonicalBaseStruct, PCanonicalString, PCanonicalStruct, PInt64, PStruct, PType} import is.hail.types.virtual._ import is.hail.types._ -import is.hail.types.physical.stypes.concrete.{SJavaString, SJavaArrayString, SJavaArrayStringValue, SStackStruct} -import is.hail.types.physical.stypes.interfaces.{SIndexableValue, SBaseStructValue} +import is.hail.types.physical.stypes.concrete.SStackStruct import is.hail.types.physical.stypes.primitives.{SBooleanValue, SInt64Value} import is.hail.utils._ import is.hail.utils.richUtils.ByteTrackingOutputStream -import is.hail.variant.{ReferenceGenome, Call} import org.apache.spark.sql.Row import org.json4s.jackson.JsonMethods import org.json4s.{DefaultFormats, Formats, ShortTypeHints} @@ -388,418 +385,7 @@ case class MatrixVCFWriter( metadata: Option[VCFMetadata] = None, tabix: Boolean = false ) extends MatrixWriter { - def apply(ctx: ExecuteContext, mv: MatrixValue): Unit = { - val appendStr = getAppendHeaderValue(ctx.fs) - val tv = mv.toTableValue - val ts = RVDToTableStage(tv.rvd, tv.globals.toEncodedLiteral(ctx.theHailClassLoader)) - val tl = TableLiteral(tv, ctx.theHailClassLoader) - CompileAndEvaluate(ctx, - lower(LowerMatrixIR.colsFieldName, MatrixType.entriesIdentifier, mv.typ.colKey, - ctx, ts, tl, BaseTypeWithRequiredness(tv.typ).asInstanceOf[RTable], Map())) - } - - override def canLowerEfficiently: Boolean = true - override def lower(colsFieldName: String, entriesFieldName: String, colKey: IndexedSeq[String], - ctx: ExecuteContext, ts: TableStage, t: TableIR, r: RTable, relationalLetsAbove: Map[String, IR]): IR = { - require(exportType != ExportType.PARALLEL_COMPOSABLE) - - val tm = MatrixType.fromTableType(t.typ, colsFieldName, entriesFieldName, colKey) - tm.requireRowKeyVariant() - tm.requireColKeyString() - 
ExportVCF.checkFormatSignature(tm.entryType) - - val ext = ctx.fs.getCodecExtension(path) - - val folder = if (exportType == ExportType.CONCATENATED) - ctx.createTmpPath("write-vcf-concatenated") - else - path - - val appendStr = getAppendHeaderValue(ctx.fs) - - val writeHeader = exportType == ExportType.PARALLEL_HEADER_IN_SHARD - val partAppend = appendStr.filter(_ => writeHeader) - val partMetadata = metadata.filter(_ => writeHeader) - val lineWriter = VCFPartitionWriter(tm, entriesFieldName, writeHeader = exportType == ExportType.PARALLEL_HEADER_IN_SHARD, - partAppend, partMetadata, tabix && exportType != ExportType.CONCATENATED) - - ts.mapContexts { oldCtx => - val d = digitsNeeded(ts.numPartitions) - val partFiles = Literal(TArray(TString), Array.tabulate(ts.numPartitions)(i => s"$folder/${ partFile(d, i) }$ext").toFastIndexedSeq) - - zip2(oldCtx, ToStream(partFiles), ArrayZipBehavior.AssertSameLength) { (ctxElt, pf) => - MakeStruct(FastSeq( - "oldCtx" -> ctxElt, - "partFile" -> pf)) - } - }(GetField(_, "oldCtx")).mapCollectWithContextsAndGlobals(relationalLetsAbove) { (rows, ctxRef) => - val ctx = MakeStruct(FastSeq( - "cols" -> GetField(ts.globals, colsFieldName), - "partFile" -> GetField(ctxRef, "partFile"))) - WritePartition(rows, ctx, lineWriter) - }{ (parts, globals) => - val ctx = MakeStruct(FastSeq("cols" -> GetField(globals, colsFieldName), "partFiles" -> parts)) - val commit = VCFExportFinalizer(tm, path, appendStr, metadata, exportType, tabix) - Begin(FastIndexedSeq(WriteMetadata(ctx, commit))) - } - } - - private def getAppendHeaderValue(fs: FS): Option[String] = append.map { f => - using(fs.open(f)) { s => - val sb = new StringBuilder - scala.io.Source.fromInputStream(s) - .getLines() - .filterNot(_.isEmpty) - .foreach { line => - sb.append(line) - sb += '\n' - } - sb.result() - } - } -} - -case class VCFPartitionWriter(typ: MatrixType, entriesFieldName: String, writeHeader: Boolean, - append: Option[String], metadata: Option[VCFMetadata], tabix: Boolean) extends PartitionWriter { - val ctxType: Type = TStruct("cols" -> TArray(typ.colType), "partFile" -> TString) - - if (typ.rowType.hasField("info")) { - typ.rowType.field("info").typ match { - case _: TStruct => - case t => - warn(s"export_vcf found row field 'info' of type $t, but expected type 'Struct'. Emitting no INFO fields.") - } - } else { - warn(s"export_vcf found no row field 'info'. Emitting no INFO fields.") - } - - val formatFieldOrder: Array[Int] = typ.entryType.fieldIdx.get("GT") match { - case Some(i) => (i +: typ.entryType.fields.filter(fd => fd.name != "GT").map(_.index)).toArray - case None => typ.entryType.fields.indices.toArray - } - val formatFieldString = formatFieldOrder.map(i => typ.entryType.fields(i).name).mkString(":") - val missingFormatStr = if (typ.entryType.size > 0 && typ.entryType.types(formatFieldOrder(0)) == TCall) - "./." - else - "." 
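-  // Note on the FORMAT layout above: formatFieldOrder puts GT (when present) first, per VCF
-  // convention, and missingFormatStr is the placeholder written for an entirely missing entry.
-  // e.g. for a hypothetical entry type {GT: call, AD: array<int32>, DP: int32} this gives
-  //   formatFieldOrder = [0, 1, 2], formatFieldString = "GT:AD:DP", missingFormatStr = "./.".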
- - val locusIdx = typ.rowType.fieldIdx("locus") - val allelesIdx = typ.rowType.fieldIdx("alleles") - val (idExists, idIdx) = ExportVCF.lookupVAField(typ.rowType, "rsid", "ID", Some(TString)) - val (qualExists, qualIdx) = ExportVCF.lookupVAField(typ.rowType, "qual", "QUAL", Some(TFloat64)) - val (filtersExists, filtersIdx) = ExportVCF.lookupVAField(typ.rowType, "filters", "FILTERS", Some(TSet(TString))) - val (infoExists, infoIdx) = ExportVCF.lookupVAField(typ.rowType, "info", "INFO", None) - - def returnType: Type = TString - def unionTypeRequiredness(r: TypeWithRequiredness, ctxType: TypeWithRequiredness, streamType: RIterable): Unit = { - r.union(ctxType.required) - r.union(streamType.required) - } - - final def consumeStream(ctx: ExecuteContext, cb: EmitCodeBuilder, stream: StreamProducer, - context: EmitCode, region: Value[Region]): IEmitCode = { - val mb = cb.emb - context.toI(cb).map(cb) { case ctx: SBaseStructValue => - val filename = ctx.loadField(cb, "partFile").get(cb, "partFile can't be missing").asString.loadString(cb) - - val os = cb.memoize(cb.emb.create(filename)) - if (writeHeader) { - val sampleIds = ctx.loadField(cb, "cols").get(cb).asIndexable - val stringSampleIds = cb.memoize(Code.newArray[String](sampleIds.loadLength())) - sampleIds.forEachDefined(cb) { case (cb, i, colv: SBaseStructValue) => - val s = colv.subset(typ.colKey: _*).loadField(cb, 0).get(cb).asString - cb += (stringSampleIds(i) = s.loadString(cb)) - } - - val headerStr = Code.invokeScalaObject6[TStruct, TStruct, ReferenceGenome, Option[String], Option[VCFMetadata], Array[String], String]( - ExportVCF.getClass, "makeHeader", - mb.getType[TStruct](typ.rowType), mb.getType[TStruct](typ.entryType), - mb.getReferenceGenome(typ.referenceGenome), mb.getObject(append), - mb.getObject(metadata), stringSampleIds) - cb += os.invoke[Array[Byte], Unit]("write", headerStr.invoke[Array[Byte]]("getBytes")) - cb += os.invoke[Int, Unit]("write", '\n') - } - - stream.memoryManagedConsume(region, cb) { cb => - consumeElement(cb, stream.element, os, stream.elementRegion) - } - - cb += os.invoke[Unit]("close") - - if (tabix) { - cb += Code.invokeScalaObject2[FS, String, Unit](TabixVCF.getClass, "apply", cb.emb.getFS, filename) - } - - SJavaString.construct(cb, filename) - } - } - - def consumeElement(cb: EmitCodeBuilder, element: EmitCode, os: Value[OutputStream], region: Value[Region]): Unit = { - def _writeC(cb: EmitCodeBuilder, code: Code[Int]) = { cb += os.invoke[Int, Unit]("write", code) } - def _writeB(cb: EmitCodeBuilder, code: Code[Array[Byte]]) = { cb += os.invoke[Array[Byte], Unit]("write", code) } - def _writeS(cb: EmitCodeBuilder, code: Code[String]) = { _writeB(cb, code.invoke[Array[Byte]]("getBytes")) } - def writeValue(cb: EmitCodeBuilder, value: SValue) = value match { - case v: SInt32Value => _writeS(cb, v.value.toS) - case v: SInt64Value => - cb.ifx(v.value > Int.MaxValue || v.value < Int.MinValue, cb._fatal( - "Cannot convert Long to Int if value is greater than Int.MaxValue (2^31 - 1) ", - "or less than Int.MinValue (-2^31). 
Found ", v.value.toS)) - _writeS(cb, v.value.toS) - case v: SFloat32Value => - cb.ifx(Code.invokeStatic1[java.lang.Float, Float, Boolean]("isNaN", v.value), - _writeC(cb, '.'), - _writeS(cb, Code.invokeScalaObject2[String, Float, String](ExportVCF.getClass, "fmtFloat", "%.6g", v.value))) - case v: SFloat64Value => - cb.ifx(Code.invokeStatic1[java.lang.Double, Double, Boolean]("isNaN", v.value), - _writeC(cb, '.'), - _writeS(cb, Code.invokeScalaObject2[String, Double, String](ExportVCF.getClass, "fmtDouble", "%.6g", v.value))) - case v: SStringValue => - _writeB(cb, v.toBytes(cb).loadBytes(cb)) - case v: SCallValue => - val ploidy = v.ploidy(cb) - val phased = v.isPhased(cb) - cb.ifx(ploidy.ceq(0), cb._fatal("VCF spec does not support 0-ploid calls.")) - cb.ifx(ploidy.ceq(1) , cb._fatal("VCF spec does not support phased haploid calls.")) - val c = v.canonicalCall(cb) - _writeS(cb, Code.invokeScalaObject1[Int, String](Call.getClass, "toString", c)) - case _ => - fatal(s"VCF does not support ${value.st}") - } - - def writeIterable(cb: EmitCodeBuilder, it: SIndexableValue, delim: Int) = - it.forEachDefinedOrMissing(cb)({ (cb, i) => - cb.ifx(i.cne(0), _writeC(cb, delim)) - _writeC(cb, '.') - }, { (cb, i, value) => - cb.ifx(i.cne(0), _writeC(cb, delim)) - writeValue(cb, value) - }) - - def writeGenotype(cb: EmitCodeBuilder, gt: SBaseStructValue) = { - val end = cb.newLocal[Int]("lastDefined", -1) - val Lend = CodeLabel() - formatFieldOrder.zipWithIndex.reverse.foreach { case (idx, pos) => - cb.ifx(!gt.isFieldMissing(cb, idx), { - cb.assign(end, pos) - cb.goto(Lend) - }) - } - - cb.define(Lend) - - val Lout = CodeLabel() - - cb.ifx(end < 0, { - _writeS(cb, missingFormatStr) - cb.goto(Lout) - }) - - formatFieldOrder.zipWithIndex.foreach { case (idx, pos) => - if (pos != 0) - _writeC(cb, ':') - - gt.loadField(cb, idx).consume(cb, { - if (gt.st.fieldTypes(idx).virtualType == TCall) - _writeS(cb, "./.") - else - _writeC(cb, '.') - }, { - case value: SIndexableValue => - writeIterable(cb, value, ',') - case value => - writeValue(cb, value) - }) - - cb.ifx(end.ceq(pos), cb.goto(Lout)) - } - - cb.define(Lout) - } - - def writeC(code: Code[Int]) = _writeC(cb, code) - def writeB(code: Code[Array[Byte]]) = _writeB(cb, code) - def writeS(code: Code[String]) = _writeS(cb, code) - - val elt = element.toI(cb).get(cb).asBaseStruct - val locus = elt.loadField(cb, locusIdx).get(cb).asLocus - // CHROM - writeB(locus.contig(cb).toBytes(cb).loadBytes(cb)) - // POS - writeC('\t') - writeS(locus.position(cb).toS) - - // ID - writeC('\t') - if (idExists) - elt.loadField(cb, idIdx).consume(cb, writeC('.'), { case id: SStringValue => - writeB(id.toBytes(cb).loadBytes(cb)) - }) - else - writeC('.') - - // REF - writeC('\t') - val alleles = elt.loadField(cb, allelesIdx).get(cb).asIndexable - writeB(alleles.loadElement(cb, 0).get(cb).asString.toBytes(cb).loadBytes(cb)) - - // ALT - writeC('\t') - cb.ifx(alleles.loadLength() > 1, - { - val i = cb.newLocal[Int]("i") - cb.forLoop(cb.assign(i, 1), i < alleles.loadLength(), cb.assign(i, i + 1), { - cb.ifx(i.cne(1), writeC(',')) - writeB(alleles.loadElement(cb, i).get(cb).asString.toBytes(cb).loadBytes(cb)) - }) - }, - writeC('.')) - - // QUAL - writeC('\t') - if (qualExists) - elt.loadField(cb, qualIdx).consume(cb, writeC('.'), { qual => - writeS(Code.invokeScalaObject2[String, Double, String](ExportVCF.getClass, "fmtDouble", "%.2f", qual.asDouble.value)) - }) - else - writeC('.') - - // FILTER - writeC('\t') - if (filtersExists) - elt.loadField(cb, filtersIdx).consume(cb, 
writeC('.'), { case filters: SIndexableValue => - cb.ifx(filters.loadLength().ceq(0), writeS("PASS"), { - writeIterable(cb, filters, ';') - }) - }) - else - writeC('.') - - // INFO - writeC('\t') - if (infoExists) { - val wroteInfo = cb.newLocal[Boolean]("wroteInfo", false) - - elt.loadField(cb, infoIdx).consume(cb, { /* do nothing */ }, { case info: SBaseStructValue => - var idx = 0 - while (idx < info.st.size) { - val field = info.st.virtualType.fields(idx) - info.loadField(cb, idx).consume(cb, { /* do nothing */ }, { - case infoArray: SIndexableValue if infoArray.st.elementType.virtualType != TBoolean => - cb.ifx(infoArray.loadLength() > 0, { - cb.ifx(wroteInfo, writeC(';')) - writeS(field.name) - writeC('=') - writeIterable(cb, infoArray, ',') - cb.assign(wroteInfo, true) - }) - case infoFlag: SBooleanValue => - cb.ifx(infoFlag.value, { - cb.ifx(wroteInfo, writeC(';')) - writeS(field.name) - cb.assign(wroteInfo, true) - }) - case info => - cb.ifx(wroteInfo, writeC(';')) - writeS(field.name) - writeC('=') - writeValue(cb, info) - cb.assign(wroteInfo, true) - }) - idx += 1 - } - }) - - cb.ifx(!wroteInfo, writeC('.')) - } else { - writeC('.') - } - - // FORMAT - val genotypes = elt.loadField(cb, entriesFieldName).get(cb).asIndexable - cb.ifx(genotypes.loadLength() > 0, { - writeC('\t') - writeS(formatFieldString) - genotypes.forEachDefinedOrMissing(cb)({ (cb, _) => - _writeC(cb, '\t') - _writeS(cb, missingFormatStr) - }, { case (cb, _, gt: SBaseStructValue) => - _writeC(cb, '\t') - writeGenotype(cb, gt) - }) - }) - - writeC('\n') - } -} - -case class VCFExportFinalizer(typ: MatrixType, outputPath: String, append: Option[String], - metadata: Option[VCFMetadata], exportType: String, tabix: Boolean) extends MetadataWriter { - def annotationType: Type = TStruct("cols" -> TArray(typ.colType), "partFiles" -> TArray(TString)) - private def header(cb: EmitCodeBuilder, annotations: SBaseStructValue): Code[String] = { - val mb = cb.emb - val sampleIds = annotations.loadField(cb, "cols").get(cb).asIndexable - val stringSampleIds = cb.memoize(Code.newArray[String](sampleIds.loadLength())) - sampleIds.forEachDefined(cb) { case (cb, i, colv: SBaseStructValue) => - val s = colv.subset(typ.colKey: _*).loadField(cb, 0).get(cb).asString - cb += (stringSampleIds(i) = s.loadString(cb)) - } - Code.invokeScalaObject6[TStruct, TStruct, ReferenceGenome, Option[String], Option[VCFMetadata], Array[String], String]( - ExportVCF.getClass, "makeHeader", - mb.getType[TStruct](typ.rowType), mb.getType[TStruct](typ.entryType), - mb.getReferenceGenome(typ.referenceGenome), mb.getObject(append), - mb.getObject(metadata), stringSampleIds) - } - - def writeMetadata(writeAnnotations: => IEmitCode, cb: EmitCodeBuilder, region: Value[Region]): Unit = { - val ctx: ExecuteContext = cb.emb.ctx - val ext = ctx.fs.getCodecExtension(outputPath) - - val annotations = writeAnnotations.get(cb).asBaseStruct - - exportType match { - case ExportType.CONCATENATED => - val headerStr = header(cb, annotations) - - val partPaths = annotations.loadField(cb, "partFiles").get(cb) - val files = partPaths.castTo(cb, region, SJavaArrayString(true), false) - val headerFilePath = ctx.createTmpPath("header", ext) - val os = cb.memoize(cb.emb.create(const(headerFilePath))) - cb += os.invoke[Array[Byte], Unit]("write", headerStr.invoke[Array[Byte]]("getBytes")) - cb += os.invoke[Int, Unit]("write", '\n') - cb += os.invoke[Unit]("close") - - val partFiles = files.asInstanceOf[SJavaArrayStringValue].array - val jFiles = 
cb.memoize(Code.newArray[String](partFiles.length + 1)) - cb += (jFiles(0) = const(headerFilePath)) - cb += Code.invokeStatic5[System, Any, Int, Any, Int, Int, Unit]( - "arraycopy", partFiles /*src*/, 0 /*srcPos*/, jFiles /*dest*/, 1 /*destPos*/, partFiles.length /*len*/) - - cb += cb.emb.getFS.invoke[Array[String], String, Unit]("concatenateFiles", jFiles, const(outputPath)) - - val i = cb.newLocal[Int]("i") - cb.forLoop(cb.assign(i, 0), i < jFiles.length, cb.assign(i, i + 1), { - cb += cb.emb.getFS.invoke[String, Boolean, Unit]("delete", jFiles(i), const(false)) - }) - - if (tabix) { - cb += Code.invokeScalaObject2[FS, String, Unit](TabixVCF.getClass, "apply", cb.emb.getFS, const(outputPath)) - } - - case ExportType.PARALLEL_HEADER_IN_SHARD => - cb += cb.emb.getFS.invoke[String, Unit]("touch", const(outputPath).concat("/_SUCCESS")) - - case ExportType.PARALLEL_SEPARATE_HEADER => - val headerFilePath = s"$outputPath/header$ext" - val headerStr = header(cb, annotations) - - val os = cb.memoize(cb.emb.create(const(headerFilePath))) - cb += os.invoke[Array[Byte], Unit]("write", headerStr.invoke[Array[Byte]]("getBytes")) - cb += os.invoke[Int, Unit]("write", '\n') - cb += os.invoke[Unit]("close") - - cb += cb.emb.getFS.invoke[String, Unit]("touch", const(outputPath).concat("/_SUCCESS")) - } - } + def apply(ctx: ExecuteContext, mv: MatrixValue): Unit = ExportVCF(ctx, mv, path, append, exportType, metadata, tabix) } case class MatrixGENWriter( diff --git a/hail/src/main/scala/is/hail/expr/ir/Optimize.scala b/hail/src/main/scala/is/hail/expr/ir/Optimize.scala index 9e5a7f3599d..1ba313d14db 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Optimize.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Optimize.scala @@ -6,8 +6,8 @@ import is.hail.utils._ object Optimize { def apply[T <: BaseIR](ir0: T, context: String, ctx: ExecuteContext): T = { - if (ctx.shouldLogIR()) - log.info(s"optimize $context: before: IR size ${ IRSize(ir0) }: \n" + Pretty(ctx, ir0, elideLiterals = true)) + if (ctx.printIRs) + log.info(s"optimize $context: before: IR size ${ IRSize(ir0) }: \n" + Pretty(ir0, elideLiterals = true)) var ir = ir0 var last: BaseIR = null @@ -22,11 +22,11 @@ object Optimize { while (iter < maxIter && ir != last) { last = ir runOpt(FoldConstants(ctx, _), iter, "FoldConstants") - runOpt(ExtractIntervalFilters(ctx, _), iter, "ExtractIntervalFilters") - runOpt(Simplify(ctx, _), iter, "Simplify") + runOpt(ExtractIntervalFilters(_), iter, "ExtractIntervalFilters") + runOpt(Simplify(_), iter, "Simplify") runOpt(ForwardLets(_), iter, "ForwardLets") runOpt(ForwardRelationalLets(_), iter, "ForwardRelationalLets") - runOpt(PruneDeadFields(ctx, _), iter, "PruneDeadFields") + runOpt(PruneDeadFields(_), iter, "PruneDeadFields") iter += 1 } @@ -36,11 +36,11 @@ object Optimize { throw new RuntimeException(s"optimization changed type!" 
+ s"\n before: ${ ir0.typ.parsableString() }" + s"\n after: ${ ir.typ.parsableString() }" + - s"\n Before IR:\n ----------\n${ Pretty(ctx, ir0) }" + - s"\n After IR:\n ---------\n${ Pretty(ctx, ir) }") + s"\n Before IR:\n ----------\n${ Pretty(ir0) }" + + s"\n After IR:\n ---------\n${ Pretty(ir) }") - if (ctx.shouldLogIR()) - log.info(s"optimize $context: after: IR size ${ IRSize(ir) }:\n" + Pretty(ctx, ir, elideLiterals = true)) + if (ctx.printIRs) + log.info(s"optimize $context: after: IR size ${ IRSize(ir) }:\n" + Pretty(ir, elideLiterals = true)) ir } diff --git a/hail/src/main/scala/is/hail/expr/ir/Parser.scala b/hail/src/main/scala/is/hail/expr/ir/Parser.scala index bd5471b9eef..ed34874b4c0 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Parser.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Parser.scala @@ -869,7 +869,7 @@ object IRParser { case "MakeArray" => val typ = opt(it, type_expr(env.typEnv)).map(_.asInstanceOf[TArray]).orNull ir_value_children(env)(it).map { args => - MakeArray.unify(env.ctx, args, typ) + MakeArray.unify(args, typ) } case "MakeStream" => val typ = opt(it, type_expr(env.typEnv)).map(_.asInstanceOf[TStream]).orNull diff --git a/hail/src/main/scala/is/hail/expr/ir/Pretty.scala b/hail/src/main/scala/is/hail/expr/ir/Pretty.scala index cffc70f7775..88c3aef048e 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Pretty.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Pretty.scala @@ -1,7 +1,6 @@ package is.hail.expr.ir import is.hail.HailContext -import is.hail.backend.ExecuteContext import is.hail.expr.JSONAnnotationImpex import is.hail.expr.ir.agg._ import is.hail.expr.ir.functions.RelationalFunctions @@ -15,8 +14,8 @@ import org.json4s.jackson.{JsonMethods, Serialization} import scala.collection.mutable object Pretty { - def apply(ctx: ExecuteContext, ir: BaseIR, width: Int = 100, ribbonWidth: Int = 50, elideLiterals: Boolean = true, maxLen: Int = -1, allowUnboundRefs: Boolean = false): String = { - val useSSA = ctx != null && ctx.getFlag("use_ssa_logs") != null + def apply(ir: BaseIR, width: Int = 100, ribbonWidth: Int = 50, elideLiterals: Boolean = true, maxLen: Int = -1, allowUnboundRefs: Boolean = false): String = { + val useSSA = HailContext.getFlag("use_ssa_logs") != null val pretty = new Pretty(width, ribbonWidth, elideLiterals, maxLen, allowUnboundRefs, useSSA) pretty(ir) } diff --git a/hail/src/main/scala/is/hail/expr/ir/PruneDeadFields.scala b/hail/src/main/scala/is/hail/expr/ir/PruneDeadFields.scala index 5cd3fd54aa7..70dacc98dd9 100644 --- a/hail/src/main/scala/is/hail/expr/ir/PruneDeadFields.scala +++ b/hail/src/main/scala/is/hail/expr/ir/PruneDeadFields.scala @@ -1,6 +1,5 @@ package is.hail.expr.ir -import is.hail.backend.ExecuteContext import is.hail.annotations._ import is.hail.types._ import is.hail.types.virtual._ @@ -59,7 +58,7 @@ object PruneDeadFields { } case (t1: TTuple, t2: TTuple) => var idx = -1 - t1._types.forall { f => + t1.fields.forall { f => val t2field = t2.fields(t2.fieldIndex(f.index)) if (t2field.index > idx) { idx = t2field.index @@ -77,26 +76,26 @@ object PruneDeadFields { } } - def apply(ctx: ExecuteContext, ir: BaseIR): BaseIR = { + def apply(ir: BaseIR): BaseIR = { try { val irCopy = ir.deepCopy() val ms = ComputeMutableState(Memo.empty[BaseType], mutable.HashMap.empty) irCopy match { case mir: MatrixIR => - memoizeMatrixIR(ctx, mir, mir.typ, ms) - rebuild(ctx, mir, ms.rebuildState) + memoizeMatrixIR(mir, mir.typ, ms) + rebuild(mir, ms.rebuildState) case tir: TableIR => - memoizeTableIR(ctx, tir, tir.typ, ms) - rebuild(ctx, 
tir, ms.rebuildState) + memoizeTableIR(tir, tir.typ, ms) + rebuild(tir, ms.rebuildState) case bmir: BlockMatrixIR => - memoizeBlockMatrixIR(ctx, bmir, bmir.typ, ms) - rebuild(ctx, bmir, ms.rebuildState) + memoizeBlockMatrixIR(bmir, bmir.typ, ms) + rebuild(bmir, ms.rebuildState) case vir: IR => - memoizeValueIR(ctx, vir, vir.typ, ms) - rebuildIR(ctx, vir, BindingEnv(Env.empty, Some(Env.empty), Some(Env.empty)), ms.rebuildState) + memoizeValueIR(vir, vir.typ, ms) + rebuildIR(vir, BindingEnv(Env.empty, Some(Env.empty), Some(Env.empty)), ms.rebuildState) } } catch { - case e: Throwable => fatal(s"error trying to rebuild IR:\n${ Pretty(ctx, ir, elideLiterals = true) }", e) + case e: Throwable => fatal(s"error trying to rebuild IR:\n${ Pretty(ir, elideLiterals = true) }", e) } } @@ -321,25 +320,20 @@ object PruneDeadFields { BindingEnv(e, Some(e), Some(e)) } - def memoizeTableIR( - ctx: ExecuteContext, - tir: TableIR, - requestedType: TableType, - memo: ComputeMutableState - ) { + def memoizeTableIR(tir: TableIR, requestedType: TableType, memo: ComputeMutableState) { memo.requestedType.bind(tir, requestedType) tir match { case TableRead(_, _, _) => case TableLiteral(_, _, _, _) => case TableParallelize(rowsAndGlobal, _) => - memoizeValueIR(ctx, rowsAndGlobal, TStruct("rows" -> TArray(requestedType.rowType), "global" -> requestedType.globalType), memo) + memoizeValueIR(rowsAndGlobal, TStruct("rows" -> TArray(requestedType.rowType), "global" -> requestedType.globalType), memo) case TableRange(_, _) => - case TableRepartition(child, _, _) => memoizeTableIR(ctx, child, requestedType, memo) - case TableHead(child, _) => memoizeTableIR(ctx, child, TableType( + case TableRepartition(child, _, _) => memoizeTableIR(child, requestedType, memo) + case TableHead(child, _) => memoizeTableIR(child, TableType( key = child.typ.key, rowType = unify(child.typ.rowType, selectKey(child.typ.rowType, child.typ.key), requestedType.rowType), globalType = requestedType.globalType), memo) - case TableTail(child, _) => memoizeTableIR(ctx, child, TableType( + case TableTail(child, _) => memoizeTableIR(child, TableType( key = child.typ.key, rowType = unify(child.typ.rowType, selectKey(child.typ.rowType, child.typ.key), requestedType.rowType), globalType = requestedType.globalType), memo) @@ -355,7 +349,7 @@ object PruneDeadFields { requestedType.rowType.fieldOption(f).map(reqF => f -> reqF.typ)): _*), globalType = TStruct(left.typ.globalType.fieldNames.flatMap(f => requestedType.globalType.fieldOption(f).map(reqF => f -> reqF.typ)): _*)) - memoizeTableIR(ctx, left, leftDep, memo) + memoizeTableIR(left, leftDep, memo) val rk = right.typ.key.take(joinKey + math.max(0, requestedType.key.length - left.typ.key.length)) val rightKeyFields = rk.toSet @@ -368,7 +362,7 @@ object PruneDeadFields { requestedType.rowType.fieldOption(f).map(reqF => f -> reqF.typ)): _*), globalType = TStruct(right.typ.globalType.fieldNames.flatMap(f => requestedType.globalType.fieldOption(f).map(reqF => f -> reqF.typ)): _*)) - memoizeTableIR(ctx, right, rightDep, memo) + memoizeTableIR(right, rightDep, memo) case TableLeftJoinRightDistinct(left, right, root) => val fieldDep = requestedType.rowType.fieldOption(root).map(_.typ.asInstanceOf[TStruct]) fieldDep match { @@ -380,7 +374,7 @@ object PruneDeadFields { FastIndexedSeq[TStruct](right.typ.rowType.filterSet(right.typ.key.toSet, true)._1) ++ FastIndexedSeq(struct): _*), globalType = minimal(right.typ.globalType)) - memoizeTableIR(ctx, right, rightDep, memo) + memoizeTableIR(right, rightDep, memo) val lk 
= unifyKey(FastSeq(left.typ.key.take(right.typ.key.length), requestedType.key)) val leftDep = TableType( @@ -388,10 +382,10 @@ object PruneDeadFields { rowType = unify(left.typ.rowType, requestedType.rowType.filterSet(Set(root), include = false)._1, selectKey(left.typ.rowType, lk)), globalType = requestedType.globalType) - memoizeTableIR(ctx, left, leftDep, memo) + memoizeTableIR(left, leftDep, memo) case None => // don't memoize right if we are going to elide it during rebuild - memoizeTableIR(ctx, left, requestedType, memo) + memoizeTableIR(left, requestedType, memo) } case TableIntervalJoin(left, right, root, product) => val fieldDep = requestedType.rowType.fieldOption(root).map { field => @@ -409,7 +403,7 @@ object PruneDeadFields { FastIndexedSeq[TStruct](right.typ.rowType.filterSet(right.typ.key.toSet, true)._1) ++ FastIndexedSeq(struct): _*), globalType = minimal(right.typ.globalType)) - memoizeTableIR(ctx, right, rightDep, memo) + memoizeTableIR(right, rightDep, memo) val lk = unifyKey(FastSeq(left.typ.key.take(right.typ.key.length), requestedType.key)) val leftDep = TableType( @@ -417,10 +411,10 @@ object PruneDeadFields { rowType = unify(left.typ.rowType, requestedType.rowType.filterSet(Set(root), include = false)._1, selectKey(left.typ.rowType, lk)), globalType = requestedType.globalType) - memoizeTableIR(ctx, left, leftDep, memo) + memoizeTableIR(left, leftDep, memo) case None => // don't memoize right if we are going to elide it during rebuild - memoizeTableIR(ctx, left, requestedType, memo) + memoizeTableIR(left, requestedType, memo) } case TableMultiWayZipJoin(children, fieldName, globalName) => val gType = requestedType.globalType.fieldOption(globalName) @@ -436,7 +430,7 @@ object PruneDeadFields { child1.typ.keyType.fieldOption(f).orElse(rType.fieldOption(f)).map(reqF => f -> reqF.typ) ): _*), globalType = gType) - children.foreach(memoizeTableIR(ctx, _, dep, memo)) + children.foreach(memoizeTableIR(_, dep, memo)) case TableExplode(child, path) => def getExplodedField(typ: TableType): Type = typ.rowType.queryTyped(path.toList)._1 @@ -452,10 +446,10 @@ object PruneDeadFields { } val dep = requestedType.copy(rowType = unify(child.typ.rowType, requestedType.rowType.insert(prunedPreExlosionFieldType, path.toList)._1.asInstanceOf[TStruct])) - memoizeTableIR(ctx, child, dep, memo) + memoizeTableIR(child, dep, memo) case TableFilter(child, pred) => - val irDep = memoizeAndGetDep(ctx, pred, pred.typ, child.typ, memo) - memoizeTableIR(ctx, child, unify(child.typ, requestedType, irDep), memo) + val irDep = memoizeAndGetDep(pred, pred.typ, child.typ, memo) + memoizeTableIR(child, unify(child.typ, requestedType, irDep), memo) case TableKeyBy(child, _, isSorted) => val reqKey = requestedType.key val isPrefix = reqKey.zip(child.typ.key).forall { case (l, r) => l == r } @@ -465,7 +459,7 @@ object PruneDeadFields { if (reqKey.length <= child.typ.key.length) reqKey else child.typ.key else FastIndexedSeq() - memoizeTableIR(ctx, child, TableType( + memoizeTableIR(child, TableType( key = childReqKey, rowType = unify(child.typ.rowType, selectKey(child.typ.rowType, childReqKey), requestedType.rowType), globalType = requestedType.globalType), memo) @@ -474,7 +468,7 @@ object PruneDeadFields { child.typ.key else FastIndexedSeq() - memoizeTableIR(ctx, child, TableType( + memoizeTableIR(child, TableType( key = k, rowType = unify(child.typ.rowType, selectKey(child.typ.rowType, sortFields.map(_.field) ++ k), @@ -484,10 +478,10 @@ object PruneDeadFields { val dep = TableType(key = child.typ.key, 
rowType = unify(child.typ.rowType, requestedType.rowType, selectKey(child.typ.rowType, child.typ.key)), globalType = requestedType.globalType) - memoizeTableIR(ctx, child, dep, memo) + memoizeTableIR(child, dep, memo) case TableMapPartitions(child, gName, pName, body) => val reqRowsType = TStream(requestedType.rowType) - val bodyDep = memoizeValueIR(ctx, body, reqRowsType, memo) + val bodyDep = memoizeValueIR(body, reqRowsType, memo) val depGlobalType = unifySeq(child.typ.globalType, bodyDep.eval.lookupOption(gName).map(_.result()).getOrElse(Array()) :+ requestedType.globalType) val depRowType = unifySeq(child.typ.rowType, @@ -498,28 +492,28 @@ object PruneDeadFields { key = requestedType.key, rowType = depRowType.asInstanceOf[TStruct], globalType = depGlobalType.asInstanceOf[TStruct]) - memoizeTableIR(ctx, child, dep, memo) + memoizeTableIR(child, dep, memo) case TableMapRows(child, newRow) => - val rowDep = memoizeAndGetDep(ctx, newRow, requestedType.rowType, child.typ, memo) + val rowDep = memoizeAndGetDep(newRow, requestedType.rowType, child.typ, memo) val dep = TableType( key = requestedType.key, rowType = unify(child.typ.rowType, selectKey(requestedType.rowType, requestedType.key), rowDep.rowType), globalType = unify(child.typ.globalType, requestedType.globalType, rowDep.globalType) ) - memoizeTableIR(ctx, child, dep, memo) + memoizeTableIR(child, dep, memo) case TableMapGlobals(child, newGlobals) => - val globalDep = memoizeAndGetDep(ctx, newGlobals, requestedType.globalType, child.typ, memo) - memoizeTableIR(ctx, child, unify(child.typ, requestedType.copy(globalType = globalDep.globalType), globalDep), memo) + val globalDep = memoizeAndGetDep(newGlobals, requestedType.globalType, child.typ, memo) + memoizeTableIR(child, unify(child.typ, requestedType.copy(globalType = globalDep.globalType), globalDep), memo) case TableAggregateByKey(child, expr) => val exprRequestedType = requestedType.rowType.filter(f => expr.typ.asInstanceOf[TStruct].hasField(f.name))._1 - val aggDep = memoizeAndGetDep(ctx, expr, exprRequestedType, child.typ, memo) - memoizeTableIR(ctx, child, TableType(key = child.typ.key, + val aggDep = memoizeAndGetDep(expr, exprRequestedType, child.typ, memo) + memoizeTableIR(child, TableType(key = child.typ.key, rowType = unify(child.typ.rowType, aggDep.rowType, selectKey(child.typ.rowType, child.typ.key)), globalType = unify(child.typ.globalType, aggDep.globalType, requestedType.globalType)), memo) case TableKeyByAndAggregate(child, expr, newKey, _, _) => - val keyDep = memoizeAndGetDep(ctx, newKey, newKey.typ, child.typ, memo) - val exprDep = memoizeAndGetDep(ctx, expr, requestedType.valueType, child.typ, memo) - memoizeTableIR(ctx, child, + val keyDep = memoizeAndGetDep(newKey, newKey.typ, child.typ, memo) + val exprDep = memoizeAndGetDep(expr, requestedType.valueType, child.typ, memo) + memoizeTableIR(child, TableType( key = FastIndexedSeq(), // note: this can deoptimize if prune runs before Simplify rowType = unify(child.typ.rowType, keyDep.rowType, exprDep.rowType), @@ -531,14 +525,14 @@ object PruneDeadFields { entryType = TStruct.empty, colType = requestedType.rowType, colKey = requestedType.key) - memoizeMatrixIR(ctx, child, mtDep, memo) + memoizeMatrixIR(child, mtDep, memo) case MatrixRowsTable(child) => val minChild = minimal(child.typ) val mtDep = minChild.copy( globalType = requestedType.globalType, rowType = unify(child.typ.rowType, selectKey(child.typ.rowType, requestedType.key), requestedType.rowType), rowKey = requestedType.key) - memoizeMatrixIR(ctx, child, 
mtDep, memo) + memoizeMatrixIR(child, mtDep, memo) case MatrixEntriesTable(child) => val mtDep = MatrixType( rowKey = requestedType.key.take(child.typ.rowKey.length), @@ -551,9 +545,9 @@ object PruneDeadFields { entryType = TStruct( child.typ.entryType.fields.flatMap(f => requestedType.rowType.fieldOption(f.name).map(f2 => f.name -> f2.typ)): _*) ) - memoizeMatrixIR(ctx, child, mtDep, memo) + memoizeMatrixIR(child, mtDep, memo) case TableUnion(children) => - children.foreach(memoizeTableIR(ctx, _, requestedType, memo)) + children.foreach(memoizeTableIR(_, requestedType, memo)) case CastMatrixToTable(child, entriesFieldName, colsFieldName) => val childDep = MatrixType( rowKey = requestedType.key, @@ -574,7 +568,7 @@ object PruneDeadFields { requestedType.rowType.deleteKey(entriesFieldName) else requestedType.rowType) - memoizeMatrixIR(ctx, child, childDep, memo) + memoizeMatrixIR(child, childDep, memo) case TableRename(child, rowMap, globalMap) => val rowMapRev = rowMap.map { case (k, v) => (v, k) } val globalMapRev = globalMap.map { case (k, v) => (v, k) } @@ -582,42 +576,37 @@ object PruneDeadFields { rowType = requestedType.rowType.rename(rowMapRev), globalType = requestedType.globalType.rename(globalMapRev), key = requestedType.key.map(k => rowMapRev.getOrElse(k, k))) - memoizeTableIR(ctx, child, childDep, memo) + memoizeTableIR(child, childDep, memo) case TableFilterIntervals(child, _, _) => - memoizeTableIR(ctx, child, requestedType.copy(key = child.typ.key, + memoizeTableIR(child, requestedType.copy(key = child.typ.key, rowType = PruneDeadFields.unify(child.typ.rowType, requestedType.rowType, PruneDeadFields.selectKey(child.typ.rowType, child.typ.key))), memo) - case TableToTableApply(child, f) => memoizeTableIR(ctx, child, child.typ, memo) - case MatrixToTableApply(child, _) => memoizeMatrixIR(ctx, child, child.typ, memo) + case TableToTableApply(child, f) => memoizeTableIR(child, child.typ, memo) + case MatrixToTableApply(child, _) => memoizeMatrixIR(child, child.typ, memo) case BlockMatrixToTableApply(bm, aux, _) => - memoizeBlockMatrixIR(ctx, bm, bm.typ, memo) - memoizeValueIR(ctx, aux, aux.typ, memo) - case BlockMatrixToTable(child) => memoizeBlockMatrixIR(ctx, child, child.typ, memo) + memoizeBlockMatrixIR(bm, bm.typ, memo) + memoizeValueIR(aux, aux.typ, memo) + case BlockMatrixToTable(child) => memoizeBlockMatrixIR(child, child.typ, memo) case RelationalLetTable(name, value, body) => - memoizeTableIR(ctx, body, requestedType, memo) + memoizeTableIR(body, requestedType, memo) val usages = memo.relationalRefs.get(name).map(_.result()).getOrElse(Array()) - memoizeValueIR(ctx, value, unifySeq(value.typ, usages), memo) + memoizeValueIR(value, unifySeq(value.typ, usages), memo) } } - def memoizeMatrixIR( - ctx: ExecuteContext, - mir: MatrixIR, - requestedType: MatrixType, - memo: ComputeMutableState - ) { + def memoizeMatrixIR(mir: MatrixIR, requestedType: MatrixType, memo: ComputeMutableState) { memo.requestedType.bind(mir, requestedType) mir match { case MatrixFilterCols(child, pred) => - val irDep = memoizeAndGetDep(ctx, pred, pred.typ, child.typ, memo) - memoizeMatrixIR(ctx, child, unify(child.typ, requestedType, irDep), memo) + val irDep = memoizeAndGetDep(pred, pred.typ, child.typ, memo) + memoizeMatrixIR(child, unify(child.typ, requestedType, irDep), memo) case MatrixFilterRows(child, pred) => - val irDep = memoizeAndGetDep(ctx, pred, pred.typ, child.typ, memo) - memoizeMatrixIR(ctx, child, unify(child.typ, requestedType, irDep), memo) + val irDep = memoizeAndGetDep(pred, 
pred.typ, child.typ, memo) + memoizeMatrixIR(child, unify(child.typ, requestedType, irDep), memo) case MatrixFilterEntries(child, pred) => - val irDep = memoizeAndGetDep(ctx, pred, pred.typ, child.typ, memo) - memoizeMatrixIR(ctx, child, unify(child.typ, requestedType, irDep), memo) + val irDep = memoizeAndGetDep(pred, pred.typ, child.typ, memo) + memoizeMatrixIR(child, unify(child.typ, requestedType, irDep), memo) case MatrixUnionCols(left, right, joinType) => val leftRequestedType = requestedType.copy( rowKey = left.typ.rowKey, @@ -627,38 +616,38 @@ object PruneDeadFields { globalType = TStruct.empty, rowKey = right.typ.rowKey, rowType = selectKey(right.typ.rowType, right.typ.rowKey)) - memoizeMatrixIR(ctx, left, leftRequestedType, memo) - memoizeMatrixIR(ctx, right, rightRequestedType, memo) + memoizeMatrixIR(left, leftRequestedType, memo) + memoizeMatrixIR(right, rightRequestedType, memo) case MatrixMapEntries(child, newEntries) => - val irDep = memoizeAndGetDep(ctx, newEntries, requestedType.entryType, child.typ, memo) + val irDep = memoizeAndGetDep(newEntries, requestedType.entryType, child.typ, memo) val depMod = requestedType.copy(entryType = TStruct.empty) - memoizeMatrixIR(ctx, child, unify(child.typ, depMod, irDep), memo) + memoizeMatrixIR(child, unify(child.typ, depMod, irDep), memo) case MatrixKeyRowsBy(child, _, isSorted) => val reqKey = requestedType.rowKey val childReqKey = if (isSorted) child.typ.rowKey.take(reqKey.length) else FastIndexedSeq() - memoizeMatrixIR(ctx, child, requestedType.copy( + memoizeMatrixIR(child, requestedType.copy( rowKey = childReqKey, rowType = unify(child.typ.rowType, requestedType.rowType, selectKey(child.typ.rowType, childReqKey))), memo) case MatrixMapRows(child, newRow) => - val irDep = memoizeAndGetDep(ctx, newRow, requestedType.rowType, child.typ, memo) + val irDep = memoizeAndGetDep(newRow, requestedType.rowType, child.typ, memo) val depMod = requestedType.copy(rowType = selectKey(child.typ.rowType, child.typ.rowKey)) - memoizeMatrixIR(ctx, child, unify(child.typ, depMod, irDep), memo) + memoizeMatrixIR(child, unify(child.typ, depMod, irDep), memo) case MatrixMapCols(child, newCol, newKey) => - val irDep = memoizeAndGetDep(ctx, newCol, requestedType.colType, child.typ, memo) + val irDep = memoizeAndGetDep(newCol, requestedType.colType, child.typ, memo) val reqKey = newKey match { case Some(_) => FastIndexedSeq() case None => requestedType.colKey } val depMod = requestedType.copy(colType = selectKey(child.typ.colType, reqKey), colKey = reqKey) - memoizeMatrixIR(ctx, child, unify(child.typ, depMod, irDep), memo) + memoizeMatrixIR(child, unify(child.typ, depMod, irDep), memo) case MatrixMapGlobals(child, newGlobals) => - val irDep = memoizeAndGetDep(ctx, newGlobals, requestedType.globalType, child.typ, memo) - memoizeMatrixIR(ctx, child, unify(child.typ, requestedType.copy(globalType = irDep.globalType), irDep), memo) + val irDep = memoizeAndGetDep(newGlobals, requestedType.globalType, child.typ, memo) + memoizeMatrixIR(child, unify(child.typ, requestedType.copy(globalType = irDep.globalType), irDep), memo) case MatrixRead(_, _, _, _) => case MatrixLiteral(_, _) => case MatrixChooseCols(child, _) => - memoizeMatrixIR(ctx, child, unify(child.typ, requestedType), memo) + memoizeMatrixIR(child, unify(child.typ, requestedType), memo) case MatrixCollectColsByKey(child) => val colKeySet = child.typ.colKey.toSet val requestedColType = requestedType.colType @@ -674,10 +663,10 @@ object PruneDeadFields { }: _*), rowType = requestedType.rowType, 
entryType = TStruct(requestedType.entryType.fields.map(f => f.copy(typ = f.typ.asInstanceOf[TArray].elementType)))) - memoizeMatrixIR(ctx, child, explodedDep, memo) + memoizeMatrixIR(child, explodedDep, memo) case MatrixAggregateRowsByKey(child, entryExpr, rowExpr) => - val irDepEntry = memoizeAndGetDep(ctx, entryExpr, requestedType.entryType, child.typ, memo) - val irDepRow = memoizeAndGetDep(ctx, rowExpr, requestedType.rowValueStruct, child.typ, memo) + val irDepEntry = memoizeAndGetDep(entryExpr, requestedType.entryType, child.typ, memo) + val irDepRow = memoizeAndGetDep(rowExpr, requestedType.rowValueStruct, child.typ, memo) val childDep = MatrixType( rowKey = child.typ.rowKey, colKey = requestedType.colKey, @@ -685,10 +674,10 @@ object PruneDeadFields { rowType = unify(child.typ.rowType, selectKey(child.typ.rowType, child.typ.rowKey), irDepRow.rowType, irDepEntry.rowType), colType = unify(child.typ.colType, requestedType.colType, irDepEntry.colType, irDepRow.colType), globalType = unify(child.typ.globalType, requestedType.globalType, irDepEntry.globalType, irDepRow.globalType)) - memoizeMatrixIR(ctx, child, childDep, memo) + memoizeMatrixIR(child, childDep, memo) case MatrixAggregateColsByKey(child, entryExpr, colExpr) => - val irDepEntry = memoizeAndGetDep(ctx, entryExpr, requestedType.entryType, child.typ, memo) - val irDepCol = memoizeAndGetDep(ctx, colExpr, requestedType.colValueStruct, child.typ, memo) + val irDepEntry = memoizeAndGetDep(entryExpr, requestedType.entryType, child.typ, memo) + val irDepCol = memoizeAndGetDep(colExpr, requestedType.colValueStruct, child.typ, memo) val childDep: MatrixType = MatrixType( rowKey = requestedType.rowKey, colKey = child.typ.colKey, @@ -696,7 +685,7 @@ object PruneDeadFields { globalType = unify(child.typ.globalType, requestedType.globalType, irDepEntry.globalType, irDepCol.globalType), rowType = unify(child.typ.rowType, irDepEntry.rowType, irDepCol.rowType, requestedType.rowType), entryType = irDepEntry.entryType) - memoizeMatrixIR(ctx, child, childDep, memo) + memoizeMatrixIR(child, childDep, memo) case MatrixAnnotateRowsTable(child, table, root, product) => val fieldDep = requestedType.rowType.fieldOption(root).map { field => if (product) @@ -711,7 +700,7 @@ object PruneDeadFields { key = tk, rowType = unify(table.typ.rowType, struct, selectKey(table.typ.rowType, tk)), globalType = minimal(table.typ.globalType)) - memoizeTableIR(ctx, table, tableDep, memo) + memoizeTableIR(table, tableDep, memo) val mk = unifyKey(FastSeq(child.typ.rowKey.take(tk.length), requestedType.rowKey)) val matDep = requestedType.copy( @@ -720,10 +709,10 @@ object PruneDeadFields { unify(child.typ.rowType, selectKey(child.typ.rowType, mk), requestedType.rowType.filterSet(Set(root), include = false)._1)) - memoizeMatrixIR(ctx, child, matDep, memo) + memoizeMatrixIR(child, matDep, memo) case None => // don't depend on key IR dependencies if we are going to elide the node anyway - memoizeMatrixIR(ctx, child, requestedType, memo) + memoizeMatrixIR(child, requestedType, memo) } case MatrixAnnotateColsTable(child, table, uid) => val fieldDep = requestedType.colType.fieldOption(uid).map(_.typ.asInstanceOf[TStruct]) @@ -734,17 +723,17 @@ object PruneDeadFields { key = tk, rowType = unify(table.typ.rowType, struct, selectKey(table.typ.rowType, tk)), globalType = minimal(table.typ.globalType)) - memoizeTableIR(ctx, table, tableDep, memo) + memoizeTableIR(table, tableDep, memo) val mk = unifyKey(FastSeq(child.typ.colKey.take(table.typ.key.length), requestedType.colKey)) val 
matDep = requestedType.copy( colKey = mk, colType = unify(child.typ.colType, requestedType.colType.filterSet(Set(uid), include = false)._1, selectKey(child.typ.colType, mk))) - memoizeMatrixIR(ctx, child, matDep, memo) + memoizeMatrixIR(child, matDep, memo) case None => // don't depend on key IR dependencies if we are going to elide the node anyway - memoizeMatrixIR(ctx, child, requestedType, memo) + memoizeMatrixIR(child, requestedType, memo) } case MatrixExplodeRows(child, path) => def getExplodedField(typ: MatrixType): Type = typ.rowType.queryTyped(path.toList)._1 @@ -761,7 +750,7 @@ object PruneDeadFields { } val dep = requestedType.copy(rowType = unify(child.typ.rowType, requestedType.rowType.insert(prunedPreExlosionFieldType, path.toList)._1.asInstanceOf[TStruct])) - memoizeMatrixIR(ctx, child, dep, memo) + memoizeMatrixIR(child, dep, memo) case MatrixExplodeCols(child, path) => def getExplodedField(typ: MatrixType): Type = typ.colType.queryTyped(path.toList)._1 @@ -777,31 +766,31 @@ object PruneDeadFields { } val dep = requestedType.copy(colType = unify(child.typ.colType, requestedType.colType.insert(prunedPreExplosionFieldType, path.toList)._1.asInstanceOf[TStruct])) - memoizeMatrixIR(ctx, child, dep, memo) + memoizeMatrixIR(child, dep, memo) case MatrixRepartition(child, _, _) => - memoizeMatrixIR(ctx, child, requestedType, memo) + memoizeMatrixIR(child, requestedType, memo) case MatrixUnionRows(children) => - children.foreach(memoizeMatrixIR(ctx, _, requestedType, memo)) + children.foreach(memoizeMatrixIR(_, requestedType, memo)) case MatrixDistinctByRow(child) => val dep = requestedType.copy( rowKey = child.typ.rowKey, rowType = unify(child.typ.rowType, requestedType.rowType, selectKey(child.typ.rowType, child.typ.rowKey)) ) - memoizeMatrixIR(ctx, child, dep, memo) + memoizeMatrixIR(child, dep, memo) case MatrixRowsHead(child, n) => val dep = requestedType.copy( rowKey = child.typ.rowKey, rowType = unify(child.typ.rowType, requestedType.rowType, selectKey(child.typ.rowType, child.typ.rowKey)) ) - memoizeMatrixIR(ctx, child, dep, memo) - case MatrixColsHead(child, n) => memoizeMatrixIR(ctx, child, requestedType, memo) + memoizeMatrixIR(child, dep, memo) + case MatrixColsHead(child, n) => memoizeMatrixIR(child, requestedType, memo) case MatrixRowsTail(child, n) => val dep = requestedType.copy( rowKey = child.typ.rowKey, rowType = unify(child.typ.rowType, requestedType.rowType, selectKey(child.typ.rowType, child.typ.rowKey)) ) - memoizeMatrixIR(ctx, child, dep, memo) - case MatrixColsTail(child, n) => memoizeMatrixIR(ctx, child, requestedType, memo) + memoizeMatrixIR(child, dep, memo) + case MatrixColsTail(child, n) => memoizeMatrixIR(child, requestedType, memo) case CastTableToMatrix(child, entriesFieldName, colsFieldName, _) => val m = Map(MatrixType.entriesIdentifier -> entriesFieldName) val childDep = child.typ.copy( @@ -809,13 +798,13 @@ object PruneDeadFields { globalType = unify(child.typ.globalType, requestedType.globalType, TStruct((colsFieldName, TArray(requestedType.colType)))), rowType = unify(child.typ.rowType, requestedType.rowType, TStruct((entriesFieldName, TArray(requestedType.entryType)))) ) - memoizeTableIR(ctx, child, childDep, memo) + memoizeTableIR(child, childDep, memo) case MatrixFilterIntervals(child, _, _) => - memoizeMatrixIR(ctx, child, requestedType.copy(rowKey = child.typ.rowKey, + memoizeMatrixIR(child, requestedType.copy(rowKey = child.typ.rowKey, rowType = unify(child.typ.rowType, requestedType.rowType, selectKey(child.typ.rowType, 
child.typ.rowKey))), memo) - case MatrixToMatrixApply(child, f) => memoizeMatrixIR(ctx, child, child.typ, memo) + case MatrixToMatrixApply(child, f) => memoizeMatrixIR(child, child.typ, memo) case MatrixRename(child, globalMap, colMap, rowMap, entryMap) => val globalMapRev = globalMap.map { case (k, v) => (v, k) } val colMapRev = colMap.map { case (k, v) => (v, k) } @@ -828,44 +817,33 @@ object PruneDeadFields { colKey = requestedType.colKey.map(k => colMapRev.getOrElse(k, k)), rowType = requestedType.rowType.rename(rowMapRev), entryType = requestedType.entryType.rename(entryMapRev)) - memoizeMatrixIR(ctx, child, childDep, memo) + memoizeMatrixIR(child, childDep, memo) case RelationalLetMatrixTable(name, value, body) => - memoizeMatrixIR(ctx, body, requestedType, memo) + memoizeMatrixIR(body, requestedType, memo) val usages = memo.relationalRefs.get(name).map(_.result()).getOrElse(Array()) - memoizeValueIR(ctx, value, unifySeq(value.typ, usages), memo) + memoizeValueIR(value, unifySeq(value.typ, usages), memo) } } - def memoizeBlockMatrixIR( - ctx: ExecuteContext, - bmir: BlockMatrixIR, - requestedType: BlockMatrixType, - memo: ComputeMutableState - ): Unit = { + def memoizeBlockMatrixIR(bmir: BlockMatrixIR, requestedType: BlockMatrixType, memo: ComputeMutableState): Unit = { memo.requestedType.bind(bmir, requestedType) bmir match { case RelationalLetBlockMatrix(name, value, body) => - memoizeBlockMatrixIR(ctx, body, requestedType, memo) + memoizeBlockMatrixIR(body, requestedType, memo) val usages = memo.relationalRefs.get(name).map(_.result()).getOrElse(Array()) - memoizeValueIR(ctx, value, unifySeq(value.typ, usages), memo) + memoizeValueIR(value, unifySeq(value.typ, usages), memo) case _ => bmir.children.foreach { - case mir: MatrixIR => memoizeMatrixIR(ctx, mir, mir.typ, memo) - case tir: TableIR => memoizeTableIR(ctx, tir, tir.typ, memo) - case bmir: BlockMatrixIR => memoizeBlockMatrixIR(ctx, bmir, bmir.typ, memo) - case ir: IR => memoizeValueIR(ctx, ir, ir.typ, memo) + case mir: MatrixIR => memoizeMatrixIR(mir, mir.typ, memo) + case tir: TableIR => memoizeTableIR(tir, tir.typ, memo) + case bmir: BlockMatrixIR => memoizeBlockMatrixIR(bmir, bmir.typ, memo) + case ir: IR => memoizeValueIR(ir, ir.typ, memo) } } } - def memoizeAndGetDep( - ctx: ExecuteContext, - ir: IR, - requestedType: Type, - base: TableType, - memo: ComputeMutableState - ): TableType = { - val depEnv = memoizeValueIR(ctx, ir, requestedType, memo) + def memoizeAndGetDep(ir: IR, requestedType: Type, base: TableType, memo: ComputeMutableState): TableType = { + val depEnv = memoizeValueIR(ir, requestedType, memo) val depEnvUnified = concatEnvs(FastIndexedSeq(depEnv.eval) ++ FastIndexedSeq(depEnv.agg, depEnv.scan).flatten) val expectedBindingSet = Set("row", "global") @@ -873,7 +851,7 @@ object PruneDeadFields { if (!expectedBindingSet.contains(k)) throw new RuntimeException(s"found unexpected free variable in pruning: $k\n" + s" ${ depEnv.pretty(_.result().mkString(",")) }\n" + - s" ${ Pretty(ctx, ir) }") + s" ${ Pretty(ir) }") } val min = minimal(base) @@ -886,20 +864,14 @@ object PruneDeadFields { globalType = globalType.asInstanceOf[TStruct]) } - def memoizeAndGetDep( - ctx: ExecuteContext, - ir: IR, - requestedType: Type, - base: MatrixType, - memo: ComputeMutableState - ): MatrixType = { - val depEnv = memoizeValueIR(ctx, ir, requestedType, memo) + def memoizeAndGetDep(ir: IR, requestedType: Type, base: MatrixType, memo: ComputeMutableState): MatrixType = { + val depEnv = memoizeValueIR(ir, requestedType, memo) val 
depEnvUnified = concatEnvs(FastIndexedSeq(depEnv.eval) ++ FastIndexedSeq(depEnv.agg, depEnv.scan).flatten) val expectedBindingSet = Set("va", "sa", "g", "global", "n_rows", "n_cols") depEnvUnified.m.keys.foreach { k => if (!expectedBindingSet.contains(k)) - throw new RuntimeException(s"found unexpected free variable in pruning: $k\n ${ Pretty(ctx, ir) }") + throw new RuntimeException(s"found unexpected free variable in pruning: $k\n ${ Pretty(ir) }") } val min = minimal(base) @@ -917,7 +889,7 @@ object PruneDeadFields { .asInstanceOf[TStruct] if (rowType.hasField(MatrixType.entriesIdentifier)) - throw new RuntimeException(s"prune: found dependence on entry array in row binding:\n${ Pretty(ctx, ir) }") + throw new RuntimeException(s"prune: found dependence on entry array in row binding:\n${ Pretty(ir) }") MatrixType( rowKey = FastIndexedSeq(), @@ -939,15 +911,10 @@ object PruneDeadFields { * any of the "b" dependencies in order to create its own requested type, * which only contains "a". */ - def memoizeValueIR( - ctx: ExecuteContext, - ir: IR, - requestedType: Type, - memo: ComputeMutableState - ): BindingEnv[BoxedArrayBuilder[Type]] = { + def memoizeValueIR(ir: IR, requestedType: Type, memo: ComputeMutableState): BindingEnv[BoxedArrayBuilder[Type]] = { memo.requestedType.bind(ir, requestedType) ir match { - case IsNA(value) => memoizeValueIR(ctx, value, minimal(value.typ), memo) + case IsNA(value) => memoizeValueIR(value, minimal(value.typ), memo) case CastRename(v, _typ) => def recur(reqType: Type, castType: Type, baseType: Type): Type = { ((reqType, castType, baseType): @unchecked) match { @@ -973,33 +940,33 @@ object PruneDeadFields { } } - memoizeValueIR(ctx, v, recur(requestedType, _typ, v.typ), memo) + memoizeValueIR(v, recur(requestedType, _typ, v.typ), memo) case If(cond, cnsq, alt) => unifyEnvs( - memoizeValueIR(ctx, cond, cond.typ, memo), - memoizeValueIR(ctx, cnsq, requestedType, memo), - memoizeValueIR(ctx, alt, requestedType, memo) + memoizeValueIR(cond, cond.typ, memo), + memoizeValueIR(cnsq, requestedType, memo), + memoizeValueIR(alt, requestedType, memo) ) - case Coalesce(values) => unifyEnvsSeq(values.map(memoizeValueIR(ctx, _, requestedType, memo))) - case Consume(value) => memoizeValueIR(ctx, value, value.typ, memo) + case Coalesce(values) => unifyEnvsSeq(values.map(memoizeValueIR(_, requestedType, memo))) + case Consume(value) => memoizeValueIR(value, value.typ, memo) case Let(name, value, body) => - val bodyEnv = memoizeValueIR(ctx, body, requestedType, memo) + val bodyEnv = memoizeValueIR(body, requestedType, memo) val valueType = bodyEnv.eval.lookupOption(name) match { case Some(ab) => unifySeq(value.typ, ab.result()) case None => minimal(value.typ) } unifyEnvs( bodyEnv.deleteEval(name), - memoizeValueIR(ctx, value, valueType, memo) + memoizeValueIR(value, valueType, memo) ) case AggLet(name, value, body, isScan) => - val bodyEnv = memoizeValueIR(ctx, body, requestedType, memo) + val bodyEnv = memoizeValueIR(body, requestedType, memo) if (isScan) { val valueType = unifySeq( value.typ, bodyEnv.scanOrEmpty.lookupOption(name).map(_.result()).getOrElse(Array())) - val valueEnv = memoizeValueIR(ctx, value, valueType, memo) + val valueEnv = memoizeValueIR(value, valueType, memo) unifyEnvs( bodyEnv.copy(scan = bodyEnv.scan.map(_.delete(name))), valueEnv.copy(eval = Env.empty, scan = Some(valueEnv.eval)) @@ -1009,7 +976,7 @@ object PruneDeadFields { value.typ, bodyEnv.aggOrEmpty.lookupOption(name).map(_.result()).getOrElse(Array())) - val valueEnv = memoizeValueIR(ctx, 
value, valueType, memo) + val valueEnv = memoizeValueIR(value, valueType, memo) unifyEnvs( bodyEnv.copy(agg = bodyEnv.agg.map(_.delete(name))), valueEnv.copy(eval = Env.empty, agg = Some(valueEnv.eval)) @@ -1020,38 +987,38 @@ object PruneDeadFields { ab += requestedType BindingEnv.empty.bindEval(name -> ab) case RelationalLet(name, value, body) => - val e = memoizeValueIR(ctx, body, requestedType, memo) + val e = memoizeValueIR(body, requestedType, memo) val usages = memo.relationalRefs.get(name).map(_.result()).getOrElse(Array()) - memoizeValueIR(ctx, value, unifySeq(value.typ, usages), memo) + memoizeValueIR(value, unifySeq(value.typ, usages), memo) e case RelationalRef(name, _) => memo.relationalRefs.getOrElseUpdate(name, new BoxedArrayBuilder[Type]) += requestedType BindingEnv.empty case MakeArray(args, _) => val eltType = requestedType.asInstanceOf[TArray].elementType - unifyEnvsSeq(args.map(a => memoizeValueIR(ctx, a, eltType, memo))) + unifyEnvsSeq(args.map(a => memoizeValueIR(a, eltType, memo))) case MakeStream(args, _, _) => val eltType = requestedType.asInstanceOf[TStream].elementType - unifyEnvsSeq(args.map(a => memoizeValueIR(ctx, a, eltType, memo))) + unifyEnvsSeq(args.map(a => memoizeValueIR(a, eltType, memo))) case ArrayRef(a, i, s) => unifyEnvs( - memoizeValueIR(ctx, a, TArray(requestedType), memo), - memoizeValueIR(ctx, i, i.typ, memo), - memoizeValueIR(ctx, s, s.typ, memo) + memoizeValueIR(a, TArray(requestedType), memo), + memoizeValueIR(i, i.typ, memo), + memoizeValueIR(s, s.typ, memo) ) case ArrayLen(a) => - memoizeValueIR(ctx, a, minimal(a.typ), memo) + memoizeValueIR(a, minimal(a.typ), memo) case StreamTake(a, len) => unifyEnvs( - memoizeValueIR(ctx, a, requestedType, memo), - memoizeValueIR(ctx, len, len.typ, memo)) + memoizeValueIR(a, requestedType, memo), + memoizeValueIR(len, len.typ, memo)) case StreamDrop(a, len) => unifyEnvs( - memoizeValueIR(ctx, a, requestedType, memo), - memoizeValueIR(ctx, len, len.typ, memo)) + memoizeValueIR(a, requestedType, memo), + memoizeValueIR(len, len.typ, memo)) case StreamMap(a, name, body) => val aType = a.typ.asInstanceOf[TStream] - val bodyEnv = memoizeValueIR(ctx, body, + val bodyEnv = memoizeValueIR(body, requestedType.asInstanceOf[TStream].elementType, memo) val valueType = unifySeq( @@ -1059,25 +1026,25 @@ object PruneDeadFields { bodyEnv.eval.lookupOption(name).map(_.result()).getOrElse(Array())) unifyEnvs( bodyEnv.deleteEval(name), - memoizeValueIR(ctx, a, TStream(valueType), memo) + memoizeValueIR(a, TStream(valueType), memo) ) case StreamGrouped(a, size) => unifyEnvs( - memoizeValueIR(ctx, a, requestedType.asInstanceOf[TStream].elementType, memo), - memoizeValueIR(ctx, size, size.typ, memo)) + memoizeValueIR(a, requestedType.asInstanceOf[TStream].elementType, memo), + memoizeValueIR(size, size.typ, memo)) case StreamGroupByKey(a, key) => val reqStructT = coerce[TStruct](coerce[TStream](coerce[TStream](requestedType).elementType).elementType) val origStructT = coerce[TStruct](coerce[TStream](a.typ).elementType) - memoizeValueIR(ctx, a, TStream(unify(origStructT, reqStructT, selectKey(origStructT, key))), memo) + memoizeValueIR(a, TStream(unify(origStructT, reqStructT, selectKey(origStructT, key))), memo) case StreamZip(as, names, body, behavior, _) => - val bodyEnv = memoizeValueIR(ctx, body, + val bodyEnv = memoizeValueIR(body, requestedType.asInstanceOf[TStream].elementType, memo) val valueTypes = (names, as).zipped.map { (name, a) => bodyEnv.eval.lookupOption(name).map(ab => 
unifySeq(coerce[TStream](a.typ).elementType, ab.result())) } if (behavior == ArrayZipBehavior.AssumeSameLength && valueTypes.forall(_.isEmpty)) { - unifyEnvs(memoizeValueIR(ctx, as.head, TStream(minimal(coerce[TStream](as.head.typ).elementType)), memo) +: + unifyEnvs(memoizeValueIR(as.head, TStream(minimal(coerce[TStream](as.head.typ).elementType)), memo) +: Array(bodyEnv.deleteEval(names)): _*) } else { unifyEnvs( @@ -1085,73 +1052,73 @@ object PruneDeadFields { val at = coerce[TStream](a.typ) if (behavior == ArrayZipBehavior.AssumeSameLength) { vtOption.map { vt => - memoizeValueIR(ctx, a, TStream(vt), memo) + memoizeValueIR(a, TStream(vt), memo) }.getOrElse(BindingEnv.empty) } else - memoizeValueIR(ctx, a, TStream(vtOption.getOrElse(minimal(at.elementType))), memo) + memoizeValueIR(a, TStream(vtOption.getOrElse(minimal(at.elementType))), memo) } ++ Array(bodyEnv.deleteEval(names)): _*) } case StreamZipJoin(as, key, curKey, curVals, joinF) => val eltType = coerce[TStruct](coerce[TStream](as.head.typ).elementType) val requestedEltType = coerce[TStream](requestedType).elementType - val bodyEnv = memoizeValueIR(ctx, joinF, requestedEltType, memo) + val bodyEnv = memoizeValueIR(joinF, requestedEltType, memo) val childRequestedEltType = unifySeq( eltType, bodyEnv.eval.lookupOption(curVals).map(_.result().map(_.asInstanceOf[TArray].elementType)).getOrElse(Array()) :+ selectKey(eltType, key)) - unifyEnvsSeq(as.map(memoizeValueIR(ctx, _, TStream(childRequestedEltType), memo))) + unifyEnvsSeq(as.map(memoizeValueIR(_, TStream(childRequestedEltType), memo))) case StreamMultiMerge(as, key) => val eltType = coerce[TStruct](coerce[TStream](as.head.typ).elementType) val requestedEltType = coerce[TStream](requestedType).elementType val childRequestedEltType = unify(eltType, requestedEltType, selectKey(eltType, key)) - unifyEnvsSeq(as.map(memoizeValueIR(ctx, _, TStream(childRequestedEltType), memo))) + unifyEnvsSeq(as.map(memoizeValueIR(_, TStream(childRequestedEltType), memo))) case StreamFilter(a, name, cond) => val aType = a.typ.asInstanceOf[TStream] - val bodyEnv = memoizeValueIR(ctx, cond, cond.typ, memo) + val bodyEnv = memoizeValueIR(cond, cond.typ, memo) val valueType = unifySeq( aType.elementType, FastIndexedSeq(requestedType.asInstanceOf[TStream].elementType) ++ bodyEnv.eval.lookupOption(name).map(_.result()).getOrElse(Array())) unifyEnvs( bodyEnv.deleteEval(name), - memoizeValueIR(ctx, a, TStream(valueType), memo) + memoizeValueIR(a, TStream(valueType), memo) ) case StreamTakeWhile(a, name, cond) => val aType = a.typ.asInstanceOf[TStream] - val bodyEnv = memoizeValueIR(ctx, cond, cond.typ, memo) + val bodyEnv = memoizeValueIR(cond, cond.typ, memo) val valueType = unifySeq( aType.elementType, FastIndexedSeq(requestedType.asInstanceOf[TStream].elementType) ++ bodyEnv.eval.lookupOption(name).map(_.result()).getOrElse(Array())) unifyEnvs( bodyEnv.deleteEval(name), - memoizeValueIR(ctx, a, TStream(valueType), memo) + memoizeValueIR(a, TStream(valueType), memo) ) case StreamDropWhile(a, name, cond) => val aType = a.typ.asInstanceOf[TStream] - val bodyEnv = memoizeValueIR(ctx, cond, cond.typ, memo) + val bodyEnv = memoizeValueIR(cond, cond.typ, memo) val valueType = unifySeq( aType.elementType, FastIndexedSeq(requestedType.asInstanceOf[TStream].elementType) ++ bodyEnv.eval.lookupOption(name).map(_.result()).getOrElse(Array())) unifyEnvs( bodyEnv.deleteEval(name), - memoizeValueIR(ctx, a, TStream(valueType), memo) + memoizeValueIR(a, TStream(valueType), memo) ) case StreamFlatMap(a, name, body) => val 
aType = a.typ.asInstanceOf[TStream] - val bodyEnv = memoizeValueIR(ctx, body, requestedType, memo) + val bodyEnv = memoizeValueIR(body, requestedType, memo) val valueType = unifySeq( aType.elementType, bodyEnv.eval.lookupOption(name).map(_.result()).getOrElse(Array())) unifyEnvs( bodyEnv.deleteEval(name), - memoizeValueIR(ctx, a, TStream(valueType), memo) + memoizeValueIR(a, TStream(valueType), memo) ) case StreamFold(a, zero, accumName, valueName, body) => val aType = a.typ.asInstanceOf[TStream] - val zeroEnv = memoizeValueIR(ctx, zero, zero.typ, memo) - val bodyEnv = memoizeValueIR(ctx, body, body.typ, memo) + val zeroEnv = memoizeValueIR(zero, zero.typ, memo) + val bodyEnv = memoizeValueIR(body, body.typ, memo) val valueType = unifySeq( aType.elementType, bodyEnv.eval.lookupOption(valueName).map(_.result()).getOrElse(Array())) @@ -1159,13 +1126,13 @@ object PruneDeadFields { unifyEnvs( zeroEnv, bodyEnv.deleteEval(valueName).deleteEval(accumName), - memoizeValueIR(ctx, a, TStream(valueType), memo) + memoizeValueIR(a, TStream(valueType), memo) ) case StreamFold2(a, accum, valueName, seq, res) => val aType = a.typ.asInstanceOf[TStream] - val zeroEnvs = accum.map { case (name, zval) => memoizeValueIR(ctx, zval, zval.typ, memo) } - val seqEnvs = seq.map { seq => memoizeValueIR(ctx, seq, seq.typ, memo) } - val resEnv = memoizeValueIR(ctx, res, requestedType, memo) + val zeroEnvs = accum.map { case (name, zval) => memoizeValueIR(zval, zval.typ, memo) } + val seqEnvs = seq.map { seq => memoizeValueIR(seq, seq.typ, memo) } + val resEnv = memoizeValueIR(res, requestedType, memo) val valueType = unifySeq( aType.elementType, resEnv.eval.lookupOption(valueName).map(_.result()).getOrElse(Array()) ++ @@ -1177,25 +1144,25 @@ object PruneDeadFields { zeroEnvs ++ Array(resEnv.copy(eval = resEnv.eval.delete(accumNames))) ++ seqEnvs.map(e => e.copy(eval = e.eval.delete(seqNames))) - ++ Array(memoizeValueIR(ctx, a, TStream(valueType), memo)) + ++ Array(memoizeValueIR(a, TStream(valueType), memo)) ) case StreamScan(a, zero, accumName, valueName, body) => val aType = a.typ.asInstanceOf[TStream] - val zeroEnv = memoizeValueIR(ctx, zero, zero.typ, memo) - val bodyEnv = memoizeValueIR(ctx, body, body.typ, memo) + val zeroEnv = memoizeValueIR(zero, zero.typ, memo) + val bodyEnv = memoizeValueIR(body, body.typ, memo) val valueType = unifySeq( aType.elementType, bodyEnv.eval.lookupOption(valueName).map(_.result()).getOrElse(Array())) unifyEnvs( zeroEnv, bodyEnv.deleteEval(valueName).deleteEval(accumName), - memoizeValueIR(ctx, a, TStream(valueType), memo) + memoizeValueIR(a, TStream(valueType), memo) ) case StreamJoinRightDistinct(left, right, lKey, rKey, l, r, join, joinType) => val lType = left.typ.asInstanceOf[TStream] val rType = right.typ.asInstanceOf[TStream] - val joinEnv = memoizeValueIR(ctx, join, requestedType.asInstanceOf[TStream].elementType, memo) + val joinEnv = memoizeValueIR(join, requestedType.asInstanceOf[TStream].elementType, memo) val lRequested = unifySeq( lType.elementType, @@ -1209,10 +1176,10 @@ object PruneDeadFields { unifyEnvs( joinEnv.deleteEval(l).deleteEval(r), - memoizeValueIR(ctx, left, TStream(lRequested), memo), - memoizeValueIR(ctx, right, TStream(rRequested), memo)) + memoizeValueIR(left, TStream(lRequested), memo), + memoizeValueIR(right, TStream(rRequested), memo)) case ArraySort(a, left, right, lessThan) => - val compEnv = memoizeValueIR(ctx, lessThan, lessThan.typ, memo) + val compEnv = memoizeValueIR(lessThan, lessThan.typ, memo) val aType = a.typ.asInstanceOf[TStream] val 
requestedElementType = unifySeq( @@ -1221,7 +1188,7 @@ object PruneDeadFields { compEnv.eval.lookupOption(left).map(_.result()).getOrElse(Array()) ++ compEnv.eval.lookupOption(right).map(_.result()).getOrElse(Array())) - val aEnv = memoizeValueIR(ctx, a, TStream(requestedElementType), memo) + val aEnv = memoizeValueIR(a, TStream(requestedElementType), memo) unifyEnvs( compEnv.deleteEval(left).deleteEval(right), @@ -1230,37 +1197,37 @@ object PruneDeadFields { case StreamFor(a, valueName, body) => assert(requestedType == TVoid) val aType = a.typ.asInstanceOf[TStream] - val bodyEnv = memoizeValueIR(ctx, body, body.typ, memo) + val bodyEnv = memoizeValueIR(body, body.typ, memo) val valueType = unifySeq( aType.elementType, bodyEnv.eval.lookupOption(valueName).map(_.result()).getOrElse(Array())) unifyEnvs( bodyEnv.deleteEval(valueName), - memoizeValueIR(ctx, a, TStream(valueType), memo) + memoizeValueIR(a, TStream(valueType), memo) ) case MakeNDArray(data, shape, rowMajor, errorId) => val elementType = requestedType.asInstanceOf[TNDArray].elementType val dataType = if (data.typ.isInstanceOf[TArray]) TArray(elementType) else TStream(elementType) unifyEnvs( - memoizeValueIR(ctx, data, dataType, memo), - memoizeValueIR(ctx, shape, shape.typ, memo), - memoizeValueIR(ctx, rowMajor, rowMajor.typ, memo) + memoizeValueIR(data, dataType, memo), + memoizeValueIR(shape, shape.typ, memo), + memoizeValueIR(rowMajor, rowMajor.typ, memo) ) case NDArrayMap(nd, valueName, body) => val ndType = nd.typ.asInstanceOf[TNDArray] - val bodyEnv = memoizeValueIR(ctx, body, requestedType.asInstanceOf[TNDArray].elementType, memo) + val bodyEnv = memoizeValueIR(body, requestedType.asInstanceOf[TNDArray].elementType, memo) val valueType = unifySeq( ndType.elementType, bodyEnv.eval.lookupOption(valueName).map(_.result()).getOrElse(Array()) ) unifyEnvs( bodyEnv.deleteEval(valueName), - memoizeValueIR(ctx, nd, ndType.copy(elementType = valueType), memo) + memoizeValueIR(nd, ndType.copy(elementType = valueType), memo) ) case NDArrayMap2(left, right, leftName, rightName, body, _) => val leftType = left.typ.asInstanceOf[TNDArray] val rightType = right.typ.asInstanceOf[TNDArray] - val bodyEnv = memoizeValueIR(ctx, body, requestedType.asInstanceOf[TNDArray].elementType, memo) + val bodyEnv = memoizeValueIR(body, requestedType.asInstanceOf[TNDArray].elementType, memo) val leftValueType = unify( leftType.elementType, @@ -1274,12 +1241,12 @@ object PruneDeadFields { unifyEnvs( bodyEnv.deleteEval(leftName).deleteEval(rightName), - memoizeValueIR(ctx, left, leftType.copy(elementType = leftValueType), memo), - memoizeValueIR(ctx, right, rightType.copy(elementType = rightValueType), memo) + memoizeValueIR(left, leftType.copy(elementType = leftValueType), memo), + memoizeValueIR(right, rightType.copy(elementType = rightValueType), memo) ) case AggExplode(a, name, body, isScan) => val aType = a.typ.asInstanceOf[TStream] - val bodyEnv = memoizeValueIR(ctx, body, + val bodyEnv = memoizeValueIR(body, requestedType, memo) if (isScan) { @@ -1287,7 +1254,7 @@ object PruneDeadFields { aType.elementType, bodyEnv.scanOrEmpty.lookupOption(name).map(_.result()).getOrElse(Array())) - val aEnv = memoizeValueIR(ctx, a, TStream(valueType), memo) + val aEnv = memoizeValueIR(a, TStream(valueType), memo) unifyEnvs( BindingEnv(scan = bodyEnv.scan.map(_.delete(name))), BindingEnv(scan = Some(aEnv.eval)) @@ -1297,33 +1264,33 @@ object PruneDeadFields { aType.elementType, bodyEnv.aggOrEmpty.lookupOption(name).map(_.result()).getOrElse(Array())) - val aEnv = 
memoizeValueIR(ctx, a, TStream(valueType), memo) + val aEnv = memoizeValueIR(a, TStream(valueType), memo) unifyEnvs( BindingEnv(agg = bodyEnv.agg.map(_.delete(name))), BindingEnv(agg = Some(aEnv.eval)) ) } case AggFilter(cond, aggIR, isScan) => - val condEnv = memoizeValueIR(ctx, cond, cond.typ, memo) + val condEnv = memoizeValueIR(cond, cond.typ, memo) unifyEnvs( if (isScan) BindingEnv(scan = Some(condEnv.eval)) else BindingEnv(agg = Some(condEnv.eval)), - memoizeValueIR(ctx, aggIR, requestedType, memo) + memoizeValueIR(aggIR, requestedType, memo) ) case AggGroupBy(key, aggIR, isScan) => - val keyEnv = memoizeValueIR(ctx, key, requestedType.asInstanceOf[TDict].keyType, memo) + val keyEnv = memoizeValueIR(key, requestedType.asInstanceOf[TDict].keyType, memo) unifyEnvs( if (isScan) BindingEnv(scan = Some(keyEnv.eval)) else BindingEnv(agg = Some(keyEnv.eval)), - memoizeValueIR(ctx, aggIR, requestedType.asInstanceOf[TDict].valueType, memo) + memoizeValueIR(aggIR, requestedType.asInstanceOf[TDict].valueType, memo) ) case AggArrayPerElement(a, elementName, indexName, aggBody, knownLength, isScan) => val aType = a.typ.asInstanceOf[TArray] - val bodyEnv = memoizeValueIR(ctx, aggBody, + val bodyEnv = memoizeValueIR(aggBody, requestedType.asInstanceOf[TArray].elementType, memo) if (isScan) { @@ -1331,36 +1298,36 @@ object PruneDeadFields { aType.elementType, bodyEnv.scanOrEmpty.lookupOption(elementName).map(_.result()).getOrElse(Array())) - val aEnv = memoizeValueIR(ctx, a, TArray(valueType), memo) + val aEnv = memoizeValueIR(a, TArray(valueType), memo) unifyEnvsSeq(FastSeq( bodyEnv.copy(eval = bodyEnv.eval.delete(indexName), scan = bodyEnv.scan.map(_.delete(elementName))), BindingEnv(scan = Some(aEnv.eval)) - ) ++ knownLength.map(x => memoizeValueIR(ctx, x, x.typ, memo))) + ) ++ knownLength.map(x => memoizeValueIR(x, x.typ, memo))) } else { val valueType = unifySeq( aType.elementType, bodyEnv.aggOrEmpty.lookupOption(elementName).map(_.result()).getOrElse(Array())) - val aEnv = memoizeValueIR(ctx, a, TArray(valueType), memo) + val aEnv = memoizeValueIR(a, TArray(valueType), memo) unifyEnvsSeq(FastSeq( bodyEnv.copy(eval = bodyEnv.eval.delete(indexName), agg = bodyEnv.agg.map(_.delete(elementName))), BindingEnv(agg = Some(aEnv.eval)) - ) ++ knownLength.map(x => memoizeValueIR(ctx, x, x.typ, memo))) + ) ++ knownLength.map(x => memoizeValueIR(x, x.typ, memo))) } case ApplyAggOp(initOpArgs, seqOpArgs, sig) => val prunedSig = AggSignature.prune(sig, requestedType) - val initEnv = unifyEnvsSeq((initOpArgs, prunedSig.initOpArgs).zipped.map { (arg, req) => memoizeValueIR(ctx, arg, req, memo) }) - val seqOpEnv = unifyEnvsSeq((seqOpArgs, prunedSig.seqOpArgs).zipped.map { (arg, req) => memoizeValueIR(ctx, arg, req, memo) }) + val initEnv = unifyEnvsSeq((initOpArgs, prunedSig.initOpArgs).zipped.map { (arg, req) => memoizeValueIR(arg, req, memo) }) + val seqOpEnv = unifyEnvsSeq((seqOpArgs, prunedSig.seqOpArgs).zipped.map { (arg, req) => memoizeValueIR(arg, req, memo) }) BindingEnv(eval = initEnv.eval, agg = Some(seqOpEnv.eval)) case ApplyScanOp(initOpArgs, seqOpArgs, sig) => val prunedSig = AggSignature.prune(sig, requestedType) - val initEnv = unifyEnvsSeq((initOpArgs, prunedSig.initOpArgs).zipped.map { (arg, req) => memoizeValueIR(ctx, arg, req, memo) }) - val seqOpEnv = unifyEnvsSeq((seqOpArgs, prunedSig.seqOpArgs).zipped.map { (arg, req) => memoizeValueIR(ctx, arg, req, memo) }) + val initEnv = unifyEnvsSeq((initOpArgs, prunedSig.initOpArgs).zipped.map { (arg, req) => memoizeValueIR(arg, req, memo) }) + val 
seqOpEnv = unifyEnvsSeq((seqOpArgs, prunedSig.seqOpArgs).zipped.map { (arg, req) => memoizeValueIR(arg, req, memo) }) BindingEnv(eval = initEnv.eval, scan = Some(seqOpEnv.eval)) case AggFold(zero, seqOp, combOp, accumName, otherAccumName, isScan) => - val initEnv = memoizeValueIR(ctx, zero, zero.typ, memo) - val seqEnv = memoizeValueIR(ctx, seqOp, seqOp.typ, memo) - memoizeValueIR(ctx, combOp, combOp.typ, memo) + val initEnv = memoizeValueIR(zero, zero.typ, memo) + val seqEnv = memoizeValueIR(seqOp, seqOp.typ, memo) + memoizeValueIR(combOp, combOp.typ, memo) if (isScan) BindingEnv(eval = initEnv.eval, scan = Some(seqEnv.eval.delete(accumName))) @@ -1368,47 +1335,47 @@ object PruneDeadFields { BindingEnv(eval = initEnv.eval, agg = Some(seqEnv.eval.delete(accumName))) case StreamAgg(a, name, query) => val aType = a.typ.asInstanceOf[TStream] - val queryEnv = memoizeValueIR(ctx, query, requestedType, memo) + val queryEnv = memoizeValueIR(query, requestedType, memo) val requestedElemType = unifySeq( aType.elementType, queryEnv.aggOrEmpty.lookupOption(name).map(_.result()).getOrElse(Array())) - val aEnv = memoizeValueIR(ctx, a, TStream(requestedElemType), memo) + val aEnv = memoizeValueIR(a, TStream(requestedElemType), memo) unifyEnvs( BindingEnv(eval = concatEnvs(Array(queryEnv.eval, queryEnv.aggOrEmpty.delete(name)))), aEnv) case StreamAggScan(a, name, query) => val aType = a.typ.asInstanceOf[TStream] - val queryEnv = memoizeValueIR(ctx, query, requestedType.asInstanceOf[TStream].elementType, memo) + val queryEnv = memoizeValueIR(query, requestedType.asInstanceOf[TStream].elementType, memo) val requestedElemType = unifySeq( aType.elementType, queryEnv.scanOrEmpty.lookupOption(name).map(_.result()).getOrElse(Array()) ++ queryEnv.eval.lookupOption(name).map(_.result()).getOrElse(Array())) - val aEnv = memoizeValueIR(ctx, a, TStream(requestedElemType), memo) + val aEnv = memoizeValueIR(a, TStream(requestedElemType), memo) unifyEnvs( BindingEnv(eval = concatEnvs(Array(queryEnv.eval.delete(name), queryEnv.scanOrEmpty.delete(name)))), aEnv) case RunAgg(body, result, _) => unifyEnvs( - memoizeValueIR(ctx, body, body.typ, memo), - memoizeValueIR(ctx, result, requestedType, memo) + memoizeValueIR(body, body.typ, memo), + memoizeValueIR(result, requestedType, memo) ) case RunAggScan(array, name, init, seqs, result, signature) => val aType = array.typ.asInstanceOf[TStream] - val resultEnv = memoizeValueIR(ctx, result, requestedType.asInstanceOf[TStream].elementType, memo) - val seqEnv = memoizeValueIR(ctx, seqs, seqs.typ, memo) + val resultEnv = memoizeValueIR(result, requestedType.asInstanceOf[TStream].elementType, memo) + val seqEnv = memoizeValueIR(seqs, seqs.typ, memo) val elemEnv = unifyEnvs(resultEnv, seqEnv) val requestedElemType = unifySeq(aType.elementType, elemEnv.eval.lookupOption(name).map(_.result()).getOrElse(Array())) unifyEnvs( elemEnv, - memoizeValueIR(ctx, array, TStream(requestedElemType), memo), - memoizeValueIR(ctx, init, init.typ, memo) + memoizeValueIR(array, TStream(requestedElemType), memo), + memoizeValueIR(init, init.typ, memo) ) case MakeStruct(fields) => val sType = requestedType.asInstanceOf[TStruct] unifyEnvsSeq(fields.flatMap { case (fname, fir) => // ignore unreachable fields, these are eliminated on the upwards pass - sType.fieldOption(fname).map(f => memoizeValueIR(ctx, fir, f.typ, memo)) + sType.fieldOption(fname).map(f => memoizeValueIR(fir, f.typ, memo)) }) case InsertFields(old, fields, _) => val sType = requestedType.asInstanceOf[TStruct] @@ -1425,10 +1392,10 @@ 
object PruneDeadFields { sType.fieldOption(f.name).map(f.name -> _.typ) }: _*) unifyEnvsSeq( - FastSeq(memoizeValueIR(ctx, old, leftDep, memo)) ++ + FastSeq(memoizeValueIR(old, leftDep, memo)) ++ // ignore unreachable fields, these are eliminated on the upwards pass fields.flatMap { case (fname, fir) => - rightDep.fieldOption(fname).map(f => memoizeValueIR(ctx, fir, f.typ, memo)) + rightDep.fieldOption(fname).map(f => memoizeValueIR(fir, f.typ, memo)) } ) case SelectFields(old, fields) => @@ -1436,9 +1403,9 @@ object PruneDeadFields { val oldReqType = TStruct(old.typ.asInstanceOf[TStruct] .fieldNames .flatMap(fn => sType.fieldOption(fn).map(fd => (fd.name, fd.typ))): _*) - memoizeValueIR(ctx, old, oldReqType, memo) + memoizeValueIR(old, oldReqType, memo) case GetField(o, name) => - memoizeValueIR(ctx, o, TStruct(name -> requestedType), memo) + memoizeValueIR(o, TStruct(name -> requestedType), memo) case MakeTuple(fields) => val tType = requestedType.asInstanceOf[TTuple] @@ -1447,29 +1414,29 @@ object PruneDeadFields { // ignore unreachable fields, these are eliminated on the upwards pass tType.fieldIndex.get(i) .map { idx => - memoizeValueIR(ctx, value, tType.types(idx), memo) + memoizeValueIR(value, tType.types(idx), memo) }}) case GetTupleElement(o, idx) => val childTupleType = o.typ.asInstanceOf[TTuple] val tupleDep = TTuple(FastIndexedSeq(TupleField(idx, requestedType))) - memoizeValueIR(ctx, o, tupleDep, memo) + memoizeValueIR(o, tupleDep, memo) case ConsoleLog(message, result) => unifyEnvs( - memoizeValueIR(ctx, message, TString, memo), - memoizeValueIR(ctx, result, result.typ, memo) + memoizeValueIR(message, TString, memo), + memoizeValueIR(result, result.typ, memo) ) case MatrixCount(child) => - memoizeMatrixIR(ctx, child, minimal(child.typ), memo) + memoizeMatrixIR(child, minimal(child.typ), memo) BindingEnv.empty case TableCount(child) => - memoizeTableIR(ctx, child, minimal(child.typ), memo) + memoizeTableIR(child, minimal(child.typ), memo) BindingEnv.empty case TableGetGlobals(child) => - memoizeTableIR(ctx, child, minimal(child.typ).copy(globalType = requestedType.asInstanceOf[TStruct]), memo) + memoizeTableIR(child, minimal(child.typ).copy(globalType = requestedType.asInstanceOf[TStruct]), memo) BindingEnv.empty case TableCollect(child) => val rStruct = requestedType.asInstanceOf[TStruct] - memoizeTableIR(ctx, child, TableType( + memoizeTableIR(child, TableType( key = child.typ.key, rowType = unify(child.typ.rowType, rStruct.fieldOption("rows").map(_.typ.asInstanceOf[TArray].elementType.asInstanceOf[TStruct]).getOrElse(TStruct.empty)), @@ -1477,23 +1444,23 @@ object PruneDeadFields { memo) BindingEnv.empty case TableToValueApply(child, _) => - memoizeTableIR(ctx, child, child.typ, memo) + memoizeTableIR(child, child.typ, memo) BindingEnv.empty - case MatrixToValueApply(child, _) => memoizeMatrixIR(ctx, child, child.typ, memo) + case MatrixToValueApply(child, _) => memoizeMatrixIR(child, child.typ, memo) BindingEnv.empty - case BlockMatrixToValueApply(child, _) => memoizeBlockMatrixIR(ctx, child, child.typ, memo) + case BlockMatrixToValueApply(child, _) => memoizeBlockMatrixIR(child, child.typ, memo) BindingEnv.empty case TableAggregate(child, query) => - val queryDep = memoizeAndGetDep(ctx, query, query.typ, child.typ, memo) + val queryDep = memoizeAndGetDep(query, query.typ, child.typ, memo) val dep = TableType( key = child.typ.key, rowType = unify(child.typ.rowType, queryDep.rowType, selectKey(child.typ.rowType, child.typ.key)), globalType = queryDep.globalType ) - 
memoizeTableIR(ctx, child, dep, memo) + memoizeTableIR(child, dep, memo) BindingEnv.empty case MatrixAggregate(child, query) => - val queryDep = memoizeAndGetDep(ctx, query, query.typ, child.typ, memo) + val queryDep = memoizeAndGetDep(query, query.typ, child.typ, memo) val dep = MatrixType( rowKey = child.typ.rowKey, colKey = FastIndexedSeq(), @@ -1502,10 +1469,10 @@ object PruneDeadFields { colType = queryDep.colType, globalType = queryDep.globalType ) - memoizeMatrixIR(ctx, child, dep, memo) + memoizeMatrixIR(child, dep, memo) BindingEnv.empty case TailLoop(name, params, body) => - val bodyEnv = memoizeValueIR(ctx, body, body.typ, memo) + val bodyEnv = memoizeValueIR(body, body.typ, memo) val paramTypes = params.map{ case (paramName, paramIR) => bodyEnv.eval.lookupOption(paramName) match { case Some(ab) => unifySeq(paramIR.typ, ab.result()) @@ -1515,12 +1482,12 @@ object PruneDeadFields { unifyEnvsSeq( IndexedSeq(bodyEnv.deleteEval(params.map(_._1))) ++ (params, paramTypes).zipped.map{ case ((paramName, paramIR), paramType) => - memoizeValueIR(ctx, paramIR, paramType, memo) + memoizeValueIR(paramIR, paramType, memo) } ) case CollectDistributedArray(contexts, globals, cname, gname, body, tsd) => val rArray = requestedType.asInstanceOf[TArray] - val bodyEnv = memoizeValueIR(ctx, body, rArray.elementType, memo) + val bodyEnv = memoizeValueIR(body, rArray.elementType, memo) assert(bodyEnv.scan.isEmpty) assert(bodyEnv.agg.isEmpty) @@ -1535,36 +1502,32 @@ object PruneDeadFields { } unifyEnvs( - memoizeValueIR(ctx, contexts, cDep, memo), - memoizeValueIR(ctx, globals, gDep, memo) + memoizeValueIR(contexts, cDep, memo), + memoizeValueIR(globals, gDep, memo) ) case _: IR => val envs = ir.children.flatMap { case mir: MatrixIR => - memoizeMatrixIR(ctx, mir, mir.typ, memo) + memoizeMatrixIR(mir, mir.typ, memo) None case tir: TableIR => - memoizeTableIR(ctx, tir, tir.typ, memo) + memoizeTableIR(tir, tir.typ, memo) None case bmir: BlockMatrixIR => //NOTE Currently no BlockMatrixIRs would have dead fields None case ir: IR => - Some(memoizeValueIR(ctx, ir, ir.typ, memo)) + Some(memoizeValueIR(ir, ir.typ, memo)) } unifyEnvsSeq(envs) } } - def rebuild( - ctx: ExecuteContext, - tir: TableIR, - memo: RebuildMutableState - ): TableIR = { + def rebuild(tir: TableIR, memo: RebuildMutableState): TableIR = { val requestedType = memo.requestedType.lookup(tir).asInstanceOf[TableType] tir match { case TableParallelize(rowsAndGlobal, nPartitions) => TableParallelize( - upcast(ctx, rebuildIR(ctx, rowsAndGlobal, BindingEnv.empty, memo), + upcast(rebuildIR(rowsAndGlobal, BindingEnv.empty, memo), memo.requestedType.lookup(rowsAndGlobal).asInstanceOf[TStruct]), nPartitions) case TableRead(typ, dropRows, tr) => @@ -1575,12 +1538,12 @@ object PruneDeadFields { globalType = requestedType.globalType) TableRead(requestedTypeWithKey, dropRows, tr) case TableFilter(child, pred) => - val child2 = rebuild(ctx, child, memo) - val pred2 = rebuildIR(ctx, pred, BindingEnv(child2.typ.rowEnv), memo) + val child2 = rebuild(child, memo) + val pred2 = rebuildIR(pred, BindingEnv(child2.typ.rowEnv), memo) TableFilter(child2, pred2) case TableMapPartitions(child, gName, pName, body) => - val child2 = rebuild(ctx, child, memo) - val body2 = rebuildIR(ctx, body, BindingEnv(Env( + val child2 = rebuild(child, memo) + val body2 = rebuildIR(body, BindingEnv(Env( gName -> child2.typ.globalType, pName -> TStream(child2.typ.rowType))), memo) val body2ElementType = body2.typ.asInstanceOf[TStream].elementType.asInstanceOf[TStruct] @@ -1590,8 +1553,8 
@@ object PruneDeadFields { child2 TableMapPartitions(child2Keyed, gName, pName, body2) case TableMapRows(child, newRow) => - val child2 = rebuild(ctx, child, memo) - val newRow2 = rebuildIR(ctx, newRow, BindingEnv(child2.typ.rowEnv, scan = Some(child2.typ.rowEnv)), memo) + val child2 = rebuild(child, memo) + val newRow2 = rebuildIR(newRow, BindingEnv(child2.typ.rowEnv, scan = Some(child2.typ.rowEnv)), memo) val newRowType = newRow2.typ.asInstanceOf[TStruct] val child2Keyed = if (child2.typ.key.exists(k => !newRowType.hasField(k))) TableKeyBy(child2, child2.typ.key.takeWhile(newRowType.hasField)) @@ -1599,43 +1562,43 @@ object PruneDeadFields { child2 TableMapRows(child2Keyed, newRow2) case TableMapGlobals(child, newGlobals) => - val child2 = rebuild(ctx, child, memo) - TableMapGlobals(child2, rebuildIR(ctx, newGlobals, BindingEnv(child2.typ.globalEnv), memo)) + val child2 = rebuild(child, memo) + TableMapGlobals(child2, rebuildIR(newGlobals, BindingEnv(child2.typ.globalEnv), memo)) case TableKeyBy(child, _, isSorted) => - var child2 = rebuild(ctx, child, memo) + var child2 = rebuild(child, memo) val keys2 = requestedType.key // fully upcast before shuffle if (!isSorted && keys2.nonEmpty) - child2 = upcastTable(ctx, child2, memo.requestedType.lookup(child).asInstanceOf[TableType], upcastGlobals = false) + child2 = upcastTable(child2, memo.requestedType.lookup(child).asInstanceOf[TableType], upcastGlobals = false) TableKeyBy(child2, keys2, isSorted) case TableOrderBy(child, sortFields) => // fully upcast before shuffle - val child2 = upcastTable(ctx, rebuild(ctx, child, memo), memo.requestedType.lookup(child).asInstanceOf[TableType], upcastGlobals = false) + val child2 = upcastTable(rebuild(child, memo), memo.requestedType.lookup(child).asInstanceOf[TableType], upcastGlobals = false) TableOrderBy(child2, sortFields) case TableLeftJoinRightDistinct(left, right, root) => if (requestedType.rowType.hasField(root)) - TableLeftJoinRightDistinct(rebuild(ctx, left, memo), rebuild(ctx, right, memo), root) + TableLeftJoinRightDistinct(rebuild(left, memo), rebuild(right, memo), root) else - rebuild(ctx, left, memo) + rebuild(left, memo) case TableIntervalJoin(left, right, root, product) => if (requestedType.rowType.hasField(root)) - TableIntervalJoin(rebuild(ctx, left, memo), rebuild(ctx, right, memo), root, product) + TableIntervalJoin(rebuild(left, memo), rebuild(right, memo), root, product) else - rebuild(ctx, left, memo) + rebuild(left, memo) case TableMultiWayZipJoin(children, fieldName, globalName) => - val rebuilt = children.map { c => rebuild(ctx, c, memo) } - val upcasted = rebuilt.map { t => upcastTable(ctx, t, memo.requestedType.lookup(children(0)).asInstanceOf[TableType]) } + val rebuilt = children.map { c => rebuild(c, memo) } + val upcasted = rebuilt.map { t => upcastTable(t, memo.requestedType.lookup(children(0)).asInstanceOf[TableType]) } TableMultiWayZipJoin(upcasted, fieldName, globalName) case TableAggregateByKey(child, expr) => - val child2 = rebuild(ctx, child, memo) - TableAggregateByKey(child2, rebuildIR(ctx, expr, BindingEnv(child2.typ.globalEnv, agg = Some(child2.typ.rowEnv)), memo)) + val child2 = rebuild(child, memo) + TableAggregateByKey(child2, rebuildIR(expr, BindingEnv(child2.typ.globalEnv, agg = Some(child2.typ.rowEnv)), memo)) case TableKeyByAndAggregate(child, expr, newKey, nPartitions, bufferSize) => - val child2 = rebuild(ctx, child, memo) - val expr2 = rebuildIR(ctx, expr, BindingEnv(child2.typ.globalEnv, agg = Some(child2.typ.rowEnv)), memo) - val newKey2 = 
rebuildIR(ctx, newKey, BindingEnv(child2.typ.rowEnv), memo) + val child2 = rebuild(child, memo) + val expr2 = rebuildIR(expr, BindingEnv(child2.typ.globalEnv, agg = Some(child2.typ.rowEnv)), memo) + val newKey2 = rebuildIR(newKey, BindingEnv(child2.typ.rowEnv), memo) TableKeyByAndAggregate(child2, expr2, newKey2, nPartitions, bufferSize) case TableRename(child, rowMap, globalMap) => - val child2 = rebuild(ctx, child, memo) + val child2 = rebuild(child, memo) TableRename( child2, rowMap.filterKeys(child2.typ.rowType.hasField), @@ -1643,31 +1606,27 @@ object PruneDeadFields { case TableUnion(children) => val requestedType = memo.requestedType.lookup(tir).asInstanceOf[TableType] val rebuilt = children.map { c => - upcastTable(ctx, rebuild(ctx, c, memo), requestedType, upcastGlobals = false) + upcastTable(rebuild(c, memo), requestedType, upcastGlobals = false) } TableUnion(rebuilt) case RelationalLetTable(name, value, body) => - val value2 = rebuildIR(ctx, value, BindingEnv.empty, memo) + val value2 = rebuildIR(value, BindingEnv.empty, memo) memo.relationalRefs += name -> value2.typ - RelationalLetTable(name, value2, rebuild(ctx, body, memo)) + RelationalLetTable(name, value2, rebuild(body, memo)) case BlockMatrixToTableApply(bmir, aux, function) => - val bmir2 = rebuild(ctx, bmir, memo) - val aux2 = rebuildIR(ctx, aux, BindingEnv.empty, memo) + val bmir2 = rebuild(bmir, memo) + val aux2 = rebuildIR(aux, BindingEnv.empty, memo) BlockMatrixToTableApply(bmir2, aux2, function) case _ => tir.copy(tir.children.map { // IR should be a match error - all nodes with child value IRs should have a rule - case childT: TableIR => rebuild(ctx, childT, memo) - case childM: MatrixIR => rebuild(ctx, childM, memo) - case childBm: BlockMatrixIR => rebuild(ctx, childBm, memo) + case childT: TableIR => rebuild(childT, memo) + case childM: MatrixIR => rebuild(childM, memo) + case childBm: BlockMatrixIR => rebuild(childBm, memo) }) } } - def rebuild( - ctx: ExecuteContext, - mir: MatrixIR, - memo: RebuildMutableState - ): MatrixIR = { + def rebuild(mir: MatrixIR, memo: RebuildMutableState): MatrixIR = { val requestedType = memo.requestedType.lookup(mir).asInstanceOf[MatrixType] mir match { case x@MatrixRead(typ, dropCols, dropRows, reader) => @@ -1682,20 +1641,20 @@ object PruneDeadFields { ) MatrixRead(requestedTypeWithKeys, dropCols, dropRows, reader) case MatrixFilterCols(child, pred) => - val child2 = rebuild(ctx, child, memo) - MatrixFilterCols(child2, rebuildIR(ctx, pred, BindingEnv(child2.typ.colEnv), memo)) + val child2 = rebuild(child, memo) + MatrixFilterCols(child2, rebuildIR(pred, BindingEnv(child2.typ.colEnv), memo)) case MatrixFilterRows(child, pred) => - val child2 = rebuild(ctx, child, memo) - MatrixFilterRows(child2, rebuildIR(ctx, pred, BindingEnv(child2.typ.rowEnv), memo)) + val child2 = rebuild(child, memo) + MatrixFilterRows(child2, rebuildIR(pred, BindingEnv(child2.typ.rowEnv), memo)) case MatrixFilterEntries(child, pred) => - val child2 = rebuild(ctx, child, memo) - MatrixFilterEntries(child2, rebuildIR(ctx, pred, BindingEnv(child2.typ.entryEnv), memo)) + val child2 = rebuild(child, memo) + MatrixFilterEntries(child2, rebuildIR(pred, BindingEnv(child2.typ.entryEnv), memo)) case MatrixMapEntries(child, newEntries) => - val child2 = rebuild(ctx, child, memo) - MatrixMapEntries(child2, rebuildIR(ctx, newEntries, BindingEnv(child2.typ.entryEnv), memo)) + val child2 = rebuild(child, memo) + MatrixMapEntries(child2, rebuildIR(newEntries, BindingEnv(child2.typ.entryEnv), memo)) case 
MatrixMapRows(child, newRow) => - val child2 = rebuild(ctx, child, memo) - val newRow2 = rebuildIR(ctx, newRow, + val child2 = rebuild(child, memo) + val newRow2 = rebuildIR(newRow, BindingEnv(child2.typ.rowEnv, agg = Some(child2.typ.entryEnv), scan = Some(child2.typ.rowEnv)), memo) val newRowType = newRow2.typ.asInstanceOf[TStruct] val child2Keyed = if (child2.typ.rowKey.exists(k => !newRowType.hasField(k))) @@ -1704,8 +1663,8 @@ object PruneDeadFields { child2 MatrixMapRows(child2Keyed, newRow2) case MatrixMapCols(child, newCol, newKey) => - val child2 = rebuild(ctx, child, memo) - val newCol2 = rebuildIR(ctx, newCol, + val child2 = rebuild(child, memo) + val newCol2 = rebuildIR(newCol, BindingEnv(child2.typ.colEnv, agg = Some(child2.typ.entryEnv), scan = Some(child2.typ.colEnv)), memo) val newColType = newCol2.typ.asInstanceOf[TStruct] val newKey2 = newKey match { @@ -1717,32 +1676,32 @@ object PruneDeadFields { } MatrixMapCols(child2, newCol2, newKey2) case MatrixMapGlobals(child, newGlobals) => - val child2 = rebuild(ctx, child, memo) - MatrixMapGlobals(child2, rebuildIR(ctx, newGlobals, BindingEnv(child2.typ.globalEnv), memo)) + val child2 = rebuild(child, memo) + MatrixMapGlobals(child2, rebuildIR(newGlobals, BindingEnv(child2.typ.globalEnv), memo)) case MatrixKeyRowsBy(child, keys, isSorted) => - val child2 = rebuild(ctx, child, memo) + val child2 = rebuild(child, memo) val keys2 = keys.takeWhile(child2.typ.rowType.hasField) MatrixKeyRowsBy(child2, keys2, isSorted) case MatrixAggregateRowsByKey(child, entryExpr, rowExpr) => - val child2 = rebuild(ctx, child, memo) + val child2 = rebuild(child, memo) MatrixAggregateRowsByKey(child2, - rebuildIR(ctx, entryExpr, BindingEnv(child2.typ.colEnv, agg = Some(child2.typ.entryEnv)), memo), - rebuildIR(ctx, rowExpr, BindingEnv(child2.typ.globalEnv, agg = Some(child2.typ.rowEnv)), memo)) + rebuildIR(entryExpr, BindingEnv(child2.typ.colEnv, agg = Some(child2.typ.entryEnv)), memo), + rebuildIR(rowExpr, BindingEnv(child2.typ.globalEnv, agg = Some(child2.typ.rowEnv)), memo)) case MatrixAggregateColsByKey(child, entryExpr, colExpr) => - val child2 = rebuild(ctx, child, memo) + val child2 = rebuild(child, memo) MatrixAggregateColsByKey(child2, - rebuildIR(ctx, entryExpr, BindingEnv(child2.typ.rowEnv, agg = Some(child2.typ.entryEnv)), memo), - rebuildIR(ctx, colExpr, BindingEnv(child2.typ.globalEnv, agg = Some(child2.typ.colEnv)), memo)) + rebuildIR(entryExpr, BindingEnv(child2.typ.rowEnv, agg = Some(child2.typ.entryEnv)), memo), + rebuildIR(colExpr, BindingEnv(child2.typ.globalEnv, agg = Some(child2.typ.colEnv)), memo)) case MatrixUnionRows(children) => val requestedType = memo.requestedType.lookup(mir).asInstanceOf[MatrixType] MatrixUnionRows(children.map { child => - upcast(ctx, rebuild(ctx, child, memo), requestedType, + upcast(rebuild(child, memo), requestedType, upcastGlobals = false) }) case MatrixUnionCols(left, right, joinType) => val requestedType = memo.requestedType.lookup(mir).asInstanceOf[MatrixType] - val left2 = rebuild(ctx, left, memo) - val right2 = rebuild(ctx, right, memo) + val left2 = rebuild(left, memo) + val right2 = rebuild(right, memo) if (left2.typ.colType == right2.typ.colType && left2.typ.entryType == right2.typ.entryType) { MatrixUnionCols( @@ -1752,31 +1711,31 @@ object PruneDeadFields { ) } else { MatrixUnionCols( - upcast(ctx, left2, requestedType, upcastRows=false, upcastGlobals = false), - upcast(ctx, right2, requestedType, upcastRows=false, upcastGlobals = false), + upcast(left2, requestedType, upcastRows=false, 
upcastGlobals = false), + upcast(right2, requestedType, upcastRows=false, upcastGlobals = false), joinType ) } case MatrixAnnotateRowsTable(child, table, root, product) => // if the field is not used, this node can be elided entirely if (!requestedType.rowType.hasField(root)) - rebuild(ctx, child, memo) + rebuild(child, memo) else { - val child2 = rebuild(ctx, child, memo) - val table2 = rebuild(ctx, table, memo) + val child2 = rebuild(child, memo) + val table2 = rebuild(table, memo) MatrixAnnotateRowsTable(child2, table2, root, product) } case MatrixAnnotateColsTable(child, table, uid) => // if the field is not used, this node can be elided entirely if (!requestedType.colType.hasField(uid)) - rebuild(ctx, child, memo) + rebuild(child, memo) else { - val child2 = rebuild(ctx, child, memo) - val table2 = rebuild(ctx, table, memo) + val child2 = rebuild(child, memo) + val table2 = rebuild(table, memo) MatrixAnnotateColsTable(child2, table2, uid) } case MatrixRename(child, globalMap, colMap, rowMap, entryMap) => - val child2 = rebuild(ctx, child, memo) + val child2 = rebuild(child, memo) MatrixRename( child2, globalMap.filterKeys(child2.typ.globalType.hasField), @@ -1784,50 +1743,41 @@ object PruneDeadFields { rowMap.filterKeys(child2.typ.rowType.hasField), entryMap.filterKeys(child2.typ.entryType.hasField)) case RelationalLetMatrixTable(name, value, body) => - val value2 = rebuildIR(ctx, value, BindingEnv.empty, memo) + val value2 = rebuildIR(value, BindingEnv.empty, memo) memo.relationalRefs += name -> value2.typ - RelationalLetMatrixTable(name, value2, rebuild(ctx, body, memo)) + RelationalLetMatrixTable(name, value2, rebuild(body, memo)) case CastTableToMatrix(child, entriesFieldName, colsFieldName, _) => - CastTableToMatrix(rebuild(ctx, child, memo), entriesFieldName, colsFieldName, requestedType.colKey) + CastTableToMatrix(rebuild(child, memo), entriesFieldName, colsFieldName, requestedType.colKey) case _ => mir.copy(mir.children.map { // IR should be a match error - all nodes with child value IRs should have a rule - case childT: TableIR => rebuild(ctx, childT, memo) - case childM: MatrixIR => rebuild(ctx, childM, memo) + case childT: TableIR => rebuild(childT, memo) + case childM: MatrixIR => rebuild(childM, memo) }) } } - def rebuild( - ctx: ExecuteContext, - bmir: BlockMatrixIR, - memo: RebuildMutableState - ): BlockMatrixIR = bmir match { + def rebuild(bmir: BlockMatrixIR, memo: RebuildMutableState): BlockMatrixIR = bmir match { case RelationalLetBlockMatrix(name, value, body) => - val value2 = rebuildIR(ctx, value, BindingEnv.empty, memo) + val value2 = rebuildIR(value, BindingEnv.empty, memo) memo.relationalRefs += name -> value2.typ - RelationalLetBlockMatrix(name, value2, rebuild(ctx, body, memo)) + RelationalLetBlockMatrix(name, value2, rebuild(body, memo)) case _ => bmir.copy( bmir.children.map { - case tir: TableIR => rebuild(ctx, tir, memo) - case mir: MatrixIR => rebuild(ctx, mir, memo) - case ir: IR => rebuildIR(ctx, ir, BindingEnv.empty[Type], memo) - case bmir: BlockMatrixIR => rebuild(ctx, bmir, memo) + case tir: TableIR => rebuild(tir, memo) + case mir: MatrixIR => rebuild(mir, memo) + case ir: IR => rebuildIR(ir, BindingEnv.empty[Type], memo) + case bmir: BlockMatrixIR => rebuild(bmir, memo) } ) } - def rebuildIR( - ctx: ExecuteContext, - ir: IR, - env: BindingEnv[Type], - memo: RebuildMutableState - ): IR = { + def rebuildIR(ir: IR, env: BindingEnv[Type], memo: RebuildMutableState): IR = { val requestedType = memo.requestedType.lookup(ir).asInstanceOf[Type] ir match 
{ case NA(_) => NA(requestedType) case CastRename(v, _typ) => - val v2 = rebuildIR(ctx, v, env, memo) + val v2 = rebuildIR(v, env, memo) def recur(rebuildType: Type, castType: Type, baseType: Type): Type = { ((rebuildType, castType, baseType): @unchecked) match { @@ -1855,100 +1805,100 @@ object PruneDeadFields { CastRename(v2, recur(v2.typ, _typ, v.typ)) case If(cond, cnsq, alt) => - val cond2 = rebuildIR(ctx, cond, env, memo) - val cnsq2 = rebuildIR(ctx, cnsq, env, memo) - val alt2 = rebuildIR(ctx, alt, env, memo) + val cond2 = rebuildIR(cond, env, memo) + val cnsq2 = rebuildIR(cnsq, env, memo) + val alt2 = rebuildIR(alt, env, memo) if (cnsq2.typ == alt2.typ) If(cond2, cnsq2, alt2) else If(cond2, - upcast(ctx, cnsq2, requestedType), - upcast(ctx, alt2, requestedType) + upcast(cnsq2, requestedType), + upcast(alt2, requestedType) ) case Coalesce(values) => - val values2 = values.map(rebuildIR(ctx, _, env, memo)) + val values2 = values.map(rebuildIR(_, env, memo)) require(values2.nonEmpty) if (values2.forall(_.typ == values2.head.typ)) Coalesce(values2) else - Coalesce(values2.map(upcast(ctx, _, requestedType))) + Coalesce(values2.map(upcast(_, requestedType))) case Consume(value) => - val value2 = rebuildIR(ctx, value, env, memo) + val value2 = rebuildIR(value, env, memo) Consume(value2) case Let(name, value, body) => - val value2 = rebuildIR(ctx, value, env, memo) + val value2 = rebuildIR(value, env, memo) Let( name, value2, - rebuildIR(ctx, body, env.bindEval(name, value2.typ), memo) + rebuildIR(body, env.bindEval(name, value2.typ), memo) ) case AggLet(name, value, body, isScan) => - val value2 = rebuildIR(ctx, value, if (isScan) env.promoteScan else env.promoteAgg, memo) + val value2 = rebuildIR(value, if (isScan) env.promoteScan else env.promoteAgg, memo) AggLet( name, value2, - rebuildIR(ctx, body, if (isScan) env.bindScan(name, value2.typ) else env.bindAgg(name, value2.typ), memo), + rebuildIR(body, if (isScan) env.bindScan(name, value2.typ) else env.bindAgg(name, value2.typ), memo), isScan ) case Ref(name, t) => Ref(name, env.eval.lookupOption(name).getOrElse(t)) case RelationalLet(name, value, body) => - val value2 = rebuildIR(ctx, value, BindingEnv.empty, memo) + val value2 = rebuildIR(value, BindingEnv.empty, memo) memo.relationalRefs += name -> value2.typ - RelationalLet(name, value2, rebuildIR(ctx, body, env, memo)) + RelationalLet(name, value2, rebuildIR(body, env, memo)) case RelationalRef(name, _) => RelationalRef(name, memo.relationalRefs(name)) case MakeArray(args, _) => val dep = requestedType.asInstanceOf[TArray] - val args2 = args.map(a => rebuildIR(ctx, a, env, memo)) - MakeArray.unify(ctx, args2, TArray(dep.elementType)) + val args2 = args.map(a => rebuildIR(a, env, memo)) + MakeArray.unify(args2, TArray(dep.elementType)) case MakeStream(args, _, requiresMemoryManagementPerElement) => val dep = requestedType.asInstanceOf[TStream] - val args2 = args.map(a => rebuildIR(ctx, a, env, memo)) - MakeStream.unify(ctx, args2, requiresMemoryManagementPerElement, requestedType = TStream(dep.elementType)) + val args2 = args.map(a => rebuildIR(a, env, memo)) + MakeStream.unify(args2, requiresMemoryManagementPerElement, requestedType = TStream(dep.elementType)) case StreamMap(a, name, body) => - val a2 = rebuildIR(ctx, a, env, memo) - StreamMap(a2, name, rebuildIR(ctx, body, env.bindEval(name, a2.typ.asInstanceOf[TStream].elementType), memo)) + val a2 = rebuildIR(a, env, memo) + StreamMap(a2, name, rebuildIR(body, env.bindEval(name, a2.typ.asInstanceOf[TStream].elementType), memo)) 
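Aside (illustrative sketch only; hypothetical names, not Hail's IR or any code in this patch): the memoize/rebuild pair above works in two passes — a bottom-up pass records, for each binding, the union of the fields its uses actually request, and a top-down pass then rebuilds each binder against that pruned type. The toy Scala below shows that shape on a three-node expression language.

object PruneSketch {
  // A tiny expression language: references, field access, and a let that
  // binds a "record" (modelled here as a Map from field name to value).
  sealed trait Expr
  case class Ref(name: String) extends Expr
  case class GetField(of: Expr, field: String) extends Expr
  case class Let(name: String, value: Map[String, Int], body: Expr) extends Expr

  // Bottom-up pass: which fields of each bound name does the expression request?
  def requestedFields(e: Expr): Map[String, Set[String]] = e match {
    case GetField(Ref(n), f) => Map(n -> Set(f))
    case GetField(of, _)     => requestedFields(of)
    case Let(n, _, body)     => requestedFields(body) - n  // uses of n stay local to this Let
    case Ref(_)              => Map.empty
  }

  // Top-down pass: rebuild each Let so its bound value carries only the requested fields.
  def rebuild(e: Expr): Expr = e match {
    case Let(n, value, body) =>
      val keep = requestedFields(body).getOrElse(n, Set.empty[String])
      Let(n, value.filter { case (k, _) => keep(k) }, rebuild(body))
    case other => other
  }

  def main(args: Array[String]): Unit = {
    val ir = Let("x", Map("a" -> 1, "b" -> 2), GetField(Ref("x"), "a"))
    println(rebuild(ir))  // field "b" is pruned from the binding of x
  }
}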
case StreamZip(as, names, body, b, errorID) => val (newAs, newNames) = (as, names) .zipped - .flatMap { case (a, name) => if (memo.requestedType.contains(a)) Some((rebuildIR(ctx, a, env, memo), name)) else None } + .flatMap { case (a, name) => if (memo.requestedType.contains(a)) Some((rebuildIR(a, env, memo), name)) else None } .unzip - StreamZip(newAs, newNames, rebuildIR(ctx, body, + StreamZip(newAs, newNames, rebuildIR(body, env.bindEval(newNames.zip(newAs.map(a => a.typ.asInstanceOf[TStream].elementType)): _*), memo), b, errorID) case StreamZipJoin(as, key, curKey, curVals, joinF) => - val newAs = as.map(a => rebuildIR(ctx, a, env, memo)) + val newAs = as.map(a => rebuildIR(a, env, memo)) val newEltType = as.head.typ.asInstanceOf[TStream].elementType.asInstanceOf[TStruct] - val newJoinF = rebuildIR(ctx, + val newJoinF = rebuildIR( joinF, env.bindEval(curKey -> selectKey(newEltType, key), curVals -> TArray(newEltType)), memo) StreamZipJoin(newAs, key, curKey, curVals, newJoinF) case StreamFilter(a, name, cond) => - val a2 = rebuildIR(ctx, a, env, memo) - StreamFilter(a2, name, rebuildIR(ctx, cond, env.bindEval(name, a2.typ.asInstanceOf[TStream].elementType), memo)) + val a2 = rebuildIR(a, env, memo) + StreamFilter(a2, name, rebuildIR(cond, env.bindEval(name, a2.typ.asInstanceOf[TStream].elementType), memo)) case StreamTakeWhile(a, name, cond) => - val a2 = rebuildIR(ctx, a, env, memo) - StreamTakeWhile(a2, name, rebuildIR(ctx, cond, env.bindEval(name, a2.typ.asInstanceOf[TStream].elementType), memo)) + val a2 = rebuildIR(a, env, memo) + StreamTakeWhile(a2, name, rebuildIR(cond, env.bindEval(name, a2.typ.asInstanceOf[TStream].elementType), memo)) case StreamDropWhile(a, name, cond) => - val a2 = rebuildIR(ctx, a, env, memo) - StreamDropWhile(a2, name, rebuildIR(ctx, cond, env.bindEval(name, a2.typ.asInstanceOf[TStream].elementType), memo)) + val a2 = rebuildIR(a, env, memo) + StreamDropWhile(a2, name, rebuildIR(cond, env.bindEval(name, a2.typ.asInstanceOf[TStream].elementType), memo)) case StreamFlatMap(a, name, body) => - val a2 = rebuildIR(ctx, a, env, memo) - StreamFlatMap(a2, name, rebuildIR(ctx, body, env.bindEval(name, a2.typ.asInstanceOf[TStream].elementType), memo)) + val a2 = rebuildIR(a, env, memo) + StreamFlatMap(a2, name, rebuildIR(body, env.bindEval(name, a2.typ.asInstanceOf[TStream].elementType), memo)) case StreamFold(a, zero, accumName, valueName, body) => - val a2 = rebuildIR(ctx, a, env, memo) - val z2 = rebuildIR(ctx, zero, env, memo) + val a2 = rebuildIR(a, env, memo) + val z2 = rebuildIR(zero, env, memo) StreamFold( a2, z2, accumName, valueName, - rebuildIR(ctx, body, env.bindEval(accumName -> z2.typ, valueName -> a2.typ.asInstanceOf[TStream].elementType), memo) + rebuildIR(body, env.bindEval(accumName -> z2.typ, valueName -> a2.typ.asInstanceOf[TStream].elementType), memo) ) case StreamFold2(a: IR, accum, valueName, seqs, result) => - val a2 = rebuildIR(ctx, a, env, memo) - val newAccum = accum.map { case (n, z) => n -> rebuildIR(ctx, z, env, memo) } + val a2 = rebuildIR(a, env, memo) + val newAccum = accum.map { case (n, z) => n -> rebuildIR(z, env, memo) } val newEnv = env .bindEval(valueName -> a2.typ.asInstanceOf[TStream].elementType) .bindEval(newAccum.map { case (n, z) => n -> z.typ }: _*) @@ -1956,49 +1906,49 @@ object PruneDeadFields { a2, newAccum, valueName, - seqs.map(rebuildIR(ctx, _, newEnv, memo)), - rebuildIR(ctx, result, newEnv, memo)) + seqs.map(rebuildIR(_, newEnv, memo)), + rebuildIR(result, newEnv, memo)) case StreamScan(a, zero, accumName, 
valueName, body) => - val a2 = rebuildIR(ctx, a, env, memo) - val z2 = rebuildIR(ctx, zero, env, memo) + val a2 = rebuildIR(a, env, memo) + val z2 = rebuildIR(zero, env, memo) StreamScan( a2, z2, accumName, valueName, - rebuildIR(ctx, body, env.bindEval(accumName -> z2.typ, valueName -> a2.typ.asInstanceOf[TStream].elementType), memo) + rebuildIR(body, env.bindEval(accumName -> z2.typ, valueName -> a2.typ.asInstanceOf[TStream].elementType), memo) ) case StreamJoinRightDistinct(left, right, lKey, rKey, l, r, join, joinType) => - val left2 = rebuildIR(ctx, left, env, memo) - val right2 = rebuildIR(ctx, right, env, memo) + val left2 = rebuildIR(left, env, memo) + val right2 = rebuildIR(right, env, memo) val ltyp = left2.typ.asInstanceOf[TStream] val rtyp = right2.typ.asInstanceOf[TStream] StreamJoinRightDistinct( left2, right2, lKey, rKey, l, r, - rebuildIR(ctx, join, env.bindEval(l -> ltyp.elementType, r -> rtyp.elementType), memo), + rebuildIR(join, env.bindEval(l -> ltyp.elementType, r -> rtyp.elementType), memo), joinType) case StreamFor(a, valueName, body) => - val a2 = rebuildIR(ctx, a, env, memo) - val body2 = rebuildIR(ctx, body, env.bindEval(valueName -> a2.typ.asInstanceOf[TStream].elementType), memo) + val a2 = rebuildIR(a, env, memo) + val body2 = rebuildIR(body, env.bindEval(valueName -> a2.typ.asInstanceOf[TStream].elementType), memo) StreamFor(a2, valueName, body2) case ArraySort(a, left, right, lessThan) => - val a2 = rebuildIR(ctx, a, env, memo) + val a2 = rebuildIR(a, env, memo) val et = a2.typ.asInstanceOf[TStream].elementType - val lessThan2 = rebuildIR(ctx, lessThan, env.bindEval(left -> et, right -> et), memo) + val lessThan2 = rebuildIR(lessThan, env.bindEval(left -> et, right -> et), memo) ArraySort(a2, left, right, lessThan2) case MakeNDArray(data, shape, rowMajor, errorId) => - val data2 = rebuildIR(ctx, data, env, memo) - val shape2 = rebuildIR(ctx, shape, env, memo) - val rowMajor2 = rebuildIR(ctx, rowMajor, env, memo) + val data2 = rebuildIR(data, env, memo) + val shape2 = rebuildIR(shape, env, memo) + val rowMajor2 = rebuildIR(rowMajor, env, memo) MakeNDArray(data2, shape2, rowMajor2, errorId) case NDArrayMap(nd, valueName, body) => - val nd2 = rebuildIR(ctx, nd, env, memo) - NDArrayMap(nd2, valueName, rebuildIR(ctx, body, env.bindEval(valueName, nd2.typ.asInstanceOf[TNDArray].elementType), memo)) + val nd2 = rebuildIR(nd, env, memo) + NDArrayMap(nd2, valueName, rebuildIR(body, env.bindEval(valueName, nd2.typ.asInstanceOf[TNDArray].elementType), memo)) case NDArrayMap2(left, right, leftName, rightName, body, errorID) => - val left2 = rebuildIR(ctx, left, env, memo) - val right2 = rebuildIR(ctx, right, env, memo) - val body2 = rebuildIR(ctx, body, + val left2 = rebuildIR(left, env, memo) + val right2 = rebuildIR(right, env, memo) + val body2 = rebuildIR(body, env.bindEval(leftName, left2.typ.asInstanceOf[TNDArray].elementType).bindEval(rightName, right2.typ.asInstanceOf[TNDArray].elementType), memo) NDArrayMap2(left2, right2, leftName, rightName, body2, errorID) @@ -2008,7 +1958,7 @@ object PruneDeadFields { val depFields = depStruct.fieldNames.toSet MakeStruct(fields.flatMap { case (f, fir) => if (depFields.contains(f)) - Some(f -> rebuildIR(ctx, fir, env, memo)) + Some(f -> rebuildIR(fir, env, memo)) else { log.info(s"Prune: MakeStruct: eliminating field '$f'") None @@ -2020,14 +1970,14 @@ object PruneDeadFields { val depFieldIndices = depTuple.fieldIndex.keySet MakeTuple(fields.flatMap { case (i, f) => if (depFieldIndices(i)) - Some(i -> rebuildIR(ctx, f, env, 
memo)) + Some(i -> rebuildIR(f, env, memo)) else None }) case InsertFields(old, fields, fieldOrder) => val depStruct = requestedType.asInstanceOf[TStruct] val depFields = depStruct.fieldNames.toSet - val rebuiltChild = rebuildIR(ctx, old, env, memo) + val rebuiltChild = rebuildIR(old, env, memo) val preservedChildFields = rebuiltChild.typ.asInstanceOf[TStruct].fieldNames.toSet val insertOverwritesUnrequestedButPreservedField = fields.exists{ case (fieldName, _) => @@ -2044,7 +1994,7 @@ object PruneDeadFields { InsertFields(wrappedChild, fields.flatMap { case (f, fir) => if (depFields.contains(f)) - Some(f -> rebuildIR(ctx, fir, env, memo)) + Some(f -> rebuildIR(fir, env, memo)) else { log.info(s"Prune: InsertFields: eliminating field '$f'") None @@ -2052,104 +2002,104 @@ object PruneDeadFields { }, fieldOrder.map(fds => fds.filter(f => depFields.contains(f) || wrappedChild.typ.asInstanceOf[TStruct].hasField(f)))) case SelectFields(old, fields) => val depStruct = requestedType.asInstanceOf[TStruct] - val old2 = rebuildIR(ctx, old, env, memo) + val old2 = rebuildIR(old, env, memo) SelectFields(old2, fields.filter(f => old2.typ.asInstanceOf[TStruct].hasField(f) && depStruct.hasField(f))) case ConsoleLog(message, result) => - val message2 = rebuildIR(ctx, message, env, memo) - val result2 = rebuildIR(ctx, result, env, memo) + val message2 = rebuildIR(message, env, memo) + val result2 = rebuildIR(result, env, memo) ConsoleLog(message2, result2) case TableAggregate(child, query) => - val child2 = rebuild(ctx, child, memo) - val query2 = rebuildIR(ctx, query, BindingEnv(child2.typ.globalEnv, agg = Some(child2.typ.rowEnv)), memo) + val child2 = rebuild(child, memo) + val query2 = rebuildIR(query, BindingEnv(child2.typ.globalEnv, agg = Some(child2.typ.rowEnv)), memo) TableAggregate(child2, query2) case MatrixAggregate(child, query) => - val child2 = rebuild(ctx, child, memo) - val query2 = rebuildIR(ctx, query, BindingEnv(child2.typ.globalEnv, agg = Some(child2.typ.entryEnv)), memo) + val child2 = rebuild(child, memo) + val query2 = rebuildIR(query, BindingEnv(child2.typ.globalEnv, agg = Some(child2.typ.entryEnv)), memo) MatrixAggregate(child2, query2) case TableCollect(child) => val rStruct = requestedType.asInstanceOf[TStruct] if (!rStruct.hasField("rows")) if (rStruct.hasField("global")) - MakeStruct(FastSeq("global" -> TableGetGlobals(rebuild(ctx, child, memo)))) + MakeStruct(FastSeq("global" -> TableGetGlobals(rebuild(child, memo)))) else MakeStruct(FastSeq()) else - TableCollect(rebuild(ctx, child, memo)) + TableCollect(rebuild(child, memo)) case AggExplode(array, name, aggBody, isScan) => - val a2 = rebuildIR(ctx, array, if (isScan) env.promoteScan else env.promoteAgg, memo) + val a2 = rebuildIR(array, if (isScan) env.promoteScan else env.promoteAgg, memo) val a2t = a2.typ.asInstanceOf[TStream].elementType - val body2 = rebuildIR(ctx, aggBody, if (isScan) env.bindScan(name, a2t) else env.bindAgg(name, a2t), memo) + val body2 = rebuildIR(aggBody, if (isScan) env.bindScan(name, a2t) else env.bindAgg(name, a2t), memo) AggExplode(a2, name, body2, isScan) case AggFilter(cond, aggIR, isScan) => - val cond2 = rebuildIR(ctx, cond, if (isScan) env.promoteScan else env.promoteAgg, memo) - val aggIR2 = rebuildIR(ctx, aggIR, env, memo) + val cond2 = rebuildIR(cond, if (isScan) env.promoteScan else env.promoteAgg, memo) + val aggIR2 = rebuildIR(aggIR, env, memo) AggFilter(cond2, aggIR2, isScan) case AggGroupBy(key, aggIR, isScan) => - val key2 = rebuildIR(ctx, key, if (isScan) env.promoteScan else 
env.promoteAgg, memo) - val aggIR2 = rebuildIR(ctx, aggIR, env, memo) + val key2 = rebuildIR(key, if (isScan) env.promoteScan else env.promoteAgg, memo) + val aggIR2 = rebuildIR(aggIR, env, memo) AggGroupBy(key2, aggIR2, isScan) case AggArrayPerElement(a, elementName, indexName, aggBody, knownLength, isScan) => val aEnv = if (isScan) env.promoteScan else env.promoteAgg - val a2 = rebuildIR(ctx, a, aEnv, memo) + val a2 = rebuildIR(a, aEnv, memo) val a2t = a2.typ.asInstanceOf[TArray].elementType val env_ = env.bindEval(indexName -> TInt32) - val aggBody2 = rebuildIR(ctx, aggBody, if (isScan) env_.bindScan(elementName, a2t) else env_.bindAgg(elementName, a2t), memo) - AggArrayPerElement(a2, elementName, indexName, aggBody2, knownLength.map(rebuildIR(ctx, _, aEnv, memo)), isScan) + val aggBody2 = rebuildIR(aggBody, if (isScan) env_.bindScan(elementName, a2t) else env_.bindAgg(elementName, a2t), memo) + AggArrayPerElement(a2, elementName, indexName, aggBody2, knownLength.map(rebuildIR(_, aEnv, memo)), isScan) case StreamAgg(a, name, query) => - val a2 = rebuildIR(ctx, a, env, memo) + val a2 = rebuildIR(a, env, memo) val newEnv = env.copy(agg = Some(env.eval.bind(name -> a2.typ.asInstanceOf[TStream].elementType))) - val query2 = rebuildIR(ctx, query, newEnv, memo) + val query2 = rebuildIR(query, newEnv, memo) StreamAgg(a2, name, query2) case StreamAggScan(a, name, query) => - val a2 = rebuildIR(ctx, a, env, memo) - val query2 = rebuildIR(ctx, query, env.copy(scan = Some(env.eval.bind(name -> a2.typ.asInstanceOf[TStream].elementType))), memo) + val a2 = rebuildIR(a, env, memo) + val query2 = rebuildIR(query, env.copy(scan = Some(env.eval.bind(name -> a2.typ.asInstanceOf[TStream].elementType))), memo) StreamAggScan(a2, name, query2) case RunAgg(body, result, signatures) => - val body2 = rebuildIR(ctx, body, env, memo) - val result2 = rebuildIR(ctx, result, env, memo) + val body2 = rebuildIR(body, env, memo) + val result2 = rebuildIR(result, env, memo) RunAgg(body2, result2, signatures) case RunAggScan(array, name, init, seqs, result, signature) => - val array2 = rebuildIR(ctx, array, env, memo) - val init2 = rebuildIR(ctx, init, env, memo) + val array2 = rebuildIR(array, env, memo) + val init2 = rebuildIR(init, env, memo) val eltEnv = env.bindEval(name, array2.typ.asInstanceOf[TStream].elementType) - val seqs2 = rebuildIR(ctx, seqs, eltEnv, memo) - val result2 = rebuildIR(ctx, result, eltEnv, memo) + val seqs2 = rebuildIR(seqs, eltEnv, memo) + val result2 = rebuildIR(result, eltEnv, memo) RunAggScan(array2, name, init2, seqs2, result2, signature) case ApplyAggOp(initOpArgs, seqOpArgs, aggSig) => - val initOpArgs2 = initOpArgs.map(rebuildIR(ctx, _, env, memo)) - val seqOpArgs2 = seqOpArgs.map(rebuildIR(ctx, _, env.promoteAgg, memo)) + val initOpArgs2 = initOpArgs.map(rebuildIR(_, env, memo)) + val seqOpArgs2 = seqOpArgs.map(rebuildIR(_, env.promoteAgg, memo)) ApplyAggOp(initOpArgs2, seqOpArgs2, aggSig.copy( initOpArgs = initOpArgs2.map(_.typ), seqOpArgs = seqOpArgs2.map(_.typ))) case ApplyScanOp(initOpArgs, seqOpArgs, aggSig) => - val initOpArgs2 = initOpArgs.map(rebuildIR(ctx, _, env, memo)) - val seqOpArgs2 = seqOpArgs.map(rebuildIR(ctx, _, env.promoteScan, memo)) + val initOpArgs2 = initOpArgs.map(rebuildIR(_, env, memo)) + val seqOpArgs2 = seqOpArgs.map(rebuildIR(_, env.promoteScan, memo)) ApplyScanOp(initOpArgs2, seqOpArgs2, aggSig.copy( initOpArgs = initOpArgs2.map(_.typ), seqOpArgs = seqOpArgs2.map(_.typ))) case AggFold(zero, seqOp, combOp, accumName, otherAccumName, isScan) => - val zero2 = 
rebuildIR(ctx, zero, env, memo) - val seqOp2 = rebuildIR(ctx, seqOp, if (isScan) env.promoteScan else env.promoteAgg, memo) - val combOp2 = rebuildIR(ctx, combOp, env, memo) + val zero2 = rebuildIR(zero, env, memo) + val seqOp2 = rebuildIR(seqOp, if (isScan) env.promoteScan else env.promoteAgg, memo) + val combOp2 = rebuildIR(combOp, env, memo) AggFold(zero2, seqOp2, combOp2, accumName, otherAccumName, isScan) case CollectDistributedArray(contexts, globals, cname, gname, body, tsd) => - val contexts2 = upcast(ctx, rebuildIR(ctx, contexts, env, memo), memo.requestedType.lookup(contexts).asInstanceOf[Type]) - val globals2 = upcast(ctx, rebuildIR(ctx, globals, env, memo), memo.requestedType.lookup(globals).asInstanceOf[Type]) - val body2 = rebuildIR(ctx, body, BindingEnv(Env(cname -> contexts2.typ.asInstanceOf[TStream].elementType, gname -> globals2.typ)), memo) + val contexts2 = upcast(rebuildIR(contexts, env, memo), memo.requestedType.lookup(contexts).asInstanceOf[Type]) + val globals2 = upcast(rebuildIR(globals, env, memo), memo.requestedType.lookup(globals).asInstanceOf[Type]) + val body2 = rebuildIR(body, BindingEnv(Env(cname -> contexts2.typ.asInstanceOf[TStream].elementType, gname -> globals2.typ)), memo) CollectDistributedArray(contexts2, globals2, cname, gname, body2, tsd) case _ => ir.copy(ir.children.map { - case valueIR: IR => rebuildIR(ctx, valueIR, env, memo) // FIXME: assert IR does not bind or change env - case mir: MatrixIR => rebuild(ctx, mir, memo) - case tir: TableIR => rebuild(ctx, tir, memo) + case valueIR: IR => rebuildIR(valueIR, env, memo) // FIXME: assert IR does not bind or change env + case mir: MatrixIR => rebuild(mir, memo) + case tir: TableIR => rebuild(tir, memo) case bmir: BlockMatrixIR => bmir //NOTE Currently no BlockMatrixIRs would have dead fields }) } } - def upcast(ctx: ExecuteContext, ir: IR, rType: Type): IR = { + def upcast(ir: IR, rType: Type): IR = { if (ir.typ == rType) ir else { @@ -2160,7 +2110,7 @@ object PruneDeadFields { val ref = Ref(uid, ir.typ) val ms = MakeStruct( rs.fields.map { f => - f.name -> upcast(ctx, GetField(ref, f.name), f.typ) + f.name -> upcast(GetField(ref, f.name), f.typ) } ) Let(uid, ir, If(IsNA(ref), NA(ms.typ), ms)) @@ -2168,43 +2118,40 @@ object PruneDeadFields { val ra = rType.asInstanceOf[TStream] val uid = genUID() val ref = Ref(uid, ts.elementType) - StreamMap(ir, uid, upcast(ctx, ref, ra.elementType)) + StreamMap(ir, uid, upcast(ref, ra.elementType)) case ts: TArray => val ra = rType.asInstanceOf[TArray] val uid = genUID() val ref = Ref(uid, ts.elementType) - ToArray(StreamMap(ToStream(ir), uid, upcast(ctx, ref, ra.elementType))) + ToArray(StreamMap(ToStream(ir), uid, upcast(ref, ra.elementType))) case _: TTuple => val rt = rType.asInstanceOf[TTuple] val uid = genUID() val ref = Ref(uid, ir.typ) val mt = MakeTuple(rt._types.map { tupleField => - tupleField.index -> upcast(ctx, GetTupleElement(ref, tupleField.index), tupleField.typ) + tupleField.index -> upcast(GetTupleElement(ref, tupleField.index), tupleField.typ) }) Let(uid, ir, If(IsNA(ref), NA(mt.typ), mt)) case _: TDict => val rd = rType.asInstanceOf[TDict] - ToDict(upcast(ctx, ToStream(ir), TArray(rd.elementType))) + ToDict(upcast(ToStream(ir), TArray(rd.elementType))) case _: TSet => val rs = rType.asInstanceOf[TSet] - ToSet(upcast(ctx, ToStream(ir), TSet(rs.elementType))) + ToSet(upcast(ToStream(ir), TSet(rs.elementType))) case _ => ir } - assert(result.typ == rType, s"${ Pretty(ctx, result) }, ${ result.typ }, $rType") + assert(result.typ == rType, s"${ 
Pretty(result) }, ${ result.typ }, $rType") result } } - def upcast( - ctx: ExecuteContext, - ir: MatrixIR, - rType: MatrixType, + def upcast(ir: MatrixIR, rType: MatrixType, upcastRows: Boolean = true, upcastCols: Boolean = true, upcastGlobals: Boolean = true, - upcastEntries: Boolean = true - ): MatrixIR = { + upcastEntries: Boolean = true): MatrixIR = { + if (ir.typ == rType || !(upcastRows || upcastCols || upcastGlobals || upcastEntries)) ir else { @@ -2216,25 +2163,24 @@ object PruneDeadFields { } if (upcastEntries && mt.typ.entryType != rType.entryType) - mt = MatrixMapEntries(mt, upcast(ctx, Ref("g", mt.typ.entryType), rType.entryType)) + mt = MatrixMapEntries(mt, upcast(Ref("g", mt.typ.entryType), rType.entryType)) if (upcastRows && mt.typ.rowType != rType.rowType) - mt = MatrixMapRows(mt, upcast(ctx, Ref("va", mt.typ.rowType), rType.rowType)) + mt = MatrixMapRows(mt, upcast(Ref("va", mt.typ.rowType), rType.rowType)) if (upcastCols && (mt.typ.colType != rType.colType || mt.typ.colKey != rType.colKey)) { - mt = MatrixMapCols(mt, upcast(ctx, Ref("sa", mt.typ.colType), rType.colType), + mt = MatrixMapCols(mt, upcast(Ref("sa", mt.typ.colType), rType.colType), if (rType.colKey == mt.typ.colKey) None else Some(rType.colKey)) } if (upcastGlobals && mt.typ.globalType != rType.globalType) - mt = MatrixMapGlobals(mt, upcast(ctx, Ref("global", ir.typ.globalType), rType.globalType)) + mt = MatrixMapGlobals(mt, upcast(Ref("global", ir.typ.globalType), rType.globalType)) mt } } def upcastTable( - ctx: ExecuteContext, ir: TableIR, rType: TableType, upcastRow: Boolean = true, @@ -2249,11 +2195,11 @@ object PruneDeadFields { table = TableKeyBy(table, rType.key) } if (upcastRow && ir.typ.rowType != rType.rowType) { - table = TableMapRows(table, upcast(ctx, Ref("row", table.typ.rowType), rType.rowType)) + table = TableMapRows(table, upcast(Ref("row", table.typ.rowType), rType.rowType)) } if (upcastGlobals && ir.typ.globalType != rType.globalType) { table = TableMapGlobals(table, - upcast(ctx, Ref("global", table.typ.globalType), rType.globalType)) + upcast(Ref("global", table.typ.globalType), rType.globalType)) } table } diff --git a/hail/src/main/scala/is/hail/expr/ir/Random.scala b/hail/src/main/scala/is/hail/expr/ir/Random.scala deleted file mode 100644 index 085d6873d70..00000000000 --- a/hail/src/main/scala/is/hail/expr/ir/Random.scala +++ /dev/null @@ -1,481 +0,0 @@ -package is.hail.expr.ir - -import is.hail.asm4s._ -import is.hail.types.physical.stypes.concrete.SRNGState -import is.hail.utils.FastIndexedSeq -import net.sourceforge.jdistlib.rng.RandomEngine - -object Threefry { - val keyConst = 0x1BD11BDAA9FC1A22L - - val rotConsts = Array( - Array(14, 16), - Array(52, 57), - Array(23, 40), - Array( 5, 37), - Array(25, 33), - Array(46, 12), - Array(58, 22), - Array(32, 32)) - - val defaultNumRounds = 20 - - def expandKey(k: IndexedSeq[Long]): IndexedSeq[Long] = { - assert(k.length == 4) - val k4 = k(0) ^ k(1) ^ k(2) ^ k(3) ^ keyConst - k :+ k4 - } - - def rotL(i: Value[Long], n: Value[Int]): Code[Long] = { - (i << n) | (i >>> -n) - } - - def mix(cb: CodeBuilderLike, x0: Settable[Long], x1: Settable[Long], n: Int): Unit = { - cb.assign(x0, x0 + x1) - cb.assign(x1, rotL(x1, n)) - cb.assign(x1, x0 ^ x1) - } - - def injectKey(key: IndexedSeq[Long], tweak: Long, block: Array[Long], s: Int): Unit = { - val tweakExt = Array[Long](tweak, 0, tweak) - block(0) += key(s % 5) - block(1) += key((s + 1) % 5) + tweakExt(s % 3) - block(2) += key((s + 2) % 5) + tweakExt((s + 1) % 3) - block(3) += key((s + 3) % 
5) + s.toLong - } - - def injectKey(cb: CodeBuilderLike, - key: IndexedSeq[Long], - tweak: Value[Long], - block: IndexedSeq[Settable[Long]], - s: Int - ): Unit = { - val tweakExt = Array[Value[Long]](tweak, const(0), tweak) - cb.assign(block(0), block(0) + key(s % 5)) - cb.assign(block(1), block(1) + const(key((s + 1) % 5)) + tweakExt(s % 3)) - cb.assign(block(2), block(2) + const(key((s + 2) % 5)) + tweakExt((s + 1) % 3)) - cb.assign(block(3), block(3) + const(key((s + 3) % 5)) + const(s.toLong)) - } - - def permute(x: Array[Settable[Long]]): Unit = { - val tmp = x(1) - x(1) = x(3) - x(3) = tmp - } - - def encryptUnrolled(k0: Long, k1: Long, k2: Long, k3: Long, t: Long, _x0: Long, _x1: Long, _x2: Long, _x3: Long): Unit = { - import java.lang.Long.rotateLeft - var x0 = _x0 - var x1 = _x1 - var x2 = _x2 - var x3 = _x3 - val k4 = k0 ^ k1 ^ k2 ^ k3 ^ keyConst - // d = 0 - // injectKey s = 0 - x0 += k0; x1 += k1 + t; x2 += k2; x3 += k3 - x0 += x1; x1 = rotateLeft(x1, 14); x1 ^= x0 - x2 += x3; x3 = rotateLeft(x3, 16); x3 ^= x2 - // d = 1 - x0 += x3; x3 = rotateLeft(x3, 52); x3 ^= x0 - x2 += x1; x1 = rotateLeft(x1, 57); x1 ^= x2 - // d = 2 - x0 += x1; x1 = rotateLeft(x1, 23); x1 ^= x0 - x2 += x3; x3 = rotateLeft(x3, 40); x3 ^= x2 - // d = 3 - x0 += x3; x3 = rotateLeft(x3, 5); x3 ^= x0 - x2 += x1; x1 = rotateLeft(x1, 37); x1 ^= x2 - // d = 4 - // injectKey s = 1 - x0 += k1; x1 += k2; x2 += k3 + t; x3 += k4 + 1 - x0 += x1; x1 = rotateLeft(x1, 25); x1 ^= x0 - x2 += x3; x3 = rotateLeft(x3, 33); x3 ^= x2 - // d = 5 - x0 += x3; x3 = rotateLeft(x3, 46); x3 ^= x0 - x2 += x1; x1 = rotateLeft(x1, 12); x1 ^= x2 - // d = 6 - x0 += x1; x1 = rotateLeft(x1, 58); x1 ^= x0 - x2 += x3; x3 = rotateLeft(x3, 22); x3 ^= x2 - // d = 7 - x0 += x3; x3 = rotateLeft(x3, 32); x3 ^= x0 - x2 += x1; x1 = rotateLeft(x1, 32); x1 ^= x2 - // d = 8 - // injectKey s = 2 - x0 += k2; x1 += k3 + t; x2 += k4 + t; x3 += k0 + 2 - x0 += x1; x1 = rotateLeft(x1, 14); x1 ^= x0 - x2 += x3; x3 = rotateLeft(x3, 16); x3 ^= x2 - // d = 9 - x0 += x3; x3 = rotateLeft(x3, 52); x3 ^= x0 - x2 += x1; x1 = rotateLeft(x1, 57); x1 ^= x2 - // d = 10 - x0 += x1; x1 = rotateLeft(x1, 23); x1 ^= x0 - x2 += x3; x3 = rotateLeft(x3, 40); x3 ^= x2 - // d = 11 - x0 += x3; x3 = rotateLeft(x3, 5); x3 ^= x0 - x2 += x1; x1 = rotateLeft(x1, 37); x1 ^= x2 - // d = 12 - // injectKey s = 3 - x0 += k3; x1 += k4 + t; x2 += k0; x3 += k1 + 3 - x0 += x1; x1 = rotateLeft(x1, 25); x1 ^= x0 - x2 += x3; x3 = rotateLeft(x3, 33); x3 ^= x2 - // d = 13 - x0 += x3; x3 = rotateLeft(x3, 46); x3 ^= x0 - x2 += x1; x1 = rotateLeft(x1, 12); x1 ^= x2 - // d = 14 - x0 += x1; x1 = rotateLeft(x1, 58); x1 ^= x0 - x2 += x3; x3 = rotateLeft(x3, 22); x3 ^= x2 - // d = 15 - x0 += x3; x3 = rotateLeft(x3, 32); x3 ^= x0 - x2 += x1; x1 = rotateLeft(x1, 32); x1 ^= x2 - // d = 16 - // injectKey s = 4 - x0 += k4; x1 += k0; x2 += k1 + t; x3 += k2 + 4 - x0 += x1; x1 = rotateLeft(x1, 14); x1 ^= x0 - x2 += x3; x3 = rotateLeft(x3, 16); x3 ^= x2 - // d = 17 - x0 += x3; x3 = rotateLeft(x3, 52); x3 ^= x0 - x2 += x1; x1 = rotateLeft(x1, 57); x1 ^= x2 - // d = 18 - x0 += x1; x1 = rotateLeft(x1, 23); x1 ^= x0 - x2 += x3; x3 = rotateLeft(x3, 40); x3 ^= x2 - // d = 19 - x0 += x3; x3 = rotateLeft(x3, 5); x3 ^= x0 - x2 += x1; x1 = rotateLeft(x1, 37); x1 ^= x2 - // d = 20 - // injectKey s = 5 - x0 += k0; x1 += k1 + t; x2 += k2 + t; x3 += k3 + 5 - } - - def encrypt(k: IndexedSeq[Long], t: Long, x: Array[Long]): Unit = - encrypt(k, t, x, defaultNumRounds) - - def encrypt(k: IndexedSeq[Long], t: Long, x: Array[Long], rounds: 
Int): Unit = { - assert(k.length == 5) - assert(x.length == 4) - - for (d <- 0 until rounds) { - if (d % 4 == 0) - injectKey(k, t, x, d / 4) - - x(0) += x(1) - x(1) = java.lang.Long.rotateLeft(x(1), rotConsts(d % 8)(0)) - x(1) ^= x(0) - x(2) += x(3) - x(3) = java.lang.Long.rotateLeft(x(3), rotConsts(d % 8)(1)) - x(3) ^= x(2) - - val tmp = x(1) - x(1) = x(3) - x(3) = tmp - } - - if (rounds % 4 == 0) - injectKey(k, t, x, rounds / 4) - } - - def encrypt(cb: CodeBuilderLike, - k: IndexedSeq[Long], - t: Value[Long], - x: IndexedSeq[Settable[Long]] - ): Unit = - encrypt(cb, k, t, x, defaultNumRounds) - - def encrypt(cb: CodeBuilderLike, - k: IndexedSeq[Long], - t: Value[Long], - _x: IndexedSeq[Settable[Long]], - rounds: Int - ): Unit = { - assert(k.length == 5) - assert(_x.length == 4) - val x = _x.toArray - - for (d <- 0 until rounds) { - if (d % 4 == 0) - injectKey(cb, k, t, x, d / 4) - - for (j <- 0 until 2) - mix(cb, x(2*j), x(2*j+1), rotConsts(d % 8)(j)) - - permute(x) - } - - if (rounds % 4 == 0) - injectKey(cb, k, t, x, rounds / 4) - } - - def debugPrint(cb: EmitCodeBuilder, x: IndexedSeq[Settable[Long]], info: String) { - cb.println(s"[$info]=\n\t", x(0).toString, " ", x(1).toString, " ", x(2).toString, " ", x(3).toString) - } - - def apply(k: IndexedSeq[Long]): AsmFunction2[Array[Long], Long, Unit] = { - val f = FunctionBuilder[Array[Long], Long, Unit]("Threefry") - f.mb.emitWithBuilder { cb => - val xArray = f.mb.getArg[Array[Long]](1) - val t = f.mb.getArg[Long](2) - val x = Array.tabulate[Settable[Long]](4)(i => cb.newLocal[Long](s"x$i", xArray(i))) - encrypt(cb, expandKey(k), t, x) - for (i <- 0 until 4) cb += (xArray(i) = x(i)) - Code._empty - } - f.result(false)(new HailClassLoader(getClass.getClassLoader)) - } -} - -class RNGState { - val staticAcc: Array[Long] = Array.fill(4)(0) - val staticIdx: Int = 0 - val staticOpen: Array[Long] = Array.fill(4)(0) - val staticOpenLen: Int = 0 - val dynAcc: Array[Long] = Array.fill(4)(0) - val dynIdx: Int = 0 - val dynOpen: Array[Long] = Array.fill(4)(0) - val dynOpenLen: Int = 0 -} - -object ThreefryRandomEngine { - def apply( - k1: Long, k2: Long, k3: Long, k4: Long, - h1: Long, h2: Long, h3: Long, h4: Long, - x1: Long, x2: Long, x3: Long - ): ThreefryRandomEngine = { - new ThreefryRandomEngine( - Threefry.expandKey(FastIndexedSeq(k1, k2, k3, k4)), - Array(h1 ^ x1, h2 ^ x2, h3 ^ x3, h4), - 0) - } - - def apply(): ThreefryRandomEngine = { - val rand = new java.util.Random() - new ThreefryRandomEngine( - Threefry.expandKey(Array.fill(4)(rand.nextLong())), - Array.fill(4)(rand.nextLong()), - 0) - } -} - -class ThreefryRandomEngine( - val key: IndexedSeq[Long], - val state: Array[Long], - var counter: Long, - val tweak: Long = SRNGState.finalBlockNoPadTweak -) extends RandomEngine { - val buffer: Array[Long] = Array.ofDim[Long](4) - var usedInts: Int = 8 - var hasBufferedGaussian: Boolean = false - var bufferedGaussian: Double = 0.0 - - override def clone(): ThreefryRandomEngine = ??? 
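The removed Threefry object above is a keyed 256-bit block permutation with a 64-bit tweak: expandKey turns a 4-word key into 5 words (appending an xor-derived fifth word), and encrypt runs 20 mix/permute rounds over a 4-word block in place. A minimal usage sketch, assuming only the signatures shown in this hunk (the file is deleted by this patch, so this is illustrative and not part of the change):

import is.hail.expr.ir.Threefry
import is.hail.utils.FastIndexedSeq

object ThreefryExample {
  def main(args: Array[String]): Unit = {
    // expandKey: 4 key words in, 5 out
    val key = Threefry.expandKey(FastIndexedSeq(1L, 2L, 3L, 4L))
    // encrypt mutates the 4-word block in place over the default 20 rounds
    val block = Array(0L, 0L, 0L, 7L)
    Threefry.encrypt(key, 0L, block)
    println(block.mkString(", ")) // deterministic for a fixed key, tweak and input
  }
}

Calling encrypt again on the same key, tweak and input reproduces the same output; the engine below turns that into a stream by xoring a running counter into the last input word before each block (see fillBuffer).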
- - private def fillBuffer(): Unit = { - import java.lang.Long.rotateLeft - var x0 = state(0) - var x1 = state(1) - var x2 = state(2) - var x3 = state(3) ^ counter - val k0 = key(0); val k1 = key(1); val k2 = key(2); val k3 = key(3) - val k4 = k0 ^ k1 ^ k2 ^ k3 ^ Threefry.keyConst - val t = tweak - // d = 0 - // injectKey s = 0 - x0 += k0; x1 += k1 + t; x2 += k2; x3 += k3 - x0 += x1; x1 = rotateLeft(x1, 14); x1 ^= x0 - x2 += x3; x3 = rotateLeft(x3, 16); x3 ^= x2 - // d = 1 - x0 += x3; x3 = rotateLeft(x3, 52); x3 ^= x0 - x2 += x1; x1 = rotateLeft(x1, 57); x1 ^= x2 - // d = 2 - x0 += x1; x1 = rotateLeft(x1, 23); x1 ^= x0 - x2 += x3; x3 = rotateLeft(x3, 40); x3 ^= x2 - // d = 3 - x0 += x3; x3 = rotateLeft(x3, 5); x3 ^= x0 - x2 += x1; x1 = rotateLeft(x1, 37); x1 ^= x2 - // d = 4 - // injectKey s = 1 - x0 += k1; x1 += k2; x2 += k3 + t; x3 += k4 + 1 - x0 += x1; x1 = rotateLeft(x1, 25); x1 ^= x0 - x2 += x3; x3 = rotateLeft(x3, 33); x3 ^= x2 - // d = 5 - x0 += x3; x3 = rotateLeft(x3, 46); x3 ^= x0 - x2 += x1; x1 = rotateLeft(x1, 12); x1 ^= x2 - // d = 6 - x0 += x1; x1 = rotateLeft(x1, 58); x1 ^= x0 - x2 += x3; x3 = rotateLeft(x3, 22); x3 ^= x2 - // d = 7 - x0 += x3; x3 = rotateLeft(x3, 32); x3 ^= x0 - x2 += x1; x1 = rotateLeft(x1, 32); x1 ^= x2 - // d = 8 - // injectKey s = 2 - x0 += k2; x1 += k3 + t; x2 += k4 + t; x3 += k0 + 2 - x0 += x1; x1 = rotateLeft(x1, 14); x1 ^= x0 - x2 += x3; x3 = rotateLeft(x3, 16); x3 ^= x2 - // d = 9 - x0 += x3; x3 = rotateLeft(x3, 52); x3 ^= x0 - x2 += x1; x1 = rotateLeft(x1, 57); x1 ^= x2 - // d = 10 - x0 += x1; x1 = rotateLeft(x1, 23); x1 ^= x0 - x2 += x3; x3 = rotateLeft(x3, 40); x3 ^= x2 - // d = 11 - x0 += x3; x3 = rotateLeft(x3, 5); x3 ^= x0 - x2 += x1; x1 = rotateLeft(x1, 37); x1 ^= x2 - // d = 12 - // injectKey s = 3 - x0 += k3; x1 += k4 + t; x2 += k0; x3 += k1 + 3 - x0 += x1; x1 = rotateLeft(x1, 25); x1 ^= x0 - x2 += x3; x3 = rotateLeft(x3, 33); x3 ^= x2 - // d = 13 - x0 += x3; x3 = rotateLeft(x3, 46); x3 ^= x0 - x2 += x1; x1 = rotateLeft(x1, 12); x1 ^= x2 - // d = 14 - x0 += x1; x1 = rotateLeft(x1, 58); x1 ^= x0 - x2 += x3; x3 = rotateLeft(x3, 22); x3 ^= x2 - // d = 15 - x0 += x3; x3 = rotateLeft(x3, 32); x3 ^= x0 - x2 += x1; x1 = rotateLeft(x1, 32); x1 ^= x2 - // d = 16 - // injectKey s = 4 - x0 += k4; x1 += k0; x2 += k1 + t; x3 += k2 + 4 - x0 += x1; x1 = rotateLeft(x1, 14); x1 ^= x0 - x2 += x3; x3 = rotateLeft(x3, 16); x3 ^= x2 - // d = 17 - x0 += x3; x3 = rotateLeft(x3, 52); x3 ^= x0 - x2 += x1; x1 = rotateLeft(x1, 57); x1 ^= x2 - // d = 18 - x0 += x1; x1 = rotateLeft(x1, 23); x1 ^= x0 - x2 += x3; x3 = rotateLeft(x3, 40); x3 ^= x2 - // d = 19 - x0 += x3; x3 = rotateLeft(x3, 5); x3 ^= x0 - x2 += x1; x1 = rotateLeft(x1, 37); x1 ^= x2 - // d = 20 - // injectKey s = 5 - x0 += k0; x1 += k1 + t; x2 += k2 + t; x3 += k3 + 5 - - buffer(0) = x0; buffer(1) = x1; buffer(2) = x2; buffer(3) = x3 - counter += 1 - usedInts = 0 - } - - override def setSeed(seed: Long): Unit = ??? - - override def getSeed: Long = ??? 
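fillBuffer above is an unrolled 20-round pass that encrypts (state(0), state(1), state(2), state(3) ^ counter) under the fixed key, bumps the counter, and leaves four fresh words in buffer for the next* methods that follow. A stripped-down sketch of that counter-mode control flow, with a placeholder mixing function standing in for the Threefry rounds (illustrative only, not the patch's code):

class CounterModeEngine(key: Long, state: Array[Long]) {
  require(state.length == 4)
  private val buffer = new Array[Long](4)
  private var counter = 0L
  private var used = 4 // buffer starts empty, so the first draw triggers a fill

  // Placeholder for the real block cipher: any fixed mix of the inputs shows the
  // structure; Threefry's 20 keyed rounds are what make the output pseudorandom.
  private def block(x0: Long, x1: Long, x2: Long, x3: Long): Array[Long] = {
    val a = (x0 + key) * 0x9E3779B97F4A7C15L
    Array(a, a ^ x1, java.lang.Long.rotateLeft(x2 + a, 17), x3 - a)
  }

  private def fillBuffer(): Unit = {
    val out = block(state(0), state(1), state(2), state(3) ^ counter)
    Array.copy(out, 0, buffer, 0, 4)
    counter += 1
    used = 0
  }

  def nextLong(): Long = {
    if (used >= 4) fillBuffer()
    val r = buffer(used); used += 1; r
  }
}

The real engine tracks usedInts in 32-bit units so nextInt can hand out half a buffered word at a time, but the refill loop has the same shape.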
- - override def nextLong(): Long = { - usedInts += usedInts & 1 // round up to multiple of 2 - if (usedInts >= 8) fillBuffer() - val result = buffer(usedInts >> 1) - usedInts += 2 - result - } - - override def nextInt(): Int = { - if (usedInts >= 8) fillBuffer() - val result = buffer(usedInts >> 1) - usedInts += 1 - val parity = usedInts & 1 - val shift = parity << 5 // either 0 or 32 - (result >>> shift).toInt // either first or second 32 bits - } - - // Uses approach from https://github.com/apple/swift/pull/39143 - override def nextInt(n: Int): Int = { - val nL = n.toLong - val mult = nL * (nextInt().toLong & 0xFFFFFFFFL) - val result = (mult >>> 32).toInt - val fraction = mult & 0xFFFFFFFFL - - // optional early return, benchmark to decide if it helps - if (fraction < ((1L << 32) - nL)) return result - - val multHigh = (((nL * (nextInt().toLong & 0xFFFFFFFFL)) >>> 32) + (nL * (nextInt().toLong & 0xFFFFFFFFL))) >>> 32 - val sum = fraction + multHigh - val carry = (sum >>> 32).toInt - result + carry - } - - // Uses standard Java approach. We could use the same approach as for ints, - // but that requires full-width multiplication of two longs, which adds some - // complexity. - override def nextLong(l: Long): Long = { - var x = nextLong() >>> 1 - var r = x % l - while (x - r + (l - 1) < 0) { - x = nextLong() >>> 1 - r = x % l - } - r - } - - override def nextGaussian(): Double = { - if (hasBufferedGaussian) { - hasBufferedGaussian = false - return bufferedGaussian - } - - var v1 = 2 * nextDouble() - 1 // between -1 and 1 - var v2 = 2 * nextDouble() - 1 - var s = v1 * v1 + v2 * v2 - while (s >= 1 || s == 0) { - v1 = 2 * nextDouble() - 1 // between -1 and 1 - v2 = 2 * nextDouble() - 1 - s = v1 * v1 + v2 * v2 - } - val multiplier = StrictMath.sqrt(-2 * StrictMath.log(s) / s) - bufferedGaussian = v2 * multiplier - hasBufferedGaussian = true - v1 * multiplier - } - - // Equivalent to generating an infinite-precision real number in [0, 1), - // represented as an infinitely long bitstream, and rounding down to the - // nearest representable floating point number. - // In contrast, the standard Java and jdistlib generators sample uniformly - // from a sequence of equidistant floating point numbers in [0, 1), using - // (nextLong() >>> 11).toDouble / (1L << 53) - // - // Intuitively, the algorithm is: - // * lazily generate an infinite string of random bits, interpreted as - // the binary expansion of a real number in [0, 1), i.e. `0.${bits}` - // * convert to floating point representation: the exponent is -n, where n is - // the number of 0s before the first 1, and the significand is the first 1 - // followed by the next 52 bits. 
- override def nextDouble(): Double = { - // first generate random bits until we get the first 1, counting the number - // of zeroes - var bits: Long = nextLong() - // the exponent starts at 1022 and subtracts the number of leading zeroes, - // to account for the exponent bias in IEE754 - var exponent: Int = 1022 - while (bits == 0) { - bits = nextLong() - exponent -= 64 - } - // use trailing zeroes instead of leading zeroes as slight optimization, - // but probabilistically equivalent - val e = java.lang.Long.numberOfTrailingZeros(bits) - exponent -= e - // If there are at least 52 bits before the trailing 1, use those - val significand = (if (e < 12) bits else nextLong()) >>> 12 - val result = (exponent.toLong << 52) | significand - java.lang.Double.longBitsToDouble(result) - } - - override def nextFloat(): Float = { - // first generate random bits until we get the first 1, counting the number - // of zeroes - var bits: Int = nextInt() - // the exponent starts at 126 and subtracts the number of leading zeroes, - // to account for the exponent bias in IEE754 - var exponent: Int = 126 - while (bits == 0) { - bits = nextInt() - exponent -= 32 - } - // use trailing zeroes instead of leading zeroes as slight optimization, - // but probabilistically equivalent - val e = java.lang.Long.numberOfTrailingZeros(bits) - exponent -= e - // If there are at least 23 bits before the trailing 1, use those - val significand = (if (e < 9) bits else nextInt()) >>> 9 - val result = (exponent << 23) | significand - java.lang.Float.intBitsToFloat(result) - } -} \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/expr/ir/Requiredness.scala b/hail/src/main/scala/is/hail/expr/ir/Requiredness.scala index 077237ddc6d..af5b747149e 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Requiredness.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Requiredness.scala @@ -289,7 +289,7 @@ class Requiredness(val usesAndDefs: UsesAndDefs, ctx: ExecuteContext) { refMap(partitionStreamName).foreach { u => defs.bind(u, Array[BaseTypeWithRequiredness](RIterable(lookup(child).rowType))) } val refs = refMap.getOrElse(globalName, FastIndexedSeq()) ++ refMap.getOrElse(partitionStreamName, FastIndexedSeq()) dependents.getOrElseUpdate(child, mutable.Set[RefEquality[BaseIR]]()) ++= refs - case _ => fatal(Pretty(ctx, node)) + case _ => fatal(Pretty(node)) } } @@ -758,7 +758,6 @@ class Requiredness(val usesAndDefs: UsesAndDefs, ctx: ExecuteContext) { coerce[RStruct](requiredness).field("global").unionFrom(lookup(c).globalType) case BlockMatrixToValueApply(child, GetElement(_)) => // BlockMatrix elements are all required case BlockMatrixCollect(child) => // BlockMatrix elements are all required - case BlockMatrixWrite(child, writer) => // write result is required } requiredness.probeChangedAndReset() } diff --git a/hail/src/main/scala/is/hail/expr/ir/Simplify.scala b/hail/src/main/scala/is/hail/expr/ir/Simplify.scala index 1891e894941..06646653420 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Simplify.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Simplify.scala @@ -1,7 +1,6 @@ package is.hail.expr.ir import is.hail.HailContext -import is.hail.backend.ExecuteContext import is.hail.types.virtual._ import is.hail.io.bgen.MatrixBGENReader import is.hail.rvd.{PartitionBoundOrdering, RVDPartitionInfo} @@ -11,18 +10,18 @@ object Simplify { /** Transform 'ir' using simplification rules until none apply. 
*/ - def apply(ctx: ExecuteContext, ir: BaseIR): BaseIR = Simplify(ctx, ir, allowRepartitioning = true) + def apply(ir: BaseIR): BaseIR = Simplify(ir, allowRepartitioning = true) /** Use 'allowRepartitioning'=false when in a context where simplification * should not change the partitioning of the result of 'ast', such as when * some parent (downstream) node of 'ast' uses seeded randomness. */ - private[ir] def apply(ctx: ExecuteContext, ast: BaseIR, allowRepartitioning: Boolean): BaseIR = + private[ir] def apply(ast: BaseIR, allowRepartitioning: Boolean): BaseIR = ast match { - case ir: IR => simplifyValue(ctx)(ir) - case tir: TableIR => simplifyTable(ctx, allowRepartitioning)(tir) - case mir: MatrixIR => simplifyMatrix(ctx, allowRepartitioning)(mir) - case bmir: BlockMatrixIR => simplifyBlockMatrix(ctx)(bmir) + case ir: IR => simplifyValue(ir) + case tir: TableIR => simplifyTable(allowRepartitioning)(tir) + case mir: MatrixIR => simplifyMatrix(allowRepartitioning)(mir) + case bmir: BlockMatrixIR => simplifyBlockMatrix(bmir) } private[this] def visitNode[T <: BaseIR]( @@ -34,38 +33,38 @@ object Simplify { transform(t1).map(post).getOrElse(t1) } - private[this] def simplifyValue(ctx: ExecuteContext): IR => IR = + private[this] def simplifyValue: IR => IR = visitNode( - Simplify(ctx, _), + Simplify(_), rewriteValueNode, - simplifyValue(ctx)) + simplifyValue) - private[this] def simplifyTable(ctx: ExecuteContext, allowRepartitioning: Boolean)(tir: TableIR): TableIR = + private[this] def simplifyTable(allowRepartitioning: Boolean)(tir: TableIR): TableIR = visitNode( - Simplify(ctx, _, allowRepartitioning && isDeterministicallyRepartitionable(tir)), - rewriteTableNode(ctx, allowRepartitioning), - simplifyTable(ctx, allowRepartitioning) + Simplify(_, allowRepartitioning && isDeterministicallyRepartitionable(tir)), + rewriteTableNode(allowRepartitioning), + simplifyTable(allowRepartitioning) )(tir) - private[this] def simplifyMatrix(ctx: ExecuteContext, allowRepartitioning: Boolean)(mir: MatrixIR): MatrixIR = + private[this] def simplifyMatrix(allowRepartitioning: Boolean)(mir: MatrixIR): MatrixIR = visitNode( - Simplify(ctx, _, allowRepartitioning && isDeterministicallyRepartitionable(mir)), + Simplify(_, allowRepartitioning && isDeterministicallyRepartitionable(mir)), rewriteMatrixNode(allowRepartitioning), - simplifyMatrix(ctx, allowRepartitioning) + simplifyMatrix(allowRepartitioning) )(mir) - private[this] def simplifyBlockMatrix(ctx: ExecuteContext)(bmir: BlockMatrixIR): BlockMatrixIR = { + private[this] def simplifyBlockMatrix(bmir: BlockMatrixIR): BlockMatrixIR = { visitNode( - Simplify(ctx, _), + Simplify(_), rewriteBlockMatrixNode, - simplifyBlockMatrix(ctx) + simplifyBlockMatrix )(bmir) } private[this] def rewriteValueNode: IR => Option[IR] = valueRules.lift - private[this] def rewriteTableNode(ctx: ExecuteContext, allowRepartitioning: Boolean)(tir: TableIR): Option[TableIR] = - tableRules(ctx, allowRepartitioning && isDeterministicallyRepartitionable(tir)).lift(tir) + private[this] def rewriteTableNode(allowRepartitioning: Boolean)(tir: TableIR): Option[TableIR] = + tableRules(allowRepartitioning && isDeterministicallyRepartitionable(tir)).lift(tir) private[this] def rewriteMatrixNode(allowRepartitioning: Boolean)(mir: MatrixIR): Option[MatrixIR] = matrixRules(allowRepartitioning && isDeterministicallyRepartitionable(mir)).lift(mir) @@ -595,7 +594,7 @@ object Simplify { case LiftMeOut(child) if IsConstant(child) => child } - private[this] def tableRules(ctx: ExecuteContext, 
canRepartition: Boolean): PartialFunction[TableIR, TableIR] = { + private[this] def tableRules(canRepartition: Boolean): PartialFunction[TableIR, TableIR] = { case TableRename(child, m1, m2) if m1.isTrivial && m2.isTrivial => child @@ -783,16 +782,10 @@ object Simplify { TableAggregateByKey(child, expr) case TableAggregateByKey(x@TableKeyBy(child, keys, false), expr) if canRepartition && !x.definitelyDoesNotShuffle => - TableKeyByAndAggregate(child, expr, MakeStruct(keys.map(k => k -> GetField(Ref("row", child.typ.rowType), k))), bufferSize = ctx.getFlag("grouped_aggregate_buffer_size").toInt) + TableKeyByAndAggregate(child, expr, MakeStruct(keys.map(k => k -> GetField(Ref("row", child.typ.rowType), k))), bufferSize = HailContext.getFlag("grouped_aggregate_buffer_size").toInt) case TableParallelize(TableCollect(child), _) if isDeterministicallyRepartitionable(child) => child - case TableFilterIntervals(child, intervals, keep) if intervals.isEmpty => - if (keep) - TableFilter(child, False()) - else - child - // push down filter intervals nodes case TableFilterIntervals(TableFilter(child, pred), intervals, keep) => TableFilter(TableFilterIntervals(child, intervals, keep), pred) diff --git a/hail/src/main/scala/is/hail/expr/ir/SpecializedArrayBuilders.scala b/hail/src/main/scala/is/hail/expr/ir/SpecializedArrayBuilders.scala index 2376f2f618c..a6e724a0619 100644 --- a/hail/src/main/scala/is/hail/expr/ir/SpecializedArrayBuilders.scala +++ b/hail/src/main/scala/is/hail/expr/ir/SpecializedArrayBuilders.scala @@ -806,69 +806,6 @@ final class BooleanArrayBuilder(initialCapacity: Int = 16) { } } -final class StringArrayBuilder(initialCapacity: Int = 16) { - var size_ : Int = 0 - var b: Array[String] = new Array[String](initialCapacity) - - def size: Int = size_ - - def setSize(n: Int) { - require(n >= 0 && n <= size) - size_ = n - } - - def apply(i: Int): String = { - require(i >= 0 && i < size) - b(i) - } - - def ensureCapacity(n: Int): Unit = { - if (b.length < n) { - val newCapacity = math.max(n, b.length * 2) - val newb = new Array[String](newCapacity) - Array.copy(b, 0, newb, 0, size_) - b = newb - } - } - - def +=(x: String) = add(x) - - def add(x: String): Unit = { - ensureCapacity(size_ + 1) - b(size_) = x - size_ += 1 - } - - def update(i: Int, x: String): Unit = { - require(i >= 0 && i < size) - b(i) = x - } - - def clear() { size_ = 0 } - - def result(): Array[String] = { - val a = new Array[String](size_) - System.arraycopy(b, 0, a, 0, size_) - a - } - - def clearAndResize(): Unit = { - size_ = 0 - if (b.length > initialCapacity) - b = new Array[String](initialCapacity) - } - def appendFrom(ab2: StringArrayBuilder): Unit = { - ensureCapacity(size_ + ab2.size_) - System.arraycopy(ab2.b, 0, b, size_, ab2.size_) - size_ = size_ + ab2.size_ - } - - def pop(): String = { - size_ -= 1 - b(size) - } -} - final class AnyRefArrayBuilder[T <: AnyRef](initialCapacity: Int = 16)(implicit ct: ClassTag[T]) { var size_ : Int = 0 diff --git a/hail/src/main/scala/is/hail/expr/ir/StringTableReader.scala b/hail/src/main/scala/is/hail/expr/ir/StringTableReader.scala index 7ce9b95a40b..50946b0bd60 100644 --- a/hail/src/main/scala/is/hail/expr/ir/StringTableReader.scala +++ b/hail/src/main/scala/is/hail/expr/ir/StringTableReader.scala @@ -12,21 +12,16 @@ import is.hail.types.physical.stypes.interfaces.{SBaseStructValue, SStreamValue} import is.hail.types.physical.{PCanonicalString, PCanonicalStruct, PField, PStruct} import is.hail.types.virtual.{TArray, TString, TStruct, Type} import 
is.hail.types.{BaseTypeWithRequiredness, RStruct, TableType, TypeWithRequiredness} -import is.hail.types.physical.{PCanonicalString, PCanonicalStruct, PField, PStruct, PType} -import is.hail.types.virtual.{Field, TArray, TStream, TString, TStruct, Type} -import is.hail.utils.{FastIndexedSeq, FastSeq, checkGzippedFile, fatal} +import is.hail.utils.{FastIndexedSeq, FastSeq, fatal} import org.json4s.{Extraction, Formats, JValue} case class StringTableReaderParameters( files: Array[String], - minPartitions: Option[Int], - forceBGZ: Boolean, - forceGZ: Boolean, - filePerPartition: Boolean) + minPartitions: Option[Int]) object StringTableReader { def apply(fs: FS, params: StringTableReaderParameters): StringTableReader = { - val fileStatuses = getFileStatuses(fs, params.files, params.forceBGZ, params.forceGZ) + val fileStatuses = getFileStatuses(fs, params.files) new StringTableReader(params, fileStatuses) } def fromJValue(fs: FS, jv: JValue): StringTableReader = { @@ -35,17 +30,10 @@ object StringTableReader { StringTableReader(fs, params) } - def getFileStatuses(fs: FS, files: Array[String], forceBGZ: Boolean, forceGZ: Boolean): Array[FileStatus] = { + def getFileStatuses(fs: FS, files: Array[String]): Array[FileStatus] = { val status = fs.globAllStatuses(files) if (status.isEmpty) fatal(s"arguments refer to no files: ${files.toIndexedSeq}.") - if (!forceBGZ) { - status.foreach { status => - val file = status.getPath - if (file.endsWith(".gz")) - checkGzippedFile(fs, file, forceGZ, forceBGZ) - } - } status } } @@ -134,8 +122,7 @@ class StringTableReader( override def lower(ctx: ExecuteContext, requestedType: TableType): TableStage = { val fs = ctx.fs - val lines = GenericLines.read(fs, fileStatuses, None, None, params.minPartitions, false, true, - params.filePerPartition) + val lines = GenericLines.read(fs, fileStatuses, None, None, params.minPartitions, false, true) TableStage(globals = MakeStruct(FastSeq()), partitioner = RVDPartitioner.unkeyed(lines.nPartitions), dependency = TableStageDependency.none, @@ -150,10 +137,12 @@ class StringTableReader( val (broadCastRow, rVD) = TableStageToRVD.apply(ctx, ts, Map[String, IR]()) TableValue(ctx, tr.typ, broadCastRow, rVD) } + override def partitionCounts: Option[IndexedSeq[Long]] = None override def rowAndGlobalPTypes(ctx: ExecuteContext, requestedType: TableType): (PStruct, PStruct) = (PCanonicalStruct(IndexedSeq(PField("file", PCanonicalString(true), 0), PField("text", PCanonicalString(true), 1)), true).subsetTo(requestedType.rowType).asInstanceOf[PStruct], PCanonicalStruct.empty(required = true)) + } diff --git a/hail/src/main/scala/is/hail/expr/ir/TableIR.scala b/hail/src/main/scala/is/hail/expr/ir/TableIR.scala index 78d57898cca..84f8c6ad3b1 100644 --- a/hail/src/main/scala/is/hail/expr/ir/TableIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/TableIR.scala @@ -58,7 +58,7 @@ abstract sealed class TableIR extends BaseIR { } protected[ir] def execute(ctx: ExecuteContext, r: TableRunContext): TableExecuteIntermediate = - fatal("tried to execute unexecutable IR:\n" + Pretty(ctx, this)) + fatal("tried to execute unexecutable IR:\n" + Pretty(this)) override def copy(newChildren: IndexedSeq[BaseIR]): TableIR @@ -107,6 +107,7 @@ object TableReader { def fromJValue(fs: FS, jv: JValue): TableReader = { (jv \ "name").extract[String] match { case "TableNativeReader" => TableNativeReader.fromJValue(fs, jv) + case "TextTableReader" => TextTableReader.fromJValue(fs, jv) case "TableFromBlockMatrixNativeReader" => 
TableFromBlockMatrixNativeReader.fromJValue(fs, jv) case "StringTableReader" => StringTableReader.fromJValue(fs, jv) case "AvroTableReader" => AvroTableReader.fromJValue(jv) @@ -123,7 +124,7 @@ object LoweredTableReader { contextType: Type, contexts: IndexedSeq[Any], keyType: TStruct, - bodyPType: (TStruct) => PStruct, + keyPType: (TStruct) => PStruct, keys: (TStruct) => (Region, HailClassLoader, FS, Any) => Iterator[Long] ): LoweredTableReaderCoercer = { assert(key.nonEmpty) @@ -172,7 +173,7 @@ object LoweredTableReader { ReadPartition(ctx, keyType, new PartitionIteratorLongReader( keyType, contextType, - (requestedType: Type) => bodyPType(requestedType.asInstanceOf[TStruct]), + (requestedType: Type) => keyPType(requestedType.asInstanceOf[TStruct]), (requestedType: Type) => keys(requestedType.asInstanceOf[TStruct]))), "key", MakeStruct(FastIndexedSeq( @@ -397,7 +398,7 @@ object LoweredTableReader { ToStream(Literal(TArray(contextType), partOrigIndex.map(i => contexts(i)))), body) - val rowRType = VirtualTypeWithReq(bodyPType(tableStage.rowType)).r.asInstanceOf[RStruct] + val rowRType = TypeWithRequiredness(tableStage.rowType).asInstanceOf[RStruct] ctx.backend.lowerDistributedSort(ctx, tableStage, @@ -494,7 +495,7 @@ case class PartitionRVDReader(rvd: RVD) extends PartitionReader { FastIndexedSeq(("elt", SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(rvd.rowPType)))), FastIndexedSeq(classInfo[Region], LongInfo), LongInfo, - PruneDeadFields.upcast(ctx, Ref("elt", rvd.rowType), requestedType)) + PruneDeadFields.upcast(Ref("elt", rvd.rowType), requestedType)) val upcastCode = mb.getObject[Function4[HailClassLoader, FS, Int, Region, AsmFunction2RegionLongLong]](upcast) @@ -2103,7 +2104,7 @@ case class TableMapRows(child: TableIR, newRow: IR) extends TableIR { } } - if (ctx.getFlag("distributed_scan_comb_op") != null && extracted.shouldTreeAggregate) { + if (HailContext.getFlag("distributed_scan_comb_op") != null && extracted.shouldTreeAggregate) { val fsBc = ctx.fs.broadcast val tmpBase = ctx.createTmpPath("table-map-rows-distributed-scan") val d = digitsNeeded(tv.rvd.getNumPartitions) @@ -2235,7 +2236,7 @@ case class TableMapRows(child: TableIR, newRow: IR) extends TableIR { } Iterator.single(write(aggRegion, seq.getAggOffset())) } - }, ctx.getFlag("max_leader_scans").toInt) + }, HailContext.getFlag("max_leader_scans").toInt) // 3. load in partition aggregations, comb op as necessary, write back out. 
val partAggs = scanPartitionAggs.scanLeft(initAgg)(combOpFNeedsPool(() => ctx.r.pool)) diff --git a/hail/src/main/scala/is/hail/expr/ir/TypeCheck.scala b/hail/src/main/scala/is/hail/expr/ir/TypeCheck.scala index 270ddf85daa..fb09854cbe5 100644 --- a/hail/src/main/scala/is/hail/expr/ir/TypeCheck.scala +++ b/hail/src/main/scala/is/hail/expr/ir/TypeCheck.scala @@ -1,34 +1,33 @@ package is.hail.expr.ir -import is.hail.backend.ExecuteContext import is.hail.expr.ir.streams.StreamUtils import is.hail.types.virtual._ import is.hail.utils._ object TypeCheck { - def apply(ctx: ExecuteContext, ir: BaseIR): Unit = { + def apply(ir: BaseIR): Unit = { try { - check(ctx, ir, BindingEnv.empty) + check(ir, BindingEnv.empty) } catch { - case e: Throwable => fatal(s"Error while typechecking IR:\n${ Pretty(ctx, ir) }", e) + case e: Throwable => fatal(s"Error while typechecking IR:\n${ Pretty(ir) }", e) } } - def apply(ctx: ExecuteContext, ir: IR, env: BindingEnv[Type]): Unit = { + def apply(ir: IR, env: BindingEnv[Type]): Unit = { try { - check(ctx, ir, env) + check(ir, env) } catch { - case e: Throwable => fatal(s"Error while typechecking IR:\n${ Pretty(ctx, ir) }", e) + case e: Throwable => fatal(s"Error while typechecking IR:\n${ Pretty(ir) }", e) } } - private def check(ctx: ExecuteContext, ir: BaseIR, env: BindingEnv[Type]): Unit = { + private def check(ir: BaseIR, env: BindingEnv[Type]): Unit = { ir.children .iterator .zipWithIndex .foreach { case (child, i) => - check(ctx, child, ChildBindings(ir, i, env)) + check(child, ChildBindings(ir, i, env)) if (child.typ == TVoid) { ir match { @@ -45,7 +44,7 @@ object TypeCheck { case _: WriteMetadata => case _ => throw new RuntimeException(s"unexpected void-typed IR at child $i of ${ ir.getClass.getSimpleName }" + - s"\n IR: ${ Pretty(ctx, ir) }") + s"\n IR: ${ Pretty(ir) }") } } } @@ -324,7 +323,7 @@ object TypeCheck { assert(body.typ.isInstanceOf[TStream]) case x@StreamFold(a, zero, accumName, valueName, body) => assert(a.typ.isInstanceOf[TStream]) - assert(a.typ.asInstanceOf[TStream].elementType.isRealizable, Pretty(ctx, x)) + assert(a.typ.asInstanceOf[TStream].elementType.isRealizable, Pretty(x)) assert(body.typ == zero.typ) assert(x.typ == zero.typ) case x@StreamFold2(a, accum, valueName, seq, res) => @@ -473,7 +472,7 @@ object TypeCheck { case MatrixToValueApply(_, _) => case BlockMatrixToValueApply(_, _) => case BlockMatrixCollect(_) => - case BlockMatrixWrite(_, writer) => writer.loweredTyp + case BlockMatrixWrite(_, _) => case BlockMatrixMultiWrite(_, _) => case ValueToBlockMatrix(child, _, _) => assert(child.typ.isInstanceOf[TArray] || child.typ.isInstanceOf[TNDArray] || child.typ == TFloat64) diff --git a/hail/src/main/scala/is/hail/expr/ir/analyses/ComputeMethodSplits.scala b/hail/src/main/scala/is/hail/expr/ir/analyses/ComputeMethodSplits.scala index 5e26262d44d..ce9f1eb6c15 100644 --- a/hail/src/main/scala/is/hail/expr/ir/analyses/ComputeMethodSplits.scala +++ b/hail/src/main/scala/is/hail/expr/ir/analyses/ComputeMethodSplits.scala @@ -1,14 +1,13 @@ package is.hail.expr.ir.analyses import is.hail.HailContext -import is.hail.backend.ExecuteContext import is.hail.expr.ir._ object ComputeMethodSplits { - def apply(ctx: ExecuteContext, ir: IR, controlFlowPreventsSplit: Memo[Unit]): Memo[Unit] = { + def apply(ir: IR, controlFlowPreventsSplit: Memo[Unit]): Memo[Unit] = { val m = Memo.empty[Unit] - val splitThreshold = ctx.getFlag("method_split_ir_limit").toInt + val splitThreshold = HailContext.getFlag("method_split_ir_limit").toInt 
require(splitThreshold > 0, s"invalid method_split_ir_limit") def recurAndComputeSizeUnderneath(x: IR): Int = { diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/Functions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/Functions.scala index 46db3559b2f..8df34cb93b1 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/Functions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/Functions.scala @@ -6,7 +6,6 @@ import is.hail.expr.ir._ import is.hail.types._ import is.hail.utils._ import is.hail.asm4s.coerce -import is.hail.backend.ExecuteContext import is.hail.experimental.ExperimentalFunctions import is.hail.types.physical._ import is.hail.types.physical.stypes.{EmitType, SCode, SType, SValue} @@ -83,39 +82,6 @@ object IRFunctionRegistry { }) } - def pyRegisterIRForServiceBackend( - ctx: ExecuteContext, - name: String, - typeParamStrs: Array[String], - argNames: Array[String], - argTypeStrs: Array[String], - returnType: String, - bodyStr: String - ): Unit = { - requireJavaIdentifier(name) - - val typeParameters = typeParamStrs.map(IRParser.parseType).toFastIndexedSeq - val valueParameterTypes = argTypeStrs.map(IRParser.parseType).toFastIndexedSeq - val refMap = argNames.zip(valueParameterTypes).toMap - val body = IRParser.parse_value_ir( - bodyStr, - IRParserEnvironment(ctx, refMap, Map()) - ) - - userAddedFunctions += ((name, (body.typ, typeParameters, valueParameterTypes))) - addIR( - name, - typeParameters, - valueParameterTypes, - IRParser.parseType(returnType), - false, - { (_, args, _) => - Subst(body, - BindingEnv(Env[IR](argNames.zip(args): _*))) - } - ) - } - def removeIRFunction( name: String, returnType: Type, @@ -522,10 +488,6 @@ abstract class RegistryFunctions { pt: (Type, SType, SType, SType) => SType)(cls: Class[_], method: String): Unit = registerWrappedScalaFunction(name, Array(a1, a2, a3), returnType, unwrappedApply(pt))(cls, method) - def registerWrappedScalaFunction4(name: String, a1: Type, a2: Type, a3: Type, a4: Type, returnType: Type, - pt: (Type, SType, SType, SType, SType) => SType)(cls: Class[_], method: String): Unit = - registerWrappedScalaFunction(name, Array(a1, a2, a3, a4), returnType, unwrappedApply(pt))(cls, method) - def registerJavaStaticFunction(name: String, valueParameterTypes: Array[Type], returnType: Type, pt: (Type, Seq[SType]) => SType)(cls: Class[_], method: String) { registerCode(name, valueParameterTypes, returnType, pt) { case (r, cb, rt, _, args) => val cts = valueParameterTypes.map(PrimitiveTypeToIRIntermediateClassTag(_).runtimeClass) diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/RelationalFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/RelationalFunctions.scala index da7274f51b3..ec62ed1ff93 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/RelationalFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/RelationalFunctions.scala @@ -1,7 +1,6 @@ package is.hail.expr.ir.functions import is.hail.backend.ExecuteContext -import is.hail.expr.ir.lowering.TableStage import is.hail.expr.ir.{LowerMatrixIR, MatrixValue, RelationalSpec, TableReader, TableValue} import is.hail.types.virtual.Type import is.hail.types.{BlockMatrixType, MatrixType, RTable, TableType, TypeWithRequiredness} diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/StringFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/StringFunctions.scala index e9f508a9e17..c2d22891715 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/StringFunctions.scala +++ 
b/hail/src/main/scala/is/hail/expr/ir/functions/StringFunctions.scala @@ -1,11 +1,15 @@ package is.hail.expr.ir.functions +import java.time.temporal.ChronoField +import java.time.{Instant, ZoneId} +import java.util.Locale import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.JSONAnnotationImpex import is.hail.expr.ir._ +import is.hail.types.physical._ import is.hail.types.physical.stypes._ -import is.hail.types.physical.stypes.concrete.{SJavaArrayString, SJavaArrayStringSettable, SJavaArrayStringValue, SJavaString} +import is.hail.types.physical.stypes.concrete.{SIndexablePointer, SJavaArrayString, SJavaString, SStringPointer} import is.hail.types.physical.stypes.interfaces._ import is.hail.types.physical.stypes.primitives.{SBoolean, SInt32, SInt64} import is.hail.types.virtual._ @@ -14,13 +18,10 @@ import org.apache.spark.sql.Row import org.json4s.JValue import org.json4s.jackson.JsonMethods -import java.time.temporal.ChronoField -import java.time.{Instant, ZoneId} -import java.util.Locale -import java.util.regex.{Matcher, Pattern} import scala.collection.mutable object StringFunctions extends RegistryFunctions { + def reverse(s: String): String = { val sb = new StringBuilder sb.append(s) @@ -45,13 +46,11 @@ object StringFunctions extends RegistryFunctions { def regexMatch(regex: String, s: String): Boolean = regex.r.findFirstIn(s).isDefined - def regexFullMatch(regex: String, s: String): Boolean = s.matches(regex) - def concat(s: String, t: String): String = s + t def replace(str: String, pattern1: String, pattern2: String): String = str.replaceAll(pattern1, pattern2) - + def split(s: String, p: String): Array[String] = s.split(p, -1) def translate(s: String, d: Map[String, String]): String = { @@ -83,142 +82,10 @@ object StringFunctions extends RegistryFunctions { def escapeString(s: String): String = StringEscapeUtils.escapeString(s) - def addValueOrNull(ab: StringArrayBuilder, value: String, missingValues: Array[String]): Unit = { - var i = 0 - while (i < missingValues.length) { - if (missingValues(i) == value) { - ab += null - return - } - i += 1 - } - ab += value - } - - def matchPattern(s: String, i: Int, m: Matcher): Int = { - m.region(i, s.length) - if (m.lookingAt()) - m.end() - m.start() - else - -1 - } - - def generateSplitQuotedRegex( - cb: EmitCodeBuilder, - string: Value[String], - separator: Either[Value[Char], Value[String]], - quoteChar: Option[Value[Char]], - missingSV: SIndexableValue, - errorID: Value[Int] - ): Value[Array[String]] = { - - // note: it will be inefficient to convert a SIndexablePointer to SJavaArrayString to split each line. 
- // We should really choose SJavaArrayString as the stype for a literal if used in a place like this, - // but this is a non-local stype decision that is hard in the current system - val missing: Value[Array[String]] = missingSV.st match { - case SJavaArrayString(elementRequired) => missingSV.asInstanceOf[SJavaArrayStringSettable].array - case _ => - val mb = cb.emb.ecb.newEmitMethod("convert_region_to_str_array", FastIndexedSeq(missingSV.st.paramType), arrayInfo[String]) - mb.emitWithBuilder[Array[String]] { cb => - val sv = mb.getSCodeParam(1).asIndexable - val m = cb.newLocal[Array[String]]("missingvals", Code.newArray[String](sv.loadLength())) - sv.forEachDefined(cb) { case (cb, idx, sc) => cb += (m(idx) = sc.asString.loadString(cb)) } - m - } - cb.newLocal[Array[String]]("missing_arr", cb.invokeCode(mb, missingSV)) - } - - // lazy field reused across calls to split functions - val ab = cb.emb.getOrDefineLazyField[StringArrayBuilder](Code.newInstance[StringArrayBuilder, Int](16), "generate_split_quoted_regex_ab") - cb += ab.invoke[Unit]("clear") - - // takes the current position and current char value, returns the number of matching chars - // in the separator, or -1 if not a separator - val getPatternMatch: (Value[Int], Value[Char]) => Value[Int] = { - val x = cb.newLocal[Int]("sepCharMatch"); - separator match { - case Left(sepChar) => - (_: Value[Int], char: Value[Char]) => { - cb.ifx(char.ceq(sepChar), cb.assign(x, 1), cb.assign(x, -1)); - x - } - case Right(regex) => - val m = cb.newLocal[Matcher]("matcher", - Code.invokeStatic1[Pattern, String, Pattern]("compile", regex) - .invoke[CharSequence, Matcher]("matcher", string)); - (idx: Value[Int], _: Value[Char]) => { - cb.assign(x, Code.invokeScalaObject3[String, Int, Matcher, Int]( - StringFunctions.getClass, "matchPattern", string, idx, m)); - x - } - } - } - - val i = cb.newLocal[Int]("i", 0) - val lastFieldStart = cb.newLocal[Int]("lastfieldstart", 0) - - def addValueOrNA(cb: EmitCodeBuilder, endIdx: Code[Int]): Unit = { - cb += Code.invokeScalaObject3[StringArrayBuilder, String, Array[String], Unit]( - StringFunctions.getClass, "addValueOrNull", ab, string.invoke[Int, Int, String]("substring", lastFieldStart, endIdx), missing) - } - - val LreturnWithoutAppending = CodeLabel() - - cb.whileLoop(i < string.length(), { - val c = cb.newLocal[Char]("c", string(i)) - - val l = getPatternMatch(i, c) - cb.ifx(l.cne(-1), { - addValueOrNA(cb, i) - cb.assign(i, i + l) // skip delim - cb.assign(lastFieldStart, i) - }, { - quoteChar match { - case Some(qc) => - cb.ifx(c.ceq(qc), { - cb.ifx(i.cne(lastFieldStart), - cb._fatalWithError(errorID, "opening quote character '", qc.toS, "' not at start of field")) - cb.assign(i, i + 1) // skip quote - cb.assign(lastFieldStart, i) - - cb.whileLoop(i < string.length() && string(i).cne(qc), { - cb.assign(i, i + 1) - }) - - addValueOrNA(cb, i) - - cb.ifx(i.ceq(string.length()), - cb._fatalWithError(errorID, "missing terminating quote character '", qc.toS, "'")) - cb.assign(i, i + 1) // skip quote - - cb.ifx(i < string.length, { - cb.assign(c, string(i)) - val l = getPatternMatch(i, c) - cb.ifx(l.ceq(-1), { - cb._fatalWithError(errorID, "terminating quote character '", qc.toS, "' not at end of field") - }) - cb.assign(i, i + l) // skip delim - cb.assign(lastFieldStart, i) - }, { - cb.goto(LreturnWithoutAppending) - }) - }, { - cb.assign(i, i + 1) - }) - case None => - cb.assign(i, i + 1) - } - }) - }) - - addValueOrNA(cb, string.length()) - cb.define(LreturnWithoutAppending) - 
cb.memoize(ab.invoke[Array[String]]("result"), "generateSplitQuotedRegexResult") - } - def softBounds(i: IR, len: IR): IR = If(i < -len, 0, If(i < 0, i + len, If(i >= len, len, i))) + private val locale: Locale = Locale.US def strftime(fmtStr: String, epochSeconds: Long, zoneId: String): String = @@ -322,7 +189,6 @@ object StringFunctions extends RegistryFunctions { IEmitCode.present(cb, st.construct(cb, str)) } - registerWrappedScalaFunction1("reverse", TString, TString, (_: Type, _: SType) => SJavaString)(thisClass, "reverse") registerWrappedScalaFunction1("upper", TString, TString, (_: Type, _: SType) => SJavaString)(thisClass, "upper") registerWrappedScalaFunction1("lower", TString, TString, (_: Type, _: SType) => SJavaString)(thisClass, "lower") @@ -342,9 +208,6 @@ object StringFunctions extends RegistryFunctions { registerWrappedScalaFunction2("regexMatch", TString, TString, TBoolean, { case (_: Type, _: SType, _: SType) => SBoolean })(thisClass, "regexMatch") - registerWrappedScalaFunction2("regexFullMatch", TString, TString, TBoolean, { - case (_: Type, _: SType, _: SType) => SBoolean - })(thisClass, "regexFullMatch") registerWrappedScalaFunction2("concat", TString, TString, TString, { case (_: Type, _: SType, _: SType) => SJavaString })(thisClass, "concat") @@ -367,61 +230,6 @@ object StringFunctions extends RegistryFunctions { case (_: Type, _: SType, _: SType) => SJavaString })(thisClass, "setMkString") - registerSCode4("splitQuotedRegex", TString, TString, TArray(TString), TString, TArray(TString), { - case (_: Type, _: SType, _: SType, _: SType, _: SType) => SJavaArrayString(false) - }) { case (r, cb, st: SJavaArrayString, s, separator, missing, quote, errorID) => - val quoteStr = cb.newLocal[String]("quoteStr", quote.asString.loadString(cb)) - val quoteChar = cb.newLocal[Char]("quoteChar") - cb.ifx(quoteStr.length().cne(1), cb._fatalWithError(errorID, "quote must be a single character")) - cb.assign(quoteChar, quoteStr(0)) - - val string = cb.newLocal[String]("string", s.asString.loadString(cb)) - val sep = cb.newLocal[String]("sep", separator.asString.loadString(cb)) - val mv = missing.asIndexable - - new SJavaArrayStringValue(st, generateSplitQuotedRegex(cb, string, Right(sep), Some(quoteChar), mv, errorID)) - } - - registerSCode4("splitQuotedChar", TString, TString, TArray(TString), TString, TArray(TString), { - case (_: Type, _: SType, _: SType, _: SType, _: SType) => SJavaArrayString(false) - }) { case (r, cb, st: SJavaArrayString, s, separator, missing, quote, errorID) => - val quoteStr = cb.newLocal[String]("quoteStr", quote.asString.loadString(cb)) - val quoteChar = cb.newLocal[Char]("quoteChar") - cb.ifx(quoteStr.length().cne(1), cb._fatalWithError(errorID, "quote must be a single character")) - cb.assign(quoteChar, quoteStr(0)) - - val string = cb.newLocal[String]("string", s.asString.loadString(cb)) - val sep = cb.newLocal[String]("sep", separator.asString.loadString(cb)) - val sepChar = cb.newLocal[Char]("sepChar") - cb.ifx(sep.length().cne(1), cb._fatalWithError(errorID, "splitQuotedChar expected a single character for separator")) - cb.assign(sepChar, sep(0)) - val mv = missing.asIndexable - - new SJavaArrayStringValue(st, generateSplitQuotedRegex(cb, string, Left(sepChar), Some(quoteChar), mv, errorID)) - } - - registerSCode3("splitRegex", TString, TString, TArray(TString), TArray(TString), { - case (_: Type, _: SType, _: SType, _: SType) => SJavaArrayString(false) - }) { case (r, cb, st: SJavaArrayString, s, separator, missing, errorID) => - val string = 
cb.newLocal[String]("string", s.asString.loadString(cb)) - val sep = cb.newLocal[String]("sep", separator.asString.loadString(cb)) - val mv = missing.asIndexable - new SJavaArrayStringValue(st, generateSplitQuotedRegex(cb, string, Right(sep), None, mv, errorID)) - } - - registerSCode3("splitChar", TString, TString, TArray(TString), TArray(TString), { - case (_: Type, _: SType, _: SType, _: SType) => SJavaArrayString(false) - }) { case (r, cb, st: SJavaArrayString, s, separator, missing, errorID) => - val string = cb.newLocal[String]("string", s.asString.loadString(cb)) - val sep = cb.newLocal[String]("sep", separator.asString.loadString(cb)) - val sepChar = cb.newLocal[Char]("sepChar") - cb.ifx(sep.length().cne(1), cb._fatalWithError(errorID, "splitChar expected a single character for separator")) - cb.assign(sepChar, sep(0)) - val mv = missing.asIndexable - - new SJavaArrayStringValue(st, generateSplitQuotedRegex(cb, string, Left(sepChar), None, mv, errorID)) - } - registerWrappedScalaFunction2("mkString", TArray(TString), TString, TString, { case (_: Type, _: SType, _: SType) => SJavaString })(thisClass, "arrayMkString") diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/CanLowerEfficiently.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/CanLowerEfficiently.scala index 05ee96b5a8f..b3697ad1a82 100644 --- a/hail/src/main/scala/is/hail/expr/ir/lowering/CanLowerEfficiently.scala +++ b/hail/src/main/scala/is/hail/expr/ir/lowering/CanLowerEfficiently.scala @@ -1,19 +1,19 @@ package is.hail.expr.ir.lowering import is.hail.HailContext -import is.hail.backend.ExecuteContext import is.hail.expr.ir._ -import is.hail.expr.ir.functions.{TableCalculateNewPartitions, TableToValueFunction} +import is.hail.expr.ir.functions.TableToValueFunction +import is.hail.io.TextMatrixReader import is.hail.io.avro.AvroTableReader import is.hail.io.bgen.MatrixBGENReader import is.hail.io.plink.MatrixPLINKReader import is.hail.io.vcf.MatrixVCFReader -import is.hail.methods.{ForceCountTable, NPartitionsTable, TableFilterPartitions} +import is.hail.methods.{ForceCountTable, NPartitionsTable} object CanLowerEfficiently { - def apply(ctx: ExecuteContext, ir0: BaseIR): Option[String] = { + def apply(ir0: BaseIR): Option[String] = { - if (ctx.getFlag("no_whole_stage_codegen") != null) + if (HailContext.getFlag("no_whole_stage_codegen") != null) return Some("flag 'no_whole_stage_codegen' is enabled") var prohibitiveReason: Option[String] = None @@ -28,6 +28,8 @@ object CanLowerEfficiently { ir match { case TableRead(_, _, _: TableNativeReader) => case TableRead(_, _, _: TableNativeZippedReader) => + case TableRead(_, _, _: TextTableReader) => + case TableRead(_, _, _: TextMatrixReader) => case TableRead(_, _, _: StringTableReader) => case TableRead(_, _, _: MatrixPLINKReader) => case TableRead(_, _, _: MatrixVCFReader) => @@ -62,7 +64,6 @@ object CanLowerEfficiently { case t: TableAggregateByKey => case t: TableRename => case t: TableFilterIntervals => - case TableToTableApply(_, TableFilterPartitions(_, _)) => case t: TableToTableApply => fail(s"TableToTableApply") case t: BlockMatrixToTableApply => fail(s"BlockMatrixToTableApply") case t: BlockMatrixToTable => fail(s"BlockMatrixToTable has no lowered implementation") @@ -78,7 +79,6 @@ object CanLowerEfficiently { case TableCount(_) => case TableToValueApply(_, ForceCountTable()) => case TableToValueApply(_, NPartitionsTable()) => - case TableToValueApply(_, TableCalculateNewPartitions(_)) => case TableToValueApply(_, f: TableToValueFunction) => 
fail(s"TableToValueApply: no lowering for ${ f.getClass.getName }") case TableAggregate(_, _) => case TableCollect(_) => diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerBlockMatrixIR.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerBlockMatrixIR.scala index eefb8d06faf..d4a1ed9c957 100644 --- a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerBlockMatrixIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerBlockMatrixIR.scala @@ -15,7 +15,7 @@ object BlockMatrixStage { def broadcastVector(vector: IR, typ: BlockMatrixType, asRowVector: Boolean): BlockMatrixStage = { val v = Ref(genUID(), vector.typ) - new BlockMatrixStage(IndexedSeq(), Array(v.name -> vector), TStruct("start" -> TInt32, "shape" -> TTuple(TInt32, TInt32))) { + new BlockMatrixStage(Array(v.name -> vector), TStruct("start" -> TInt32, "shape" -> TTuple(TInt32, TInt32))) { def blockContext(idx: (Int, Int)): IR = { val (i, j) = typ.blockShape(idx._1, idx._2) val start = (if (asRowVector) idx._2 else idx._1) * typ.blockSize @@ -38,7 +38,7 @@ object BlockMatrixStage { } } -case class EmptyBlockMatrixStage(eltType: Type) extends BlockMatrixStage(IndexedSeq(), Array(), TInt32) { +case class EmptyBlockMatrixStage(eltType: Type) extends BlockMatrixStage(Array(), TInt32) { def blockContext(idx: (Int, Int)): IR = throw new LowererUnsupportedOperation("empty stage has no block contexts!") @@ -50,31 +50,24 @@ case class EmptyBlockMatrixStage(eltType: Type) extends BlockMatrixStage(Indexed } } -// Scope structure: -// letBindings are available in blockContext and broadcastVals. -// broadcastVals are available in the blockContext and the blockBody -abstract class BlockMatrixStage(val letBindings: IndexedSeq[(String, IR)], val broadcastVals: Array[(String, IR)], val ctxType: Type) { +abstract class BlockMatrixStage(val globalVals: Array[(String, IR)], val ctxType: Type) { def blockContext(idx: (Int, Int)): IR def blockBody(ctxRef: Ref): IR - def wrapLetsAndBroadcasts(ctxIR: IR): IR = { - (letBindings ++ broadcastVals).foldRight[IR](ctxIR) { case ((f, v), accum) => Let(f, v, accum) } - } - def collectBlocks(relationalBindings: Map[String, IR])(f: (IR, IR) => IR, blocksToCollect: Array[(Int, Int)]): IR = { val ctxRef = Ref(genUID(), ctxType) val body = f(ctxRef, blockBody(ctxRef)) val ctxs = MakeStream(blocksToCollect.map(idx => blockContext(idx)), TStream(ctxRef.typ)) val bodyFreeVars = FreeVariables(body, supportsAgg = false, supportsScan = false) - val bcFields = broadcastVals.filter { case (f, _) => bodyFreeVars.eval.lookupOption(f).isDefined } + val bcFields = globalVals.filter { case (f, _) => bodyFreeVars.eval.lookupOption(f).isDefined } val bcVals = MakeStruct(bcFields.map { case (f, v) => f -> Ref(f, v.typ) }) val bcRef = Ref(genUID(), bcVals.typ) val wrappedBody = bcFields.foldLeft(body) { case (accum, (f, _)) => Let(f, GetField(bcRef, f), accum) } - val collect = wrapLetsAndBroadcasts(CollectDistributedArray(ctxs, bcVals, ctxRef.name, bcRef.name, wrappedBody)) - LowerToCDA.substLets(collect, relationalBindings) + val collect = CollectDistributedArray(ctxs, bcVals, ctxRef.name, bcRef.name, wrappedBody) + LowerToCDA.substLets(globalVals.foldRight[IR](collect) { case ((f, v), accum) => Let(f, v, accum) }, relationalBindings) } def collectLocal(relationalBindings: Map[String, IR], typ: BlockMatrixType): IR = { @@ -108,17 +101,9 @@ abstract class BlockMatrixStage(val letBindings: IndexedSeq[(String, IR)], val b Let(blockResults.name, cda, NDArrayConcat(rows, 0)) } - def addLets(newLets: (String, IR)*): 
BlockMatrixStage = { - val outer = this - new BlockMatrixStage(outer.letBindings ++ newLets, outer.broadcastVals, ctxType) { - override def blockContext(idx: (Int, Int)): IR = outer.blockContext(idx) - override def blockBody(ctxRef: Ref): IR = outer.blockBody(ctxRef) - } - } - def addGlobals(newGlobals: (String, IR)*): BlockMatrixStage = { val outer = this - new BlockMatrixStage(outer.letBindings, broadcastVals ++ newGlobals, ctxType) { + new BlockMatrixStage(globalVals ++ newGlobals, ctxType) { def blockContext(idx: (Int, Int)): IR = outer.blockContext(idx) def blockBody(ctxRef: Ref): IR = outer.blockBody(ctxRef) } @@ -127,7 +112,7 @@ abstract class BlockMatrixStage(val letBindings: IndexedSeq[(String, IR)], val b def addContext(newTyp: Type)(newCtx: ((Int, Int)) => IR): BlockMatrixStage = { val outer = this val newCtxType = TStruct("old" -> ctxType, "new" -> newTyp) - new BlockMatrixStage(outer.letBindings, broadcastVals, newCtxType) { + new BlockMatrixStage(globalVals, newCtxType) { def blockContext(idx: (Int, Int)): IR = makestruct("old" -> outer.blockContext(idx), "new" -> newCtx(idx)) @@ -136,7 +121,7 @@ abstract class BlockMatrixStage(val letBindings: IndexedSeq[(String, IR)], val b } def mapBody(f: (IR, IR) => IR): BlockMatrixStage = { val outer = this - new BlockMatrixStage(outer.letBindings, broadcastVals, outer.ctxType) { + new BlockMatrixStage(globalVals, outer.ctxType) { def blockContext(idx: (Int, Int)): IR = outer.blockContext(idx) def blockBody(ctxRef: Ref): IR = f(ctxRef, outer.blockBody(ctxRef)) @@ -146,7 +131,7 @@ abstract class BlockMatrixStage(val letBindings: IndexedSeq[(String, IR)], val b def condenseBlocks(typ: BlockMatrixType, rowBlocks: Array[Array[Int]], colBlocks: Array[Array[Int]]): BlockMatrixStage = { val outer = this val ctxType = TArray(TArray(TTuple(TTuple(TInt64, TInt64), outer.ctxType))) - new BlockMatrixStage(outer.letBindings, outer.broadcastVals, ctxType) { + new BlockMatrixStage(outer.globalVals, ctxType) { def blockContext(idx: (Int, Int)): IR = { val i = idx._1 val j = idx._2 @@ -205,15 +190,15 @@ object LowerBlockMatrixIR { NDArrayRef(lowered.blockBody(ctx), FastIndexedSeq(I64(iInBlock), I64(jInBlock)), -1) } - lowered.wrapLetsAndBroadcasts(elt) + lowered.globalVals.foldRight[IR](elt) { case ((f, v), accum) => Let(f, v, accum) } case BlockMatrixWrite(child, writer) => writer.lower(ctx, lower(child), child, relationalLetsAbove, TypeWithRequiredness(child.typ.elementType)) //FIXME: BlockMatrixIR is currently ignored in Requiredness inference since all eltTypes are +TFloat64 - case BlockMatrixMultiWrite(blockMatrices, writer) => unimplemented(ctx, node) + case BlockMatrixMultiWrite(blockMatrices, writer) => unimplemented(node) case node if node.children.exists(_.isInstanceOf[BlockMatrixIR]) => - throw new LowererUnsupportedOperation(s"IR nodes with BlockMatrixIR children need explicit rules: \n${ Pretty(ctx, node) }") + throw new LowererUnsupportedOperation(s"IR nodes with BlockMatrixIR children need explicit rules: \n${ Pretty(node) }") case node => - throw new LowererUnsupportedOperation(s"Value IRs with no BlockMatrixIR children must be lowered through LowerIR: \n${ Pretty(ctx, node) }") + throw new LowererUnsupportedOperation(s"Value IRs with no BlockMatrixIR children must be lowered through LowerIR: \n${ Pretty(node) }") } } @@ -230,13 +215,13 @@ object LowerBlockMatrixIR { } val emptyGlobals = MakeStruct(Seq()) val globalsId = genUID() - val letBindings = bmsWithCtx.letBindings ++ bmsWithCtx.broadcastVals :+ globalsId -> emptyGlobals + val 
letBindings = bmsWithCtx.globalVals :+ globalsId -> emptyGlobals val contextsIR = MakeStream(blocksRowMajor.map{ case (i, j) => bmsWithCtx.blockContext((i, j)) }, TStream(bmsWithCtx.ctxType)) val ctxRef = Ref(genUID(), bmsWithCtx.ctxType) val body = bmsWithCtx.blockBody(ctxRef) val bodyFreeVars = FreeVariables(body, supportsAgg = false, supportsScan = false) - val bcFields = bmsWithCtx.broadcastVals.filter { case (f, _) => bodyFreeVars.eval.lookupOption(f).isDefined } :+ globalsId -> Ref(globalsId, emptyGlobals.typ) + val bcFields = bmsWithCtx.globalVals.filter { case (f, _) => bodyFreeVars.eval.lookupOption(f).isDefined } :+ globalsId -> Ref(globalsId, emptyGlobals.typ) def tsPartitionFunction(ctxRef: Ref): IR = { val s = MakeStruct(Seq("blockRow" -> GetTupleElement(GetField(ctxRef, "new"), 0), "blockCol" -> GetTupleElement(GetField(ctxRef, "new"), 1), "block" -> bmsWithCtx.blockBody(ctxRef))) @@ -248,15 +233,15 @@ object LowerBlockMatrixIR { ts } - private def unimplemented[T](ctx: ExecuteContext, node: BaseIR): T = - throw new LowererUnsupportedOperation(s"unimplemented: \n${ Pretty(ctx, node) }") + private def unimplemented[T](node: BaseIR): T = + throw new LowererUnsupportedOperation(s"unimplemented: \n${ Pretty(node) }") def lower(bmir: BlockMatrixIR, typesToLower: DArrayLowering.Type, ctx: ExecuteContext, analyses: Analyses, relationalLetsAbove: Map[String, IR]): BlockMatrixStage = { if (!DArrayLowering.lowerBM(typesToLower)) throw new LowererUnsupportedOperation("found BlockMatrixIR in lowering; lowering only TableIRs.") bmir.children.foreach { case c: BlockMatrixIR if c.typ.blockSize != bmir.typ.blockSize => - throw new LowererUnsupportedOperation(s"Can't lower node with mismatched block sizes: ${ bmir.typ.blockSize } vs child ${ c.typ.blockSize }\n\n ${ Pretty(ctx, bmir) }") + throw new LowererUnsupportedOperation(s"Can't lower node with mismatched block sizes: ${ bmir.typ.blockSize } vs child ${ c.typ.blockSize }\n\n ${ Pretty(bmir) }") case _ => } if (bmir.typ.nDefinedBlocks == 0) @@ -273,7 +258,7 @@ object LowerBlockMatrixIR { case BlockMatrixRead(reader) => reader.lower(ctx) case x@BlockMatrixRandom(seed, gaussian, shape, blockSize) => val generator = invokeSeeded(if (gaussian) "rand_norm" else "rand_unif", seed, TFloat64, F64(0.0), F64(1.0)) - new BlockMatrixStage(IndexedSeq(), Array(), TTuple(TInt64, TInt64)) { + new BlockMatrixStage(Array(), TTuple(TInt64, TInt64)) { def blockContext(idx: (Int, Int)): IR = { val (i, j) = x.typ.blockShape(idx._1, idx._2) MakeTuple.ordered(FastSeq(i, j)) @@ -291,20 +276,19 @@ object LowerBlockMatrixIR { val loweredLeft = lower(left) val loweredRight = lower(right) loweredLeft - .addLets(loweredRight.letBindings: _*) - .addGlobals(loweredRight.broadcastVals: _*) + .addGlobals(loweredRight.globalVals: _*) .addContext(loweredRight.ctxType)(loweredRight.blockContext).mapBody { (ctx, leftBody) => NDArrayMap2(leftBody, bindIR(GetField(ctx, "new"))(loweredRight.blockBody), lname, rname, f, ErrorIDs.NO_ERROR) } case x@BlockMatrixBroadcast(child, IndexedSeq(), _, _) => val lowered = lower(child) - val eltValue = lowered.wrapLetsAndBroadcasts(bindIR(lowered.blockContext(0 -> 0)) { ctx => + val eltValue = lowered.globalVals.foldRight[IR](bindIR(lowered.blockContext(0 -> 0)) { ctx => NDArrayRef(lowered.blockBody(ctx), FastIndexedSeq(I64(0L), I64(0L)), -1) - }) + }) { case ((f, v), accum) => Let(f, v, accum) } val elt = Ref(genUID(), eltValue.typ) - new BlockMatrixStage(lowered.letBindings, Array(elt.name -> eltValue), TTuple(TInt64, TInt64)) { + new 
BlockMatrixStage(Array(elt.name -> eltValue), TTuple(TInt64, TInt64)) { def blockContext(idx: (Int, Int)): IR = { val (i, j) = x.typ.blockShape(idx._1, idx._2) MakeTuple.ordered(FastSeq(I64(i.toInt), I64(j.toInt))) @@ -348,7 +332,7 @@ object LowerBlockMatrixIR { case BlockMatrixBroadcast(child, IndexedSeq(1, 0), _, _) => //transpose val lowered = lower(child) - new BlockMatrixStage(lowered.letBindings, lowered.broadcastVals, lowered.ctxType) { + new BlockMatrixStage(lowered.globalVals, lowered.ctxType) { def blockContext(idx: (Int, Int)): IR = lowered.blockContext(idx.swap) def blockBody(ctxRef: Ref): IR = NDArrayReindex(lowered.blockBody(ctxRef), FastIndexedSeq(1, 0)) } @@ -364,12 +348,12 @@ object LowerBlockMatrixIR { } val summedChildType = BlockMatrixType(child.typ.elementType, IndexedSeq[Long](child.typ.nRowBlocks, child.typ.nColBlocks), child.typ.nRowBlocks == 1, 1, BlockMatrixSparsity.dense) val res = NDArrayAgg(summedChild.collectLocal(relationalLetsAbove, summedChildType), IndexedSeq[Int](0, 1)) - new BlockMatrixStage(loweredChild.letBindings, summedChild.broadcastVals, TStruct.empty) { + new BlockMatrixStage(summedChild.globalVals, TStruct.empty) { override def blockContext(idx: (Int, Int)): IR = makestruct() override def blockBody(ctxRef: Ref): IR = NDArrayReshape(res, MakeTuple.ordered(Seq(I64(1L), I64(1L))), ErrorIDs.NO_ERROR) } case IndexedSeq(0) => { // Number of rows goes to 1. Number of cols remains the same. - new BlockMatrixStage(loweredChild.letBindings, loweredChild.broadcastVals, TArray(loweredChild.ctxType)) { + new BlockMatrixStage(loweredChild.globalVals, TArray(loweredChild.ctxType)) { override def blockContext(idx: (Int, Int)): IR = { val (row, col) = idx assert(row == 0, s"Asked for idx ${idx}") @@ -388,7 +372,7 @@ object LowerBlockMatrixIR { } } case IndexedSeq(1) => { // Number of cols goes to 1. Number of rows remains the same. 
- new BlockMatrixStage(loweredChild.letBindings, loweredChild.broadcastVals, TArray(loweredChild.ctxType)) { + new BlockMatrixStage(loweredChild.globalVals, TArray(loweredChild.ctxType)) { override def blockContext(idx: (Int, Int)): IR = { val (row, col) = idx assert(col == 0, s"Asked for idx ${idx}") @@ -467,11 +451,11 @@ object LowerBlockMatrixIR { case BlockMatrixDensify(child) => lower(child) case BlockMatrixSparsify(child, sparsifier) => lower(child) - case RelationalLetBlockMatrix(name, value, body) => unimplemented(ctx, bmir) + case RelationalLetBlockMatrix(name, value, body) => unimplemented(bmir) case ValueToBlockMatrix(child, shape, blockSize) if !child.typ.isInstanceOf[TArray] && !child.typ.isInstanceOf[TNDArray] => { val element = lowerIR(child) - new BlockMatrixStage(IndexedSeq(), Array(), TStruct()) { + new BlockMatrixStage(Array(), TStruct()) { override def blockContext(idx: (Int, Int)): IR = MakeStruct(Seq()) override def blockBody(ctxRef: Ref): IR = MakeNDArray(MakeArray(element), MakeTuple(Seq((0, I64(1)), (1, I64(1)))), False(), ErrorIDs.NO_ERROR) @@ -483,7 +467,9 @@ object LowerBlockMatrixIR { case _: TNDArray => lowerIR(child) } val v = Ref(genUID(), nd.typ) - new BlockMatrixStage(IndexedSeq(v.name -> nd), Array(), nd.typ) { + new BlockMatrixStage( + Array(v.name -> nd), + nd.typ) { def blockContext(idx: (Int, Int)): IR = { val (r, c) = idx NDArraySlice(v, MakeTuple.ordered(FastSeq( @@ -497,7 +483,7 @@ object LowerBlockMatrixIR { val left = lower(leftIR) val right = lower(rightIR) val newCtxType = TArray(TTuple(left.ctxType, right.ctxType)) - new BlockMatrixStage(left.letBindings ++ right.letBindings, left.broadcastVals ++ right.broadcastVals, newCtxType) { + new BlockMatrixStage(left.globalVals ++ right.globalVals, newCtxType) { def blockContext(idx: (Int, Int)): IR = { val (i, j) = idx MakeArray(Array.tabulate[Option[IR]](leftIR.typ.nColBlocks) { k => diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerDistributedSort.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerDistributedSort.scala index d18c5a06047..49cbfd7d8d1 100644 --- a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerDistributedSort.scala +++ b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerDistributedSort.scala @@ -103,14 +103,13 @@ object LowerDistributedSort { inputStage: TableStage, sortFields: IndexedSeq[SortField], relationalLetsAbove: Map[String, IR], - rowTypeRequiredness: RStruct, - optTargetNumPartitions: Option[Int] = None + rowTypeRequiredness: RStruct ): TableStage = { val oversamplingNum = 3 val seed = 7L - val defaultBranchingFactor = ctx.getFlag("shuffle_max_branch_factor").toInt - val sizeCutoff = ctx.getFlag("shuffle_cutoff_to_local_sort").toInt + val defaultBranchingFactor = 4 + val sizeCutoff = HailContext.getFlag("shuffle_cutoff_to_local_sort").toInt val (keyToSortBy, _) = inputStage.rowType.select(sortFields.map(sf => sf.field)) @@ -121,47 +120,31 @@ object LowerDistributedSort { val initialStageDataRow = CompileAndEvaluate[Annotation](ctx, inputStage.mapCollectWithGlobals(relationalLetsAbove) { part => WritePartition(part, UUID4(), writer) - }{ case (part, globals) => - val streamElement = Ref(genUID(), part.typ.asInstanceOf[TArray].elementType) - bindIR(StreamAgg(ToStream(part), streamElement.name, - MakeStruct(FastSeq( - "min" -> AggFold.min(GetField(streamElement, "firstKey"), sortFields), - "max" -> AggFold.max(GetField(streamElement, "lastKey"), sortFields) - )) - )) { intervalRange => MakeTuple.ordered(Seq(part, globals, intervalRange)) } - 
}).asInstanceOf[Row] - val (initialPartInfo, initialGlobals, intervalRange) = (initialStageDataRow(0).asInstanceOf[IndexedSeq[Row]], initialStageDataRow(1).asInstanceOf[Row], initialStageDataRow(2).asInstanceOf[Row]) + }{ case (part, globals) => MakeTuple.ordered(Seq(part, globals))}).asInstanceOf[Row] + val (initialPartInfo, initialGlobals) = (initialStageDataRow(0).asInstanceOf[IndexedSeq[Row]], initialStageDataRow(1).asInstanceOf[Row]) val initialGlobalsLiteral = Literal(inputStage.globalType, initialGlobals) val initialChunks = initialPartInfo.map(row => Chunk(initialTmpPath + row(0).asInstanceOf[String], row(1).asInstanceOf[Long].toInt, None)) - - val initialInterval = Interval(intervalRange(0), intervalRange(1), true, true) - val initialSegment = SegmentResult(IndexedSeq(0), initialInterval, initialChunks) + val initialSegment = SegmentResult(IndexedSeq(0), inputStage.partitioner.range.get, initialChunks) val totalNumberOfRows = initialChunks.map(_.size).sum - optTargetNumPartitions.foreach(i => assert(i >= 1, s"Must request positive number of partitions. Requested ${i}")) - val targetNumPartitions = optTargetNumPartitions.getOrElse(inputStage.numPartitions) - - val idealNumberOfRowsPerPart = if (targetNumPartitions == 0) 1 else { - Math.max(1, totalNumberOfRows / targetNumPartitions) - } + val idealNumberOfRowsPerPart = Math.max(1, totalNumberOfRows / inputStage.numPartitions) - var loopState = LoopState(IndexedSeq(initialSegment), IndexedSeq.empty[SegmentResult], IndexedSeq.empty[OutputPartition]) + var loopState = LoopState(IndexedSeq(initialSegment), IndexedSeq.empty[SegmentResult], IndexedSeq.empty[SegmentResult]) var i = 0 val rand = new IRRandomness(seed) /* - Loop state keeps track of three things. largeSegments are too big to sort locally so have to broken up. - smallSegments are small enough to be sorted locally. readyOutputParts are any partitions that we noticed were - sorted already during course of the recursion. Loop continues until there are no largeSegments left. Then we - sort the small segments and combine them with readyOutputParts to get the final table. + There are three categories of segments. largeUnsortedSegments are too big to sort locally, so they have to be broken up. + largeSortedSegments were identified as being already sorted, so there is no reason to recur on them. smallSegments are small + enough to be sorted locally. */ - while (!loopState.largeSegments.isEmpty) { - val partitionDataPerSegment = segmentsToPartitionData(loopState.largeSegments, idealNumberOfRowsPerPart) + while (!loopState.largeUnsortedSegments.isEmpty) { + val partitionDataPerSegment = segmentsToPartitionData(loopState.largeUnsortedSegments, idealNumberOfRowsPerPart) val partitionCountsPerSegment = partitionDataPerSegment.map(oneSegment => oneSegment.map(_.currentPartSize)) - assert(partitionCountsPerSegment.size == loopState.largeSegments.size) + assert(partitionCountsPerSegment.size == loopState.largeUnsortedSegments.size) val numSamplesPerPartitionPerSegment = partitionCountsPerSegment.map { partitionCountsForOneSegment => val recordsInSegment = partitionCountsForOneSegment.sum @@ -187,7 +170,6 @@ /* Aggregate over the segments, to compute the pivots, whether it's already sorted, and what key interval is contained in that segment. - Also get the min and max of each individual partition. That way if it's sorted already, we know the partitioning to use.
*/ val pivotsPerSegmentAndSortedCheck = ToArray(bindIR(perPartStatsIR) { perPartStats => mapIR(StreamGroupByKey(ToStream(perPartStats), IndexedSeq("segmentIdx"))) { oneGroup => @@ -196,10 +178,8 @@ object LowerDistributedSort { bindIR(StreamAgg(oneGroup, streamElementRef.name, { AggLet(dataRef.name, GetField(streamElementRef, "partData"), MakeStruct(Seq( - ("min", AggFold.min(GetField(dataRef, "min"), sortFields)), // Min of the mins - ("max", AggFold.max(GetField(dataRef, "max"), sortFields)), // Max of the maxes - ("perPartMins", ApplyAggOp(Collect())(GetField(dataRef, "min"))), // All the mins - ("perPartMaxes", ApplyAggOp(Collect())(GetField(dataRef, "max"))), // All the maxes + ("min", AggFold.min(GetField(dataRef, "min"), sortFields)), + ("max", AggFold.max(GetField(dataRef, "max"), sortFields)), ("samples", ApplyAggOp(Collect())(GetField(dataRef, "samples"))), ("eachPartSorted", AggFold.all(GetField(dataRef, "isSorted"))), ("perPartIntervalTuples", ApplyAggOp(Collect())(MakeTuple.ordered(Seq(GetField(dataRef, "min"), GetField(dataRef, "max"))))) @@ -220,9 +200,7 @@ object LowerDistributedSort { MakeStruct(Seq( "pivotsWithEndpoints" -> ArrayFunctions.extend(ArrayFunctions.extend(minArray, sortedSampling), maxArray), "isSorted" -> ApplySpecial("land", Seq.empty[Type], Seq(GetField(aggResults, "eachPartSorted"), tuplesInSortedOrder), TBoolean, ErrorIDs.NO_ERROR), - "intervalTuple" -> MakeTuple.ordered(Seq(GetField(aggResults, "min"), GetField(aggResults, "max"))), - "perPartMins" -> GetField(aggResults, "perPartMins"), - "perPartMaxes" -> GetField(aggResults, "perPartMaxes") + "intervalTuple" -> MakeTuple.ordered(Seq(GetField(aggResults, "min"), GetField(aggResults, "max"))) )) } } @@ -232,22 +210,12 @@ object LowerDistributedSort { // Going to check now if it's fully sorted, as well as collect and sort all the samples. 
val pivotsWithEndpointsAndInfoGroupedBySegmentNumber = CompileAndEvaluate[Annotation](ctx, pivotsPerSegmentAndSortedCheck) - .asInstanceOf[IndexedSeq[Row]].map(x => (x(0).asInstanceOf[IndexedSeq[Row]], x(1).asInstanceOf[Boolean], x(2).asInstanceOf[Row], x(3).asInstanceOf[IndexedSeq[Row]], x(4).asInstanceOf[IndexedSeq[Row]])) - - val (sortedSegmentsTuples, unsortedPivotsWithEndpointsAndInfoGroupedBySegmentNumber) = pivotsWithEndpointsAndInfoGroupedBySegmentNumber.zipWithIndex.partition { case ((_, isSorted, _, _, _), _) => isSorted} + .asInstanceOf[IndexedSeq[Row]].map(x => (x(0).asInstanceOf[IndexedSeq[Row]], x(1).asInstanceOf[Boolean], x(2).asInstanceOf[Row])) - val outputPartitions = sortedSegmentsTuples.flatMap { case ((_, _, _, partMins, partMaxes), originalSegmentIdx) => - val segmentToBreakUp = loopState.largeSegments(originalSegmentIdx) - val currentSegmentPartitionData = partitionDataPerSegment(originalSegmentIdx) - val partRanges = partMins.zip(partMaxes) - assert(partRanges.size == currentSegmentPartitionData.size) + val (sortedSegmentsTuples, unsortedPivotsWithEndpointsAndInfoGroupedBySegmentNumber) = pivotsWithEndpointsAndInfoGroupedBySegmentNumber.zipWithIndex.partition { case ((_, isSorted, _), _) => isSorted} - currentSegmentPartitionData.zip(partRanges).zipWithIndex.map { case ((pi, (intervalStart, intervalEnd)), idx) => - OutputPartition(segmentToBreakUp.indices :+ idx, Interval(intervalStart, intervalEnd, true, true), pi.files) - } - } - - val remainingUnsortedSegments = unsortedPivotsWithEndpointsAndInfoGroupedBySegmentNumber.map {case (_, idx) => loopState.largeSegments(idx)} + val sortedSegments = sortedSegmentsTuples.map { case (_, idx) => loopState.largeUnsortedSegments(idx)} + val remainingUnsortedSegments = unsortedPivotsWithEndpointsAndInfoGroupedBySegmentNumber.map {case (_, idx) => loopState.largeUnsortedSegments(idx)} val (newBigUnsortedSegments, newSmallSegments) = if (unsortedPivotsWithEndpointsAndInfoGroupedBySegmentNumber.size > 0) { @@ -316,7 +284,7 @@ isBig && (sr.interval.left.point != sr.interval.right.point) && (sr.chunks.map(_.size).sum > 1) } } else { (IndexedSeq.empty[SegmentResult], IndexedSeq.empty[SegmentResult]) } - loopState = LoopState(newBigUnsortedSegments, loopState.smallSegments ++ newSmallSegments, loopState.readyOutputParts ++ outputPartitions) + loopState = LoopState(newBigUnsortedSegments, loopState.largeSortedSegments ++ sortedSegments, loopState.smallSegments ++ newSmallSegments) i = i + 1 } @@ -337,21 +305,28 @@ val sortedFilenames = CompileAndEvaluate[Annotation](ctx, sortedFilenamesIR).asInstanceOf[IndexedSeq[Row]].map(_(0).asInstanceOf[String]) val newlySortedSegments = loopState.smallSegments.zip(sortedFilenames).map { case (sr, newFilename) => - OutputPartition(sr.indices, sr.interval, IndexedSeq(initialTmpPath + newFilename)) + // "Small" segments are small because we decided they had a small enough number of bytes, so byteSize must be present.
+ val totalNumRows = sr.chunks.map(_.size).sum + val totalByteSize = sr.chunks.map(_.byteSize.get).sum + SegmentResult(sr.indices, sr.interval, IndexedSeq(Chunk(initialTmpPath + newFilename, totalNumRows, Some(totalByteSize)))) } - val unorderedOutputPartitions = newlySortedSegments ++ loopState.readyOutputParts - val orderedOutputPartitions = unorderedOutputPartitions.sortWith{ (srt1, srt2) => lessThanForSegmentIndices(srt1.indices, srt2.indices)} + val unorderedSegments = newlySortedSegments ++ loopState.largeSortedSegments + val orderedSegments = unorderedSegments.sortWith{ (srt1, srt2) => lessThanForSegmentIndices(srt1.indices, srt2.indices)} + + // Now let's treat the whole thing as one segment that can be partitioned by the segmentToPartitionData method. + val megaSegment = SegmentResult(IndexedSeq(), null, orderedSegments.flatMap(sr => sr.chunks)) + val partitioned = segmentsToPartitionData(IndexedSeq(megaSegment), idealNumberOfRowsPerPart).flatten - val contextData = orderedOutputPartitions.map { segment => Row(segment.files) } + val contextData = partitioned.map { part => Row(part.files) } val contexts = ToStream(Literal(TArray(TStruct("files" -> TArray(TString))), contextData)) // Note: If all of the sort fields are not ascending, the the resulting table is sorted, but not keyed. val keyed = sortFields.forall(sf => sf.sortOrder == Ascending) val (partitionerKey, intervals) = if (keyed) { - (keyToSortBy, orderedOutputPartitions.map{ segment => segment.interval}) + (keyToSortBy, orderedSegments.map{ segment => segment.interval}) } else { - (TStruct(), orderedOutputPartitions.map{ _ => Interval(Row(), Row(), true, false)}) + (TStruct(), orderedSegments.map{ _ => Interval(Row(), Row(), true, false)}) } val partitioner = new RVDPartitioner(partitionerKey, intervals) @@ -519,5 +494,4 @@ object LowerDistributedSort { case class Chunk(filename: String, size: Int, byteSize: Option[Long]) case class SegmentResult(indices: IndexedSeq[Int], interval: Interval, chunks: IndexedSeq[Chunk]) -case class OutputPartition(indices: IndexedSeq[Int], interval: Interval, files: IndexedSeq[String]) -case class LoopState(largeSegments: IndexedSeq[SegmentResult], smallSegments: IndexedSeq[SegmentResult], readyOutputParts: IndexedSeq[OutputPartition]) +case class LoopState(largeUnsortedSegments: IndexedSeq[SegmentResult], largeSortedSegments: IndexedSeq[SegmentResult], smallSegments: IndexedSeq[SegmentResult]) diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala index 4579eaa2245..d939c85f591 100644 --- a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala @@ -2,14 +2,13 @@ package is.hail.expr.ir.lowering import is.hail.HailContext import is.hail.backend.ExecuteContext -import is.hail.expr.ir.functions.TableCalculateNewPartitions import is.hail.expr.ir.{agg, _} import is.hail.io.{BufferSpec, TypedCodecSpec} -import is.hail.methods.{ForceCountTable, NPartitionsTable, TableFilterPartitions} +import is.hail.methods.{ForceCountTable, NPartitionsTable} import is.hail.rvd.{PartitionBoundOrdering, RVDPartitioner} import is.hail.types.physical.{PCanonicalBinary, PCanonicalTuple} import is.hail.types.virtual._ -import is.hail.types.{RField, RPrimitive, RStruct, RTable, TableType, TypeWithRequiredness} +import is.hail.types.{RField, RStruct, RTable, TableType} import is.hail.utils.{partition, _} import org.apache.spark.sql.Row @@ -78,10 +77,8 @@ 
class TableStage( // useful for debugging, but should be disabled in production code due to N^2 complexity // typecheckPartition() - def typecheckPartition(ctx: ExecuteContext): Unit = { - TypeCheck( - ctx, - partitionIR, + def typecheckPartition(): Unit = { + TypeCheck(partitionIR, BindingEnv(Env[Type](((letBindings ++ broadcastVals).map { case (s, x) => (s, x.typ) }) ++ FastIndexedSeq[(String, Type)]((ctxRefName, contexts.typ.asInstanceOf[TStream].elementType)): _*))) @@ -428,12 +425,11 @@ class TableStage( } } } - val rightRowRTypeWithPartNum = RStruct(IndexedSeq(RField("__partNum", TypeWithRequiredness(TInt32), 0)) ++ rightRowRType.fields.map(rField => RField(rField.name, rField.typ, rField.index + 1))) val sorted = ctx.backend.lowerDistributedSort(ctx, rightWithPartNums, SortField("__partNum", Ascending) +: right.key.map(k => SortField(k, Ascending)), relationalLetsAbove, - rightRowRTypeWithPartNum) + rightRowRType) assert(sorted.kType.fieldNames.sameElements("__partNum" +: right.key)) val newRightPartitioner = new RVDPartitioner( Some(1), @@ -468,67 +464,6 @@ object LowerTableIR { invoke("sum", TInt64, stage.mapCollect(relationalLetsAbove)(rows => foldIR(mapIR(rows)(row => Consume(row)), 0L)(_ + _))) - case TableToValueApply(child, TableCalculateNewPartitions(nPartitions)) => - val stage = lower(child) - val sampleSize = math.min(nPartitions * 20, 1000000) - val samplesPerPartition = sampleSize / math.max(1, stage.numPartitions) - val keyType = child.typ.keyType - val samplekey = AggSignature(TakeBy(), - FastIndexedSeq(TInt32), - FastIndexedSeq(keyType, TFloat64)) - - val minkey = AggSignature(TakeBy(), - FastIndexedSeq(TInt32), - FastIndexedSeq(keyType, keyType)) - - val maxkey = AggSignature(TakeBy(Descending), - FastIndexedSeq(TInt32), - FastIndexedSeq(keyType, keyType)) - - - bindIR(flatten(stage.mapCollect(relationalLetsAbove) { rows => - streamAggIR(rows) { elt => - ToArray(flatMapIR(ToStream( - MakeArray( - ApplyAggOp( - FastIndexedSeq(I32(samplesPerPartition)), - FastIndexedSeq(SelectFields(elt, keyType.fieldNames), invokeSeeded("rand_unif", 1, TFloat64, F64(0.0), F64(1.0))), - samplekey), - ApplyAggOp( - FastIndexedSeq(I32(1)), - FastIndexedSeq(elt, elt), - minkey), - ApplyAggOp( - FastIndexedSeq(I32(1)), - FastIndexedSeq(elt, elt), - maxkey) - ) - )) { inner => ToStream(inner) }) - } - })) { partData => - - val sorted = sortIR(partData) { (l, r) => ApplyComparisonOp(LT(keyType, keyType), l, r) } - bindIR(ToArray(flatMapIR(StreamGroupByKey(ToStream(sorted), keyType.fieldNames)) { groupRef => - StreamTake(groupRef, 1) - })) { boundsArray => - - bindIR(ArrayLen(boundsArray)) { nBounds => - bindIR(minIR(nBounds, nPartitions)) { nParts => - If(nParts.ceq(0), - MakeArray(Seq(), TArray(TInterval(keyType))), - bindIR((nBounds + (nParts - 1)) floorDiv nParts) { stepSize => - ToArray(mapIR(StreamRange(0, nBounds, stepSize)) { i => - If((i + stepSize) < (nBounds - 1), - invoke("Interval", TInterval(keyType), ArrayRef(boundsArray, i), ArrayRef(boundsArray, i + stepSize), True(), False()), - invoke("Interval", TInterval(keyType), ArrayRef(boundsArray, i), ArrayRef(boundsArray, nBounds - 1), True(), True()) - )}) - } - ) - } - } - } - } - case TableGetGlobals(child) => lower(child).getGlobals() @@ -671,7 +606,7 @@ object LowerTableIR { writer.lower(ctx, lower(child), child, coerce[RTable](analyses.requirednessAnalysis.lookup(child)), relationalLetsAbove) case node if node.children.exists(_.isInstanceOf[TableIR]) => - throw new LowererUnsupportedOperation(s"IR nodes with TableIR children 
must be defined explicitly: \n${ Pretty(ctx, node) }") + throw new LowererUnsupportedOperation(s"IR nodes with TableIR children must be defined explicitly: \n${ Pretty(node) }") } lowered } @@ -1603,32 +1538,6 @@ object LowerTableIR { case TableLiteral(typ, rvd, enc, encodedGlobals) => RVDToTableStage(rvd, EncodedLiteral(enc, encodedGlobals)) - case TableToTableApply(child, TableFilterPartitions(seq, keep)) => - val lc = lower(child) - - val arr = seq.sorted.toArray - val keptSet = seq.toSet - val lit = Literal(TSet(TInt32), keptSet) - if (keep) { - lc.copy( - partitioner = lc.partitioner.copy(rangeBounds = arr.map(idx => lc.partitioner.rangeBounds(idx))), - contexts = mapIR( - filterIR( - zipWithIndex(lc.contexts)) { t => - invoke("contains", TBoolean, lit, GetField(t, "idx")) }) { t => - GetField(t, "elt") } - ) - } else { - lc.copy( - partitioner = lc.partitioner.copy(rangeBounds = lc.partitioner.rangeBounds.zipWithIndex.filter { case (_, idx) => !keptSet.contains(idx) }.map(_._1)), - contexts = mapIR( - filterIR( - zipWithIndex(lc.contexts)) { t => - !invoke("contains", TBoolean, lit, GetField(t, "idx")) }) { t => - GetField(t, "elt") } - ) - } - case bmtt@BlockMatrixToTable(bmir) => val ts = LowerBlockMatrixIR.lowerToTableStage(bmir, typesToLower, ctx, analyses, relationalLetsAbove) // I now have an unkeyed table of (blockRow, blockCol, block). @@ -1653,7 +1562,7 @@ object LowerTableIR { ctx.backend.lowerDistributedSort(ctx, entriesUnkeyed, IndexedSeq(SortField("i", Ascending), SortField("j", Ascending)), relationalLetsAbove, rowR) case node => - throw new LowererUnsupportedOperation(s"undefined: \n${ Pretty(ctx, node) }") + throw new LowererUnsupportedOperation(s"undefined: \n${ Pretty(node) }") } assert(tir.typ.globalType == lowered.globalType, s"\n ir global: ${tir.typ.globalType}\n lowered global: ${lowered.globalType}") diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerToCDA.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerToCDA.scala index 7743047508e..52a0aa1a8b8 100644 --- a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerToCDA.scala +++ b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerToCDA.scala @@ -38,7 +38,7 @@ object LowerToCDA { val loweredValue = substLets(lower(value, typesToLower, ctx, analyses, relationalLetsAbove), relationalLetsAbove) if (!Compilable(loweredValue)) - throw new LowererUnsupportedOperation(s"lowered to uncompilable IR: ${ Pretty(ctx, ir) }") + throw new LowererUnsupportedOperation(s"lowered to uncompilable IR: ${ Pretty(ir) }") val (Some(PTypeReferenceSingleCodeType(pt: PTuple)), f) = ctx.timer.time("Compile") { Compile[AsmFunction1RegionLong](ctx, @@ -72,10 +72,10 @@ object LowerToCDA { LowerBlockMatrixIR(ir, typesToLower, ctx, analyses, relationalLetsAbove) case node if node.children.exists(_.isInstanceOf[MatrixIR]) => - throw new LowererUnsupportedOperation(s"MatrixIR nodes must be lowered to TableIR nodes separately: \n${ Pretty(ctx, node) }") + throw new LowererUnsupportedOperation(s"MatrixIR nodes must be lowered to TableIR nodes separately: \n${ Pretty(node) }") case node => - throw new LowererUnsupportedOperation(s"Cannot lower: \n${ Pretty(ctx, node) }") + throw new LowererUnsupportedOperation(s"Cannot lower: \n${ Pretty(node) }") } } diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/LoweringPass.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/LoweringPass.scala index af7aa11599b..efff62a5214 100644 --- a/hail/src/main/scala/is/hail/expr/ir/lowering/LoweringPass.scala +++ 
b/hail/src/main/scala/is/hail/expr/ir/lowering/LoweringPass.scala @@ -36,10 +36,10 @@ case object LowerMatrixToTablePass extends LoweringPass { val context: String = "LowerMatrixToTable" def transform(ctx: ExecuteContext, ir: BaseIR): BaseIR = ir match { - case x: IR => LowerMatrixIR(ctx, x) - case x: TableIR => LowerMatrixIR(ctx, x) - case x: MatrixIR => LowerMatrixIR(ctx, x) - case x: BlockMatrixIR => LowerMatrixIR(ctx, x) + case x: IR => LowerMatrixIR(x) + case x: TableIR => LowerMatrixIR(x) + case x: MatrixIR => LowerMatrixIR(x) + case x: BlockMatrixIR => LowerMatrixIR(x) } } diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/LoweringPipeline.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/LoweringPipeline.scala index 3ec015862f5..b92dbb5234b 100644 --- a/hail/src/main/scala/is/hail/expr/ir/lowering/LoweringPipeline.scala +++ b/hail/src/main/scala/is/hail/expr/ir/lowering/LoweringPipeline.scala @@ -19,7 +19,7 @@ case class LoweringPipeline(lowerings: LoweringPass*) { throw e } try { - TypeCheck(ctx, x) + TypeCheck(x) } catch { case e: Throwable => fatal(s"error after applying ${ l.context }", e) diff --git a/hail/src/main/scala/is/hail/expr/ir/package.scala b/hail/src/main/scala/is/hail/expr/ir/package.scala index b9368741400..037af8b7fcc 100644 --- a/hail/src/main/scala/is/hail/expr/ir/package.scala +++ b/hail/src/main/scala/is/hail/expr/ir/package.scala @@ -121,11 +121,6 @@ package object ir { If(a < b, a, b) } - def streamAggIR(stream: IR)(f: Ref => IR): IR = { - val ref = Ref(genUID(), coerce[TStream](stream.typ).elementType) - StreamAgg(stream, ref.name, f(ref)) - } - def forIR(stream: IR)(f: Ref => IR): IR = { val ref = Ref(genUID(), coerce[TStream](stream.typ).elementType) StreamFor(stream, ref.name, f(ref)) @@ -146,9 +141,9 @@ package object ir { StreamFlatMap(stream, ref.name, f(ref)) } - def flatten(stream: IR): IR = flatMapIR(if (stream.typ.isInstanceOf[TStream]) stream else ToStream(stream)) { elt => - if (elt.typ.isInstanceOf[TStream]) elt else ToStream(elt) - } + def flatten(stream: IR): IR = flatMapIR(stream) { elt => + if (elt.typ.isInstanceOf[TStream]) elt else ToStream(elt) + } def foldIR(stream: IR, zero: IR)(f: (Ref, Ref) => IR): IR = { val elt = Ref(genUID(), coerce[TStream](stream.typ).elementType) diff --git a/hail/src/main/scala/is/hail/io/TextMatrixReader.scala b/hail/src/main/scala/is/hail/io/TextMatrixReader.scala new file mode 100644 index 00000000000..53d77b6bd6c --- /dev/null +++ b/hail/src/main/scala/is/hail/io/TextMatrixReader.scala @@ -0,0 +1,702 @@ +package is.hail.io + +import is.hail.annotations._ +import is.hail.asm4s._ +import is.hail.backend.ExecuteContext +import is.hail.expr.ir.lowering.TableStage +import is.hail.expr.ir.{EmitCode, EmitCodeBuilder, EmitFunctionBuilder, GenericLine, GenericLines, GenericTableValue, IEmitCode, IRParser, IntArrayBuilder, LowerMatrixIR, MatrixHybridReader, TableRead, TableValue, TextReaderOptions} +import is.hail.io.fs.FS +import is.hail.rvd.RVDPartitioner +import is.hail.types._ +import is.hail.types.physical._ +import is.hail.types.physical.stypes.concrete.{SIndexablePointerValue, SStackStruct, SStringPointer} +import is.hail.types.physical.stypes.interfaces._ +import is.hail.types.physical.stypes.{SCode, SValue} +import is.hail.types.virtual._ +import is.hail.utils._ +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.Row +import org.json4s.{DefaultFormats, Formats, JValue} + +import scala.collection.mutable +import scala.io.Source +import scala.language.{existentials, implicitConversions} + 
+case class TextMatrixHeaderInfo( + headerValues: Array[String], + rowFieldNames: Array[String], + columnIdentifiers: Array[_] // String or Int +) { + val nCols: Int = columnIdentifiers.length +} + +object TextMatrixReader { + + def warnDuplicates(ids: Array[String]) { + val duplicates = ids.counter().filter(_._2 > 1) + if (duplicates.nonEmpty) { + warn(s"Found ${ duplicates.size } duplicate ${ plural(duplicates.size, "sample ID") }:\n @1", + duplicates.toArray.sortBy(-_._2).map { case (id, count) => s"""($count) "$id"""" }.truncatable("\n ")) + } + } + + private def parseHeader( + fs: FS, + file: String, + sep: Char, + nRowFields: Int, + opts: TextMatrixReaderOptions + ): TextMatrixHeaderInfo = { + val maybeFirstTwoLines = using(fs.open(file)) { s => + Source.fromInputStream(s).getLines().filter(!opts.isComment(_)).take(2).toArray.toSeq + } + + (opts.hasHeader, maybeFirstTwoLines) match { + case (true, Seq()) => + fatal(s"Expected header in every file, but found empty file: $file") + case (true, Seq(header)) => + warn(s"File $file contains a header, but no lines of data.") + val headerValues = header.split(sep) + if (headerValues.length < nRowFields) { + fatal( + s"""File ${ file } contains one line and you told me it had a header, + |so I expected to see at least the ${ nRowFields } row field names + |on the header line, but instead I only saw ${ headerValues.length } + |separated values. The header was: + | ${ header }""".stripMargin) + } + TextMatrixHeaderInfo( + headerValues, + headerValues.slice(0, nRowFields), + headerValues.drop(nRowFields)) + case (true, Seq(header, dataLine)) => + val headerValues = header.split(sep) + val nHeaderValues = headerValues.length + val nSeparatedValues = dataLine.split(sep).length + if (nHeaderValues + nRowFields == nSeparatedValues) { + TextMatrixHeaderInfo( + headerValues, + rowFieldNames = Array.tabulate(nRowFields)(i => s"f$i"), + columnIdentifiers = headerValues) + } else if (nHeaderValues == nSeparatedValues) { + TextMatrixHeaderInfo( + headerValues, + rowFieldNames = headerValues.slice(0, nRowFields), + columnIdentifiers = headerValues.drop(nRowFields)) + } else { + fatal( + s"""In file $file, expected the header line to match either: + | rowField0 rowField1 ... rowField${ nRowFields } colId0 colId1 ... + |or + | colId0 colId1 ... 
+ |Instead the first two lines were: + | ${ header.truncate } + | ${ dataLine.truncate } + |The first line contained ${ nHeaderValues } separated values and the + |second line contained ${ nSeparatedValues } separated values.""".stripMargin) + } + case (false, Seq()) => + warn(s"File $file is empty and has no header, so we assume no columns.") + TextMatrixHeaderInfo(Array(), Array.tabulate(nRowFields)(i => s"f$i"), Array()) + case (false, firstLine +: _) => + val nSeparatedValues = firstLine.split(sep).length + TextMatrixHeaderInfo( + Array(), + Array.tabulate(nRowFields)(i => s"f$i"), + Array.range(0, nSeparatedValues - nRowFields)) + } + } + + def makePartitionerFromCounts(partitionCounts: Array[Long], kType: TStruct): (RVDPartitioner, Array[Int]) = { + var includesStart = true + val keepPartitions = new IntArrayBuilder() + val rangeBoundIntervals = partitionCounts.zip(partitionCounts.tail).zipWithIndex.flatMap { case ((s, e), i) => + val interval = Interval.orNone(kType.ordering, + Row(if (includesStart) s else s - 1), + Row(e - 1), + includesStart, true) + includesStart = false + if (interval.isDefined) + keepPartitions.add(i) + interval + } + val ranges = rangeBoundIntervals + (new RVDPartitioner(Array(kType.fieldNames(0)), kType, ranges), keepPartitions.result()) + } + + def verifyRowFields( + fileName: String, + fieldNames: Array[String], + fieldTypes: Map[String, Type] + ): TStruct = { + val headerDups = fieldNames.duplicates() + if (headerDups.nonEmpty) + fatal(s"Found following duplicate row fields in header: \n ${ headerDups.mkString("\n ") }") + + val fields: Array[(String, Type)] = fieldNames.map { name => + fieldTypes.get(name) match { + case Some(t) => (name, t) + case None => + val rowFieldsAsPython = fieldTypes + .map { case (fieldName, typ) => s"'${ fieldName }': ${ typ.toString }" } + .mkString("{", ",\n ", "}") + fatal( + s"""In file $fileName, found a row field, $name, that is not in `row_fields': + | row fields found in file: + | ${ fieldNames.mkString("\n ") } + | row_fields: + | ${ rowFieldsAsPython } + """.stripMargin) + } + } + TStruct(fields: _*) + } + + def checkHeaders( + header1Path: String, + header1: Array[String], + headerPartitions: mutable.Set[Int], + partitionPaths: Array[String], + lines: RDD[GenericLine], + separator: Char + ): Unit = { + lines + .mapPartitionsWithIndex { (i, it) => + if (headerPartitions.contains(i)) { + val hd = it.next().toString.split(separator) + if (!header1.sameElements(hd)) { + if (header1.length != hd.length) { + fatal( + s"""invalid header: lengths of headers differ. + | ${ header1.length } elements in $header1Path + | ${ header1.truncate } + | ${ hd.length } elements in ${ partitionPaths(i) } + | ${ hd.truncate }""".stripMargin + ) + } + header1.zip(hd).zipWithIndex.foreach { case ((s1, s2), j) => + if (s1 != s2) { + fatal( + s"""invalid header: expected elements to be identical for all input paths. Found different elements at position $j. + | ${ header1Path }: $s1 + | ${ partitionPaths(i) }: $s2""". 
+ stripMargin) + } + } + } + } + it + }.foreachPartition { _ => () } + } + + def fromJValue(ctx: ExecuteContext, jv: JValue): TextMatrixReader = { + val fs = ctx.fs + + implicit val formats: Formats = DefaultFormats + val params = jv.extract[TextMatrixReaderParameters] + + assert(params.separatorStr.length == 1) + val separator = params.separatorStr.charAt(0) + val rowFields = params.rowFieldsStr.mapValues(IRParser.parseType(_)) + val entryType = TStruct("x" -> IRParser.parseType(params.entryTypeStr)) + val fileStatuses = fs.globAllStatuses(params.paths) + require(entryType.size == 1, "entryType can only have 1 field") + if (fileStatuses.isEmpty) + fatal("no paths specified for import_matrix_table.") + assert((rowFields.values ++ entryType.types).forall { t => + t == TString || + t == TInt32 || + t == TInt64 || + t == TFloat32 || + t == TFloat64 + }) + + val opts = TextMatrixReaderOptions(params.comment, params.hasHeader) + + val headerInfo = parseHeader(fs, fileStatuses.head.getPath, separator, rowFields.size, opts) + if (params.addRowId && headerInfo.rowFieldNames.contains("row_id")) { + fatal( + s"""If no key is specified, `import_matrix_table`, uses 'row_id' + |as the key, please provide a key or choose a different row field name.\n + | Row field names: ${ headerInfo.rowFieldNames }""".stripMargin) + } + val rowFieldTypeWithoutRowId = verifyRowFields( + fileStatuses.head.getPath, headerInfo.rowFieldNames, rowFields) + val rowFieldType = + if (params.addRowId) + TStruct("row_id" -> TInt64) ++ rowFieldTypeWithoutRowId + else + rowFieldTypeWithoutRowId + if (params.hasHeader) + warnDuplicates(headerInfo.columnIdentifiers.asInstanceOf[Array[String]]) + + val lines = GenericLines.read(fs, fileStatuses, params.nPartitions, None, None, params.gzipAsBGZip, false) + + val linesRDD = lines.toRDD(fs) + .filter { line => + val l = line.toString + l.nonEmpty && !opts.isComment(l) + } + + val linesPartitionCounts = linesRDD.countPerPartition() + val partitionPaths = lines.contexts.map(a => a.asInstanceOf[Row].getAs[String](1)).toArray + + val headerPartitions = mutable.Set[Int]() + val partitionLineIndexWithinFile = new Array[Long](linesRDD.getNumPartitions) + + var indexWithinFile = 0L + var i = 0 + var prevPartitionPath: String = null + while (i < linesRDD.getNumPartitions) { + if (linesPartitionCounts(i) > 0) { + val partPath = partitionPaths(i) + if (prevPartitionPath == null + || prevPartitionPath != partPath) { + prevPartitionPath = partPath + indexWithinFile = 0 + if (opts.hasHeader) { + linesPartitionCounts(i) -= 1 + headerPartitions += i + } + } + } + partitionLineIndexWithinFile(i) = indexWithinFile + indexWithinFile += linesPartitionCounts(i) + i += 1 + } + + if (params.hasHeader) + checkHeaders(fileStatuses.head.getPath, headerInfo.headerValues, headerPartitions, partitionPaths, linesRDD, separator) + + val fullMatrixType = MatrixType( + TStruct.empty, + colType = TStruct("col_id" -> (if (params.hasHeader) TString else TInt32)), + colKey = Array("col_id"), + rowType = rowFieldType, + rowKey = Array().toFastIndexedSeq, + entryType = entryType) + + new TextMatrixReader(params, opts, lines, separator, rowFieldType, fullMatrixType, headerInfo, headerPartitions, linesPartitionCounts, partitionLineIndexWithinFile, partitionPaths) + } +} + +case class TextMatrixReaderParameters( + paths: Array[String], + nPartitions: Option[Int], + rowFieldsStr: Map[String, String], + entryTypeStr: String, + missingValue: String, + hasHeader: Boolean, + separatorStr: String, + gzipAsBGZip: Boolean, + addRowId: 
Boolean, + comment: Array[String]) + +case class TextMatrixReaderOptions(comment: Array[String], hasHeader: Boolean) extends TextReaderOptions + +class TextMatrixReader( + val params: TextMatrixReaderParameters, + opts: TextMatrixReaderOptions, + lines: GenericLines, + separator: Char, + rowFieldType: TStruct, + val fullMatrixType: MatrixType, + headerInfo: TextMatrixHeaderInfo, + headerPartitions: mutable.Set[Int], + _partitionCounts: Array[Long], + partitionLineIndexWithinFile: Array[Long], + partitionPaths: Array[String] +) extends MatrixHybridReader { + def pathsUsed: Seq[String] = params.paths + + def columnCount = Some(headerInfo.nCols) + + def partitionCounts = Some(_partitionCounts) + + def rowAndGlobalPTypes(context: ExecuteContext, requestedType: TableType): (PStruct, PStruct) = { + PType.canonical(requestedType.rowType, required = true).asInstanceOf[PStruct] -> + PType.canonical(requestedType.globalType, required = true).asInstanceOf[PStruct] + } + + def executeGeneric(ctx: ExecuteContext): GenericTableValue = { + val tt = fullMatrixType.toTableType(LowerMatrixIR.entriesFieldName, LowerMatrixIR.colsFieldName) + + val globals = Row(headerInfo.columnIdentifiers.map(Row(_)).toFastIndexedSeq) + + val bodyPType = (requestedRowType: TStruct) => PType.canonical(requestedRowType, required = true).asInstanceOf[PCanonicalStruct] + + val body = { (requestedType: TStruct) => + val linesBody = lines.body + val requestedPType = bodyPType(requestedType) + val localOpts = opts + + val partitionRowIdxGlobal = (0 until _partitionCounts.length - 1).scanLeft(0L) { case (acc, i) => acc + _partitionCounts(i) }.toArray + + val compiledLineParser = new CompiledLineParser(ctx, + rowFieldType, + requestedPType, + headerInfo.nCols, + params.missingValue, + separator, + headerPartitions, + _partitionCounts, + partitionPaths, + partitionRowIdxGlobal, + partitionLineIndexWithinFile, + params.hasHeader) + + { (region: Region, theHailClassLoader: HailClassLoader, fs: FS, context: Any) => + val Row(lc, partitionIdx: Int) = context + compiledLineParser.apply(partitionIdx, region, theHailClassLoader, + linesBody(fs, lc).filter { line => + val l = line.toString + l.nonEmpty && !localOpts.isComment(l) + } + ) + } + } + + new GenericTableValue( + tt, + None, + { (requestedGlobalsType: Type) => + val subset = tt.globalType.valueSubsetter(requestedGlobalsType) + subset(globals).asInstanceOf[Row] + }, + TTuple(lines.contextType, TInt32), + lines.contexts.zipWithIndex.map { case (x, i) => Row(x, i) }, + bodyPType, + body) + + } + + override def lower(ctx: ExecuteContext, requestedType: TableType): TableStage = + executeGeneric(ctx).toTableStage(ctx, requestedType) + + def apply(tr: TableRead, ctx: ExecuteContext): TableValue = { + executeGeneric(ctx).toTableValue(ctx, tr.typ) + } + + override def toJValue: JValue = { + implicit val formats: Formats = DefaultFormats + decomposeWithName(params, "TextMatrixReader") + } + + override def renderShort(): String = defaultRender() + + override def hashCode(): Int = params.hashCode() + + override def equals(that: Any): Boolean = that match { + case that: TextMatrixReader => params == that.params + case _ => false + } +} + +class MatrixParseError( + val msg: String, + val filename: String, + val line: Long, + val posStart: Int, + val posEnd: Int +) extends RuntimeException(s"${ filename }:${ posStart }-${ posEnd }, ${ msg }") + +class CompiledLineParser( + ctx: ExecuteContext, + onDiskRowFieldsType: TStruct, + rowPType: PCanonicalStruct, + nCols: Int, + missingValue: String, + 
separator: Char, + headerPartitions: mutable.Set[Int], + partitionCounts: Array[Long], + partitionPaths: Array[String], + partitionRowIndexGlobal: Array[Long], + partitionRowIndexFile: Array[Long], + hasHeader: Boolean +) extends ((Int, Region, HailClassLoader, Iterator[GenericLine]) => Iterator[Long]) with Serializable { + assert(!missingValue.contains(separator)) + @transient private[this] val entriesType = rowPType + .selfField(MatrixType.entriesIdentifier) + .map(f => f.typ.asInstanceOf[PCanonicalArray]) + @transient private[this] val rowFieldsType = rowPType + .dropFields(Set(MatrixType.entriesIdentifier)) + @transient private[this] val fb = EmitFunctionBuilder[Region, String, Long, String, Long](ctx, "text_matrix_reader") + @transient private[this] val mb = fb.apply_method + @transient private[this] val region = fb.getCodeParam[Region](1) + @transient private[this] val _filename = fb.getCodeParam[String](2) + @transient private[this] val _lineNumber = fb.getCodeParam[Long](3) + @transient private[this] val _line = fb.getCodeParam[String](4) + @transient private[this] val filename = mb.genFieldThisRef[String]("filename") + @transient private[this] val lineNumber = mb.genFieldThisRef[Long]("lineNumber") + @transient private[this] val line = mb.genFieldThisRef[String]("line") + @transient private[this] val pos = mb.genFieldThisRef[Int]("pos") + + fb.cb.emitInit(Code( + pos := 0, + filename := Code._null[String], + lineNumber := 0L, + line := Code._null[String])) + + + @transient private[this] val parseStringMb = fb.genEmitMethod[Region, String]("parseString") + parseStringMb.emitWithBuilder(parseString(_)) + @transient private[this] val parseIntMb = fb.genEmitMethod[Region, Int]("parseInt") + parseIntMb.emitWithBuilder(parseInt(_)) + @transient private[this] val parseLongMb = fb.genEmitMethod[Region, Long]("parseLong") + parseLongMb.emitWithBuilder(parseLong(_)) + + @transient private[this] def parseEntriesOpt(cb: EmitCodeBuilder): Option[EmitCode] = entriesType.map { entriesType => + val sc = parseEntries(cb, entriesType) + EmitCode.present(cb.emb, sc) + } + + mb.emitWithBuilder[Long] { cb => + cb.assign(pos, 0) + cb.assign(filename, _filename) + cb.assign(lineNumber, _lineNumber) + cb.assign(line, _line) + val rowFields = parseRowFields(cb) + val entries = parseEntriesOpt(cb) + rowPType.constructFromFields(cb, region, rowFields ++ entries, deepCopy = false).a + } + + private[this] val loadParserOnWorker = fb.result() + + private[this] def parseError(cb: EmitCodeBuilder, msg: Code[String]): Unit = + cb += Code._throw[MatrixParseError, Unit](Code.newInstance[MatrixParseError, String, String, Long, Int, Int]( + msg, filename, lineNumber, pos, pos + 1)) + + private[this] def numericValue(cb: EmitCodeBuilder, cCode: Code[Char]): Code[Int] = { + val c = cb.newLocal[Char]("clp_numeric_val_c", cCode) + cb.ifx(c < const('0') || c > const('9'), + parseError(cb, const("invalid character '") + .concat(c.toS) + .concat("' in integer literal"))) + (c - const('0')).toI + } + + private[this] def endField(cb: EmitCodeBuilder, p: Value[Int]): Code[Boolean] = { + p.ceq(line.length()) || line(p).ceq(const(separator)) + } + + private[this] def endField(cb: EmitCodeBuilder): Code[Boolean] = + endField(cb, pos) + + private[this] def parseOptionalValue( + cb: EmitCodeBuilder, + parse: EmitCodeBuilder => SValue + ): IEmitCode = { + assert(missingValue.size > 0) + val end = cb.newLocal[Int]("parse_optional_value_end", pos + missingValue.size) + + val Lmissing = CodeLabel() + + cb.ifx(end <= line.length, + 
cb.ifx(endField(cb, end), + cb.ifx(line.invoke[Int, String, Int, Int, Boolean]("regionMatches", + pos, missingValue, 0, missingValue.size), + { + cb.assign(pos, end) + cb.goto(Lmissing) + }))) + + val pc = parse(cb) + val Ldefined = CodeLabel() + cb.goto(Ldefined) + + IEmitCode(Lmissing, Ldefined, pc, false) + } + + private[this] def skipOptionalValue(cb: EmitCodeBuilder, skip: EmitCodeBuilder => Unit): Unit = { + assert(missingValue.size > 0) + val end = cb.newLocal[Int]("skip_optional_value_end", pos + missingValue.size) + + val Lfinished = CodeLabel() + + cb.ifx(end <= line.length, + cb.ifx(endField(cb, end), + cb.ifx(line.invoke[Int, String, Int, Int, Boolean]("regionMatches", + pos, missingValue, 0, missingValue.size), + { + cb.assign(pos, end) + cb.goto(Lfinished) + }))) + + skip(cb) + + cb.define(Lfinished) + } + + private[this] def parseInt(cb: EmitCodeBuilder): Code[Int] = { + cb.ifx(endField(cb), parseError(cb, "empty integer literal")) + + val mul = cb.newLocal[Int]("mul", 1) + cb.ifx(line(pos).ceq(const('-')), { + cb.assign(mul, -1) + cb.assign(pos, pos + 1) + }) + val c = cb.newLocal[Char]("c", line(pos)) + val v = cb.newLocal[Int]("v", numericValue(cb, c)) + cb.assign(pos, pos + 1) + + cb.whileLoop(!endField(cb), { + cb.assign(c, line(pos)) + cb.assign(v, v * const(10) + numericValue(cb, c)) + cb.assign(pos, pos + 1) + }) + v * mul + } + + private[this] def parseLong(cb: EmitCodeBuilder): Code[Long] = { + cb.ifx(endField(cb), parseError(cb, "empty integer literal")) + + val mul = cb.newLocal[Long]("mulL", 1L) + cb.ifx(line(pos).ceq(const('-')), { + cb.assign(mul, -1L) + cb.assign(pos, pos + 1) + }) + val c = cb.newLocal[Char]("cL", line(pos)) + val v = cb.newLocal[Long]("vL", numericValue(cb, c).toL) + cb.assign(pos, pos + 1) + + cb.whileLoop(!endField(cb), { + cb.assign(c, line(pos)) + cb.assign(v, v * const(10L) + numericValue(cb, c).toL) + cb.assign(pos, pos + 1) + }) + v * mul + } + + private[this] def parseString(cb: EmitCodeBuilder): Code[String] = { + val start = cb.newLocal[Int]("start", pos) + cb.whileLoop(!endField(cb), + cb.assign(pos, pos + 1)) + line.invoke[Int, Int, String]("substring", start, pos) + } + + private[this] def parseValueOfType(cb: EmitCodeBuilder, t: PType): IEmitCode = { + def parseDefinedValue(cb: EmitCodeBuilder): SValue = t match { + case t: PInt32 => + primitive(cb.memoize(cb.invokeCode[Int](parseIntMb, region))) + case t: PInt64 => + primitive(cb.memoize(cb.invokeCode[Long](parseLongMb, region))) + case t: PFloat32 => + primitive(cb.memoize(Code.invokeStatic1[java.lang.Float, String, Float]("parseFloat", cb.invokeCode(parseStringMb, region)))) + case t: PFloat64 => + primitive(cb.memoize(Code.invokeStatic1[java.lang.Double, String, Double]("parseDouble", cb.invokeCode(parseStringMb, region)))) + case t: PString => + val st = SStringPointer(t) + st.constructFromString(cb, region, cb.invokeCode[String](parseStringMb, region)) + } + if (t.required) + IEmitCode.present(cb, parseDefinedValue(cb)) + else + parseOptionalValue(cb, parseDefinedValue) + } + + private[this] def skipValueOfType(cb: EmitCodeBuilder, t: PType): Unit = { + def skipDefinedValue(cb: EmitCodeBuilder): Unit = { + cb.whileLoop(!endField(cb), cb.assign(pos, pos + 1)) + } + + if (t.required) skipDefinedValue(cb) else skipOptionalValue(cb, skipDefinedValue) + } + + private[this] def parseRowFields(cb: EmitCodeBuilder): Array[EmitCode] = { + assert(onDiskRowFieldsType.size >= rowFieldsType.size) + + // need to be careful to ensure parsing code is directly appended to code builder, 
not EmitCode block + val fieldEmitCodes = new Array[EmitCode](rowFieldsType.size) + + onDiskRowFieldsType.fields.foreach { onDiskField => + rowPType.selfField(onDiskField.name) match { + + case Some(requestedField) => + val reqFieldType = requestedField.typ + val reqIndex = requestedField.index + + + val ec = if (onDiskField.name == "row_id") + EmitCode.present(cb.emb, primitive(lineNumber)) + else { + cb.ifx(pos >= line.length, + parseError(cb, const("unexpected end of line while reading row field ") + .concat(onDiskField.name))) + val ev = parseValueOfType(cb, reqFieldType).memoize(cb, s"field_${onDiskField.name}") + cb.assign(pos, pos + 1) + ev.load + } + + fieldEmitCodes(reqIndex) = ec + + case None => + if (onDiskField.name != "row_id") { + skipValueOfType(cb, PType.canonical(onDiskField.typ)) // will always be optional + cb.assign(pos, pos + 1) + } + } + } + fieldEmitCodes + } + + private[this] def parseEntries(cb: EmitCodeBuilder, entriesType: PCanonicalArray): SIndexablePointerValue = { + val entryType = entriesType.elementType.asInstanceOf[PCanonicalStruct] + assert(entryType.fields.size == 1) + val (push, finish) = entriesType.constructFromFunctions(cb, region, nCols, false) + + val i = cb.newLocal[Int]("i", 0) + cb.whileLoop(i < nCols, { + cb.ifx(pos >= line.length, parseError(cb, const("unexpected end of line while reading entry ").concat(i.toS))) + + val ec = EmitCode.fromI(cb.emb)(cb => parseValueOfType(cb, entryType.fields(0).typ)) + push(cb, IEmitCode.present(cb, SStackStruct.constructFromArgs(cb, region, entryType.virtualType, ec))) + cb.assign(pos, pos + 1) + cb.assign(i, i + 1) + }) + finish(cb) + } + + def apply( + partition: Int, + r: Region, + theHailClassLoader: HailClassLoader, + it: Iterator[GenericLine] + ): Iterator[Long] = { + val filename = partitionPaths(partition) + if (hasHeader && headerPartitions.contains(partition)) + it.next() + + val parse = loadParserOnWorker(theHailClassLoader) + val fileLineIndex = partitionRowIndexFile(partition) + val globalLineIndex = partitionRowIndexGlobal(partition) + + var idxWithinPartition = 0L + it.map { line => + val x = line.toString + try { + val res = + parse( + r, + filename, + globalLineIndex + idxWithinPartition, + x) + idxWithinPartition += 1 + res + } catch { + case e: MatrixParseError => + fatal( + s"""""Error parse line ${ fileLineIndex + idxWithinPartition }:${ e.posStart }-${ e.posEnd }: + | File: $filename + | Line: + | ${ x.truncate }""".stripMargin, + e) + case e: Exception => fatal( + s"""""Error parse line ${ fileLineIndex + idxWithinPartition }: + | File: $filename + | Line: + | ${ x.truncate }""".stripMargin, + e) + } + } + } +} diff --git a/hail/src/main/scala/is/hail/io/bgen/LoadBgen.scala b/hail/src/main/scala/is/hail/io/bgen/LoadBgen.scala index 27fe23eb195..5ce75847c5e 100644 --- a/hail/src/main/scala/is/hail/io/bgen/LoadBgen.scala +++ b/hail/src/main/scala/is/hail/io/bgen/LoadBgen.scala @@ -421,11 +421,7 @@ case class MatrixBGENReaderParameters( }.toList), "nPartitions" -> nPartitions.map(JInt(_)).getOrElse(JNull), "blockSizeInMB" -> blockSizeInMB.map(JInt(_)).getOrElse(JNull), - // FIXME: feels like a hack that Pretty needs execute context - // FIXME: feels like a hack that I use null here - // FIXME: feels like a hack that toJValue uses Pretty? - // Q: can we parse SSA'ed pretty table IR? 
- "includedVariants" -> includedVariants.map(t => JString(Pretty(null, t))).getOrElse(JNull))) + "includedVariants" -> includedVariants.map(t => JString(Pretty(t))).getOrElse(JNull))) } } diff --git a/hail/src/main/scala/is/hail/io/fs/GoogleStorageFS.scala b/hail/src/main/scala/is/hail/io/fs/GoogleStorageFS.scala index e9cb2cf41f3..92e834d4d49 100644 --- a/hail/src/main/scala/is/hail/io/fs/GoogleStorageFS.scala +++ b/hail/src/main/scala/is/hail/io/fs/GoogleStorageFS.scala @@ -104,26 +104,19 @@ class GoogleStorageFileStatus(path: String, modificationTime: java.lang.Long, si class GoogleStorageFS(val serviceAccountKey: Option[String] = None) extends FS { import GoogleStorageFS._ - @transient private lazy val storage: Storage = { - val transportOptions = StorageOptions.getDefaultHttpTransportOptions().toBuilder() - .setConnectTimeout(5000) - .setReadTimeout(5000) - .build() - serviceAccountKey match { - case None => - log.info("Initializing google storage client from latent credentials") - StorageOptions.newBuilder() - .build() - .getService - case Some(keyData) => - log.info("Initializing google storage client from service account key") - StorageOptions.newBuilder() - .setCredentials( - ServiceAccountCredentials.fromStream(new ByteArrayInputStream(keyData.getBytes))) - .setTransportOptions(transportOptions) - .build() - .getService - } + @transient private lazy val storage: Storage = serviceAccountKey match { + case None => + log.info("Initializing google storage client from latent credentials") + StorageOptions.newBuilder() + .build() + .getService + case Some(keyData) => + log.info("Initializing google storage client from service account key") + StorageOptions.newBuilder() + .setCredentials( + ServiceAccountCredentials.fromStream(new ByteArrayInputStream(keyData.getBytes))) + .build() + .getService } def asCacheable(): CacheableGoogleStorageFS = new CacheableGoogleStorageFS(serviceAccountKey, null) diff --git a/hail/src/main/scala/is/hail/io/plink/LoadPlink.scala b/hail/src/main/scala/is/hail/io/plink/LoadPlink.scala index 2fcd584d46b..95fe5307ac9 100644 --- a/hail/src/main/scala/is/hail/io/plink/LoadPlink.scala +++ b/hail/src/main/scala/is/hail/io/plink/LoadPlink.scala @@ -289,8 +289,7 @@ class PlinkVariant( val index: Int, val locusAlleles: Any, val cmPos: Double, - val rsid: String -) extends Serializable + val rsid: String) class MatrixPLINKReader( val params: MatrixPLINKReaderParameters, @@ -316,6 +315,8 @@ class MatrixPLINKReader( } def executeGeneric(ctx: ExecuteContext): GenericTableValue = { + val fsBc = ctx.fsBc + val localA2Reference = params.a2Reference val variantsBc = ctx.backend.broadcast(variants) val localNSamples = nSamples @@ -360,12 +361,9 @@ class MatrixPLINKReader( val rvb = new RegionValueBuilder(region) - val is = fs.open(bed) - if (TaskContext.get != null) { - // FIXME: need to close InputStream for other backends too - TaskContext.get.addTaskCompletionListener[Unit] { (context: TaskContext) => - is.close() - } + val is = fsBc.value.open(bed) + TaskContext.get.addTaskCompletionListener[Unit] { (context: TaskContext) => + is.close() } var offset: Long = 0 diff --git a/hail/src/main/scala/is/hail/io/vcf/ExportVCF.scala b/hail/src/main/scala/is/hail/io/vcf/ExportVCF.scala index 340379d2f42..5d39aa0162d 100644 --- a/hail/src/main/scala/is/hail/io/vcf/ExportVCF.scala +++ b/hail/src/main/scala/is/hail/io/vcf/ExportVCF.scala @@ -10,11 +10,12 @@ import is.hail.expr.ir.MatrixValue import is.hail.io.compress.{BGzipLineReader, BGzipOutputStream} import is.hail.io.fs.FS import 
is.hail.io.{VCFAttributes, VCFFieldAttributes, VCFMetadata} -import is.hail.types.MatrixType import is.hail.types.physical._ import is.hail.types.virtual._ import is.hail.utils._ -import is.hail.variant.{Call, ReferenceGenome, RegionValueVariant} +import is.hail.variant.{Call, RegionValueVariant} + +import scala.io.Source object ExportVCF { def infoNumber(t: Type): String = t match { @@ -24,8 +25,87 @@ object ExportVCF { case _ => "1" } - def fmtFloat(fmt: String, value: Float): String = value.formatted(fmt) - def fmtDouble(fmt: String, value: Double): String = value.formatted(fmt) + def strVCF(sb: StringBuilder, elementType: PType, offset: Long) { + elementType match { + case PInt32(_) => + val x = Region.loadInt(offset) + sb.append(x) + case PInt64(_) => + val x = Region.loadLong(offset) + if (x > Int.MaxValue || x < Int.MinValue) + fatal(s"Cannot convert Long to Int if value is greater than Int.MaxValue (2^31 - 1) " + + s"or less than Int.MinValue (-2^31). Found $x.") + sb.append(x) + case PFloat32(_) => + val x = Region.loadFloat(offset) + if (x.isNaN) + sb += '.' + else + sb.append(x.formatted("%.6g")) + case PFloat64(_) => + val x = Region.loadDouble(offset) + if (x.isNaN) + sb += '.' + else + sb.append(x.formatted("%.6g")) + case t@PCanonicalString(_) => + sb.append(t.loadString(offset)) + case _: PCall => + val c = Region.loadInt(offset) + Call.vcfString(c, sb) + case _ => + fatal(s"VCF does not support type $elementType") + } + } + + def iterableVCF(sb: StringBuilder, t: PContainer, length: Int, offset: Long, delim: Char) { + if (length > 0) { + var i = 0 + while (i < length) { + if (i > 0) + sb += delim + if (t.isElementDefined(offset, i)) { + val eOffset = t.loadElement(offset, length, i) + strVCF(sb, t.elementType, eOffset) + } else + sb += '.' + i += 1 + } + } else + sb += '.' + } + + def emitInfo(sb: StringBuilder, f: PField, offset: Long, wroteLast: Boolean): Boolean = { + f.typ match { + case it: PContainer if it.elementType.virtualType != TBoolean => + val length = it.loadLength(offset) + if (length == 0) + wroteLast + else { + if (wroteLast) + sb += ';' + sb.append(f.name) + sb += '=' + iterableVCF(sb, it, length, offset, ',') + true + } + case PBoolean(_) => + if (Region.loadBoolean(offset)) { + if (wroteLast) + sb += ';' + sb.append(f.name) + true + } else + wroteLast + case t => + if (wroteLast) + sb += ';' + sb.append(f.name) + sb += '=' + strVCF(sb, t, offset) + true + } + } def infoType(t: Type): Option[String] = t match { case TInt32 | TInt64 => Some("Integer") @@ -92,102 +172,309 @@ object ExportVCF { } } + def emitGenotype(sb: StringBuilder, formatFieldOrder: Array[Int], tg: PStruct, offset: Long, fieldDefined: Array[Boolean], missingFormat: String) { + var i = 0 + while (i < formatFieldOrder.length) { + fieldDefined(i) = tg.isFieldDefined(offset, formatFieldOrder(i)) + i += 1 + } + + var end = i + while (end > 0 && !fieldDefined(end - 1)) + end -= 1 + + if (end == 0) + sb.append(missingFormat) + else { + i = 0 + while (i < end) { + if (i > 0) + sb += ':' + val j = formatFieldOrder(i) + val fIsDefined = fieldDefined(i) + val fOffset = tg.loadField(offset, j) + + tg.fields(j).typ match { + case it: PContainer => + val pt = it + if (fIsDefined) { + val fLength = pt.loadLength(fOffset) + iterableVCF(sb, pt, fLength, fOffset, ',') + } else + sb += '.' + case t => + if (fIsDefined) + strVCF(sb, t, fOffset) + else if (t.virtualType == TCall) + sb.append("./.") + else + sb += '.' 
+ } + i += 1 + } + } + } + def getAttributes(k1: String, attributes: Option[VCFMetadata]): Option[VCFAttributes] = attributes.flatMap(_.get(k1)) def getAttributes(k1: String, k2: String, attributes: Option[VCFMetadata]): Option[VCFFieldAttributes] = getAttributes(k1, attributes).flatMap(_.get(k2)) - def makeHeader(rowType: TStruct, entryType: TStruct, rg: ReferenceGenome, append: Option[String], - metadata: Option[VCFMetadata], sampleIds: Array[String]): String = { - val sb = new StringBuilder() - - sb.append("##fileformat=VCFv4.2\n") - sb.append(s"##hailversion=${ hail.HAIL_PRETTY_VERSION }\n") - - entryType.fields.foreach { f => - val attrs = getAttributes("format", f.name, metadata).getOrElse(Map.empty[String, String]) - sb.append("##FORMAT=\n") - } + def apply(ctx: ExecuteContext, mv: MatrixValue, path: String, append: Option[String], + exportType: String, metadata: Option[VCFMetadata], tabix: Boolean = false) { + val fs = ctx.fs - val filters = getAttributes("filter", metadata).getOrElse(Map.empty[String, Any]).keys.toArray.sorted - filters.foreach { id => - val attrs = getAttributes("filter", id, metadata).getOrElse(Map.empty[String, String]) - sb.append("##FILTER=\n") - } + mv.typ.requireColKeyString() + mv.typ.requireRowKeyVariant() - val tinfo = rowType.selfField("info") match { - case Some(fld) if fld.typ.isInstanceOf[TStruct] => - fld.typ.asInstanceOf[TStruct] - case _ => - TStruct() - } + val typ = mv.typ - tinfo.fields.foreach { f => - val attrs = getAttributes("info", f.name, metadata).getOrElse(Map.empty[String, String]) - sb.append("##INFO=\n") - } + val tg = mv.entryPType - append.foreach { append => - sb.append(append) + checkFormatSignature(tg.virtualType) + + val formatFieldOrder: Array[Int] = tg.fieldIdx.get("GT") match { + case Some(i) => (i +: tg.fields.filter(fd => fd.name != "GT").map(_.index)).toArray + case None => tg.fields.indices.toArray } + val formatFieldString = formatFieldOrder.map(i => tg.fields(i).name).mkString(":") + + val missingFormatStr = if (typ.entryType.size > 0 && typ.entryType.types(formatFieldOrder(0)) == TCall) + "./." + else "." + + val tinfo = + if (typ.rowType.hasField("info")) { + typ.rowType.field("info").typ match { + case _: TStruct => mv.rvRowPType.field("info").typ.asInstanceOf[PStruct] + case t => + warn(s"export_vcf found row field 'info' of type $t, but expected type 'Struct'. Emitting no INFO fields.") + PCanonicalStruct.empty() + } + } else { + warn(s"export_vcf found no row field 'info'. 
Emitting no INFO fields.") + PCanonicalStruct.empty() + } + val rg = mv.referenceGenome val assembly = rg.name - rg.contigs.foreachBetween { c => - sb.append("##contig= + + val localNSamples = mv.nCols + val hasSamples = localNSamples > 0 + + def header: String = { + val sb = new StringBuilder() + + sb.append("##fileformat=VCFv4.2\n") + sb.append(s"##hailversion=${ hail.HAIL_PRETTY_VERSION }\n") + + tg.fields.foreach { f => + val attrs = getAttributes("format", f.name, metadata).getOrElse(Map.empty[String, String]) + sb.append("##FORMAT=\n") + } + + val filters = getAttributes("filter", metadata).getOrElse(Map.empty[String, Any]).keys.toArray.sorted + filters.foreach { id => + val attrs = getAttributes("filter", id, metadata).getOrElse(Map.empty[String, String]) + sb.append("##FILTER=\n") + } + + tinfo.virtualType.fields.foreach { f => + val attrs = getAttributes("info", f.name, metadata).getOrElse(Map.empty[String, String]) + sb.append("##INFO=\n") + } + + append.foreach { f => + using(fs.open(f)) { s => + Source.fromInputStream(s) + .getLines() + .filterNot(_.isEmpty) + .foreach { line => + sb.append(line) + sb += '\n' + } + } + } + + rg.contigs.foreachBetween { c => + sb.append("##contig= sb += '\t' sb.append(id) } + sb.result() } - sb.result() - } - def lookupVAField(rowType: TStruct, fieldName: String, vcfColName: String, expectedTypeOpt: Option[Type]): (Boolean, Int) = { - rowType.fieldIdx.get(fieldName) match { - case Some(idx) => - val t = rowType.types(idx) - if (expectedTypeOpt.forall(t == _)) // FIXME: make sure this is right - (true, idx) - else { - warn(s"export_vcf found row field $fieldName with type '$t', but expected type ${ expectedTypeOpt.get }. " + - s"Emitting missing $vcfColName.") - (false, 0) + val fieldIdx = typ.rowType.fieldIdx + + def lookupVAField(fieldName: String, vcfColName: String, expectedTypeOpt: Option[Type]): (Boolean, Int) = { + fieldIdx.get(fieldName) match { + case Some(idx) => + val t = typ.rowType.types(idx) + if (expectedTypeOpt.forall(t == _)) // FIXME: make sure this is right + (true, idx) + else { + warn(s"export_vcf found row field $fieldName with type '$t', but expected type ${ expectedTypeOpt.get }. " + + s"Emitting missing $vcfColName.") + (false, 0) + } + case None => (false, 0) + } + } + val filtersType = TSet(TString) + val filtersPType = if (typ.rowType.hasField("filters")) { + assert(typ.rowType.fieldType("filters") == TSet(TString)) + mv.rvRowPType.field("filters").typ.asInstanceOf[PSet] + } else null + + val (idExists, idIdx) = lookupVAField("rsid", "ID", Some(TString)) + val (qualExists, qualIdx) = lookupVAField("qual", "QUAL", Some(TFloat64)) + val (filtersExists, filtersIdx) = lookupVAField("filters", "FILTERS", Some(filtersType)) + val (infoExists, infoIdx) = lookupVAField("info", "INFO", None) + + val fullRowType = mv.rvRowPType + val localEntriesIndex = mv.entriesIdx + val localEntriesType = mv.entryArrayPType + + mv.rvd.mapPartitions { (_, it) => + val sb = new StringBuilder + + val formatDefinedArray = new Array[Boolean](formatFieldOrder.length) + + val rvv = new RegionValueVariant(fullRowType) + it.map { ptr => + sb.clear() + + rvv.set(ptr) + + sb.append(rvv.contig()) + sb += '\t' + sb.append(rvv.position()) + sb += '\t' + + if (idExists && fullRowType.isFieldDefined(ptr, idIdx)) { + val idOffset = fullRowType.loadField(ptr, idIdx) + sb.append(fullRowType.types(idIdx).asInstanceOf[PString].loadString(idOffset)) + } else + sb += '.' 
+ + sb += '\t' + sb.append(rvv.alleles()(0)) + sb += '\t' + if (rvv.alleles().length > 1) { + rvv.alleles().tail.foreachBetween(aa => + sb.append(aa))(sb += ',') + } else { + sb += '.' + } + sb += '\t' + + if (qualExists && fullRowType.isFieldDefined(ptr, qualIdx)) { + val qualOffset = fullRowType.loadField(ptr, qualIdx) + sb.append(Region.loadDouble(qualOffset).formatted("%.2f")) + } else + sb += '.' + + sb += '\t' + + if (filtersExists && fullRowType.isFieldDefined(ptr, filtersIdx)) { + val filtersOffset = fullRowType.loadField(ptr, filtersIdx) + val filtersLength = filtersPType.loadLength(filtersOffset) + if (filtersLength == 0) + sb.append("PASS") + else + iterableVCF(sb, filtersPType, filtersLength, filtersOffset, ';') + } else + sb += '.' + + sb += '\t' + + var wroteAnyInfo: Boolean = false + if (infoExists && fullRowType.isFieldDefined(ptr, infoIdx)) { + var wrote: Boolean = false + val infoOffset = fullRowType.loadField(ptr, infoIdx) + var i = 0 + while (i < tinfo.size) { + if (tinfo.isFieldDefined(infoOffset, i)) { + wrote = emitInfo(sb, tinfo.fields(i), tinfo.loadField(infoOffset, i), wrote) + wroteAnyInfo = wroteAnyInfo || wrote + } + i += 1 + } + } + if (!wroteAnyInfo) + sb += '.' + + if (hasSamples) { + sb += '\t' + sb.append(formatFieldString) + + val gsOffset = fullRowType.loadField(ptr, localEntriesIndex) + var i = 0 + while (i < localNSamples) { + sb += '\t' + if (localEntriesType.isElementDefined(gsOffset, i)) + emitGenotype(sb, formatFieldOrder, tg, localEntriesType.loadElement(gsOffset, localNSamples, i), formatDefinedArray, missingFormatStr) + else + sb.append(missingFormatStr) + + i += 1 + } } - case None => (false, 0) + + sb.result() + } + }.writeTable(ctx, path, Some(header), exportType = exportType) + + if (tabix) { + exportType match { + case ExportType.CONCATENATED => + info(s"Writing tabix index for $path") + TabixVCF(fs, path) + case ExportType.PARALLEL_SEPARATE_HEADER | ExportType.PARALLEL_HEADER_IN_SHARD => + val files = fs.glob(path + "/part-*").map(_.getPath.getBytes) + info(s"Writing tabix index for ${ files.length } in $path") + ctx.backend.parallelizeAndComputeWithIndex(ctx.backendContext, ctx.fs, files)({ (pathBytes, _, _, fs) => + TabixVCF(fs, new String(pathBytes)) + Array.empty + }) + case ExportType.PARALLEL_COMPOSABLE => + warn("Writing tabix index for `parallel=composable` is not supported. 
No index will be written.") + } } } } diff --git a/hail/src/main/scala/is/hail/lir/X.scala b/hail/src/main/scala/is/hail/lir/X.scala index cd6cec20189..4f8b1ffb212 100644 --- a/hail/src/main/scala/is/hail/lir/X.scala +++ b/hail/src/main/scala/is/hail/lir/X.scala @@ -56,7 +56,7 @@ class Classx[C](val name: String, val superName: String, var sourceFile: Option[ sourceFile = Some(path) } - def asBytes(writeIRs: Boolean, print: Option[PrintWriter]): Array[(String, Array[Byte])] = { + def asBytes(print: Option[PrintWriter]): Array[(String, Array[Byte])] = { val classes = new mutable.ArrayBuffer[Classx[_]]() classes += this @@ -65,6 +65,8 @@ class Classx[C](val name: String, val superName: String, var sourceFile: Option[ SimplifyControl(m) } + val writeIRs = HailContext.isInitialized && HailContext.getFlag("write_ir_files") != null + if (writeIRs) saveToFile(s"/tmp/hail/${name}.lir") for (m <- methods) { diff --git a/hail/src/main/scala/is/hail/types/encoded/ENumpyBinaryNDArray.scala b/hail/src/main/scala/is/hail/types/encoded/ENumpyBinaryNDArray.scala deleted file mode 100644 index 4b3fdb125aa..00000000000 --- a/hail/src/main/scala/is/hail/types/encoded/ENumpyBinaryNDArray.scala +++ /dev/null @@ -1,77 +0,0 @@ -package is.hail.types.encoded - -import is.hail.annotations.Region -import is.hail.asm4s._ -import is.hail.expr.ir.EmitCodeBuilder -import is.hail.io.{InputBuffer, OutputBuffer} -import is.hail.types.physical.PCanonicalNDArray -import is.hail.types.physical.stypes.{SType, SValue} -import is.hail.types.physical.stypes.concrete.SNDArrayPointer -import is.hail.types.physical.stypes.interfaces.SNDArrayValue -import is.hail.types.physical.stypes.primitives.SFloat64 -import is.hail.types.virtual.{TNDArray, Type} -import is.hail.utils.FastIndexedSeq - -final case class ENumpyBinaryNDArray(nRows: Long, nCols: Long, required: Boolean) extends EType { - type DecodedPType = PCanonicalNDArray - val elementType = EFloat64(true) - - def setRequired(newRequired: Boolean): ENumpyBinaryNDArray = ENumpyBinaryNDArray(nRows, nCols, newRequired) - - def _decodedSType(requestedType: Type): SType = { - val elementPType = elementType.decodedPType(requestedType.asInstanceOf[TNDArray].elementType) - SNDArrayPointer(PCanonicalNDArray(elementPType, 2, false)) - } - - override def _buildEncoder(cb: EmitCodeBuilder, v: SValue, out: Value[OutputBuffer]): Unit = { - val ndarray = v.asInstanceOf[SNDArrayValue] - assert(ndarray.st.elementType == SFloat64) - val i = cb.newLocal[Long]("i") - val j = cb.newLocal[Long]("j") - val writeElemF = elementType.buildEncoder(ndarray.st.elementType, cb.emb.ecb) - - cb.forLoop(cb.assign(i, 0L), i < nRows, cb.assign(i, i + 1L), { - cb.forLoop(cb.assign(j, 0L), j < nCols, cb.assign(j, j + 1L), { - writeElemF(cb, ndarray.loadElement(FastIndexedSeq(i, j), cb), out) - }) - }) - - } - - override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): SValue = { - val st = decodedSType(t).asInstanceOf[SNDArrayPointer] - val pt = st.pType - val readElemF = elementType.buildInplaceDecoder(pt.elementType, cb.emb.ecb) - - val stride0 = cb.newLocal[Long]("stride0", nCols * pt.elementType.byteSize) - val stride1 = cb.newLocal[Long]("stride1", pt.elementType.byteSize) - - val n = cb.newLocal[Long]("length", nRows * nCols) - - val (tFirstElementAddress, tFinisher) = pt.constructDataFunction(IndexedSeq(nRows, nCols), IndexedSeq(stride0, stride1), cb, region) - val currElementAddress = cb.newLocal[Long]("eblockmatrix_ndarray_currElementAddress", 
tFirstElementAddress) - - val i = cb.newLocal[Long]("i") - cb.forLoop(cb.assign(i, 0L), i < n, cb.assign(i, i + 1L), { - readElemF(cb, region, currElementAddress, in) - cb.assign(currElementAddress, currElementAddress + pt.elementType.byteSize) - }) - - tFinisher(cb) - } - - def _buildSkip(cb: EmitCodeBuilder, r: Value[Region], in: Value[InputBuffer]): Unit = { - ??? - } - - def _asIdent = s"ndarray_of_${ elementType.asIdent }" - - def _toPretty = s"ENDArray[$elementType]" - - override def _pretty(sb: StringBuilder, indent: Int, compact: Boolean = false) { - sb.append("ENDArray[") - elementType.pretty(sb, indent, compact) - sb.append("]") - } - -} diff --git a/hail/src/main/scala/is/hail/types/encoded/EType.scala b/hail/src/main/scala/is/hail/types/encoded/EType.scala index 48897b9eea7..04978a408d7 100644 --- a/hail/src/main/scala/is/hail/types/encoded/EType.scala +++ b/hail/src/main/scala/is/hail/types/encoded/EType.scala @@ -210,7 +210,7 @@ object EType { val f = et.buildEncoder(pc.st, mb.ecb) f(cb, pc, out) } - val func = fb.result(ctx) + val func = fb.result() encoderCache.put(k, func) func } @@ -247,7 +247,7 @@ object EType { pt.store(cb, region, pc, false) } - val r = (pt, fb.result(ctx)) + val r = (pt, fb.result()) decoderCache.put(k, r) r } diff --git a/hail/src/main/scala/is/hail/types/physical/PType.scala b/hail/src/main/scala/is/hail/types/physical/PType.scala index 911da7c13ee..53bc962820c 100644 --- a/hail/src/main/scala/is/hail/types/physical/PType.scala +++ b/hail/src/main/scala/is/hail/types/physical/PType.scala @@ -334,7 +334,7 @@ object PType { val srcAddr = fb.apply_method.getCodeParam[Long](2) cpt.store(cb, region, t.loadCheapSCode(cb, srcAddr), deepCopy = false) } - Some(fb.result(ctx)) + Some(fb.result()) } } } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/SCode.scala b/hail/src/main/scala/is/hail/types/physical/stypes/SCode.scala index d2e0fb362a7..a9de6f56bc5 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/SCode.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/SCode.scala @@ -3,7 +3,6 @@ package is.hail.types.physical.stypes import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir.EmitCodeBuilder -import is.hail.types.physical.stypes.concrete.SRNGStateValue import is.hail.types.physical.stypes.interfaces._ import is.hail.types.physical.stypes.primitives._ @@ -90,8 +89,6 @@ trait SValue { def asStream: SStreamValue = asInstanceOf[SStreamValue] - def asRNGState: SRNGStateValue = asInstanceOf[SRNGStateValue] - def castTo(cb: EmitCodeBuilder, region: Value[Region], destType: SType): SValue = castTo(cb, region, destType, false) diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SRNGState.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SRNGState.scala deleted file mode 100644 index 43bcafa65b1..00000000000 --- a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SRNGState.scala +++ /dev/null @@ -1,117 +0,0 @@ -package is.hail.types.physical.stypes.concrete - -import is.hail.annotations.Region -import is.hail.asm4s._ -import is.hail.expr.ir.{EmitCodeBuilder, Threefry} -import is.hail.types.TypeWithRequiredness -import is.hail.types.physical.PType -import is.hail.types.physical.stypes.{SSettable, SType, SValue} -import is.hail.types.virtual.{TRNGState, Type} -import is.hail.utils.{Bitstring, toRichIterable} - -import scala.collection.mutable - -import scala.collection.mutable - -object SRNGState { - val staticTweak = -1L - val finalBlockNoPadTweak = -2L - val 
finalBlockPaddedTweak = -3L -} - -final case class SRNGState( - key: IndexedSeq[Long], - numWordsInLastDynBlock: Int -) extends SType { - assert(key.length == 4) - assert(numWordsInLastDynBlock <= 4 && numWordsInLastDynBlock >= 0) - - def virtualType: Type = TRNGState - - override protected[stypes] def _coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SValue, deepCopy: Boolean): SValue = ??? - - override def settableTupleTypes(): IndexedSeq[TypeInfo[_]] = - Array.fill(4 + numWordsInLastDynBlock)(typeInfo[Long]) - - override def fromSettables(settables: IndexedSeq[Settable[_]]): SSettable = ??? - - override def fromValues(values: IndexedSeq[Value[_]]): SValue = ??? - - override def storageType(): PType = ??? - - override def copiedType: SType = ??? - - override def castRename(t: Type): SType = ??? - - override protected[stypes] def _typeWithRequiredness: TypeWithRequiredness = ??? - - override def containsPointers: Boolean = false -} - -object SRNGStateValue { - def apply(cb: EmitCodeBuilder, key: IndexedSeq[Long]): SRNGStateValue = { - val typ = SRNGState(key, 0) - new SRNGStateValue( - typ, - Array.fill[Value[Long]](4)(0), - Array[Value[Long]](), - false, - 0) - } -} - -final case class SRNGStateValue( - st: SRNGState, - runningSum: IndexedSeq[Value[Long]], - lastDynBlock: IndexedSeq[Value[Long]], - hasStaticSplit: Boolean, - numDynBlocks: Int, -) extends SValue { - assert(runningSum.length == 4) - assert(lastDynBlock.length == st.numWordsInLastDynBlock) - - override def valueTuple: IndexedSeq[Value[_]] = - runningSum ++ lastDynBlock - - override def sizeToStoreInBytes(cb: EmitCodeBuilder) = ??? - - def splitStatic(cb: EmitCodeBuilder, idx: Long): SRNGStateValue = { - assert(!hasStaticSplit) - val x = Array.ofDim[Long](4) - x(0) = idx - Threefry.encrypt(st.key, SRNGState.staticTweak, x) - val newDynBlocksSum = Array.tabulate[Value[Long]](4)(i => cb.memoize(runningSum(i) ^ x(i))) - copy( - runningSum = newDynBlocksSum, - hasStaticSplit = true) - } - - def splitDyn(cb: EmitCodeBuilder, idx: Value[Long]): SRNGStateValue = { - if (st.numWordsInLastDynBlock < 4) { - return copy( - st = st.copy(numWordsInLastDynBlock = st.numWordsInLastDynBlock + 1), - lastDynBlock = lastDynBlock :+ idx) - } - val x = Array.tabulate[Settable[Long]](4)(i => cb.newLocal[Long](s"splitDyn_x$i", lastDynBlock(i))) - Threefry.encrypt(cb, st.key, numDynBlocks.toLong, x) - for (i <- 0 until 4) cb.assign(x(i), x(i) ^ runningSum(i)) - copy( - st = st.copy(numWordsInLastDynBlock = 1), - runningSum = x, - lastDynBlock = Array(idx), - numDynBlocks = numDynBlocks + 1) - } - - def rand(cb: EmitCodeBuilder): IndexedSeq[Value[Long]] = { - val x = Array.tabulate[Settable[Long]](4)(i => cb.newLocal[Long](s"rand_x$i", runningSum(i))) - if (st.numWordsInLastDynBlock == 4) { - for (i <- lastDynBlock.indices) cb.assign(x(i), x(i) ^ lastDynBlock(i)) - Threefry.encrypt(cb, st.key, SRNGState.finalBlockNoPadTweak, x) - } else { - for (i <- lastDynBlock.indices) cb.assign(x(i), x(i) ^ lastDynBlock(i)) - cb.assign(x(lastDynBlock.size), x(lastDynBlock.size) ^ (1L << 63)) - Threefry.encrypt(cb, st.key, SRNGState.finalBlockPaddedTweak, x) - } - x - } -} diff --git a/hail/src/main/scala/is/hail/types/virtual/TRNGState.scala b/hail/src/main/scala/is/hail/types/virtual/TRNGState.scala deleted file mode 100644 index e82ba110146..00000000000 --- a/hail/src/main/scala/is/hail/types/virtual/TRNGState.scala +++ /dev/null @@ -1,12 +0,0 @@ -package is.hail.types.virtual - -case object TRNGState extends Type { - override def _toPretty = 
"RNGState" - - override def pyString(sb: StringBuilder): Unit = { - sb.append("rng_state") - } - def _typeCheck(a: Any): Boolean = ??? - def mkOrdering(missingEqual: Boolean): is.hail.annotations.ExtendedOrdering = ??? - def scalaClassTag: scala.reflect.ClassTag[_ <: AnyRef] = ??? -} diff --git a/hail/src/main/scala/is/hail/utils/Bitstring.scala b/hail/src/main/scala/is/hail/utils/Bitstring.scala deleted file mode 100644 index 09427161b62..00000000000 --- a/hail/src/main/scala/is/hail/utils/Bitstring.scala +++ /dev/null @@ -1,90 +0,0 @@ -package is.hail.utils - -import scala.collection.mutable - -object Bitstring { - def apply(string: String): Bitstring = { - assert(string.forall(c => c == '0' || c == '1')) - val bitstring = mutable.ArrayBuilder.make[Long]() - var pos: Int = 0 - while (string.length - pos > 64) { - bitstring += java.lang.Long.parseUnsignedLong(string.slice(pos, pos + 64), 2) - pos += 64 - } - val lastWord = java.lang.Long.parseUnsignedLong(string.slice(pos, string.length)) - val bitsInLastWord = string.length - pos - bitstring += (lastWord << (64 - bitsInLastWord)) - new Bitstring(bitstring.result(), bitsInLastWord) - } -} - -case class Bitstring(contents: IndexedSeq[Long], bitsInLastWord: Int) { - def numWords = contents.length - def length = (contents.length - 1) * 64 + bitsInLastWord - - override def toString: String = { - if (contents.isEmpty) return "Bitstring()" - val result = new mutable.StringBuilder("Bitstring(") - var i = 0 - while (i < contents.length - 1) { - result ++= contents(i).toBinaryString - i += 1 - } - i = 0 - var lastWord = contents.last - val bits = Array('0', '1') - while (i < bitsInLastWord) { - result += bits((lastWord >>> 63).toInt) - lastWord <<= 1 - i += 1 - } - result += ')' - result.result - } - - def ++(rhs: Bitstring): Bitstring = { - if (length == 0) return rhs - if (rhs.length == 0) return this - if (bitsInLastWord < 64) { - val newNumWords = (length + rhs.length + 63) >> 6 - val newContents = Array.ofDim[Long](newNumWords) - for (i <- 0 until (numWords - 2)) { - newContents(i) = contents(i) - } - newContents(numWords - 1) = contents.last & (rhs.contents.head >>> bitsInLastWord) - for (i <- 0 until (rhs.numWords - 2)) { - newContents(numWords + i) = - (rhs.contents(i) << (64 - bitsInLastWord)) & - (rhs.contents(i + 1) >>> bitsInLastWord) - } - var newBitsInLastWord = bitsInLastWord + rhs.bitsInLastWord - if (newBitsInLastWord > 64) { - newContents(numWords + rhs.numWords - 1) = rhs.contents.last << (64 - bitsInLastWord) - newBitsInLastWord = newBitsInLastWord - 64 - } - new Bitstring(newContents, newBitsInLastWord) - } else { - new Bitstring(contents ++ rhs.contents, rhs.bitsInLastWord) - } - } - - def popWords(n: Int): (Array[Long], Bitstring) = { - assert(n < numWords || (n == numWords && bitsInLastWord == 64)) - val result = contents.slice(0, n).toArray - val newContents = contents.slice(n, numWords) - val newBitsInLastWord = if (n < numWords) bitsInLastWord else 0 - (result, new Bitstring(newContents, newBitsInLastWord)) - } - - def padTo(n: Int): Array[Long] = { - assert(n > numWords || (n == numWords && bitsInLastWord < 64)) - val result = Array.ofDim[Long](n) - Array.copy(contents, 0, result, 0, numWords) - if (bitsInLastWord == 64) { - result(numWords) = 1L << 63 - } else { - result(numWords - 1) = result(numWords - 1) & (1L << (63 - bitsInLastWord)) - } - result - } -} diff --git a/hail/src/main/scala/is/hail/utils/ErrorHandling.scala b/hail/src/main/scala/is/hail/utils/ErrorHandling.scala index b5d1c3433b8..f6b92450b54 100644 
--- a/hail/src/main/scala/is/hail/utils/ErrorHandling.scala +++ b/hail/src/main/scala/is/hail/utils/ErrorHandling.scala @@ -7,12 +7,6 @@ class HailException(val msg: String, val logMsg: Option[String], cause: Throwabl def this(msg: String, errorId: Int) = this(msg, None, null, errorId) } -class HailWorkerException( - val shortMessage: String, - val expandedMessage: String, - val errorId: Int -) extends RuntimeException(shortMessage) - trait ErrorHandling { def fatal(msg: String): Nothing = throw new HailException(msg) diff --git a/hail/src/main/scala/is/hail/utils/TextTableReader.scala b/hail/src/main/scala/is/hail/utils/TextTableReader.scala new file mode 100644 index 00000000000..b983e71f45b --- /dev/null +++ b/hail/src/main/scala/is/hail/utils/TextTableReader.scala @@ -0,0 +1,444 @@ +package is.hail.expr.ir + +import java.util.regex.Pattern +import is.hail.asm4s.HailClassLoader +import is.hail.HailContext +import is.hail.annotations.{Region, RegionValueBuilder} +import is.hail.backend.ExecuteContext +import is.hail.backend.spark.SparkBackend +import is.hail.expr.TableAnnotationImpex +import is.hail.expr.ir.lowering.TableStage +import is.hail.io.fs.{FS, FileStatus} +import is.hail.rvd.RVDPartitioner +import is.hail.types._ +import is.hail.types.physical.{PCanonicalStringRequired, PCanonicalStruct, PStruct, PType} +import is.hail.types.virtual._ +import is.hail.utils.StringEscapeUtils._ +import is.hail.utils._ +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.Row +import org.json4s.{DefaultFormats, Formats, JValue} + +import scala.collection.mutable +import scala.util.matching.Regex + +abstract class TextReaderOptions { + val comment: Array[String] + val hasHeader: Boolean + + private lazy val commentStartsWith: Array[String] = comment.filter(_.length == 1) + private lazy val commentRegexes: Array[Regex] = comment.filter(_.length > 1).map(_.r) + + final def isComment(line: String): Boolean = + commentStartsWith.exists(pattern => line.startsWith(pattern)) || commentRegexes.exists(pattern => pattern.matches(line)) +} + +case class TextTableReaderParameters( + files: Array[String], + typeMapStr: Map[String, String], + comment: Array[String], + separator: String, + missing: Set[String], + hasHeader: Boolean, + nPartitionsOpt: Option[Int], + quoteStr: String, + skipBlankLines: Boolean, + forceBGZ: Boolean, + filterAndReplace: TextInputFilterAndReplace, + forceGZ: Boolean, + sourceFileField: Option[String]) extends TextReaderOptions { + @transient val typeMap: Map[String, Type] = typeMapStr.mapValues(s => IRParser.parseType(s)).map(identity) + + val quote: java.lang.Character = if (quoteStr != null) quoteStr(0) else null + + def nPartitions: Int = nPartitionsOpt.getOrElse(HailContext.backend.defaultParallelism) +} + +case class TextTableReaderMetadata(fileStatuses: Array[FileStatus], header: String, rowPType: PStruct) { + def fullType: TableType = TableType(rowType = rowPType.virtualType, globalType = TStruct(), key = FastIndexedSeq()) +} + +object TextTableReader { + + def splitLine(s: String, separator: String, quote: java.lang.Character): Array[String] = + splitLine(s, separator, quote, new BoxedArrayBuilder[String], new StringBuilder) + + def splitLine( + s: String, + separator: String, + quote: java.lang.Character, + ab: BoxedArrayBuilder[String], + sb: StringBuilder): Array[String] = { + ab.clear() + sb.clear() + + val matchSep: Int => Int = separator.length match { + case 0 => fatal("Hail does not currently support 0-character separators") + case 1 => + val sepChar = 
separator(0) + (i: Int) => if (s(i) == sepChar) 1 else -1 + case _ => + val p = Pattern.compile(separator) + val m = p.matcher(s) + + { (i: Int) => + m.region(i, s.length) + if (m.lookingAt()) + m.end() - m.start() + else + -1 + } + } + + var i = 0 + while (i < s.length) { + val c = s(i) + + val l = matchSep(i) + if (l != -1) { + i += l + ab += sb.result() + sb.clear() + } else if (quote != null && c == quote) { + if (sb.nonEmpty) + fatal(s"opening quote character '$quote' not at start of field") + i += 1 // skip quote + + while (i < s.length && s(i) != quote) { + sb += s(i) + i += 1 + } + + if (i == s.length) + fatal(s"missing terminating quote character '$quote'") + i += 1 // skip quote + + // full field must be quoted + if (i < s.length) { + val l = matchSep(i) + if (l == -1) + fatal(s"terminating quote character '$quote' not at end of field") + i += l + ab += sb.result() + sb.clear() + } + } else { + sb += c + i += 1 + } + } + ab += sb.result() + + ab.result() + } + + type Matcher = String => Boolean + val booleanMatcher: Matcher = x => try { + x.toBoolean + true + } catch { + case e: IllegalArgumentException => false + } + val int32Matcher: Matcher = x => try { + Integer.parseInt(x) + true + } catch { + case e: NumberFormatException => false + } + val int64Matcher: Matcher = x => try { + java.lang.Long.parseLong(x) + true + } catch { + case e: NumberFormatException => false + } + val float64Matcher: Matcher = x => try { + java.lang.Double.parseDouble(x) + true + } catch { + case e: NumberFormatException => false + } + + def imputeTypes( + fs: FS, + fileStatuses: Array[FileStatus], + params: TextTableReaderParameters, + headerLine: String, + columns: Array[String], + delimiter: String, + missing: Set[String], + quote: java.lang.Character + ): Array[(Option[Type], Boolean)] = { + val nFields = columns.length + + val matchTypes: Array[Type] = Array(TBoolean, TInt32, TInt64, TFloat64) + val matchers: Array[String => Boolean] = Array( + booleanMatcher, + int32Matcher, + int64Matcher, + float64Matcher) + val nMatchers = matchers.length + + val lines = GenericLines.read(fs, fileStatuses, nPartitions = params.nPartitionsOpt, + blockSizeInMB = None, minPartitions = None, gzAsBGZ = params.forceBGZ, allowSerialRead = params.forceGZ) + + val linesRDD: RDD[GenericLine] = lines.toRDD(fs) + + val (imputation, allDefined) = linesRDD.mapPartitions { it => + val allDefined = Array.fill(nFields)(true) + val ma = MultiArray2.fill[Boolean](nFields, nMatchers + 1)(true) + val ab = new BoxedArrayBuilder[String] + val sb = new StringBuilder + it.foreach { genericLine => + val line = genericLine.toString + + if (!params.isComment(line) && + (!params.hasHeader || line != headerLine) && + !(params.skipBlankLines && line.isEmpty)) { + + try { + val split = splitLine(line, delimiter, quote, ab, sb) + if (split.length != nFields) + fatal(s"expected $nFields fields, but found ${ split.length }") + + var i = 0 + while (i < nFields) { + val field = split(i) + if (!missing.contains(field)) { + var j = 0 + while (j < nMatchers) { + ma.update(i, j, ma(i, j) && matchers(j)(field)) + j += 1 + } + ma.update(i, nMatchers, false) + } else + allDefined(i) = false + i += 1 + } + } catch { + case e: Throwable => + fatal( + s"""Caught exception while reading ${ genericLine.file }: ${ e.getMessage } + | offending line: @1""".stripMargin, line, e) + } + } + } + Iterator.single((ma, allDefined)) + } + .reduce({ case ((ma1, allDefined1), (ma2, allDefined2)) => + var i = 0 + while (i < nFields) { + var j = 0 + while (j < nMatchers) { 
+ ma1.update(i, j, ma1(i, j) && ma2(i, j)) + j += 1 + } + ma1.update(i, nMatchers, ma1(i, nMatchers) && ma2(i, nMatchers)) + i += 1 + } + (ma1, Array.tabulate(allDefined1.length)(i => (allDefined1(i) && allDefined2(i)))) + }) + + imputation.rowIndices.map { i => + someIf(!imputation(i, nMatchers), + (0 until nMatchers).find(imputation(i, _)) + .map(matchTypes) + .getOrElse(TString)) + }.zip(allDefined).toArray + } + + def readMetadata(fs: FS, options: TextTableReaderParameters): TextTableReaderMetadata = { + val TextTableReaderParameters(files, _, _, separator, missing, hasHeader, _, _, skipBlankLines, forceBGZ, filterAndReplace, forceGZ, sourceFileField) = options + + val fileStatuses: Array[FileStatus] = { + val status = fs.globAllStatuses(files) + if (status.isEmpty) + fatal("arguments refer to no files") + if (!forceBGZ) { + status.foreach { status => + val file = status.getPath + if (file.endsWith(".gz")) + checkGzippedFile(fs, file, forceGZ, forceBGZ) + } + } + status + } + + val types = options.typeMap + val quote = options.quote + + val firstFile = fileStatuses.head.getPath + val header = fs.readLines(firstFile, filterAndReplace) { lines => + val filt = lines.filter(line => !options.isComment(line.value) && !(skipBlankLines && line.value.isEmpty)) + + if (filt.isEmpty) + fatal( + s"""invalid file: no lines remaining after comment filter + | Offending file: $firstFile""".stripMargin) + else + filt.next().value + } + + val splitHeader = splitLine(header, separator, quote) + val preColumns = if (!hasHeader) { + splitHeader + .indices + .map(i => s"f$i") + .toArray + } else splitHeader.map(unescapeString) + + val (columns, duplicates) = mangle(preColumns) + if (duplicates.nonEmpty) { + warn(s"Found ${ duplicates.length } duplicate ${ plural(duplicates.length, "column") }. 
Mangled columns follows:\n @1", + duplicates.map { case (pre, post) => s"'$pre' -> '$post'" }.truncatable("\n ")) + } + + val sourceTypeOption = sourceFileField.map(f => (f, PCanonicalStringRequired)).toIndexedSeq + val namesAndTypes = + columns.map { c => + types.get(c) match { + case Some(t) => + (c, PType.canonical(t)) + case None => + (c, PType.canonical(TString)) + } + } + TextTableReaderMetadata(fileStatuses, header, PCanonicalStruct(true, (namesAndTypes ++ sourceTypeOption): _*)) + } + + def apply(fs: FS, params: TextTableReaderParameters): TextTableReader = { + val metadata = TextTableReader.readMetadata(fs, params) + new TextTableReader(params, metadata.header, metadata.fileStatuses, metadata.rowPType) + } + + def fromJValue(fs: FS, jv: JValue): TextTableReader = { + implicit val formats: Formats = TableReader.formats + val params = jv.extract[TextTableReaderParameters] + TextTableReader(fs, params) + } +} + +class TextTableReader( + val params: TextTableReaderParameters, + header: String, + fileStatuses: IndexedSeq[FileStatus], + fullRowPType: PStruct +) extends TableReader { + val fullType: TableType = TableType(fullRowPType.virtualType, FastIndexedSeq.empty, TStruct()) + + def pathsUsed: Seq[String] = params.files + + val partitionCounts: Option[IndexedSeq[Long]] = None + + def rowAndGlobalPTypes(ctx: ExecuteContext, requestedType: TableType): (PStruct, PStruct) = { + PType.canonical(requestedType.rowType, required = true).asInstanceOf[PStruct] -> + PCanonicalStruct.empty(required = true) + } + + def renderShort(): String = defaultRender() + + def executeGeneric(ctx: ExecuteContext): GenericTableValue = { + val fs = ctx.fs + + val lines = GenericLines.read(fs, fileStatuses, nPartitions = params.nPartitionsOpt, + blockSizeInMB = None, minPartitions = None, gzAsBGZ = params.forceBGZ, allowSerialRead = params.forceGZ) + val partitioner: Option[RVDPartitioner] = None + val globals: TStruct => Row = _ => Row.empty + + val localParams = params + val localHeader = header + val localFullRowType = fullRowPType + val bodyPType: TStruct => PStruct = (requestedRowType: TStruct) => localFullRowType.subsetTo(requestedRowType).asInstanceOf[PStruct] + val linesBody = lines.body + val nFieldOrig = localFullRowType.size - (params.sourceFileField.isDefined).toInt + + val transformer = localParams.filterAndReplace.transformer() + val body = { (requestedRowType: TStruct) => + + val includeFileName = localParams.sourceFileField.exists(requestedRowType.hasField) + val dataFieldNames = if (includeFileName) requestedRowType.fieldNames.init else requestedRowType.fieldNames + val useColIndices = dataFieldNames.map(localFullRowType.virtualType.fieldIdx) + val rowFields = requestedRowType.fields.toArray + val requestedPType = bodyPType(requestedRowType) + + { (region: Region, theHailClassLoader: HailClassLoader, fs: FS, context: Any) => + + val rvb = new RegionValueBuilder(region) + val ab = new BoxedArrayBuilder[String] + val sb = new StringBuilder + linesBody(fs, context) + .filter { bline => + val line = transformer(bline.toString) + if (line == null || localParams.isComment(line) || + (localParams.hasHeader && localHeader == line) || + (localParams.skipBlankLines && line.isEmpty)) + false + else { + try { + val sp = TextTableReader.splitLine(line, localParams.separator, localParams.quote, ab, sb) + if (sp.length != nFieldOrig) + fatal(s"expected $nFieldOrig fields, but found ${ sp.length } fields") + + rvb.start(requestedPType) + rvb.startStruct() + + var i = 0 + while (i < useColIndices.length) { + 
val f = rowFields(i) + val name = f.name + val typ = f.typ + val field = sp(useColIndices(i)) + try { + if (localParams.missing.contains(field)) + rvb.setMissing() + else + rvb.addAnnotation(typ, TableAnnotationImpex.importAnnotation(field, typ)) + } catch { + case e: Exception => + fatal(s"""${ e.getClass.getName }: could not convert "$field" to $typ in column "$name" """, e) + } + i += 1 + } + + if (includeFileName) + rvb.addString(bline.file) + + rvb.endStruct() + rvb.end() + true + } catch { + case e: Throwable => + fatal( + s"""Caught exception while reading ${ bline.file }: ${ e.getMessage } + | offending line: @1""".stripMargin, line, e) + } + } + }.map(_ => rvb.result().offset) + } + } + new GenericTableValue(partitioner = partitioner, + fullTableType = fullType, + globals = globals, + contextType = lines.contextType, + contexts = lines.contexts, + bodyPType = bodyPType, + body = body) + } + + override def lower(ctx: ExecuteContext, requestedType: TableType): TableStage = + executeGeneric(ctx).toTableStage(ctx, requestedType) + + def apply(tr: TableRead, ctx: ExecuteContext): TableValue = + executeGeneric(ctx).toTableValue(ctx, tr.typ) + + override def toJValue: JValue = { + implicit val formats: Formats = DefaultFormats + decomposeWithName(params, "TextTableReader") + } + + override def hashCode(): Int = params.hashCode() + + override def equals(that: Any): Boolean = that match { + case that: TextTableReader => params == that.params + case _ => false + } +} diff --git a/hail/src/main/scala/is/hail/utils/richUtils/RichContextRDD.scala b/hail/src/main/scala/is/hail/utils/richUtils/RichContextRDD.scala index 27d69292413..07320e2b245 100644 --- a/hail/src/main/scala/is/hail/utils/richUtils/RichContextRDD.scala +++ b/hail/src/main/scala/is/hail/utils/richUtils/RichContextRDD.scala @@ -93,6 +93,7 @@ class RichContextRDD[T: ClassTag](crdd: ContextRDD[T]) { ): Array[FileWriteMetadata] = { val localTmpdir = ctx.localTmpdir val fs = ctx.fs + val fsBc = ctx.fsBc fs.mkDir(path + "/parts") if (idxRelPath != null) diff --git a/hail/src/test/resources/sampleheaderdiffelem.txt b/hail/src/test/resources/sampleheaderdiffelem.txt deleted file mode 100644 index 8cedbe05608..00000000000 --- a/hail/src/test/resources/sampleheaderdiffelem.txt +++ /dev/null @@ -1,11 +0,0 @@ -col000000 col000009 col000002 col000003 col000004 -row000000 0 0 0 0 0 -row000001 0 0 0 0 0 -row000002 0 0 0 0 0 -row000003 0 0 0 0 0 -row000004 0 0 0 0 0 -row000005 0 0 0 0 0 -row000006 0 0 0 0 0 -row000007 0 0 0 0 0 -row000008 0 0 0 0 0 -row000009 0 0 0 0 0 \ No newline at end of file diff --git a/hail/src/test/resources/samplenonintentries.txt b/hail/src/test/resources/samplenonintentries.txt deleted file mode 100644 index 247a1cd0741..00000000000 --- a/hail/src/test/resources/samplenonintentries.txt +++ /dev/null @@ -1,6 +0,0 @@ -col000000 col000001 col000002 col000003 col000004 col000005 -row000000 0 0 0 abc 0 0 -row000001 0 0 0 0 0 0 -row000002 0 0 0 0 0 0 -row000003 0 0 0 0 0 0 -row000004 0 0 0 0 0 0 \ No newline at end of file diff --git a/hail/src/test/scala/is/hail/HailSuite.scala b/hail/src/test/scala/is/hail/HailSuite.scala index 0f5e51c0ede..8ee2e12d88b 100644 --- a/hail/src/test/scala/is/hail/HailSuite.scala +++ b/hail/src/test/scala/is/hail/HailSuite.scala @@ -1,20 +1,12 @@ package is.hail -import java.io.{File, PrintWriter} - -import breeze.linalg.{DenseMatrix, Matrix, Vector} -import is.hail.ExecStrategy.ExecStrategy -import is.hail.annotations._ import is.hail.asm4s.HailClassLoader -import is.hail.expr.ir._ +import 
is.hail.annotations.{Region, RegionPool} import is.hail.backend.{BroadcastValue, ExecuteContext} import is.hail.backend.spark.SparkBackend -import is.hail.types.virtual._ -import is.hail.utils._ +import is.hail.utils.{ExecutionTimer, using} import is.hail.io.fs.FS -import is.hail.TestUtils._ import org.apache.spark.SparkContext -import org.apache.spark.sql.Row import org.scalatest.testng.TestNGSuite import org.testng.ITestContext import org.testng.annotations.{AfterMethod, BeforeClass, BeforeMethod} @@ -38,7 +30,7 @@ object HailSuite { lazy val hc: HailContext = { val hc = withSparkBackend() - hc.sparkBackend("HailSuite.hc").setFlag("lower", "1") + hc.flags.set("lower", "1") hc.checkRVDKeys = true hc } @@ -71,7 +63,7 @@ class HailSuite extends TestNGSuite { timer = new ExecutionTimer("HailSuite") assert(ctx == null) pool = RegionPool() - ctx = backend.createExecuteContextForTests(timer, Region(pool=pool)) + ctx = new ExecuteContext(backend.tmpdir, backend.localTmpdir, backend, fs, Region(pool=pool), timer, null, HailSuite.theHailClassLoader) } @AfterMethod @@ -91,205 +83,4 @@ class HailSuite extends TestNGSuite { hc.sparkBackend("HailSuite.withExecuteContext").withExecuteContext(timer)(f) } } - - def assertEvalsTo( - x: IR, - env: Env[(Any, Type)], - args: IndexedSeq[(Any, Type)], - agg: Option[(IndexedSeq[Row], TStruct)], - expected: Any - )( - implicit execStrats: Set[ExecStrategy] - ) { - - TypeCheck(ctx, x, BindingEnv(env.mapValues(_._2), agg = agg.map(_._2.toEnv))) - - val t = x.typ - assert(t == TVoid || t.typeCheck(expected), s"$t, $expected") - - ExecuteContext.scoped() { ctx => - val filteredExecStrats: Set[ExecStrategy] = - if (HailContext.backend.isInstanceOf[SparkBackend]) - execStrats - else { - info("skipping interpret and non-lowering compile steps on non-spark backend") - execStrats.intersect(ExecStrategy.backendOnly) - } - - filteredExecStrats.foreach { strat => - try { - val res = strat match { - case ExecStrategy.Interpret => - assert(agg.isEmpty) - Interpret[Any](ctx, x, env, args) - case ExecStrategy.InterpretUnoptimized => - assert(agg.isEmpty) - Interpret[Any](ctx, x, env, args, optimize = false) - case ExecStrategy.JvmCompile => - assert(Forall(x, node => Compilable(node))) - eval(x, env, args, agg, bytecodePrinter = - Option(ctx.getFlag("jvm_bytecode_dump")) - .map { path => - val pw = new PrintWriter(new File(path)) - pw.print(s"/* JVM bytecode dump for IR:\n${Pretty(ctx, x)}\n */\n\n") - pw - }, true, ctx) - case ExecStrategy.JvmCompileUnoptimized => - assert(Forall(x, node => Compilable(node))) - eval(x, env, args, agg, bytecodePrinter = - Option(ctx.getFlag("jvm_bytecode_dump")) - .map { path => - val pw = new PrintWriter(new File(path)) - pw.print(s"/* JVM bytecode dump for IR:\n${Pretty(ctx, x)}\n */\n\n") - pw - }, - optimize = false, ctx) - case ExecStrategy.LoweredJVMCompile => - loweredExecute(ctx, x, env, args, agg) - } - if (t != TVoid) { - assert(t.typeCheck(res), s"\n t=$t\n result=$res\n strategy=$strat") - assert(t.valuesSimilar(res, expected), s"\n result=$res\n expect=$expected\n strategy=$strat)") - } - } catch { - case e: Exception => - error(s"error from strategy $strat") - if (execStrats.contains(strat)) throw e - } - } - } - } - - def assertNDEvals(nd: IR, expected: Any) - (implicit execStrats: Set[ExecStrategy]) { - assertNDEvals(nd, Env.empty, FastIndexedSeq(), None, expected) - } - - def assertNDEvals(nd: IR, expected: (Any, IndexedSeq[Long])) - (implicit execStrats: Set[ExecStrategy]) { - if (expected == null) - assertNDEvals(nd, 
Env.empty, FastIndexedSeq(), None, null, null) - else - assertNDEvals(nd, Env.empty, FastIndexedSeq(), None, expected._2, expected._1) - } - - def assertNDEvals(nd: IR, args: IndexedSeq[(Any, Type)], expected: Any) - (implicit execStrats: Set[ExecStrategy]) { - assertNDEvals(nd, Env.empty, args, None, expected) - } - - def assertNDEvals(nd: IR, agg: (IndexedSeq[Row], TStruct), expected: Any) - (implicit execStrats: Set[ExecStrategy]) { - assertNDEvals(nd, Env.empty, FastIndexedSeq(), Some(agg), expected) - } - - def assertNDEvals( - nd: IR, - env: Env[(Any, Type)], - args: IndexedSeq[(Any, Type)], - agg: Option[(IndexedSeq[Row], TStruct)], - expected: Any - )( - implicit execStrats: Set[ExecStrategy] - ): Unit = { - var e: IndexedSeq[Any] = expected.asInstanceOf[IndexedSeq[Any]] - val dims = Array.fill(nd.typ.asInstanceOf[TNDArray].nDims) { - val n = e.length - if (n != 0 && e.head.isInstanceOf[IndexedSeq[_]]) - e = e.head.asInstanceOf[IndexedSeq[Any]] - n.toLong - } - assertNDEvals(nd, Env.empty, FastIndexedSeq(), agg, dims, expected) - } - - def assertNDEvals( - nd: IR, - env: Env[(Any, Type)], - args: IndexedSeq[(Any, Type)], - agg: Option[(IndexedSeq[Row], TStruct)], - dims: IndexedSeq[Long], - expected: Any - )( - implicit execStrats: Set[ExecStrategy] - ): Unit = { - val arrayIR = if (expected == null) nd else { - val refs = Array.fill(nd.typ.asInstanceOf[TNDArray].nDims) { Ref(genUID(), TInt32) } - Let("nd", nd, - dims.zip(refs).foldRight[IR](NDArrayRef(Ref("nd", nd.typ), refs.map(Cast(_, TInt64)), -1)) { - case ((n, ref), accum) => - ToArray(StreamMap(rangeIR(n.toInt), ref.name, accum)) - }) - } - assertEvalsTo(arrayIR, env, args, agg, expected) - } - - def assertBMEvalsTo( - bm: BlockMatrixIR, - expected: DenseMatrix[Double] - )( - implicit execStrats: Set[ExecStrategy] - ): Unit = { - ExecuteContext.scoped() { ctx => - val filteredExecStrats: Set[ExecStrategy] = - if (HailContext.backend.isInstanceOf[SparkBackend]) execStrats - else { - info("skipping interpret and non-lowering compile steps on non-spark backend") - execStrats.intersect(ExecStrategy.backendOnly) - } - filteredExecStrats.filter(ExecStrategy.interpretOnly).foreach { strat => - try { - val res = strat match { - case ExecStrategy.Interpret => - Interpret(bm, ctx, optimize = true) - case ExecStrategy.InterpretUnoptimized => - Interpret(bm, ctx, optimize = false) - } - assert(res.toBreezeMatrix() == expected) - } catch { - case e: Exception => - error(s"error from strategy $strat") - if (execStrats.contains(strat)) throw e - } - } - val expectedArray = Array.tabulate(expected.rows)(i => Array.tabulate(expected.cols)(j => expected(i, j)).toFastIndexedSeq).toFastIndexedSeq - assertNDEvals(BlockMatrixCollect(bm), expectedArray)(filteredExecStrats.filterNot(ExecStrategy.interpretOnly)) - } - } - - def assertAllEvalTo( - xs: (IR, Any)* - )( - implicit execStrats: Set[ExecStrategy] - ): Unit = { - assertEvalsTo(MakeTuple.ordered(xs.map(_._1)), Row.fromSeq(xs.map(_._2))) - } - - def assertEvalsTo( - x: IR, - expected: Any - )( - implicit execStrats: Set[ExecStrategy] - ) { - assertEvalsTo(x, Env.empty, FastIndexedSeq(), None, expected) - } - - def assertEvalsTo( - x: IR, - args: IndexedSeq[(Any, Type)], - expected: Any - )( - implicit execStrats: Set[ExecStrategy] - ) { - assertEvalsTo(x, Env.empty, args, None, expected) - } - - def assertEvalsTo( - x: IR, - agg: (IndexedSeq[Row], TStruct), - expected: Any - )( - implicit execStrats: Set[ExecStrategy] - ) { - assertEvalsTo(x, Env.empty, FastIndexedSeq(), Some(agg), expected) 
- } } diff --git a/hail/src/test/scala/is/hail/TestUtils.scala b/hail/src/test/scala/is/hail/TestUtils.scala index 92b8e8c15bd..d93521e7ec5 100644 --- a/hail/src/test/scala/is/hail/TestUtils.scala +++ b/hail/src/test/scala/is/hail/TestUtils.scala @@ -309,6 +309,90 @@ object TestUtils { assert(t.valuesSimilar(i2, c), s"interpret (optimize = false) $i vs compile $c") } + def assertAllEvalTo(xs: (IR, Any)*)(implicit execStrats: Set[ExecStrategy]): Unit = { + assertEvalsTo(MakeTuple.ordered(xs.map(_._1)), Row.fromSeq(xs.map(_._2))) + } + + def assertEvalsTo(x: IR, expected: Any) + (implicit execStrats: Set[ExecStrategy]) { + assertEvalsTo(x, Env.empty, FastIndexedSeq(), None, expected) + } + + def assertEvalsTo(x: IR, args: IndexedSeq[(Any, Type)], expected: Any) + (implicit execStrats: Set[ExecStrategy]) { + assertEvalsTo(x, Env.empty, args, None, expected) + } + + def assertEvalsTo(x: IR, agg: (IndexedSeq[Row], TStruct), expected: Any) + (implicit execStrats: Set[ExecStrategy]) { + assertEvalsTo(x, Env.empty, FastIndexedSeq(), Some(agg), expected) + } + + def assertEvalsTo(x: IR, + env: Env[(Any, Type)], + args: IndexedSeq[(Any, Type)], + agg: Option[(IndexedSeq[Row], TStruct)], + expected: Any) + (implicit execStrats: Set[ExecStrategy]) { + + TypeCheck(x, BindingEnv(env.mapValues(_._2), agg = agg.map(_._2.toEnv))) + + val t = x.typ + assert(t == TVoid || t.typeCheck(expected), s"$t, $expected") + + ExecuteContext.scoped() { ctx => + val filteredExecStrats: Set[ExecStrategy] = + if (HailContext.backend.isInstanceOf[SparkBackend]) + execStrats + else { + info("skipping interpret and non-lowering compile steps on non-spark backend") + execStrats.intersect(ExecStrategy.backendOnly) + } + + filteredExecStrats.foreach { strat => + try { + val res = strat match { + case ExecStrategy.Interpret => + assert(agg.isEmpty) + Interpret[Any](ctx, x, env, args) + case ExecStrategy.InterpretUnoptimized => + assert(agg.isEmpty) + Interpret[Any](ctx, x, env, args, optimize = false) + case ExecStrategy.JvmCompile => + assert(Forall(x, node => Compilable(node))) + eval(x, env, args, agg, bytecodePrinter = + Option(HailContext.getFlag("jvm_bytecode_dump")) + .map { path => + val pw = new PrintWriter(new File(path)) + pw.print(s"/* JVM bytecode dump for IR:\n${Pretty(x)}\n */\n\n") + pw + }, true, ctx) + case ExecStrategy.JvmCompileUnoptimized => + assert(Forall(x, node => Compilable(node))) + eval(x, env, args, agg, bytecodePrinter = + Option(HailContext.getFlag("jvm_bytecode_dump")) + .map { path => + val pw = new PrintWriter(new File(path)) + pw.print(s"/* JVM bytecode dump for IR:\n${Pretty(x)}\n */\n\n") + pw + }, + optimize = false, ctx) + case ExecStrategy.LoweredJVMCompile => + loweredExecute(ctx, x, env, args, agg) + } + if (t != TVoid) { + assert(t.typeCheck(res), s"\n t=$t\n result=$res\n strategy=$strat") + assert(t.valuesSimilar(res, expected), s"\n result=$res\n expect=$expected\n strategy=$strat)") + } + } catch { + case e: Exception => + error(s"error from strategy $strat") + if (execStrats.contains(strat)) throw e + } + } + } + } + def assertThrows[E <: Throwable : Manifest](x: IR, regex: String) { assertThrows[E](x, Env.empty[(Any, Type)], FastIndexedSeq.empty[(Any, Type)], regex) } @@ -347,6 +431,85 @@ object TestUtils { assertCompiledThrows[HailException](x, regex) } + def assertNDEvals(nd: IR, expected: Any) + (implicit execStrats: Set[ExecStrategy]) { + assertNDEvals(nd, Env.empty, FastIndexedSeq(), None, expected) + } + + def assertNDEvals(nd: IR, expected: (Any, IndexedSeq[Long])) + 
(implicit execStrats: Set[ExecStrategy]) { + if (expected == null) + assertNDEvals(nd, Env.empty, FastIndexedSeq(), None, null, null) + else + assertNDEvals(nd, Env.empty, FastIndexedSeq(), None, expected._2, expected._1) + } + + def assertNDEvals(nd: IR, args: IndexedSeq[(Any, Type)], expected: Any) + (implicit execStrats: Set[ExecStrategy]) { + assertNDEvals(nd, Env.empty, args, None, expected) + } + + def assertNDEvals(nd: IR, agg: (IndexedSeq[Row], TStruct), expected: Any) + (implicit execStrats: Set[ExecStrategy]) { + assertNDEvals(nd, Env.empty, FastIndexedSeq(), Some(agg), expected) + } + + def assertNDEvals(nd: IR, env: Env[(Any, Type)], args: IndexedSeq[(Any, Type)], + agg: Option[(IndexedSeq[Row], TStruct)], expected: Any) + (implicit execStrats: Set[ExecStrategy]): Unit = { + var e: IndexedSeq[Any] = expected.asInstanceOf[IndexedSeq[Any]] + val dims = Array.fill(nd.typ.asInstanceOf[TNDArray].nDims) { + val n = e.length + if (n != 0 && e.head.isInstanceOf[IndexedSeq[_]]) + e = e.head.asInstanceOf[IndexedSeq[Any]] + n.toLong + } + assertNDEvals(nd, Env.empty, FastIndexedSeq(), agg, dims, expected) + } + + def assertNDEvals(nd: IR, env: Env[(Any, Type)], args: IndexedSeq[(Any, Type)], + agg: Option[(IndexedSeq[Row], TStruct)], dims: IndexedSeq[Long], expected: Any) + (implicit execStrats: Set[ExecStrategy]): Unit = { + val arrayIR = if (expected == null) nd else { + val refs = Array.fill(nd.typ.asInstanceOf[TNDArray].nDims) { Ref(genUID(), TInt32) } + Let("nd", nd, + dims.zip(refs).foldRight[IR](NDArrayRef(Ref("nd", nd.typ), refs.map(Cast(_, TInt64)), -1)) { + case ((n, ref), accum) => + ToArray(StreamMap(rangeIR(n.toInt), ref.name, accum)) + }) + } + assertEvalsTo(arrayIR, env, args, agg, expected) + } + + def assertBMEvalsTo(bm: BlockMatrixIR, expected: DenseMatrix[Double]) + (implicit execStrats: Set[ExecStrategy]): Unit = { + ExecuteContext.scoped() { ctx => + val filteredExecStrats: Set[ExecStrategy] = + if (HailContext.backend.isInstanceOf[SparkBackend]) execStrats + else { + info("skipping interpret and non-lowering compile steps on non-spark backend") + execStrats.intersect(ExecStrategy.backendOnly) + } + filteredExecStrats.filter(ExecStrategy.interpretOnly).foreach { strat => + try { + val res = strat match { + case ExecStrategy.Interpret => + Interpret(bm, ctx, optimize = true) + case ExecStrategy.InterpretUnoptimized => + Interpret(bm, ctx, optimize = false) + } + assert(res.toBreezeMatrix() == expected) + } catch { + case e: Exception => + error(s"error from strategy $strat") + if (execStrats.contains(strat)) throw e + } + } + val expectedArray = Array.tabulate(expected.rows)(i => Array.tabulate(expected.cols)(j => expected(i, j)).toFastIndexedSeq).toFastIndexedSeq + assertNDEvals(BlockMatrixCollect(bm), expectedArray)(filteredExecStrats.filterNot(ExecStrategy.interpretOnly)) + } + } + def importVCF(ctx: ExecuteContext, file: String, force: Boolean = false, forceBGZ: Boolean = false, headerFile: Option[String] = None, diff --git a/hail/src/test/scala/is/hail/annotations/StagedConstructorSuite.scala b/hail/src/test/scala/is/hail/annotations/StagedConstructorSuite.scala index 9774a965a2d..6ae59b84572 100644 --- a/hail/src/test/scala/is/hail/annotations/StagedConstructorSuite.scala +++ b/hail/src/test/scala/is/hail/annotations/StagedConstructorSuite.scala @@ -31,7 +31,7 @@ class StagedConstructorSuite extends HailSuite { val region = Region(pool=pool) val rv = RegionValue(region) - rv.setOffset(fb.result(ctx)(theHailClassLoader)(region, input)) + 
rv.setOffset(fb.result()(theHailClassLoader)(region, input)) if (showRVInfo) { printRegion(region, "string") @@ -68,7 +68,7 @@ class StagedConstructorSuite extends HailSuite { val region = Region(pool=pool) val rv = RegionValue(region) - rv.setOffset(fb.result(ctx)(theHailClassLoader)(region, input)) + rv.setOffset(fb.result()(theHailClassLoader)(region, input)) if (showRVInfo) { printRegion(region, "int") @@ -105,7 +105,7 @@ class StagedConstructorSuite extends HailSuite { val region = Region(pool=pool) val rv = RegionValue(region) - rv.setOffset(fb.result(ctx)(theHailClassLoader)(region, input)) + rv.setOffset(fb.result()(theHailClassLoader)(region, input)) if (showRVInfo) { printRegion(region, "array") @@ -149,7 +149,7 @@ class StagedConstructorSuite extends HailSuite { val region = Region(pool=pool) val rv = RegionValue(region) - rv.setOffset(fb.result(ctx)(theHailClassLoader)(region, input)) + rv.setOffset(fb.result()(theHailClassLoader)(region, input)) if (showRVInfo) { printRegion(region, "struct") @@ -193,7 +193,7 @@ class StagedConstructorSuite extends HailSuite { val region = Region(pool=pool) val rv = RegionValue(region) - rv.setOffset(fb.result(ctx)(theHailClassLoader)(region, input)) + rv.setOffset(fb.result()(theHailClassLoader)(region, input)) if (showRVInfo) { printRegion(region, "array of struct") @@ -326,7 +326,7 @@ class StagedConstructorSuite extends HailSuite { val region = Region(pool=pool) val rv = RegionValue(region) - rv.setOffset(fb.result(ctx)(theHailClassLoader)(region, input)) + rv.setOffset(fb.result()(theHailClassLoader)(region, input)) if (showRVInfo) { printRegion(region, "struct with array") @@ -374,7 +374,7 @@ class StagedConstructorSuite extends HailSuite { val region = Region(pool=pool) val rv = RegionValue(region) - rv.setOffset(fb.result(ctx)(theHailClassLoader)(region, input)) + rv.setOffset(fb.result()(theHailClassLoader)(region, input)) if (showRVInfo) { printRegion(region, "missing array") @@ -415,7 +415,7 @@ class StagedConstructorSuite extends HailSuite { } val region = Region(pool=pool) - val f = fb.result(ctx)(theHailClassLoader) + val f = fb.result()(theHailClassLoader) def run(i: Int, b: Boolean, d: Double): (Int, Boolean, Double) = { val off = f(region, i, b, d) (Region.loadInt(t.loadField(off, 0)), diff --git a/hail/src/test/scala/is/hail/asm4s/ASM4SSuite.scala b/hail/src/test/scala/is/hail/asm4s/ASM4SSuite.scala index 2bf3b9d77ce..88ee0daa94d 100644 --- a/hail/src/test/scala/is/hail/asm4s/ASM4SSuite.scala +++ b/hail/src/test/scala/is/hail/asm4s/ASM4SSuite.scala @@ -2,7 +2,6 @@ package is.hail.asm4s import java.io.PrintWriter -import is.hail.HailSuite import is.hail.asm4s.Code._ import is.hail.asm4s.FunctionBuilder._ import is.hail.check.{Gen, Prop} @@ -14,13 +13,13 @@ import scala.language.postfixOps trait Z2Z { def apply(z:Boolean): Boolean } -class ASM4SSuite extends HailSuite { +class ASM4SSuite extends TestNGSuite { private[this] val theHailClassLoader = new HailClassLoader(getClass().getClassLoader()) @Test def not(): Unit = { val notb = FunctionBuilder[Z2Z]("is/hail/asm4s/Z2Z", Array(NotGenericTypeInfo[Boolean]), NotGenericTypeInfo[Boolean]) notb.emit(!notb.getArg[Boolean](1)) - val not = notb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val not = notb.result()(theHailClassLoader) assert(!not(true)) assert(not(false)) } @@ -28,7 +27,7 @@ class ASM4SSuite extends HailSuite { @Test def mux(): Unit = { val gb = FunctionBuilder[Boolean, Int]("G") gb.emit(gb.getArg[Boolean](1).mux(11, -1)) - val g = 
gb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val g = gb.result()(theHailClassLoader) assert(g(true) == 11) assert(g(false) == -1) } @@ -36,7 +35,7 @@ class ASM4SSuite extends HailSuite { @Test def add(): Unit = { val fb = FunctionBuilder[Int, Int]("F") fb.emit(fb.getArg[Int](1) + 5) - val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val f = fb.result()(theHailClassLoader) assert(f(-2) == 3) } @@ -44,7 +43,7 @@ class ASM4SSuite extends HailSuite { val fb = FunctionBuilder[Int]("F") val l = fb.newLocal[Int]() fb.emit(Code(l := 0, l++, l += 2, l)) - val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val f = fb.result()(theHailClassLoader) assert(f() == 3) } @@ -58,7 +57,7 @@ class ASM4SSuite extends HailSuite { arr(2) = -6, arr(hb.getArg[Int](1)) )) - val h = hb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val h = hb.result()(theHailClassLoader) assert(h(0) == 6) assert(h(1) == 7) assert(h(2) == -6) @@ -67,7 +66,7 @@ class ASM4SSuite extends HailSuite { @Test def get(): Unit = { val fb = FunctionBuilder[A, Int]("F") fb.emit(fb.getArg[A](1).getField[Int]("i")) - val i = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val i = fb.result()(theHailClassLoader) val a = new A assert(i(a) == 5) @@ -76,7 +75,7 @@ class ASM4SSuite extends HailSuite { @Test def invoke(): Unit = { val fb = FunctionBuilder[A, Int]("F") fb.emit(fb.getArg[A](1).invoke[Int]("f")) - val i = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val i = fb.result()(theHailClassLoader) val a = new A assert(i(a) == 6) @@ -85,7 +84,7 @@ class ASM4SSuite extends HailSuite { @Test def invoke2(): Unit = { val fb = FunctionBuilder[A, Int]("F") fb.emit(fb.getArg[A](1).invoke[Int, Int]("g", 6)) - val j = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val j = fb.result()(theHailClassLoader) val a = new A assert(j(a) == 11) @@ -94,7 +93,7 @@ class ASM4SSuite extends HailSuite { @Test def newInstance(): Unit = { val fb = FunctionBuilder[Int]("F") fb.emit(Code.newInstance[A]().invoke[Int]("f")) - val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val f = fb.result()(theHailClassLoader) assert(f() == 6) } @@ -105,7 +104,7 @@ class ASM4SSuite extends HailSuite { inst.store(Code.newInstance[A]()), inst.put("i", -2), inst.getField[Int]("i"))) - val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val f = fb.result()(theHailClassLoader) assert(f() == -2) } @@ -116,21 +115,21 @@ class ASM4SSuite extends HailSuite { inst.store(Code.newInstance[A]()), inst.put("j", -2), Code.getStatic[A, Int]("j"))) - val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val f = fb.result()(theHailClassLoader) assert(f() == -2) } @Test def f2(): Unit = { val fb = FunctionBuilder[Int, Int, Int]("F") fb.emit(fb.getArg[Int](1) + fb.getArg[Int](2)) - val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val f = fb.result()(theHailClassLoader) assert(f(3, 5) == 8) } @Test def compare(): Unit = { val fb = FunctionBuilder[Int, Int, Boolean]("F") fb.emit(fb.getArg[Int](1) > fb.getArg[Int](2)) - val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val f = fb.result()(theHailClassLoader) assert(f(5, 2)) assert(!f(-1, -1)) assert(!f(2, 5)) @@ -148,7 +147,7 @@ class ASM4SSuite extends HailSuite { r.store(r * i), i.store(i - 1))), r)) - val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val f = fb.result()(theHailClassLoader) assert(f(3) == 6) assert(f(4) == 24) @@ -157,7 +156,7 @@ class ASM4SSuite extends HailSuite { 
@Test def dcmp(): Unit = { val fb = FunctionBuilder[Double, Double, Boolean]("F") fb.emit(fb.getArg[Double](1) > fb.getArg[Double](2)) - val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val f = fb.result()(theHailClassLoader) assert(f(5.2, 2.3)) val d = -2.3 @@ -174,7 +173,7 @@ class ASM4SSuite extends HailSuite { arr(1) = Code.newInstance[A](), arr(0).getField[Int]("i") + arr(1).getField[Int]("i") )) - val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val f = fb.result()(theHailClassLoader) assert(f() == 10) } @@ -205,7 +204,7 @@ class ASM4SSuite extends HailSuite { ) ), vn_2 + vn_1))) - val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val f = fb.result()(theHailClassLoader) Prop.forAll(Gen.choose(0, 100)) { i => fibonacciReference(i) == f(i) @@ -217,37 +216,37 @@ class ASM4SSuite extends HailSuite { { val fb = FunctionBuilder[Boolean]("F") fb.emit(Double.NaN < x) - val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val f = fb.result()(theHailClassLoader) assert(!f()) } { val fb = FunctionBuilder[Boolean]("F") fb.emit(Double.NaN <= x) - val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val f = fb.result()(theHailClassLoader) assert(!f()) } { val fb = FunctionBuilder[Boolean]("F") fb.emit(Double.NaN > x) - val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val f = fb.result()(theHailClassLoader) assert(!f()) } { val fb = FunctionBuilder[Boolean]("F") fb.emit(Double.NaN >= x) - val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val f = fb.result()(theHailClassLoader) assert(!f()) } { val fb = FunctionBuilder[Boolean]("F") fb.emit(new CodeDouble(Double.NaN).ceq(x)) - val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val f = fb.result()(theHailClassLoader) assert(!f()) } { val fb = FunctionBuilder[Boolean]("F") fb.emit(new CodeDouble(Double.NaN).cne(x)) - val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val f = fb.result()(theHailClassLoader) assert(f()) } @@ -260,37 +259,37 @@ class ASM4SSuite extends HailSuite { { val fb = FunctionBuilder[Boolean]("F") fb.emit(Float.NaN < x) - val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val f = fb.result()(theHailClassLoader) assert(!f()) } { val fb = FunctionBuilder[Boolean]("F") fb.emit(Float.NaN <= x) - val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val f = fb.result()(theHailClassLoader) assert(!f()) } { val fb = FunctionBuilder[Boolean]("F") fb.emit(Float.NaN > x) - val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val f = fb.result()(theHailClassLoader) assert(!f()) } { val fb = FunctionBuilder[Boolean]("F") fb.emit(Float.NaN >= x) - val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val f = fb.result()(theHailClassLoader) assert(!f()) } { val fb = FunctionBuilder[Boolean]("F") fb.emit(new CodeFloat(Float.NaN).ceq(x)) - val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val f = fb.result()(theHailClassLoader) assert(!f()) } { val fb = FunctionBuilder[Boolean]("F") fb.emit(new CodeFloat(Float.NaN).cne(x)) - val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val f = fb.result()(theHailClassLoader) assert(f()) } @@ -322,7 +321,7 @@ class ASM4SSuite extends HailSuite { }) res } - val f = fb.result(ctx.shouldWriteIRFiles(), Some(new PrintWriter(System.out)))(theHailClassLoader) + val f = fb.result(Some(new PrintWriter(System.out)))(theHailClassLoader) assert(f(0, 1, 1) == 2) assert(f(1, 5, 1) == 4) assert(f(2, 2, 
8) == 16) @@ -341,7 +340,7 @@ class ASM4SSuite extends HailSuite { v1 + v2)) fb.emitWithBuilder(add.invoke(_, fb.getArg[Int](1), fb.getArg[Int](2))) - val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val f = fb.result()(theHailClassLoader) assert(f(1, 1) == 2) } @@ -362,7 +361,7 @@ class ASM4SSuite extends HailSuite { case LongInfo => fb.emit(Code(c, longField.load())) case BooleanInfo => fb.emit(Code(c, booleanField.load())) } - val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val f = fb.result()(theHailClassLoader) f(arg1, arg2, arg3) } @@ -389,7 +388,7 @@ class ASM4SSuite extends HailSuite { case BooleanInfo => mb.emit(Code(c, booleanField.load())) } fb.emitWithBuilder(mb.invoke(_, fb.getArg[Int](1), fb.getArg[Long](2), fb.getArg[Boolean](3))) - val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val f = fb.result()(theHailClassLoader) f(arg1, arg2, arg3) } @@ -409,7 +408,7 @@ class ASM4SSuite extends HailSuite { v2 := v1, v1)) - assert(fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader)() == 1) + assert(fb.result()(theHailClassLoader)() == 1) } @Test def testInitialize(): Unit = { @@ -418,7 +417,7 @@ class ASM4SSuite extends HailSuite { fb.emit(Code( fb.getArg[Boolean](1).mux(Code._empty, l := 5), l)) - val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) + val f = fb.result()(theHailClassLoader) assert(f(true) == 0) assert(f(false) == 5) } diff --git a/hail/src/test/scala/is/hail/asm4s/CodeSuite.scala b/hail/src/test/scala/is/hail/asm4s/CodeSuite.scala index 58ba5a573b1..1d61ffee487 100644 --- a/hail/src/test/scala/is/hail/asm4s/CodeSuite.scala +++ b/hail/src/test/scala/is/hail/asm4s/CodeSuite.scala @@ -40,7 +40,7 @@ class CodeSuite extends HailSuite { mb.emit(EmitCodeBuilder.scopedCode(mb) { cb => v.sizeToStoreInBytes(cb).value }) - fb.result(ctx)(theHailClassLoader)() + fb.result()(theHailClassLoader)() } assert(testSizeHelper(int64) == 8L) @@ -61,7 +61,7 @@ class CodeSuite extends HailSuite { } sarray.sizeToStoreInBytes(cb).value }) - assert(fb.result(ctx)(theHailClassLoader)(ctx.r) == 28L) // 2 missing bytes 4 byte aligned + 4 header bytes + 5 elements * 4 bytes for ints. + assert(fb.result()(theHailClassLoader)(ctx.r) == 28L) // 2 missing bytes 4 byte aligned + 4 header bytes + 5 elements * 4 bytes for ints. } @Test def testIntervalSizeInBytes(): Unit = { @@ -86,7 +86,7 @@ class CodeSuite extends HailSuite { true, true) sval.sizeToStoreInBytes(cb).value }) - assert(fb.result(ctx)(theHailClassLoader)(ctx.r) == 72L) // 2 28 byte structs, plus 2 1 byte booleans that get 8 byte for an extra 8 bytes, plus missing bytes. + assert(fb.result()(theHailClassLoader)(ctx.r) == 72L) // 2 28 byte structs, plus 2 1 byte booleans that get 8 byte for an extra 8 bytes, plus missing bytes. 
} @Test def testHash() { @@ -109,7 +109,7 @@ class CodeSuite extends HailSuite { val hash = v.hash(cb) hash.value }) - fb.result(ctx)(theHailClassLoader)() + fb.result()(theHailClassLoader)() } def hashTestStringHelper(toHash: String): Int = { @@ -125,7 +125,7 @@ class CodeSuite extends HailSuite { hash.value }) val region = Region(pool=pool) - fb.result(ctx)(theHailClassLoader)(region) + fb.result()(theHailClassLoader)(region) } def hashTestArrayHelper(toHash: IndexedSeq[Int]): Int = { @@ -140,7 +140,7 @@ class CodeSuite extends HailSuite { }) val region = Region(pool=pool) val arrayPointer = pArray.unstagedStoreJavaObject(toHash, region) - fb.result(ctx)(theHailClassLoader)(arrayPointer) + fb.result()(theHailClassLoader)(arrayPointer) } def hashTestStructHelper(toHash: Row, fields : IndexedSeq[PField]): Int = { @@ -155,6 +155,6 @@ class CodeSuite extends HailSuite { }) val region = Region(pool=pool) val structPointer = pStruct.unstagedStoreJavaObject(toHash, region) - fb.result(ctx)(theHailClassLoader)(structPointer) + fb.result()(theHailClassLoader)(structPointer) } } diff --git a/hail/src/test/scala/is/hail/expr/ir/BlockMatrixIRSuite.scala b/hail/src/test/scala/is/hail/expr/ir/BlockMatrixIRSuite.scala index b7067828b44..fa7976e1468 100644 --- a/hail/src/test/scala/is/hail/expr/ir/BlockMatrixIRSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/BlockMatrixIRSuite.scala @@ -69,6 +69,7 @@ class BlockMatrixIRSuite extends HailSuite { } @Test def testBlockMatrixBroadcastValue_Scalars() { + implicit val execStrats: Set[ExecStrategy] = ExecStrategy.interpretOnly val broadcastTwo = BlockMatrixBroadcast( ValueToBlockMatrix(MakeArray(Seq[F64](F64(2)), TArray(TFloat64)), Array[Long](1, 1), ones.typ.blockSize), FastIndexedSeq(), shape, ones.typ.blockSize) diff --git a/hail/src/test/scala/is/hail/expr/ir/EmitStreamSuite.scala b/hail/src/test/scala/is/hail/expr/ir/EmitStreamSuite.scala index 56473e4d87b..97becfa9426 100644 --- a/hail/src/test/scala/is/hail/expr/ir/EmitStreamSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/EmitStreamSuite.scala @@ -27,7 +27,7 @@ class EmitStreamSuite extends HailSuite { val fb = EmitFunctionBuilder[T, R](ctx, "stream_test") val mb = fb.apply_method mb.emit(f(mb, mb.getCodeParam[T](1))) - val asmFn = fb.result(ctx)(theHailClassLoader) + val asmFn = fb.result()(theHailClassLoader) asmFn.apply } @@ -35,7 +35,7 @@ class EmitStreamSuite extends HailSuite { val fb = EmitFunctionBuilder[T, U, R](ctx, "F") val mb = fb.apply_method mb.emit(f(mb, mb.getCodeParam[T](1), mb.getCodeParam[U](2))) - val asmFn = fb.result(ctx)(theHailClassLoader) + val asmFn = fb.result()(theHailClassLoader) asmFn.apply } @@ -43,7 +43,7 @@ class EmitStreamSuite extends HailSuite { val fb = EmitFunctionBuilder[T, U, V, R](ctx, "F") val mb = fb.apply_method mb.emit(f(mb, mb.getCodeParam[T](1), mb.getCodeParam[U](2), mb.getCodeParam[V](3))) - val asmFn = fb.result(ctx)(theHailClassLoader) + val asmFn = fb.result()(theHailClassLoader) asmFn.apply } @@ -67,7 +67,7 @@ class EmitStreamSuite extends HailSuite { case ToArray(s) => s case s => s } - TypeCheck(ctx, s) + TypeCheck(s) EmitStream.produce(new Emit(emitContext, fb.ecb), s, cb, region, EmitEnv(Env.empty, inputTypes.indices.map(i => mb.storeEmitParam(i + 2, cb))), None) .consumeCode[Long](cb, 0L, { s => val arr = StreamUtils.toArray(cb, s.asStream.producer, region) @@ -131,7 +131,7 @@ class EmitStreamSuite extends HailSuite { val emitContext = EmitContext.analyze(ctx, ir) fb.emitWithBuilder { cb => - TypeCheck(ctx, ir) + TypeCheck(ir) val len 
= cb.newLocal[Int]("len", 0) val len2 = cb.newLocal[Int]("len2", -1) @@ -170,8 +170,8 @@ class EmitStreamSuite extends HailSuite { IndexedSeq("hi", "world") ) for ((ir, v) <- tests) { - assert(evalStream(ir) == v, Pretty(ctx, ir)) - assert(evalStreamLen(ir) == Some(v.length), Pretty(ctx, ir)) + assert(evalStream(ir) == v, Pretty(ir)) + assert(evalStreamLen(ir) == Some(v.length), Pretty(ir)) } } @@ -237,8 +237,8 @@ class EmitStreamSuite extends HailSuite { ) for ((ir, v) <- tests) { val expectedLen = Option(v).map(_.length) - assert(evalStream(ir) == v, Pretty(ctx, ir)) - assert(evalStreamLen(ir) == expectedLen, Pretty(ctx, ir)) + assert(evalStream(ir) == v, Pretty(ir)) + assert(evalStreamLen(ir) == expectedLen, Pretty(ir)) } } @@ -251,8 +251,8 @@ class EmitStreamSuite extends HailSuite { "i", MakeStream(Seq(Ref("i", TInt32), Ref("end", TInt32)), TStream(TInt32))) ) - assert(evalStream(ir) == (3 until 10).flatMap { i => Seq(i, 10) }, Pretty(ctx, ir)) - assert(evalStreamLen(ir).isEmpty, Pretty(ctx, ir)) + assert(evalStream(ir) == (3 until 10).flatMap { i => Seq(i, 10) }, Pretty(ir)) + assert(evalStreamLen(ir).isEmpty, Pretty(ir)) } @Test def testEmitMap() { @@ -269,8 +269,8 @@ class EmitStreamSuite extends HailSuite { StreamMap(ten, "x", NA(TInt32)) -> IndexedSeq.tabulate(10) { _ => null } ) for ((ir, v) <- tests) { - assert(evalStream(ir) == v, Pretty(ctx, ir)) - assert(evalStreamLen(ir) == Some(v.length), Pretty(ctx, ir)) + assert(evalStream(ir) == v, Pretty(ir)) + assert(evalStreamLen(ir) == Some(v.length), Pretty(ir)) } } @@ -288,8 +288,8 @@ class EmitStreamSuite extends HailSuite { StreamFilter(StreamMap(ten, "x", NA(TInt32)), "z", True()) -> IndexedSeq.tabulate(10) { _ => null } ) for ((ir, v) <- tests) { - assert(evalStream(ir) == v, Pretty(ctx, ir)) - assert(evalStreamLen(ir).isEmpty, Pretty(ctx, ir)) + assert(evalStream(ir) == v, Pretty(ir)) + assert(evalStreamLen(ir).isEmpty, Pretty(ir)) } } @@ -317,9 +317,9 @@ class EmitStreamSuite extends HailSuite { IndexedSeq(0, 0, 1, 1, 2, 2, 3, 3) ) for ((ir, v) <- tests) { - assert(evalStream(ir) == v, Pretty(ctx, ir)) + assert(evalStream(ir) == v, Pretty(ir)) if (v != null) - assert(evalStreamLen(ir) == None, Pretty(ctx, ir)) + assert(evalStreamLen(ir) == None, Pretty(ir)) } } @@ -486,10 +486,10 @@ class EmitStreamSuite extends HailSuite { for ((lstream, rstream, expectedLeft, expectedOuter) <- tests) { val l = leftjoin(lstream, rstream) val o = outerjoin(lstream, rstream) - assert(evalStream(l) == expectedLeft, Pretty(ctx, l)) - assert(evalStream(o) == expectedOuter, Pretty(ctx, o)) - assert(evalStreamLen(l) == Some(expectedLeft.length), Pretty(ctx, l)) - assert(evalStreamLen(o) == None, Pretty(ctx, o)) + assert(evalStream(l) == expectedLeft, Pretty(l)) + assert(evalStream(o) == expectedOuter, Pretty(o)) + assert(evalStreamLen(l) == Some(expectedLeft.length), Pretty(l)) + assert(evalStreamLen(o) == None, Pretty(o)) } } @@ -553,10 +553,10 @@ class EmitStreamSuite extends HailSuite { for ((lstream, rstream, expectedLeft, expectedInner) <- tests) { val l = leftjoin(lstream, rstream) val i = innerjoin(lstream, rstream) - assert(evalStream(l) == expectedLeft, Pretty(ctx, l)) - assert(evalStream(i) == expectedInner, Pretty(ctx, i)) - assert(evalStreamLen(l) == Some(expectedLeft.length), Pretty(ctx, l)) - assert(evalStreamLen(i) == None, Pretty(ctx, i)) + assert(evalStream(l) == expectedLeft, Pretty(l)) + assert(evalStream(i) == expectedInner, Pretty(i)) + assert(evalStreamLen(l) == Some(expectedLeft.length), Pretty(l)) + assert(evalStreamLen(i) 
== None, Pretty(i)) } } @@ -618,8 +618,8 @@ class EmitStreamSuite extends HailSuite { 1, "a", "v", a + v) -> IndexedSeq(1, 1 /*1+0*0*/ , 2 /*1+1*1*/ , 6 /*2+2*2*/ , 15 /*6+3*3*/) ) for ((ir, v) <- tests) { - assert(evalStream(ir) == v, Pretty(ctx, ir)) - assert(evalStreamLen(ir) == Some(v.length), Pretty(ctx, ir)) + assert(evalStream(ir) == v, Pretty(ir)) + assert(evalStreamLen(ir) == Some(v.length), Pretty(ir)) } } @@ -628,7 +628,7 @@ class EmitStreamSuite extends HailSuite { val aggregate = compileStream(LoweringPipeline.compileLowerer(false).apply(ctx, ir).asInstanceOf[IR], PType.canonical(inType)) for ((inp, expected) <- tests) - assert(aggregate(inp) == expected, Pretty(ctx, ir)) + assert(aggregate(inp) == expected, Pretty(ir)) } def scanOp(op: AggOp, initArgs: Seq[IR], opArgs: Seq[IR]): ApplyScanOp = @@ -718,8 +718,8 @@ class EmitStreamSuite extends HailSuite { ) val lens: Array[Option[Int]] = Array(Some(3), Some(4), Some(3), None, None, None) for (((ir, v), len) <- tests zip lens) { - assert(evalStream(ir) == v, Pretty(ctx, ir)) - assert(evalStreamLen(ir) == len, Pretty(ctx, ir)) + assert(evalStream(ir) == v, Pretty(ir)) + assert(evalStreamLen(ir) == len, Pretty(ir)) } } @@ -865,7 +865,7 @@ class EmitStreamSuite extends HailSuite { StreamScan(StreamMap(target, "i", i), 0, "a", "i", i) -> 1, StreamScan(StreamScan(target, 0, "a", "i", i), 0, "a", "i", i) -> 1 )) { - assert(StreamUtils.multiplicity(ir, "target") == v, Pretty(ctx, ir)) + assert(StreamUtils.multiplicity(ir, "target") == v, Pretty(ir)) } } @@ -884,7 +884,7 @@ class EmitStreamSuite extends HailSuite { throw new RuntimeException(s"memory usage scales with stream size!" + s"\n at size=$lowSize, memory=$memUsed1" + s"\n at size=$highSize, memory=$memUsed2" + - s"\n IR: ${ Pretty(ctx, f(lowSize)) }") + s"\n IR: ${ Pretty(f(lowSize)) }") } diff --git a/hail/src/test/scala/is/hail/expr/ir/ExtractIntervalFiltersSuite.scala b/hail/src/test/scala/is/hail/expr/ir/ExtractIntervalFiltersSuite.scala index 74cf4cfea6a..ca3a1c307d0 100644 --- a/hail/src/test/scala/is/hail/expr/ir/ExtractIntervalFiltersSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/ExtractIntervalFiltersSuite.scala @@ -278,7 +278,7 @@ class ExtractIntervalFiltersSuite extends HailSuite { invoke("lor", TBoolean, Ref("acc", TBoolean), invoke("contains", TBoolean, Ref("elt", TInterval(TInt32)), k1))) - TypeCheck(ctx, ir, BindingEnv(Env(ref1.name -> ref1.typ))) + TypeCheck(ir, BindingEnv(Env(ref1.name -> ref1.typ))) val (rw, intervals) = ExtractIntervalFilters.extractPartitionFilters(ir, ref1, ref1Key).get assert(rw == True()) @@ -329,7 +329,7 @@ class ExtractIntervalFiltersSuite extends HailSuite { ApplyComparisonOp(LTEQ(TInt32), k, I32(9)) ), False()))) - assert(ExtractIntervalFilters(ctx, tf).asInstanceOf[TableFilter].child.isInstanceOf[TableFilterIntervals]) + assert(ExtractIntervalFilters(tf).asInstanceOf[TableFilter].child.isInstanceOf[TableFilterIntervals]) assertEvalsTo(TableCount(tf), 6L)(ExecStrategy.interpretOnly) } } diff --git a/hail/src/test/scala/is/hail/expr/ir/ForwardLetsSuite.scala b/hail/src/test/scala/is/hail/expr/ir/ForwardLetsSuite.scala index 45ec68779dc..a4db25ff2ce 100644 --- a/hail/src/test/scala/is/hail/expr/ir/ForwardLetsSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/ForwardLetsSuite.scala @@ -132,7 +132,6 @@ class ForwardLetsSuite extends HailSuite { .apply(aggEnv) TypeCheck( - ctx, ForwardLets(ir0).asInstanceOf[IR], BindingEnv(Env.empty, agg = Some(aggEnv))) } @@ -143,8 +142,8 @@ class ForwardLetsSuite extends HailSuite { 'x + 'x + 
'y }(env) - TypeCheck(ctx, ir, BindingEnv(env)) - TypeCheck(ctx, ForwardLets(ir).asInstanceOf[IR], BindingEnv(env)) + TypeCheck(ir, BindingEnv(env)) + TypeCheck(ForwardLets(ir).asInstanceOf[IR], BindingEnv(env)) } @Test def testLetsDoNotForwardInsideArrayAggWithNoOps(): Unit = { @@ -160,7 +159,7 @@ class ForwardLetsSuite extends HailSuite { Ref("y", TInt32) + Ref("x", TInt32 ))) - TypeCheck(ctx, x, BindingEnv(Env("y" -> TInt32))) - TypeCheck(ctx, ForwardLets(x).asInstanceOf[IR], BindingEnv(Env("y" -> TInt32))) + TypeCheck(x, BindingEnv(Env("y" -> TInt32))) + TypeCheck(ForwardLets(x).asInstanceOf[IR], BindingEnv(Env("y" -> TInt32))) } } diff --git a/hail/src/test/scala/is/hail/expr/ir/IRSuite.scala b/hail/src/test/scala/is/hail/expr/ir/IRSuite.scala index 10502f0fe37..bffdf48c706 100644 --- a/hail/src/test/scala/is/hail/expr/ir/IRSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/IRSuite.scala @@ -2135,8 +2135,8 @@ class IRSuite extends HailSuite { def joinRows(left: IndexedSeq[Integer], right: IndexedSeq[Integer], joinType: String): IR = { join( - MakeStream.unify(ctx, left.zipWithIndex.map { case (n, idx) => MakeStruct(FastIndexedSeq("lk1" -> (if (n == null) NA(TInt32) else I32(n)), "lk2" -> Str("x"), "a" -> I64(idx))) }), - MakeStream.unify(ctx, right.zipWithIndex.map { case (n, idx) => MakeStruct(FastIndexedSeq("b" -> I32(idx), "rk2" -> Str("x"), "rk1" -> (if (n == null) NA(TInt32) else I32(n)), "c" -> Str("foo"))) }), + MakeStream.unify(left.zipWithIndex.map { case (n, idx) => MakeStruct(FastIndexedSeq("lk1" -> (if (n == null) NA(TInt32) else I32(n)), "lk2" -> Str("x"), "a" -> I64(idx))) }), + MakeStream.unify(right.zipWithIndex.map { case (n, idx) => MakeStruct(FastIndexedSeq("b" -> I32(idx), "rk2" -> Str("x"), "rk1" -> (if (n == null) NA(TInt32) else I32(n)), "c" -> Str("foo"))) }), FastIndexedSeq("lk1", "lk2"), FastIndexedSeq("rk1", "rk2"), rightDistinct = true, @@ -2150,7 +2150,7 @@ class IRSuite extends HailSuite { assertEvalsTo( join( NA(TStream(TStruct("k1" -> TInt32, "k2" -> TString, "a" -> TInt64))), - MakeStream.unify(ctx, Seq(MakeStruct(FastIndexedSeq("b" -> I32(0), "k2" -> Str("x"), "k1" -> I32(3), "c" -> Str("foo"))))), + MakeStream.unify(Seq(MakeStruct(FastIndexedSeq("b" -> I32(0), "k2" -> Str("x"), "k1" -> I32(3), "c" -> Str("foo"))))), FastIndexedSeq("k1", "k2"), FastIndexedSeq("k1", "k2"), true, @@ -2159,7 +2159,7 @@ class IRSuite extends HailSuite { assertEvalsTo( join( - MakeStream.unify(ctx, Seq(MakeStruct(FastIndexedSeq("k1" -> I32(0), "k2" -> Str("x"), "a" -> I64(3))))), + MakeStream.unify(Seq(MakeStruct(FastIndexedSeq("k1" -> I32(0), "k2" -> Str("x"), "a" -> I64(3))))), NA(TStream(TStruct("b" -> TInt32, "k2" -> TString, "k1" -> TInt32, "c" -> TString))), FastIndexedSeq("k1", "k2"), FastIndexedSeq("k1", "k2"), @@ -2199,8 +2199,8 @@ class IRSuite extends HailSuite { def joinRows(left: IndexedSeq[Integer], right: IndexedSeq[Integer], joinType: String): IR = { join( - MakeStream.unify(ctx, left.zipWithIndex.map { case (n, idx) => MakeStruct(FastIndexedSeq("lk" -> (if (n == null) NA(TInt32) else I32(n)), "l" -> I32(idx))) }), - MakeStream.unify(ctx, right.zipWithIndex.map { case (n, idx) => MakeStruct(FastIndexedSeq("rk" -> (if (n == null) NA(TInt32) else I32(n)), "r" -> I32(idx))) }), + MakeStream.unify(left.zipWithIndex.map { case (n, idx) => MakeStruct(FastIndexedSeq("lk" -> (if (n == null) NA(TInt32) else I32(n)), "l" -> I32(idx))) }), + MakeStream.unify(right.zipWithIndex.map { case (n, idx) => MakeStruct(FastIndexedSeq("rk" -> (if (n == null) NA(TInt32) else 
I32(n)), "r" -> I32(idx))) }), FastIndexedSeq("lk"), FastIndexedSeq("rk"), false, diff --git a/hail/src/test/scala/is/hail/expr/ir/LocusFunctionsSuite.scala b/hail/src/test/scala/is/hail/expr/ir/LocusFunctionsSuite.scala index 8d299449a3d..555fb21fcdf 100644 --- a/hail/src/test/scala/is/hail/expr/ir/LocusFunctionsSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/LocusFunctionsSuite.scala @@ -2,6 +2,7 @@ package is.hail.expr.ir import is.hail.ExecStrategy import is.hail.HailSuite +import is.hail.TestUtils.assertEvalsTo import is.hail.types.physical.{PCanonicalLocus, PInterval} import is.hail.types.virtual._ import is.hail.utils.{FastIndexedSeq, FastSeq, Interval} diff --git a/hail/src/test/scala/is/hail/expr/ir/PruneSuite.scala b/hail/src/test/scala/is/hail/expr/ir/PruneSuite.scala index 00581bd9494..d924c50fc42 100644 --- a/hail/src/test/scala/is/hail/expr/ir/PruneSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/PruneSuite.scala @@ -48,22 +48,15 @@ class PruneSuite extends HailSuite { assert(PruneDeadFields.isSupertype(tuple2IntsFirstRemoved, tuple2Ints)) } - @Test def testIsSupertypeWithDistinctFieldTypes(): Unit = { - val tuple2Ints = TTuple(TInt32, TFloat64) - val tuple2IntsFirstRemoved = TTuple(IndexedSeq(TupleField(1, TFloat64))) - - assert(PruneDeadFields.isSupertype(tuple2IntsFirstRemoved, tuple2Ints)) - } - def checkMemo(ir: BaseIR, requestedType: BaseType, expected: Array[BaseType]) { val irCopy = ir.deepCopy() assert(PruneDeadFields.isSupertype(requestedType, irCopy.typ), s"not supertype:\n super: ${ requestedType.parsableString() }\n sub: ${ irCopy.typ.parsableString() }") val ms = PruneDeadFields.ComputeMutableState(Memo.empty[BaseType], mutable.HashMap.empty) irCopy match { - case mir: MatrixIR => PruneDeadFields.memoizeMatrixIR(ctx, mir, requestedType.asInstanceOf[MatrixType], ms) - case tir: TableIR => PruneDeadFields.memoizeTableIR(ctx, tir, requestedType.asInstanceOf[TableType], ms) - case ir: IR => PruneDeadFields.memoizeValueIR(ctx, ir, requestedType.asInstanceOf[Type], ms) + case mir: MatrixIR => PruneDeadFields.memoizeMatrixIR(mir, requestedType.asInstanceOf[MatrixType], ms) + case tir: TableIR => PruneDeadFields.memoizeTableIR(tir, requestedType.asInstanceOf[TableType], ms) + case ir: IR => PruneDeadFields.memoizeValueIR(ir, requestedType.asInstanceOf[Type], ms) } irCopy.children.zipWithIndex.foreach { case (child, i) => if (expected(i) != null && expected(i) != ms.requestedType.lookup(child)) { @@ -80,14 +73,14 @@ class PruneSuite extends HailSuite { val ms = PruneDeadFields.ComputeMutableState(Memo.empty[BaseType], mutable.HashMap.empty) val rebuilt = (irCopy match { case mir: MatrixIR => - PruneDeadFields.memoizeMatrixIR(ctx, mir, requestedType.asInstanceOf[MatrixType], ms) - PruneDeadFields.rebuild(ctx, mir, ms.rebuildState) + PruneDeadFields.memoizeMatrixIR(mir, requestedType.asInstanceOf[MatrixType], ms) + PruneDeadFields.rebuild(mir, ms.rebuildState) case tir: TableIR => - PruneDeadFields.memoizeTableIR(ctx, tir, requestedType.asInstanceOf[TableType], ms) - PruneDeadFields.rebuild(ctx, tir, ms.rebuildState) + PruneDeadFields.memoizeTableIR(tir, requestedType.asInstanceOf[TableType], ms) + PruneDeadFields.rebuild(tir, ms.rebuildState) case ir: IR => - PruneDeadFields.memoizeValueIR(ctx, ir, requestedType.asInstanceOf[Type], ms) - PruneDeadFields.rebuildIR(ctx, ir, BindingEnv(Env.empty, Some(Env.empty), Some(Env.empty)), ms.rebuildState) + PruneDeadFields.memoizeValueIR(ir, requestedType.asInstanceOf[Type], ms) + PruneDeadFields.rebuildIR(ir, 
BindingEnv(Env.empty, Some(Env.empty), Some(Env.empty)), ms.rebuildState) }).asInstanceOf[T] if (!f(ir, rebuilt)) fatal(s"IR did not rebuild the same:\n Base: $ir\n Rebuilt: $rebuilt") @@ -899,7 +892,7 @@ class PruneSuite extends HailSuite { checkRebuild(TableFilter(tr, tableRefBoolean(tr.typ, "row.2")), subsetTable(tr.typ, "row.3"), (_: BaseIR, r: BaseIR) => { val tf = r.asInstanceOf[TableFilter] - TypeCheck(ctx, tf.pred, PruneDeadFields.relationalTypeToEnv(tf.typ)) + TypeCheck(tf.pred, PruneDeadFields.relationalTypeToEnv(tf.typ)) tf.child.typ == subsetTable(tr.typ, "row.3", "row.2") }) } @@ -909,7 +902,7 @@ class PruneSuite extends HailSuite { checkRebuild(tmr, subsetTable(tmr.typ, "row.foo"), (_: BaseIR, r: BaseIR) => { val tmr = r.asInstanceOf[TableMapRows] - TypeCheck(ctx, tmr.newRow, PruneDeadFields.relationalTypeToEnv(tmr.child.typ)) + TypeCheck(tmr.newRow, PruneDeadFields.relationalTypeToEnv(tmr.child.typ)) tmr.child.typ == subsetTable(tr.typ, "row.2", "global.g1", "row.3") }) @@ -917,7 +910,7 @@ class PruneSuite extends HailSuite { checkRebuild(tmr2, subsetTable(tmr2.typ, "row.foo", "NO_KEY"), (_: BaseIR, r: BaseIR) => { val tmr = r.asInstanceOf[TableMapRows] - TypeCheck(ctx, tmr.newRow, PruneDeadFields.relationalTypeToEnv(tmr.child.typ)) + TypeCheck(tmr.newRow, PruneDeadFields.relationalTypeToEnv(tmr.child.typ)) tmr.child.typ == subsetTable(tr.typ, "row.2", "global.g1", "row.3", "NO_KEY") // FIXME: remove row.3 when TableRead is fixed }) @@ -928,7 +921,7 @@ class PruneSuite extends HailSuite { checkRebuild(tmg, subsetTable(tmg.typ, "global.foo"), (_: BaseIR, r: BaseIR) => { val tmg = r.asInstanceOf[TableMapGlobals] - TypeCheck(ctx, tmg.newGlobals, PruneDeadFields.relationalTypeToEnv(tmg.child.typ)) + TypeCheck(tmg.newGlobals, PruneDeadFields.relationalTypeToEnv(tmg.child.typ)) tmg.child.typ == subsetTable(tr.typ, "global.g1") }) } @@ -993,7 +986,7 @@ class PruneSuite extends HailSuite { checkRebuild(mfc, subsetMatrixTable(mfc.typ, "global.g1"), (_: BaseIR, r: BaseIR) => { val mfc = r.asInstanceOf[MatrixFilterCols] - TypeCheck(ctx, mfc.pred, PruneDeadFields.relationalTypeToEnv(mfc.child.typ)) + TypeCheck(mfc.pred, PruneDeadFields.relationalTypeToEnv(mfc.child.typ)) mfc.child.asInstanceOf[MatrixRead].typ == subsetMatrixTable(mr.typ, "global.g1", "sa.c2") } ) @@ -1004,7 +997,7 @@ class PruneSuite extends HailSuite { checkRebuild(mfe, subsetMatrixTable(mfe.typ, "global.g1"), (_: BaseIR, r: BaseIR) => { val mfe = r.asInstanceOf[MatrixFilterEntries] - TypeCheck(ctx, mfe.pred, PruneDeadFields.relationalTypeToEnv(mfe.child.typ)) + TypeCheck(mfe.pred, PruneDeadFields.relationalTypeToEnv(mfe.child.typ)) mfe.child.asInstanceOf[MatrixRead].typ == subsetMatrixTable(mr.typ, "global.g1", "sa.c2", "va.r2", "g.e1") } ) @@ -1017,7 +1010,7 @@ class PruneSuite extends HailSuite { checkRebuild(mmr, subsetMatrixTable(mmr.typ, "global.g1", "g.e1", "va.foo"), (_: BaseIR, r: BaseIR) => { val mmr = r.asInstanceOf[MatrixMapRows] - TypeCheck(ctx, mmr.newRow, PruneDeadFields.relationalTypeToEnv(mmr.child.typ)) + TypeCheck(mmr.newRow, PruneDeadFields.relationalTypeToEnv(mmr.child.typ)) mmr.child.asInstanceOf[MatrixKeyRowsBy].child.asInstanceOf[MatrixRead].typ == subsetMatrixTable(mr.typ, "global.g1", "va.r2", "g.e1") } ) @@ -1029,7 +1022,7 @@ class PruneSuite extends HailSuite { checkRebuild(mmc, subsetMatrixTable(mmc.typ, "global.g1", "g.e1", "sa.foo"), (_: BaseIR, r: BaseIR) => { val mmc = r.asInstanceOf[MatrixMapCols] - TypeCheck(ctx, mmc.newCol, PruneDeadFields.relationalTypeToEnv(mmc.child.typ)) + 
TypeCheck(mmc.newCol, PruneDeadFields.relationalTypeToEnv(mmc.child.typ)) mmc.child.asInstanceOf[MatrixRead].typ == subsetMatrixTable(mr.typ, "global.g1", "sa.c2", "g.e1") } ) @@ -1040,7 +1033,7 @@ class PruneSuite extends HailSuite { checkRebuild(mme, subsetMatrixTable(mme.typ, "global.g1", "g.foo"), (_: BaseIR, r: BaseIR) => { val mme = r.asInstanceOf[MatrixMapEntries] - TypeCheck(ctx, mme.newEntries, PruneDeadFields.relationalTypeToEnv(mme.child.typ)) + TypeCheck(mme.newEntries, PruneDeadFields.relationalTypeToEnv(mme.child.typ)) mme.child.asInstanceOf[MatrixRead].typ == subsetMatrixTable(mr.typ, "global.g1", "sa.c2", "va.r2") } ) @@ -1051,7 +1044,7 @@ class PruneSuite extends HailSuite { checkRebuild(mmg, subsetMatrixTable(mmg.typ, "global.foo", "g.e1", "va.r2"), (_: BaseIR, r: BaseIR) => { val mmg = r.asInstanceOf[MatrixMapGlobals] - TypeCheck(ctx, mmg.newGlobals, PruneDeadFields.relationalTypeToEnv(mmg.child.typ)) + TypeCheck(mmg.newGlobals, PruneDeadFields.relationalTypeToEnv(mmg.child.typ)) mmg.child.asInstanceOf[MatrixRead].typ == subsetMatrixTable(mr.typ, "global.g1", "va.r2", "g.e1") } ) @@ -1062,7 +1055,7 @@ class PruneSuite extends HailSuite { checkRebuild(ma, subsetMatrixTable(ma.typ, "va.foo", "g.foo"), (_: BaseIR, r: BaseIR) => { val ma = r.asInstanceOf[MatrixAggregateRowsByKey] - TypeCheck(ctx, ma.entryExpr, PruneDeadFields.relationalTypeToEnv(ma.child.typ)) + TypeCheck(ma.entryExpr, PruneDeadFields.relationalTypeToEnv(ma.child.typ)) ma.child.asInstanceOf[MatrixRead].typ == subsetMatrixTable(mr.typ, "global.g1", "sa.c2") } ) @@ -1073,7 +1066,7 @@ class PruneSuite extends HailSuite { checkRebuild(ma, subsetMatrixTable(ma.typ, "g.foo", "sa.foo"), (_: BaseIR, r: BaseIR) => { val ma = r.asInstanceOf[MatrixAggregateColsByKey] - TypeCheck(ctx, ma.entryExpr, PruneDeadFields.relationalTypeToEnv(ma.child.typ)) + TypeCheck(ma.entryExpr, PruneDeadFields.relationalTypeToEnv(ma.child.typ)) ma.child.asInstanceOf[MatrixRead].typ == subsetMatrixTable(mr.typ, "global.g1", "va.r2") } ) @@ -1441,7 +1434,7 @@ class PruneSuite extends HailSuite { .bind(ifIR, pruneT) // should run without error! 
- PruneDeadFields.rebuildIR(ctx, ifIR, BindingEnv.empty[Type].bindEval("a", t), + PruneDeadFields.rebuildIR(ifIR, BindingEnv.empty[Type].bindEval("a", t), PruneDeadFields.RebuildMutableState(memo, mutable.HashMap.empty)) } diff --git a/hail/src/test/scala/is/hail/expr/ir/RandomSuite.scala b/hail/src/test/scala/is/hail/expr/ir/RandomSuite.scala deleted file mode 100644 index 1db3c97e1ae..00000000000 --- a/hail/src/test/scala/is/hail/expr/ir/RandomSuite.scala +++ /dev/null @@ -1,106 +0,0 @@ -package is.hail.expr.ir - -import is.hail.HailSuite -import org.apache.commons.math3.distribution.ChiSquaredDistribution -import org.testng.annotations.Test - -class RandomSuite extends HailSuite { - @Test def testThreefry() { - val k = Array.fill[Long](4)(0) - val tf = Threefry(k) - val x = Array.fill[Long](4)(0) - val expected = Array( - 0x09218EBDE6C85537L, - 0x55941F5266D86105L, - 0x4BD25E16282434DCL, - 0xEE29EC846BD2E40BL - ) - tf(x, 0) - assert(x sameElements expected) - - val rand = new ThreefryRandomEngine(k, Array.fill(4)(0L), 0, tweak = 0) - val y = Array.fill(4)(rand.nextLong()) - assert(y sameElements expected) - } - - def runChiSquareTest(samples: Int, buckets: Int)(sample: => Int) { - val chiSquareDist = new ChiSquaredDistribution(buckets - 1) - val expected = samples.toDouble / buckets - var numRuns = 0 - val passThreshold = 0.1 - val failThreshold = 1e-6 - var geometricMean = failThreshold - - while (geometricMean >= failThreshold && geometricMean < passThreshold) { - val counts = Array.ofDim[Int](buckets) - for (_ <- 0 until samples) counts(sample) += 1 - val chisquare = counts.map(observed => math.pow(observed - expected, 2) / expected).sum - val pvalue = 1 - chiSquareDist.cumulativeProbability(chisquare) - numRuns += 1 - geometricMean = math.pow(geometricMean, (numRuns - 1).toDouble / numRuns) * math.pow(pvalue, 1.0 / numRuns) - } - assert(geometricMean >= passThreshold, s"failed after $numRuns runs with pvalue $geometricMean") - println(s"passed after $numRuns runs with pvalue $geometricMean") - } - - @Test def testRandomInt() { - val n = 1 << 25 - val k = 1 << 15 - val rand = ThreefryRandomEngine() - runChiSquareTest(n, k) { - rand.nextInt() & (k - 1) - } - } - - @Test def testBoundedUniformInt() { - var n = 1 << 25 - var k = 1 << 15 - val rand = ThreefryRandomEngine() - runChiSquareTest(n, k) { - rand.nextInt(k) - } - - n = 30000000 - k = math.pow(n, 3.0/5).toInt - runChiSquareTest(n, k) { - rand.nextInt(k) - } - } - - @Test def testBoundedUniformLong() { - var n = 1 << 25 - var k = 1 << 15 - val rand = ThreefryRandomEngine() - runChiSquareTest(n, k) { - rand.nextLong(k).toInt - } - - n = 30000000 - k = math.pow(n, 3.0/5).toInt - runChiSquareTest(n, k) { - rand.nextLong(k).toInt - } - } - - @Test def testUniformDouble() { - val n = 1 << 25 - val k = 1 << 15 - val rand = ThreefryRandomEngine() - runChiSquareTest(n, k) { - val r = rand.nextDouble() - assert(r >= 0.0 && r < 1.0, r) - (r * k).toInt - } - } - - @Test def testUniformFloat() { - val n = 1 << 25 - val k = 1 << 15 - val rand = ThreefryRandomEngine() - runChiSquareTest(n, k) { - val r = rand.nextFloat() - assert(r >= 0.0 && r < 1.0, r) - (r * k).toInt - } - } -} diff --git a/hail/src/test/scala/is/hail/expr/ir/RequirednessSuite.scala b/hail/src/test/scala/is/hail/expr/ir/RequirednessSuite.scala index 3fd6c228a1d..1f38e497764 100644 --- a/hail/src/test/scala/is/hail/expr/ir/RequirednessSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/RequirednessSuite.scala @@ -436,17 +436,17 @@ class RequirednessSuite extends HailSuite { 
valueIR().map(v => v(0) -> v(1)).foreach { case (n: IR, t: PType) => if (n.typ != t.virtualType) - s += s"${ n.typ } != ${ t.virtualType }: \n${ Pretty(ctx, n) }" + s += s"${ n.typ } != ${ t.virtualType }: \n${ Pretty(n) }" case (n: IR, et: EmitType) => if (n.typ != et.virtualType) - s += s"${ n.typ } != ${ et.virtualType }: \n${ Pretty(ctx, n) }" + s += s"${ n.typ } != ${ et.virtualType }: \n${ Pretty(n) }" } tableIR().map(v => (v(0), v(1), v(2))).foreach { case (n: TableIR, row: PType, global: PType) => if (n.typ.rowType != row.virtualType || n.typ.globalType != global.virtualType ) s += s"""row: ${ n.typ.rowType } vs ${ row.virtualType } |global: ${ n.typ.globalType } vs ${ global.virtualType }: - |${ Pretty(ctx, n) }" + |${ Pretty(n) }" |""".stripMargin } assert(s.size == 0, s.result().mkString("\n\n")) @@ -454,20 +454,20 @@ def dump(m: Memo[BaseTypeWithRequiredness]): String = { m.m.map { case (node, t) => - s"${Pretty(ctx, node.t)}: \n$t" + s"${Pretty(node.t)}: \n$t" }.mkString("\n\n") } @Test(dataProvider = "valueIR") def testRequiredness(node: IR, expected: Any): Unit = { - TypeCheck(ctx, node) + TypeCheck(node) val et = expected match { case pt: PType => EmitType(pt.sType, pt.required) case et: EmitType => et } val res = Requiredness.apply(node, ctx) val actual = res.r.lookup(node).asInstanceOf[TypeWithRequiredness] - assert(actual.canonicalEmitType(node.typ) == et, s"\n\n${Pretty(ctx, node)}: \n$actual\n\n${ dump(res.r) }") + assert(actual.canonicalEmitType(node.typ) == et, s"\n\n${Pretty(node)}: \n$actual\n\n${ dump(res.r) }") } @Test def sharedNodesWorkCorrectly(): Unit = { @@ -485,8 +485,8 @@ class RequirednessSuite extends HailSuite { def testTableRequiredness(node: TableIR, row: PType, global: PType): Unit = { val res = Requiredness.apply(node, ctx) val actual = res.r.lookup(node).asInstanceOf[RTable] - assert(actual.rowType.canonicalPType(node.typ.rowType) == row, s"\n\n${Pretty(ctx, node)}: \n$actual\n\n${ dump(res.r) }") - assert(actual.globalType.canonicalPType(node.typ.globalType) == global, s"\n\n${Pretty(ctx, node)}: \n$actual\n\n${ dump(res.r) }") + assert(actual.rowType.canonicalPType(node.typ.rowType) == row, s"\n\n${Pretty(node)}: \n$actual\n\n${ dump(res.r) }") + assert(actual.globalType.canonicalPType(node.typ.globalType) == global, s"\n\n${Pretty(node)}: \n$actual\n\n${ dump(res.r) }") } @Test def testTableReader() { @@ -512,8 +512,8 @@ class RequirednessSuite extends HailSuite { val node = TableRead(rType, dropRows = false, reader) val res = Requiredness.apply(node, ctx) val actual = res.r.lookup(node).asInstanceOf[RTable] - assert(actual.rowType.canonicalPType(node.typ.rowType) == row, s"\n\n${ Pretty(ctx, node) }: \n$actual\n\n${ dump(res.r) }") - assert(actual.globalType.canonicalPType(node.typ.globalType) == global, s"\n\n${ Pretty(ctx, node) }: \n$actual\n\n${ dump(res.r) }") + assert(actual.rowType.canonicalPType(node.typ.rowType) == row, s"\n\n${ Pretty(node) }: \n$actual\n\n${ dump(res.r) }") + assert(actual.globalType.canonicalPType(node.typ.globalType) == global, s"\n\n${ Pretty(node) }: \n$actual\n\n${ dump(res.r) }") } } diff --git a/hail/src/test/scala/is/hail/expr/ir/SimplifySuite.scala b/hail/src/test/scala/is/hail/expr/ir/SimplifySuite.scala index c5620545fc9..4bfe5abd5b1 100644 --- a/hail/src/test/scala/is/hail/expr/ir/SimplifySuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/SimplifySuite.scala @@ -1,6 +1,7 @@ package is.hail.expr.ir import is.hail.{ExecStrategy, HailSuite} +import 
is.hail.TestUtils.assertEvalsTo import is.hail.expr.ir.TestUtils.IRAggCount import is.hail.types.virtual._ import is.hail.utils.{FastIndexedSeq, FastSeq, Interval} @@ -41,16 +42,16 @@ class SimplifySuite extends HailSuite { @Test def testInsertFieldsRewriteRules() { val ir1 = InsertFields(InsertFields(base, Seq("1" -> I32(2)), None), Seq("1" -> I32(3)), None) - assert(Simplify(ctx, ir1) == InsertFields(base, Seq("1" -> I32(3)), Some(FastIndexedSeq("1", "2")))) + assert(Simplify(ir1) == InsertFields(base, Seq("1" -> I32(3)), Some(FastIndexedSeq("1", "2")))) val ir2 = InsertFields(InsertFields(base, Seq("3" -> I32(2)), Some(FastIndexedSeq("3", "1", "2"))), Seq("3" -> I32(3)), None) - assert(Simplify(ctx, ir2) == InsertFields(base, Seq("3" -> I32(3)), Some(FastIndexedSeq("3", "1", "2")))) + assert(Simplify(ir2) == InsertFields(base, Seq("3" -> I32(3)), Some(FastIndexedSeq("3", "1", "2")))) val ir3 = InsertFields(InsertFields(base, Seq("3" -> I32(2)), Some(FastIndexedSeq("3", "1", "2"))), Seq("4" -> I32(3)), Some(FastIndexedSeq("3", "1", "2", "4"))) - assert(Simplify(ctx, ir3) == InsertFields(base, Seq("3" -> I32(2), "4" -> I32(3)), Some(FastIndexedSeq("3", "1", "2", "4")))) + assert(Simplify(ir3) == InsertFields(base, Seq("3" -> I32(2), "4" -> I32(3)), Some(FastIndexedSeq("3", "1", "2", "4")))) val ir4 = InsertFields(InsertFields(base, Seq("3" -> I32(0), "4" -> I32(1))), Seq("3" -> I32(5))) - assert(Simplify(ctx, ir4) == InsertFields(base, Seq("4" -> I32(1), "3" -> I32(5)), Some(FastIndexedSeq("1", "2", "3", "4")))) + assert(Simplify(ir4) == InsertFields(base, Seq("4" -> I32(1), "3" -> I32(5)), Some(FastIndexedSeq("1", "2", "3", "4")))) } lazy val base2 = Literal(TStruct("A" -> TInt32, "B" -> TInt32, "C" -> TInt32, "D" -> TInt32), Row(1, 2, 3, 4)) @@ -61,16 +62,16 @@ class SimplifySuite extends HailSuite { IndexedSeq("B" -> GetField(base2, "B")), None ) - val simplify1 = Simplify(ctx, ir1) + val simplify1 = Simplify(ir1) assert(simplify1.typ == ir1.typ) } @Test def testInsertSelectRewriteRules() { val ir1 = SelectFields(InsertFields(base, FastIndexedSeq("3" -> I32(1)), None), FastIndexedSeq("1")) - assert(Simplify(ctx, ir1) == SelectFields(base, FastIndexedSeq("1"))) + assert(Simplify(ir1) == SelectFields(base, FastIndexedSeq("1"))) val ir2 = SelectFields(InsertFields(base, FastIndexedSeq("3" -> I32(1)), None), FastIndexedSeq("3", "1")) - assert(Simplify(ctx, ir2) == InsertFields(SelectFields(base, FastIndexedSeq("1")), FastIndexedSeq("3" -> I32(1)), Some(FastIndexedSeq("3", "1")))) + assert(Simplify(ir2) == InsertFields(SelectFields(base, FastIndexedSeq("1")), FastIndexedSeq("3" -> I32(1)), Some(FastIndexedSeq("3", "1")))) } @Test def testBlockMatrixRewriteRules() { @@ -78,7 +79,7 @@ class SimplifySuite extends HailSuite { FastIndexedSeq(2, 2), 10) val identityBroadcast = BlockMatrixBroadcast(bmir, FastIndexedSeq(0, 1), FastIndexedSeq(2, 2), 10) - assert(Simplify(ctx, identityBroadcast) == bmir) + assert(Simplify(identityBroadcast) == bmir) } @Test def testContainsRewrites() { @@ -111,14 +112,14 @@ class SimplifySuite extends HailSuite { val ir2 = Let("row2", InsertFields(r, FastSeq(("y", F64(0.0)))), InsertFields(r2, FastSeq(("z", GetField(r2, "x").toD + GetField(r2, "y"))))) val ir3 = Let("row2", InsertFields(r, FastSeq(("y", F64(0.0)))), InsertFields(Ref("something_else", TStruct.empty), FastSeq(("z", GetField(r2, "y").toI)))) - assert(Simplify(ctx, ir1) == InsertFields(r, FastSeq(("y", F64(0)), ("z", GetField(r, "x").toD)), Some(FastIndexedSeq("x", "y", "z")))) - assert(Simplify(ctx, 
ir2) == InsertFields(r, FastSeq(("y", F64(0.0)), ("z", GetField(r, "x").toD + F64(0.0))), Some(FastIndexedSeq("x", "y", "z")))) + assert(Simplify(ir1) == InsertFields(r, FastSeq(("y", F64(0)), ("z", GetField(r, "x").toD)), Some(FastIndexedSeq("x", "y", "z")))) + assert(Simplify(ir2) == InsertFields(r, FastSeq(("y", F64(0.0)), ("z", GetField(r, "x").toD + F64(0.0))), Some(FastIndexedSeq("x", "y", "z")))) assert(Optimize[IR](ir3, "direct", ctx) == InsertFields(Ref("something_else", TStruct.empty), FastSeq(("z", I32(0))))) val shouldNotRewrite = Let("row2", InsertFields(r, FastSeq(("y", Ref("other", TFloat64)))), InsertFields(r2, FastSeq(("z", invoke("str", TString, r2))))) - assert(Simplify(ctx, shouldNotRewrite) == shouldNotRewrite) + assert(Simplify(shouldNotRewrite) == shouldNotRewrite) } @Test def testNestedInsertsSimplifyAcrossLets() { @@ -137,7 +138,7 @@ class SimplifySuite extends HailSuite { ) ) ) - val simplified = new NormalizeNames(_.toString, true).apply(Simplify(ctx, l)) + val simplified = new NormalizeNames(_.toString, true).apply(Simplify(l)) val expected = Let("1", I32(1) + Ref("OTHER_1", TInt32), Let("2", I32(1) + Ref("1", TInt32), @@ -159,7 +160,7 @@ class SimplifySuite extends HailSuite { AggLet("bar", In(1, TInt32) * In(1, TInt32), Ref("x", TInt32), true))) doesRewrite.foreach { a => - assert(Simplify(ctx, a) == a.query) + assert(Simplify(a) == a.query) } val doesNotRewrite: Array[StreamAgg] = Array( @@ -171,7 +172,7 @@ class SimplifySuite extends HailSuite { ) doesNotRewrite.foreach { a => - assert(Simplify(ctx, a) == a) + assert(Simplify(a) == a) } } @@ -182,7 +183,7 @@ class SimplifySuite extends HailSuite { AggLet("bar", In(1, TInt32) * In(1, TInt32), Ref("x", TInt32), false))) doesRewrite.foreach { a => - assert(Simplify(ctx, a) == a.query) + assert(Simplify(a) == a.query) } val doesNotRewrite: Array[StreamAggScan] = Array( @@ -194,7 +195,7 @@ class SimplifySuite extends HailSuite { ) doesNotRewrite.foreach { a => - assert(Simplify(ctx, a) == a) + assert(Simplify(a) == a) } } @@ -202,7 +203,7 @@ class SimplifySuite extends HailSuite { val tr = TableRange(10, 10) val a = ArrayLen(GetField(TableCollect(tr), "rows")) assert(a.typ == TInt32) - val s = Simplify(ctx, a).asInstanceOf[IR] + val s = Simplify(a).asInstanceOf[IR] assertEvalsTo(s, 10) assert(s.typ == TInt32) } @@ -214,7 +215,7 @@ class SimplifySuite extends HailSuite { mir = MatrixMapCols(mir, AggLet("foo", I32(1), InsertFields(Ref("sa", colType), FastSeq(("bar", I32(2)))), false), None) val tir = MatrixColsTable(mir) - assert(Simplify(ctx, tir) == tir) + assert(Simplify(tir) == tir) } @Test def testFilterParallelize() { @@ -227,8 +228,8 @@ class SimplifySuite extends HailSuite { val tp = TableParallelize(rowsAndGlobals, None) val tf = TableFilter(tp, GetField(Ref("row", tp.typ.rowType), "x") < 100) - val rw = Simplify(ctx, tf) - TypeCheck(ctx, rw) + val rw = Simplify(tf) + TypeCheck(rw) assert(!Exists(rw, _.isInstanceOf[TableFilter])) } } @@ -238,9 +239,9 @@ class SimplifySuite extends HailSuite { val mapOfRange = mapIR(rangeIR)(range_element => range_element + 5) val mapBlockedByLet = bindIR(I32(5))(ref => mapIR(rangeIR)(range_element => range_element + ref)) - assert(Simplify(ctx, StreamLen(rangeIR)) == Simplify(ctx, StreamLen(mapOfRange))) - assert(Simplify(ctx, StreamLen(mapBlockedByLet)) match { - case Let(name, value, body) => body == Simplify(ctx, StreamLen(mapOfRange)) + assert(Simplify(StreamLen(rangeIR)) == Simplify(StreamLen(mapOfRange))) + assert(Simplify(StreamLen(mapBlockedByLet)) match { + case 
Let(name, value, body) => body == Simplify(StreamLen(mapOfRange)) }) } @@ -251,7 +252,7 @@ class SimplifySuite extends HailSuite { tir = TableKeyBy(tir, FastIndexedSeq("idx", "idx2")) tir = TableFilterIntervals(tir, FastIndexedSeq(Interval(Row(0), Row(1), true, false)), false) tir = TableFilterIntervals(tir, FastIndexedSeq(Interval(Row(8), Row(10), true, false)), false) - assert(Simplify(ctx, tir).asInstanceOf[TableFilterIntervals].intervals == FastIndexedSeq(Interval(Row(0), Row(1), true, false), Interval(Row(8), Row(10), true, false))) + assert(Simplify(tir).asInstanceOf[TableFilterIntervals].intervals == FastIndexedSeq(Interval(Row(0), Row(1), true, false), Interval(Row(8), Row(10), true, false))) } @Test def testSimplifyReadFilterIntervals() { @@ -272,21 +273,21 @@ class SimplifySuite extends HailSuite { val tfi1 = TableFilterIntervals(tr, intervals1, true) val exp1 = TableRead(tnr.fullType, false, TableNativeReader(fs, TableNativeReaderParameters(src + "/rows", Some(NativeReaderOptions(intervals1, tnr.fullType.keyType, true))))) - assert(Simplify(ctx, tfi1) == exp1) + assert(Simplify(tfi1) == exp1) val tfi2 = TableFilterIntervals(exp1, intervals2, true) val exp2 = TableRead(tnr.fullType, false, TableNativeReader(fs, TableNativeReaderParameters(src + "/rows", Some(NativeReaderOptions(intersection, tnr.fullType.keyType, true))))) - assert(Simplify(ctx, tfi2) == exp2) + assert(Simplify(tfi2) == exp2) val ztfi1 = TableFilterIntervals(tzr, intervals1, true) val zexp1 = TableRead(tzr.typ, false, tzrr.copy(options = Some(NativeReaderOptions(intervals1, tnr.fullType.keyType, true)))) - assert(Simplify(ctx, ztfi1) == zexp1) + assert(Simplify(ztfi1) == zexp1) val ztfi2 = TableFilterIntervals(ztfi1, intervals2, true) val zexp2 = TableRead(tzr.typ, false, tzrr.copy(options = Some(NativeReaderOptions(intersection, tnr.fullType.keyType, true)))) - assert(Simplify(ctx, ztfi2) == zexp2) + assert(Simplify(ztfi2) == zexp2) } @Test(enabled = false) def testFilterIntervalsKeyByToFilter() { @@ -295,7 +296,7 @@ class SimplifySuite extends HailSuite { t = TableKeyBy(t, FastIndexedSeq("x")) t = TableFilterIntervals(t, FastIndexedSeq(Interval(Row(-10), Row(10), includesStart = true, includesEnd = false)), keep = true) - val t2 = Simplify(ctx, t) + val t2 = Simplify(t) assert(t2 match { case TableKeyBy(TableFilter(child, _), _, _) => !Exists(child, _.isInstanceOf[TableFilterIntervals]) case _ => false @@ -304,28 +305,28 @@ class SimplifySuite extends HailSuite { @Test def testSimplifyArraySlice(): Unit = { val stream = StreamRange(I32(0), I32(10), I32(1)) - val streamSlice1 = Simplify(ctx, ArraySlice(ToArray(stream), I32(0), Some(I32(7)))) + val streamSlice1 = Simplify(ArraySlice(ToArray(stream), I32(0), Some(I32(7)))) assert(streamSlice1 match { case ToArray(StreamTake(_,_)) => true case _ => false } ) assertEvalsTo(streamSlice1.asInstanceOf[IR], FastSeq(0, 1, 2, 3, 4, 5, 6)) - val streamSlice2 = Simplify(ctx, ArraySlice(ToArray(stream), I32(3), Some(I32(5)))) + val streamSlice2 = Simplify(ArraySlice(ToArray(stream), I32(3), Some(I32(5)))) assert(streamSlice2 match { case ToArray(StreamTake(StreamDrop(_,_), _)) => true case _ => false } ) assertEvalsTo(streamSlice2.asInstanceOf[IR], FastSeq(3, 4)) - val streamSlice3 = Simplify(ctx, ArraySlice(ToArray(stream), I32(6), Some(I32(2)))) + val streamSlice3 = Simplify(ArraySlice(ToArray(stream), I32(6), Some(I32(2)))) assert(streamSlice3 match { case MakeArray(_, _) => true case _ => false } ) assertEvalsTo(streamSlice3.asInstanceOf[IR], FastSeq()) - val 
streamSlice4 = Simplify(ctx, ArraySlice(ToArray(stream), I32(0), None)) + val streamSlice4 = Simplify(ArraySlice(ToArray(stream), I32(0), None)) assert(streamSlice4 match { case ToArray(StreamDrop(_, _)) => true case _ => false diff --git a/hail/src/test/scala/is/hail/expr/ir/TableIRSuite.scala b/hail/src/test/scala/is/hail/expr/ir/TableIRSuite.scala index a6560224d64..80e6b2d2d3f 100644 --- a/hail/src/test/scala/is/hail/expr/ir/TableIRSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/TableIRSuite.scala @@ -1151,9 +1151,7 @@ class TableIRSuite extends HailSuite { Row((i / 5) * 5) }, Row("Hello"))) - val e = intercept[HailException](TypeCheck( - ctx, - collect(TableMapPartitions(table, "g", "part", StreamFlatMap(StreamRange(0, 2, 1), "_", part))))) + val e = intercept[HailException](TypeCheck(collect(TableMapPartitions(table, "g", "part", StreamFlatMap(StreamRange(0, 2, 1), "_", part))))) assert("must iterate over the partition exactly once".r.findFirstIn(e.getCause.getMessage).isDefined) } } diff --git a/hail/src/test/scala/is/hail/expr/ir/agg/StagedBlockLinkedListSuite.scala b/hail/src/test/scala/is/hail/expr/ir/agg/StagedBlockLinkedListSuite.scala index 78d1713fd48..ac465d71b01 100644 --- a/hail/src/test/scala/is/hail/expr/ir/agg/StagedBlockLinkedListSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/agg/StagedBlockLinkedListSuite.scala @@ -31,7 +31,7 @@ class StagedBlockLinkedListSuite extends HailSuite { ptr } - fb.result(ctx)(theHailClassLoader)(_) + fb.result()(theHailClassLoader)(_) } private val pushF: (Region, Long, E) => Unit = { @@ -52,7 +52,7 @@ class StagedBlockLinkedListSuite extends HailSuite { Code._empty } - val f = fb.result(ctx)(theHailClassLoader) + val f = fb.result()(theHailClassLoader) ({ (r, ptr, elt) => f(r, ptr, if(elt == null) 0L else ScalaToRegionValue(r, elemPType, elt)) }) @@ -75,7 +75,7 @@ class StagedBlockLinkedListSuite extends HailSuite { Code._empty } - val f = fb.result(ctx)(theHailClassLoader) + val f = fb.result()(theHailClassLoader) ({ (r, ptr, other) => assert(other.elemPType.required == elemPType.required) f(r, ptr, other.ptr) @@ -96,7 +96,7 @@ class StagedBlockLinkedListSuite extends HailSuite { sbll.resultArray(cb, rArg, arrayPType).a } - val f = fb.result(ctx)(theHailClassLoader) + val f = fb.result()(theHailClassLoader) ({ (r, ptr) => SafeRow.read(arrayPType, f(r, ptr)) .asInstanceOf[IndexedSeq[E]] @@ -119,7 +119,7 @@ class StagedBlockLinkedListSuite extends HailSuite { dstPtr } - val f = fb.result(ctx)(theHailClassLoader) + val f = fb.result()(theHailClassLoader) ({ (r, other) => f(r, other.ptr) }) } diff --git a/hail/src/test/scala/is/hail/expr/ir/lowering/BlockMatrixStageSuite.scala b/hail/src/test/scala/is/hail/expr/ir/lowering/BlockMatrixStageSuite.scala index 15c24ad10c2..4c37b43d04f 100644 --- a/hail/src/test/scala/is/hail/expr/ir/lowering/BlockMatrixStageSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/lowering/BlockMatrixStageSuite.scala @@ -21,7 +21,9 @@ class BlockMatrixStageSuite extends HailSuite { val stage = if (ctxs.isEmpty) BlockMatrixStage.empty(TInt32) else { - new BlockMatrixStage(IndexedSeq(), globalVals, ctxs.head._2.typ) { + new BlockMatrixStage( + globalVals, + ctxs.head._2.typ) { private[this] val ctxMap = ctxs.toMap def blockContext(idx: (Int, Int)): IR = ctxMap(idx) def blockBody(ctxRef: Ref): IR = body(ctxRef) diff --git a/hail/src/test/scala/is/hail/expr/ir/lowering/LowerDistributedSortSuite.scala b/hail/src/test/scala/is/hail/expr/ir/lowering/LowerDistributedSortSuite.scala index 65f01963fe7..27ad08bce34 
100644 --- a/hail/src/test/scala/is/hail/expr/ir/lowering/LowerDistributedSortSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/lowering/LowerDistributedSortSuite.scala @@ -1,5 +1,6 @@ package is.hail.expr.ir.lowering +import is.hail.TestUtils.assertEvalsTo import is.hail.expr.ir.functions.IRRandomness import is.hail.expr.ir.{Analyses, Apply, ApplyBinaryPrimOp, Ascending, Descending, ErrorIDs, GetField, I32, IR, Literal, MakeStruct, Ref, Requiredness, RequirednessAnalysis, SelectFields, SortField, TableIR, TableMapRows, TableRange, ToArray, ToStream, mapIR} import is.hail.{ExecStrategy, HailContext, HailSuite, TestUtils} @@ -33,42 +34,37 @@ class LowerDistributedSortSuite extends HailSuite { // Only does ascending for now def testDistributedSortHelper(myTable: TableIR, sortFields: IndexedSeq[SortField]): Unit = { - val originalShuffleCutoff = backend.getFlag("shuffle_cutoff_to_local_sort") - try { - backend.setFlag("shuffle_cutoff_to_local_sort", "40") - val analyses: Analyses = Analyses.apply(myTable, ctx) - val rowType = analyses.requirednessAnalysis.lookup(myTable).asInstanceOf[RTable].rowType - val stage = LowerTableIR.applyTable(myTable, DArrayLowering.All, ctx, analyses, Map.empty[String, IR]) + HailContext.setFlag("shuffle_cutoff_to_local_sort", "40") + val analyses: Analyses = Analyses.apply(myTable, ctx) + val rowType = analyses.requirednessAnalysis.lookup(myTable).asInstanceOf[RTable].rowType + val stage = LowerTableIR.applyTable(myTable, DArrayLowering.All, ctx, analyses, Map.empty[String, IR]) - val sortedTs = LowerDistributedSort.distributedSort(ctx, stage, sortFields, Map.empty[String, IR], rowType) - val res = TestUtils.eval(sortedTs.mapCollect(Map.empty[String, IR])(x => ToArray(x))).asInstanceOf[IndexedSeq[IndexedSeq[Row]]].flatten + val sortedTs = LowerDistributedSort.distributedSort(ctx, stage, sortFields, Map.empty[String, IR], rowType) + val res = TestUtils.eval(sortedTs.mapCollect(Map.empty[String, IR])(x => ToArray(x))).asInstanceOf[IndexedSeq[IndexedSeq[Row]]].flatten - val rowFunc = myTable.typ.rowType.select(sortFields.map(_.field))._2 - val unsortedCollect = is.hail.expr.ir.TestUtils.collect(myTable) - val unsortedAnalyses = Analyses.apply(unsortedCollect, ctx) - val unsorted = TestUtils.eval(LowerTableIR.apply(unsortedCollect, DArrayLowering.All, ctx, unsortedAnalyses, Map.empty[String, IR])).asInstanceOf[Row](0).asInstanceOf[IndexedSeq[Row]] - val scalaSorted = unsorted.sortWith{ case (l, r) => - val leftKey = rowFunc(l) - val rightKey = rowFunc(r) - var ans = false - var i = 0 - while (i < sortFields.size) { - if (leftKey(i).asInstanceOf[Int] != rightKey(i).asInstanceOf[Int]) { - if (sortFields(i).sortOrder == Ascending) { - ans = leftKey(i).asInstanceOf[Int] < rightKey(i).asInstanceOf[Int] - } else { - ans = leftKey(i).asInstanceOf[Int] > rightKey(i).asInstanceOf[Int] - } - i = sortFields.size + val rowFunc = myTable.typ.rowType.select(sortFields.map(_.field))._2 + val unsortedCollect = is.hail.expr.ir.TestUtils.collect(myTable) + val unsortedAnalyses = Analyses.apply(unsortedCollect, ctx) + val unsorted = TestUtils.eval(LowerTableIR.apply(unsortedCollect, DArrayLowering.All, ctx, unsortedAnalyses, Map.empty[String, IR])).asInstanceOf[Row](0).asInstanceOf[IndexedSeq[Row]] + val scalaSorted = unsorted.sortWith{ case (l, r) => + val leftKey = rowFunc(l) + val rightKey = rowFunc(r) + var ans = false + var i = 0 + while (i < sortFields.size) { + if (leftKey(i).asInstanceOf[Int] != rightKey(i).asInstanceOf[Int]) { + if (sortFields(i).sortOrder == Ascending) { 
+ ans = leftKey(i).asInstanceOf[Int] < rightKey(i).asInstanceOf[Int] + } else { + ans = leftKey(i).asInstanceOf[Int] > rightKey(i).asInstanceOf[Int] } - i += 1 + i = sortFields.size } - ans + i += 1 } - assert(res == scalaSorted) - } finally { - backend.setFlag("shuffle_cutoff_to_local_sort", originalShuffleCutoff) + ans } + assert(res == scalaSorted) } @Test def testDistributedSort(): Unit = { @@ -91,9 +87,4 @@ class LowerDistributedSortSuite extends HailSuite { testDistributedSortHelper(tableWithExtraField, IndexedSeq(SortField("idx", Descending))) testDistributedSortHelper(tableWithExtraField, IndexedSeq(SortField("foo", Descending), SortField("idx", Ascending))) } - - @Test def testDistributedSortEmpty(): Unit = { - val tableRange = TableRange(0, 1) - testDistributedSortHelper(tableRange, IndexedSeq(SortField("idx", Ascending))) - } } diff --git a/hail/src/test/scala/is/hail/io/AvroReaderSuite.scala b/hail/src/test/scala/is/hail/io/AvroReaderSuite.scala index 2509bc05107..6d6912fe488 100644 --- a/hail/src/test/scala/is/hail/io/AvroReaderSuite.scala +++ b/hail/src/test/scala/is/hail/io/AvroReaderSuite.scala @@ -1,6 +1,7 @@ package is.hail.io import is.hail.ExecStrategy.ExecStrategy +import is.hail.TestUtils.assertEvalsTo import is.hail.expr.ir.{ReadPartition, Str, ToArray} import is.hail.io.avro.AvroPartitionReader import is.hail.utils.{FastIndexedSeq, fatal, using} diff --git a/hail/src/test/scala/is/hail/io/compress/BGzipCodecSuite.scala b/hail/src/test/scala/is/hail/io/compress/BGzipCodecSuite.scala index ff915e20ed9..e7b81bb9b0e 100644 --- a/hail/src/test/scala/is/hail/io/compress/BGzipCodecSuite.scala +++ b/hail/src/test/scala/is/hail/io/compress/BGzipCodecSuite.scala @@ -142,7 +142,7 @@ class BGzipCodecSuite extends HailSuite { val end = makeVirtualOffset(splits(i + 1), 0) Row(i, compPath, splits(i), end, true) } - val lines2 = GenericLines.collect(fs, GenericLines.read(fs, contexts, false, false)) + val lines2 = GenericLines.collect(fs, GenericLines.read(fs, contexts, false)) compareLines(lines2, lines) true } diff --git a/hail/src/test/scala/is/hail/types/physical/PContainerTest.scala b/hail/src/test/scala/is/hail/types/physical/PContainerTest.scala index cf8c2a1edc4..3c03b854f05 100644 --- a/hail/src/test/scala/is/hail/types/physical/PContainerTest.scala +++ b/hail/src/test/scala/is/hail/types/physical/PContainerTest.scala @@ -38,7 +38,7 @@ class PContainerTest extends PhysicalTestUtils { fb.emit(Region.containsNonZeroBits(value + sourceType.lengthHeaderBytes, sourceType.loadLength(value).toL)) - val res = fb.result(ctx)(theHailClassLoader)(src) + val res = fb.result()(theHailClassLoader)(src) res } @@ -53,7 +53,7 @@ class PContainerTest extends PhysicalTestUtils { fb.emit(sourceType.hasMissingValues(value)) - val res = fb.result(ctx)(theHailClassLoader)(src) + val res = fb.result()(theHailClassLoader)(src) res } diff --git a/hail/src/test/scala/is/hail/types/physical/PNDArraySuite.scala b/hail/src/test/scala/is/hail/types/physical/PNDArraySuite.scala index c563e8be3ca..d63eb279223 100644 --- a/hail/src/test/scala/is/hail/types/physical/PNDArraySuite.scala +++ b/hail/src/test/scala/is/hail/types/physical/PNDArraySuite.scala @@ -59,7 +59,7 @@ class PNDArraySuite extends PhysicalTestUtils { throw e } - val f = fb.result(ctx)(theHailClassLoader) + val f = fb.result()(theHailClassLoader) val result1 = f(region1, region2, region3) val result1Data = nd.unstagedDataFirstElementPointer(result1) diff --git a/hail/src/test/scala/is/hail/types/physical/PhysicalTestUtils.scala 
b/hail/src/test/scala/is/hail/types/physical/PhysicalTestUtils.scala index f7c2dc937da..d9f2585c088 100644 --- a/hail/src/test/scala/is/hail/types/physical/PhysicalTestUtils.scala +++ b/hail/src/test/scala/is/hail/types/physical/PhysicalTestUtils.scala @@ -76,7 +76,7 @@ abstract class PhysicalTestUtils extends HailSuite { } val copy = try { - val f = fb.result(ctx)(theHailClassLoader) + val f = fb.result()(theHailClassLoader) val copyOff = f(region, srcAddress) UnsafeRow.read(destType, region, copyOff) } catch { diff --git a/hail/src/test/scala/is/hail/utils/TextTableSuite.scala b/hail/src/test/scala/is/hail/utils/TextTableSuite.scala new file mode 100644 index 00000000000..280c49c2b92 --- /dev/null +++ b/hail/src/test/scala/is/hail/utils/TextTableSuite.scala @@ -0,0 +1,35 @@ +package is.hail.utils + +import is.hail.HailSuite +import is.hail.expr.ir.TextTableReader +import is.hail.types.virtual._ +import org.testng.annotations.Test + +class TextTableSuite extends HailSuite { + + @Test def testTypeGuessing() { + + val doubleStrings = Seq("1", ".1", "-1", "-.1", "1e1", "-1e1", + "1E1", "-1E1", "1.0e2", "-1.0e2", "1e-1", "-1e-1", "-1.0e-2") + val badDoubleStrings = Seq("1ee1", "1e--2", "1ee2", "1e0.1", "1e-0.1", "1e1.") + val intStrings = Seq("1", "0", "-1", "12312398", "-123092398") + val longStrings = Seq("11010101010101010", "-9223372036854775808") + val booleanStrings = Seq("true", "True", "TRUE", "false", "False", "FALSE") + + doubleStrings.foreach(str => assert(TextTableReader.float64Matcher(str))) + badDoubleStrings.foreach(str => assert(!TextTableReader.float64Matcher(str))) + + intStrings.foreach(str => assert(TextTableReader.int32Matcher(str))) + intStrings.foreach(str => assert(TextTableReader.int64Matcher(str))) + intStrings.foreach(str => assert(TextTableReader.float64Matcher(str))) + + longStrings.foreach(str => assert(TextTableReader.int64Matcher(str), str)) + longStrings.foreach(str => assert(!TextTableReader.int32Matcher(str))) + + booleanStrings.foreach(str => assert(TextTableReader.booleanMatcher(str))) + } + + @Test def testPipeDelimiter() { + assert(TextTableReader.splitLine("a|b", "|", '#').toSeq == Seq("a", "b")) + } +} diff --git a/infra/azure/main.tf b/infra/azure/main.tf index 65e07a3f140..fa13f011647 100644 --- a/infra/azure/main.tf +++ b/infra/azure/main.tf @@ -2,16 +2,12 @@ terraform { required_providers { azurerm = { source = "hashicorp/azurerm" - version = "=2.99.0" + version = "=2.97.0" } azuread = { source = "hashicorp/azuread" version = "=2.7.0" } - kubernetes = { - source = "hashicorp/kubernetes" - version = "2.8.0" - } http = { source = "hashicorp/http" version = "2.1.0" @@ -64,11 +60,7 @@ module "vdc" { resource_group = data.azurerm_resource_group.rg container_registry_id = azurerm_container_registry.acr.id - - k8s_default_node_pool_machine_type = var.k8s_default_node_pool_machine_type - k8s_user_pool_machine_type = var.k8s_user_pool_machine_type - k8s_preemptible_node_pool_name = var.k8s_preemptible_node_pool_name - k8s_nonpreemptible_node_pool_name = var.k8s_nonpreemptible_node_pool_name + k8s_machine_type = var.k8s_machine_type } module "db" { diff --git a/infra/azure/modules/batch/main.tf b/infra/azure/modules/batch/main.tf index 10e643b2ef0..648f3a9a31a 100644 --- a/infra/azure/modules/batch/main.tf +++ b/infra/azure/modules/batch/main.tf @@ -55,10 +55,6 @@ resource "azurerm_storage_account" "batch" { location = var.resource_group.location account_tier = "Standard" account_replication_type = "LRS" - - blob_properties { - last_access_time_enabled = 
true - } } resource "azurerm_storage_container" "batch_logs" { @@ -79,10 +75,6 @@ resource "azurerm_storage_account" "test" { location = var.resource_group.location account_tier = "Standard" account_replication_type = "LRS" - - blob_properties { - last_access_time_enabled = true - } } resource "azurerm_storage_container" "test" { @@ -99,7 +91,7 @@ resource "azurerm_storage_management_policy" "test" { enabled = true filters { prefix_match = [azurerm_storage_container.test.name] - blob_types = ["blockBlob"] + blob_types = ["blockBlob", "appendBlob"] } actions { base_blob { diff --git a/infra/azure/modules/ci/main.tf b/infra/azure/modules/ci/main.tf index 3b31d03a182..8a03bae37a7 100644 --- a/infra/azure/modules/ci/main.tf +++ b/infra/azure/modules/ci/main.tf @@ -4,10 +4,6 @@ resource "azurerm_storage_account" "ci" { location = var.resource_group.location account_tier = "Standard" account_replication_type = "LRS" - - blob_properties { - last_access_time_enabled = true - } } resource "azurerm_storage_container" "ci_artifacts" { @@ -24,7 +20,7 @@ resource "azurerm_storage_management_policy" "ci" { enabled = true filters { prefix_match = [azurerm_storage_container.ci_artifacts.name] - blob_types = ["blockBlob"] + blob_types = ["blockBlob", "appendBlob"] } actions { base_blob { diff --git a/infra/azure/modules/vdc/main.tf b/infra/azure/modules/vdc/main.tf index 6123942cbce..49bca49e4de 100644 --- a/infra/azure/modules/vdc/main.tf +++ b/infra/azure/modules/vdc/main.tf @@ -52,13 +52,13 @@ resource "azurerm_kubernetes_cluster" "vdc" { default_node_pool { name = "nonpreempt" - vm_size = var.k8s_default_node_pool_machine_type + vm_size = var.k8s_machine_type vnet_subnet_id = azurerm_subnet.k8s_subnet.id enable_auto_scaling = true min_count = 1 - max_count = 5 + max_count = 200 node_labels = { "preemptible" = "false" @@ -82,32 +82,10 @@ resource "azurerm_kubernetes_cluster" "vdc" { } } -resource "azurerm_kubernetes_cluster_node_pool" "vdc_nonpreemptible_pool" { - name = var.k8s_nonpreemptible_node_pool_name - kubernetes_cluster_id = azurerm_kubernetes_cluster.vdc.id - vm_size = var.k8s_user_pool_machine_type - vnet_subnet_id = azurerm_subnet.k8s_subnet.id - - enable_auto_scaling = true - - min_count = 0 - max_count = 200 - - node_labels = { - "preemptible" = "false" - } - - lifecycle { - # Ignore if the node count has natually changed since last apply - # due to autoscaling - ignore_changes = [node_count] - } -} - resource "azurerm_kubernetes_cluster_node_pool" "vdc_preemptible_pool" { - name = var.k8s_preemptible_node_pool_name + name = "preempt" kubernetes_cluster_id = azurerm_kubernetes_cluster.vdc.id - vm_size = var.k8s_user_pool_machine_type + vm_size = var.k8s_machine_type vnet_subnet_id = azurerm_subnet.k8s_subnet.id enable_auto_scaling = true diff --git a/infra/azure/modules/vdc/variables.tf b/infra/azure/modules/vdc/variables.tf index 5f39919f551..d986b5c3987 100644 --- a/infra/azure/modules/vdc/variables.tf +++ b/infra/azure/modules/vdc/variables.tf @@ -5,19 +5,7 @@ variable resource_group { }) } -variable k8s_default_node_pool_machine_type { - type = string -} - -variable k8s_user_pool_machine_type { - type = string -} - -variable k8s_preemptible_node_pool_name { - type = string -} - -variable k8s_nonpreemptible_node_pool_name { +variable k8s_machine_type { type = string } diff --git a/infra/azure/variables.tf b/infra/azure/variables.tf index c1b0e0e2b6f..ffaa92acbb7 100644 --- a/infra/azure/variables.tf +++ b/infra/azure/variables.tf @@ -7,33 +7,18 @@ variable domain { } variable acr_name { 
- type = string + type = string default = "" } variable acr_sku { - type = string + type = string default = "Premium" } -variable k8s_default_node_pool_machine_type { - type = string - default = "Standard_D2_v2" # 2 vCPU -} - -variable k8s_user_pool_machine_type { - type = string - default = "Standard_D4_v2" # 8 vCPU -} - -variable k8s_preemptible_node_pool_name { - type = string - default = "preempt1" -} - -variable k8s_nonpreemptible_node_pool_name { - type = string - default = "nonpreempt1" +variable k8s_machine_type { + type = string + default = "Standard_D2_v2" } variable organization_domain { @@ -52,6 +37,6 @@ variable "ci_config" { } variable oauth2_developer_redirect_uris { - type = list(string) + type = list(string) default = [] } diff --git a/infra/gcp/main.tf b/infra/gcp/main.tf index 350ffc058b7..4f6eb836799 100644 --- a/infra/gcp/main.tf +++ b/infra/gcp/main.tf @@ -6,7 +6,7 @@ terraform { } kubernetes = { source = "hashicorp/kubernetes" - version = "2.8.0" + version = "1.13.3" } } backend "gcs" { @@ -14,14 +14,6 @@ terraform { } } -variable "k8s_preemptible_node_pool_name" { - type = string - default = "preemptible-pool" -} -variable "k8s_nonpreemptible_node_pool_name" { - type = string - default = "nonpreemptible-pool" -} variable "batch_gcp_regions" {} variable "gcp_project" {} variable "batch_logs_bucket_location" {} @@ -126,9 +118,9 @@ resource "google_container_cluster" "vdc" { } resource "google_container_node_pool" "vdc_preemptible_pool" { - name = var.k8s_preemptible_node_pool_name + name = "preemptible-pool" location = var.gcp_zone - cluster = google_container_cluster.vdc.name + cluster = google_container_cluster.vdc.name # Allocate at least one node, so that autoscaling can take place. initial_node_count = 1 @@ -140,7 +132,7 @@ resource "google_container_node_pool" "vdc_preemptible_pool" { node_config { preemptible = true - machine_type = "n1-standard-8" + machine_type = "n1-standard-2" labels = { "preemptible" = "true" @@ -163,9 +155,9 @@ resource "google_container_node_pool" "vdc_preemptible_pool" { } resource "google_container_node_pool" "vdc_nonpreemptible_pool" { - name = var.k8s_nonpreemptible_node_pool_name + name = "nonpreemptible-pool" location = var.gcp_zone - cluster = google_container_cluster.vdc.name + cluster = google_container_cluster.vdc.name # Allocate at least one node, so that autoscaling can take place. 
initial_node_count = 1 @@ -177,7 +169,7 @@ resource "google_container_node_pool" "vdc_nonpreemptible_pool" { node_config { preemptible = false - machine_type = "n1-standard-8" + machine_type = "n1-standard-2" labels = { preemptible = "false" diff --git a/internal-gateway/internal-gateway.nginx.conf b/internal-gateway/internal-gateway.nginx.conf index e1e09e0e285..7389e85bd1d 100644 --- a/internal-gateway/internal-gateway.nginx.conf +++ b/internal-gateway/internal-gateway.nginx.conf @@ -13,7 +13,7 @@ map $service $batch_driver_limit_key { default ""; # no key => no limit } -limit_req_zone $batch_driver_limit_key zone=batch_driver:1m rate=60r/s; +limit_req_zone $batch_driver_limit_key zone=batch_driver:1m rate=18r/s; server { server_name internal.hail; diff --git a/letsencrypt/subdomains.txt b/letsencrypt/subdomains.txt index 7bc1e03642d..336db4be899 100644 --- a/letsencrypt/subdomains.txt +++ b/letsencrypt/subdomains.txt @@ -2,9 +2,6 @@ ci notebook batch batch-driver -benchmark -blog -memory monitoring auth prometheus diff --git a/query/Makefile b/query/Makefile index 11fedfc4a63..8763f9e08cf 100644 --- a/query/Makefile +++ b/query/Makefile @@ -1,8 +1,5 @@ include ../config.mk -QUERY_STORAGE_URI := $(shell kubectl get secret global-config --template={{.data.query_storage_uri}} | base64 --decode) -TEST_STORAGE_URI := $(shell kubectl get secret global-config --template={{.data.test_storage_uri}} | base64 --decode) - EXTRA_PYTHONPATH := ../hail/python:../gear PYTHON := PYTHONPATH=$${PYTHONPATH:+$${PYTHONPATH}:}$(EXTRA_PYTHONPATH) python3 @@ -15,57 +12,51 @@ jar: cp ../hail/build/libs/hail-all-spark.jar ./hail.jar HAIL_TEST_GCS_TOKEN := $(shell whoami) -HAIL_TEST_RESOURCES_PREFIX := $(TEST_STORAGE_URI)/$(HAIL_TEST_GCS_TOKEN)/hail-test-resources -HAIL_TEST_RESOURCES_DIR := $(HAIL_TEST_RESOURCES_PREFIX)/test/resources/ -HAIL_DOCTEST_DATA_DIR := $(HAIL_TEST_RESOURCES_PREFIX)/doctest/data/ HAIL_REVISION := $(shell git rev-parse HEAD) -ifeq ($(NAMESPACE),default) -ifeq ($(DEPLOY_JAR_FOR_PUBLIC_USE),true) -# This should only be used if the normal CI deploy process fails and you need to upload a JAR to the -# expected location for our users. -JAR_LOCATION := $(QUERY_STORAGE_URI)/jars/$(HAIL_REVISION).jar -else -JAR_LOCATION := $(QUERY_STORAGE_URI)/jars/$(HAIL_TEST_GCS_TOKEN)/$(HAIL_REVISION).jar -endif -else -JAR_LOCATION := $(TEST_STORAGE_URI)/$(NAMESPACE)/jars/$(HAIL_REVISION).jar -endif +# JAR_LOCATION := gs://hail-test-dmk9z/$(HAIL_TEST_GCS_TOKEN)/jars/$(HAIL_REVISION).jar +JAR_LOCATION := gs://hail-query/jars/$(HAIL_REVISION).jar -.PHONY: upload-query-jar -upload-query-jar: jar - ! [ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default +.PHONY: push-jar +push-jar: jar gsutil cp ./hail.jar "$(JAR_LOCATION)" echo >last_uploaded_jar "$(JAR_LOCATION)" upload-resources-dir: - ! 
[ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default - python3 -m hailtop.aiotools.copy 'null' '[{"from":"../hail/src/test/resources","to":"$(HAIL_TEST_RESOURCES_DIR)"},{"from":"../hail/python/hail/docs/data","to":"$(HAIL_DOCTEST_DATA_DIR)"}]' + python3 -m hailtop.aiotools.copy 'null' '[ \ +{"from": "../hail/src/test/resources/", \ + "to": "gs://hail-test-dmk9z/$(HAIL_TEST_GCS_TOKEN)/hail-test-resources/test/resources/"}, \ +{"from": "../hail/python/hail/docs/data/", \ + "to": "gs://hail-test-dmk9z/$(HAIL_TEST_GCS_TOKEN)/hail-test-resources/doctest/data/"}]' touch upload-resources-dir .PHONY: test -test: upload-query-jar upload-resources-dir - HAIL_QUERY_BACKEND=batch \ - HAIL_TEST_RESOURCES_DIR='$(HAIL_TEST_RESOURCES_DIR)' \ - HAIL_DOCTEST_DATA_DIR='$(HAIL_DOCTEST_DATA_DIR)' \ - HAIL_JAR_URL=$$(cat last_uploaded_jar) \ - $(MAKE) -C ../hail pytest +test: push-jar upload-resources-dir + HAIL_QUERY_BACKEND=service \ + HAIL_TEST_RESOURCES_DIR='gs://hail-test-dmk9z/$(HAIL_TEST_GCS_TOKEN)/hail-test-resources/test/resources/' \ + HAIL_DOCTEST_DATA_DIR='gs://hail-test-dmk9z/$(HAIL_TEST_GCS_TOKEN)/hail-test-resources/doctest/data/' \ + HAIL_SHA='$(HAIL_REVISION)-$(TOKEN)' \ + HAIL_JAR_URL=$$(cat last_uploaded_jar) \ + $(MAKE) -C ../hail pytest .PHONY: ipython -ipython: upload-query-jar - HAIL_QUERY_BACKEND=batch \ - HAIL_JAR_URL=$$(cat last_uploaded_jar) \ - ipython +ipython: push-jar + HAIL_QUERY_BACKEND=service \ + HAIL_SHA='$(HAIL_REVISION)-$(TOKEN)' \ + HAIL_JAR_URL=$$(cat last_uploaded_jar) \ + ipython .PHONY: test-no-deps test-no-deps: - HAIL_QUERY_BACKEND=batch \ - HAIL_TEST_RESOURCES_DIR='$(HAIL_TEST_RESOURCES_DIR)' \ - HAIL_DOCTEST_DATA_DIR='$(HAIL_DOCTEST_DATA_DIR)' \ - HAIL_JAR_URL=$$(cat last_uploaded_jar) \ - $(MAKE) -C ../hail pytest + HAIL_QUERY_BACKEND=service \ + HAIL_TEST_RESOURCES_DIR='gs://hail-test-dmk9z/$(HAIL_TEST_GCS_TOKEN)/hail-test-resources/test/resources/' \ + HAIL_DOCTEST_DATA_DIR='gs://hail-test-dmk9z/$(HAIL_TEST_GCS_TOKEN)/hail-test-resources/doctest/data/' \ + HAIL_SHA='$(HAIL_REVISION)-$(TOKEN)' \ + HAIL_JAR_URL=$$(cat last_uploaded_jar) \ + $(MAKE) -C ../hail pytest .PHONY: ipython-no-deps ipython-no-deps: - HAIL_QUERY_BACKEND=batch \ - HAIL_JAR_URL=$$(cat last_uploaded_jar) \ - ipython + HAIL_QUERY_BACKEND=service \ + HAIL_SHA='$(HAIL_REVISION)-$(TOKEN)' \ + HAIL_JAR_URL=$$(cat last_uploaded_jar) \ + ipython diff --git a/tls/config.yaml b/tls/config.yaml index e0b90ca7142..6fea03be255 100644 --- a/tls/config.yaml +++ b/tls/config.yaml @@ -19,10 +19,6 @@ principals: domains: - batch-driver kind: json -- name: batch-driver-nginx - domains: - - batch-driver - kind: nginx - name: ci domains: - ci From 47748b6324786f60e4cacd7b9ab83edecf1cb811 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Tue, 29 Mar 2022 18:17:43 +1100 Subject: [PATCH 349/501] Revert "Revert "Merge upstream 0.2.93" (#177)" (#178) This reverts commit e279414c8766be01ab7da46e7bb91b4a3f52241d. 
--- auth/Dockerfile | 2 +- auth/deployment.yaml | 2 +- batch/Dockerfile.driver-nginx | 12 + batch/Dockerfile.worker | 2 +- batch/Makefile | 11 +- .../batch/driver/instance_collection/pool.py | 8 + batch/batch/driver/job.py | 3 + batch/batch/driver/main.py | 2 - batch/batch/file_store.py | 20 + batch/batch/front_end/front_end.py | 294 ++-- batch/batch/front_end/templates/job.html | 21 +- batch/batch/front_end/validate.py | 7 +- batch/batch/resource_usage.py | 103 ++ batch/batch/spec_writer.py | 1 + batch/batch/worker/worker.py | 1225 ++++++++------ batch/deployment.yaml | 54 +- batch/driver-nginx.conf | 78 + batch/test/test_batch.py | 2 +- batch2/react-batch/package-lock.json | 12 +- .../benchmark_hail/run/table_benchmarks.py | 5 + bootstrap-gateway/deployment.yaml | 2 +- build.yaml | 214 ++- ci/ci/build.py | 6 +- ci/ci/github.py | 2 +- dev-docs/kubernetes-operations.md | 49 +- docker/Dockerfile.base | 2 +- docker/hail-ubuntu/curlrc | 2 +- docker/requirements.txt | 2 +- gateway/deployment.yaml | 2 +- gear/gear/database.py | 16 +- gear/gear/metrics.py | 1 + hail/.gitignore | 1 + hail/Dockerfile.hail-run-tests | 2 +- hail/Makefile | 8 +- hail/python/dev/requirements.txt | 2 +- hail/python/hail/__init__.py | 8 +- hail/python/hail/backend/backend.py | 69 +- hail/python/hail/backend/local_backend.py | 14 +- hail/python/hail/backend/py4j_backend.py | 83 +- hail/python/hail/backend/service_backend.py | 367 +++- hail/python/hail/backend/spark_backend.py | 12 +- hail/python/hail/context.py | 246 ++- hail/python/hail/docs/change_log.md | 33 + .../python/hail/docs/cloud/query_on_batch.rst | 65 + .../datasets/schemas/gnomad_genome_sites.rst | 368 ++-- .../schemas/gnomad_hgdp_1kg_callset.rst | 651 ------- .../schemas/gnomad_hgdp_1kg_subset_dense.rst | 1501 +++++++++++++++++ ...gnomad_hgdp_1kg_subset_sample_metadata.rst | 653 +++++++ .../schemas/gnomad_hgdp_1kg_subset_sparse.rst | 54 + ...ad_hgdp_1kg_subset_variant_annotations.rst | 857 ++++++++++ .../schemas/gnomad_pca_variant_loadings.rst | 26 + .../schemas/gnomad_variant_co-occurrence.rst | 60 + hail/python/hail/docs/ggplot/index.rst | 8 + hail/python/hail/docs/hail_on_the_cloud.rst | 10 +- hail/python/hail/experimental/datasets.json | 156 +- hail/python/hail/experimental/datasets.py | 6 +- hail/python/hail/experimental/expressions.py | 17 +- .../experimental/vcf_combiner/vcf_combiner.py | 1 + .../hail/expr/expressions/base_expression.py | 5 +- .../expr/expressions/typed_expressions.py | 45 +- hail/python/hail/expr/types.py | 4 +- hail/python/hail/fs/fs.py | 4 + hail/python/hail/fs/google_fs.py | 148 -- hail/python/hail/fs/hadoop_fs.py | 8 +- hail/python/hail/fs/local_fs.py | 49 +- hail/python/hail/fs/router_fs.py | 32 +- hail/python/hail/fs/stat_result.py | 4 +- hail/python/hail/ggplot/__init__.py | 9 +- hail/python/hail/ggplot/geoms.py | 9 +- hail/python/hail/ggplot/ggplot.py | 13 +- hail/python/hail/ggplot/scale.py | 106 +- hail/python/hail/ggplot/stats.py | 2 + hail/python/hail/ggplot/utils.py | 18 +- hail/python/hail/ir/__init__.py | 3 +- hail/python/hail/ir/base_ir.py | 2 +- hail/python/hail/ir/blockmatrix_ir.py | 10 +- hail/python/hail/ir/blockmatrix_writer.py | 29 + hail/python/hail/ir/ir.py | 7 +- hail/python/hail/ir/matrix_ir.py | 13 +- hail/python/hail/ir/matrix_reader.py | 90 +- hail/python/hail/ir/register_functions.py | 7 +- hail/python/hail/ir/table_ir.py | 13 +- hail/python/hail/ir/table_reader.py | 39 +- hail/python/hail/ir/utils.py | 52 + hail/python/hail/linalg/blockmatrix.py | 43 +- hail/python/hail/matrixtable.py | 33 +- 
hail/python/hail/methods/impex.py | 509 +++++- hail/python/hail/methods/statgen.py | 4 + hail/python/hail/stats/linear_mixed_model.py | 1 + hail/python/hail/table.py | 33 +- hail/python/hail/utils/__init__.py | 3 +- hail/python/hail/utils/hadoop_utils.py | 23 +- hail/python/hail/utils/java.py | 35 +- hail/python/hail/utils/misc.py | 40 +- .../vds/combiner/variant_dataset_combiner.py | 1 + hail/python/hail/vds/variant_dataset.py | 1 + hail/python/hailtop/aiotools/copy.py | 13 +- hail/python/hailtop/batch_client/aioclient.py | 4 +- hail/python/hailtop/config/user_config.py | 3 - hail/python/hailtop/hailctl/config/cli.py | 8 +- hail/python/hailtop/utils/__init__.py | 5 +- hail/python/hailtop/utils/time.py | 4 + hail/python/hailtop/utils/utils.py | 13 +- hail/python/requirements.txt | 2 +- hail/python/setup.py | 1 + hail/python/test/hail/backend/__init__.py | 0 .../test/hail/backend/test_service_backend.py | 28 + .../hail/experimental/test_experimental.py | 3 +- hail/python/test/hail/expr/test_expr.py | 37 +- hail/python/test/hail/expr/test_ndarrays.py | 10 +- .../test/hail/genetics/test_pedigree.py | 2 +- hail/python/test/hail/ggplot/test_ggplot.py | 36 +- hail/python/test/hail/helpers.py | 29 +- hail/python/test/hail/linalg/test_linalg.py | 30 +- .../hail/matrixtable/test_file_formats.py | 23 +- .../hail/matrixtable/test_matrix_table.py | 5 - .../methods/relatedness/test_pc_relate.py | 1 + .../test/hail/methods/test_family_methods.py | 4 - hail/python/test/hail/methods/test_impex.py | 275 ++- hail/python/test/hail/methods/test_misc.py | 2 - hail/python/test/hail/methods/test_pca.py | 22 +- hail/python/test/hail/methods/test_qc.py | 3 - hail/python/test/hail/methods/test_statgen.py | 19 +- .../hail/stats/test_linear_mixed_model.py | 3 +- hail/python/test/hail/table/test_table.py | 44 +- hail/python/test/hail/test_context.py | 6 +- .../test/hail/utils/test_google_fs_utils.py | 86 + hail/python/test/hail/utils/test_utils.py | 22 +- hail/python/test/hail/vds/test_combiner.py | 2 + hail/python/test/hail/vds/test_vds.py | 3 - .../test/hailtop/hailctl/dataproc/conftest.py | 2 +- hail/src/main/scala/is/hail/HailContext.scala | 57 - .../main/scala/is/hail/HailFeatureFlags.scala | 73 + .../scala/is/hail/asm4s/ClassBuilder.scala | 24 +- hail/src/main/scala/is/hail/asm4s/Code.scala | 4 +- .../is/hail/backend/ExecuteContext.scala | 40 +- .../is/hail/backend/local/LocalBackend.scala | 17 +- .../scala/is/hail/backend/service/Main.scala | 2 +- .../hail/backend/service/ServiceBackend.scala | 599 ++++--- .../is/hail/backend/service/Worker.scala | 61 +- .../is/hail/backend/spark/SparkBackend.scala | 47 +- .../scala/is/hail/expr/ir/BlockMatrixIR.scala | 30 +- .../is/hail/expr/ir/BlockMatrixWriter.scala | 36 +- .../main/scala/is/hail/expr/ir/Compile.scala | 14 +- .../src/main/scala/is/hail/expr/ir/Emit.scala | 13 +- .../is/hail/expr/ir/EmitClassBuilder.scala | 20 +- .../hail/expr/ir/ExtractIntervalFilters.scala | 13 +- .../scala/is/hail/expr/ir/GenericLines.scala | 17 +- hail/src/main/scala/is/hail/expr/ir/IR.scala | 10 +- .../scala/is/hail/expr/ir/InferType.scala | 2 +- .../scala/is/hail/expr/ir/LowerMatrixIR.scala | 169 +- .../ir/LowerOrInterpretNonCompilable.scala | 2 +- .../main/scala/is/hail/expr/ir/MatrixIR.scala | 2 - .../scala/is/hail/expr/ir/MatrixWriter.scala | 424 ++++- .../main/scala/is/hail/expr/ir/Optimize.scala | 18 +- .../main/scala/is/hail/expr/ir/Parser.scala | 2 +- .../main/scala/is/hail/expr/ir/Pretty.scala | 5 +- .../is/hail/expr/ir/PruneDeadFields.scala | 942 ++++++----- 
.../main/scala/is/hail/expr/ir/Random.scala | 481 ++++++ .../scala/is/hail/expr/ir/Requiredness.scala | 3 +- .../main/scala/is/hail/expr/ir/Simplify.scala | 53 +- .../expr/ir/SpecializedArrayBuilders.scala | 63 + .../is/hail/expr/ir/StringTableReader.scala | 25 +- .../main/scala/is/hail/expr/ir/TableIR.scala | 15 +- .../scala/is/hail/expr/ir/TypeCheck.scala | 23 +- .../ir/analyses/ComputeMethodSplits.scala | 5 +- .../is/hail/expr/ir/functions/Functions.scala | 38 + .../ir/functions/RelationalFunctions.scala | 1 + .../expr/ir/functions/StringFunctions.scala | 208 ++- .../ir/lowering/CanLowerEfficiently.scala | 14 +- .../expr/ir/lowering/LowerBlockMatrixIR.scala | 82 +- .../ir/lowering/LowerDistributedSort.scala | 98 +- .../hail/expr/ir/lowering/LowerTableIR.scala | 105 +- .../is/hail/expr/ir/lowering/LowerToCDA.scala | 6 +- .../hail/expr/ir/lowering/LoweringPass.scala | 8 +- .../expr/ir/lowering/LoweringPipeline.scala | 2 +- .../main/scala/is/hail/expr/ir/package.scala | 11 +- .../scala/is/hail/io/TextMatrixReader.scala | 702 -------- .../main/scala/is/hail/io/bgen/LoadBgen.scala | 6 +- .../scala/is/hail/io/fs/GoogleStorageFS.scala | 33 +- .../scala/is/hail/io/plink/LoadPlink.scala | 14 +- .../main/scala/is/hail/io/vcf/ExportVCF.scala | 449 +---- hail/src/main/scala/is/hail/lir/X.scala | 4 +- .../types/encoded/ENumpyBinaryNDArray.scala | 77 + .../scala/is/hail/types/encoded/EType.scala | 4 +- .../scala/is/hail/types/physical/PType.scala | 2 +- .../is/hail/types/physical/stypes/SCode.scala | 3 + .../physical/stypes/concrete/SRNGState.scala | 117 ++ .../is/hail/types/virtual/TRNGState.scala | 12 + .../main/scala/is/hail/utils/Bitstring.scala | 90 + .../scala/is/hail/utils/ErrorHandling.scala | 6 + .../scala/is/hail/utils/TextTableReader.scala | 444 ----- .../hail/utils/richUtils/RichContextRDD.scala | 1 - .../test/resources/sampleheaderdiffelem.txt | 11 + .../test/resources/samplenonintentries.txt | 6 + hail/src/test/scala/is/hail/HailSuite.scala | 217 ++- hail/src/test/scala/is/hail/TestUtils.scala | 163 -- .../annotations/StagedConstructorSuite.scala | 16 +- .../test/scala/is/hail/asm4s/ASM4SSuite.scala | 73 +- .../test/scala/is/hail/asm4s/CodeSuite.scala | 14 +- .../is/hail/expr/ir/BlockMatrixIRSuite.scala | 1 - .../is/hail/expr/ir/EmitStreamSuite.scala | 64 +- .../expr/ir/ExtractIntervalFiltersSuite.scala | 4 +- .../is/hail/expr/ir/ForwardLetsSuite.scala | 9 +- .../test/scala/is/hail/expr/ir/IRSuite.scala | 12 +- .../is/hail/expr/ir/LocusFunctionsSuite.scala | 1 - .../scala/is/hail/expr/ir/PruneSuite.scala | 51 +- .../scala/is/hail/expr/ir/RandomSuite.scala | 106 ++ .../is/hail/expr/ir/RequirednessSuite.scala | 20 +- .../scala/is/hail/expr/ir/SimplifySuite.scala | 67 +- .../scala/is/hail/expr/ir/TableIRSuite.scala | 4 +- .../ir/agg/StagedBlockLinkedListSuite.scala | 10 +- .../ir/lowering/BlockMatrixStageSuite.scala | 4 +- .../lowering/LowerDistributedSortSuite.scala | 61 +- .../scala/is/hail/io/AvroReaderSuite.scala | 1 - .../is/hail/io/compress/BGzipCodecSuite.scala | 2 +- .../hail/types/physical/PContainerTest.scala | 4 +- .../hail/types/physical/PNDArraySuite.scala | 2 +- .../types/physical/PhysicalTestUtils.scala | 2 +- .../scala/is/hail/utils/TextTableSuite.scala | 35 - infra/azure/main.tf | 12 +- infra/azure/modules/batch/main.tf | 10 +- infra/azure/modules/ci/main.tf | 6 +- infra/azure/modules/vdc/main.tf | 30 +- infra/azure/modules/vdc/variables.tf | 14 +- infra/azure/variables.tf | 27 +- infra/gcp/main.tf | 22 +- internal-gateway/internal-gateway.nginx.conf | 2 +- 
letsencrypt/subdomains.txt | 3 + query/Makefile | 71 +- tls/config.yaml | 4 + 231 files changed, 10932 insertions(+), 5714 deletions(-) create mode 100644 batch/Dockerfile.driver-nginx create mode 100644 batch/batch/resource_usage.py create mode 100644 batch/driver-nginx.conf create mode 100644 hail/python/hail/docs/cloud/query_on_batch.rst delete mode 100644 hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_callset.rst create mode 100644 hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_dense.rst create mode 100644 hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_sample_metadata.rst create mode 100644 hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_sparse.rst create mode 100644 hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_variant_annotations.rst create mode 100644 hail/python/hail/docs/datasets/schemas/gnomad_pca_variant_loadings.rst create mode 100644 hail/python/hail/docs/datasets/schemas/gnomad_variant_co-occurrence.rst delete mode 100644 hail/python/hail/fs/google_fs.py create mode 100644 hail/python/test/hail/backend/__init__.py create mode 100644 hail/python/test/hail/backend/test_service_backend.py create mode 100644 hail/src/main/scala/is/hail/HailFeatureFlags.scala create mode 100644 hail/src/main/scala/is/hail/expr/ir/Random.scala delete mode 100644 hail/src/main/scala/is/hail/io/TextMatrixReader.scala create mode 100644 hail/src/main/scala/is/hail/types/encoded/ENumpyBinaryNDArray.scala create mode 100644 hail/src/main/scala/is/hail/types/physical/stypes/concrete/SRNGState.scala create mode 100644 hail/src/main/scala/is/hail/types/virtual/TRNGState.scala create mode 100644 hail/src/main/scala/is/hail/utils/Bitstring.scala delete mode 100644 hail/src/main/scala/is/hail/utils/TextTableReader.scala create mode 100644 hail/src/test/resources/sampleheaderdiffelem.txt create mode 100644 hail/src/test/resources/samplenonintentries.txt create mode 100644 hail/src/test/scala/is/hail/expr/ir/RandomSuite.scala delete mode 100644 hail/src/test/scala/is/hail/utils/TextTableSuite.scala diff --git a/auth/Dockerfile b/auth/Dockerfile index 9e886004c45..0c2bfa4dad1 100644 --- a/auth/Dockerfile +++ b/auth/Dockerfile @@ -1,7 +1,7 @@ FROM {{ service_base_image.image }} RUN hail-pip-install \ - google-auth-oauthlib==0.4.2 \ + google-auth-oauthlib==0.4.6 \ google-auth==1.25.0 COPY auth/setup.py auth/MANIFEST.in /auth/ diff --git a/auth/deployment.yaml b/auth/deployment.yaml index 20c45711fc2..a790339f89f 100644 --- a/auth/deployment.yaml +++ b/auth/deployment.yaml @@ -242,7 +242,7 @@ spec: - type: Resource resource: name: cpu - targetAverageUtilization: 95 + targetAverageUtilization: 2500 --- apiVersion: policy/v1beta1 kind: PodDisruptionBudget diff --git a/batch/Dockerfile.driver-nginx b/batch/Dockerfile.driver-nginx new file mode 100644 index 00000000000..0e095d572a7 --- /dev/null +++ b/batch/Dockerfile.driver-nginx @@ -0,0 +1,12 @@ +FROM {{ hail_ubuntu_image.image }} + +RUN hail-apt-get-install nginx + +RUN rm -f /etc/nginx/sites-enabled/default && \ + rm -f /etc/nginx/nginx.conf +ADD driver-nginx.conf /etc/nginx/nginx.conf + +RUN ln -sf /dev/stdout /var/log/nginx/access.log +RUN ln -sf /dev/stderr /var/log/nginx/error.log + +CMD ["nginx", "-g", "daemon off;"] diff --git a/batch/Dockerfile.worker b/batch/Dockerfile.worker index ee43e8863d9..1686e193381 100644 --- a/batch/Dockerfile.worker +++ b/batch/Dockerfile.worker @@ -56,7 +56,7 @@ FROM base AS crun_builder RUN hail-apt-get-install make git gcc build-essential pkgconf libtool \ 
libsystemd-dev libcap-dev libseccomp-dev \ go-md2man libtool autoconf automake -RUN git clone --depth 1 --branch 0.19.1 https://github.com/containers/crun.git && \ +RUN git clone --depth 1 --branch 1.4.4 https://github.com/containers/crun.git && \ cd crun && \ ./autogen.sh && \ ./configure && \ diff --git a/batch/Makefile b/batch/Makefile index 28a2c62ac10..c024438bde4 100644 --- a/batch/Makefile +++ b/batch/Makefile @@ -5,6 +5,8 @@ TOKEN = $(shell cat /dev/urandom | LC_ALL=C tr -dc 'a-z0-9' | head -c 12) BATCH_IMAGE := $(DOCKER_PREFIX)/batch:$(TOKEN) BATCH_WORKER_IMAGE := $(DOCKER_PREFIX)/batch-worker:$(TOKEN) +BATCH_DRIVER_NGINX_IMAGE := $(DOCKER_PREFIX)/batch-driver-nginx:$(TOKEN) + EXTRA_PYTHONPATH := ../hail/python:../gear:../web_common PYTHON := PYTHONPATH=$${PYTHONPATH:+$${PYTHONPATH}:}$(EXTRA_PYTHONPATH) python3 CLOUD := $(shell kubectl get secret global-config --template={{.data.cloud}} | base64 --decode) @@ -34,15 +36,20 @@ jars/junixsocket-selftest-2.3.3-jar-with-dependencies.jar: src/main/java/is/hail/JVMEntryway.class: src/main/java/is/hail/JVMEntryway.java jars/junixsocket-selftest-2.3.3-jar-with-dependencies.jar javac -cp jars/junixsocket-selftest-2.3.3-jar-with-dependencies.jar $< +.PHONY: build-batch-driver-nginx +build-batch-driver-nginx: + python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"'$$(cat ../docker/hail-ubuntu-image-ref)'"}}' Dockerfile.driver-nginx Dockerfile.driver-nginx.out + ../docker-build.sh . Dockerfile.driver-nginx.out $(BATCH_DRIVER_NGINX_IMAGE) + .PHONY: build-worker build-worker: src/main/java/is/hail/JVMEntryway.class python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"'$$(cat ../docker/hail-ubuntu-image-ref)'"},"global":{"cloud":"$(CLOUD)"}}' Dockerfile.worker Dockerfile.worker.out ../docker-build.sh .. 
batch/Dockerfile.worker.out $(BATCH_WORKER_IMAGE) .PHONY: build -build: build-batch build-worker +build: build-batch build-batch-driver-nginx build-worker -JINJA_ENVIRONMENT = '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"batch_image":{"image":"$(BATCH_IMAGE)"},"batch_worker_image":{"image":"$(BATCH_WORKER_IMAGE)"},"default_ns":{"name":"$(NAMESPACE)"},"batch_database":{"user_secret_name":"sql-batch-user-config"},"scope":"$(SCOPE)"}' +JINJA_ENVIRONMENT = '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"batch_image":{"image":"$(BATCH_IMAGE)"},"batch_worker_image":{"image":"$(BATCH_WORKER_IMAGE)"},"default_ns":{"name":"$(NAMESPACE)"},"batch_database":{"user_secret_name":"sql-batch-user-config"},"scope":"$(SCOPE)","batch_driver_nginx_image":{"image":"$(BATCH_DRIVER_NGINX_IMAGE)"}}' .PHONY: deploy deploy: build diff --git a/batch/batch/driver/instance_collection/pool.py b/batch/batch/driver/instance_collection/pool.py index c177ec839ad..93c22d4e3bf 100644 --- a/batch/batch/driver/instance_collection/pool.py +++ b/batch/batch/driver/instance_collection/pool.py @@ -3,6 +3,7 @@ import random from typing import Optional +import prometheus_client as pc import sortedcontainers from gear import Database @@ -28,6 +29,12 @@ log = logging.getLogger('pool') +SCHEDULING_LOOP_RUNS = pc.Counter( + 'scheduling_loop_runs', + 'Number of scheduling loop executions per pool', + ['pool_name'], +) + class Pool(InstanceCollection): @staticmethod @@ -373,6 +380,7 @@ async def schedule_loop_body(self): log.info(f'schedule {self.pool}: starting') start = time_msecs() + SCHEDULING_LOOP_RUNS.labels(pool_name=self.pool.name).inc() n_scheduled = 0 user_resources = await self.compute_fair_share() diff --git a/batch/batch/driver/job.py b/batch/batch/driver/job.py index c008053693b..ebd28f4c1fa 100644 --- a/batch/batch/driver/job.py +++ b/batch/batch/driver/job.py @@ -46,6 +46,7 @@ async def notify_batch_job_complete(db: Database, client_session: httpx.ClientSe GROUP BY batches.id; ''', (batch_id,), + 'notify_batch_job_complete', ) if not record: @@ -85,6 +86,7 @@ async def add_attempt_resources(db, batch_id, job_id, attempt_id, resources): ON DUPLICATE KEY UPDATE quantity = quantity; ''', resource_args, + 'add_attempt_resources', ) except Exception: log.exception(f'error while inserting resources for job {job_id}, attempt {attempt_id}') @@ -209,6 +211,7 @@ async def mark_job_creating( CALL mark_job_creating(%s, %s, %s, %s, %s); ''', (batch_id, job_id, attempt_id, instance.name, start_time), + 'mark_job_creating', ) except Exception: log.info(f'error while marking job {id} creating on {instance}') diff --git a/batch/batch/driver/main.py b/batch/batch/driver/main.py index 266c29b96d3..63f1d6c260e 100644 --- a/batch/batch/driver/main.py +++ b/batch/batch/driver/main.py @@ -30,7 +30,6 @@ from hailtop import aiotools, httpx from hailtop.config import get_deploy_config from hailtop.hail_logging import AccessLogger -from hailtop.tls import internal_server_ssl_context from hailtop.utils import AsyncWorkerPool, Notice, dump_all_stacktraces, periodically_call, serialization, time_msecs from web_common import render_template, set_message, setup_aiohttp_jinja2, setup_common_static_routes @@ -1261,5 +1260,4 @@ def run(): host='0.0.0.0', port=5000, access_log_class=AccessLogger, - ssl_context=internal_server_ssl_context(), ) diff --git a/batch/batch/file_store.py b/batch/batch/file_store.py index a6490475ae0..bd073e9318e 100644 --- a/batch/batch/file_store.py +++ 
b/batch/batch/file_store.py @@ -1,10 +1,14 @@ import asyncio import logging +from typing import Optional + +import pandas as pd from hailtop.aiotools.fs import AsyncFS from .batch_format_version import BatchFormatVersion from .globals import BATCH_FORMAT_VERSION +from .resource_usage import ResourceUsageMonitor from .spec_writer import SpecWriter log = logging.getLogger('logstore') @@ -30,6 +34,11 @@ def log_path(self, format_version, batch_id, job_id, attempt_id, task): return f'{self.batch_log_dir(batch_id)}/{job_id}/{task}/log' return f'{self.batch_log_dir(batch_id)}/{job_id}/{attempt_id}/{task}/log' + def resource_usage_path(self, format_version, batch_id, job_id, attempt_id, task): + if not format_version.has_attempt_in_log_path(): + return f'{self.batch_log_dir(batch_id)}/{job_id}/{task}/resource_usage' + return f'{self.batch_log_dir(batch_id)}/{job_id}/{attempt_id}/{task}/resource_usage' + async def read_log_file(self, format_version, batch_id, job_id, attempt_id, task): url = self.log_path(format_version, batch_id, job_id, attempt_id, task) data = await self.fs.read(url) @@ -39,6 +48,17 @@ async def write_log_file(self, format_version, batch_id, job_id, attempt_id, tas url = self.log_path(format_version, batch_id, job_id, attempt_id, task) await self.fs.write(url, data.encode('utf-8')) + async def read_resource_usage_file( + self, format_version, batch_id, job_id, attempt_id, task + ) -> Optional[pd.DataFrame]: + url = self.resource_usage_path(format_version, batch_id, job_id, attempt_id, task) + data = await self.fs.read(url) + return ResourceUsageMonitor.decode_to_df(data) + + async def write_resource_usage_file(self, format_version, batch_id, job_id, attempt_id, task, data): + url = self.resource_usage_path(format_version, batch_id, job_id, attempt_id, task) + await self.fs.write(url, data) + async def delete_batch_logs(self, batch_id): url = self.batch_log_dir(batch_id) await self.fs.rmtree(None, url) diff --git a/batch/batch/front_end/front_end.py b/batch/batch/front_end/front_end.py index a170d1ef0ea..b24a7c3a1c6 100644 --- a/batch/batch/front_end/front_end.py +++ b/batch/batch/front_end/front_end.py @@ -5,11 +5,12 @@ import logging import os import random +import re import signal import traceback from functools import wraps from numbers import Number -from typing import Dict, Optional, Union +from typing import Any, Awaitable, Callable, Dict, Optional, Union import aiohttp import aiohttp_session @@ -62,6 +63,7 @@ memory_to_worker_type, valid_machine_types, ) +from ..cloud.utils import ACCEPTABLE_QUERY_JAR_URL_PREFIX from ..exceptions import ( BatchOperationAlreadyCompletedError, BatchUserError, @@ -72,6 +74,7 @@ from ..file_store import FileStore from ..globals import BATCH_FORMAT_VERSION, HTTP_CLIENT_MAX_SIZE from ..inst_coll_config import InstanceCollectionConfigs +from ..resource_usage import ResourceUsageMonitor from ..spec_writer import SpecWriter from ..utils import accrued_cost_from_cost_and_msec_mcpu, coalesce, query_billing_projects from .validate import ValidationError, validate_and_clean_jobs, validate_batch @@ -328,7 +331,45 @@ async def get_jobs(request, userdata, batch_id): # pylint: disable=unused-argum return web.json_response(resp) -async def _get_job_log_from_record(app, batch_id, job_id, record): +async def _get_job_record(app, batch_id, job_id): + db: Database = app['db'] + + record = await db.select_and_fetchone( + ''' +SELECT jobs.state, jobs.spec, ip_address, format_version, jobs.attempt_id, t.attempt_id AS last_cancelled_attempt_id +FROM jobs +INNER 
JOIN batches + ON jobs.batch_id = batches.id +LEFT JOIN attempts + ON jobs.batch_id = attempts.batch_id AND jobs.job_id = attempts.job_id AND jobs.attempt_id = attempts.attempt_id +LEFT JOIN instances + ON attempts.instance_name = instances.name +LEFT JOIN ( + SELECT batch_id, job_id, attempt_id + FROM attempts + WHERE reason = "cancelled" AND batch_id = %s AND job_id = %s + ORDER BY end_time DESC + LIMIT 1 +) AS t + ON jobs.batch_id = t.batch_id AND jobs.job_id = t.job_id +WHERE jobs.batch_id = %s AND NOT deleted AND jobs.job_id = %s; +''', + (batch_id, job_id, batch_id, job_id), + ) + if not record: + raise web.HTTPNotFound() + return record + + +async def _get_resource_from_record( + app, + batch_id: int, + job_id: int, + record: dict, + endpoint: str, + handle_running_response: Callable[[aiohttp.ClientResponse], Awaitable[Any]], + cloud_storage_reader: Callable[[BatchFormatVersion, int, int, str, str], Awaitable[Any]], +) -> Optional[Dict[str, Any]]: client_session: httpx.ClientSession = app['client_session'] batch_format_version = BatchFormatVersion(record['format_version']) @@ -350,13 +391,11 @@ async def _get_job_log_from_record(app, batch_id, job_id, record): if state == 'Running': try: - resp = await request_retry_transient_errors( - client_session, 'GET', f'http://{ip_address}:5000/api/v1alpha/batches/{batch_id}/jobs/{job_id}/log' - ) - return await resp.json() + resp = await request_retry_transient_errors(client_session, 'GET', f'http://{ip_address}:5000{endpoint}') + return await handle_running_response(resp) except aiohttp.ClientResponseError: - log.exception(f'while getting log for {(batch_id, job_id)}') - return {task: 'ERROR: encountered a problem while fetching the log' for task in tasks} + log.exception(f'while getting resource for {(batch_id, job_id)}') + return {task: None for task in tasks} if state in ('Pending', 'Ready', 'Creating'): return None @@ -369,49 +408,76 @@ async def _get_job_log_from_record(app, batch_id, job_id, record): attempt_id = record['attempt_id'] or record['last_cancelled_attempt_id'] assert attempt_id is not None - file_store: FileStore = app['file_store'] - batch_format_version = BatchFormatVersion(record['format_version']) - - async def _read_log_from_cloud_storage(task): + async def _read_resource_from_cloud_storage(task): try: - data = await file_store.read_log_file(batch_format_version, batch_id, job_id, attempt_id, task) + data = await cloud_storage_reader(batch_format_version, batch_id, job_id, attempt_id, task) except FileNotFoundError: id = (batch_id, job_id) - log.exception(f'missing log file for {id} and task {task}') - data = 'ERROR: could not find log file' + log.exception(f'missing file for {id} and task {task}') + data = None return task, data - return dict(await asyncio.gather(*[_read_log_from_cloud_storage(task) for task in tasks])) + return dict(await asyncio.gather(*[_read_resource_from_cloud_storage(task) for task in tasks])) -async def _get_job_log(app, batch_id, job_id): - db: Database = app['db'] +async def _get_job_log(app, batch_id, job_id) -> Optional[Dict[str, str]]: + file_store: FileStore = app['file_store'] + record = await _get_job_record(app, batch_id, job_id) - record = await db.select_and_fetchone( - ''' -SELECT jobs.state, jobs.spec, ip_address, format_version, jobs.attempt_id, t.attempt_id AS last_cancelled_attempt_id -FROM jobs -INNER JOIN batches - ON jobs.batch_id = batches.id -LEFT JOIN attempts - ON jobs.batch_id = attempts.batch_id AND jobs.job_id = attempts.job_id AND jobs.attempt_id = 
attempts.attempt_id -LEFT JOIN instances - ON attempts.instance_name = instances.name -LEFT JOIN ( - SELECT batch_id, job_id, attempt_id - FROM attempts - WHERE reason = "cancelled" AND batch_id = %s AND job_id = %s - ORDER BY end_time DESC - LIMIT 1 -) AS t - ON jobs.batch_id = t.batch_id AND jobs.job_id = t.job_id -WHERE jobs.batch_id = %s AND NOT deleted AND jobs.job_id = %s; -''', - (batch_id, job_id, batch_id, job_id), + async def handle_running_response(resp: aiohttp.ClientResponse) -> Dict[str, str]: + return await resp.json() + + maybe_data = await _get_resource_from_record( + app, + batch_id, + job_id, + record, + f'/api/v1alpha/batches/{batch_id}/jobs/{job_id}/log', + handle_running_response, + file_store.read_log_file, + ) + + if maybe_data is None: + return None + + data = {} + for task, log in maybe_data.items(): + if log is None: + log = 'ERROR: could not find file' + data[task] = log + return data + + +async def _get_job_resource_usage(app, batch_id, job_id) -> Optional[Dict[str, Optional[pd.DataFrame]]]: + file_store: FileStore = app['file_store'] + record = await _get_job_record(app, batch_id, job_id) + + async def handle_running_response(resp: aiohttp.ClientResponse) -> Dict[str, Optional[pd.DataFrame]]: + resource_usage = {} + + reader = aiohttp.MultipartReader.from_response(resp) + while True: + part = await reader.next() # pylint: disable=not-callable + if part is None: + break + + assert isinstance(part, aiohttp.BodyPartReader) + task = part.filename + assert task in ('input', 'main', 'output'), task + data = await part.read() + resource_usage[task] = ResourceUsageMonitor.decode_to_df(data) + + return resource_usage + + return await _get_resource_from_record( + app, + batch_id, + job_id, + record, + f'/api/v1alpha/batches/{batch_id}/jobs/{job_id}/resource_usage', + handle_running_response, + file_store.read_resource_usage_file, ) - if not record: - raise web.HTTPNotFound() - return await _get_job_log_from_record(app, batch_id, job_id, record) async def _get_attributes(app, record): @@ -665,6 +731,14 @@ async def create_jobs(request: aiohttp.web.Request, userdata: dict): return await _create_jobs(userdata, job_specs, batch_id, app) +NON_HEX_DIGIT = re.compile('[^A-Fa-f0-9]') + + +def assert_is_sha_1_hex_string(revision: str): + if len(revision) != 40 or NON_HEX_DIGIT.search(revision): + raise web.HTTPBadRequest(reason=f'revision must be 40 character hexadecimal encoded SHA-1, got: {revision}') + + async def _create_jobs(userdata: dict, job_specs: dict, batch_id: int, app: aiohttp.web.Application): db: Database = app['db'] file_store: FileStore = app['file_store'] @@ -760,14 +834,25 @@ async def _create_jobs(userdata: dict, job_specs: dict, batch_id: int, app: aioh raise web.HTTPBadRequest(reason='cannot specify cpu and memory with machine_type') if spec['process']['type'] == 'jvm': - if 'cpu' in resources: - raise web.HTTPBadRequest(reason='jvm jobs may not specify cpu') - if 'memory' in resources and resources['memory'] != 'standard': - raise web.HTTPBadRequest(reason='jvm jobs may not specify memory') + jvm_requested_cpu = parse_cpu_in_mcpu(resources.get('cpu', BATCH_JOB_DEFAULT_CPU)) + if 'cpu' in resources and jvm_requested_cpu not in (1000, 8000): + raise web.HTTPBadRequest(reason='invalid cpu for jvm jobs. 
must be 1 or 8') + if 'memory' in resources and resources['memory'] == 'lowmem': + raise web.HTTPBadRequest(reason='jvm jobs cannot be on lowmem machines') if 'storage' in resources: raise web.HTTPBadRequest(reason='jvm jobs may not specify storage') if machine_type is not None: raise web.HTTPBadRequest(reason='jvm jobs may not specify machine_type') + if spec['process']['jar_spec']['type'] == 'git_revision': + revision = spec['process']['jar_spec']['value'] + assert_is_sha_1_hex_string(revision) + spec['process']['jar_spec']['type'] = 'jar_url' + spec['process']['jar_spec']['value'] = ACCEPTABLE_QUERY_JAR_URL_PREFIX + '/' + revision + '.jar' + else: + assert spec['process']['jar_spec']['type'] == 'jar_url' + jar_url = spec['process']['jar_spec']['value'] + if not jar_url.startswith(ACCEPTABLE_QUERY_JAR_URL_PREFIX): + raise web.HTTPBadRequest(reason=f'unacceptable JAR url: {jar_url}') req_memory_bytes: Optional[int] if machine_type is None: @@ -1309,7 +1394,7 @@ async def _close_batch(app: aiohttp.web.Application, batch_id: int, user: str, d client_session: httpx.ClientSession = app['client_session'] try: now = time_msecs() - await db.check_call_procedure('CALL close_batch(%s, %s);', (batch_id, now)) + await db.check_call_procedure('CALL close_batch(%s, %s);', (batch_id, now), 'close_batch') except CallError as e: # 2: wrong number of jobs if e.rv['rc'] == 2: @@ -1511,6 +1596,63 @@ async def get_job(request, userdata, batch_id): # pylint: disable=unused-argume return web.json_response(status) +def plot_job_durations(container_statuses: dict, batch_id: int, job_id: int): + data = [] + for step in ['input', 'main', 'output']: + if container_statuses[step]: + for timing_name, timing_data in container_statuses[step]['timing'].items(): + if timing_data is not None: + plot_dict = { + 'Title': f'{(batch_id, job_id)}', + 'Step': step, + 'Task': timing_name, + } + + if timing_data.get('start_time') is not None: + plot_dict['Start'] = datetime.datetime.fromtimestamp(timing_data['start_time'] / 1000) + + finish_time = timing_data.get('finish_time') + if finish_time is None: + finish_time = time_msecs() + plot_dict['Finish'] = datetime.datetime.fromtimestamp(finish_time / 1000) + + data.append(plot_dict) + + if not data: + return None + + df = pd.DataFrame(data) + + fig = px.timeline( + df, + x_start='Start', + x_end='Finish', + y='Step', + color='Task', + hover_data=['Step'], + color_discrete_sequence=px.colors.sequential.dense, + category_orders={ + 'Step': ['input', 'main', 'output'], + 'Task': [ + 'pulling', + 'setting up overlay', + 'setting up network', + 'running', + 'uploading_log', + 'uploading_resource_usage', + ], + }, + ) + + return json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder) + + +def plot_resource_usage( + resource_usage: Optional[Dict[str, Optional[pd.DataFrame]]] # pylint: disable=unused-argument +) -> Optional[str]: + return None + + @routes.get('/batches/{batch_id}/jobs/{job_id}') @web_billing_project_users_only() @catch_ui_error_in_dev @@ -1518,8 +1660,11 @@ async def ui_get_job(request, userdata, batch_id): app = request.app job_id = int(request.match_info['job_id']) - job, attempts, job_log = await asyncio.gather( - _get_job(app, batch_id, job_id), _get_attempts(app, batch_id, job_id), _get_job_log(app, batch_id, job_id) + job, attempts, job_log, resource_usage = await asyncio.gather( + _get_job(app, batch_id, job_id), + _get_attempts(app, batch_id, job_id), + _get_job_log(app, batch_id, job_id), + _get_job_resource_usage(app, batch_id, job_id), ) job['duration'] = 
humanize_timedelta_msecs(job['duration']) @@ -1532,6 +1677,7 @@ async def ui_get_job(request, userdata, batch_id): 'timing': { 'pulling': dictfix.NoneOr({'duration': dictfix.NoneOr(Number)}), 'running': dictfix.NoneOr({'duration': dictfix.NoneOr(Number)}), + 'uploading_resource_usage': dictfix.NoneOr({'duration': dictfix.NoneOr(Number)}), }, 'short_error': dictfix.NoneOr(str), 'error': dictfix.NoneOr(str), @@ -1548,10 +1694,9 @@ async def ui_get_job(request, userdata, batch_id): } job_status = dictfix.dictfix(job_status, job_status_spec) container_statuses = job_status['container_statuses'] - step_statuses = [container_statuses['input'], container_statuses['main'], container_statuses['output']] step_errors = {step: status['error'] for step, status in container_statuses.items() if status is not None} - for status in step_statuses: + for status in container_statuses.values(): # backwards compatibility if status and status['short_error'] is None and status['container_status']['out_of_memory']: status['short_error'] = 'out of memory' @@ -1579,60 +1724,19 @@ async def ui_get_job(request, userdata, batch_id): resources['actual_cpu'] = resources['cores_mcpu'] / 1000 del resources['cores_mcpu'] - data = [] - for step in ['input', 'main', 'output']: - if container_statuses[step]: - for timing_name, timing_data in container_statuses[step]['timing'].items(): - if timing_data is not None: - plot_dict = { - 'Title': f'{(batch_id, job_id)}', - 'Step': step, - 'Task': timing_name, - } - - if timing_data.get('start_time') is not None: - plot_dict['Start'] = datetime.datetime.fromtimestamp(timing_data['start_time'] / 1000) - - finish_time = timing_data.get('finish_time') - if finish_time is None: - finish_time = time_msecs() - plot_dict['Finish'] = datetime.datetime.fromtimestamp(finish_time / 1000) - - data.append(plot_dict) - - if data: - df = pd.DataFrame(data) - - fig = px.timeline( - df, - x_start='Start', - x_end='Finish', - y='Step', - color='Task', - hover_data=['Step'], - color_discrete_sequence=px.colors.sequential.dense, - category_orders={ - 'Step': ['input', 'main', 'output'], - 'Task': ['pulling', 'setting up overlay', 'setting up network', 'running', 'uploading_log'], - }, - ) - - plot_json = json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder) - else: - plot_json = None - page_context = { 'batch_id': batch_id, 'job_id': job_id, 'job': job, 'job_log': job_log, 'attempts': attempts, - 'step_statuses': step_statuses, + 'container_statuses': container_statuses, 'job_specification': job_specification, 'job_status_str': json.dumps(job, indent=2), 'step_errors': step_errors, 'error': job_status.get('error'), - 'plot_json': plot_json, + 'plot_job_durations': plot_job_durations(container_statuses, batch_id, job_id), + 'plot_resource_usage': plot_resource_usage(resource_usage), } return await render_template('batch', request, userdata, 'job.html', page_context) diff --git a/batch/batch/front_end/templates/job.html b/batch/batch/front_end/templates/job.html index f4229d8f11d..e32f26c8e51 100644 --- a/batch/batch/front_end/templates/job.html +++ b/batch/batch/front_end/templates/job.html @@ -63,12 +63,21 @@

Attempts

Step Status

-{% if plot_json is not none %}
+{% if plot_job_durations is not none %}
+
+{% endif %}
+
+{% if plot_resource_usage is not none %}
+
+

Step Status

- {% for step in step_statuses %} + {% for name, step in container_statuses.items() %} {% if step %} - {{ step['name'] }} + {{ name }} {% if step['timing']['pulling'] and step['timing']['pulling']['duration'] %} {{ step['timing']['pulling']['duration'] / 1000.0 }} diff --git a/batch/batch/front_end/validate.py b/batch/batch/front_end/validate.py index f1242d51525..3dff4d80bda 100644 --- a/batch/batch/front_end/validate.py +++ b/batch/batch/front_end/validate.py @@ -71,7 +71,12 @@ required('image'): image_str, required('mount_docker_socket'): bool_type, }, - 'jvm': {required('command'): listof(str_type)}, + 'jvm': { + required('jar_spec'): keyed( + {required('type'): oneof('git_revision', 'jar_url'), required('value'): str_type} + ), + required('command'): listof(str_type), + }, }, ), 'requester_pays_project': str_type, diff --git a/batch/batch/resource_usage.py b/batch/batch/resource_usage.py new file mode 100644 index 00000000000..ebdac21f801 --- /dev/null +++ b/batch/batch/resource_usage.py @@ -0,0 +1,103 @@ +import asyncio +import os +import struct +from typing import Optional + +import numpy as np +import pandas as pd + +from hailtop.utils import periodically_call, retry_long_running, time_msecs, time_ns + + +class ResourceUsageMonitor: + VERSION = 1 + + @staticmethod + def no_data() -> bytes: + return ResourceUsageMonitor.version_to_bytes() + + @staticmethod + def version_to_bytes() -> bytes: + return struct.pack('>q', ResourceUsageMonitor.VERSION) + + @staticmethod + def decode_to_df(data: bytes) -> Optional[pd.DataFrame]: + if len(data) == 0: + return None + + (version,) = struct.unpack_from('>q', data, 0) + assert version == ResourceUsageMonitor.VERSION, version + + dtype = [('time_msecs', '>i8'), ('memory_in_bytes', '>i8'), ('cpu_usage', '>f8')] + np_array = np.frombuffer(data, offset=8, dtype=dtype) + return pd.DataFrame.from_records(np_array) + + def __init__(self, container_name: str, output_file_path: str): + self.container_name = container_name + self.output_file_path = output_file_path + + self.last_time_ns: Optional[int] = None + self.last_cpu_ns: Optional[int] = None + + self.out = open(output_file_path, 'wb') # pylint: disable=consider-using-with + self.write_header() + + self.task: Optional[asyncio.Future] = None + + def write_header(self): + data = ResourceUsageMonitor.version_to_bytes() + self.out.write(data) + self.out.flush() + + def cpu_ns(self) -> Optional[int]: + usage_file = f'/sys/fs/cgroup/cpu/{self.container_name}/cpuacct.usage' + if os.path.exists(usage_file): + with open(usage_file, 'r', encoding='utf-8') as f: + return int(f.read().rstrip()) + return None + + def percent_cpu_usage(self) -> Optional[float]: + now_time_ns = time_ns() + now_cpu_ns = self.cpu_ns() + + if now_cpu_ns is None or self.last_cpu_ns is None or self.last_time_ns is None: + self.last_time_ns = now_time_ns + self.last_cpu_ns = now_cpu_ns + return None + + cpu_usage = (now_cpu_ns - self.last_cpu_ns) / (now_time_ns - self.last_time_ns) + + self.last_time_ns = now_time_ns + self.last_cpu_ns = now_cpu_ns + + return cpu_usage + + def memory_usage_bytes(self) -> Optional[int]: + usage_file = f'/sys/fs/cgroup/memory/{self.container_name}/memory.usage_in_bytes' + if os.path.exists(usage_file): + with open(usage_file, 'r', encoding='utf-8') as f: + return int(f.read().rstrip()) + return None + + async def measure(self): + now = time_msecs() + memory_usage_bytes = self.memory_usage_bytes() + percent_cpu_usage = self.percent_cpu_usage() + + if memory_usage_bytes is None or percent_cpu_usage is 
None: + return + + data = struct.pack('>2qd', now, memory_usage_bytes, percent_cpu_usage) + self.out.write(data) + self.out.flush() + + async def __aenter__(self): + self.task = asyncio.ensure_future( + retry_long_running(f'monitor {self.container_name} resource usage', periodically_call, 5, self.measure) + ) + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + if self.task is not None: + self.task.cancel() + self.out.close() diff --git a/batch/batch/spec_writer.py b/batch/batch/spec_writer.py index 1546ced2c8d..4fe4ae00eb7 100644 --- a/batch/batch/spec_writer.py +++ b/batch/batch/spec_writer.py @@ -36,6 +36,7 @@ async def get_token_start_id(db, batch_id, job_id): LIMIT 1; ''', (batch_id, job_id), + 'get_token_start_id', ) token = bunch_record['token'] start_job_id = bunch_record['start_job_id'] diff --git a/batch/batch/worker/worker.py b/batch/batch/worker/worker.py index 4ae13b2bcbb..1c5e63ea106 100644 --- a/batch/batch/worker/worker.py +++ b/batch/batch/worker/worker.py @@ -16,7 +16,19 @@ import warnings from collections import defaultdict from contextlib import ExitStack, contextmanager -from typing import Any, Awaitable, Callable, Dict, Iterator, List, MutableMapping, Optional, Tuple, Union +from typing import ( + Any, + Awaitable, + Callable, + ContextManager, + Dict, + Iterator, + List, + MutableMapping, + Optional, + Tuple, + Union, +) import aiodocker # type: ignore import aiodocker.images @@ -26,10 +38,11 @@ import async_timeout from aiodocker.exceptions import DockerError # type: ignore from aiohttp import web +from sortedcontainers import SortedSet from gear.clients import get_cloud_async_fs, get_compute_client from hailtop import aiotools, httpx -from hailtop.aiotools import LocalAsyncFS +from hailtop.aiotools import AsyncFS, LocalAsyncFS from hailtop.aiotools.router_fs import RouterAsyncFS from hailtop.batch.hail_genetics_images import HAIL_GENETICS_IMAGES from hailtop.config import DeployConfig @@ -55,10 +68,16 @@ from ..batch_format_version import BatchFormatVersion from ..cloud.azure.worker.worker_api import AzureWorkerAPI from ..cloud.gcp.worker.worker_api import GCPWorkerAPI -from ..cloud.resource_utils import is_valid_storage_request, storage_gib_to_bytes +from ..cloud.resource_utils import ( + is_valid_storage_request, + storage_gib_to_bytes, + worker_memory_per_core_bytes, + worker_memory_per_core_mib, +) from ..file_store import FileStore from ..globals import HTTP_CLIENT_MAX_SIZE, RESERVED_STORAGE_GB_PER_CORE, STATUS_FORMAT_VERSION from ..publicly_available_images import publicly_available_images +from ..resource_usage import ResourceUsageMonitor from ..semaphore import FIFOWeightedSemaphore from ..utils import Box from ..worker.worker_api import CloudWorkerAPI @@ -147,6 +166,8 @@ def compose(auth: Union[MutableMapping, str, bytes], registry_addr: str = None): log.info(f'INSTANCE_CONFIG {INSTANCE_CONFIG}') log.info(f'CLOUD_WORKER_API {CLOUD_WORKER_API}') log.info(f'MAX_IDLE_TIME_MSECS {MAX_IDLE_TIME_MSECS}') +log.info(f'BATCH_WORKER_IMAGE {BATCH_WORKER_IMAGE}') +log.info(f'BATCH_WORKER_IMAGE_ID {BATCH_WORKER_IMAGE_ID}') log.info(f'INTERNET_INTERFACE {INTERNET_INTERFACE}') log.info(f'UNRESERVED_WORKER_DATA_DISK_SIZE_GB {UNRESERVED_WORKER_DATA_DISK_SIZE_GB}') log.info(f'ACCEPTABLE_QUERY_JAR_URL_PREFIX {ACCEPTABLE_QUERY_JAR_URL_PREFIX}') @@ -346,13 +367,161 @@ async def wrapper(f, *args, **kwargs): return wrapper +class ImageCannotBePulled(Exception): + pass + + +class ImageNotFound(Exception): + pass + + +class Image: + def __init__( + self, + 
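As a usage note for the resource-usage monitor introduced above: each record is the struct packed in measure(), so a consumer can read a finished resource-usage file back into a DataFrame with decode_to_df. A minimal sketch, assuming a local file path and that the module is importable as batch.resource_usage:

    import pandas as pd
    from batch.resource_usage import ResourceUsageMonitor  # import path assumed

    with open('/tmp/resource_usage', 'rb') as f:  # hypothetical path
        data = f.read()

    df = ResourceUsageMonitor.decode_to_df(data)  # None if the file is empty
    if df is not None:
        df['time'] = pd.to_datetime(df['time_msecs'], unit='ms')
        print(df[['time', 'memory_in_bytes', 'cpu_usage']].head())
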
name: str, + credentials: Union[CloudUserCredentials, 'JVMUserCredentials'], + client_session: httpx.ClientSession, + pool: concurrent.futures.ThreadPoolExecutor, + ): + self.image_name = name + self.credentials = credentials + self.client_session = client_session + self.pool = pool + + image_ref = parse_docker_image_reference(name) + if image_ref.tag is None and image_ref.digest is None: + log.info(f'adding latest tag to image {name} for {self}') + image_ref.tag = 'latest' + + if image_ref.name() in HAIL_GENETICS_IMAGES: + # We want the "hailgenetics/python-dill" translate to (based on the prefix): + # * gcr.io/hail-vdc/hailgenetics/python-dill + # * us-central1-docker.pkg.dev/hail-vdc/hail/hailgenetics/python-dill + image_ref.path = image_ref.name() + image_ref.domain = DOCKER_PREFIX.split('/', maxsplit=1)[0] + image_ref.path = '/'.join(DOCKER_PREFIX.split('/')[1:] + [image_ref.path]) + + self.image_ref = image_ref + self.image_ref_str = str(image_ref) + self.image_config: Optional[Dict[str, Any]] = None + self.image_id: Optional[str] = None + + @property + def is_cloud_image(self): + return (CLOUD == 'gcp' and self.image_ref.hosted_in('google')) or ( + CLOUD == 'azure' and self.image_ref.hosted_in('azure') + ) + + @property + def is_public_image(self): + return self.image_ref.name() in PUBLIC_IMAGES + + @property + def rootfs_path(self) -> str: + assert self.image_id is not None + return f'/host/rootfs/{self.image_id}' + + async def _pull_image(self): + assert docker + + try: + if not self.is_cloud_image: + await self._ensure_image_is_pulled() + elif self.is_public_image: + auth = await self._batch_worker_access_token() + await self._ensure_image_is_pulled(auth=auth) + elif self.image_ref_str == BATCH_WORKER_IMAGE and isinstance(self.credentials, JVMUserCredentials): + pass + else: + # Pull to verify this user has access to this + # image. + # FIXME improve the performance of this with a + # per-user image cache. 
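To make the DOCKER_PREFIX rewrite above concrete, here is a minimal standalone sketch of the same transformation on plain strings; the prefix value and helper name are illustrative, not the worker's actual code path:

    def rewrite_hailgenetics_image(name: str, docker_prefix: str) -> str:
        # e.g. 'gcr.io/hail-vdc' -> domain 'gcr.io', repository prefix 'hail-vdc'
        domain, _, repository = docker_prefix.partition('/')
        return f'{domain}/{repository}/{name}' if repository else f'{domain}/{name}'

    assert (rewrite_hailgenetics_image('hailgenetics/python-dill', 'gcr.io/hail-vdc')
            == 'gcr.io/hail-vdc/hailgenetics/python-dill')
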
+ auth = self._current_user_access_token() + await docker_call_retry(MAX_DOCKER_IMAGE_PULL_SECS, f'{self}')( + docker.images.pull, self.image_ref_str, auth=auth + ) + except DockerError as e: + if e.status == 404 and 'pull access denied' in e.message: + raise ImageCannotBePulled from e + if 'not found: manifest unknown' in e.message: + raise ImageNotFound from e + raise + + image_config, _ = await check_exec_output('docker', 'inspect', self.image_ref_str) + image_configs[self.image_ref_str] = json.loads(image_config)[0] + + async def _ensure_image_is_pulled(self, auth: Optional[Dict[str, str]] = None): + assert docker + + try: + await docker_call_retry(MAX_DOCKER_OTHER_OPERATION_SECS, f'{self}')(docker.images.get, self.image_ref_str) + except DockerError as e: + if e.status == 404: + await docker_call_retry(MAX_DOCKER_IMAGE_PULL_SECS, f'{self}')( + docker.images.pull, self.image_ref_str, auth=auth + ) + else: + raise + + async def _batch_worker_access_token(self) -> Dict[str, str]: + return await CLOUD_WORKER_API.worker_access_token(self.client_session) + + def _current_user_access_token(self) -> Dict[str, str]: + assert self.credentials + return {'username': self.credentials.username, 'password': self.credentials.password} + + async def _extract_rootfs(self): + assert self.image_id + os.makedirs(self.rootfs_path) + await check_shell( + f'id=$(docker create {self.image_id}) && docker export $id | tar -C {self.rootfs_path} -xf - && docker rm $id' + ) + + async def _localize_rootfs(self): + async with image_lock.reader_lock: + # FIXME Authentication is entangled with pulling images. We need a way to test + # that a user has access to a cached image without pulling. + await self._pull_image() + self.image_config = image_configs[self.image_ref_str] + self.image_id = self.image_config['Id'].split(":")[1] + assert self.image_id + + worker.image_data[self.image_id] += 1 + + image_data = worker.image_data[self.image_id] + async with image_data.lock: + if not image_data.extracted: + try: + await self._extract_rootfs() + image_data.extracted = True + log.info(f'Added expanded image to cache: {self.image_ref_str}, ID: {self.image_id}') + except asyncio.CancelledError: + raise + except Exception: + log.exception(f'while extracting image {self.image_ref_str}, ID: {self.image_id}') + await blocking_to_async(self.pool, shutil.rmtree, self.rootfs_path) + raise + + async def pull(self): + await asyncio.shield(self._localize_rootfs()) + + def release(self): + if self.image_id is not None: + worker.image_data[self.image_id] -= 1 + + +class StepInterruptedError(Exception): + pass + + async def run_until_done_or_deleted(event: asyncio.Event, f: Callable[..., Awaitable[Any]], *args, **kwargs): step = asyncio.ensure_future(f(*args, **kwargs)) deleted = asyncio.ensure_future(event.wait()) try: await asyncio.wait([deleted, step], return_when=asyncio.FIRST_COMPLETED) if deleted.done(): - raise JobDeletedError + raise StepInterruptedError assert step.done() return step.result() finally: @@ -382,7 +551,19 @@ class JobDeletedError(Exception): pass -class JobTimeoutError(Exception): +class ContainerDeletedError(Exception): + pass + + +class ContainerTimeoutError(Exception): + pass + + +class ContainerCreateError(Exception): + pass + + +class ContainerStartError(Exception): pass @@ -404,209 +585,248 @@ def user_error(e): # bucket name and your credentials.\n') if b'Bad credentials for bucket' in e.stderr: return True + if isinstance(e, (ImageNotFound, ImageCannotBePulled)): + return True + if isinstance(e, 
(ContainerTimeoutError, ContainerDeletedError)): + return True return False class Container: - def __init__(self, job, name, spec, client_session: httpx.ClientSession, worker: 'Worker'): - self.job = job - self.name = name - self.spec = spec - self.client_session = client_session - self.worker = worker - self.deleted_event = asyncio.Event() - - image_ref = parse_docker_image_reference(self.spec['image']) - if image_ref.tag is None and image_ref.digest is None: - log.info(f'adding latest tag to image {self.spec["image"]} for {self}') - image_ref.tag = 'latest' + def __init__( + self, + fs: AsyncFS, + name: str, + image: Image, + scratch_dir: str, + command: List[str], + cpu_in_mcpu: int, + memory_in_bytes: int, + network: Optional[Union[bool, str]] = None, + port: Optional[int] = None, + timeout: Optional[int] = None, + unconfined: Optional[bool] = None, + volume_mounts: Optional[List[dict]] = None, + env: Optional[List[str]] = None, + ): + self.fs = fs + assert self.fs - if image_ref.name() in HAIL_GENETICS_IMAGES: - # We want the "hailgenetics/python-dill" translate to (based on the prefix): - # * gcr.io/hail-vdc/hailgenetics/python-dill - # * us-central1-docker.pkg.dev/hail-vdc/hail/hailgenetics/python-dill - image_ref.path = image_ref.name() - image_ref.domain = DOCKER_PREFIX.split('/', maxsplit=1)[0] - image_ref.path = '/'.join(DOCKER_PREFIX.split('/')[1:] + [image_ref.path]) + self.name = name + self.image = image + self.command = command + self.cpu_in_mcpu = cpu_in_mcpu + self.memory_in_bytes = memory_in_bytes + self.network = network + self.port = port + self.timeout = timeout + self.unconfined = unconfined + self.volume_mounts = volume_mounts or [] + self.env = env or [] - self.image_ref = image_ref - self.image_ref_str = str(image_ref) - self.image_id = None + self.deleted_event = asyncio.Event() - self.port = self.spec.get('port') self.host_port = None - self.timeout = self.spec.get('timeout') - self.state = 'pending' - self.error = None - self.short_error = None - self.container_status = None + self.error: Optional[str] = None + self.short_error: Optional[str] = None + self.container_status: Optional[dict] = None self.started_at: Optional[int] = None self.finished_at: Optional[int] = None self.timings = Timings() - self.logbuffer = bytearray() self.overlay_path = None - self.image_config = None - self.rootfs_path = None - scratch = self.spec['scratch'] - self.container_scratch = f'{scratch}/{self.name}' + self.container_scratch = scratch_dir self.container_overlay_path = f'{self.container_scratch}/rootfs_overlay' self.config_path = f'{self.container_scratch}/config' self.log_path = f'{self.container_scratch}/container.log' + self.resource_usage_path = f'{self.container_scratch}/resource_usage' self.overlay_mounted = False - self.container_name = f'batch-{self.job.batch_id}-job-{self.job.job_id}-{self.name}' - self.netns: Optional[NetworkNamespace] = None # regarding no-member: https://github.com/PyCQA/pylint/issues/4223 self.process: Optional[asyncio.subprocess.Process] = None # pylint: disable=no-member - assert self.worker.fs is not None + self._run_fut: Optional[asyncio.Future] = None + self._cleanup_lock = asyncio.Lock() - async def run(self): + self._killed = False + self._cleaned_up = False + + async def create(self): + self.state = 'creating' try: + with self._step('pulling'): + await self._run_until_done_or_deleted(self.image.pull) - async def localize_rootfs(): - async def _localize_rootfs(): - async with image_lock.reader_lock: - # FIXME Authentication is entangled 
with pulling images. We need a way to test - # that a user has access to a cached image without pulling. - await self.pull_image() - self.image_config = image_configs[self.image_ref_str] - self.image_id = self.image_config['Id'].split(":")[1] - self.worker.image_data[self.image_id] += 1 - - self.rootfs_path = f'/host/rootfs/{self.image_id}' - - image_data = self.worker.image_data[self.image_id] - async with image_data.lock: - if not image_data.extracted: - try: - await self.extract_rootfs() - image_data.extracted = True - log.info( - f'Added expanded image to cache: {self.image_ref_str}, ID: {self.image_id}' - ) - except asyncio.CancelledError: - raise - except Exception: - log.exception(f'while extracting image {self.image_ref_str}, ID: {self.image_id}') - await blocking_to_async(worker.pool, shutil.rmtree, self.rootfs_path) - - await asyncio.shield(_localize_rootfs()) - - with self.step('pulling'): - await self.run_until_done_or_deleted(localize_rootfs) - - with self.step('setting up overlay'): - await self.run_until_done_or_deleted(self.setup_overlay) - - with self.step('setting up network'): - await self.run_until_done_or_deleted(self.setup_network_namespace) - - with self.step('running'): - timed_out = await self.run_until_done_or_deleted(self.run_container) - - self.container_status = self.get_container_status() - - if timed_out: - self.short_error = 'timed out' - raise JobTimeoutError(f'timed out after {self.timeout}s') - - if self.container_status['exit_code'] == 0: - self.state = 'succeeded' - else: - if self.container_status['out_of_memory']: - self.short_error = 'out of memory' - self.state = 'failed' + with self._step('setting up overlay'): + await self._run_until_done_or_deleted(self._setup_overlay) + + with self._step('setting up network'): + await self._run_until_done_or_deleted(self._setup_network_namespace) except asyncio.CancelledError: raise - except JobDeletedError: - self.state = 'cancelled' except Exception as e: - if not isinstance(e, JobTimeoutError) and not user_error(e): - log.exception(f'while running {self}') + if isinstance(e, ImageNotFound): + self.short_error = 'image not found' + elif isinstance(e, ImageCannotBePulled): + self.short_error = 'image cannot be pulled' + self.state = 'error' self.error = traceback.format_exc() + + if not isinstance(e, ContainerDeletedError) and not user_error(e): + log.exception(f'while creating {self}') + raise ContainerCreateError from e + raise + + async def start(self): + async def _run(): + self.state = 'running' + try: + with self._step('running'): + timed_out = await self._run_until_done_or_deleted(self._run_container) + + self.container_status = self.get_container_status() + assert self.container_status is not None + + if timed_out: + self.short_error = 'timed out' + raise ContainerTimeoutError(f'timed out after {self.timeout}s') + + if self.container_status['exit_code'] == 0: + self.state = 'succeeded' + else: + if self.container_status['out_of_memory']: + self.short_error = 'out of memory' + self.state = 'failed' + except asyncio.CancelledError: + raise + except ContainerDeletedError: + self.state = 'cancelled' + except Exception as e: + self.state = 'error' + self.error = traceback.format_exc() + + if not isinstance(e, ContainerTimeoutError) and not user_error(e): + log.exception(f'while running {self}') + raise ContainerStartError from e + raise + + self._run_fut = asyncio.ensure_future(self._run_until_done_or_deleted(_run)) + + async def wait(self): + assert self._run_fut + try: + await self._run_fut finally: + 
self._run_fut = None + + async def run(self, on_completion: Callable[..., Awaitable[Any]], *args, **kwargs): + async with self._cleanup_lock: try: - with self.step('uploading_log'): - await self.upload_log() + await self.create() + await self.start() + await self.wait() finally: try: - await self.delete_container() + await on_completion(*args, **kwargs) finally: - if self.image_id: - self.worker.image_data[self.image_id] -= 1 + try: + await self._kill() + finally: + await self._cleanup() - async def run_until_done_or_deleted(self, f: Callable[..., Awaitable[Any]]): - return await run_until_done_or_deleted(self.deleted_event, f) + async def _kill(self): + if self._killed: + return - def step(self, name: str): - return self.timings.step(name) + try: + if self._run_fut is not None: + await self._run_fut + finally: + try: + if self.container_is_running(): + assert self.process is not None + try: + log.info(f'{self} container is still running, killing crun process') + try: + await check_exec_output('crun', 'kill', '--all', self.name, 'SIGKILL') + except CalledProcessError as e: + not_extant_message = ( + b'error opening file `/run/crun/' + + self.name.encode() + + b'/status`: No such file or directory' + ) + if not (e.returncode == 1 and not_extant_message in e.stderr): + log.exception(f'while deleting container {self}', exc_info=True) + finally: + try: + await send_signal_and_wait(self.process, 'SIGTERM', timeout=5) + except asyncio.TimeoutError: + try: + await send_signal_and_wait(self.process, 'SIGKILL', timeout=5) + except asyncio.CancelledError: + raise + except Exception: + log.exception(f'could not kill process for container {self}') + finally: + self.process = None + finally: + self._run_fut = None + self._killed = True - async def pull_image(self): - is_cloud_image = (CLOUD == 'gcp' and self.image_ref.hosted_in('google')) or ( - CLOUD == 'azure' and self.image_ref.hosted_in('azure') - ) - is_public_image = self.image_ref.name() in PUBLIC_IMAGES + async def _cleanup(self): + if self._cleaned_up: + return + assert self._run_fut is None try: - if not is_cloud_image: - await self.ensure_image_is_pulled() - elif is_public_image: - auth = await self.batch_worker_access_token() - await self.ensure_image_is_pulled(auth=auth) - else: - # Pull to verify this user has access to this - # image. - # FIXME improve the performance of this with a - # per-user image cache. 
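The kill path above escalates from SIGTERM to SIGKILL with short timeouts. A minimal sketch of that escalation pattern for an asyncio subprocess, using a stand-in helper rather than the worker's send_signal_and_wait:

    import asyncio
    import signal

    async def signal_and_wait(process: asyncio.subprocess.Process, sig: signal.Signals, timeout: float):
        # stand-in for the worker's send_signal_and_wait helper
        process.send_signal(sig)
        await asyncio.wait_for(process.wait(), timeout=timeout)

    async def stop_process(process: asyncio.subprocess.Process):
        try:
            await signal_and_wait(process, signal.SIGTERM, timeout=5)
        except asyncio.TimeoutError:
            await signal_and_wait(process, signal.SIGKILL, timeout=5)
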
- auth = self.current_user_access_token() - await docker_call_retry(MAX_DOCKER_IMAGE_PULL_SECS, f'{self}')( - docker.images.pull, self.image_ref_str, auth=auth - ) - except DockerError as e: - if e.status == 404 and 'pull access denied' in e.message: - self.short_error = 'image cannot be pulled' - elif 'not found: manifest unknown' in e.message: - self.short_error = 'image not found' - raise + if self.overlay_mounted: + try: + await check_shell(f'umount -l {self.container_overlay_path}/merged') + self.overlay_mounted = False + except asyncio.CancelledError: + raise + except Exception: + log.exception(f'while unmounting overlay in {self}', exc_info=True) - image_config, _ = await check_exec_output('docker', 'inspect', self.image_ref_str) - image_configs[self.image_ref_str] = json.loads(image_config)[0] + if self.host_port is not None: + port_allocator.free(self.host_port) + self.host_port = None - async def ensure_image_is_pulled(self, auth=None): - try: - await docker_call_retry(MAX_DOCKER_OTHER_OPERATION_SECS, f'{self}')(docker.images.get, self.image_ref_str) - except DockerError as e: - if e.status == 404: - await docker_call_retry(MAX_DOCKER_IMAGE_PULL_SECS, f'{self}')( - docker.images.pull, self.image_ref_str, auth=auth - ) - else: - raise + if self.netns: + network_allocator.free(self.netns) + self.netns = None + finally: + try: + self.image.release() + finally: + self._cleaned_up = True - async def batch_worker_access_token(self): - return await CLOUD_WORKER_API.worker_access_token(self.client_session) + async def remove(self): + self.deleted_event.set() + async with self._cleanup_lock: + try: + await self._kill() + finally: + await self._cleanup() - def current_user_access_token(self): - return {'username': self.job.credentials.username, 'password': self.job.credentials.password} + async def _run_until_done_or_deleted(self, f: Callable[..., Awaitable[Any]], *args, **kwargs): + try: + return await run_until_done_or_deleted(self.deleted_event, f, *args, **kwargs) + except StepInterruptedError as e: + raise ContainerDeletedError from e - async def extract_rootfs(self): - assert self.rootfs_path - os.makedirs(self.rootfs_path) - await check_shell( - f'id=$(docker create {self.image_id}) && docker export $id | tar -C {self.rootfs_path} -xf - && docker rm $id' - ) + def _step(self, name: str) -> ContextManager: + return self.timings.step(name) - async def setup_overlay(self): - lower_dir = self.rootfs_path + async def _setup_overlay(self): + lower_dir = self.image.rootfs_path upper_dir = f'{self.container_overlay_path}/upper' work_dir = f'{self.container_overlay_path}/work' merged_dir = f'{self.container_overlay_path}/merged' @@ -617,21 +837,21 @@ async def setup_overlay(self): ) self.overlay_mounted = True - async def setup_network_namespace(self): - network = self.spec.get('network') - if network is None or network is True: - self.netns = await network_allocator.allocate_public() - else: - assert network == 'private' + async def _setup_network_namespace(self): + if self.network == 'private': self.netns = await network_allocator.allocate_private() + else: + assert self.network is None or self.network == 'public' + self.netns = await network_allocator.allocate_public() + if self.port is not None: self.host_port = await port_allocator.allocate() await self.netns.expose_port(self.port, self.host_port) - async def run_container(self) -> bool: + async def _run_container(self) -> bool: self.started_at = time_msecs() try: - await self.write_container_config() + await 
self._write_container_config() async with async_timeout.timeout(self.timeout): with open(self.log_path, 'w', encoding='utf-8') as container_log: log.info(f'Creating the crun run process for {self}') @@ -642,11 +862,13 @@ async def run_container(self) -> bool: f'{self.container_overlay_path}/merged', '--config', f'{self.config_path}/config.json', - self.container_name, + self.name, stdout=container_log, stderr=container_log, ) - await self.process.wait() + + async with ResourceUsageMonitor(self.name, self.resource_usage_path): + await self.process.wait() log.info(f'crun process completed for {self}') except asyncio.TimeoutError: return True @@ -655,7 +877,7 @@ async def run_container(self) -> bool: return False - async def write_container_config(self): + async def _write_container_config(self): os.makedirs(self.config_path) with open(f'{self.config_path}/config.json', 'w', encoding='utf-8') as f: f.write(json.dumps(await self.container_config())) @@ -663,8 +885,8 @@ async def write_container_config(self): # https://github.com/opencontainers/runtime-spec/blob/master/config.md async def container_config(self): uid, gid = await self._get_in_container_user() - weight = worker_fraction_in_1024ths(self.spec['cpu']) - workdir = self.image_config['Config']['WorkingDir'] + weight = worker_fraction_in_1024ths(self.cpu_in_mcpu) + workdir = self.image.image_config['Config']['WorkingDir'] default_docker_capabilities = [ 'CAP_CHOWN', 'CAP_DAC_OVERRIDE', @@ -694,7 +916,7 @@ async def container_config(self): 'uid': uid, 'gid': gid, }, - 'args': self.spec['command'], + 'args': self.command, 'env': self._env(), 'cwd': workdir if workdir != "" else "/", 'capabilities': { @@ -721,8 +943,8 @@ async def container_config(self): 'resources': { 'cpu': {'shares': weight}, 'memory': { - 'limit': self.spec['memory'], - 'reservation': self.spec['memory'], + 'limit': self.memory_in_bytes, + 'reservation': self.memory_in_bytes, }, # 'blockIO': {'weight': min(weight, 1000)}, FIXME blkio.weight not supported }, @@ -748,7 +970,7 @@ async def container_config(self): }, } - if self.spec.get('unconfined'): + if self.unconfined: config['linux']['maskedPaths'] = [] config['linux']['readonlyPaths'] = [] config['process']['apparmorProfile'] = 'unconfined' @@ -757,7 +979,7 @@ async def container_config(self): return config async def _get_in_container_user(self): - user = self.image_config['Config']['User'] + user = self.image.image_config['Config']['User'] if not user: uid, gid = 0, 0 elif ":" in user: @@ -767,7 +989,7 @@ async def _get_in_container_user(self): return int(uid), int(gid) async def _read_user_from_rootfs(self, user) -> Tuple[str, str]: - with open(f'{self.rootfs_path}/etc/passwd', 'r', encoding='utf-8') as passwd: + with open(f'{self.image.rootfs_path}/etc/passwd', 'r', encoding='utf-8') as passwd: for record in passwd: if record.startswith(user): _, _, uid, gid, _, _, _ = record.split(":") @@ -777,7 +999,7 @@ async def _read_user_from_rootfs(self, user) -> Tuple[str, str]: def _mounts(self, uid, gid): # Only supports empty volumes external_volumes = [] - volumes = self.image_config['Config']['Volumes'] + volumes = self.image.image_config['Config']['Volumes'] if volumes: for v_container_path in volumes: if not v_container_path.startswith('/'): @@ -796,7 +1018,7 @@ def _mounts(self, uid, gid): ) return ( - self.spec.get('volume_mounts') + self.volume_mounts + external_volumes + [ # Recommended filesystems: @@ -859,65 +1081,16 @@ def _mounts(self, uid, gid): ) def _env(self): - env = self.image_config['Config']['Env'] 
+ self.spec.get('env', []) + env = self.image.image_config['Config']['Env'] + self.env if self.port is not None: assert self.host_port is not None env.append(f'HAIL_BATCH_WORKER_PORT={self.host_port}') env.append(f'HAIL_BATCH_WORKER_IP={IP_ADDRESS}') return env - async def delete_container(self): - if self.container_is_running(): - assert self.process is not None - try: - log.info(f'{self} container is still running, killing crun process') - try: - await check_exec_output('crun', 'kill', '--all', self.container_name, 'SIGKILL') - except CalledProcessError as e: - not_extant_message = ( - b'error opening file `/run/crun/' - + self.container_name.encode() - + b'/status`: No such file or directory' - ) - if not (e.returncode == 1 and not_extant_message in e.stderr): - log.exception(f'while deleting container {self}', exc_info=True) - finally: - try: - await send_signal_and_wait(self.process, 'SIGTERM', timeout=5) - except asyncio.TimeoutError: - try: - await send_signal_and_wait(self.process, 'SIGKILL', timeout=5) - except asyncio.CancelledError: - raise - except Exception: - log.exception(f'could not kill process for container {self}') - finally: - self.process = None - - if self.overlay_mounted: - try: - await check_shell(f'umount -l {self.container_overlay_path}/merged') - self.overlay_mounted = False - except asyncio.CancelledError: - raise - except Exception: - log.exception(f'while unmounting overlay in {self}', exc_info=True) - - if self.host_port is not None: - port_allocator.free(self.host_port) - self.host_port = None - - if self.netns: - network_allocator.free(self.netns) - self.netns = None - - async def delete(self): - log.info(f'deleting {self}') - self.deleted_event.set() - # { # name: str, - # state: str, (pending, pulling, creating, starting, running, uploading_log, deleting, succeeded, error, failed) + # state: str, (pending, running, succeeded, error, failed) # timing: dict(str, float), # error: str, (optional) # short_error: str, (optional) @@ -941,11 +1114,11 @@ def status(self): status['container_status'] = self.get_container_status() return status - def get_container_status(self): + def get_container_status(self) -> Optional[dict]: if not self.process: return None - status = { + status: dict = { 'started_at': self.started_at, 'finished_at': self.finished_at, } @@ -965,23 +1138,19 @@ def container_is_running(self): def container_finished(self): return self.process is not None and self.process.returncode is not None - async def upload_log(self): - await self.worker.file_store.write_log_file( - self.job.format_version, - self.job.batch_id, - self.job.job_id, - self.job.attempt_id, - self.name, - await self.get_log(), - ) - - async def get_log(self): + async def get_log(self, offset: Optional[int] = None): if os.path.exists(self.log_path): - return (await self.worker.fs.read(self.log_path)).decode() - return '' + if offset is None: + return (await self.fs.read(self.log_path)).decode() + return (await self.fs.read_from(self.log_path, offset)).decode() + + async def get_resource_usage(self) -> bytes: + if os.path.exists(self.resource_usage_path): + return await self.fs.read(self.resource_usage_path) + return ResourceUsageMonitor.no_data() def __str__(self): - return f'container {self.job.id}/{self.name}' + return f'container {self.name}' def populate_secret_host_path(host_path: str, secret_data: Optional[Dict[str, bytes]]): @@ -993,36 +1162,39 @@ def populate_secret_host_path(host_path: str, secret_data: Optional[Dict[str, by def copy_container( - job: 'Job', - name: str, - 
files, - volume_mounts, - cpu, - memory, + job: 'DockerJob', + task_name: str, + files: List[dict], + volume_mounts: List[dict], + cpu_in_mcpu: int, + memory_in_bytes: int, scratch: str, requester_pays_project: str, client_session: httpx.ClientSession, - worker: 'Worker', ) -> Container: assert files - copy_spec = { - 'image': BATCH_WORKER_IMAGE, - 'name': name, - 'command': [ - '/usr/bin/python3', - '-m', - 'hailtop.aiotools.copy', - json.dumps(requester_pays_project), - json.dumps(files), - '-v', - ], - 'env': [f'{job.credentials.cloud_env_name}={job.credentials.mount_path}'], - 'cpu': cpu, - 'memory': memory, - 'scratch': scratch, - 'volume_mounts': volume_mounts, - } - return Container(job, name, copy_spec, client_session, worker) + assert job.worker.fs is not None + + command = [ + '/usr/bin/python3', + '-m', + 'hailtop.aiotools.copy', + json.dumps(requester_pays_project), + json.dumps(files), + '-v', + ] + + return Container( + fs=job.worker.fs, + name=job.container_name(task_name), + image=Image(BATCH_WORKER_IMAGE, job.credentials, client_session, job.pool), + scratch_dir=f'{scratch}/{task_name}', + command=command, + cpu_in_mcpu=cpu_in_mcpu, + memory_in_bytes=memory_in_bytes, + volume_mounts=volume_mounts, + env=[f'{job.credentials.cloud_env_name}={job.credentials.mount_path}'], + ) class Job: @@ -1105,6 +1277,8 @@ def __init__( self.format_version = format_version self.task_manager = task_manager self.pool = pool + + assert worker self.worker = worker self.deleted_event = asyncio.Event() @@ -1199,6 +1373,9 @@ async def run(self): async def get_log(self): pass + async def get_resource_usage(self) -> Dict[str, Optional[bytes]]: + raise NotImplementedError + async def delete(self): log.info(f'deleting {self}') self.deleted_event.set() @@ -1274,12 +1451,14 @@ def __init__( worker: 'Worker', ): super().__init__(batch_id, user, credentials, job_spec, format_version, task_manager, pool, worker) + assert worker.fs + input_files = job_spec.get('input_files') output_files = job_spec.get('output_files') requester_pays_project = job_spec.get('requester_pays_project') - self.timings = Timings() + self.timings: Timings = Timings() if self.secrets: for secret in self.secrets: @@ -1309,34 +1488,23 @@ def __init__( self.scratch, requester_pays_project, client_session, - worker, ) - # main container - main_spec = { - 'command': job_spec['process']['command'], - 'image': job_spec['process']['image'], - 'name': 'main', - 'env': [f'{var["name"]}={var["value"]}' for var in self.env], - 'cpu': self.cpu_in_mcpu, - 'memory': self.memory_in_bytes, - 'volume_mounts': self.main_volume_mounts, - } - port = job_spec.get('port') - if port: - main_spec['port'] = port - timeout = job_spec.get('timeout') - if timeout: - main_spec['timeout'] = timeout - network = job_spec.get('network') - if network: - assert network in ('public', 'private') - main_spec['network'] = network - unconfined = job_spec.get('unconfined') - if unconfined: - main_spec['unconfined'] = unconfined - main_spec['scratch'] = self.scratch - containers['main'] = Container(self, 'main', main_spec, client_session, worker) + containers['main'] = Container( + fs=self.worker.fs, + name=self.container_name('main'), + image=Image(job_spec['process']['image'], self.credentials, client_session, pool), + scratch_dir=f'{self.scratch}/main', + command=job_spec['process']['command'], + cpu_in_mcpu=self.cpu_in_mcpu, + memory_in_bytes=self.memory_in_bytes, + network=job_spec.get('network'), + port=job_spec.get('port'), + timeout=job_spec.get('timeout'), + 
unconfined=job_spec.get('unconfined'), + volume_mounts=self.main_volume_mounts, + env=[f'{var["name"]}={var["value"]}' for var in self.env], + ) if output_files: containers['output'] = copy_container( @@ -1349,14 +1517,16 @@ def __init__( self.scratch, requester_pays_project, client_session, - worker, ) self.containers = containers - def step(self, name: str): + def step(self, name: str) -> ContextManager: return self.timings.step(name) + def container_name(self, task_name: str): + return f'batch-{self.batch_id}-job-{self.job_id}-{task_name}' + async def setup_io(self): if not instance_config.job_private: if self.worker.data_disk_space_remaining.value < self.external_storage_in_gib: @@ -1387,6 +1557,36 @@ async def setup_io(self): assert self.disk is None, self.disk os.makedirs(self.io_host_path()) + async def run_container(self, container: Container, task_name: str): + async def on_completion(): + with container._step('uploading_log'): + assert self.worker.file_store + await self.worker.file_store.write_log_file( + self.format_version, + self.batch_id, + self.job_id, + self.attempt_id, + task_name, + await container.get_log(), + ) + + with container._step('uploading_resource_usage'): + await self.worker.file_store.write_resource_usage_file( + self.format_version, + self.batch_id, + self.job_id, + self.attempt_id, + task_name, + await container.get_resource_usage(), + ) + + try: + await container.run(on_completion) + except asyncio.CancelledError: + raise + except Exception: + pass + async def run(self): async with self.worker.cpu_sem(self.cpu_in_mcpu): self.start_time = time_msecs() @@ -1454,21 +1654,21 @@ async def run(self): input = self.containers.get('input') if input: log.info(f'{self}: running input') - await input.run() + await self.run_container(input, 'input') log.info(f'{self} input: {input.state}') if not input or input.state == 'succeeded': log.info(f'{self}: running main') main = self.containers['main'] - await main.run() + await self.run_container(main, 'main') log.info(f'{self} main: {main.state}') output = self.containers.get('output') if output: log.info(f'{self}: running output') - await output.run() + await self.run_container(output, 'output') log.info(f'{self} output: {output.state}') if main.state != 'succeeded': @@ -1481,7 +1681,7 @@ async def run(self): self.state = input.state except asyncio.CancelledError: raise - except JobDeletedError: + except ContainerDeletedError: self.state = 'cancelled' except Exception as e: if not user_error(e): @@ -1540,9 +1740,12 @@ async def cleanup(self): async def get_log(self): return {name: await c.get_log() for name, c in self.containers.items()} + async def get_resource_usage(self): + return {name: await c.get_resource_usage() for name, c in self.containers.items()} + async def delete(self): await super().delete() - await asyncio.wait([c.delete() for c in self.containers.values()]) + await asyncio.wait([c.remove() for c in self.containers.values()]) def status(self): status = super().status() @@ -1577,10 +1780,9 @@ def __init__( if input_files or output_files: raise Exception("i/o not supported") - self.user_command_string = job_spec['process']['command'] - assert len(self.user_command_string) >= 3, self.user_command_string - self.revision = self.user_command_string[1] - self.jar_url = self.user_command_string[2] + assert job_spec['process']['jar_spec']['type'] == 'jar_url' + self.jar_url = job_spec['process']['jar_spec']['value'] + self.argv = job_spec['process']['command'] self.timings = Timings() self.state = 'pending' @@ 
-1596,21 +1798,20 @@ def step(self, name): return self.timings.step(name) async def run_until_done_or_deleted(self, f: Callable[..., Awaitable[Any]], *args, **kwargs): - return await run_until_done_or_deleted(self.deleted_event, f, *args, **kwargs) - - def verify_is_acceptable_query_jar_url(self, url: str): - if not url.startswith(ACCEPTABLE_QUERY_JAR_URL_PREFIX): - log.error(f'user submitted unacceptable JAR url: {url} for {self}. {ACCEPTABLE_QUERY_JAR_URL_PREFIX}') - raise ValueError(f'unacceptable JAR url: {url}') + try: + return await run_until_done_or_deleted(self.deleted_event, f, *args, **kwargs) + except StepInterruptedError as e: + raise JobDeletedError from e def secret_host_path(self, secret): return f'{self.scratch}/secrets/{secret["mount_path"]}' async def download_jar(self): - async with self.worker.jar_download_locks[self.revision]: - local_jar_location = f'/hail-jars/{self.revision}.jar' + async with self.worker.jar_download_locks[self.jar_url]: + unique_key = self.jar_url.replace('_', '__').replace('/', '_') + local_jar_location = f'/hail-jars/{unique_key}.jar' if not os.path.isfile(local_jar_location): - self.verify_is_acceptable_query_jar_url(self.jar_url) + assert self.jar_url.startswith(ACCEPTABLE_QUERY_JAR_URL_PREFIX) temporary_file = tempfile.NamedTemporaryFile(delete=False) # pylint: disable=consider-using-with try: async with await self.worker.fs.open(self.jar_url) as jar_data: @@ -1638,7 +1839,7 @@ async def run(self): try: with self.step('connecting_to_jvm'): - self.jvm = await self.run_until_done_or_deleted(self.worker.borrow_jvm) + self.jvm = await self.worker.borrow_jvm(self.cpu_in_mcpu // 1000) self.jvm_name = str(self.jvm) self.task_manager.ensure_future(self.worker.post_job_started(self)) @@ -1659,11 +1860,11 @@ async def run(self): log.info(f'{self}: downloading JAR') with self.step('downloading_jar'): - local_jar_location = await self.run_until_done_or_deleted(self.download_jar) + local_jar_location = await self.download_jar() log.info(f'{self}: running jvm process') with self.step('running'): - await self.jvm.execute(local_jar_location, self.scratch, self.log_file, self.user_command_string) + await self.jvm.execute(local_jar_location, self.scratch, self.log_file, self.jar_url, self.argv) self.state = 'succeeded' log.info(f'{self} main: {self.state}') @@ -1676,12 +1877,15 @@ async def run(self): except JobDeletedError: self.state = 'cancelled' await self.cleanup() + except JVMCreationError: + self.state = 'error' + log.exception(f'while running {self}') + await self.cleanup() + raise except Exception: log.exception(f'while running {self}') - self.state = 'error' self.error = traceback.format_exc() - await self.cleanup() else: await self.cleanup() @@ -1695,7 +1899,7 @@ async def cleanup(self): with self.step('uploading_log'): log.info(f'{self}: uploading log') - await worker.file_store.write_log_file( + await self.worker.file_store.write_log_file( self.format_version, self.batch_id, self.job_id, self.attempt_id, 'main', await self._get_log() ) @@ -1716,6 +1920,9 @@ async def _get_log(self): async def get_log(self): return {'main': await self._get_log()} + async def get_resource_usage(self): + return {'main': ResourceUsageMonitor.no_data()} + async def delete(self): await super().delete() if self.jvm is not None: @@ -1787,59 +1994,112 @@ def scoped_ensure_future(coro_or_future, *, loop=None) -> Iterator[asyncio.Futur fut.cancel() -class BufferedOutputProcess: - @classmethod - async def create(cls, *args, **kwargs): - assert 'stdout' not in kwargs - assert 
'stderr' not in kwargs +class JVMCreationError(Exception): + pass + - process = await asyncio.create_subprocess_exec( - *args, **kwargs, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE +class JVMUserCredentials: + def __init__(self): + self.username = None + self.password = None + + +class JVMContainer: + @staticmethod + async def create_and_start( + index: int, + n_cores: int, + socket_file: str, + root_dir: str, + client_session: httpx.ClientSession, + pool: concurrent.futures.ThreadPoolExecutor, + ): + assert os.path.commonpath([socket_file, root_dir]) == root_dir + assert os.path.isdir(root_dir) + + total_memory_bytes = n_cores * worker_memory_per_core_bytes(CLOUD, instance_config.worker_type()) + + # We allocate 60% of memory per core to off heap memory + memory_per_core_mib = worker_memory_per_core_mib(CLOUD, instance_config.worker_type()) + heap_memory_mb = int(0.4 * n_cores * memory_per_core_mib) + off_heap_memory_per_core_mb = int(0.6 * memory_per_core_mib) + + command = [ + 'java', + f'-Xmx{heap_memory_mb}M', + '-cp', + f'/jvm-entryway:/jvm-entryway/junixsocket-selftest-2.3.3-jar-with-dependencies.jar:{JVM.SPARK_HOME}/jars/*', + 'is.hail.JVMEntryway', + socket_file, + ] + + volume_mounts = [ + { + 'source': JVM.SPARK_HOME, + 'destination': JVM.SPARK_HOME, + 'type': 'none', + 'options': ['rbind', 'rw'], + }, + { + 'source': '/jvm-entryway', + 'destination': '/jvm-entryway', + 'type': 'none', + 'options': ['rbind', 'rw'], + }, + { + 'source': '/hail-jars', + 'destination': '/hail-jars', + 'type': 'none', + 'options': ['rbind', 'rw'], + }, + { + 'source': root_dir, + 'destination': root_dir, + 'type': 'none', + 'options': ['rbind', 'rw'], + }, + { + 'source': '/batch', + 'destination': '/batch', + 'type': 'none', + 'options': ['rbind', 'rw'], + }, + ] + + fs = LocalAsyncFS(pool) # worker does not have a fs when initializing JVMs + + c = Container( + fs=fs, + name=f'jvm-{index}', + image=Image(BATCH_WORKER_IMAGE, JVMUserCredentials(), client_session, pool), + scratch_dir=f'{root_dir}/container', + command=command, + cpu_in_mcpu=n_cores * 1000, + memory_in_bytes=total_memory_bytes, + env=[f'HAIL_WORKER_OFF_HEAP_MEMORY_PER_CORE_MB={off_heap_memory_per_core_mb}'], + volume_mounts=volume_mounts, ) - stop_event = asyncio.Event() - return cls(process, stop_event) - - def __init__(self, process, stop_event: asyncio.Event): - self.process = process - self.stop_event = stop_event - self.buf = bytearray() - assert process.stdout is not None - self.stdout_pump = asyncio.ensure_future(self.pump_to_buffer(process.stdout)) - assert process.stderr is not None - self.stderr_pump = asyncio.ensure_future(self.pump_to_buffer(process.stderr)) - - async def pump_to_buffer(self, strm: asyncio.StreamReader): - with scoped_ensure_future(self.stop_event.wait()) as stop_fut: - while not strm.at_eof() and not self.stop_event.is_set(): - with scoped_ensure_future(strm.readline()) as read_fut: - await asyncio.wait([read_fut, stop_fut], return_when=asyncio.FIRST_COMPLETED) - if read_fut.done(): - result = read_fut.result() - self.buf.extend(result) - - def output(self) -> str: - return self.buf.decode() - - def retrieve_and_clear_output(self) -> str: - buf = self.buf.decode() - self.buf = bytearray() - return buf - - def kill(self): - return self.process.kill() + + await c.create() + await c.start() + + return JVMContainer(c, fs) + + def __init__(self, container: Container, fs: LocalAsyncFS): + self.container = container + self.fs = fs @property def returncode(self) -> Optional[int]: - return 
self.process.returncode + if self.container.process is None: + return None + return self.container.process.returncode - def close(self): - try: - self.kill() - finally: - try: - self.stdout_pump.cancel() - finally: - self.stderr_pump.cancel() + async def remove(self): + if self.fs is not None: + await self.fs.close() + self.fs = None + await self.container.remove() class JVMUserError(Exception): @@ -1856,25 +2116,18 @@ class JVM: FINISH_JVM_EOS = 4 @classmethod - async def create_process(cls, socket_file: str) -> BufferedOutputProcess: - # JVM and Hail both treat MB as 1024 * 1024 bytes. - # JVMs only start in standard workers which have 3.75 GiB == 3840 MiB per core. - # We only allocate 3700 MiB so that we stay well below the machine's max memory. - # We allocate 60% of memory per core to off heap memory: 1480 + 2220 = 3700. - return await BufferedOutputProcess.create( - 'java', - '-Xmx1480M', - '-cp', - f'/jvm-entryway:/jvm-entryway/junixsocket-selftest-2.3.3-jar-with-dependencies.jar:{JVM.SPARK_HOME}/jars/*', - 'is.hail.JVMEntryway', - socket_file, - env={'HAIL_WORKER_OFF_HEAP_MEMORY_PER_CORE_MB': '2220'}, - ) - - @classmethod - async def create_process_and_connect(cls, index: int, socket_file: str) -> Tuple[BufferedOutputProcess, str]: - process = await cls.create_process(socket_file) + async def create_container_and_connect( + cls, + index: int, + n_cores: int, + socket_file: str, + root_dir: str, + client_session: httpx.ClientSession, + pool: concurrent.futures.ThreadPoolExecutor, + ) -> JVMContainer: try: + container = await JVMContainer.create_and_start(index, n_cores, socket_file, root_dir, client_session, pool) + attempts = 0 delay = 0.25 while True: @@ -1889,71 +2142,77 @@ async def create_process_and_connect(cls, index: int, socket_file: str) -> Tuple break finally: writer.close() - except ConnectionRefusedError: - output = process.retrieve_and_clear_output() - log.warning(f'JVM-{index}: connection refused. {output}') - raise - except FileNotFoundError as err: + except (FileNotFoundError, ConnectionRefusedError) as err: attempts += 1 if attempts == 240: + jvm_output = await container.container.get_log() or '' raise ValueError( - f'JVM-{index}: failed to establish connection after {240 * delay} seconds' + f'JVM-{index}: failed to establish connection after {240 * delay} seconds. 
' + 'JVM output:\n\n' + jvm_output ) from err await asyncio.sleep(delay) - startup_output = process.retrieve_and_clear_output() - return process, startup_output - except: - process.close() - raise + return container + except Exception as e: + raise JVMCreationError from e @classmethod - async def create(cls, index: int): - assert worker is not None - - while True: - try: - token = uuid.uuid4().hex - socket_file = '/socket-' + token - root_dir = '/root-' + token - output_file = root_dir + '/output' - should_interrupt = asyncio.Event() - await blocking_to_async(worker.pool, os.mkdir, root_dir) - process, startup_output = await cls.create_process_and_connect(index, socket_file) - log.info(f'JVM-{index}: startup output: {startup_output}') - return cls(index, socket_file, root_dir, output_file, should_interrupt, process) - except ConnectionRefusedError: - pass + async def create(cls, index: int, n_cores: int, worker: 'Worker'): + token = uuid.uuid4().hex + root_dir = f'/host/jvm-{token}' + socket_file = root_dir + '/socket' + output_file = root_dir + '/output' + should_interrupt = asyncio.Event() + await blocking_to_async(worker.pool, os.makedirs, root_dir) + container = await cls.create_container_and_connect( + index, n_cores, socket_file, root_dir, worker.client_session, worker.pool + ) + return cls( + index, + n_cores, + socket_file, + root_dir, + output_file, + should_interrupt, + container, + worker.client_session, + worker.pool, + ) async def new_connection(self): while True: try: - interim_output = self.process.retrieve_and_clear_output() - if len(interim_output) > 0: - log.warning(f'{self}: unexpected output between jobs') - return await asyncio.open_unix_connection(self.socket_file) except ConnectionRefusedError: - log.warning(f'{self}: unexpected exit between jobs', extra=dict(output=self.process.output())) os.remove(self.socket_file) - process, startup_output = await self.create_process_and_connect(self.index, self.socket_file) - self.process = process - log.info(f'JVM-{self.index}: startup output: {startup_output}') + if self.container: + await self.container.remove() + + container = await self.create_container_and_connect( + self.index, self.n_cores, self.socket_file, self.root_dir, self.client_session, self.pool + ) + self.container = container def __init__( self, index: int, + n_cores: int, socket_file: str, root_dir: str, output_file: str, should_interrupt: asyncio.Event, - process: BufferedOutputProcess, + container: JVMContainer, + client_session: httpx.ClientSession, + pool: concurrent.futures.ThreadPoolExecutor, ): self.index = index + self.n_cores = n_cores self.socket_file = socket_file self.root_dir = root_dir self.output_file = output_file self.should_interrupt = should_interrupt - self.process = process + self.container = container + self.client_session = client_session + self.pool = pool def __str__(self): return f'JVM-{self.index}' @@ -1967,14 +2226,11 @@ def interrupt(self): def reset(self): self.should_interrupt.clear() - def kill(self): - if self.process is not None: - self.process.kill() - - def close(self): - self.process.close() + async def kill(self): + if self.container is not None: + await self.container.remove() - async def execute(self, classpath: str, scratch_dir: str, log_file: str, command_string: List[str]): + async def execute(self, classpath: str, scratch_dir: str, log_file: str, jar_url: str, argv: List[str]): assert worker is not None log.info(f'{self}: execute') @@ -1986,33 +2242,35 @@ async def execute(self, classpath: str, scratch_dir: str, 
log_file: str, command stack.callback(writer.close) log.info(f'{self}: connection acquired') - command_string = [classpath, 'is.hail.backend.service.Main', scratch_dir, log_file, *command_string] + command = [classpath, 'is.hail.backend.service.Main', scratch_dir, log_file, jar_url, *argv] - write_int(writer, len(command_string)) - for arg in command_string: - assert isinstance(arg, str) - write_str(writer, arg) + write_int(writer, len(command)) + for part in command: + assert isinstance(part, str) + write_str(writer, part) await writer.drain() - wait_for_message_from_process: asyncio.Future = asyncio.ensure_future(read_int(reader)) - stack.callback(wait_for_message_from_process.cancel) + wait_for_message_from_container: asyncio.Future = asyncio.ensure_future(read_int(reader)) + stack.callback(wait_for_message_from_container.cancel) wait_for_interrupt: asyncio.Future = asyncio.ensure_future(self.should_interrupt.wait()) stack.callback(wait_for_interrupt.cancel) - await asyncio.wait([wait_for_message_from_process, wait_for_interrupt], return_when=asyncio.FIRST_COMPLETED) + await asyncio.wait( + [wait_for_message_from_container, wait_for_interrupt], return_when=asyncio.FIRST_COMPLETED + ) if wait_for_interrupt.done(): await wait_for_interrupt # retrieve exceptions - if not wait_for_message_from_process.done(): + if not wait_for_message_from_container.done(): write_int(writer, 0) # tell process to cancel await writer.drain() eos_exception = None try: - message = await wait_for_message_from_process + message = await wait_for_message_from_container except EndOfStream as exc: try: - self.kill() + await self.kill() except ProcessLookupError: log.warning(f'{self}: JVM died after we received EOS') message = JVM.FINISH_JVM_EOS @@ -2063,41 +2321,45 @@ def __init__(self, client_session: httpx.ClientSession): self.compute_client = None self._jvm_initializer_task = asyncio.ensure_future(self._initialize_jvms()) - self._jvms: List[JVM] = [] + self._jvms: SortedSet[JVM] = SortedSet([], key=lambda jvm: jvm.n_cores) async def _initialize_jvms(self): - if instance_config.worker_type() in ('standard', 'D'): - self._jvms = await asyncio.gather(*[JVM.create(i) for i in range(CORES)]) + if instance_config.worker_type() in ('standard', 'D', 'highmem', 'E'): + jvms = await asyncio.gather( + *[JVM.create(i, 1, self) for i in range(CORES)], + *[JVM.create(CORES + i, 8, self) for i in range(CORES // 8)], + ) + self._jvms.update(jvms) log.info(f'JVMs initialized {self._jvms}') - async def borrow_jvm(self) -> JVM: - if instance_config.worker_type() not in ('standard', 'D'): - raise ValueError(f'JVM jobs not allowed on {instance_config.worker_type()}') - await asyncio.shield(self._jvm_initializer_task) + async def borrow_jvm(self, n_cores: int) -> JVM: + if instance_config.worker_type() not in ('standard', 'D', 'highmem', 'E'): + raise ValueError(f'no JVMs available on {instance_config.worker_type()}') + await self._jvm_initializer_task assert self._jvms - return self._jvms.pop() + index = self._jvms.bisect_key_left(n_cores) + assert index < len(self._jvms), index + return self._jvms.pop(index) def return_jvm(self, jvm: JVM): - if instance_config.worker_type() not in ('standard', 'D'): - raise ValueError(f'JVM jobs not allowed on {instance_config.worker_type()}') jvm.reset() - self._jvms.append(jvm) + self._jvms.add(jvm) async def shutdown(self): log.info('Worker.shutdown') try: with ExitStack() as cleanup: for jvm in self._jvms: - cleanup.callback(jvm.close) + cleanup.callback(jvm.kill) finally: try: 
self.task_manager.shutdown() log.info('shutdown task manager') finally: try: - if self.fs: - await self.fs.close() - log.info('closed worker file system') + if self.file_store: + await self.file_store.close() + log.info('closed file store') finally: try: if self.compute_client: @@ -2105,9 +2367,9 @@ async def shutdown(self): log.info('closed compute client') finally: try: - if self.file_store: - await self.file_store.close() - log.info('closed file store') + if self.fs: + await self.fs.close() + log.info('closed worker file system') finally: await self.client_session.close() log.info('closed client session') @@ -2117,6 +2379,8 @@ async def run_job(self, job): # pylint: disable=no-self-use await job.run() except asyncio.CancelledError: raise + except JVMCreationError: + self.stop_event.set() except Exception as e: if not user_error(e): log.exception(f'while running {job}, ignoring') @@ -2185,26 +2449,46 @@ async def create_job(self, request): raise web.HTTPServiceUnavailable return await asyncio.shield(self.create_job_1(request)) - async def get_job_log(self, request): - if not self.active: - raise web.HTTPServiceUnavailable + def _job_from_request(self, request): batch_id = int(request.match_info['batch_id']) job_id = int(request.match_info['job_id']) id = (batch_id, job_id) job = self.jobs.get(id) if not job: raise web.HTTPNotFound() + return job + + async def get_job_log(self, request): + if not self.active: + raise web.HTTPServiceUnavailable + job = self._job_from_request(request) return web.json_response(await job.get_log()) + async def get_job_resource_usage(self, request): + if not self.active: + raise web.HTTPServiceUnavailable + job = self._job_from_request(request) + resource_usage = await job.get_resource_usage() + + boundary = '----WebKitFormBoundarywiBIWjWR7osAkgFI' + + resp = web.StreamResponse( + status=200, reason='OK', headers={'Content-Type': f'multipart/mixed;boundary={boundary}'} + ) + await resp.prepare(request) + + with aiohttp.MultipartWriter('mixed', boundary=boundary) as mpwriter: + for task, data in resource_usage.items(): + part = mpwriter.append(data) + part.set_content_disposition('attachment', filename=task) + await mpwriter.write(resp) + + return resp + async def get_job_status(self, request): if not self.active: raise web.HTTPServiceUnavailable - batch_id = int(request.match_info['batch_id']) - job_id = int(request.match_info['job_id']) - id = (batch_id, job_id) - job = self.jobs.get(id) - if not job: - raise web.HTTPNotFound() + job = self._job_from_request(request) return web.json_response(job.status()) async def delete_job_1(self, request): @@ -2243,6 +2527,7 @@ async def run(self): web.post('/api/v1alpha/batches/jobs/create', self.create_job), web.delete('/api/v1alpha/batches/{batch_id}/jobs/{job_id}/delete', self.delete_job), web.get('/api/v1alpha/batches/{batch_id}/jobs/{job_id}/log', self.get_job_log), + web.get('/api/v1alpha/batches/{batch_id}/jobs/{job_id}/resource_usage', self.get_job_resource_usage), web.get('/api/v1alpha/batches/{batch_id}/jobs/{job_id}/status', self.get_job_status), web.get('/healthcheck', self.healthcheck), ] diff --git a/batch/deployment.yaml b/batch/deployment.yaml index b2f0c03560d..2447b3424fe 100644 --- a/batch/deployment.yaml +++ b/batch/deployment.yaml @@ -34,6 +34,26 @@ spec: value: "spot" {% endif %} containers: + - name: nginx + image: {{ batch_driver_nginx_image.image }} + resources: + requests: + cpu: "4" + memory: "2G" + limits: + cpu: "4.5" + memory: "4G" + ports: + - containerPort: 443 + volumeMounts: + - name: 
ssl-config-batch-driver-nginx + mountPath: /ssl-config + readOnly: true + readinessProbe: + tcpSocket: + port: 443 + initialDelaySeconds: 5 + periodSeconds: 5 - name: batch-driver image: {{ batch_image.image }} command: @@ -45,10 +65,10 @@ spec: - batch.driver resources: requests: - cpu: "600m" + cpu: "1" memory: "2G" limits: - cpu: "1" + cpu: "1.5" memory: "2.5G" env: - name: HAIL_DOMAIN @@ -127,15 +147,7 @@ spec: key: query_storage_uri - name: HAIL_QUERY_ACCEPTABLE_JAR_SUBFOLDER value: "/jars" -{% elif scope == "test" %} - - name: HAIL_QUERY_STORAGE_URI - valueFrom: - secretKeyRef: - name: global-config - key: test_storage_uri - - name: HAIL_QUERY_ACCEPTABLE_JAR_SUBFOLDER - value: "/{{ token }}/jars" -{% elif scope == "dev" %} +{% elif scope == "test" or scope == "dev" %} - name: HAIL_QUERY_STORAGE_URI valueFrom: secretKeyRef: @@ -150,8 +162,6 @@ spec: - name: HAIL_SHOULD_CHECK_INVARIANTS value: "1" {% endif %} - ports: - - containerPort: 5000 volumeMounts: - name: deploy-config mountPath: /deploy-config @@ -199,6 +209,10 @@ spec: secret: optional: false secretName: ssl-config-batch-driver + - name: ssl-config-batch-driver-nginx + secret: + optional: false + secretName: ssl-config-batch-driver-nginx - name: ssh-public-key secret: secretName: batch-worker-ssh-public-key @@ -317,15 +331,7 @@ spec: key: query_storage_uri - name: HAIL_QUERY_ACCEPTABLE_JAR_SUBFOLDER value: "/jars" -{% elif scope == "test" %} - - name: HAIL_QUERY_STORAGE_URI - valueFrom: - secretKeyRef: - name: global-config - key: test_storage_uri - - name: HAIL_QUERY_ACCEPTABLE_JAR_SUBFOLDER - value: "/{{ token }}/jars" -{% elif scope == "dev" %} +{% elif scope == "test" or scope == "dev" %} - name: HAIL_QUERY_STORAGE_URI valueFrom: secretKeyRef: @@ -405,7 +411,7 @@ spec: - type: Resource resource: name: cpu - targetAverageUtilization: 80 + targetAverageUtilization: 2500 --- apiVersion: policy/v1beta1 kind: PodDisruptionBudget @@ -441,6 +447,6 @@ spec: ports: - port: 443 protocol: TCP - targetPort: 5000 + targetPort: 443 selector: app: batch-driver diff --git a/batch/driver-nginx.conf b/batch/driver-nginx.conf new file mode 100644 index 00000000000..da65fbd41b4 --- /dev/null +++ b/batch/driver-nginx.conf @@ -0,0 +1,78 @@ +worker_processes auto; +pid /run/nginx.pid; +include /etc/nginx/modules-enabled/*.conf; + +events { + worker_connections 768; +} + +http { + + sendfile on; + tcp_nopush on; + tcp_nodelay on; + keepalive_timeout 65; + types_hash_max_size 2048; + server_names_hash_bucket_size 128; + + include /etc/nginx/mime.types; + default_type application/octet-stream; + + ssl_protocols TLSv1 TLSv1.1 TLSv1.2; # Dropping SSLv3, ref: POODLE + ssl_prefer_server_ciphers on; + + log_format json-log escape=json '{' + '"message":"$scheme $request done in ${request_time}s: $status",' + '"response_status":$status,' + '"request_duration":$request_time,' + '"remote_address":"$remote_addr",' + '"x_real_ip":"$http_x_real_ip",' + '"request_start_time":"$time_local",' + '"body_bytes_sent":"$body_bytes_sent",' + '"http_referer":"$http_referer",' + '"http_user_agent":"$http_user_agent"' + '}'; + + access_log /var/log/nginx/access.log json-log; + error_log /var/log/nginx/error.log; + + gzip on; + + include /ssl-config/ssl-config-http.conf; + map $http_x_forwarded_proto $updated_scheme { + default $http_x_forwarded_proto; + '' $scheme; + } + map $http_x_forwarded_host $updated_host { + default $http_x_forwarded_host; + '' $http_host; + } + map $http_upgrade $connection_upgrade { + default upgrade; + '' close; + } + + server { + server_name 
batch-driver.*; + + location = /healthcheck { + return 204; + } + + location / { + proxy_pass http://127.0.0.1:5000/; + + proxy_set_header Host $http_host; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Host $updated_host; + proxy_set_header X-Forwarded-Proto $updated_scheme; + proxy_set_header X-Real-IP $http_x_real_ip; + + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection $connection_upgrade; + } + + listen 443 ssl; + listen [::]:443 ssl; + } +} diff --git a/batch/test/test_batch.py b/batch/test/test_batch.py index f724f7ce262..df53771e2fc 100644 --- a/batch/test/test_batch.py +++ b/batch/test/test_batch.py @@ -611,7 +611,7 @@ def test_timeout(client: BatchClient): status = j.wait() assert status['state'] == 'Error', str((status, b.debug_info())) error_msg = j._get_error(status, 'main') - assert error_msg and 'JobTimeoutError' in error_msg, str((error_msg, b.debug_info())) + assert error_msg and 'ContainerTimeoutError' in error_msg, str((error_msg, b.debug_info())) assert j.exit_code(status) is None, str((status, b.debug_info())) diff --git a/batch2/react-batch/package-lock.json b/batch2/react-batch/package-lock.json index d96370ea382..a4739d09f06 100644 --- a/batch2/react-batch/package-lock.json +++ b/batch2/react-batch/package-lock.json @@ -2492,9 +2492,9 @@ } }, "node_modules/minimist": { - "version": "1.2.5", - "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz", - "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==", + "version": "1.2.6", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz", + "integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==", "dev": true }, "node_modules/ms": { @@ -5644,9 +5644,9 @@ } }, "minimist": { - "version": "1.2.5", - "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz", - "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==", + "version": "1.2.6", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz", + "integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==", "dev": true }, "ms": { diff --git a/benchmark/python/benchmark_hail/run/table_benchmarks.py b/benchmark/python/benchmark_hail/run/table_benchmarks.py index a4e345fe229..2105d361a20 100644 --- a/benchmark/python/benchmark_hail/run/table_benchmarks.py +++ b/benchmark/python/benchmark_hail/run/table_benchmarks.py @@ -160,6 +160,11 @@ def table_import_ints(tsv): )._force_count() +@benchmark(args=many_ints_table.handle('tsv')) +def table_import_ints_impute(tsv): + hl.import_table(tsv, impute=True)._force_count() + + @benchmark(args=many_strings_table.handle('tsv')) def table_import_strings(tsv): hl.import_table(tsv)._force_count() diff --git a/bootstrap-gateway/deployment.yaml b/bootstrap-gateway/deployment.yaml index f6f51ad697f..50cbd1d76f2 100644 --- a/bootstrap-gateway/deployment.yaml +++ b/bootstrap-gateway/deployment.yaml @@ -63,7 +63,7 @@ spec: - type: Resource resource: name: cpu - targetAverageUtilization: 80 + targetAverageUtilization: 500 --- apiVersion: policy/v1beta1 kind: PodDisruptionBudget diff --git a/build.yaml b/build.yaml index 2bfa9174b5d..496265f71b0 100644 --- a/build.yaml +++ b/build.yaml @@ -559,6 +559,17 @@ steps: dependsOn: - service_base_image - merge_code + - kind: buildImage2 + name: batch_driver_nginx_image + 
dockerFile: /io/batch/Dockerfile.driver-nginx + contextPath: /io/batch + publishAs: batch-driver-nginx + inputs: + - from: /repo/batch + to: /io/batch + dependsOn: + - hail_ubuntu_image + - merge_code - kind: buildImage2 name: batch_image dockerFile: /io/batch/Dockerfile @@ -723,6 +734,13 @@ steps: mv build/libs/hail-all-spark-test.jar build/debug_libs/ mv build/deploy/dist/debug-wheel-container.tar build/debug_libs time retry make jars python-version-info wheel + + # Check wheel size is small enough for pypi (< 100 MB) + HAIL_PIP_VERSION=$(cat python/hail/hail_pip_version) + WHEEL_PATH="build/deploy/dist/hail-$HAIL_PIP_VERSION-py3-none-any.whl" + du -h $WHEEL_PATH + $(python3 -c "import os; exit(1) if (os.path.getsize('$WHEEL_PATH')) > 100_000_000 else exit(0)") + time (cd python && zip -r hail.zip hail hailtop) time tar czf test.tar.gz -C python test time tar czf resources.tar.gz -C src/test resources @@ -2012,7 +2030,9 @@ steps: export HAIL_DOCTEST_DATA_DIR=./data export PYTEST_SPLITS=7 export PYTEST_SPLIT_INDEX=0 - export HAIL_QUERY_BACKEND=local + + hailctl config set query/backend local + python3 -m pytest \ --ignore=test/hailtop/ \ --log-cli-level=INFO \ @@ -2061,7 +2081,9 @@ steps: export HAIL_DOCTEST_DATA_DIR=./data export PYTEST_SPLITS=7 export PYTEST_SPLIT_INDEX=1 - export HAIL_QUERY_BACKEND=local + + hailctl config set query/backend local + python3 -m pytest \ --ignore=test/hailtop/ \ --log-cli-level=INFO \ @@ -2110,7 +2132,9 @@ steps: export HAIL_DOCTEST_DATA_DIR=./data export PYTEST_SPLITS=7 export PYTEST_SPLIT_INDEX=2 - export HAIL_QUERY_BACKEND=local + + hailctl config set query/backend local + python3 -m pytest \ --ignore=test/hailtop/ \ --log-cli-level=INFO \ @@ -2159,7 +2183,9 @@ steps: export HAIL_DOCTEST_DATA_DIR=./data export PYTEST_SPLITS=7 export PYTEST_SPLIT_INDEX=3 - export HAIL_QUERY_BACKEND=local + + hailctl config set query/backend local + python3 -m pytest \ --ignore=test/hailtop/ \ --log-cli-level=INFO \ @@ -2208,7 +2234,9 @@ steps: export HAIL_DOCTEST_DATA_DIR=./data export PYTEST_SPLITS=7 export PYTEST_SPLIT_INDEX=4 - export HAIL_QUERY_BACKEND=local + + hailctl config set query/backend local + python3 -m pytest \ --ignore=test/hailtop/ \ --log-cli-level=INFO \ @@ -2257,7 +2285,9 @@ steps: export HAIL_DOCTEST_DATA_DIR=./data export PYTEST_SPLITS=7 export PYTEST_SPLIT_INDEX=5 - export HAIL_QUERY_BACKEND=local + + hailctl config set query/backend local + python3 -m pytest \ --ignore=test/hailtop/ \ --log-cli-level=INFO \ @@ -2306,7 +2336,9 @@ steps: export HAIL_DOCTEST_DATA_DIR=./data export PYTEST_SPLITS=7 export PYTEST_SPLIT_INDEX=6 - export HAIL_QUERY_BACKEND=local + + hailctl config set query/backend local + python3 -m pytest \ --ignore=test/hailtop/ \ --log-cli-level=INFO \ @@ -2965,6 +2997,7 @@ steps: - create_accounts - batch_image - batch_worker_image + - batch_driver_nginx_image - batch_database - deploy_auth - create_certs @@ -3090,9 +3123,7 @@ steps: {% if scope == "deploy" %} HAIL_JAR_URL={{ global.query_storage_uri }} - {% elif scope == "test" %} - HAIL_JAR_URL={{ global.test_storage_uri }}/{{ deploy_batch.token }} - {% elif scope == "dev" %} + {% elif scope == "test" or scope == "dev" %} HAIL_JAR_URL={{ global.test_storage_uri }}/{{ default_ns.name }} {% else %} echo "!!! unexpected scope {{ scope }} !!!" 
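The inline wheel-size guard added to the wheel build step above is easy to misread. A minimal standalone sketch of the same check — assuming only the 100 MB PyPI limit and the wheel path already used by that step; the function name and command-line handling are illustrative — looks like:

```
#!/usr/bin/env python3
# Sketch of the inline check above: fail the build if the wheel exceeds the PyPI limit.
import os
import sys

MAX_WHEEL_BYTES = 100_000_000  # the "< 100 MB" limit cited in the build step


def check_wheel_size(path: str) -> None:
    size = os.path.getsize(path)
    print(f'{path}: {size / 1_000_000:.1f} MB')
    if size > MAX_WHEEL_BYTES:
        sys.exit(f'{path} is {size} bytes, larger than the {MAX_WHEEL_BYTES} byte limit')


if __name__ == '__main__':
    # e.g. build/deploy/dist/hail-$HAIL_PIP_VERSION-py3-none-any.whl
    check_wheel_size(sys.argv[1])
```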
@@ -3147,27 +3178,17 @@ steps: cd /io/repo/hail/python - export HAIL_SHA="$(cat /io/git_version)" - {% if scope == "deploy" %} - export HAIL_JAR_URL={{ global.query_storage_uri }} - {% elif scope == "test" %} - export HAIL_JAR_URL={{ global.test_storage_uri }}/{{ deploy_batch.token }} - {% elif scope == "dev" %} - export HAIL_JAR_URL={{ global.test_storage_uri }}/{{ default_ns.name }} - {% else %} - echo "!!! unexpected scope {{ scope }} !!!" - exit 1 - {% endif %} - export HAIL_JAR_URL=${HAIL_JAR_URL}/jars/$(cat /io/git_version).jar - export HAIL_TEST_STORAGE_URI={{ global.test_storage_uri }}/{{ token }} export HAIL_TEST_RESOURCES_DIR="{{ global.test_storage_uri }}/{{ upload_test_resources_to_blob_storage.token }}/test/resources/" export HAIL_DOCTEST_DATA_DIR="{{ global.test_storage_uri }}/{{ upload_test_resources_to_blob_storage.token }}/doctest/data/" export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=0 - export HAIL_QUERY_BACKEND=service + export HAIL_SHUFFLE_MAX_BRANCH=4 + export HAIL_SHUFFLE_CUTOFF=1000000 + + hailctl config set query/backend batch hailctl config set batch/billing_project test hailctl config set batch/remote_tmpdir {{ global.test_storage_uri }} @@ -3180,13 +3201,12 @@ steps: --durations=50 \ -n 4 \ test + timeout: 5400 inputs: - from: /just-wheel/wheel-container.tar to: /io/wheel-container.tar - from: /repo/hail/python/test to: /io/repo/hail/python/test - - from: /git_version - to: /io/git_version secrets: - name: test-gsa-key namespace: @@ -3230,27 +3250,17 @@ steps: cd /io/repo/hail/python - export HAIL_SHA="$(cat /io/git_version)" - {% if scope == "deploy" %} - export HAIL_JAR_URL={{ global.query_storage_uri }} - {% elif scope == "test" %} - export HAIL_JAR_URL={{ global.test_storage_uri }}/{{ deploy_batch.token }} - {% elif scope == "dev" %} - export HAIL_JAR_URL={{ global.test_storage_uri }}/{{ default_ns.name }} - {% else %} - echo "!!! unexpected scope {{ scope }} !!!" - exit 1 - {% endif %} - export HAIL_JAR_URL=${HAIL_JAR_URL}/jars/$(cat /io/git_version).jar - export HAIL_TEST_STORAGE_URI={{ global.test_storage_uri }}/{{ token }} export HAIL_TEST_RESOURCES_DIR="{{ global.test_storage_uri }}/{{ upload_test_resources_to_blob_storage.token }}/test/resources/" export HAIL_DOCTEST_DATA_DIR="{{ global.test_storage_uri }}/{{ upload_test_resources_to_blob_storage.token }}/doctest/data/" export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=1 - export HAIL_QUERY_BACKEND=service + export HAIL_SHUFFLE_MAX_BRANCH=4 + export HAIL_SHUFFLE_CUTOFF=1000000 + + hailctl config set query/backend batch hailctl config set batch/billing_project test hailctl config set batch/remote_tmpdir {{ global.test_storage_uri }} @@ -3263,13 +3273,12 @@ steps: --durations=50 \ -n 4 \ test + timeout: 5400 inputs: - from: /just-wheel/wheel-container.tar to: /io/wheel-container.tar - from: /repo/hail/python/test to: /io/repo/hail/python/test - - from: /git_version - to: /io/git_version secrets: - name: test-gsa-key namespace: @@ -3313,27 +3322,17 @@ steps: cd /io/repo/hail/python - export HAIL_SHA="$(cat /io/git_version)" - {% if scope == "deploy" %} - export HAIL_JAR_URL={{ global.query_storage_uri }} - {% elif scope == "test" %} - export HAIL_JAR_URL={{ global.test_storage_uri }}/{{ deploy_batch.token }} - {% elif scope == "dev" %} - export HAIL_JAR_URL={{ global.test_storage_uri }}/{{ default_ns.name }} - {% else %} - echo "!!! unexpected scope {{ scope }} !!!" 
- exit 1 - {% endif %} - export HAIL_JAR_URL=${HAIL_JAR_URL}/jars/$(cat /io/git_version).jar - export HAIL_TEST_STORAGE_URI={{ global.test_storage_uri }}/{{ token }} export HAIL_TEST_RESOURCES_DIR="{{ global.test_storage_uri }}/{{ upload_test_resources_to_blob_storage.token }}/test/resources/" export HAIL_DOCTEST_DATA_DIR="{{ global.test_storage_uri }}/{{ upload_test_resources_to_blob_storage.token }}/doctest/data/" export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=2 - export HAIL_QUERY_BACKEND=service + export HAIL_SHUFFLE_MAX_BRANCH=4 + export HAIL_SHUFFLE_CUTOFF=1000000 + + hailctl config set query/backend batch hailctl config set batch/billing_project test hailctl config set batch/remote_tmpdir {{ global.test_storage_uri }} @@ -3346,13 +3345,12 @@ steps: --durations=50 \ -n 4 \ test + timeout: 5400 inputs: - from: /just-wheel/wheel-container.tar to: /io/wheel-container.tar - from: /repo/hail/python/test to: /io/repo/hail/python/test - - from: /git_version - to: /io/git_version secrets: - name: test-gsa-key namespace: @@ -3396,27 +3394,17 @@ steps: cd /io/repo/hail/python - export HAIL_SHA="$(cat /io/git_version)" - {% if scope == "deploy" %} - export HAIL_JAR_URL={{ global.query_storage_uri }} - {% elif scope == "test" %} - export HAIL_JAR_URL={{ global.test_storage_uri }}/{{ deploy_batch.token }} - {% elif scope == "dev" %} - export HAIL_JAR_URL={{ global.test_storage_uri }}/{{ default_ns.name }} - {% else %} - echo "!!! unexpected scope {{ scope }} !!!" - exit 1 - {% endif %} - export HAIL_JAR_URL=${HAIL_JAR_URL}/jars/$(cat /io/git_version).jar - export HAIL_TEST_STORAGE_URI={{ global.test_storage_uri }}/{{ token }} export HAIL_TEST_RESOURCES_DIR="{{ global.test_storage_uri }}/{{ upload_test_resources_to_blob_storage.token }}/test/resources/" export HAIL_DOCTEST_DATA_DIR="{{ global.test_storage_uri }}/{{ upload_test_resources_to_blob_storage.token }}/doctest/data/" export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=3 - export HAIL_QUERY_BACKEND=service + export HAIL_SHUFFLE_MAX_BRANCH=4 + export HAIL_SHUFFLE_CUTOFF=1000000 + + hailctl config set query/backend batch hailctl config set batch/billing_project test hailctl config set batch/remote_tmpdir {{ global.test_storage_uri }} @@ -3429,13 +3417,12 @@ steps: --durations=50 \ -n 4 \ test + timeout: 5400 inputs: - from: /just-wheel/wheel-container.tar to: /io/wheel-container.tar - from: /repo/hail/python/test to: /io/repo/hail/python/test - - from: /git_version - to: /io/git_version secrets: - name: test-gsa-key namespace: @@ -3479,27 +3466,17 @@ steps: cd /io/repo/hail/python - export HAIL_SHA="$(cat /io/git_version)" - {% if scope == "deploy" %} - export HAIL_JAR_URL={{ global.query_storage_uri }} - {% elif scope == "test" %} - export HAIL_JAR_URL={{ global.test_storage_uri }}/{{ deploy_batch.token }} - {% elif scope == "dev" %} - export HAIL_JAR_URL={{ global.test_storage_uri }}/{{ default_ns.name }} - {% else %} - echo "!!! unexpected scope {{ scope }} !!!" 
- exit 1 - {% endif %} - export HAIL_JAR_URL=${HAIL_JAR_URL}/jars/$(cat /io/git_version).jar - export HAIL_TEST_STORAGE_URI={{ global.test_storage_uri }}/{{ token }} export HAIL_TEST_RESOURCES_DIR="{{ global.test_storage_uri }}/{{ upload_test_resources_to_blob_storage.token }}/test/resources/" export HAIL_DOCTEST_DATA_DIR="{{ global.test_storage_uri }}/{{ upload_test_resources_to_blob_storage.token }}/doctest/data/" export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json export PYTEST_SPLITS=5 export PYTEST_SPLIT_INDEX=4 - export HAIL_QUERY_BACKEND=service + export HAIL_SHUFFLE_MAX_BRANCH=4 + export HAIL_SHUFFLE_CUTOFF=1000000 + + hailctl config set query/backend batch hailctl config set batch/billing_project test hailctl config set batch/remote_tmpdir {{ global.test_storage_uri }} @@ -3512,13 +3489,12 @@ steps: --durations=50 \ -n 4 \ test + timeout: 5400 inputs: - from: /just-wheel/wheel-container.tar to: /io/wheel-container.tar - from: /repo/hail/python/test to: /io/repo/hail/python/test - - from: /git_version - to: /io/git_version secrets: - name: test-gsa-key namespace: @@ -5032,6 +5008,61 @@ steps: - hail_run_tests_image - build_hail - deploy_batch + - kind: runImage + name: cancel_all_running_test_batches + image: + valueFrom: service_base_image.image + script: | + cat >cancel_all_running_test_batches.py <<'EOF' + from hailtop.batch_client.aioclient import BatchClient + import asyncio + + async def cancel_all(query): + bc = await BatchClient.create('test') + async for b in bc.list_batches(query): + status = await b.last_known_status() + print(status) + await b.cancel() + + asyncio.get_event_loop().run_until_complete(cancel_all('user:test running')) + EOF + + python3 cancel_all_running_test_batches.py + secrets: + - name: worker-deploy-config + namespace: + valueFrom: default_ns.name + mountPath: /deploy-config + - name: test-dev-tokens + namespace: + valueFrom: default_ns.name + mountPath: /user-tokens + - name: ssl-config-batch-tests + namespace: + valueFrom: default_ns.name + mountPath: /ssl-config + dependsOn: + - create_deploy_config + - create_accounts + - default_ns + - service_base_image + - deploy_batch + - test_batch_0 + - test_batch_1 + - test_batch_2 + - test_batch_3 + - test_batch_4 + - test_ci + - test_hailtop_batch_0 + - test_hailtop_batch_1 + - test_hailtop_batch_2 + - test_hailtop_batch_3 + - test_hailtop_batch_4 + - test_hail_python_service_backend_0 + - test_hail_python_service_backend_1 + - test_hail_python_service_backend_2 + - test_hail_python_service_backend_3 + - test_hail_python_service_backend_4 - kind: runImage name: test_batch_invariants image: @@ -5083,6 +5114,7 @@ steps: - test_hail_python_service_backend_2 - test_hail_python_service_backend_3 - test_hail_python_service_backend_4 + - cancel_all_running_test_batches - kind: runImage name: delete_gcp_batch_instances image: @@ -5132,6 +5164,7 @@ steps: - test_hail_python_service_backend_2 - test_hail_python_service_backend_3 - test_hail_python_service_backend_4 + - cancel_all_running_test_batches - kind: runImage name: delete_azure_batch_instances image: mcr.microsoft.com/azure-cli @@ -5182,3 +5215,4 @@ steps: - test_hailtop_batch_2 - test_hailtop_batch_3 - test_hailtop_batch_4 + - cancel_all_running_test_batches diff --git a/ci/ci/build.py b/ci/ci/build.py index 801dae20d4f..dc0e8d49759 100644 --- a/ci/ci/build.py +++ b/ci/ci/build.py @@ -9,7 +9,7 @@ import yaml from gear.cloud_config import get_global_config -from hailtop.utils import flatten +from hailtop.utils import RETRY_FUNCTION_SCRIPT, flatten 
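The `cancel_all_running_test_batches` step above drives its coroutine with `asyncio.get_event_loop().run_until_complete(...)`, a pattern newer Python releases deprecate when no event loop is already running. A sketch of the same script driven by `asyncio.run` — no other behaviour changed; the `'test'` deploy name and the `'user:test running'` query are copied from the step — would be:

```
import asyncio

from hailtop.batch_client.aioclient import BatchClient


async def cancel_all(query):
    bc = await BatchClient.create('test')
    async for b in bc.list_batches(query):
        print(await b.last_known_status())
        await b.cancel()


# asyncio.run creates, runs, and closes the event loop itself.
asyncio.run(cancel_all('user:test running'))
```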
from .environment import BUILDKIT_IMAGE, CI_UTILS_IMAGE, CLOUD, DEFAULT_NAMESPACE, DOCKER_PREFIX, DOMAIN, STORAGE_URI from .globals import is_test_deployment @@ -311,9 +311,11 @@ def build(self, batch, code, scope): /bin/sh /home/user/convert-cloud-credentials-to-docker-auth-config set -x +{RETRY_FUNCTION_SCRIPT} + export BUILDKITD_FLAGS='--oci-worker-no-process-sandbox --oci-worker-snapshotter=overlayfs' export BUILDCTL_CONNECT_RETRIES_MAX=100 # https://github.com/moby/buildkit/issues/1423 -buildctl-daemonless.sh \ +retry buildctl-daemonless.sh \ build \ --frontend dockerfile.v0 \ --local context={shq(context)} \ diff --git a/ci/ci/github.py b/ci/ci/github.py index 42400c11ae8..9d6faf9e0a1 100644 --- a/ci/ci/github.py +++ b/ci/ci/github.py @@ -406,7 +406,7 @@ def _hail_github_status_from_statuses(statuses_json) -> Dict[str, GithubStatus]: hail_statuses = {} for s in statuses: context = s['context'] - if context == GITHUB_STATUS_CONTEXT: + if context == GITHUB_STATUS_CONTEXT or context.startswith('hail-ci'): if context in hail_statuses: raise ValueError( f'github sent multiple status summaries for context {context}: {s}\n\n{statuses_json}' diff --git a/dev-docs/kubernetes-operations.md b/dev-docs/kubernetes-operations.md index 845fd817153..0690a4ca520 100644 --- a/dev-docs/kubernetes-operations.md +++ b/dev-docs/kubernetes-operations.md @@ -2,13 +2,15 @@ ## Altering a Node Pool +### When managing node pools manually + We will have the old node pool and the new node pool active simultaneously. We will use `cordon` and `drain` to move all load from the old node pool to the new node pool. Then we will delete the old node pool. 1. Add a new node pool to the cluster. You can use the UI or `gcloud`. We have two kinds of node pools: non-preemptible and preemptible, their names should always be non-preemptible-pool-N and - prremptible-pool-N, respectively. When you re-create the nodepool, increment the number by + preemptible-pool-N, respectively. When you re-create the nodepool, increment the number by one. Take care to copy the taints and tags correctly. 2. Wait for the new nodepool to be ready. @@ -47,3 +49,48 @@ kubectl drain --delete-emptydir-data --ignore-daemonsets --selector="cloud.googl ``` gcloud container node-pools delete $OLD_POOL_NAME --cluster $CLUSTER_NAME ``` + +### When using terraform +If using terraform to manage the node pools, we use terraform to create and delete +the pools. Assume we are replacing a pool whose terraform resource name is +`vdc_preemptible_pool`. NOTE: the following names apply to the *terraform resource*, +not the names of the node pools themselves, which should adhere to the naming +conventions outlined above and specified as terraform variables. + +To complete step 1, copy the existing node pool resource +under a new name, `vdc_preemptible_pool_2`, make the desired changes to the new +resource and apply the terraform. This should not alter existing node pools. + +Once draining is complete, take the following steps to remove the old node pool +and restore a clean terraform state: +1. Delete the resource `vdc_preemptible_pool` and apply. This should delete the old node pool. +2. Move the state of the new resource into the old one. For example, if in Azure, run + +``` +terraform state mv \ +module.vdc.azurerm_kubernetes_cluster_node_pool.vdc_preemptible_pool_2 \ +module.vdc.azurerm_kubernetes_cluster_node_pool.vdc_preemptible_pool +``` + +3. Rename `vdc_preemptible_pool_2` to `vdc_preemptible_pool`. 
If you try +to `terraform apply`, there should be no planned changes and the git history +should be clean. + + +## Troubleshooting + +### Terraform Kubernetes provider dialing localhost +Occasionally, the `kubernetes` provider can initialize before fetching necessary +state (as the credentials are themselves terraform resources) and fall back to +dialing localhost. This can occur if you are switching between Hail installations +and the local mirror of the terraform state needs to be sync'd from remote storage +at the start of `terraform apply`. + +As of writing, this +[remains an issue](https://github.com/hashicorp/terraform-provider-kubernetes/issues/1028) +with the kubernetes provider. A workaround to fully initialize the state is instead +of just running `terraform apply` for the entire module, to instead target just +the resources that generate the kubernetes configuration but do not themselves +rely on the kubernetes provider. Run `terraform apply -var-file=global.tfvars -target=module.vdc` +to correctly sync local terraform state, and subsequent invocations of `terraform apply` +should work as expected. diff --git a/docker/Dockerfile.base b/docker/Dockerfile.base index e31f517b6de..d2193ad02bf 100644 --- a/docker/Dockerfile.base +++ b/docker/Dockerfile.base @@ -38,7 +38,7 @@ RUN hail-apt-get-install xz-utils libncurses5 && \ ln -s /opt/mysql-8.0.26-linux-glibc2.17-x86_64-minimal-rebuild/bin/* /usr/bin/ # Regarding explicitly selecting 2.0.1: https://github.com/hail-is/hail/issues/8343 -RUN wget -nv -O ${SPARK_HOME}/jars/gcs-connector-hadoop2-2.0.1.jar https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop2-2.0.1.jar +RUN curl >${SPARK_HOME}/jars/gcs-connector-hadoop2-2.0.1.jar https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop2-2.0.1.jar COPY docker/core-site.xml ${SPARK_HOME}/conf/core-site.xml RUN git clone https://github.com/catchorg/Catch2.git --depth 1 --branch v2.13.3 && \ diff --git a/docker/hail-ubuntu/curlrc b/docker/hail-ubuntu/curlrc index bbf75c92cc5..c0d88933041 100644 --- a/docker/hail-ubuntu/curlrc +++ b/docker/hail-ubuntu/curlrc @@ -1,7 +1,7 @@ --connect-timeout 5 --max-time 10 --retry 5 ---retry-all-errors +--retry-connrefused --retry-max-time 40 --location --fail diff --git a/docker/requirements.txt b/docker/requirements.txt index 6b409f33c12..fe18a9692d1 100644 --- a/docker/requirements.txt +++ b/docker/requirements.txt @@ -22,9 +22,9 @@ flake8==4.0.1 Flask-Cors==3.0.10 Flask-Sockets==0.2.1 Flask==2.0.3 -gcsfs==2021.* gidgethub==4.1.0 google-api-python-client==1.7.10 +google-auth-oauthlib==0.4.6 google-cloud-logging==1.12.1 humanize==1.0.0 hurry.filesize==0.9 diff --git a/gateway/deployment.yaml b/gateway/deployment.yaml index c071fd83e3d..d6c78a53cc3 100644 --- a/gateway/deployment.yaml +++ b/gateway/deployment.yaml @@ -77,7 +77,7 @@ spec: - type: Resource resource: name: cpu - targetAverageUtilization: 80 + targetAverageUtilization: 2500 --- apiVersion: policy/v1beta1 kind: PodDisruptionBudget diff --git a/gear/gear/database.py b/gear/gear/database.py index 03d6f3f4a20..728fe8b0874 100644 --- a/gear/gear/database.py +++ b/gear/gear/database.py @@ -9,7 +9,7 @@ import aiomysql import pymysql -from gear.metrics import DB_CONNECTION_QUEUE_SIZE, PrometheusSQLTimer +from gear.metrics import DB_CONNECTION_QUEUE_SIZE, SQL_TRANSACTIONS, PrometheusSQLTimer from hailtop.auth.sql_config import SQLConfig from hailtop.utils import sleep_and_backoff @@ -159,6 +159,7 @@ async def async_init(self, db_pool, read_only): try: self.conn_context_manager = 
db_pool.acquire() DB_CONNECTION_QUEUE_SIZE.inc() + SQL_TRANSACTIONS.inc() self.conn = await aenter(self.conn_context_manager) DB_CONNECTION_QUEUE_SIZE.dec() async with self.conn.cursor() as cursor: @@ -235,10 +236,15 @@ async def execute_update(self, sql, args=None): async with self.conn.cursor() as cursor: return await cursor.execute(sql, args) - async def execute_many(self, sql, args_array): + async def execute_many(self, sql, args_array, query_name=None): assert self.conn async with self.conn.cursor() as cursor: - return await cursor.executemany(sql, args_array) + if query_name is None: + res = await cursor.executemany(sql, args_array) + else: + async with PrometheusSQLTimer(query_name): + res = await cursor.executemany(sql, args_array) + return res class CallError(Exception): @@ -293,9 +299,9 @@ async def execute_update(self, sql, args=None): return await tx.execute_update(sql, args) @retry_transient_mysql_errors - async def execute_many(self, sql, args_array): + async def execute_many(self, sql, args_array, query_name=None): async with self.start() as tx: - return await tx.execute_many(sql, args_array) + return await tx.execute_many(sql, args_array, query_name=query_name) @retry_transient_mysql_errors async def check_call_procedure(self, sql, args=None, query_name=None): diff --git a/gear/gear/metrics.py b/gear/gear/metrics.py index 5f9d68a8380..a04c9616880 100644 --- a/gear/gear/metrics.py +++ b/gear/gear/metrics.py @@ -6,6 +6,7 @@ REQUEST_COUNT = pc.Counter('http_request_count', 'Number of HTTP requests', ['endpoint', 'verb', 'status']) CONCURRENT_REQUESTS = pc.Gauge('http_concurrent_requests', 'Number of in progress HTTP requests', ['endpoint', 'verb']) +SQL_TRANSACTIONS = pc.Counter('sql_transactions', 'Number of SQL transactions') SQL_QUERY_COUNT = pc.Counter('sql_query_count', 'Number of SQL Queries', ['query_name']) SQL_QUERY_LATENCY = pc.Summary('sql_query_latency_seconds', 'SQL Query latency in seconds', ['query_name']) DB_CONNECTION_QUEUE_SIZE = pc.Gauge('sql_connection_queue_size', 'Number of coroutines waiting for a connection') diff --git a/hail/.gitignore b/hail/.gitignore index d075af07c46..33862f4ba05 100644 --- a/hail/.gitignore +++ b/hail/.gitignore @@ -3,6 +3,7 @@ python/README.md python/dist python/hail.egg-info python/hail/backend/hail-all-spark.jar +python/hail/hail_revision python/hail/hail_pip_version python/hail/docs/change_log.rst python/hail/docs/_build/* diff --git a/hail/Dockerfile.hail-run-tests b/hail/Dockerfile.hail-run-tests index 66bfc4cc340..e8299c2cd0f 100644 --- a/hail/Dockerfile.hail-run-tests +++ b/hail/Dockerfile.hail-run-tests @@ -2,7 +2,7 @@ FROM {{ hail_run_image.image }} RUN mkdir -p plink && \ cd plink && \ - wget -O plink_linux_x86_64.zip https://storage.googleapis.com/hail-common/plink_linux_x86_64_20181202.zip && \ + curl >plink_linux_x86_64.zip https://storage.googleapis.com/hail-common/plink_linux_x86_64_20181202.zip && \ unzip plink_linux_x86_64.zip && \ mv plink /usr/local/bin && \ cd .. 
&& \ diff --git a/hail/Makefile b/hail/Makefile index 5913ca35cd0..a9252d22c34 100644 --- a/hail/Makefile +++ b/hail/Makefile @@ -14,7 +14,7 @@ BRANCH := $(shell git rev-parse --abbrev-ref HEAD) SCALA_VERSION ?= 2.12.13 SPARK_VERSION ?= 3.1.2 HAIL_MAJOR_MINOR_VERSION := 0.2 -HAIL_PATCH_VERSION := 91 +HAIL_PATCH_VERSION := 93 HAIL_PIP_VERSION := $(HAIL_MAJOR_MINOR_VERSION).$(HAIL_PATCH_VERSION) HAIL_VERSION := $(HAIL_PIP_VERSION)-$(SHORT_REVISION) ELASTIC_MAJOR_VERSION ?= 8 @@ -49,7 +49,8 @@ JAR_DEBUG_CLASSES := $(addprefix $(BUILD_DEBUG_PREFIX)/, $(JAR_DEBUG_SOURCES:src PY_FILES := $(shell git ls-files python) INIT_SCRIPTS := python/hailtop/hailctl/deploy.yaml -PYTHON_VERSION_INFO := python/hail/hail_version +PYTHON_VERSION_INFO := python/hail/hail_revision +PYTHON_VERSION_INFO += python/hail/hail_version PYTHON_VERSION_INFO += python/hail/hail_pip_version PYTHON_VERSION_INFO += python/hailtop/hail_version PYTHON_VERSION_INFO += python/hail/docs/_static/hail_version.js @@ -119,6 +120,9 @@ src/main/resources/build-info.properties: Makefile .PHONY: python-version-info python-version-info: $(PYTHON_VERSION_INFO) +python/hail/hail_revision: env/REVISION + echo $(REVISION) > $@ + python/hail/hail_version: env/SHORT_REVISION env/HAIL_PIP_VERSION echo $(HAIL_VERSION) > $@ diff --git a/hail/python/dev/requirements.txt b/hail/python/dev/requirements.txt index 7d3e936da50..a0837e3d327 100644 --- a/hail/python/dev/requirements.txt +++ b/hail/python/dev/requirements.txt @@ -5,7 +5,7 @@ pre-commit==2.17.0 black==22.1.0 curlylint==0.12.0 isort==5.10.1 -pytest==6.2.5 +pytest==7.1.1 pytest-html==1.20.0 pytest-xdist==2.2.1 pytest-instafail==0.4.2 diff --git a/hail/python/hail/__init__.py b/hail/python/hail/__init__.py index a81aabf168f..f9746988525 100644 --- a/hail/python/hail/__init__.py +++ b/hail/python/hail/__init__.py @@ -1,3 +1,4 @@ +from typing import Optional import pkg_resources import sys import asyncio @@ -63,7 +64,7 @@ hadoop_stat, hadoop_exists, hadoop_is_file, hadoop_is_dir, hadoop_scheme_supported, copy_log) -from .context import (init, init_local, init_service, stop, spark_context, tmp_dir, # noqa: E402 +from .context import (init, init_local, init_batch, stop, spark_context, tmp_dir, # noqa: E402 default_reference, get_reference, set_global_seed, _set_flags, _get_flags, _async_current_backend, current_backend, debug_info, citation, cite_hail, cite_hail_bibtex, version, TemporaryFilename, TemporaryDirectory) @@ -73,7 +74,7 @@ __all__ = [ 'init', 'init_local', - 'init_service', + 'init_batch', 'stop', 'spark_context', 'tmp_dir', @@ -135,7 +136,8 @@ ir.register_functions() ir.register_aggregators() -__version__ = None # set in hail.init() +__version__: Optional[str] = None # set by hail.version() +__revision__: Optional[str] = None # set by hail.revision() import warnings # noqa: E402 diff --git a/hail/python/hail/backend/backend.py b/hail/python/hail/backend/backend.py index 5e6fa0e36c4..1caa4d5a3e6 100644 --- a/hail/python/hail/backend/backend.py +++ b/hail/python/hail/backend/backend.py @@ -1,5 +1,23 @@ +from typing import Mapping, List, Union, Tuple, Dict, Optional, Any import abc from ..fs.fs import FS +from ..expr import Expression +from ..expr.types import HailType +from ..ir import BaseIR +from ..utils.java import FatalError, HailUserError + + +def fatal_error_from_java_error_triplet(short_message, expanded_message, error_id): + from .. 
import __version__ + if error_id != -1: + return FatalError(f'Error summary: {short_message}', error_id) + return FatalError(f'''{short_message} + +Java stack trace: +{expanded_message} +Hail version: {__version__} +Error summary: {short_message}''', + error_id) class Backend(abc.ABC): @@ -8,7 +26,7 @@ def stop(self): pass @abc.abstractmethod - def execute(self, ir, timed=False): + def execute(self, ir: BaseIR, timed: bool = False) -> Any: pass @abc.abstractmethod @@ -97,7 +115,12 @@ def fs(self) -> FS: pass @abc.abstractmethod - def index_bgen(self, files, index_file_map, rg, contig_recoding, skip_invalid_loci): + def index_bgen(self, + files: List[str], + index_file_map: Dict[str, str], + referenceGenomeName: Optional[str], + contig_recoding: Dict[str, str], + skip_invalid_loci: bool): pass @abc.abstractmethod @@ -105,6 +128,7 @@ def import_fam(self, path: str, quant_pheno: bool, delimiter: str, missing: str) pass def persist_table(self, t, storage_level): + # FIXME: this can't possibly be right. return t def unpersist_table(self, t): @@ -120,9 +144,46 @@ def unpersist_block_matrix(self, id): pass @abc.abstractmethod - def register_ir_function(self, name, type_parameters, argument_names, argument_types, return_type, body): + def register_ir_function(self, + name: str, + type_parameters: Union[Tuple[HailType, ...], List[HailType]], + value_parameter_names: Union[Tuple[str, ...], List[str]], + value_parameter_types: Union[Tuple[HailType, ...], List[HailType]], + return_type: HailType, + body: Expression): + pass + + @abc.abstractmethod + def persist_expression(self, expr: Expression) -> Expression: + pass + + @abc.abstractmethod + def set_flags(self, **flags: Mapping[str, str]): + """Set Hail flags.""" + pass + + @abc.abstractmethod + def get_flags(self, *flags) -> Mapping[str, str]: + """Mapping of Hail flags.""" pass + @property @abc.abstractmethod - def persist_ir(self, ir): + def requires_lowering(self): pass + + def _handle_fatal_error_from_backend(self, err: FatalError, ir: BaseIR): + if err._error_id is None: + raise err + + error_sources = ir.base_search(lambda x: x._error_id == err._error_id) + if len(error_sources) == 0: + raise err + + better_stack_trace = error_sources[0]._stack_trace + error_message = str(err) + message_and_trace = (f'{error_message}\n' + '------------\n' + 'Hail stack trace:\n' + f'{better_stack_trace}') + raise HailUserError(message_and_trace) from None diff --git a/hail/python/hail/backend/local_backend.py b/hail/python/hail/backend/local_backend.py index 9eb7cd0bbf1..9ca30b4c3c6 100644 --- a/hail/python/hail/backend/local_backend.py +++ b/hail/python/hail/backend/local_backend.py @@ -13,7 +13,6 @@ from hail.expr.matrix_type import tmatrix from hail.expr.table_type import ttable from hail.expr.types import dtype -from hail.ir import JavaIR from hail.ir.renderer import CSERenderer from hail.utils.java import scala_package_object, scala_object from .py4j_backend import Py4JBackend, handle_java_exception @@ -175,7 +174,7 @@ def utils_package_object(self): def stop(self): self._jhc.stop() self._jhc = None - # FIXME stop gateway? 
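The new abstract `set_flags` and `get_flags` methods above give every backend a uniform flag interface in place of ad-hoc environment variables. A short usage sketch — `Env.backend()` as the accessor and `use_new_shuffle` as the flag name are assumptions borrowed from elsewhere in this patch, so treat it as illustrative rather than canonical:

```
from hail.utils.java import Env

# Sketch only: Py4J-based backends validate flag names and raise FatalError for
# unknown flags; the service backend simply records them.
backend = Env.backend()
backend.set_flags(use_new_shuffle='1')
print(backend.get_flags('use_new_shuffle'))  # e.g. {'use_new_shuffle': '1'}
```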
+ self._gateway.shutdown() uninstall_exception_handler() def _parse_value_ir(self, code, ref_map={}, ir_map={}): @@ -268,13 +267,14 @@ def remove_liftover(self, name, dest_reference_genome): name, dest_reference_genome) def parse_vcf_metadata(self, path): - return json.loads(self._jhc.pyParseVCFMetadataJSON(self.fs._jfs, path)) + return json.loads(self._jhc.pyParseVCFMetadataJSON(self._jbackend.fs(), path)) - def index_bgen(self, files, index_file_map, rg, contig_recoding, skip_invalid_loci): - self._jbackend.pyIndexBgen(files, index_file_map, rg, contig_recoding, skip_invalid_loci) + def index_bgen(self, files, index_file_map, referenceGenomeName, contig_recoding, skip_invalid_loci): + self._jbackend.pyIndexBgen(files, index_file_map, referenceGenomeName, contig_recoding, skip_invalid_loci) def import_fam(self, path: str, quant_pheno: bool, delimiter: str, missing: str): return json.loads(self._jbackend.pyImportFam(path, quant_pheno, delimiter, missing)) - def persist_ir(self, ir): - return JavaIR(self._jhc.backend().executeLiteral(self._to_java_value_ir(ir))) + @property + def requires_lowering(self): + return True diff --git a/hail/python/hail/backend/py4j_backend.py b/hail/python/hail/backend/py4j_backend.py index 7ae92ee8ab7..fc808d9d70e 100644 --- a/hail/python/hail/backend/py4j_backend.py +++ b/hail/python/hail/backend/py4j_backend.py @@ -1,11 +1,17 @@ +from typing import Mapping, Union, Tuple, List import abc import py4j +import py4j.java_gateway import hail +from hail.expr import construct_expr +from hail.ir import JavaIR from hail.ir.renderer import CSERenderer -from hail.utils.java import FatalError, Env, HailUserError -from .backend import Backend +from hail.utils.java import FatalError, Env +from .backend import Backend, fatal_error_from_java_error_triplet +from ..expr import Expression +from ..expr.types import HailType def handle_java_exception(f): @@ -22,13 +28,7 @@ def deco(*args, **kwargs): tpl = Env.jutils().handleForPython(e.java_exception) deepest, full, error_id = tpl._1(), tpl._2(), tpl._3() - - if error_id != -1: - raise FatalError('Error summary: %s' % (deepest,), error_id) from None - else: - raise FatalError('%s\n\nJava stack trace:\n%s\n' - 'Hail version: %s\n' - 'Error summary: %s' % (deepest, full, hail.__version__, deepest), error_id) from None + raise fatal_error_from_java_error_triplet(deepest, full, error_id) from None except pyspark.sql.utils.CapturedException as e: raise FatalError('%s\n\nJava stack trace:\n%s\n' 'Hail version: %s\n' @@ -38,6 +38,7 @@ def deco(*args, **kwargs): class Py4JBackend(Backend): + _jbackend: py4j.java_gateway.JavaObject @abc.abstractmethod def __init__(self): @@ -66,15 +67,26 @@ def utils_package_object(self): def _parse_value_ir(self, code, ref_map={}, ir_map={}): pass - def register_ir_function(self, name, type_parameters, argument_names, argument_types, return_type, body): + @abc.abstractmethod + def _to_java_value_ir(self, ir): + pass + + def register_ir_function(self, + name: str, + type_parameters: Union[Tuple[HailType, ...], List[HailType]], + value_parameter_names: Union[Tuple[str, ...], List[str]], + value_parameter_types: Union[Tuple[HailType, ...], List[HailType]], + return_type: HailType, + body: Expression): r = CSERenderer(stop_at_jir=True) code = r(body._ir) - jbody = (self._parse_value_ir(code, ref_map=dict(zip(argument_names, argument_types)), ir_map=r.jirs)) + jbody = (self._parse_value_ir(code, ref_map=dict(zip(value_parameter_names, value_parameter_types)), ir_map=r.jirs)) 
Env.hail().expr.ir.functions.IRFunctionRegistry.pyRegisterIR( name, [ta._parsable_string() for ta in type_parameters], - argument_names, [pt._parsable_string() for pt in argument_types], + value_parameter_names, + [pt._parsable_string() for pt in value_parameter_types], return_type._parsable_string(), jbody) @@ -83,31 +95,13 @@ def execute(self, ir, timed=False): stream_codec = '{"name":"StreamBufferSpec"}' # print(self._hail_package.expr.ir.Pretty.apply(jir, True, -1)) try: - result_tuple = self._jhc.backend().executeEncode(jir, stream_codec) + result_tuple = self._jbackend.executeEncode(jir, stream_codec) (result, timings) = (result_tuple._1(), result_tuple._2()) value = ir.typ._from_encoding(result) return (value, timings) if timed else value except FatalError as e: - error_id = e._error_id - - def criteria(hail_ir): - return hail_ir._error_id is not None and hail_ir._error_id == error_id - - error_sources = ir.base_search(criteria) - better_stack_trace = None - if error_sources: - better_stack_trace = error_sources[0]._stack_trace - - if better_stack_trace: - error_message = str(e) - message_and_trace = (f'{error_message}\n' - '------------\n' - 'Hail stack trace:\n' - f'{better_stack_trace}') - raise HailUserError(message_and_trace) from None - - raise e + self._handle_fatal_error_from_backend(e, ir) async def _async_execute(self, ir, timed=False): raise NotImplementedError('no async available in Py4JBackend') @@ -120,3 +114,28 @@ async def _async_get_reference(self, name): async def _async_get_references(self, names): raise NotImplementedError('no async available in Py4JBackend') + + def persist_expression(self, expr): + return construct_expr( + JavaIR(self._jbackend.executeLiteral(self._to_java_value_ir(expr._ir))), + expr.dtype + ) + + def set_flags(self, **flags: Mapping[str, str]): + available = self._jbackend.availableFlags() + invalid = [] + for flag, value in flags.items(): + if flag in available: + self._jbackend.setFlag(flag, value) + else: + invalid.append(flag) + if len(invalid) != 0: + raise FatalError("Flags {} not valid. 
Valid flags: \n {}" + .format(', '.join(invalid), '\n '.join(available))) + + def get_flags(self, *flags) -> Mapping[str, str]: + return {flag: self._jbackend.getFlag(flag) for flag in flags} + + @property + def requires_lowering(self): + return True diff --git a/hail/python/hail/backend/service_backend.py b/hail/python/hail/backend/service_backend.py index a1b968d0d74..2c802a938c5 100644 --- a/hail/python/hail/backend/service_backend.py +++ b/hail/python/hail/backend/service_backend.py @@ -1,22 +1,25 @@ -from typing import Dict, Optional, Callable, Awaitable +from typing import Dict, Optional, Callable, Awaitable, Mapping, Any, List, Union, Tuple +import abc import asyncio import struct import os +from hail.expr.expressions.base_expression import Expression import orjson import logging import re import yaml from pathlib import Path -from hail.context import TemporaryDirectory, tmp_dir +from hail.context import TemporaryDirectory, tmp_dir, TemporaryFilename, revision from hail.utils import FatalError -from hail.expr.types import dtype +from hail.expr.types import HailType, dtype, ttuple, tvoid from hail.expr.table_type import ttable from hail.expr.matrix_type import tmatrix from hail.expr.blockmatrix_type import tblockmatrix +from hail.experimental import write_expression, read_expression from hail.ir.renderer import CSERenderer -from hailtop.config import get_user_config, get_user_local_cache_dir, get_remote_tmpdir +from hailtop.config import (get_user_config, get_user_local_cache_dir, get_remote_tmpdir) from hailtop.utils import async_to_blocking, secret_alnum_string, TransientError, Timings from hailtop.batch_client import client as hb from hailtop.batch_client import aioclient as aiohb @@ -24,16 +27,25 @@ from hailtop.aiotools.router_fs import RouterAsyncFS import hailtop.aiotools.fs as afs -from .backend import Backend +from .backend import Backend, fatal_error_from_java_error_triplet from ..builtin_references import BUILTIN_REFERENCES from ..fs.fs import FS from ..fs.router_fs import RouterFS +from ..ir import BaseIR from ..context import version +from ..utils import frozendict log = logging.getLogger('backend.service_backend') +async def write_bool(strm: afs.WritableStream, v: bool): + if v: + await strm.write(b'\x01') + else: + await strm.write(b'\x00') + + async def write_int(strm: afs.WritableStream, v: int): await strm.write(struct.pack(' Dict[str, str]: + raise NotImplementedError + + +class JarUrl(JarSpec): + def __init__(self, url): + self.url = url + + def to_dict(self) -> Dict[str, str]: + return {'type': 'jar_url', 'value': self.url} + + def __repr__(self): + return f'JarUrl({self.url})' + + +class GitRevision(JarSpec): + def __init__(self, revision): + self.revision = revision + + def to_dict(self) -> Dict[str, str]: + return {'type': 'git_revision', 'value': self.revision} + + def __repr__(self): + return f'GitRevision({self.revision})' + + +def _get_jar_specification(jar_url: Optional[str]) -> JarSpec: + user_config = get_user_config() + + jar_url = jar_url or os.environ.get('HAIL_JAR_URL') + jar_url = jar_url or user_config.get('query', 'jar_url', fallback=None) + + if jar_url is not None: + return JarUrl(jar_url) + return GitRevision(revision()) + + +class IRFunction: + def __init__(self, + name: str, + type_parameters: Union[Tuple[HailType, ...], List[HailType]], + value_parameter_names: Union[Tuple[str, ...], List[str]], + value_parameter_types: Union[Tuple[HailType, ...], List[HailType]], + return_type: HailType, + body: Expression): + assert 
len(value_parameter_names) == len(value_parameter_types) + render = CSERenderer(stop_at_jir=True) + self._name = name + self._type_parameters = type_parameters + self._value_parameter_names = value_parameter_names + self._value_parameter_types = value_parameter_types + self._return_type = return_type + self._rendered_body = render(body._ir) + + async def serialize(self, writer: afs.WritableStream): + await write_str(writer, self._name) + + await write_int(writer, len(self._type_parameters)) + for type_parameter in self._type_parameters: + await write_str(writer, type_parameter._parsable_string()) + + await write_int(writer, len(self._value_parameter_names)) + for value_parameter_name in self._value_parameter_names: + await write_str(writer, value_parameter_name) + + await write_int(writer, len(self._value_parameter_types)) + for value_parameter_type in self._value_parameter_types: + await write_str(writer, value_parameter_type._parsable_string()) + + await write_str(writer, self._return_type._parsable_string()) + await write_str(writer, self._rendered_body) + + class ServiceBackend(Backend): HAIL_BATCH_FAILURE_EXCEPTION_MESSAGE_RE = re.compile("is.hail.backend.service.HailBatchFailure: ([0-9]+)\n") @@ -113,22 +200,23 @@ class ServiceBackend(Backend): PARSE_VCF_METADATA = 8 INDEX_BGEN = 9 IMPORT_FAM = 10 - GOODBYE = 254 @staticmethod async def create(*, billing_project: Optional[str] = None, batch_client: Optional[aiohb.BatchClient] = None, - skip_logging_configuration: Optional[bool] = None, disable_progress_bar: bool = True, - remote_tmpdir: Optional[str] = None): - del skip_logging_configuration - + remote_tmpdir: Optional[str] = None, + flags: Optional[Dict[str, str]] = None, + jar_url: Optional[str] = None, + driver_cores: Optional[Union[int, str]] = None, + driver_memory: Optional[Union[int, str]] = None, + name_prefix: Optional[str] = None): if billing_project is None: billing_project = get_user_config().get('batch', 'billing_project', fallback=None) if billing_project is None: raise ValueError( - "No billing project. Call 'init_service' with the billing " + "No billing project. 
Call 'init_batch' with the billing " "project or run 'hailctl config set batch/billing_project " "MY_BILLING_PROJECT'" ) @@ -142,6 +230,21 @@ async def create(*, user_local_reference_cache_dir = Path(get_user_local_cache_dir(), 'references', version()) os.makedirs(user_local_reference_cache_dir, exist_ok=True) remote_tmpdir = get_remote_tmpdir('ServiceBackend', remote_tmpdir=remote_tmpdir) + jar_spec = _get_jar_specification(jar_url) + + driver_cores = ( + driver_cores + or os.environ.get('HAIL_QUERY_BATCH_DRIVER_CORES', None) + or get_user_config().get('query', 'batch_driver_cores', fallback=None) + or '1' + ) + + driver_memory = ( + driver_memory + or os.environ.get('HAIL_QUERY_BATCH_DRIVER_MEMORY', None) + or get_user_config().get('query', 'batch_driver_memory', fallback=None) + or 'standard' + ) return ServiceBackend( billing_project=billing_project, @@ -152,9 +255,15 @@ async def create(*, batch_attributes=batch_attributes, user_local_reference_cache_dir=user_local_reference_cache_dir, remote_tmpdir=remote_tmpdir, + flags=flags or {}, + jar_spec=jar_spec, + driver_cores=driver_cores, + driver_memory=driver_memory, + name_prefix=name_prefix or '' ) def __init__(self, + *, billing_project: str, sync_fs: FS, async_fs: AsyncFS, @@ -162,7 +271,12 @@ def __init__(self, disable_progress_bar: bool, batch_attributes: Dict[str, str], user_local_reference_cache_dir: Path, - remote_tmpdir: str): + remote_tmpdir: str, + flags: Dict[str, str], + jar_spec: JarSpec, + driver_cores: Optional[Union[int, str]], + driver_memory: Optional[Union[int, str]], + name_prefix: str): self.billing_project = billing_project self._sync_fs = sync_fs self._async_fs = async_fs @@ -172,6 +286,27 @@ def __init__(self, self.batch_attributes = batch_attributes self.user_local_reference_cache_dir = user_local_reference_cache_dir self.remote_tmpdir = remote_tmpdir + self.flags = flags + self.jar_spec = jar_spec + self.functions: List[IRFunction] = [] + self.driver_cores = driver_cores + self.driver_memory = driver_memory + self.name_prefix = name_prefix + + if "use_new_shuffle" not in self.flags: + self.flags["use_new_shuffle"] = "1" + + def debug_info(self) -> Dict[str, Any]: + return { + 'jar_spec': str(self.jar_spec), + 'billing_project': self.billing_project, + 'batch_attributes': self.batch_attributes, + 'user_local_reference_cache_dir': str(self.user_local_reference_cache_dir), + 'remote_tmpdir': self.remote_tmpdir, + 'flags': self.flags, + 'driver_cores': self.driver_cores, + 'driver_memory': self.driver_memory + } @property def fs(self) -> FS: @@ -184,6 +319,7 @@ def logger(self): def stop(self): async_to_blocking(self._async_fs.close()) async_to_blocking(self.async_bc.close()) + self.functions = [] def render(self, ir): r = CSERenderer() @@ -192,29 +328,44 @@ def render(self, ir): async def _rpc(self, name: str, - inputs: Callable[[afs.WritableStream, str], Awaitable[None]]): + inputs: Callable[[afs.WritableStream, str], Awaitable[None]], + *, + ir: Optional[BaseIR] = None): timings = Timings() token = secret_alnum_string() iodir = TemporaryDirectory(ensure_exists=False).name # FIXME: actually cleanup with TemporaryDirectory(ensure_exists=False) as _: with timings.step("write input"): async with await self._async_fs.create(iodir + '/in') as infile: + nonnull_flag_count = sum(v is not None for v in self.flags.values()) + await write_int(infile, nonnull_flag_count) + for k, v in self.flags.items(): + if v is not None: + await write_str(infile, k) + await write_str(infile, v) await inputs(infile, token) with 
timings.step("submit batch"): batch_attributes = self.batch_attributes if 'name' not in batch_attributes: - batch_attributes = {**batch_attributes, 'name': name} + batch_attributes = {**batch_attributes, 'name': self.name_prefix + name} bb = self.async_bc.create_batch(token=token, attributes=batch_attributes) - j = bb.create_jvm_job([ - ServiceBackend.DRIVER, - os.environ['HAIL_SHA'], - os.environ['HAIL_JAR_URL'], - batch_attributes['name'], - iodir + '/in', - iodir + '/out', - ], mount_tokens=True, resources={'preemptible': False, 'memory': 'standard'}) + j = bb.create_jvm_job( + jar_spec=self.jar_spec.to_dict(), + argv=[ + ServiceBackend.DRIVER, + batch_attributes['name'], + iodir + '/in', + iodir + '/out' + ], + mount_tokens=True, + resources={ + 'preemptible': False, + 'cpu': str(self.driver_cores), + 'memory': str(self.driver_memory) + } + ) b = await bb.submit(disable_progress_bar=self.disable_progress_bar) with timings.step("wait batch"): @@ -233,26 +384,38 @@ async def _rpc(self, logs = await j.log() for k in logs: logs[k] = yaml_literally_shown_str(logs[k].strip()) - message = {'batch_status': status, + message = {'service_backend_debug_info': self.debug_info(), + 'batch_status': status, 'job_status': job_status, 'log': logs} log.error(yaml.dump(message)) - raise ValueError(message) + raise FatalError(message) with timings.step("read output"): async with await self._async_fs.open(iodir + '/out') as outfile: success = await read_bool(outfile) if success: - json_bytes = await read_bytes(outfile) + result_bytes = await read_bytes(outfile) try: - return token, orjson.loads(json_bytes), timings + return token, result_bytes, timings except orjson.JSONDecodeError as err: - raise ValueError(f'batch id was {b.id}\ncould not decode {json_bytes}') from err + raise FatalError(f'batch id was {b.id}\ncould not decode {result_bytes}') from err else: - jstacktrace = await read_str(outfile) - maybe_id = ServiceBackend.HAIL_BATCH_FAILURE_EXCEPTION_MESSAGE_RE.match(jstacktrace) - if maybe_id: - batch_id = maybe_id.groups()[0] + short_message = await read_str(outfile) + expanded_message = await read_str(outfile) + error_id = await read_int(outfile) + if error_id == -1: + error_id = None + maybe_batch_id = ServiceBackend.HAIL_BATCH_FAILURE_EXCEPTION_MESSAGE_RE.match(expanded_message) + if error_id is not None: + assert maybe_batch_id is None, str((short_message, expanded_message, error_id)) + assert ir is not None + self._handle_fatal_error_from_backend( + fatal_error_from_java_error_triplet(short_message, expanded_message, error_id), + ir) + if maybe_batch_id is not None: + assert error_id is None, str((short_message, expanded_message, error_id)) + batch_id = maybe_batch_id.groups()[0] b2 = await self.async_bc.get_batch(batch_id) b2_status = await b2.status() assert b2_status['state'] != 'success' @@ -275,16 +438,18 @@ async def _rpc(self, }) message = { 'id': b.id, - 'stacktrace': yaml_literally_shown_str(jstacktrace.strip()), + 'service_backend_debug_info': self.debug_info(), + 'short_message': yaml_literally_shown_str(short_message.strip()), + 'expanded_message': yaml_literally_shown_str(expanded_message.strip()), 'cause': {'id': batch_id, 'batch_status': b2_status, 'failed_jobs': failed_jobs}} log.error(yaml.dump(message)) - raise ValueError(orjson.dumps(message).decode('utf-8')) - raise FatalError(f'batch id was {b.id}\n' + jstacktrace) + raise FatalError(orjson.dumps(message).decode('utf-8')) + raise FatalError(f'batch id was {b.id}\n' + short_message + '\n' + expanded_message) - def 
execute(self, ir, timed=False): + def execute(self, ir: BaseIR, timed: bool = False): return async_to_blocking(self._async_execute(ir, timed=timed)) - async def _async_execute(self, ir, timed=False): + async def _async_execute(self, ir: BaseIR, timed: bool = False): async def inputs(infile, token): await write_int(infile, ServiceBackend.EXECUTE) await write_str(infile, tmp_dir()) @@ -292,9 +457,18 @@ async def inputs(infile, token): await write_str(infile, self.remote_tmpdir) await write_str(infile, self.render(ir)) await write_str(infile, token) - _, resp, timings = await self._rpc('execute(...)', inputs) - typ = dtype(resp['type']) - converted_value = typ._convert_from_json_na(resp['value']) + await write_int(infile, len(self.functions)) + for fun in self.functions: + await fun.serialize(infile) + await write_str(infile, '{"name":"StreamBufferSpec"}') + + _, resp, timings = await self._rpc('execute(...)', inputs, ir=ir) + typ: HailType = ir.typ + if typ == tvoid: + assert resp == b'', (typ, resp) + converted_value = None + else: + converted_value = ttuple(typ)._from_encoding(resp)[0] if timed: return converted_value, timings return converted_value @@ -316,7 +490,7 @@ async def inputs(infile, _): await write_str(infile, self.remote_tmpdir) await write_str(infile, self.render(ir)) _, resp, _ = await self._rpc('value_type(...)', inputs) - return dtype(resp) + return dtype(orjson.loads(resp)) def table_type(self, tir): return async_to_blocking(self._async_table_type(tir)) @@ -329,7 +503,7 @@ async def inputs(infile, _): await write_str(infile, self.remote_tmpdir) await write_str(infile, self.render(tir)) _, resp, _ = await self._rpc('table_type(...)', inputs) - return ttable._from_json(resp) + return ttable._from_json(orjson.loads(resp)) def matrix_type(self, mir): return async_to_blocking(self._async_matrix_type(mir)) @@ -342,7 +516,7 @@ async def inputs(infile, _): await write_str(infile, self.remote_tmpdir) await write_str(infile, self.render(mir)) _, resp, _ = await self._rpc('matrix_type(...)', inputs) - return tmatrix._from_json(resp) + return tmatrix._from_json(orjson.loads(resp)) def blockmatrix_type(self, bmir): return async_to_blocking(self._async_blockmatrix_type(bmir)) @@ -355,7 +529,7 @@ async def inputs(infile, _): await write_str(infile, self.remote_tmpdir) await write_str(infile, self.render(bmir)) _, resp, _ = await self._rpc('blockmatrix_type(...)', inputs) - return tblockmatrix._from_json(resp) + return tblockmatrix._from_json(orjson.loads(resp)) def add_reference(self, config): raise NotImplementedError("ServiceBackend does not support 'add_reference'") @@ -386,8 +560,8 @@ async def inputs(infile, _): _, resp, _ = await self._rpc('get_reference(...)', inputs) if name in BUILTIN_REFERENCES: with open(Path(self.user_local_reference_cache_dir, name), 'wb') as f: - f.write(orjson.dumps(resp)) - return resp + f.write(resp) + return orjson.loads(resp) def get_references(self, names): return async_to_blocking(self._async_get_references(names)) @@ -406,7 +580,7 @@ async def inputs(infile, _): await write_str(infile, self.remote_tmpdir) await write_str(infile, path) _, resp, _ = await self._rpc('load_references_from_dataset(...)', inputs) - return resp + return orjson.loads(resp) def add_sequence(self, name, fasta_file, index_file): raise NotImplementedError("ServiceBackend does not support 'add_sequence'") @@ -431,16 +605,99 @@ async def inputs(infile, _): await write_str(infile, self.remote_tmpdir) await write_str(infile, path) _, resp, _ = await 
self._rpc('parse_vcf_metadata(...)', inputs) - return resp - - def index_bgen(self, files, index_file_map, rg, contig_recoding, skip_invalid_loci): - raise NotImplementedError("ServiceBackend does not support 'index_bgen'") + return orjson.loads(resp) + + def index_bgen(self, + files: List[str], + index_file_map: Dict[str, str], + referenceGenomeName: Optional[str], + contig_recoding: Dict[str, str], + skip_invalid_loci: bool): + return async_to_blocking(self._async_index_bgen( + files, + index_file_map, + referenceGenomeName, + contig_recoding, + skip_invalid_loci + )) + + async def _async_index_bgen(self, + files: List[str], + index_file_map: Dict[str, str], + referenceGenomeName: Optional[str], + contig_recoding: Dict[str, str], + skip_invalid_loci: bool): + async def inputs(infile, _): + await write_int(infile, ServiceBackend.INDEX_BGEN) + await write_str(infile, tmp_dir()) + await write_str(infile, self.billing_project) + await write_str(infile, self.remote_tmpdir) + await write_int(infile, len(files)) + for fname in files: + await write_str(infile, fname) + await write_int(infile, len(index_file_map)) + for k, v in index_file_map.items(): + await write_str(infile, k) + await write_str(infile, v) + if referenceGenomeName is None: + await write_bool(infile, False) + else: + await write_bool(infile, True) + await write_str(infile, referenceGenomeName) + await write_int(infile, len(contig_recoding)) + for k, v in contig_recoding.items(): + await write_str(infile, k) + await write_str(infile, v) + await write_bool(infile, skip_invalid_loci) + + _, resp, _ = await self._rpc('index_bgen(...)', inputs) + assert resp == b'null' + return None def import_fam(self, path: str, quant_pheno: bool, delimiter: str, missing: str): - raise NotImplementedError("ServiceBackend does not support 'import_fam'") + return async_to_blocking(self._async_import_fam(path, quant_pheno, delimiter, missing)) - def register_ir_function(self, name, type_parameters, argument_names, argument_types, return_type, body): - raise NotImplementedError("ServiceBackend does not support 'register_ir_function'") + async def _async_import_fam(self, path: str, quant_pheno: bool, delimiter: str, missing: str): + async def inputs(infile, _): + await write_int(infile, ServiceBackend.IMPORT_FAM) + await write_str(infile, tmp_dir()) + await write_str(infile, self.billing_project) + await write_str(infile, self.remote_tmpdir) + await write_str(infile, path) + await write_bool(infile, quant_pheno) + await write_str(infile, delimiter) + await write_str(infile, missing) + _, resp, _ = await self._rpc('import_fam(...)', inputs) + return orjson.loads(resp) + + def register_ir_function(self, + name: str, + type_parameters: Union[Tuple[HailType, ...], List[HailType]], + value_parameter_names: Union[Tuple[str, ...], List[str]], + value_parameter_types: Union[Tuple[HailType, ...], List[HailType]], + return_type: HailType, + body: Expression): + self.functions.append(IRFunction( + name, + type_parameters, + value_parameter_names, + value_parameter_types, + return_type, + body + )) + + def persist_expression(self, expr): + # FIXME: should use context manager to clean up persisted resources + fname = TemporaryFilename().name + write_expression(expr, fname) + return read_expression(fname, _assert_type=expr.dtype) + + def set_flags(self, **flags: str): + self.flags.update(flags) + + def get_flags(self, *flags) -> Mapping[str, str]: + return frozendict(self.flags) - def persist_ir(self, ir): - raise NotImplementedError("ServiceBackend does not support 
'persist_ir'") + @property + def requires_lowering(self): + return True diff --git a/hail/python/hail/backend/spark_backend.py b/hail/python/hail/backend/spark_backend.py index 38a08e40781..e8440517f4b 100644 --- a/hail/python/hail/backend/spark_backend.py +++ b/hail/python/hail/backend/spark_backend.py @@ -17,7 +17,6 @@ from hail.expr.matrix_type import tmatrix from hail.expr.blockmatrix_type import tblockmatrix from hail.ir.renderer import CSERenderer -from hail.ir import JavaIR from hail.table import Table from hail.matrixtable import MatrixTable @@ -353,8 +352,8 @@ def remove_liftover(self, name, dest_reference_genome): def parse_vcf_metadata(self, path): return json.loads(self._jhc.pyParseVCFMetadataJSON(self.fs._jfs, path)) - def index_bgen(self, files, index_file_map, rg, contig_recoding, skip_invalid_loci): - self._jbackend.pyIndexBgen(files, index_file_map, rg, contig_recoding, skip_invalid_loci) + def index_bgen(self, files, index_file_map, referenceGenomeName, contig_recoding, skip_invalid_loci): + self._jbackend.pyIndexBgen(files, index_file_map, referenceGenomeName, contig_recoding, skip_invalid_loci) def import_fam(self, path: str, quant_pheno: bool, delimiter: str, missing: str): return json.loads(self._jbackend.pyImportFam(path, quant_pheno, delimiter, missing)) @@ -372,9 +371,6 @@ def register_ir_function(self, name, type_parameters, argument_names, argument_t return_type._parsable_string(), jbody) - def persist_ir(self, ir): - return JavaIR(self._jhc.backend().executeLiteral(self._to_java_value_ir(ir))) - def read_multiple_matrix_tables(self, paths: 'List[str]', intervals: 'List[hl.Interval]', intervals_type): json_repr = { 'paths': paths, @@ -384,3 +380,7 @@ def read_multiple_matrix_tables(self, paths: 'List[str]', intervals: 'List[hl.In results = self._jhc.backend().pyReadMultipleMatrixTables(json.dumps(json_repr)) return [MatrixTable._from_java(jm) for jm in results] + + @property + def requires_lowering(self): + return False diff --git a/hail/python/hail/context.py b/hail/python/hail/context.py index eef95806426..a02a71a39f7 100644 --- a/hail/python/hail/context.py +++ b/hail/python/hail/context.py @@ -1,4 +1,5 @@ -from typing import Optional +from typing import Optional, Union +import warnings import sys import os from urllib.parse import urlparse, urlunparse @@ -8,9 +9,9 @@ import hail from hail.genetics.reference_genome import ReferenceGenome -from hail.typecheck import nullable, typecheck, typecheck_method, enumeration, dictof +from hail.typecheck import nullable, typecheck, typecheck_method, enumeration, dictof, oneof from hail.utils import get_env_or_default -from hail.utils.java import Env, FatalError, warning +from hail.utils.java import Env, warning, choose_backend from hail.backend import Backend from hailtop.utils import secret_alnum_string from .builtin_references import BUILTIN_REFERENCES @@ -167,7 +168,10 @@ def stop(self): spark_conf=nullable(dictof(str, str)), skip_logging_configuration=bool, local_tmpdir=nullable(str), - _optimizer_iterations=nullable(int)) + _optimizer_iterations=nullable(int), + backend=nullable(str), + driver_cores=nullable(oneof(str, int)), + driver_memory=nullable(str)) def init(sc=None, app_name='Hail', master=None, local='local[*]', log=None, quiet=False, append=False, min_block_size=0, branching_factor=50, tmp_dir=None, @@ -176,37 +180,37 @@ def init(sc=None, app_name='Hail', master=None, local='local[*]', spark_conf=None, skip_logging_configuration=False, local_tmpdir=None, - _optimizer_iterations=None): - """Initialize Hail 
and Spark. - - Examples - -------- - Import and initialize Hail using GRCh38 as the default reference genome: + _optimizer_iterations=None, + *, + backend=None, + driver_cores=None, + driver_memory=None): + """Initialize and configure Hail. + + This function will be called with default arguments if any Hail functionality is used. If you + need custom configuration, you must explicitly call this function before using Hail. For + example, to set the default reference genome to GRCh38, import Hail and immediately call + :func:`.init`: >>> import hail as hl >>> hl.init(default_reference='GRCh38') # doctest: +SKIP - Notes - ----- - Hail is not only a Python library; most of Hail is written in Java/Scala - and runs together with Apache Spark in the Java Virtual Machine (JVM). - In order to use Hail, a JVM needs to run as well. The :func:`.init` - function is used to initialize Hail and Spark. + Hail has two backends, ``spark`` and ``batch``. Hail selects a backend by consulting, in order, + these configuration locations: - This function also sets global configuration parameters used for the Hail - session, like the default reference genome and log file location. + 1. The ``backend`` parameter of this function. + 2. The ``HAIL_QUERY_BACKEND`` environment variable. + 3. The value of ``hailctl config get query/backend``. - This function will be called automatically (with default parameters) if - any Hail functionality requiring the backend (most of the libary!) is used. - To initialize Hail explicitly with non-default arguments, be sure to do so - directly after importing the module, as in the above example. + If no configuration is found, Hail will select the Spark backend. - To facilitate the migration from Spark to the ServiceBackend, this method - calls init_service when the environment variable HAIL_QUERY_BACKEND is set - to "service". + Examples + -------- + Configure Hail to use the Batch backend: + + >>> import hail as hl + >>> hl.init(backend='batch') # doctest: +SKIP - Note - ---- If a :class:`pyspark.SparkContext` is already running, then Hail must be initialized with it as an argument: @@ -219,20 +223,22 @@ def init(sc=None, app_name='Hail', master=None, local='local[*]', Parameters ---------- sc : pyspark.SparkContext, optional - Spark context. By default, a Spark context will be created. + Spark Backend only. Spark context. If not specified, the Spark backend will create a new + Spark context. app_name : :class:`str` - Spark application name. + A name for this pipeline. In the Spark backend, this becomes the Spark application name. In + the Batch backend, this is a prefix for the name of every Batch. master : :class:`str`, optional - URL identifying the Spark leader (master) node or `local[N]` for local clusters. + Spark Backend only. URL identifying the Spark leader (master) node or `local[N]` for local + clusters. local : :class:`str` - Local-mode core limit indicator. Must either be `local[N]` where N is a - positive integer or `local[*]`. The latter indicates Spark should use all - cores available. `local[*]` does not respect most containerization CPU - limits. This option is only used if `master` is unset and `spark.master` - is not set in the Spark configuration. + Spark Backend only. Local-mode core limit indicator. Must either be `local[N]` where N is a + positive integer or `local[*]`. The latter indicates Spark should use all cores + available. `local[*]` does not respect most containerization CPU limits. 
This option is only +        used if `master` is unset and `spark.master` is not set in the Spark configuration.      log : :class:`str`     -        Local path for Hail log file. Does not currently support distributed -        file systems like Google Storage, S3, or HDFS. +        Local path for Hail log file. Does not currently support distributed file systems like +        Google Storage, S3, or HDFS.      quiet : :obj:`bool`          Print fewer log messages.      append : :obj:`bool` @@ -252,12 +258,19 @@ def init(sc=None, app_name='Hail', master=None, local='local[*]',      global_seed : :obj:`int`, optional          Global random seed.      spark_conf : :obj:`dict` of :class:`str` to :class:`str`, optional -        Spark configuration parameters. +        Spark backend only. Spark configuration parameters.      skip_logging_configuration : :obj:`bool` -        Skip logging configuration in java and python. +        Spark Backend only. Skip logging configuration in java and python.      local_tmpdir : :class:`str`, optional          Local temporary directory.  Used on driver and executor nodes. Must use the file scheme.          Defaults to TMPDIR, or /tmp. +    driver_cores : :class:`str` or :class:`int`, optional +        Batch backend only. Number of cores to use for the driver process. May be 1 or 8. Default is +        1. +    driver_memory : :class:`str`, optional +        Batch backend only. Memory to use for the driver process. May be standard or +        highmem. Default is standard. +     """      if Env._hc:          if idempotent: @@ -266,20 +279,93 @@ def init(sc=None, app_name='Hail', master=None, local='local[*]',              warning('Hail has already been initialized. If this call was intended to change configuration,'                      ' close the session with hl.stop() first.')  -    if os.environ.get('HAIL_QUERY_BACKEND') == 'service': +    backend = choose_backend(backend) + +    if backend == 'service': +        warnings.warn( +            'The "service" backend is now called the "batch" backend. Support for "service" will be removed in a ' +            'future release.' +        ) +        backend = 'batch' + +    if backend == 'batch':          import asyncio -        # NB: do not use warning because that will initialize Env._hc, which we are trying to do right now. 
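# A minimal, self-contained sketch of the backend precedence described in the new init()
# docstring above: the explicit `backend` argument, then the HAIL_QUERY_BACKEND environment
# variable, then the value saved by `hailctl config set query/backend`, falling back to
# 'spark'. The actual resolution is performed by `choose_backend` (imported from
# hail.utils.java in this patch), whose body is not shown here; `_hailctl_config_backend`
# below is a hypothetical stand-in for that config lookup.
import os
from typing import Optional


def _hailctl_config_backend() -> Optional[str]:
    # Hypothetical: return the value previously stored by `hailctl config set query/backend ...`,
    # or None if it was never set.
    return None


def _choose_backend_sketch(backend: Optional[str] = None) -> str:
    if backend is not None:                       # 1. explicit argument wins
        return backend
    env_backend = os.environ.get('HAIL_QUERY_BACKEND')
    if env_backend:                               # 2. environment variable
        return env_backend
    return _hailctl_config_backend() or 'spark'   # 3. hailctl config, else the Spark default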
- print('When using the query service backend, use `await init_service\'', file=sys.stderr) - return asyncio.get_event_loop().run_until_complete(init_service( + try: + asyncio.get_running_loop() + raise ValueError( + 'When using Hail Query in async code, initialize the ServiceBackend with `await hl.init_batch()`' + ) + except RuntimeError: # RuntimeError implies there is no running loop, so we may start one + return asyncio.get_event_loop().run_until_complete(init_batch( + log=log, + quiet=quiet, + append=append, + tmpdir=tmp_dir, + local_tmpdir=local_tmpdir, + default_reference=default_reference, + global_seed=global_seed, + driver_cores=driver_cores, + driver_memory=driver_memory, + name_prefix=app_name + )) + if backend == 'spark': + return init_spark( log=log, quiet=quiet, append=append, - tmpdir=tmp_dir, + tmp_dir=tmp_dir, local_tmpdir=local_tmpdir, default_reference=default_reference, global_seed=global_seed, - skip_logging_configuration=skip_logging_configuration)) + skip_logging_configuration=skip_logging_configuration + ) + if backend == 'local': + return init_local( + log=log, + quiet=quiet, + append=append, + tmpdir=tmp_dir, + default_reference=default_reference, + global_seed=global_seed, + skip_logging_configuration=skip_logging_configuration + ) + raise ValueError(f'unknown Hail Query backend: {backend}') + +@typecheck(sc=nullable(SparkContext), + app_name=str, + master=nullable(str), + local=str, + log=nullable(str), + quiet=bool, + append=bool, + min_block_size=int, + branching_factor=int, + tmp_dir=nullable(str), + default_reference=enumeration(*BUILTIN_REFERENCES), + idempotent=bool, + global_seed=nullable(int), + spark_conf=nullable(dictof(str, str)), + skip_logging_configuration=bool, + local_tmpdir=nullable(str), + _optimizer_iterations=nullable(int)) +def init_spark(sc=None, + app_name='Hail', + master=None, + local='local[*]', + log=None, + quiet=False, + append=False, + min_block_size=0, + branching_factor=50, + tmp_dir=None, + default_reference='GRCh37', + idempotent=False, + global_seed=6348563392232659379, + spark_conf=None, + skip_logging_configuration=False, + local_tmpdir=None, + _optimizer_iterations=None): from hail.backend.spark_backend import SparkBackend log = _get_log(log) @@ -291,7 +377,6 @@ def init(sc=None, app_name='Hail', master=None, local='local[*]', idempotent, sc, spark_conf, app_name, master, local, log, quiet, append, min_block_size, branching_factor, tmpdir, local_tmpdir, skip_logging_configuration, optimizer_iterations) - if not backend.fs.exists(tmpdir): backend.fs.mkdir(tmpdir) @@ -310,26 +395,35 @@ def init(sc=None, app_name='Hail', master=None, local='local[*]', local_tmpdir=nullable(str), default_reference=enumeration(*BUILTIN_REFERENCES), global_seed=nullable(int), - skip_logging_configuration=bool, - disable_progress_bar=bool) -async def init_service( + disable_progress_bar=bool, + driver_cores=nullable(oneof(str, int)), + driver_memory=nullable(str), + name_prefix=nullable(str) +) +async def init_batch( + *, billing_project: Optional[str] = None, remote_tmpdir: Optional[str] = None, - log=None, - quiet=False, - append=False, - tmpdir=None, - local_tmpdir=None, - default_reference='GRCh37', - global_seed=6348563392232659379, - skip_logging_configuration=False, - *, - disable_progress_bar=True): + log: Optional[str] = None, + quiet: bool = False, + append: bool = False, + tmpdir: Optional[str] = None, + local_tmpdir: Optional[str] = None, + default_reference: str = 'GRCh37', + global_seed: int = 6348563392232659379, + 
disable_progress_bar: bool = True, +        driver_cores: Optional[Union[str, int]] = None, +        driver_memory: Optional[str] = None, +        name_prefix: Optional[str] = None +):      from hail.backend.service_backend import ServiceBackend +    # FIXME: pass local_tmpdir and use on worker and driver      backend = await ServiceBackend.create(billing_project=billing_project,                                            remote_tmpdir=remote_tmpdir, -                                          skip_logging_configuration=skip_logging_configuration, -                                          disable_progress_bar=disable_progress_bar) +                                          disable_progress_bar=disable_progress_bar, +                                          driver_cores=driver_cores, +                                          driver_memory=driver_memory, +                                          name_prefix=name_prefix)      log = _get_log(log)      if tmpdir is None: @@ -379,8 +473,8 @@ def init_local(          global_seed,          backend)  -def version(): -    """Get the installed hail version. +def version() -> str: +    """Get the installed Hail version.      Returns      ------- @@ -392,6 +486,19 @@ def version():      return hail.__version__   +def revision() -> str: +    """Get the installed Hail git revision. + +    Returns +    ------- +    str +    """ +    if hail.__revision__ is None: +        # https://stackoverflow.com/questions/6028000/how-to-read-a-static-file-from-inside-a-python-package +        hail.__revision__ = pkg_resources.resource_string(__name__, 'hail_revision').decode().strip() +    return hail.__revision__ + +  def _hail_cite_url():      v = version()      [tag, sha_prefix] = v.split("-") @@ -625,20 +732,11 @@ def set_global_seed(seed):   def _set_flags(**flags): -    available = set(Env.backend()._jhc.flags().available()) -    invalid = [] -    for flag, value in flags.items(): -        if flag in available: -            Env.backend()._jhc.flags().set(flag, value) -        else: -            invalid.append(flag) -    if len(invalid) != 0: -        raise FatalError("Flags {} not valid. Valid flags: \n    {}" -                         .format(', '.join(invalid), '\n    '.join(available))) +    Env.backend().set_flags(**flags)   def _get_flags(*flags): -    return {flag: Env.backend()._jhc.flags().get(flag) for flag in flags} +    return Env.backend().get_flags(*flags)   def debug_info(): diff --git a/hail/python/hail/docs/change_log.md b/hail/python/hail/docs/change_log.md index 533af14b768..33ff343abfb 100644 --- a/hail/python/hail/docs/change_log.md +++ b/hail/python/hail/docs/change_log.md @@ -23,6 +23,39 @@ relating to file formats**: this means that it may not be possible to use an earlier version of Hail to read files written in a later version.  +--- + +## Version 0.2.93 + +Release 2022-03-27 + +### Beta features + +- Several issues with the beta version of Hail Query on Hail Batch are addressed in this release. + +--- + +## Version 0.2.92 + +Release 2022-03-25 + +### New features + +- (hail#11613) Add `hl.ggplot` support for `scale_fill_hue`, `scale_color_hue`, `scale_fill_manual`, and +  `scale_color_manual`. This allows for an infinite number of discrete colors. +- (hail#11608) Add all remaining public gnomAD datasets, and all versions of extant ones, to the Hail +  Annotation Database and Datasets API. Current as of March 23rd 2022. +- (hail#11662) Add the `weight` aesthetic to `geom_bar`. + +### Beta features + +- This version of Hail includes all the necessary client-side infrastructure to execute Hail Query +  pipelines on a Hail Batch cluster. This effectively enables a "serverless" version of Hail Query +  which is independent of Apache Spark. Broad-affiliated users should contact the Hail team for help +  using Hail Query on Hail Batch. Unaffiliated users should also contact the Hail team to discuss +  the feasibility of running their own Hail Batch cluster. The Hail team is accessible at both +  https://hail.zulipchat.com and https://discuss.hail.is . 
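  For example, once the configuration steps described in the new Query-on-Batch documentation page (added in this release) are complete, opting in from Python is a one-line change. A minimal sketch, assuming a working Hail Batch account and billing project:

  ```python
  import hail as hl

  # Select the Batch backend explicitly; equivalently, set HAIL_QUERY_BACKEND=batch
  # or run `hailctl config set query/backend batch` before starting Python.
  hl.init(backend='batch')

  # Any Hail Query pipeline now executes on Hail Batch instead of Spark.
  mt = hl.balding_nichols_model(n_populations=3, n_samples=10, n_variants=100)
  print(mt.count())
  ```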
+ --- ## Version 0.2.91 diff --git a/hail/python/hail/docs/cloud/query_on_batch.rst b/hail/python/hail/docs/cloud/query_on_batch.rst new file mode 100644 index 00000000000..6365d12fb29 --- /dev/null +++ b/hail/python/hail/docs/cloud/query_on_batch.rst @@ -0,0 +1,65 @@ +=================== +Hail Query-on-Batch +=================== + +.. warning:: + +   Hail Query-on-Batch (the Batch backend) is currently in beta. This means some functionality is +   not yet working. Please `contact us `__ if you would like to use missing +   functionality on Query-on-Batch! + + +Hail Query-on-Batch uses Hail Batch instead of Apache Spark to execute jobs. Instead of a Dataproc +cluster, you will need a Hail Batch cluster. For more information on using Hail Batch, see the `Hail +Batch docs `__. For more information on deploying a Hail Batch cluster, +please contact the Hail Team at our `discussion forum `__. + +Getting Started +--------------- + +1. Install Hail version 0.2.93 or later: + +.. code-block:: text + +   pip install 'hail>=0.2.93' + +2. `Sign up for a Hail Batch account `__ (currently only available to +   Broad affiliates). + +3. Authenticate with Hail Batch. + +.. code-block:: text + +   hailctl auth login + +4. Specify a bucket for Hail to use for temporary intermediate files. In Google Cloud, we recommend +   using a bucket with `automatic deletion after a set period of time +   `__. + +.. code-block:: text + +   hailctl config set batch/tmp_dir gs://my-auto-delete-bucket/hail-query-temporaries + +5. Specify a Hail Batch billing project (these are different from Google Cloud projects). Every new +   user has a trial billing project loaded with 10 USD. The name is available on the `Hail User +   account page `__. + +.. code-block:: text + +   hailctl config set batch/billing_project my-billing-project + +6. Set the default Hail Query backend to ``batch``: + +.. code-block:: text + +   hailctl config set query/backend batch + +7. Now you are ready to `try Hail <../install/try.rst>`__! + +.. _vep_query_on_batch: + +Variant Effect Predictor (VEP) +------------------------------ + +More information coming very soon. If you want to use VEP with Hail Query-on-Batch, please contact +the Hail Team at our `discussion forum `__. diff --git a/hail/python/hail/docs/datasets/schemas/gnomad_genome_sites.rst b/hail/python/hail/docs/datasets/schemas/gnomad_genome_sites.rst index 2191bf1b453..f4cb3173eb3 100644 --- a/hail/python/hail/docs/datasets/schemas/gnomad_genome_sites.rst +++ b/hail/python/hail/docs/datasets/schemas/gnomad_genome_sites.rst @@ -3,63 +3,49 @@ gnomad_genome_sites =================== -* **Versions:** 2.1.1, 3.1 +* **Versions:** 2.1.1, 3.1, 3.1.1, 3.1.2 * **Reference genome builds:** GRCh37, GRCh38 * **Type:** :class:`hail.Table` -Schema (2.1.1, GRCh37) +Schema (3.1.2, GRCh38) ~~~~~~~~~~~~~~~~~~~~~~ .. 
code-block:: text ---------------------------------------- Global fields: - 'rf': struct { - variants_by_type: dict, - feature_medians: dict, - test_intervals: array>>, - test_results: array, - features_importance: dict, - features: array, - vqsr_training: bool, - no_transmitted_singletons: bool, - adj: bool, - rf_hash: str, - rf_snv_cutoff: struct { - bin: int32, + 'freq_meta': array> + 'freq_index_dict': dict + 'faf_index_dict': dict + 'faf_meta': array> + 'vep_version': str + 'vep_csq_header': str + 'dbsnp_version': str + 'filtering_model': struct { + model_name: str, + score_name: str, + snv_cutoff: struct { + bin: float64, min_score: float64 }, - rf_indel_cutoff: struct { - bin: int32, + indel_cutoff: struct { + bin: float64, min_score: float64 - } + }, + model_id: str, + snv_training_variables: array, + indel_training_variables: array } - 'freq_meta': array> - 'freq_index_dict': dict - 'popmax_index_dict': dict - 'age_index_dict': dict - 'faf_index_dict': dict - 'age_distribution': array + 'age_distribution': struct { + bin_edges: array, + bin_freq: array, + n_smaller: int32, + n_larger: int32 + } + 'freq_sample_count': array ---------------------------------------- Row fields: - 'locus': locus + 'locus': locus 'alleles': array 'freq': array - 'age_hist_het': array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }> - 'age_hist_hom': array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }> - 'popmax': array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + dp_hist_all: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + gq_hist_alt: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + dp_hist_alt: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + ab_hist_alt: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + } + } + 'popmax': struct { AC: int32, AF: float64, AN: int32, homozygote_count: int32, - pop: str - }> + pop: str, + faf95: float64 + } + 'qual_hists': struct { + gq_hist_all: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + dp_hist_all: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + gq_hist_alt: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + dp_hist_alt: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + ab_hist_alt: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + } + } 'faf': array, faf95: float64, faf99: float64 }> - 'lcr': bool - 'decoy': bool - 'segdup': bool - 'nonpar': bool - 'variant_type': str - 'allele_type': str - 'n_alt_alleles': int32 - 'was_mixed': bool - 'has_star': bool - 'qd': float64 - 'pab_max': float64 - 'info_MQRankSum': float64 - 'info_SOR': float64 - 'info_InbreedingCoeff': float64 - 'info_ReadPosRankSum': float64 - 'info_FS': float64 - 'info_QD': float64 - 'info_MQ': float64 - 'info_DP': int32 - 'transmitted_singleton': bool - 'fail_hard_filters': bool - 'info_POSITIVE_TRAIN_SITE': bool - 'info_NEGATIVE_TRAIN_SITE': bool - 'omni': bool - 'mills': bool - 'tp': bool - 'rf_train': bool - 'rf_label': str - 'rf_probability': float64 - 'rank': int64 + 'a_index': int32 'was_split': bool - 'singleton': bool - '_score': float64 - '_singleton': bool - 'biallelic_rank': int64 - 'singleton_rank': int64 - 'n_nonref': int32 - 'score': float64 - 
'adj_biallelic_singleton_rank': int64 - 'adj_rank': int64 - 'adj_biallelic_rank': int64 - 'adj_singleton_rank': int64 - 'biallelic_singleton_rank': int64 + 'rsid': set 'filters': set - 'gq_hist_alt': struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - } - 'gq_hist_all': struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - } - 'dp_hist_alt': struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - } - 'dp_hist_all': struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - } - 'ab_hist_alt': struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 + 'info': struct { + QUALapprox: int64, + SB: array, + MQ: float64, + MQRankSum: float64, + VarDP: int32, + AS_ReadPosRankSum: float64, + AS_pab_max: float64, + AS_QD: float32, + AS_MQ: float64, + QD: float32, + AS_MQRankSum: float64, + FS: float64, + AS_FS: float64, + ReadPosRankSum: float64, + AS_QUALapprox: int64, + AS_SB_TABLE: array, + AS_VarDP: int32, + AS_SOR: float64, + SOR: float64, + singleton: bool, + transmitted_singleton: bool, + omni: bool, + mills: bool, + monoallelic: bool, + AS_VQSLOD: float64, + InbreedingCoeff: float64 } - 'qual': float64 'vep': struct { assembly_name: str, allele_string: str, ancestral: str, - colocated_variants: array, - end: int32, - eas_allele: str, - eas_maf: float64, - ea_allele: str, - ea_maf: float64, - eur_allele: str, - eur_maf: float64, - exac_adj_allele: str, - exac_adj_maf: float64, - exac_allele: str, - exac_afr_allele: str, - exac_afr_maf: float64, - exac_amr_allele: str, - exac_amr_maf: float64, - exac_eas_allele: str, - exac_eas_maf: float64, - exac_fin_allele: str, - exac_fin_maf: float64, - exac_maf: float64, - exac_nfe_allele: str, - exac_nfe_maf: float64, - exac_oth_allele: str, - exac_oth_maf: float64, - exac_sas_allele: str, - exac_sas_maf: float64, - id: str, - minor_allele: str, - minor_allele_freq: float64, - phenotype_or_disease: int32, - pubmed: array, - sas_allele: str, - sas_maf: float64, - somatic: int32, - start: int32, - strand: int32 - }>, context: str, end: int32, id: str, @@ -255,6 +205,7 @@ Schema (2.1.1, GRCh37) transcript_consequences: array, variant_class: str } - 'allele_info': struct { - BaseQRankSum: float64, - ClippingRankSum: float64, - DB: bool, - DP: int32, - DS: bool, - END: int32, - FS: float64, - HaplotypeScore: float64, - InbreedingCoeff: float64, - MQ: float64, - MQ0: int32, - MQRankSum: float64, + 'vqsr': struct { + AS_VQSLOD: float64, + AS_culprit: str, NEGATIVE_TRAIN_SITE: bool, - POSITIVE_TRAIN_SITE: bool, - QD: float64, - RAW_MQ: float64, - ReadPosRankSum: float64, - SOR: float64, - VQSLOD: float64, - culprit: str + POSITIVE_TRAIN_SITE: bool + } + 'region_flag': struct { + lcr: bool, + segdup: bool + } + 'allele_info': struct { + variant_type: str, + allele_type: str, + n_alt_alleles: int32, + was_mixed: bool + } + 'age_hist_het': struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + } + 'age_hist_hom': struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + } + 'cadd': struct { + phred: float32, + raw_score: float32, + has_duplicate: bool + } + 'revel': struct { + revel_score: float64, + has_duplicate: bool + } + 'splice_ai': struct { + splice_ai_score: float32, + splice_consequence: str, + has_duplicate: bool + } + 'primate_ai': struct { + primate_ai_score: float32, + has_duplicate: bool } - 'rsid': str 
---------------------------------------- Key: ['locus', 'alleles'] ---------------------------------------- - diff --git a/hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_callset.rst b/hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_callset.rst deleted file mode 100644 index c9cea3b61b0..00000000000 --- a/hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_callset.rst +++ /dev/null @@ -1,651 +0,0 @@ -.. _gnomad_hgdp_1kg_callset: - -gnomad_hgdp_1kg_callset -======================= - -* **Versions:** 3.1 -* **Reference genome builds:** GRCh38 -* **Type:** :class:`hail.MatrixTable` - -Schema (3.1, GRCh38) -~~~~~~~~~~~~~~~~~~~~ - -.. code-block:: text - - ---------------------------------------- - Global fields: - 'global_annotation_descriptions': struct { - sex_imputation_ploidy_cutoffs: struct { - Description: str - }, - population_inference_pca_metrics: struct { - Description: str - }, - hard_filter_cutoffs: struct { - Description: str - }, - cohort_freq_meta: struct { - Description: str - }, - gnomad_freq_meta: struct { - Description: str - }, - cohort_freq_index_dict: struct { - Description: str - }, - gnomad_freq_index_dict: struct { - Description: str - }, - gnomad_faf_index_dict: struct { - Description: str - }, - gnomad_faf_meta: struct { - Description: str - }, - vep_version: struct { - Description: str - }, - vep_csq_header: struct { - Description: str - }, - dbsnp_version: struct { - Description: str - }, - filtering_model: struct { - Description: str, - sub_globals: struct { - model_name: struct { - Description: str - }, - score_name: struct { - Description: str - }, - snv_cutoff: struct { - Description: str, - sub_globals: struct { - bin: struct { - Description: str - }, - min_score: struct { - Description: str - } - } - }, - indel_cutoff: struct { - Description: str, - sub_globals: struct { - bin: struct { - Description: str - }, - min_score: struct { - Description: str - } - } - }, - snv_training_variables: struct { - Description: str - }, - indel_training_variables: struct { - Description: str - } - } - }, - inbreeding_coeff_cutoff: struct { - Description: str - } - } - 'sample_annotation_descriptions': struct { - s: struct { - Description: str - }, - bam_metrics: struct { - Description: str, - sub_annotations: struct { - pct_bases_20x: struct { - Description: str - }, - pct_chimeras: struct { - Description: str - }, - freemix: struct { - Description: str - }, - mean_coverage: struct { - Description: str - }, - median_coverage: struct { - Description: str - }, - mean_insert_size: struct { - Description: str - }, - median_insert_size: struct { - Description: str - }, - pct_bases_10x: struct { - Description: str - } - } - }, - subsets: struct { - Description: str, - sub_annotations: struct { - tgp: struct { - Description: str - }, - hgdp: struct { - Description: str - } - } - }, - sex_imputation: struct { - Description: str, - sub_annotations: struct { - f_stat: struct { - Description: str - }, - n_called: struct { - Description: str - }, - expected_homs: struct { - Description: str - }, - observed_homs: struct { - Description: str - }, - chr20_mean_dp: struct { - Description: str - }, - chrX_mean_dp: struct { - Description: str - }, - chrY_mean_dp: struct { - Description: str - }, - chrX_ploidy: struct { - Description: str - }, - chrY_ploidy: struct { - Description: str - }, - X_karyotype: struct { - Description: str - }, - Y_karyotype: struct { - Description: str - }, - sex_karyotype: struct { - Description: str - } - } - }, - sample_qc: struct { - 
Description: str, - sub_annotations: struct { - n_hom_ref: struct { - Description: str - }, - n_het: struct { - Description: str - }, - n_hom_var: struct { - Description: str - }, - n_non_ref: struct { - Description: str - }, - n_snp: struct { - Description: str - }, - n_insertion: struct { - Description: str - }, - n_deletion: struct { - Description: str - }, - n_transition: struct { - Description: str - }, - n_transversion: struct { - Description: str - }, - r_ti_tv: struct { - Description: str - }, - r_het_hom_var: struct { - Description: str - }, - r_insertion_deletion: struct { - Description: str - } - } - }, - population_inference: struct { - Description: str, - sub_annotations: struct { - pca_scores: struct { - Description: str - }, - pop: struct { - Description: str - }, - prob_afr: struct { - Description: str - }, - prob_ami: struct { - Description: str - }, - prob_amr: struct { - Description: str - }, - prob_asj: struct { - Description: str - }, - prob_eas: struct { - Description: str - }, - prob_fin: struct { - Description: str - }, - prob_mid: struct { - Description: str - }, - prob_nfe: struct { - Description: str - }, - prob_oth: struct { - Description: str - }, - prob_sas: struct { - Description: str - } - } - }, - labeled_subpop: struct { - Description: str - }, - gnomad_release: struct { - Description: str - } - } - 'sex_imputation_ploidy_cutoffs': struct { - x_ploidy_cutoffs: struct { - upper_cutoff_X: float64, - lower_cutoff_XX: float64, - upper_cutoff_XX: float64, - lower_cutoff_XXX: float64 - }, - y_ploidy_cutoffs: struct { - lower_cutoff_Y: float64, - upper_cutoff_Y: float64, - lower_cutoff_YY: float64 - }, - f_stat_cutoff: float64 - } - 'population_inference_pca_metrics': struct { - n_pcs: int32, - min_prob: float64 - } - 'hard_filter_cutoffs': struct { - min_cov: int32, - max_n_snp: float64, - min_n_snp: float64, - max_n_singleton: float64, - max_r_het_hom_var: float64, - max_pct_contamination: float64, - max_pct_chimera: float64, - min_median_insert_size: int32 - } - 'cohort_freq_meta': array> - 'cohort_freq_index_dict': dict - 'gnomad_freq_meta': array> - 'gnomad_freq_index_dict': dict - 'gnomad_faf_index_dict': dict - 'gnomad_faf_meta': array> - 'vep_version': str - 'vep_csq_header': str - 'dbsnp_version': str - 'filtering_model': struct { - model_name: str, - score_name: str, - snv_cutoff: struct { - bin: float64, - min_score: float64 - }, - indel_cutoff: struct { - bin: float64, - min_score: float64 - }, - snv_training_variables: array, - indel_training_variables: array - } - 'inbreeding_coeff_cutoff': float64 - ---------------------------------------- - Column fields: - 's': str - 'bam_metrics': struct { - pct_bases_20x: float64, - pct_chimeras: float64, - freemix: float64, - mean_coverage: float64, - median_coverage: float64, - mean_insert_size: float64, - median_insert_size: float64, - pct_bases_10x: float64 - } - 'subsets': struct { - tgp: bool, - hgdp: bool - } - 'sex_imputation': struct { - chr20_mean_dp: float32, - chrX_mean_dp: float32, - chrY_mean_dp: float32, - chrX_ploidy: float32, - chrY_ploidy: float32, - X_karyotype: str, - Y_karyotype: str, - sex_karyotype: str, - impute_sex_stats: struct { - f_stat: float64, - n_called: int64, - expected_homs: float64, - observed_homs: int64 - } - } - 'sample_qc': struct { - n_hom_ref: int64, - n_het: int64, - n_hom_var: int64, - n_non_ref: int64, - n_snp: int64, - n_insertion: int64, - n_deletion: int64, - n_transition: int64, - n_transversion: int64, - r_ti_tv: float64, - r_het_hom_var: float64, - 
r_insertion_deletion: float64 - } - 'population_inference': struct { - pca_scores: array, - pop: str, - prob_afr: float64, - prob_ami: float64, - prob_amr: float64, - prob_asj: float64, - prob_eas: float64, - prob_fin: float64, - prob_mid: float64, - prob_nfe: float64, - prob_oth: float64, - prob_sas: float64 - } - 'labeled_subpop': str - 'gnomad_release': bool - ---------------------------------------- - Row fields: - 'locus': locus - 'alleles': array - 'rsid': str - 'AS_lowqual': bool - 'telomere_or_centromere': bool - 'cohort_freq': array - 'gnomad_freq': array - 'gnomad_raw_qual_hists': struct { - gq_hist_all: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - dp_hist_all: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - gq_hist_alt: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - dp_hist_alt: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - ab_hist_alt: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - } - } - 'gnomad_popmax': struct { - AC: int32, - AF: float64, - AN: int32, - homozygote_count: int32, - pop: str, - faf95: float64 - } - 'gnomad_qual_hists': struct { - gq_hist_all: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - dp_hist_all: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - gq_hist_alt: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - dp_hist_alt: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - }, - ab_hist_alt: struct { - bin_edges: array, - bin_freq: array, - n_smaller: int64, - n_larger: int64 - } - } - 'gnomad_faf': array - 'filters': set - 'info': struct { - QUALapprox: int64, - SB: array, - MQ: float64, - MQRankSum: float64, - VarDP: int32, - AS_ReadPosRankSum: float64, - AS_pab_max: float64, - AS_QD: float32, - AS_MQ: float64, - QD: float32, - AS_MQRankSum: float64, - FS: float64, - AS_FS: float64, - ReadPosRankSum: float64, - AS_QUALapprox: int64, - AS_SB_TABLE: array, - AS_VarDP: int32, - AS_SOR: float64, - SOR: float64, - transmitted_singleton: bool, - omni: bool, - mills: bool, - monoallelic: bool, - AS_VQSLOD: float64, - InbreedingCoeff: float32 - } - 'vep': struct { - assembly_name: str, - allele_string: str, - ancestral: str, - context: str, - end: int32, - id: str, - input: str, - intergenic_consequences: array, - impact: str, - minimised: int32, - variant_allele: str - }>, - most_severe_consequence: str, - motif_feature_consequences: array, - high_inf_pos: str, - impact: str, - minimised: int32, - motif_feature_id: str, - motif_name: str, - motif_pos: int32, - motif_score_change: float64, - strand: int32, - variant_allele: str - }>, - regulatory_feature_consequences: array, - impact: str, - minimised: int32, - regulatory_feature_id: str, - variant_allele: str - }>, - seq_region_name: str, - start: int32, - strand: int32, - transcript_consequences: array, - distance: int32, - domains: array, - exon: str, - gene_id: str, - gene_pheno: int32, - gene_symbol: str, - gene_symbol_source: str, - hgnc_id: str, - hgvsc: str, - hgvsp: str, - hgvs_offset: int32, - impact: str, - intron: str, - lof: str, - lof_flags: str, - lof_filter: str, - lof_info: str, - minimised: int32, - polyphen_prediction: str, - polyphen_score: float64, - protein_end: int32, - protein_start: int32, - 
protein_id: str, - sift_prediction: str, - sift_score: float64, - strand: int32, - swissprot: str, - transcript_id: str, - trembl: str, - tsl: int32, - uniparc: str, - variant_allele: str - }>, - variant_class: str - } - 'vqsr': struct { - AS_VQSLOD: float64, - AS_culprit: str, - NEGATIVE_TRAIN_SITE: bool, - POSITIVE_TRAIN_SITE: bool - } - 'region_flag': struct { - lcr: bool, - segdup: bool - } - 'allele_info': struct { - variant_type: str, - allele_type: str, - n_alt_alleles: int32, - was_mixed: bool - } - 'cadd': struct { - raw_score: float32, - phred: float32 - } - 'revel': struct { - revel_score: float64, - ref_aa: str, - alt_aa: str - } - 'splice_ai': struct { - splice_ai: array, - max_ds: float32, - splice_consequence: str - } - 'primate_ai': struct { - primate_ai_score: float32 - } - ---------------------------------------- - Entry fields: - 'END': int32 - 'DP': int32 - 'GQ': int32 - 'MIN_DP': int32 - 'PID': str - 'RGQ': int32 - 'SB': array - 'GT': call - 'PGT': call - 'AD': array - 'PL': array - ---------------------------------------- - Column key: ['s'] - Row key: ['locus', 'alleles'] - ---------------------------------------- - diff --git a/hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_dense.rst b/hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_dense.rst new file mode 100644 index 00000000000..0dd1a553f42 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_dense.rst @@ -0,0 +1,1501 @@ +.. _gnomad_hgdp_1kg_subset_dense: + +gnomad_hgdp_1kg_subset_dense +============================ + +* **Versions:** 3.1, 3.1.2 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.MatrixTable` + +Schema (3.1.2, GRCh38) +~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'global_annotation_descriptions': struct { + gnomad_sex_imputation_ploidy_cutoffs: struct { + Description: str + }, + gnomad_population_inference_pca_metrics: struct { + Description: str + }, + sample_hard_filter_cutoffs: struct { + Description: str + }, + gnomad_sample_qc_metric_outlier_cutoffs: struct { + Description: str + }, + gnomad_age_distribution: struct { + Description: str, + sub_globals: struct { + bin_edges: struct { + Description: str + }, + bin_freq: struct { + Description: str + }, + n_smaller: struct { + Description: str + }, + n_larger: struct { + Description: str + } + } + }, + hgdp_tgp_freq_meta: struct { + Description: str + }, + gnomad_freq_meta: struct { + Description: str + }, + hgdp_tgp_freq_index_dict: struct { + Description: str + }, + gnomad_freq_index_dict: struct { + Description: str + }, + gnomad_faf_meta: struct { + Description: str + }, + gnomad_faf_index_dict: struct { + Description: str + }, + variant_filtering_model: struct { + Description: set, + sub_globals: struct { + model_name: struct { + Description: str + }, + score_name: struct { + Description: str + }, + snv_cutoff: struct { + Description: str, + sub_globals: struct { + bin: struct { + Description: str + }, + min_score: struct { + Description: str + } + } + }, + indel_cutoff: struct { + Description: str, + sub_globals: struct { + bin: struct { + Description: str + }, + min_score: struct { + Description: str + } + } + }, + snv_training_variables: struct { + Description: str + }, + indel_training_variables: struct { + Description: str + } + } + }, + variant_inbreeding_coeff_cutoff: struct { + Description: str + }, + vep_version: struct { + Description: str + }, + vep_csq_header: struct { + Description: str + }, 
+ dbsnp_version: struct { + Description: str + } + } + 'sample_annotation_descriptions': struct { + s: struct { + Description: str + }, + bam_metrics: struct { + Description: str, + sub_annotations: struct { + pct_bases_20x: struct { + Description: str + }, + pct_chimeras: struct { + Description: str + }, + freemix: struct { + Description: str + }, + mean_coverage: struct { + Description: str + }, + median_coverage: struct { + Description: str + }, + mean_insert_size: struct { + Description: str + }, + median_insert_size: struct { + Description: str + }, + pct_bases_10x: struct { + Description: str + } + } + }, + sample_qc: struct { + Description: str, + sub_annotations: struct { + n_deletion: struct { + Description: str + }, + n_het: struct { + Description: str + }, + n_hom_ref: struct { + Description: str + }, + n_hom_var: struct { + Description: str + }, + n_insertion: struct { + Description: str + }, + n_non_ref: struct { + Description: str + }, + n_snp: struct { + Description: str + }, + n_transition: struct { + Description: str + }, + n_transversion: struct { + Description: str + }, + r_het_hom_var: struct { + Description: str + }, + r_insertion_deletion: struct { + Description: str + }, + r_ti_tv: struct { + Description: str + } + } + }, + gnomad_sex_imputation: struct { + Description: str, + sub_annotations: struct { + chr20_mean_dp: struct { + Description: str + }, + chrX_mean_dp: struct { + Description: str + }, + chrY_mean_dp: struct { + Description: str + }, + chrX_ploidy: struct { + Description: str + }, + chrY_ploidy: struct { + Description: str + }, + X_karyotype: struct { + Description: str + }, + Y_karyotype: struct { + Description: str + }, + sex_karyotype: struct { + Description: str + }, + f_stat: struct { + Description: str + }, + n_called: struct { + Description: str + }, + expected_homs: struct { + Description: str + }, + observed_homs: struct { + Description: str + } + } + }, + gnomad_population_inference: struct { + Description: str, + sub_annotations: struct { + pca_scores: struct { + Description: str + }, + pop: struct { + Description: str + }, + prob_afr: struct { + Description: str + }, + prob_ami: struct { + Description: str + }, + prob_amr: struct { + Description: str + }, + prob_asj: struct { + Description: str + }, + prob_eas: struct { + Description: str + }, + prob_fin: struct { + Description: str + }, + prob_mid: struct { + Description: str + }, + prob_nfe: struct { + Description: str + }, + prob_oth: struct { + Description: str + }, + prob_sas: struct { + Description: str + } + } + }, + gnomad_sample_qc_residuals: struct { + Description: tuple ( + str + ), + sub_annotations: struct { + n_snp_residual: struct { + Description: str + }, + r_ti_tv_residual: struct { + Description: str + }, + r_insertion_deletion_residual: struct { + Description: str + }, + n_insertion_residual: struct { + Description: str + }, + n_deletion_residual: struct { + Description: str + }, + r_het_hom_var_residual: struct { + Description: str + }, + n_transition_residual: struct { + Description: str + }, + n_transversion_residual: struct { + Description: str + } + } + }, + gnomad_sample_filters: struct { + Description: str, + sub_annotations: struct { + hard_filters: struct { + Description: str + }, + hard_filtered: struct { + Description: str + }, + release_related: struct { + Description: str + }, + qc_metrics_filters: struct { + Description: str + } + } + }, + gnomad_high_quality: struct { + Description: str + }, + gnomad_release: struct { + Description: str + }, + 
relatedness_inference: struct { + Description: str, + sub_annotations: struct { + related_samples: struct { + Description: str, + sub_annotations: struct { + s: struct { + Description: str + }, + kin: struct { + Description: str + }, + ibd0: struct { + Description: str + }, + ibd1: struct { + Description: str + }, + ibd2: struct { + Description: str + } + } + }, + related: struct { + Description: str + } + } + }, + hgdp_tgp_meta: struct { + Description: str, + sub_annotations: struct { + project: struct { + Description: str + }, + study_region: struct { + Description: str + }, + population: struct { + Description: str + }, + genetic_region: struct { + Description: str + }, + latitude: struct { + Description: str + }, + longitude: struct { + Description: str + }, + hgdp_technical_meta: struct { + Description: str, + sub_annotations: struct { + source: struct { + Description: str + }, + library_type: struct { + Description: str + } + } + }, + global_pca_scores: struct { + Description: str + }, + subcontinental_pca: struct { + Description: str, + sub_annotations: struct { + pca_scores: struct { + Description: str + }, + pca_scores_outliers_removed: struct { + Description: str + }, + outlier: struct { + Description: str + } + } + }, + gnomad_labeled_subpop: struct { + Description: str + } + } + }, + high_quality: struct { + Description: str + } + } + 'gnomad_sex_imputation_ploidy_cutoffs': struct { + x_ploidy_cutoffs: struct { + upper_cutoff_X: float64, + lower_cutoff_XX: float64, + upper_cutoff_XX: float64, + lower_cutoff_XXX: float64 + }, + y_ploidy_cutoffs: struct { + lower_cutoff_Y: float64, + upper_cutoff_Y: float64, + lower_cutoff_YY: float64 + }, + f_stat_cutoff: float64 + } + 'gnomad_population_inference_pca_metrics': struct { + n_pcs: int32, + min_prob: float64 + } + 'sample_hard_filter_cutoffs': struct { + min_cov: int32, + max_n_snp: float64, + min_n_snp: float64, + max_n_singleton: float64, + max_r_het_hom_var: float64, + max_pct_contamination: float64, + max_pct_chimera: float64, + min_median_insert_size: int32 + } + 'gnomad_sample_qc_metric_outlier_cutoffs': struct { + lms: struct { + n_snp: struct { + beta: array, + standard_error: array, + t_stat: array, + p_value: array, + multiple_standard_error: float64, + multiple_r_squared: float64, + adjusted_r_squared: float64, + f_stat: float64, + multiple_p_value: float64, + n: int32 + }, + n_singleton: struct { + beta: array, + standard_error: array, + t_stat: array, + p_value: array, + multiple_standard_error: float64, + multiple_r_squared: float64, + adjusted_r_squared: float64, + f_stat: float64, + multiple_p_value: float64, + n: int32 + }, + r_ti_tv: struct { + beta: array, + standard_error: array, + t_stat: array, + p_value: array, + multiple_standard_error: float64, + multiple_r_squared: float64, + adjusted_r_squared: float64, + f_stat: float64, + multiple_p_value: float64, + n: int32 + }, + r_insertion_deletion: struct { + beta: array, + standard_error: array, + t_stat: array, + p_value: array, + multiple_standard_error: float64, + multiple_r_squared: float64, + adjusted_r_squared: float64, + f_stat: float64, + multiple_p_value: float64, + n: int32 + }, + n_insertion: struct { + beta: array, + standard_error: array, + t_stat: array, + p_value: array, + multiple_standard_error: float64, + multiple_r_squared: float64, + adjusted_r_squared: float64, + f_stat: float64, + multiple_p_value: float64, + n: int32 + }, + n_deletion: struct { + beta: array, + standard_error: array, + t_stat: array, + p_value: array, + 
multiple_standard_error: float64, + multiple_r_squared: float64, + adjusted_r_squared: float64, + f_stat: float64, + multiple_p_value: float64, + n: int32 + }, + r_het_hom_var: struct { + beta: array, + standard_error: array, + t_stat: array, + p_value: array, + multiple_standard_error: float64, + multiple_r_squared: float64, + adjusted_r_squared: float64, + f_stat: float64, + multiple_p_value: float64, + n: int32 + }, + n_transition: struct { + beta: array, + standard_error: array, + t_stat: array, + p_value: array, + multiple_standard_error: float64, + multiple_r_squared: float64, + adjusted_r_squared: float64, + f_stat: float64, + multiple_p_value: float64, + n: int32 + }, + n_transversion: struct { + beta: array, + standard_error: array, + t_stat: array, + p_value: array, + multiple_standard_error: float64, + multiple_r_squared: float64, + adjusted_r_squared: float64, + f_stat: float64, + multiple_p_value: float64, + n: int32 + } + }, + qc_metrics_stats: struct { + n_snp_residual: struct { + median: float64, + mad: float64, + lower: float64, + upper: float64 + }, + n_singleton_residual: struct { + median: float64, + mad: float64, + lower: float64, + upper: float64 + }, + r_ti_tv_residual: struct { + median: float64, + mad: float64, + lower: float64, + upper: float64 + }, + r_insertion_deletion_residual: struct { + median: float64, + mad: float64, + lower: float64, + upper: float64 + }, + n_insertion_residual: struct { + median: float64, + mad: float64, + lower: float64, + upper: float64 + }, + n_deletion_residual: struct { + median: float64, + mad: float64, + lower: float64, + upper: float64 + }, + r_het_hom_var_residual: struct { + median: float64, + mad: float64, + lower: float64, + upper: float64 + }, + n_transition_residual: struct { + median: float64, + mad: float64, + lower: float64, + upper: float64 + }, + n_transversion_residual: struct { + median: float64, + mad: float64, + lower: float64, + upper: float64 + } + }, + n_pcs: int32, + used_regressed_metrics: bool + } + 'gnomad_age_distribution': struct { + bin_edges: array, + bin_freq: array, + n_smaller: int32, + n_larger: int32 + } + 'variant_annotation_descriptions': struct { + locus: struct { + Description: str + }, + alleles: struct { + Description: str + }, + rsid: struct { + Description: str + }, + a_index: struct { + Description: str + }, + was_split: struct { + Description: str + }, + hgdp_tgp_freq: struct { + Description: str, + sub_annotations: struct { + AC: struct { + Description: str + }, + AF: struct { + Description: str + }, + AN: struct { + Description: str + }, + homozygote_count: struct { + Description: str + } + } + }, + gnomad_freq: struct { + Description: str, + sub_annotations: struct { + AC: struct { + Description: str + }, + AF: struct { + Description: str + }, + AN: struct { + Description: str + }, + homozygote_count: struct { + Description: str + } + } + }, + gnomad_popmax: struct { + Description: str, + sub_annotations: struct { + AC: struct { + Description: str + }, + AF: struct { + Description: str + }, + AN: struct { + Description: str + }, + homozygote_count: struct { + Description: str + }, + pop: struct { + Description: str + }, + faf95: struct { + Description: str + } + } + }, + gnomad_faf: struct { + Description: str, + sub_annotations: struct { + faf95: struct { + Description: str + }, + faf99: struct { + Description: str + } + } + }, + gnomad_qual_hists: struct { + Description: str, + sub_annotations: struct { + gq_hist_all: struct { + Description: str, + sub_annotations: struct { + 
bin_edges: struct { + Description: str + }, + bin_freq: struct { + Description: str + }, + n_smaller: struct { + Description: str + }, + n_larger: struct { + Description: str + } + } + }, + dp_hist_all: struct { + Description: str, + sub_annotations: struct { + bin_edges: struct { + Description: str + }, + bin_freq: struct { + Description: str + }, + n_smaller: struct { + Description: str + }, + n_larger: struct { + Description: str + } + } + }, + gq_hist_alt: struct { + Description: str, + sub_annotations: struct { + bin_edges: struct { + Description: str + }, + bin_freq: struct { + Description: str + }, + n_smaller: struct { + Description: str + }, + n_larger: struct { + Description: str + } + } + }, + dp_hist_alt: struct { + Description: str, + sub_annotations: struct { + bin_edges: struct { + Description: str + }, + bin_freq: struct { + Description: str + }, + n_smaller: struct { + Description: str + }, + n_larger: struct { + Description: str + } + } + }, + ab_hist_alt: struct { + Description: str, + sub_annotations: struct { + bin_edges: struct { + Description: str + }, + bin_freq: struct { + Description: str + }, + n_smaller: struct { + Description: str + }, + n_larger: struct { + Description: str + } + } + } + } + }, + gnomad_raw_qual_hists: struct { + Description: str, + sub_annotations: struct { + gq_hist_all: struct { + Description: str, + sub_annotations: struct { + bin_edges: struct { + Description: str + }, + bin_freq: struct { + Description: str + }, + n_smaller: struct { + Description: str + }, + n_larger: struct { + Description: str + } + } + }, + dp_hist_all: struct { + Description: str, + sub_annotations: struct { + bin_edges: struct { + Description: str + }, + bin_freq: struct { + Description: str + }, + n_smaller: struct { + Description: str + }, + n_larger: struct { + Description: str + } + } + }, + gq_hist_alt: struct { + Description: str, + sub_annotations: struct { + bin_edges: struct { + Description: str + }, + bin_freq: struct { + Description: str + }, + n_smaller: struct { + Description: str + }, + n_larger: struct { + Description: str + } + } + }, + dp_hist_alt: struct { + Description: str, + sub_annotations: struct { + bin_edges: struct { + Description: str + }, + bin_freq: struct { + Description: str + }, + n_smaller: struct { + Description: str + }, + n_larger: struct { + Description: str + } + } + }, + ab_hist_alt: struct { + Description: str, + sub_annotations: struct { + bin_edges: struct { + Description: str + }, + bin_freq: struct { + Description: str + }, + n_smaller: struct { + Description: str + }, + n_larger: struct { + Description: str + } + } + } + } + }, + gnomad_age_hist_het: struct { + Description: str, + sub_annotations: struct { + bin_edges: struct { + Description: str + }, + bin_freq: struct { + Description: str + }, + n_smaller: struct { + Description: str + }, + n_larger: struct { + Description: str + } + } + }, + gnomad_age_hist_hom: struct { + Description: str, + sub_annotations: struct { + bin_edges: struct { + Description: str + }, + bin_freq: struct { + Description: str + }, + n_smaller: struct { + Description: str + }, + n_larger: struct { + Description: str + } + } + }, + filters: struct { + Description: str + }, + info: struct { + Description: str, + sub_annotations: struct { + QUALapprox: struct { + Description: str + }, + SB: struct { + Description: str + }, + MQ: struct { + Description: str + }, + MQRankSum: struct { + Description: str + }, + VarDP: struct { + Description: str + }, + AS_ReadPosRankSum: struct { + Description: str 
+ }, + AS_pab_max: struct { + Description: str + }, + AS_QD: struct { + Description: str + }, + AS_MQ: struct { + Description: str + }, + QD: struct { + Description: str + }, + AS_MQRankSum: struct { + Description: str + }, + FS: struct { + Description: str + }, + AS_FS: struct { + Description: str + }, + ReadPosRankSum: struct { + Description: str + }, + AS_QUALapprox: struct { + Description: str + }, + AS_SB_TABLE: struct { + Description: str + }, + AS_VarDP: struct { + Description: str + }, + AS_SOR: struct { + Description: str + }, + SOR: struct { + Description: str + }, + transmitted_singleton: struct { + Description: str + }, + omni: struct { + Description: str + }, + mills: struct { + Description: str + }, + monoallelic: struct { + Description: str + }, + InbreedingCoeff: struct { + Description: str + } + } + }, + vep: struct { + Description: str + }, + vqsr: struct { + Description: str, + sub_annotations: struct { + AS_VQSLOD: struct { + Description: str + }, + AS_culprit: struct { + Description: str + }, + NEGATIVE_TRAIN_SITE: struct { + Description: str + }, + POSITIVE_TRAIN_SITE: struct { + Description: str + } + } + }, + region_flag: struct { + Description: str, + sub_annotations: struct { + lcr: struct { + Description: str + }, + segdup: struct { + Description: str + } + } + }, + allele_info: struct { + Description: str, + sub_annotations: struct { + variant_type: struct { + Description: str + }, + allele_type: struct { + Description: str + }, + n_alt_alleles: struct { + Description: str + } + } + }, + was_mixed: struct { + Description: str + }, + cadd: struct { + sub_annotations: struct { + raw_score: struct { + Description: str + }, + phred: struct { + Description: str + }, + has_duplicate: struct { + Description: str + } + } + }, + revel: struct { + Description: str, + sub_annotations: struct { + revel_score: struct { + Description: str + }, + has_duplicate: struct { + Description: str + } + } + }, + splice_ai: struct { + sub_annotations: struct { + splice_ai: struct { + Description: str + }, + splice_consequence: struct { + Description: str + }, + has_duplicate: struct { + Description: str + } + } + }, + primate_ai: struct { + sub_annotations: struct { + primate_ai_score: struct { + Description: str + }, + has_duplicate: struct { + Description: str + } + } + }, + AS_lowqual: struct { + Description: str + }, + telomere_or_centromere: struct { + Description: str + } + } + 'hgdp_tgp_freq_meta': array> + 'hgdp_tgp_freq_index_dict': dict + 'gnomad_freq_meta': array> + 'gnomad_freq_index_dict': dict + 'gnomad_faf_index_dict': dict + 'gnomad_faf_meta': array> + 'vep_version': str + 'vep_csq_header': str + 'dbsnp_version': str + 'variant_filtering_model': struct { + model_name: str, + score_name: str, + snv_cutoff: struct { + bin: float64, + min_score: float64 + }, + indel_cutoff: struct { + bin: float64, + min_score: float64 + }, + snv_training_variables: array, + indel_training_variables: array + } + 'variant_inbreeding_coeff_cutoff': float64 + ---------------------------------------- + Column fields: + 's': str + 'bam_metrics': struct { + pct_bases_20x: float64, + pct_chimeras: float64, + freemix: float64, + mean_coverage: float64, + median_coverage: float64, + mean_insert_size: float64, + median_insert_size: float64, + pct_bases_10x: float64 + } + 'sample_qc': struct { + n_deletion: int64, + n_het: int64, + n_hom_ref: int64, + n_hom_var: int64, + n_insertion: int64, + n_non_ref: int64, + n_snp: int64, + n_transition: int64, + n_transversion: int64, + r_het_hom_var: float64, + 
r_insertion_deletion: float64, + r_ti_tv: float64 + } + 'gnomad_sex_imputation': struct { + chr20_mean_dp: float32, + chrX_mean_dp: float32, + chrY_mean_dp: float32, + chrX_ploidy: float32, + chrY_ploidy: float32, + X_karyotype: str, + Y_karyotype: str, + sex_karyotype: str, + f_stat: float64, + n_called: int64, + expected_homs: float64, + observed_homs: int64 + } + 'gnomad_population_inference': struct { + pca_scores: array, + pop: str, + prob_afr: float64, + prob_ami: float64, + prob_amr: float64, + prob_asj: float64, + prob_eas: float64, + prob_fin: float64, + prob_mid: float64, + prob_nfe: float64, + prob_oth: float64, + prob_sas: float64 + } + 'gnomad_sample_qc_residuals': struct { + n_snp_residual: float64, + r_ti_tv_residual: float64, + r_insertion_deletion_residual: float64, + n_insertion_residual: float64, + n_deletion_residual: float64, + r_het_hom_var_residual: float64, + n_transition_residual: float64, + n_transversion_residual: float64 + } + 'gnomad_sample_filters': struct { + hard_filters: set, + hard_filtered: bool, + release_related: bool, + qc_metrics_filters: set + } + 'gnomad_high_quality': bool + 'gnomad_release': bool + 'relatedness_inference': struct { + related_samples: set, + related: bool + } + 'hgdp_tgp_meta': struct { + project: str, + study_region: str, + population: str, + genetic_region: str, + latitude: float64, + longitude: float64, + hgdp_technical_meta: struct { + source: str, + library_type: str + }, + global_pca_scores: array, + subcontinental_pca: struct { + pca_scores: array, + pca_scores_outliers_removed: array, + outlier: bool + }, + gnomad_labeled_subpop: str + } + 'high_quality': bool + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'rsid': set + 'a_index': int32 + 'was_split': bool + 'filters': set + 'info': struct { + SB: array, + MQRankSum: float64, + VarDP: int32, + AS_FS: float64, + AS_ReadPosRankSum: float64, + AS_pab_max: float64, + AS_QD: float32, + AS_MQ: float64, + AS_QUALapprox: int64, + QD: float32, + AS_MQRankSum: float64, + FS: float64, + MQ: float64, + ReadPosRankSum: float64, + QUALapprox: int64, + AS_SB_TABLE: array, + AS_VarDP: int32, + AS_SOR: float64, + SOR: float64, + transmitted_singleton: bool, + omni: bool, + mills: bool, + monoallelic: bool, + InbreedingCoeff: float32, + AS_VQSLOD: float64 + } + 'vep': struct { + assembly_name: str, + allele_string: str, + ancestral: str, + context: str, + end: int32, + id: str, + input: str, + intergenic_consequences: array, + impact: str, + minimised: int32, + variant_allele: str + }>, + most_severe_consequence: str, + motif_feature_consequences: array, + high_inf_pos: str, + impact: str, + minimised: int32, + motif_feature_id: str, + motif_name: str, + motif_pos: int32, + motif_score_change: float64, + strand: int32, + variant_allele: str + }>, + regulatory_feature_consequences: array, + impact: str, + minimised: int32, + regulatory_feature_id: str, + variant_allele: str + }>, + seq_region_name: str, + start: int32, + strand: int32, + transcript_consequences: array, + distance: int32, + domains: array, + exon: str, + gene_id: str, + gene_pheno: int32, + gene_symbol: str, + gene_symbol_source: str, + hgnc_id: str, + hgvsc: str, + hgvsp: str, + hgvs_offset: int32, + impact: str, + intron: str, + lof: str, + lof_flags: str, + lof_filter: str, + lof_info: str, + minimised: int32, + polyphen_prediction: str, + polyphen_score: float64, + protein_end: int32, + protein_start: int32, + protein_id: str, + sift_prediction: str, + sift_score: float64, + 
strand: int32, + swissprot: str, + transcript_id: str, + trembl: str, + tsl: int32, + uniparc: str, + variant_allele: str + }>, + variant_class: str + } + 'vqsr': struct { + AS_VQSLOD: float64, + AS_culprit: str, + NEGATIVE_TRAIN_SITE: bool, + POSITIVE_TRAIN_SITE: bool + } + 'region_flag': struct { + lcr: bool, + segdup: bool + } + 'allele_info': struct { + variant_type: str, + allele_type: str, + n_alt_alleles: int32, + was_mixed: bool + } + 'hgdp_tgp_freq': array + 'gnomad_freq': array + 'gnomad_popmax': struct { + AC: int32, + AF: float64, + AN: int32, + homozygote_count: int32, + pop: str, + faf95: float64 + } + 'gnomad_faf': array + 'gnomad_raw_qual_hists': struct { + gq_hist_all: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + dp_hist_all: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + gq_hist_alt: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + dp_hist_alt: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + ab_hist_alt: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + } + } + 'gnomad_qual_hists': struct { + gq_hist_all: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + dp_hist_all: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + gq_hist_alt: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + dp_hist_alt: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + ab_hist_alt: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + } + } + 'gnomad_age_hist_het': struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + } + 'gnomad_age_hist_hom': struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + } + 'cadd': struct { + phred: float32, + raw_score: float32, + has_duplicate: bool + } + 'revel': struct { + revel_score: float64, + has_duplicate: bool + } + 'splice_ai': struct { + splice_ai_score: float32, + splice_consequence: str, + has_duplicate: bool + } + 'primate_ai': struct { + primate_ai_score: float32, + has_duplicate: bool + } + ---------------------------------------- + Entry fields: + 'DP': int32 + 'GQ': int32 + 'MIN_DP': int32 + 'PID': str + 'RGQ': int32 + 'SB': array + 'GT': call + 'PGT': call + 'AD': array + 'PL': array + 'adj': bool + ---------------------------------------- + Column key: ['s'] + Row key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_sample_metadata.rst b/hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_sample_metadata.rst new file mode 100644 index 00000000000..7d5f6ceb559 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_sample_metadata.rst @@ -0,0 +1,653 @@ +.. _gnomad_hgdp_1kg_subset_sample_metadata: + +gnomad_hgdp_1kg_subset_sample_metadata +====================================== + +* **Versions:** 3.1.2 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (3.1.2, GRCh38) +~~~~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: text + + ---------------------------------------- + Global fields: + 'global_annotation_descriptions': struct { + gnomad_sex_imputation_ploidy_cutoffs: struct { + Description: str + }, + gnomad_population_inference_pca_metrics: struct { + Description: str + }, + sample_hard_filter_cutoffs: struct { + Description: str + }, + gnomad_sample_qc_metric_outlier_cutoffs: struct { + Description: str + }, + gnomad_age_distribution: struct { + Description: str, + sub_globals: struct { + bin_edges: struct { + Description: str + }, + bin_freq: struct { + Description: str + }, + n_smaller: struct { + Description: str + }, + n_larger: struct { + Description: str + } + } + } + } + 'sample_annotation_descriptions': struct { + s: struct { + Description: str + }, + bam_metrics: struct { + Description: str, + sub_annotations: struct { + pct_bases_20x: struct { + Description: str + }, + pct_chimeras: struct { + Description: str + }, + freemix: struct { + Description: str + }, + mean_coverage: struct { + Description: str + }, + median_coverage: struct { + Description: str + }, + mean_insert_size: struct { + Description: str + }, + median_insert_size: struct { + Description: str + }, + pct_bases_10x: struct { + Description: str + } + } + }, + sample_qc: struct { + Description: str, + sub_annotations: struct { + n_deletion: struct { + Description: str + }, + n_het: struct { + Description: str + }, + n_hom_ref: struct { + Description: str + }, + n_hom_var: struct { + Description: str + }, + n_insertion: struct { + Description: str + }, + n_non_ref: struct { + Description: str + }, + n_snp: struct { + Description: str + }, + n_transition: struct { + Description: str + }, + n_transversion: struct { + Description: str + }, + r_het_hom_var: struct { + Description: str + }, + r_insertion_deletion: struct { + Description: str + }, + r_ti_tv: struct { + Description: str + } + } + }, + gnomad_sex_imputation: struct { + Description: str, + sub_annotations: struct { + chr20_mean_dp: struct { + Description: str + }, + chrX_mean_dp: struct { + Description: str + }, + chrY_mean_dp: struct { + Description: str + }, + chrX_ploidy: struct { + Description: str + }, + chrY_ploidy: struct { + Description: str + }, + X_karyotype: struct { + Description: str + }, + Y_karyotype: struct { + Description: str + }, + sex_karyotype: struct { + Description: str + }, + f_stat: struct { + Description: str + }, + n_called: struct { + Description: str + }, + expected_homs: struct { + Description: str + }, + observed_homs: struct { + Description: str + } + } + }, + gnomad_population_inference: struct { + Description: str, + sub_annotations: struct { + pca_scores: struct { + Description: str + }, + pop: struct { + Description: str + }, + prob_afr: struct { + Description: str + }, + prob_ami: struct { + Description: str + }, + prob_amr: struct { + Description: str + }, + prob_asj: struct { + Description: str + }, + prob_eas: struct { + Description: str + }, + prob_fin: struct { + Description: str + }, + prob_mid: struct { + Description: str + }, + prob_nfe: struct { + Description: str + }, + prob_oth: struct { + Description: str + }, + prob_sas: struct { + Description: str + } + } + }, + gnomad_sample_qc_residuals: struct { + Description: tuple ( + str + ), + sub_annotations: struct { + n_snp_residual: struct { + Description: str + }, + r_ti_tv_residual: struct { + Description: str + }, + r_insertion_deletion_residual: struct { + Description: str + }, + n_insertion_residual: struct { + Description: str + }, + n_deletion_residual: 
struct { + Description: str + }, + r_het_hom_var_residual: struct { + Description: str + }, + n_transition_residual: struct { + Description: str + }, + n_transversion_residual: struct { + Description: str + } + } + }, + gnomad_sample_filters: struct { + Description: str, + sub_annotations: struct { + hard_filters: struct { + Description: str + }, + hard_filtered: struct { + Description: str + }, + release_related: struct { + Description: str + }, + qc_metrics_filters: struct { + Description: str + } + } + }, + gnomad_high_quality: struct { + Description: str + }, + gnomad_release: struct { + Description: str + }, + relatedness_inference: struct { + Description: str, + sub_annotations: struct { + related_samples: struct { + Description: str, + sub_annotations: struct { + s: struct { + Description: str + }, + kin: struct { + Description: str + }, + ibd0: struct { + Description: str + }, + ibd1: struct { + Description: str + }, + ibd2: struct { + Description: str + } + } + }, + related: struct { + Description: str + } + } + }, + hgdp_tgp_meta: struct { + Description: str, + sub_annotations: struct { + project: struct { + Description: str + }, + study_region: struct { + Description: str + }, + population: struct { + Description: str + }, + genetic_region: struct { + Description: str + }, + latitude: struct { + Description: str + }, + longitude: struct { + Description: str + }, + hgdp_technical_meta: struct { + Description: str, + sub_annotations: struct { + source: struct { + Description: str + }, + library_type: struct { + Description: str + } + } + }, + global_pca_scores: struct { + Description: str + }, + subcontinental_pca: struct { + Description: str, + sub_annotations: struct { + pca_scores: struct { + Description: str + }, + pca_scores_outliers_removed: struct { + Description: str + }, + outlier: struct { + Description: str + } + } + }, + gnomad_labeled_subpop: struct { + Description: str + } + } + }, + high_quality: struct { + Description: str + } + } + 'gnomad_sex_imputation_ploidy_cutoffs': struct { + x_ploidy_cutoffs: struct { + upper_cutoff_X: float64, + lower_cutoff_XX: float64, + upper_cutoff_XX: float64, + lower_cutoff_XXX: float64 + }, + y_ploidy_cutoffs: struct { + lower_cutoff_Y: float64, + upper_cutoff_Y: float64, + lower_cutoff_YY: float64 + }, + f_stat_cutoff: float64 + } + 'gnomad_population_inference_pca_metrics': struct { + n_pcs: int32, + min_prob: float64 + } + 'sample_hard_filter_cutoffs': struct { + min_cov: int32, + max_n_snp: float64, + min_n_snp: float64, + max_n_singleton: float64, + max_r_het_hom_var: float64, + max_pct_contamination: float64, + max_pct_chimera: float64, + min_median_insert_size: int32 + } + 'gnomad_sample_qc_metric_outlier_cutoffs': struct { + lms: struct { + n_snp: struct { + beta: array, + standard_error: array, + t_stat: array, + p_value: array, + multiple_standard_error: float64, + multiple_r_squared: float64, + adjusted_r_squared: float64, + f_stat: float64, + multiple_p_value: float64, + n: int32 + }, + n_singleton: struct { + beta: array, + standard_error: array, + t_stat: array, + p_value: array, + multiple_standard_error: float64, + multiple_r_squared: float64, + adjusted_r_squared: float64, + f_stat: float64, + multiple_p_value: float64, + n: int32 + }, + r_ti_tv: struct { + beta: array, + standard_error: array, + t_stat: array, + p_value: array, + multiple_standard_error: float64, + multiple_r_squared: float64, + adjusted_r_squared: float64, + f_stat: float64, + multiple_p_value: float64, + n: int32 + }, + r_insertion_deletion: 
struct { + beta: array, + standard_error: array, + t_stat: array, + p_value: array, + multiple_standard_error: float64, + multiple_r_squared: float64, + adjusted_r_squared: float64, + f_stat: float64, + multiple_p_value: float64, + n: int32 + }, + n_insertion: struct { + beta: array, + standard_error: array, + t_stat: array, + p_value: array, + multiple_standard_error: float64, + multiple_r_squared: float64, + adjusted_r_squared: float64, + f_stat: float64, + multiple_p_value: float64, + n: int32 + }, + n_deletion: struct { + beta: array, + standard_error: array, + t_stat: array, + p_value: array, + multiple_standard_error: float64, + multiple_r_squared: float64, + adjusted_r_squared: float64, + f_stat: float64, + multiple_p_value: float64, + n: int32 + }, + r_het_hom_var: struct { + beta: array, + standard_error: array, + t_stat: array, + p_value: array, + multiple_standard_error: float64, + multiple_r_squared: float64, + adjusted_r_squared: float64, + f_stat: float64, + multiple_p_value: float64, + n: int32 + }, + n_transition: struct { + beta: array, + standard_error: array, + t_stat: array, + p_value: array, + multiple_standard_error: float64, + multiple_r_squared: float64, + adjusted_r_squared: float64, + f_stat: float64, + multiple_p_value: float64, + n: int32 + }, + n_transversion: struct { + beta: array, + standard_error: array, + t_stat: array, + p_value: array, + multiple_standard_error: float64, + multiple_r_squared: float64, + adjusted_r_squared: float64, + f_stat: float64, + multiple_p_value: float64, + n: int32 + } + }, + qc_metrics_stats: struct { + n_snp_residual: struct { + median: float64, + mad: float64, + lower: float64, + upper: float64 + }, + n_singleton_residual: struct { + median: float64, + mad: float64, + lower: float64, + upper: float64 + }, + r_ti_tv_residual: struct { + median: float64, + mad: float64, + lower: float64, + upper: float64 + }, + r_insertion_deletion_residual: struct { + median: float64, + mad: float64, + lower: float64, + upper: float64 + }, + n_insertion_residual: struct { + median: float64, + mad: float64, + lower: float64, + upper: float64 + }, + n_deletion_residual: struct { + median: float64, + mad: float64, + lower: float64, + upper: float64 + }, + r_het_hom_var_residual: struct { + median: float64, + mad: float64, + lower: float64, + upper: float64 + }, + n_transition_residual: struct { + median: float64, + mad: float64, + lower: float64, + upper: float64 + }, + n_transversion_residual: struct { + median: float64, + mad: float64, + lower: float64, + upper: float64 + } + }, + n_pcs: int32, + used_regressed_metrics: bool + } + 'gnomad_age_distribution': struct { + bin_edges: array, + bin_freq: array, + n_smaller: int32, + n_larger: int32 + } + ---------------------------------------- + Row fields: + 's': str + 'bam_metrics': struct { + pct_bases_20x: float64, + pct_chimeras: float64, + freemix: float64, + mean_coverage: float64, + median_coverage: float64, + mean_insert_size: float64, + median_insert_size: float64, + pct_bases_10x: float64 + } + 'sample_qc': struct { + n_deletion: int64, + n_het: int64, + n_hom_ref: int64, + n_hom_var: int64, + n_insertion: int64, + n_non_ref: int64, + n_snp: int64, + n_transition: int64, + n_transversion: int64, + r_het_hom_var: float64, + r_insertion_deletion: float64, + r_ti_tv: float64 + } + 'gnomad_sex_imputation': struct { + chr20_mean_dp: float32, + chrX_mean_dp: float32, + chrY_mean_dp: float32, + chrX_ploidy: float32, + chrY_ploidy: float32, + X_karyotype: str, + Y_karyotype: str, + sex_karyotype: 
str, + f_stat: float64, + n_called: int64, + expected_homs: float64, + observed_homs: int64 + } + 'gnomad_population_inference': struct { + pca_scores: array, + pop: str, + prob_afr: float64, + prob_ami: float64, + prob_amr: float64, + prob_asj: float64, + prob_eas: float64, + prob_fin: float64, + prob_mid: float64, + prob_nfe: float64, + prob_oth: float64, + prob_sas: float64 + } + 'gnomad_sample_qc_residuals': struct { + n_snp_residual: float64, + r_ti_tv_residual: float64, + r_insertion_deletion_residual: float64, + n_insertion_residual: float64, + n_deletion_residual: float64, + r_het_hom_var_residual: float64, + n_transition_residual: float64, + n_transversion_residual: float64 + } + 'gnomad_sample_filters': struct { + hard_filters: set, + hard_filtered: bool, + release_related: bool, + qc_metrics_filters: set + } + 'gnomad_high_quality': bool + 'gnomad_release': bool + 'relatedness_inference': struct { + related_samples: set, + related: bool + } + 'hgdp_tgp_meta': struct { + project: str, + study_region: str, + population: str, + genetic_region: str, + latitude: float64, + longitude: float64, + hgdp_technical_meta: struct { + source: str, + library_type: str + }, + global_pca_scores: array, + subcontinental_pca: struct { + pca_scores: array, + pca_scores_outliers_removed: array, + outlier: bool + }, + gnomad_labeled_subpop: str + } + 'high_quality': bool + ---------------------------------------- + Key: ['s'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_sparse.rst b/hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_sparse.rst new file mode 100644 index 00000000000..da60d664525 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_sparse.rst @@ -0,0 +1,54 @@ +.. _gnomad_hgdp_1kg_subset_sparse: + +gnomad_hgdp_1kg_subset_sparse +============================= + +* **Versions:** 3.1.2 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.MatrixTable` + +Schema (3.1.2, GRCh38) +~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + None + ---------------------------------------- + Column fields: + 's': str + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'rsid': str + ---------------------------------------- + Entry fields: + 'DP': int32 + 'END': int32 + 'GQ': int32 + 'LA': array + 'LAD': array + 'LGT': call + 'LPGT': call + 'LPL': array + 'MIN_DP': int32 + 'PID': str + 'RGQ': int32 + 'SB': array + 'gvcf_info': struct { + ClippingRankSum: float64, + BaseQRankSum: float64, + MQ: float64, + MQRankSum: float64, + MQ_DP: int32, + QUALapprox: int32, + RAW_MQ: float64, + ReadPosRankSum: float64, + VarDP: int32 + } + ---------------------------------------- + Column key: ['s'] + Row key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_variant_annotations.rst b/hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_variant_annotations.rst new file mode 100644 index 00000000000..9d7f900b1ab --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/gnomad_hgdp_1kg_subset_variant_annotations.rst @@ -0,0 +1,857 @@ +.. 
_gnomad_hgdp_1kg_subset_variant_annotations: + +gnomad_hgdp_1kg_subset_variant_annotations +========================================== + +* **Versions:** 3.1.2 +* **Reference genome builds:** GRCh38 +* **Type:** :class:`hail.Table` + +Schema (3.1.2, GRCh38) +~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'global_annotation_descriptions': struct { + hgdp_tgp_freq_meta: struct { + Description: str + }, + gnomad_freq_meta: struct { + Description: str + }, + hgdp_tgp_freq_index_dict: struct { + Description: str + }, + gnomad_freq_index_dict: struct { + Description: str + }, + gnomad_faf_meta: struct { + Description: str + }, + gnomad_faf_index_dict: struct { + Description: str + }, + variant_filtering_model: struct { + Description: set, + sub_globals: struct { + model_name: struct { + Description: str + }, + score_name: struct { + Description: str + }, + snv_cutoff: struct { + Description: str, + sub_globals: struct { + bin: struct { + Description: str + }, + min_score: struct { + Description: str + } + } + }, + indel_cutoff: struct { + Description: str, + sub_globals: struct { + bin: struct { + Description: str + }, + min_score: struct { + Description: str + } + } + }, + snv_training_variables: struct { + Description: str + }, + indel_training_variables: struct { + Description: str + } + } + }, + variant_inbreeding_coeff_cutoff: struct { + Description: str + }, + vep_version: struct { + Description: str + }, + vep_csq_header: struct { + Description: str + }, + dbsnp_version: struct { + Description: str + } + } + 'variant_annotation_descriptions': struct { + locus: struct { + Description: str + }, + alleles: struct { + Description: str + }, + rsid: struct { + Description: str + }, + a_index: struct { + Description: str + }, + was_split: struct { + Description: str + }, + hgdp_tgp_freq: struct { + Description: str, + sub_annotations: struct { + AC: struct { + Description: str + }, + AF: struct { + Description: str + }, + AN: struct { + Description: str + }, + homozygote_count: struct { + Description: str + } + } + }, + gnomad_freq: struct { + Description: str, + sub_annotations: struct { + AC: struct { + Description: str + }, + AF: struct { + Description: str + }, + AN: struct { + Description: str + }, + homozygote_count: struct { + Description: str + } + } + }, + gnomad_popmax: struct { + Description: str, + sub_annotations: struct { + AC: struct { + Description: str + }, + AF: struct { + Description: str + }, + AN: struct { + Description: str + }, + homozygote_count: struct { + Description: str + }, + pop: struct { + Description: str + }, + faf95: struct { + Description: str + } + } + }, + gnomad_faf: struct { + Description: str, + sub_annotations: struct { + faf95: struct { + Description: str + }, + faf99: struct { + Description: str + } + } + }, + gnomad_qual_hists: struct { + Description: str, + sub_annotations: struct { + gq_hist_all: struct { + Description: str, + sub_annotations: struct { + bin_edges: struct { + Description: str + }, + bin_freq: struct { + Description: str + }, + n_smaller: struct { + Description: str + }, + n_larger: struct { + Description: str + } + } + }, + dp_hist_all: struct { + Description: str, + sub_annotations: struct { + bin_edges: struct { + Description: str + }, + bin_freq: struct { + Description: str + }, + n_smaller: struct { + Description: str + }, + n_larger: struct { + Description: str + } + } + }, + gq_hist_alt: struct { + Description: str, + sub_annotations: struct { + bin_edges: struct { 
+ Description: str + }, + bin_freq: struct { + Description: str + }, + n_smaller: struct { + Description: str + }, + n_larger: struct { + Description: str + } + } + }, + dp_hist_alt: struct { + Description: str, + sub_annotations: struct { + bin_edges: struct { + Description: str + }, + bin_freq: struct { + Description: str + }, + n_smaller: struct { + Description: str + }, + n_larger: struct { + Description: str + } + } + }, + ab_hist_alt: struct { + Description: str, + sub_annotations: struct { + bin_edges: struct { + Description: str + }, + bin_freq: struct { + Description: str + }, + n_smaller: struct { + Description: str + }, + n_larger: struct { + Description: str + } + } + } + } + }, + gnomad_raw_qual_hists: struct { + Description: str, + sub_annotations: struct { + gq_hist_all: struct { + Description: str, + sub_annotations: struct { + bin_edges: struct { + Description: str + }, + bin_freq: struct { + Description: str + }, + n_smaller: struct { + Description: str + }, + n_larger: struct { + Description: str + } + } + }, + dp_hist_all: struct { + Description: str, + sub_annotations: struct { + bin_edges: struct { + Description: str + }, + bin_freq: struct { + Description: str + }, + n_smaller: struct { + Description: str + }, + n_larger: struct { + Description: str + } + } + }, + gq_hist_alt: struct { + Description: str, + sub_annotations: struct { + bin_edges: struct { + Description: str + }, + bin_freq: struct { + Description: str + }, + n_smaller: struct { + Description: str + }, + n_larger: struct { + Description: str + } + } + }, + dp_hist_alt: struct { + Description: str, + sub_annotations: struct { + bin_edges: struct { + Description: str + }, + bin_freq: struct { + Description: str + }, + n_smaller: struct { + Description: str + }, + n_larger: struct { + Description: str + } + } + }, + ab_hist_alt: struct { + Description: str, + sub_annotations: struct { + bin_edges: struct { + Description: str + }, + bin_freq: struct { + Description: str + }, + n_smaller: struct { + Description: str + }, + n_larger: struct { + Description: str + } + } + } + } + }, + gnomad_age_hist_het: struct { + Description: str, + sub_annotations: struct { + bin_edges: struct { + Description: str + }, + bin_freq: struct { + Description: str + }, + n_smaller: struct { + Description: str + }, + n_larger: struct { + Description: str + } + } + }, + gnomad_age_hist_hom: struct { + Description: str, + sub_annotations: struct { + bin_edges: struct { + Description: str + }, + bin_freq: struct { + Description: str + }, + n_smaller: struct { + Description: str + }, + n_larger: struct { + Description: str + } + } + }, + filters: struct { + Description: str + }, + info: struct { + Description: str, + sub_annotations: struct { + QUALapprox: struct { + Description: str + }, + SB: struct { + Description: str + }, + MQ: struct { + Description: str + }, + MQRankSum: struct { + Description: str + }, + VarDP: struct { + Description: str + }, + AS_ReadPosRankSum: struct { + Description: str + }, + AS_pab_max: struct { + Description: str + }, + AS_QD: struct { + Description: str + }, + AS_MQ: struct { + Description: str + }, + QD: struct { + Description: str + }, + AS_MQRankSum: struct { + Description: str + }, + FS: struct { + Description: str + }, + AS_FS: struct { + Description: str + }, + ReadPosRankSum: struct { + Description: str + }, + AS_QUALapprox: struct { + Description: str + }, + AS_SB_TABLE: struct { + Description: str + }, + AS_VarDP: struct { + Description: str + }, + AS_SOR: struct { + Description: str + }, + 
SOR: struct { + Description: str + }, + transmitted_singleton: struct { + Description: str + }, + omni: struct { + Description: str + }, + mills: struct { + Description: str + }, + monoallelic: struct { + Description: str + }, + InbreedingCoeff: struct { + Description: str + } + } + }, + vep: struct { + Description: str + }, + vqsr: struct { + Description: str, + sub_annotations: struct { + AS_VQSLOD: struct { + Description: str + }, + AS_culprit: struct { + Description: str + }, + NEGATIVE_TRAIN_SITE: struct { + Description: str + }, + POSITIVE_TRAIN_SITE: struct { + Description: str + } + } + }, + region_flag: struct { + Description: str, + sub_annotations: struct { + lcr: struct { + Description: str + }, + segdup: struct { + Description: str + } + } + }, + allele_info: struct { + Description: str, + sub_annotations: struct { + variant_type: struct { + Description: str + }, + allele_type: struct { + Description: str + }, + n_alt_alleles: struct { + Description: str + } + } + }, + was_mixed: struct { + Description: str + }, + cadd: struct { + sub_annotations: struct { + raw_score: struct { + Description: str + }, + phred: struct { + Description: str + }, + has_duplicate: struct { + Description: str + } + } + }, + revel: struct { + Description: str, + sub_annotations: struct { + revel_score: struct { + Description: str + }, + has_duplicate: struct { + Description: str + } + } + }, + splice_ai: struct { + sub_annotations: struct { + splice_ai: struct { + Description: str + }, + splice_consequence: struct { + Description: str + }, + has_duplicate: struct { + Description: str + } + } + }, + primate_ai: struct { + sub_annotations: struct { + primate_ai_score: struct { + Description: str + }, + has_duplicate: struct { + Description: str + } + } + }, + AS_lowqual: struct { + Description: str + }, + telomere_or_centromere: struct { + Description: str + } + } + 'hgdp_tgp_freq_meta': array> + 'hgdp_tgp_freq_index_dict': dict + 'gnomad_freq_meta': array> + 'gnomad_freq_index_dict': dict + 'gnomad_faf_index_dict': dict + 'gnomad_faf_meta': array> + 'vep_version': str + 'vep_csq_header': str + 'dbsnp_version': str + 'variant_filtering_model': struct { + model_name: str, + score_name: str, + snv_cutoff: struct { + bin: float64, + min_score: float64 + }, + indel_cutoff: struct { + bin: float64, + min_score: float64 + }, + snv_training_variables: array, + indel_training_variables: array + } + 'variant_inbreeding_coeff_cutoff': float64 + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'a_index': int32 + 'was_split': bool + 'rsid': set + 'filters': set + 'info': struct { + SB: array, + MQRankSum: float64, + VarDP: int32, + AS_FS: float64, + AS_ReadPosRankSum: float64, + AS_pab_max: float64, + AS_QD: float32, + AS_MQ: float64, + AS_QUALapprox: int64, + QD: float32, + AS_MQRankSum: float64, + FS: float64, + MQ: float64, + ReadPosRankSum: float64, + QUALapprox: int64, + AS_SB_TABLE: array, + AS_VarDP: int32, + AS_SOR: float64, + SOR: float64, + transmitted_singleton: bool, + omni: bool, + mills: bool, + monoallelic: bool, + InbreedingCoeff: float32, + AS_VQSLOD: float64 + } + 'vep': struct { + assembly_name: str, + allele_string: str, + ancestral: str, + context: str, + end: int32, + id: str, + input: str, + intergenic_consequences: array, + impact: str, + minimised: int32, + variant_allele: str + }>, + most_severe_consequence: str, + motif_feature_consequences: array, + high_inf_pos: str, + impact: str, + minimised: int32, + motif_feature_id: str, + motif_name: 
str, + motif_pos: int32, + motif_score_change: float64, + strand: int32, + variant_allele: str + }>, + regulatory_feature_consequences: array, + impact: str, + minimised: int32, + regulatory_feature_id: str, + variant_allele: str + }>, + seq_region_name: str, + start: int32, + strand: int32, + transcript_consequences: array, + distance: int32, + domains: array, + exon: str, + gene_id: str, + gene_pheno: int32, + gene_symbol: str, + gene_symbol_source: str, + hgnc_id: str, + hgvsc: str, + hgvsp: str, + hgvs_offset: int32, + impact: str, + intron: str, + lof: str, + lof_flags: str, + lof_filter: str, + lof_info: str, + minimised: int32, + polyphen_prediction: str, + polyphen_score: float64, + protein_end: int32, + protein_start: int32, + protein_id: str, + sift_prediction: str, + sift_score: float64, + strand: int32, + swissprot: str, + transcript_id: str, + trembl: str, + tsl: int32, + uniparc: str, + variant_allele: str + }>, + variant_class: str + } + 'vqsr': struct { + AS_VQSLOD: float64, + AS_culprit: str, + NEGATIVE_TRAIN_SITE: bool, + POSITIVE_TRAIN_SITE: bool + } + 'region_flag': struct { + lcr: bool, + segdup: bool + } + 'allele_info': struct { + variant_type: str, + allele_type: str, + n_alt_alleles: int32, + was_mixed: bool + } + 'hgdp_tgp_freq': array + 'gnomad_freq': array + 'gnomad_popmax': struct { + AC: int32, + AF: float64, + AN: int32, + homozygote_count: int32, + pop: str, + faf95: float64 + } + 'gnomad_faf': array + 'gnomad_raw_qual_hists': struct { + gq_hist_all: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + dp_hist_all: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + gq_hist_alt: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + dp_hist_alt: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + ab_hist_alt: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + } + } + 'gnomad_qual_hists': struct { + gq_hist_all: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + dp_hist_all: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + gq_hist_alt: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + dp_hist_alt: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + }, + ab_hist_alt: struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + } + } + 'gnomad_age_hist_het': struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + } + 'gnomad_age_hist_hom': struct { + bin_edges: array, + bin_freq: array, + n_smaller: int64, + n_larger: int64 + } + 'cadd': struct { + phred: float32, + raw_score: float32, + has_duplicate: bool + } + 'revel': struct { + revel_score: float64, + has_duplicate: bool + } + 'splice_ai': struct { + splice_ai_score: float32, + splice_consequence: str, + has_duplicate: bool + } + 'primate_ai': struct { + primate_ai_score: float32, + has_duplicate: bool + } + 'AS_lowqual': bool + 'telomere_or_centromere': bool + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/gnomad_pca_variant_loadings.rst b/hail/python/hail/docs/datasets/schemas/gnomad_pca_variant_loadings.rst new file mode 100644 index 00000000000..9e74ff1577f --- /dev/null +++ 
b/hail/python/hail/docs/datasets/schemas/gnomad_pca_variant_loadings.rst @@ -0,0 +1,26 @@ +.. _gnomad_pca_variant_loadings: + +gnomad_pca_variant_loadings +=========================== + +* **Versions:** 2.1, 3.1 +* **Reference genome builds:** GRCh37, GRCh38 +* **Type:** :class:`hail.Table` + +Schema (3.1, GRCh38) +~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + None + ---------------------------------------- + Row fields: + 'locus': locus + 'alleles': array + 'loadings': array + 'pca_af': float64 + ---------------------------------------- + Key: ['locus', 'alleles'] + ---------------------------------------- diff --git a/hail/python/hail/docs/datasets/schemas/gnomad_variant_co-occurrence.rst b/hail/python/hail/docs/datasets/schemas/gnomad_variant_co-occurrence.rst new file mode 100644 index 00000000000..62284380179 --- /dev/null +++ b/hail/python/hail/docs/datasets/schemas/gnomad_variant_co-occurrence.rst @@ -0,0 +1,60 @@ +.. _gnomad_variant_co-occurrence: + +gnomad_variant_co-occurrence +============================ + +* **Versions:** 2.1.1 +* **Reference genome builds:** GRCh37 +* **Type:** :class:`hail.Table` + +Schema (2.1.1, GRCh37) +~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: text + + ---------------------------------------- + Global fields: + 'max_freq': float64 + 'least_consequence': str + 'same_haplotype_em_probability_cutoff': float64 + 'different_haplotypes_em_probability_cutoff': float64 + 'global_annotation_descriptions': struct { + max_freq: str, + least_consequence: str, + same_haplotype_em_probability_cutoff: str, + different_haplotypes_em_probability_cutoff: str + } + 'row_annotation_descriptions': struct { + locus1: str, + alleles1: str, + locus2: str, + alleles2: str, + phase_info: struct { + description: str, + gt_counts: str, + em: struct { + hap_counts: str, + p_chet: str, + same_haplotype: str, + different_haplotype: str + } + } + } + ---------------------------------------- + Row fields: + 'locus1': locus + 'alleles1': array + 'locus2': locus + 'alleles2': array + 'phase_info': dict, + em: struct { + hap_counts: array, + p_chet: float64, + same_haplotype: bool, + different_haplotype: bool + } + }> + ---------------------------------------- + Key: ['locus1', 'alleles1', 'locus2', 'alleles2'] + ---------------------------------------- diff --git a/hail/python/hail/docs/ggplot/index.rst b/hail/python/hail/docs/ggplot/index.rst index 62c50439913..cd4cb2927b7 100644 --- a/hail/python/hail/docs/ggplot/index.rst +++ b/hail/python/hail/docs/ggplot/index.rst @@ -73,9 +73,13 @@ currently uses plotly to generate plots, as opposed to ``hl.plot``, which uses b scale_y_reverse scale_color_continuous scale_color_discrete + scale_color_hue + scale_color_manual scale_color_identity scale_fill_continuous scale_fill_discrete + scale_fill_hue + scale_fill_manual scale_fill_identity .. autofunction:: scale_x_continuous @@ -89,9 +93,13 @@ currently uses plotly to generate plots, as opposed to ``hl.plot``, which uses b .. autofunction:: scale_y_reverse .. autofunction:: scale_color_continuous .. autofunction:: scale_color_discrete +.. autofunction:: scale_color_hue +.. autofunction:: scale_color_manual .. autofunction:: scale_color_identity .. autofunction:: scale_fill_continuous .. autofunction:: scale_fill_discrete +.. autofunction:: scale_fill_hue +.. autofunction:: scale_fill_manual .. autofunction:: scale_fill_identity .. 
rubric:: Labels diff --git a/hail/python/hail/docs/hail_on_the_cloud.rst b/hail/python/hail/docs/hail_on_the_cloud.rst index d60492e652b..16f97e25bb1 100644 --- a/hail/python/hail/docs/hail_on_the_cloud.rst +++ b/hail/python/hail/docs/hail_on_the_cloud.rst @@ -4,15 +4,15 @@ Hail on the Cloud ================= -Public clouds are a natural place to run Hail, offering the ability to run -on-demand workloads with high elasticity. Microsoft Azure, Google Cloud Platform, Databricks and Amazon Web Services make it -possible to rent Spark clusters with thousands of cores on-demand, -providing for the elastic compute requirements of scientific research without -an up-front capital investment in hardware. +Public clouds are a natural place to run Hail, offering the ability to run on-demand workloads with +high elasticity. Microsoft Azure, Google Cloud Platform, Databricks and Amazon Web Services make it +possible to rent Spark clusters with thousands of cores on-demand, providing for the elastic compute +requirements of scientific research without an up-front capital investment in hardware. .. toctree:: General Advice + Query-on-Batch Google Cloud Microsoft Azure Other Cloud Providers diff --git a/hail/python/hail/experimental/datasets.json b/hail/python/hail/experimental/datasets.json index be573be2d6f..62f6a2a3dc5 100644 --- a/hail/python/hail/experimental/datasets.json +++ b/hail/python/hail/experimental/datasets.json @@ -3780,24 +3780,119 @@ } }, "version": "3.1" + }, + { + "reference_genome": "GRCh38", + "url": { + "aws": { + "us": "s3://gnomad-public-us-east-1/release/3.1.1/ht/genomes/gnomad.genomes.v3.1.1.sites.ht" + }, + "gcp": { + "us": "gs://gcp-public-data--gnomad/release/3.1.1/ht/genomes/gnomad.genomes.v3.1.1.sites.ht" + } + }, + "version": "3.1.1" + }, + { + "reference_genome": "GRCh38", + "url": { + "aws": { + "us": "s3://gnomad-public-us-east-1/release/3.1.2/ht/genomes/gnomad.genomes.v3.1.2.sites.ht" + }, + "gcp": { + "us": "gs://gcp-public-data--gnomad/release/3.1.2/ht/genomes/gnomad.genomes.v3.1.2.sites.ht" + } + }, + "version": "3.1.2" } ] }, - "gnomad_hgdp_1kg_callset": { - "description": "gnomAD: samples included in this subset are drawn from the 1000 Genomes Project (n=2,435) and the Human Genome Diversity Project (n=780), which contain some of the most genetically diverse populations present in gnomAD.", + "gnomad_hgdp_1kg_subset_dense": { + "description": "gnomAD: 1KG + HGDP subset, formatted as a dense MatrixTable. 
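As a rough sketch of how this dense subset can be used once loaded, the column flag `high_quality` and the row field `filters` from the schema shown earlier support simple quality filtering; the `load_dataset` call mirrors the docstring example later in this patch, and the snippet is illustrative rather than an official recipe:

import hail as hl

# Hedged sketch: load the dense HGDP + 1KG subset and apply two simple filters
# using fields from the schema above (the high_quality column flag and the
# filters row set). Argument values follow the dataset entry shown here.
mt = hl.experimental.load_dataset(name='gnomad_hgdp_1kg_subset_dense',
                                  version='3.1.2',
                                  reference_genome='GRCh38',
                                  region='us',
                                  cloud='gcp')
mt = mt.filter_cols(mt.high_quality)          # keep high-quality samples
mt = mt.filter_rows(hl.len(mt.filters) == 0)  # keep variants with no filter flags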
Samples included in this subset are drawn from the 1000 Genomes Project (n=2,435) and the Human Genome Diversity Project (n=780), which contain some of the most genetically diverse populations present in gnomAD.", "url": "https://gnomad.broadinstitute.org/", "versions": [ { "reference_genome": "GRCh38", "url": { "aws": { - "us": "s3://gnomad-public-us-east-1/release/3.1/mt/genomes/gnomad.genomes.v3.1.hgdp_1kg_subset.mt" + "us": "s3://gnomad-public-us-east-1/release/3.1/mt/genomes/gnomad.genomes.v3.1.hgdp_1kg_subset_dense.mt" }, "gcp": { - "us": "gs://gcp-public-data--gnomad/release/3.1/mt/genomes/gnomad.genomes.v3.1.hgdp_1kg_subset.mt" + "us": "gs://gcp-public-data--gnomad/release/3.1/mt/genomes/gnomad.genomes.v3.1.hgdp_1kg_subset_dense.mt" } }, "version": "3.1" + }, + { + "reference_genome": "GRCh38", + "url": { + "aws": { + "us": "s3://gnomad-public-us-east-1/release/3.1.2/mt/genomes/gnomad.genomes.v3.1.2.hgdp_1kg_subset_dense.mt" + }, + "gcp": { + "us": "gs://gcp-public-data--gnomad/release/3.1.2/mt/genomes/gnomad.genomes.v3.1.2.hgdp_1kg_subset_dense.mt" + } + }, + "version": "3.1.2" + } + ] + }, + "gnomad_hgdp_1kg_subset_sparse": { + "description": "gnomAD: 1KG + HGDP subset, formatted as a sparse MatrixTable. Samples included in this subset are drawn from the 1000 Genomes Project (n=2,435) and the Human Genome Diversity Project (n=780), which contain some of the most genetically diverse populations present in gnomAD. Note: this version is formatted as a sparse MatrixTable.", + "url": "https://gnomad.broadinstitute.org/", + "versions": [ + { + "reference_genome": "GRCh38", + "url": { + "aws": { + "us": "s3://gnomad-public-us-east-1/release/3.1.2/mt/genomes/gnomad.genomes.v3.1.2.hgdp_1kg_subset_sparse.mt" + }, + "gcp": { + "us": "gs://gcp-public-data--gnomad/release/3.1.2/mt/genomes/gnomad.genomes.v3.1.2.hgdp_1kg_subset_sparse.mt" + } + }, + "version": "3.1.2" + } + ] + }, + "gnomad_hgdp_1kg_subset_sample_metadata": { + "description": "gnomAD: Hail Table containing HGDP + 1KG gnomAD sample QC metrics, as well as additional sample metadata.", + "url": "https://gnomad.broadinstitute.org/", + "versions": [ + { + "reference_genome": "GRCh38", + "url": { + "aws": { + "us": "s3://gnomad-public-us-east-1/release/3.1.2/ht/genomes/gnomad.genomes.v3.1.2.hgdp_1kg_subset_sample_meta.ht" + }, + "gcp": { + "us": "gs://gcp-public-data--gnomad/release/3.1.2/ht/genomes/gnomad.genomes.v3.1.2.hgdp_1kg_subset_sample_meta.ht" + } + }, + "version": "3.1.2" + } + ] + }, + "gnomad_hgdp_1kg_subset_variant_annotations": { + "annotation_db": { + "key_properties": [ + "unique" + ] + }, + "description": "gnomAD: Hail Table containing HGDP + 1KG variant annotations. 
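Because the variant annotation table described here is keyed by split (biallelic) variants while the sparse subset stores unsplit, local-allele entries (LGT/LA/LAD/LPL in its schema above), the sparse MatrixTable would typically be split before the join; a hedged sketch using the standard sparse-splitting helper, illustrative rather than an official recipe:

import hail as hl

# Hedged sketch: split the sparse subset, then join the split variant annotation
# table onto it by ('locus', 'alleles'). Dataset names follow the JSON entries here.
sparse_mt = hl.experimental.load_dataset(name='gnomad_hgdp_1kg_subset_sparse',
                                         version='3.1.2',
                                         reference_genome='GRCh38',
                                         region='us',
                                         cloud='gcp')
split_mt = hl.experimental.sparse_split_multi(sparse_mt)
ann_ht = hl.experimental.load_dataset(name='gnomad_hgdp_1kg_subset_variant_annotations',
                                      version='3.1.2',
                                      reference_genome='GRCh38',
                                      region='us',
                                      cloud='gcp')
split_mt = split_mt.annotate_rows(annotations=ann_ht[split_mt.row_key])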
Note that this Hail Table splits multi-allelic variants, so users who would like to annotate the sparse, unsplit MatrixTable with this Table will need to split the sparse MatrixTable first.", + "url": "https://gnomad.broadinstitute.org/", + "versions": [ + { + "reference_genome": "GRCh38", + "url": { + "aws": { + "us": "s3://gnomad-public-us-east-1/release/3.1.2/ht/genomes/gnomad.genomes.v3.1.2.hgdp_1kg_subset_variant_annotations.ht" + }, + "gcp": { + "us": "gs://gcp-public-data--gnomad/release/3.1.2/ht/genomes/gnomad.genomes.v3.1.2.hgdp_1kg_subset_variant_annotations.ht" + } + }, + "version": "3.1.2" } ] }, @@ -4607,6 +4702,59 @@ } ] }, + "gnomad_variant_co-occurrence": { + "description": "gnomAD: Hail Table containing variant co-occurrence (inferred phasing) data for all pairs of variants within a gene where both variants have a global allele frequency in gnomAD exomes <5% and are either coding, flanking intronic (from position -1 to -3 in acceptor sites, and +1 to +8 in donor sites) or in the 5’/3’ UTRs. This encompasses 20,921,100 pairs of variants across 19,685 genes.", + "url": "https://gnomad.broadinstitute.org/", + "versions": [ + { + "reference_genome": "GRCh37", + "url": { + "aws": { + "us": "s3://gnomad-public-us-east-1/release/2.1.1/ht/exomes_phased_counts_0.05_3_prime_UTR_variant_vp.ht" + }, + "gcp": { + "us": "gs://gcp-public-data--gnomad/release/2.1.1/ht/exomes_phased_counts_0.05_3_prime_UTR_variant_vp.ht" + } + }, + "version": "2.1.1" + } + ] + }, + "gnomad_pca_variant_loadings": { + "annotation_db": { + "key_properties": [ + "unique" + ] + }, + "description": "gnomAD: ancestry principal component analysis (PCA) variant loadings.", + "url": "https://gnomad.broadinstitute.org/", + "versions": [ + { + "reference_genome": "GRCh37", + "url": { + "aws": { + "us": "s3://gnomad-public-us-east-1/release/2.1/pca/gnomad.r2.1.pca_loadings.ht" + }, + "gcp": { + "us": "gs://gcp-public-data--gnomad/release/2.1/pca/gnomad.r2.1.pca_loadings.ht" + } + }, + "version": "2.1" + }, + { + "reference_genome": "GRCh38", + "url": { + "aws": { + "us": "s3://gnomad-public-us-east-1/release/3.1/pca/gnomad.v3.1.pca_loadings.ht" + }, + "gcp": { + "us": "gs://gcp-public-data--gnomad/release/3.1/pca/gnomad.v3.1.pca_loadings.ht" + } + }, + "version": "3.1" + } + ] + }, "gnomad_plof_metrics_gene": { "annotation_db": { "key_properties": [ diff --git a/hail/python/hail/experimental/datasets.py b/hail/python/hail/experimental/datasets.py index e7182bef66a..26dbddb4d1e 100644 --- a/hail/python/hail/experimental/datasets.py +++ b/hail/python/hail/experimental/datasets.py @@ -25,9 +25,9 @@ def load_dataset(name: str, Example ------- - >>> # Load the gnomAD "HGDP + 1000 Genomes" MatrixTable with GRCh38 coordinates. - >>> mt = hl.experimental.load_dataset(name='gnomad_hgdp_1kg_callset', - ... version='3.1', + >>> # Load the gnomAD "HGDP + 1000 Genomes" dense MatrixTable with GRCh38 coordinates. + >>> mt = hl.experimental.load_dataset(name='gnomad_hgdp_1kg_subset_dense', + ... version='3.1.2', ... reference_genome='GRCh38', ... region='us', ... 
cloud='gcp') diff --git a/hail/python/hail/experimental/expressions.py b/hail/python/hail/experimental/expressions.py index c6feeda7ad7..5704ef3de69 100644 --- a/hail/python/hail/experimental/expressions.py +++ b/hail/python/hail/experimental/expressions.py @@ -1,6 +1,8 @@ import hail as hl from hail.expr.expressions import expr_any, analyze -from hail.typecheck import typecheck +from hail.expr.types import hail_type +from hail.expr.table_type import ttable +from hail.typecheck import typecheck, nullable @typecheck(expr=expr_any, path=str, overwrite=bool) @@ -39,11 +41,11 @@ def write_expression(expr, path, overwrite=False): analyze('write_expression.expr', expr, source._global_indices) source = source.select_globals(__expr=expr) expr = source.index_globals().__expr - hl.utils.range_table(1).filter(False).annotate_globals(expr=expr).write(path, overwrite=overwrite) + hl.utils.range_table(1).filter(False).key_by().drop('idx').annotate_globals(expr=expr).write(path, overwrite=overwrite) -@typecheck(path=str) -def read_expression(path): +@typecheck(path=str, _assert_type=nullable(hail_type)) +def read_expression(path, _assert_type=None): """Read an :class:`~.Expression` written with :func:`.experimental.write_expression`. Example @@ -62,4 +64,9 @@ def read_expression(path): ------- :class:`~.Expression` """ - return hl.read_table(path).index_globals().expr + _assert_table_type = None + _load_refs = True + if _assert_type: + _assert_table_type = ttable(hl.tstruct(expr=_assert_type), row_type=hl.tstruct(), row_key=[]) + _load_refs = False + return hl.read_table(path, _assert_type=_assert_table_type, _load_refs=_load_refs).index_globals().expr diff --git a/hail/python/hail/experimental/vcf_combiner/vcf_combiner.py b/hail/python/hail/experimental/vcf_combiner/vcf_combiner.py index b2c31a296b9..7c0bf504323 100644 --- a/hail/python/hail/experimental/vcf_combiner/vcf_combiner.py +++ b/hail/python/hail/experimental/vcf_combiner/vcf_combiner.py @@ -607,6 +607,7 @@ def run_combiner(sample_paths: List[str], None """ + hl.utils.no_service_backend('vcf_combiner') flagname = 'no_ir_logging' prev_flag_value = hl._get_flags(flagname).get(flagname) hl._set_flags(**{flagname: '1'}) diff --git a/hail/python/hail/expr/expressions/base_expression.py b/hail/python/hail/expr/expressions/base_expression.py index 5ced2e5f5e1..6c8a6c71df5 100644 --- a/hail/python/hail/expr/expressions/base_expression.py +++ b/hail/python/hail/expr/expressions/base_expression.py @@ -1219,5 +1219,6 @@ def _persist(self): src = self._indices.source if src is not None: raise ValueError("Can only persist a scalar (no Table/MatrixTable source)") - executed_jir = Env.backend().persist_ir(self._ir) - return expressions.construct_expr(executed_jir, self.dtype) + expr = Env.backend().persist_expression(self) + assert expr.dtype == self.dtype + return expr diff --git a/hail/python/hail/expr/expressions/typed_expressions.py b/hail/python/hail/expr/expressions/typed_expressions.py index 65bc63cb689..7eb937bd071 100644 --- a/hail/python/hail/expr/expressions/typed_expressions.py +++ b/hail/python/hail/expr/expressions/typed_expressions.py @@ -2916,6 +2916,14 @@ def split(self, delim, n=None): else: return self._method("split", tarray(tstr), delim, n) + @typecheck_method(delim=expr_str, missing=expr_array(), quote=nullable(expr_str), regex=bool) + def _split_line(self, delim, missing, quote, regex): + regex_str = 'Regex' if regex else 'Char' + if quote is None: + return self._method(f"split{regex_str}", tarray(tstr), delim, missing) + else: + return 
self._method(f"splitQuoted{regex_str}", tarray(tstr), delim, missing, quote) + def lower(self): """Returns a copy of the string, but with upper case letters converted to lower case. @@ -3095,25 +3103,41 @@ def translate(self, mapping): """ return self._method('translate', tstr, mapping) - @typecheck_method(regex=expr_str) - def matches(self, regex): - """Returns ``True`` if the string contains any match for the given regex. + @typecheck_method(regex=expr_str, full_match=nullable(bool)) + def matches(self, regex, full_match=False): + """Returns ``True`` if the string contains any match for the given regex when + `full_match` is ``False``; returns ``True`` only if the whole string matches the + given regex when `full_match` is ``True``. Examples -------- + The `regex` parameter does not need to match the entire string if `full_match` is ``False``: + >>> string = hl.literal('NA12878') + >>> hl.eval(string.matches('12')) + True - The `regex` parameter does not need to match the entire string: - >>> hl.eval(string.matches('12')) + The `regex` parameter needs to match the entire string if `full_match` is ``True``: - True + >>> string = hl.literal('NA12878') + >>> hl.eval(string.matches('12', True)) + False + + >>> string = hl.literal('3412878') + >>> hl.eval(string.matches('^[0-9]*$', True)) True Regex motifs can be used to match sequences of characters: + >>> string = hl.literal('NA12878') >>> hl.eval(string.matches(r'NA\\d+')) True + >>> string = hl.literal('3412878') + >>> hl.eval(string.matches('^[0-9]*$')) + True + Notes ----- The `regex` argument is a @@ -3125,13 +3149,20 @@ def matches(self, regex): ---------- regex: :class:`.StringExpression` Pattern to match. + full_match: :obj:`bool` + If ``True``, the whole string must match the regex. + If ``False``, the string needs only to contain a match for the regex. Returns ------- :class:`.BooleanExpression` - ``True`` if the string contains any match for the regex, otherwise ``False``. + If `full_match` is ``False``, ``True`` if the string contains any match for the regex, otherwise ``False``. + If `full_match` is ``True``, ``True`` if the whole string matches the regex, otherwise ``False``. """ - return regex._method("regexMatch", tbool, self) + if full_match is False: + return regex._method("regexMatch", tbool, self) + else: + return regex._method("regexFullMatch", tbool, self) def reverse(self): """Returns the reversed value. diff --git a/hail/python/hail/expr/types.py b/hail/python/hail/expr/types.py index 6c514109be4..32c2f6026b4 100644 --- a/hail/python/hail/expr/types.py +++ b/hail/python/hail/expr/types.py @@ -70,7 +70,7 @@ def summary_type(t): return str(t) -def dtype(type_str): +def dtype(type_str) -> 'HailType': r"""Parse a type from its string representation.
Examples @@ -216,7 +216,7 @@ def _pretty(self, b, indent, increment): b.append(str(self)) @abc.abstractmethod - def _parsable_string(self): + def _parsable_string(self) -> str: pass def typecheck(self, value): diff --git a/hail/python/hail/fs/fs.py b/hail/python/hail/fs/fs.py index fd89afa9ebe..fd289bb354f 100644 --- a/hail/python/hail/fs/fs.py +++ b/hail/python/hail/fs/fs.py @@ -73,3 +73,7 @@ def copy_log(self, path: str) -> None: self.copy(local_path_uri(Env.hc()._log), path) except Exception as e: sys.stderr.write(f'Could not copy log: encountered error:\n {e}') + + @abc.abstractmethod + def canonicalize_path(self, path: str) -> str: + pass diff --git a/hail/python/hail/fs/google_fs.py b/hail/python/hail/fs/google_fs.py deleted file mode 100644 index 9b2dd4a6f99..00000000000 --- a/hail/python/hail/fs/google_fs.py +++ /dev/null @@ -1,148 +0,0 @@ -import os -import time - -from stat import S_ISREG, S_ISDIR -from typing import Dict, List, Optional -from shutil import copy2, rmtree - -import dateutil -import gcsfs - -from hailtop.utils import sync_retry_transient_errors - -from .fs import FS -from .stat_result import FileType, StatResult - - -class GoogleCloudStorageFS(FS): - def __init__(self): - self.client = gcsfs.core.GCSFileSystem(secure_serialize=True) - - def _is_local(self, path: str): - if path.startswith("gs://"): - return False - return True - - def _add_gs_path_prefix(self, path: str) -> str: - first_idx = 0 - - for char in path: - if char != "/": - break - first_idx += 1 - - return "gs://" + path[first_idx:] - - def open(self, path: str, mode: str = 'r', buffer_size: int = 2**18): - if self._is_local(path): - if mode.startswith('w') and not os.path.exists(path): - parts = os.path.split(path) - if not os.path.exists(parts[0]): - os.makedirs(parts[0]) - - return open(path, mode, buffer_size) - - return self.client.open(path, mode, buffer_size) - - def copy(self, src: str, dest: str): - src_is_remote = src.startswith('gs://') - dest_is_remote = dest.startswith('gs://') - - if src_is_remote and dest_is_remote: - self.client.copy(src, dest) - elif src_is_remote: - self.client.get(src, dest) - elif dest_is_remote: - self.client.put(src, dest) - else: - dst_w_file = dest - if os.path.isdir(dst_w_file): - dst_w_file = os.path.join(dest, os.path.basename(src)) - - copy2(src, dst_w_file) - stats = os.stat(src) - - os.chown(dst_w_file, stats.st_uid, stats.st_gid) - - def exists(self, path: str) -> bool: - if self._is_local(path): - return os.path.exists(path) - - return self.client.exists(path) - - def is_file(self, path: str) -> bool: - try: - if self._is_local(path): - return S_ISREG(os.stat(path).st_mode) - return not self._stat_is_gs_dir(self.client.info(path)) - except FileNotFoundError: - return False - - def is_dir(self, path: str) -> bool: - try: - if self._is_local(path): - return S_ISDIR(os.stat(path).st_mode) - return self._stat_is_gs_dir(self.client.info(path)) - except FileNotFoundError: - return False - - def stat(self, path: str) -> Dict: - if self._is_local(path): - return StatResult.from_os_stat_result(path, os.stat(path)) - - return self._format_stat_gs_file(self.client.info(path), path) - - def _format_stat_gs_file(self, stats: Dict, path: Optional[str] = None) -> StatResult: - path_from_stats = stats.get('name') - if path_from_stats is not None: - path_from_stats = self._add_gs_path_prefix(path_from_stats) - else: - assert path is not None - path_from_stats = path - - modification_time = stats.get('updated') - if modification_time is not None: - dt = 
dateutil.parser.isoparse(modification_time) - modification_time = time.mktime(dt.timetuple()) - - typ = FileType.DIRECTORY if self._stat_is_gs_dir(stats) else FileType.FILE - - return StatResult( - path=path_from_stats, - size=stats['size'], - owner=stats['bucket'], - typ=typ, - modification_time=modification_time) - - def _stat_is_gs_dir(self, stats: Dict) -> bool: - return stats['storageClass'] == 'DIRECTORY' or stats['name'].endswith('/') - - def ls(self, path: str) -> List[StatResult]: - if self._is_local(path): - return [StatResult.from_os_stat_result(file, os.stat(file)) - for file in os.listdir(path)] - - return [self._format_stat_gs_file(file) - for file in self.client.ls(path, detail=True)] - - def mkdir(self, path: str): - pass - - def remove(self, path: str): - if self._is_local(path): - os.remove(path) - self.client.rm(path) - - def rmtree(self, path: str): - if self._is_local(path): - rmtree(path) - - def rm_not_exist_ok(): - try: - self.client.rm(path, recursive=True) - except FileNotFoundError: - pass - sync_retry_transient_errors(rm_not_exist_ok) - - def supports_scheme(self, scheme: str) -> bool: - return scheme in ("gs", "") diff --git a/hail/python/hail/fs/hadoop_fs.py b/hail/python/hail/fs/hadoop_fs.py index 9642942d089..e172a06c07b 100644 --- a/hail/python/hail/fs/hadoop_fs.py +++ b/hail/python/hail/fs/hadoop_fs.py @@ -1,7 +1,7 @@ import io import json import time -from typing import Dict, List +from typing import Dict, List, Union, Any import dateutil @@ -9,7 +9,7 @@ from .stat_result import FileType, StatResult -def _stat_dict_to_stat_result(stat: Dict) -> StatResult: +def _stat_dict_to_stat_result(stat: Dict[str, Any]) -> StatResult: dt = dateutil.parser.isoparse(stat['modification_time']) mtime = time.mktime(dt.timetuple()) if stat['is_dir']: @@ -35,6 +35,7 @@ def legacy_open(self, path: str, mode: str = 'r', buffer_size: int = 8192): return self._open(path, mode, buffer_size, use_codec=True) def _open(self, path: str, mode: str = 'r', buffer_size: int = 8192, use_codec: bool = False): + handle: Union[io.BufferedReader, io.BufferedWriter] if 'r' in mode: handle = io.BufferedReader(HadoopReader(self, path, buffer_size, use_codec=use_codec), buffer_size=buffer_size) elif 'w' in mode: @@ -79,6 +80,9 @@ def rmtree(self, path: str): def supports_scheme(self, scheme: str) -> bool: return self._jfs.supportsScheme(scheme) + def canonicalize_path(self, path: str) -> str: + return self._jfs.makeQualified(path) + class HadoopReader(io.RawIOBase): def __init__(self, hfs, path, buffer_size, use_codec=False): diff --git a/hail/python/hail/fs/local_fs.py b/hail/python/hail/fs/local_fs.py index 1740ce5d514..a957df747b7 100644 --- a/hail/python/hail/fs/local_fs.py +++ b/hail/python/hail/fs/local_fs.py @@ -1,8 +1,7 @@ -from typing import List, BinaryIO -import gzip -import io import os +from typing import List from shutil import copy2, rmtree +import glob from .fs import FS from .stat_result import StatResult @@ -12,22 +11,14 @@ class LocalFS(FS): def __init__(self): pass - def open(self, path: str, mode: str = 'r', buffer_size: int = 0): - if mode not in ('r', 'rb', 'w', 'wb'): - raise ValueError(f'Unsupported mode: {repr(mode)}') - - strm: BinaryIO - if mode[0] == 'r': - strm = open(path, 'rb') - else: - assert mode[0] == 'w' - strm = open(path, 'wb') - - if path[-3:] == '.gz' or path[-4:] == '.bgz': - strm = gzip.GzipFile(fileobj=strm, mode=mode) # type: ignore # GzipFile should be a BinaryIO - if 'b' not in mode: - strm = io.TextIOWrapper(strm, encoding='utf-8') # type: ignore # 
TextIOWrapper should be a BinaryIO - return strm + def open(self, path: str, mode: str = 'r', buffer_size: int = -1): + if 'w' in mode: + try: + return open(path, mode, buffering=buffer_size) + except FileNotFoundError: + os.makedirs(os.path.dirname(path)) + return open(path, mode, buffering=buffer_size) + return open(path, mode, buffering=buffer_size) def copy(self, src: str, dest: str): dst_w_file = dest @@ -52,8 +43,19 @@ def stat(self, path: str) -> StatResult: return StatResult.from_os_stat_result(path, os.stat(path)) def ls(self, path: str) -> List[StatResult]: - return [self.stat(os.path.join(path, file)) - for file in os.listdir(path)] + if glob.escape(path) == path: + return self._ls_no_glob(path) + return [ + result_path + for globbed_path in glob.glob(path) + for result_path in self._ls_no_glob(globbed_path) + ] + + def _ls_no_glob(self, path: str) -> List[StatResult]: + if os.path.isdir(path): + return [self.stat(os.path.join(path, file)) + for file in os.listdir(path)] + return [self.stat(path)] def mkdir(self, path: str): os.mkdir(path) @@ -66,3 +68,8 @@ def rmtree(self, path: str): def supports_scheme(self, scheme: str) -> bool: return scheme == "" + + def canonicalize_path(self, path: str) -> str: + if path.startswith('file:'): + return 'file:' + os.path.realpath(path[5:]) + return 'file:' + os.path.realpath(path) diff --git a/hail/python/hail/fs/router_fs.py b/hail/python/hail/fs/router_fs.py index f463a2c8799..8f0dfa6a576 100644 --- a/hail/python/hail/fs/router_fs.py +++ b/hail/python/hail/fs/router_fs.py @@ -1,12 +1,13 @@ from typing import List, AsyncContextManager, BinaryIO import asyncio -import gzip import io import nest_asyncio +import os -from hailtop.aiotools.router_fs import RouterAsyncFS from hailtop.aiotools.fs import Copier, Transfer, FileListEntry, ReadableStream, WritableStream -from hailtop.utils import async_to_blocking, OnlineBoundedGather2 +from hailtop.aiotools.local_fs import LocalAsyncFS +from hailtop.aiotools.router_fs import RouterAsyncFS +from hailtop.utils import OnlineBoundedGather2, async_to_blocking from .fs import FS from .stat_result import FileType, StatResult @@ -175,8 +176,6 @@ def open(self, path: str, mode: str = 'r', buffer_size: int = 8192) -> io.IOBase assert mode[0] == 'w' strm = SyncWritableStream(async_to_blocking(self.afs.create(path)), path) - if path[-3:] == '.gz' or path[-4:] == '.bgz': - strm = gzip.GzipFile(fileobj=strm, mode=mode) if 'b' not in mode: strm = io.TextIOWrapper(strm, encoding='utf-8') # type: ignore # typeshed is wrong, this *is* an IOBase return strm @@ -186,12 +185,18 @@ def copy(self, src: str, dest: str, *, max_simultaneous_transfers=75): async def _copy(): sema = asyncio.Semaphore(max_simultaneous_transfers) - async with sema: - await Copier.copy(self.afs, asyncio.Semaphore, transfer) + await Copier.copy(self.afs, sema, transfer) return async_to_blocking(_copy()) def exists(self, path: str) -> bool: - return async_to_blocking(self.afs.exists(path)) + async def _exists(): + dir_path = path + if dir_path[-1] != '/': + dir_path = dir_path + '/' + return any(await asyncio.gather( + self.afs.isfile(path), + self.afs.isdir(dir_path))) + return async_to_blocking(_exists()) def is_file(self, path: str) -> bool: return async_to_blocking(self.afs.isfile(path)) @@ -232,17 +237,24 @@ async def _ls(): async with OnlineBoundedGather2(asyncio.Semaphore(_max_simultaneous_files)) as pool: tasks = [pool.call(self._fle_to_dict, fle) async for fle in await self.afs.listfiles(path)] - return [await t for t in tasks] + return 
list(await asyncio.gather(*tasks)) return async_to_blocking(_ls()) def mkdir(self, path: str): return async_to_blocking(self.afs.mkdir(path)) def remove(self, path: str): - return async_to_blocking(self.remove(path)) + return async_to_blocking(self.afs.remove(path)) def rmtree(self, path: str): return async_to_blocking(self.afs.rmtree(None, path)) def supports_scheme(self, scheme: str) -> bool: return scheme in self.afs.schemes + + def canonicalize_path(self, path: str) -> str: + if isinstance(self.afs._get_fs(path), LocalAsyncFS): + if path.startswith('file:'): + return 'file:' + os.path.realpath(path[5:]) + return 'file:' + os.path.realpath(path) + return path diff --git a/hail/python/hail/fs/stat_result.py b/hail/python/hail/fs/stat_result.py index cb49d60c07a..bf64f309804 100644 --- a/hail/python/hail/fs/stat_result.py +++ b/hail/python/hail/fs/stat_result.py @@ -2,7 +2,7 @@ import stat from enum import Enum, auto -from typing import Dict, NamedTuple, Optional, Union +from typing import Dict, NamedTuple, Optional, Union, Any import hurry.filesize @@ -35,6 +35,6 @@ def from_os_stat_result(path: str, sb: os.stat_result) -> 'StatResult': return StatResult(path=path, owner=sb.st_uid, size=sb.st_size, typ=typ, modification_time=sb.st_mtime) - def to_legacy_dict(self) -> Dict: + def to_legacy_dict(self) -> Dict[str, Any]: return dict(path=self.path, owner=self.owner, is_dir=self.is_dir(), size_bytes=self.size, size=hurry.filesize.size(self.size), modification_time=self.modification_time) diff --git a/hail/python/hail/ggplot/__init__.py b/hail/python/hail/ggplot/__init__.py index 1fdcd4844b3..2c9b0d5d9c9 100644 --- a/hail/python/hail/ggplot/__init__.py +++ b/hail/python/hail/ggplot/__init__.py @@ -5,8 +5,9 @@ geom_hline, geom_vline, geom_tile, geom_col, geom_area, geom_ribbon # noqa F401 from .labels import ggtitle, xlab, ylab from .scale import scale_x_continuous, scale_y_continuous, scale_x_discrete, scale_y_discrete, scale_x_genomic, \ - scale_x_log10, scale_y_log10, scale_x_reverse, scale_y_reverse, scale_color_discrete, scale_color_identity,\ - scale_color_continuous, scale_fill_discrete, scale_fill_identity, scale_fill_continuous + scale_x_log10, scale_y_log10, scale_x_reverse, scale_y_reverse, scale_color_discrete, scale_color_hue, scale_color_identity,\ + scale_color_manual, scale_color_continuous, scale_fill_discrete, scale_fill_hue, scale_fill_identity, scale_fill_continuous,\ + scale_fill_manual __all__ = [ "aes", @@ -39,7 +40,11 @@ "scale_color_continuous", "scale_color_identity", "scale_color_discrete", + "scale_color_hue", + "scale_color_manual", "scale_fill_continuous", "scale_fill_identity", "scale_fill_discrete", + "scale_fill_hue", + "scale_fill_manual" ] diff --git a/hail/python/hail/ggplot/geoms.py b/hail/python/hail/ggplot/geoms.py index ab8f181e064..b7a6be5f5ec 100644 --- a/hail/python/hail/ggplot/geoms.py +++ b/hail/python/hail/ggplot/geoms.py @@ -255,7 +255,7 @@ def get_stat(self): def geom_bar(mapping=aes(), *, fill=None, color=None, alpha=None, position="stack", size=None): """Create a bar chart that counts occurrences of the various values of the ``x`` aesthetic. 
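As a quick illustration of the new ``weight`` aesthetic introduced just below (the table and field names here are invented for illustration, not part of this patch), a bar's height can be driven by a numeric weight instead of a raw row count:

    import hail as hl
    from hail.ggplot import ggplot, aes, geom_bar

    # Toy table: three categories, each row carrying a numeric weight `n`.
    ht = hl.utils.range_table(9)
    ht = ht.annotate(category=hl.str(ht.idx % 3), n=ht.idx)

    # With `weight`, StatCount sums `n` per category instead of counting rows.
    fig = ggplot(ht, aes(x=ht.category, weight=ht.n)) + geom_bar()
    fig.show()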
- Supported aesthetics: ``x``, ``color``, ``fill`` + Supported aesthetics: ``x``, ``color``, ``fill``, ``weight`` Returns ------- @@ -496,9 +496,14 @@ def plot_group(df): "y0": y_center - height / 2, "x1": x_center + width / 2, "y1": y_center + height / 2, - "fillcolor": "black" if "fill" not in df.attrs else df.attrs["fill"], "opacity": row.get('alpha', 1.0) } + if "fill" in df.attrs: + shape_args["fillcolor"] = df.attrs["fill"] + elif "fill" in row: + shape_args["fillcolor"] = row["fill"] + else: + shape_args["fillcolor"] = "black" fig_so_far.add_shape(**shape_args) for group_df in grouped_data: diff --git a/hail/python/hail/ggplot/ggplot.py b/hail/python/hail/ggplot/ggplot.py index 72ef33af3e5..eb251f81e5c 100644 --- a/hail/python/hail/ggplot/ggplot.py +++ b/hail/python/hail/ggplot/ggplot.py @@ -1,4 +1,3 @@ -import plotly import plotly.graph_objects as go from pprint import pprint @@ -25,8 +24,7 @@ class GGPlot: .. automethod:: write_image """ - def __init__(self, ht, aes, geoms=[], labels=Labels(), coord_cartesian=None, scales=None, - discrete_color_scale=plotly.colors.qualitative.D3, continuous_color_scale=plotly.colors.sequential.Viridis): + def __init__(self, ht, aes, geoms=[], labels=Labels(), coord_cartesian=None, scales=None): if scales is None: scales = {} @@ -36,10 +34,6 @@ def __init__(self, ht, aes, geoms=[], labels=Labels(), coord_cartesian=None, sca self.labels = labels self.coord_cartesian = coord_cartesian self.scales = scales - self.discrete_color_scale = discrete_color_scale - self.discrete_color_dict = {} - self.discrete_color_idx = 0 - self.continuous_color_scale = continuous_color_scale self.add_default_scales(aes) @@ -99,8 +93,7 @@ def add_default_scales(self, aesthetic): self.scales[aesthetic_str] = ScaleDiscrete(aesthetic_str) def copy(self): - return GGPlot(self.ht, self.aes, self.geoms[:], self.labels, self.coord_cartesian, self.scales, - self.discrete_color_scale, self.continuous_color_scale) + return GGPlot(self.ht, self.aes, self.geoms[:], self.labels, self.coord_cartesian, self.scales) def verify_scales(self): for geom_idx, geom in enumerate(self.geoms): @@ -175,7 +168,7 @@ def get_aggregation_result(selected, mapping_per_geom, precomputed): # Create scaling functions based on all the data: transformers = {} for scale in self.scales.values(): - transformers[scale.aesthetic_name] = scale.create_local_transformer([x for _, _, x in geoms_and_grouped_dfs], self) + transformers[scale.aesthetic_name] = scale.create_local_transformer([x for _, _, x in geoms_and_grouped_dfs]) for geom, geom_label, grouped_dfs in geoms_and_grouped_dfs: scaled_grouped_dfs = [] diff --git a/hail/python/hail/ggplot/scale.py b/hail/python/hail/ggplot/scale.py index 55d692d76c9..19aa81289a6 100644 --- a/hail/python/hail/ggplot/scale.py +++ b/hail/python/hail/ggplot/scale.py @@ -5,6 +5,9 @@ from .utils import categorical_strings_to_colors, continuous_nums_to_colors +import plotly.express as px +import plotly + class Scale(FigureAttribute): def __init__(self, aesthetic_name): @@ -14,7 +17,7 @@ def __init__(self, aesthetic_name): def transform_data(self, field_expr): pass - def create_local_transformer(self, groups_of_dfs, parent): + def create_local_transformer(self, groups_of_dfs): return lambda x: x @abc.abstractmethod @@ -147,16 +150,20 @@ def is_continuous(self): return False -class ScaleColorDiscrete(ScaleDiscrete): +class ScaleColorManual(ScaleDiscrete): + + def __init__(self, aesthetic_name, values): + super().__init__(aesthetic_name) + self.values = values - def 
create_local_transformer(self, groups_of_dfs, parent): + def create_local_transformer(self, groups_of_dfs): categorical_strings = set() for group_of_dfs in groups_of_dfs: for df in group_of_dfs: if self.aesthetic_name in df.attrs: categorical_strings.add(df.attrs[self.aesthetic_name]) - unique_color_mapping = categorical_strings_to_colors(categorical_strings, parent) + unique_color_mapping = categorical_strings_to_colors(categorical_strings, self.values) def transform(df): df.attrs[f"{self.aesthetic_name}_legend"] = df.attrs[self.aesthetic_name] @@ -168,7 +175,7 @@ def transform(df): class ScaleColorContinuous(ScaleContinuous): - def create_local_transformer(self, groups_of_dfs, parent): + def create_local_transformer(self, groups_of_dfs): overall_min = None overall_max = None for group_of_dfs in groups_of_dfs: @@ -187,7 +194,7 @@ def create_local_transformer(self, groups_of_dfs, parent): else: overall_max = max(series_max, overall_max) - color_mapping = continuous_nums_to_colors(overall_min, overall_max, parent.continuous_color_scale) + color_mapping = continuous_nums_to_colors(overall_min, overall_max, plotly.colors.sequential.Viridis) def transform(df): df[self.aesthetic_name] = df[self.aesthetic_name].map(lambda i: color_mapping(i)) @@ -196,6 +203,29 @@ def transform(df): return transform +class ScaleColorHue(ScaleDiscrete): + def create_local_transformer(self, groups_of_dfs): + categorical_strings = set() + for group_of_dfs in groups_of_dfs: + for df in group_of_dfs: + if self.aesthetic_name in df.attrs: + categorical_strings.add(df.attrs[self.aesthetic_name]) + + num_categories = len(categorical_strings) + step = 1.0 / num_categories + interpolation_values = [step * i for i in range(num_categories)] + hsv_scale = px.colors.get_colorscale("HSV") + colors = px.colors.sample_colorscale(hsv_scale, interpolation_values) + unique_color_mapping = dict(zip(categorical_strings, colors)) + + def transform(df): + df.attrs[f"{self.aesthetic_name}_legend"] = df.attrs[self.aesthetic_name] + df.attrs[self.aesthetic_name] = unique_color_mapping[df.attrs[self.aesthetic_name]] + return df + + return transform + + # Legend names messed up for scale color identity class ScaleColorDiscreteIdentity(ScaleDiscrete): pass @@ -368,14 +398,26 @@ def scale_x_genomic(reference_genome, name=None): def scale_color_discrete(): - """The default discrete color scale. This maps each discrete value to a color. + """The default discrete color scale. This maps each discrete value to a color. Equivalent to scale_color_hue. Returns ------- :class:`.FigureAttribute` The scale to be applied. """ - return ScaleColorDiscrete("color") + return scale_color_hue() + + +def scale_color_hue(): + """Map discrete colors to evenly placed positions around the color wheel. + + Returns + ------- + :class:`.FigureAttribute` + The scale to be applied. + + """ + return ScaleColorHue("color") def scale_color_continuous(): @@ -400,6 +442,23 @@ def scale_color_identity(): return ScaleColorDiscreteIdentity("color") +def scale_color_manual(*, values): + """A color scale that assigns strings to colors using the pool of colors specified as `values`. + + + Parameters + ---------- + values: :class:`list` of :class:`str` + The colors to choose when assigning values to colors. + + Returns + ------- + :class:`.FigureAttribute` + The scale to be applied. + """ + return ScaleColorManual("color", values=values) + + def scale_fill_discrete(): """The default discrete fill scale. This maps each discrete value to a fill color. 
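A hedged usage sketch for the manual scales defined above; the table and field names are made up for illustration and the color pool is arbitrary:

    import hail as hl
    from hail.ggplot import ggplot, aes, geom_bar, scale_fill_manual

    ht = hl.utils.range_table(9)
    ht = ht.annotate(category=hl.str(ht.idx % 3))

    # Assign the three category values colors from an explicit pool rather
    # than the default hue wheel.
    fig = (ggplot(ht, aes(x=ht.category, fill=ht.category))
           + geom_bar()
           + scale_fill_manual(values=["#1b9e77", "#d95f02", "#7570b3"]))
    fig.show()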
@@ -408,7 +467,7 @@ def scale_fill_discrete(): :class:`.FigureAttribute` The scale to be applied. """ - return ScaleColorDiscrete("fill") + return scale_fill_hue() def scale_fill_continuous(): @@ -431,3 +490,32 @@ def scale_fill_identity(): The scale to be applied. """ return ScaleColorDiscreteIdentity("fill") + + +def scale_fill_hue(): + """Map discrete fill colors to evenly placed positions around the color wheel. + + Returns + ------- + :class:`.FigureAttribute` + The scale to be applied. + + """ + return ScaleColorHue("fill") + + +def scale_fill_manual(*, values): + """A color scale that assigns strings to fill colors using the pool of colors specified as `values`. + + + Parameters + ---------- + values: :class:`list` of :class:`str` + The colors to choose when assigning values to colors. + + Returns + ------- + :class:`.FigureAttribute` + The scale to be applied. + """ + return ScaleColorManual("fill", values=values) diff --git a/hail/python/hail/ggplot/stats.py b/hail/python/hail/ggplot/stats.py index 45babdf934b..5c58b790918 100644 --- a/hail/python/hail/ggplot/stats.py +++ b/hail/python/hail/ggplot/stats.py @@ -67,6 +67,8 @@ class StatCount(Stat): def make_agg(self, mapping, precomputed): grouping_variables = {aes_key: mapping[aes_key] for aes_key in mapping.keys() if should_use_for_grouping(aes_key, mapping[aes_key].dtype)} + if "weight" in mapping: + return hl.agg.group_by(hl.struct(**grouping_variables), hl.agg.counter(mapping["x"], weight=mapping["weight"])) return hl.agg.group_by(hl.struct(**grouping_variables), hl.agg.group_by(mapping["x"], hl.agg.count())) def listify(self, agg_result): diff --git a/hail/python/hail/ggplot/utils.py b/hail/python/hail/ggplot/utils.py index afd92b91228..51135d87ca8 100644 --- a/hail/python/hail/ggplot/utils.py +++ b/hail/python/hail/ggplot/utils.py @@ -34,16 +34,20 @@ def should_use_scale_for_grouping(scale): # Map strings to numbers that will index into a color scale. -def categorical_strings_to_colors(string_set, parent_plot): +def categorical_strings_to_colors(string_set, color_values): - color_dict = parent_plot.discrete_color_dict + if isinstance(color_values, list): + if len(string_set) > len(color_values): + print(f"Not enough colors specified. 
Found {len(string_set)} distinct values of color aesthetic and only {len(color_values)} colors were provided.") + color_dict = {} + for idx, element in enumerate(string_set): + if element not in color_dict: + color_dict[element] = color_values[idx] - for element in string_set: - if element not in color_dict: - color_dict[element] = parent_plot.discrete_color_scale[parent_plot.discrete_color_idx % len(parent_plot.discrete_color_scale)] - parent_plot.discrete_color_idx += 1 + else: + color_dict = color_values - return parent_plot.discrete_color_dict + return color_dict def continuous_nums_to_colors(min_color, max_color, continuous_color_scale): diff --git a/hail/python/hail/ir/__init__.py b/hail/python/hail/ir/__init__.py index 10bd65a94fa..e6f55c52a90 100644 --- a/hail/python/hail/ir/__init__.py +++ b/hail/python/hail/ir/__init__.py @@ -47,7 +47,7 @@ tensor_shape_to_matrix_shape from .utils import filter_predicate_with_keep, make_filter_and_replace from .matrix_reader import MatrixReader, MatrixNativeReader, MatrixRangeReader, \ - MatrixVCFReader, MatrixBGENReader, TextMatrixReader, MatrixPLINKReader + MatrixVCFReader, MatrixBGENReader, MatrixPLINKReader from .table_reader import AvroTableReader, TableReader, TableNativeReader, \ TextTableReader, TableFromBlockMatrixNativeReader, StringTableReader from .blockmatrix_reader import BlockMatrixReader, BlockMatrixNativeReader, \ @@ -254,7 +254,6 @@ 'MatrixRangeReader', 'MatrixVCFReader', 'MatrixBGENReader', - 'TextMatrixReader', 'MatrixPLINKReader', 'MatrixWriter', 'MatrixNativeWriter', diff --git a/hail/python/hail/ir/base_ir.py b/hail/python/hail/ir/base_ir.py index 7a594b2f9ae..5258bb5e83c 100644 --- a/hail/python/hail/ir/base_ir.py +++ b/hail/python/hail/ir/base_ir.py @@ -68,7 +68,7 @@ def head_str(self): @property @abc.abstractmethod def typ(self): - return + raise NotImplementedError def __eq__(self, other): return isinstance(other, self.__class__) and self.children == other.children and self._eq(other) diff --git a/hail/python/hail/ir/blockmatrix_ir.py b/hail/python/hail/ir/blockmatrix_ir.py index 9de543cd366..9fa242b5d6f 100644 --- a/hail/python/hail/ir/blockmatrix_ir.py +++ b/hail/python/hail/ir/blockmatrix_ir.py @@ -3,17 +3,18 @@ from hail.expr.types import tarray from .blockmatrix_reader import BlockMatrixReader from .base_ir import BlockMatrixIR, IR -from hail.typecheck import typecheck_method, sequenceof +from hail.typecheck import typecheck_method, sequenceof, nullable from hail.utils.misc import escape_id from hail.utils.java import Env class BlockMatrixRead(BlockMatrixIR): - @typecheck_method(reader=BlockMatrixReader) - def __init__(self, reader): + @typecheck_method(reader=BlockMatrixReader, _assert_type=nullable(tblockmatrix)) + def __init__(self, reader, _assert_type=None): super().__init__() self.reader = reader + self._type = _assert_type def head_str(self): return f'"{self.reader.render()}"' @@ -22,7 +23,8 @@ def _eq(self, other): return self.reader == other.reader def _compute_type(self): - self._type = Env.backend().blockmatrix_type(self) + if self._type is None: + self._type = Env.backend().blockmatrix_type(self) class BlockMatrixMap(BlockMatrixIR): diff --git a/hail/python/hail/ir/blockmatrix_writer.py b/hail/python/hail/ir/blockmatrix_writer.py index 2775640cf39..9f41f17f593 100644 --- a/hail/python/hail/ir/blockmatrix_writer.py +++ b/hail/python/hail/ir/blockmatrix_writer.py @@ -2,6 +2,7 @@ import json from ..typecheck import typecheck_method, sequenceof, nullable, enumeration +from ..expr.types import tvoid, tstr 
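The `_assert_type` plumbing added to `BlockMatrixRead` above (and the analogous changes to `MatrixRead` and `TableRead` later in this patch) all follow one idea: when the caller already knows the type of what it just wrote, it can assert it and skip a round trip to the backend. A minimal stand-alone sketch of that pattern, using simplified stand-in names rather than Hail's real IR classes:

    from typing import Optional

    class Read:
        """Stand-in for BlockMatrixRead / MatrixRead / TableRead."""

        def __init__(self, reader, _assert_type: Optional[object] = None):
            self.reader = reader
            self._type = _assert_type  # None means "not yet known"

        def compute_type(self, backend):
            # Only query the backend when no type was asserted by the caller.
            if self._type is None:
                self._type = backend.type_of(self.reader)
            return self._type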
from ..utils.misc import escape_str @@ -10,6 +11,10 @@ class BlockMatrixWriter(object): def render(self): pass + @abc.abstractmethod + def _type(self): + pass + @abc.abstractmethod def __eq__(self, other): pass @@ -31,6 +36,9 @@ def render(self): 'stageLocally': self.stage_locally} return escape_str(json.dumps(writer)) + def _type(self): + return tvoid + def __eq__(self, other): return isinstance(other, BlockMatrixNativeWriter) and \ self.path == other.path and \ @@ -49,6 +57,9 @@ def render(self): 'path': self.path} return escape_str(json.dumps(writer)) + def _type(self): + return tstr + def __eq__(self, other): return isinstance(other, BlockMatrixBinaryWriter) and \ self.path == other.path @@ -73,6 +84,9 @@ def render(self): 'binary': self.binary} return escape_str(json.dumps(writer)) + def _type(self): + return tvoid + def __eq__(self, other): return isinstance(other, BlockMatrixRectanglesWriter) and \ self.path == other.path and \ @@ -86,6 +100,9 @@ class BlockMatrixMultiWriter(object): def render(self): pass + def _type(self): + return tvoid + @abc.abstractmethod def __eq__(self, other): pass @@ -103,6 +120,9 @@ def render(self): 'overwrite': self.overwrite} return escape_str(json.dumps(writer)) + def _type(self): + return tvoid + def __eq__(self, other): return isinstance(other, BlockMatrixBinaryMultiWriter) and \ self.prefix == other.prefix and \ @@ -132,6 +152,9 @@ def render(self): 'customFilenames': self.custom_filenames} return escape_str(json.dumps(writer)) + def _type(self): + return tvoid + def __eq__(self, other): return isinstance(other, BlockMatrixTextMultiWriter) and \ self.prefix == other.prefix and \ @@ -155,6 +178,9 @@ def render(self): 'storageLevel': self.storage_level} return escape_str(json.dumps(writer)) + def _type(self): + return tvoid + def __eq__(self, other): return isinstance(other, BlockMatrixPersistWriter) and \ self.id == other.id and \ @@ -177,6 +203,9 @@ def render(self): 'stageLocally': self.stage_locally} return escape_str(json.dumps(writer)) + def _type(self): + return tvoid + def __eq__(self, other): return isinstance(other, BlockMatrixNativeMultiWriter) and \ self.prefix == other.prefix and \ diff --git a/hail/python/hail/ir/ir.py b/hail/python/hail/ir/ir.py index 015e032f39b..811c39297c7 100644 --- a/hail/python/hail/ir/ir.py +++ b/hail/python/hail/ir/ir.py @@ -10,7 +10,7 @@ from hail.ir.blockmatrix_writer import BlockMatrixWriter, BlockMatrixMultiWriter from hail.typecheck import typecheck, typecheck_method, sequenceof, numeric, \ sized_tupleof, nullable, tupleof, anytype, func_spec -from hail.utils.java import Env, HailUserError +from hail.utils.java import Env, HailUserError, warning from hail.utils.misc import escape_str, dump_json, parsable_strings, escape_id from .base_ir import BaseIR, IR, TableIR, MatrixIR, BlockMatrixIR, _env_bind from .matrix_writer import MatrixWriter, MatrixNativeMultiWriter @@ -2284,6 +2284,9 @@ def _compute_type(self, env, agg_env): class ApplySeeded(IR): @typecheck_method(function=str, seed=int, return_type=hail_type, args=IR) def __init__(self, function, seed, return_type, *args): + if hail.current_backend().requires_lowering: + warning("Seeded randomness is currently unreliable on the service. " + "You may observe some unexpected behavior. 
Don't use for real work yet.") super().__init__(*args) self.function = function self.args = args @@ -2545,7 +2548,7 @@ def _eq(self, other): def _compute_type(self, env, agg_env): self.child._compute_type() - self._type = tvoid + self._type = self.writer._type() @staticmethod def is_effectful() -> bool: diff --git a/hail/python/hail/ir/matrix_ir.py b/hail/python/hail/ir/matrix_ir.py index 7fd117b534c..6c45d99062e 100644 --- a/hail/python/hail/ir/matrix_ir.py +++ b/hail/python/hail/ir/matrix_ir.py @@ -1,4 +1,6 @@ +from typing import Optional import hail as hl +from hail.expr.types import HailType from hail.ir.base_ir import BaseIR, MatrixIR from hail.utils.misc import escape_str, parsable_strings, dump_json, escape_id from hail.utils.java import Env @@ -45,11 +47,17 @@ def renderable_agg_bindings(self, i, default_value=None): class MatrixRead(MatrixIR): - def __init__(self, reader, drop_cols=False, drop_rows=False): + def __init__(self, + reader, + drop_cols: bool = False, + drop_rows: bool = False, + *, + _assert_type: Optional[HailType] = None): super().__init__() self.reader = reader self.drop_cols = drop_cols self.drop_rows = drop_rows + self._type: Optional[HailType] = _assert_type def render_head(self, r): return f'(MatrixRead None {self.drop_cols} {self.drop_rows} "{self.reader.render(r)}"' @@ -58,7 +66,8 @@ def _eq(self, other): return self.reader == other.reader and self.drop_cols == other.drop_cols and self.drop_rows == other.drop_rows def _compute_type(self): - self._type = Env.backend().matrix_type(self) + if self._type is None: + self._type = Env.backend().matrix_type(self) class MatrixFilterRows(MatrixIR): diff --git a/hail/python/hail/ir/matrix_reader.py b/hail/python/hail/ir/matrix_reader.py index d62c86792c8..405813ae91f 100644 --- a/hail/python/hail/ir/matrix_reader.py +++ b/hail/python/hail/ir/matrix_reader.py @@ -1,13 +1,11 @@ import abc import json -import hail as hl - -from .utils import make_filter_and_replace -from ..expr.types import tfloat32, tfloat64, hail_type, tint32, tint64, tstr +from .utils import make_filter_and_replace, impute_type_of_partition_interval_array +from ..expr.types import tfloat32, tfloat64 from ..genetics.reference_genome import reference_genome_type -from ..typecheck import typecheck_method, sequenceof, nullable, enumeration, \ - anytype, oneof, dictof, sized_tupleof +from ..typecheck import (typecheck_method, sequenceof, nullable, enumeration, anytype, oneof, + dictof, sized_tupleof) from ..utils import wrap_to_list from ..utils.misc import escape_str @@ -27,25 +25,9 @@ class MatrixNativeReader(MatrixReader): intervals=nullable(sequenceof(anytype)), filter_intervals=bool) def __init__(self, path, intervals, filter_intervals): - if intervals is not None: - t = hl.expr.impute_type(intervals) - if not isinstance(t, hl.tarray) and not isinstance(t.element_type, hl.tinterval): - raise TypeError("'intervals' must be an array of tintervals") - pt = t.element_type.point_type - if isinstance(pt, hl.tstruct): - self._interval_type = t - else: - self._interval_type = hl.tarray(hl.tinterval(hl.tstruct(__point=pt))) - self.path = path self.filter_intervals = filter_intervals - if intervals is not None and t != self._interval_type: - self.intervals = [hl.Interval(hl.Struct(__point=i.start), - hl.Struct(__point=i.end), - i.includes_start, - i.includes_end) for i in intervals] - else: - self.intervals = intervals + self.intervals, self._interval_type = impute_type_of_partition_interval_array(intervals) def render(self, r): reader = {'name': 
'MatrixNativeReader', @@ -217,68 +199,6 @@ def __eq__(self, other): other.included_variants == self.included_variants -class TextMatrixReader(MatrixReader): - @typecheck_method(paths=oneof(str, sequenceof(str)), - n_partitions=nullable(int), - row_fields=dictof(str, hail_type), - entry_type=enumeration(tint32, tint64, tfloat32, tfloat64, tstr), - missing_value=str, - has_header=bool, - separator=str, - gzip_as_bgzip=bool, - add_row_id=bool, - comment=sequenceof(str)) - def __init__(self, - paths, - n_partitions, - row_fields, - entry_type, - missing_value, - has_header, - separator, - gzip_as_bgzip, - add_row_id, - comment): - self.paths = wrap_to_list(paths) - self.n_partitions = n_partitions - self.row_fields = row_fields - self.entry_type = entry_type - self.missing_value = missing_value - self.has_header = has_header - self.separator = separator - self.gzip_as_bgzip = gzip_as_bgzip - self.add_row_id = add_row_id - self.comment = comment - - def render(self, r): - reader = {'name': 'TextMatrixReader', - 'paths': self.paths, - 'nPartitions': self.n_partitions, - 'rowFieldsStr': {k: v._parsable_string() - for k, v in self.row_fields.items()}, - 'entryTypeStr': self.entry_type._parsable_string(), - 'missingValue': self.missing_value, - 'hasHeader': self.has_header, - 'separatorStr': self.separator, - 'gzipAsBGZip': self.gzip_as_bgzip, - 'addRowId': self.add_row_id, - 'comment': self.comment} - return escape_str(json.dumps(reader)) - - def __eq__(self, other): - return isinstance(other, TextMatrixReader) and \ - self.paths == other.paths and \ - self.n_partitions == other.n_partitions and \ - self.row_fields == other.row_fields and \ - self.entry_type == other.entry_type and \ - self.missing_value == other.missing_value and \ - self.has_header == other.has_header and \ - self.separator == other.separator and \ - self.gzip_as_bgzip == other.gzip_as_bgzip and \ - self.add_row_id == other.add_row_id and \ - self.comment == other.comment - - class MatrixPLINKReader(MatrixReader): @typecheck_method(bed=str, bim=str, fam=str, n_partitions=nullable(int), block_size=nullable(int), min_partitions=nullable(int), diff --git a/hail/python/hail/ir/register_functions.py b/hail/python/hail/ir/register_functions.py index e62e9f50839..061180d0d42 100644 --- a/hail/python/hail/ir/register_functions.py +++ b/hail/python/hail/ir/register_functions.py @@ -44,7 +44,6 @@ def ndarray_floating_point_divide(arg_type, ret_type): ndarray_floating_point_divide(tint64, tfloat32) ndarray_floating_point_divide(tfloat32, tfloat32) ndarray_floating_point_divide(tfloat64, tfloat64) - register_function("values", (dtype("dict"),), dtype("array")) register_function("sliceRight", (dtype("str"), dtype("int32"),), dtype("str")) register_function("get", (dtype("dict"), dtype("?key"),), dtype("?value")) @@ -211,6 +210,12 @@ def ndarray_floating_point_divide(arg_type, ret_type): register_function("slice", (dtype("str"), dtype("int32"), dtype("int32"),), dtype("str")) register_function("split", (dtype("str"), dtype("str"), dtype("int32"),), dtype("array")) register_function("split", (dtype("str"), dtype("str"),), dtype("array")) + register_function("splitQuotedChar", (dtype("str"), dtype("str"), dtype("array"), dtype("str"),), + dtype("array")) + register_function("splitQuotedRegex", (dtype("str"), dtype("str"), dtype("array"), dtype("str"),), + dtype("array")) + register_function("splitChar", (dtype("str"), dtype("str"), dtype("array"),), dtype("array")) + register_function("splitRegex", (dtype("str"), dtype("str"), dtype("array"),), 
dtype("array")) register_seeded_function("rand_gamma", (dtype("float64"), dtype("float64"),), dtype("float64")) register_function("UnphasedDiploidGtIndexCall", (dtype("int32"),), dtype("call")) register_function("lgt_to_gt", (dtype("call"), dtype("array"),), dtype("call")) diff --git a/hail/python/hail/ir/table_ir.py b/hail/python/hail/ir/table_ir.py index 6d8da139926..13b8a0ca52d 100644 --- a/hail/python/hail/ir/table_ir.py +++ b/hail/python/hail/ir/table_ir.py @@ -1,5 +1,6 @@ +from typing import Optional import hail as hl -from hail.expr.types import dtype +from hail.expr.types import dtype, HailType from hail.ir.base_ir import BaseIR, TableIR from hail.utils.java import Env from hail.utils.misc import escape_str, parsable_strings, dump_json, escape_id @@ -233,10 +234,15 @@ def _eq(self, other): class TableRead(TableIR): - def __init__(self, reader, drop_rows=False): + def __init__(self, + reader, + drop_rows: bool = False, + *, + _assert_type: Optional[HailType] = None): super().__init__() self.reader = reader self.drop_rows = drop_rows + self._type = _assert_type def head_str(self): return f'None {self.drop_rows} "{self.reader.render()}"' @@ -245,7 +251,8 @@ def _eq(self, other): return self.reader == other.reader and self.drop_rows == other.drop_rows def _compute_type(self): - self._type = Env.backend().table_type(self) + if self._type is None: + self._type = Env.backend().table_type(self) class MatrixEntriesTable(TableIR): diff --git a/hail/python/hail/ir/table_reader.py b/hail/python/hail/ir/table_reader.py index d519d4b9507..441192f4c9d 100644 --- a/hail/python/hail/ir/table_reader.py +++ b/hail/python/hail/ir/table_reader.py @@ -9,6 +9,8 @@ from hail.typecheck import typecheck_method, sequenceof, nullable, anytype, oneof from hail.utils.misc import escape_str +from .utils import impute_type_of_partition_interval_array + class TableReader(object): @abc.abstractmethod @@ -25,25 +27,9 @@ class TableNativeReader(TableReader): intervals=nullable(sequenceof(anytype)), filter_intervals=bool) def __init__(self, path, intervals, filter_intervals): - if intervals is not None: - t = hl.expr.impute_type(intervals) - if not isinstance(t, hl.tarray) and not isinstance(t.element_type, hl.tinterval): - raise TypeError("'intervals' must be an array of tintervals") - pt = t.element_type.point_type - if isinstance(pt, hl.tstruct): - self._interval_type = t - else: - self._interval_type = hl.tarray(hl.tinterval(hl.tstruct(__point=pt))) - self.path = path self.filter_intervals = filter_intervals - if intervals is not None and t != self._interval_type: - self.intervals = [hl.Interval(hl.Struct(__point=i.start), - hl.Struct(__point=i.end), - i.includes_start, - i.includes_end) for i in intervals] - else: - self.intervals = intervals + self.intervals, self._interval_type = impute_type_of_partition_interval_array(intervals) def render(self): reader = {'name': 'TableNativeReader', @@ -97,21 +83,32 @@ def __eq__(self, other): class StringTableReader(TableReader): - @typecheck_method(paths=oneof(str, sequenceof(str)), min_partitions=nullable(int)) - def __init__(self, paths, min_partitions): + @typecheck_method(paths=oneof(str, sequenceof(str)), min_partitions=nullable(int), force_bgz=bool, + force=bool, file_per_partition=bool) + def __init__(self, paths, min_partitions, force_bgz, force, file_per_partition): self.paths = paths self.min_partitions = min_partitions + self.force_bgz = force_bgz + self.force = force + self.file_per_partition = file_per_partition def render(self): reader = {'name': 
'StringTableReader', 'files': self.paths, - 'minPartitions': self.min_partitions} + 'minPartitions': self.min_partitions, + 'forceBGZ': self.force_bgz, + 'forceGZ': self.force, + 'filePerPartition': self.file_per_partition} + return escape_str(json.dumps(reader)) def __eq__(self, other): return isinstance(other, StringTableReader) and \ other.path == self.path and \ - other.min_partitions == self.min_partitions + other.min_partitions == self.min_partitions and \ + other.force_bgz == self.force_bgz and \ + other.force == self.force and \ + other.file_per_partition == self.file_per_partition class TableFromBlockMatrixNativeReader(TableReader): diff --git a/hail/python/hail/ir/utils.py b/hail/python/hail/ir/utils.py index 0c445beacb6..db12aae97c4 100644 --- a/hail/python/hail/ir/utils.py +++ b/hail/python/hail/ir/utils.py @@ -1,4 +1,33 @@ +from typing import Optional, List, Any, Tuple from .ir import Coalesce, ApplyUnaryPrimOp, FalseIR +import hail as hl + + +def impute_type_of_partition_interval_array( + intervals: Optional[List[Any]] +) -> Tuple[Optional[List[Any]], Any]: + if intervals is None: + return None, None + if len(intervals) == 0: + return [], hl.tarray(hl.tinterval(hl.tstruct())) + + t = hl.expr.impute_type(intervals) + if not isinstance(t, hl.tarray) or not isinstance(t.element_type, hl.tinterval): + raise TypeError("'intervals' must be an array of tintervals") + pt = t.element_type.point_type + + if isinstance(pt, hl.tstruct): + return intervals, t + + struct_intervals = [ + hl.Interval(hl.Struct(__point=i.start), + hl.Struct(__point=i.end), + i.includes_start, + i.includes_end) + for i in intervals + ] + struct_intervals_type = hl.tarray(hl.tinterval(hl.tstruct(__point=pt))) + return struct_intervals, struct_intervals_type def filter_predicate_with_keep(ir_pred, keep): @@ -16,3 +45,26 @@ def make_filter_and_replace(filter, find_replace): 'findPattern': find, 'replacePattern': replace } + + +def parse_type(string_expr, ttype): + if ttype == hl.tstr: + return string_expr + elif ttype == hl.tint32: + return hl.int32(string_expr) + elif ttype == hl.tint64: + return hl.int64(string_expr) + elif ttype == hl.tfloat32: + return hl.float32(string_expr) + elif ttype == hl.tfloat64: + return hl.float64(string_expr) + elif ttype == hl.tbool: + return hl.bool(string_expr) + elif ttype == hl.tcall: + return hl.parse_call(string_expr) + elif isinstance(ttype, hl.tlocus): + return hl.parse_locus(string_expr, ttype.reference_genome) + elif isinstance(ttype, hl.tinterval) and isinstance(ttype.point_type, hl.tlocus): + return hl.parse_locus_interval(string_expr, ttype.point_type.reference_genome) + else: + return hl.parse_json(string_expr, ttype) diff --git a/hail/python/hail/linalg/blockmatrix.py b/hail/python/hail/linalg/blockmatrix.py index 27e5ac3e52f..4df6eef2ce0 100644 --- a/hail/python/hail/linalg/blockmatrix.py +++ b/hail/python/hail/linalg/blockmatrix.py @@ -9,6 +9,7 @@ import hail as hl import hail.expr.aggregators as agg from hail.expr import construct_expr, construct_variable +from hail.expr.blockmatrix_type import tblockmatrix from hail.expr.expressions import (expr_float64, matrix_table_source, expr_ndarray, check_entry_indexed, expr_tuple, expr_array, expr_int32, expr_int64) from hail.ir import (BlockMatrixWrite, BlockMatrixMap2, ApplyBinaryPrimOp, F64, @@ -28,8 +29,8 @@ from hail.typecheck import (typecheck, typecheck_method, nullable, oneof, sliceof, sequenceof, lazy, enumeration, numeric, tupleof, func_spec, sized_tupleof) -from hail.utils import (new_temp_file, 
new_local_temp_file, local_path_uri, - storage_level, with_local_temp_file) +from hail.utils import (new_temp_file, local_path_uri, storage_level, with_local_temp_file, + new_local_temp_file) from hail.utils.java import Env block_matrix_type = lazy() @@ -227,8 +228,8 @@ def __init__(self, bmir): self._bmir = bmir @classmethod - @typecheck_method(path=str) - def read(cls, path): + @typecheck_method(path=str, _assert_type=nullable(tblockmatrix)) + def read(cls, path, *, _assert_type=None): """Reads a block matrix. Parameters @@ -240,14 +241,15 @@ def read(cls, path): ------- :class:`.BlockMatrix` """ - return cls(BlockMatrixRead(BlockMatrixNativeReader(path))) + return cls(BlockMatrixRead(BlockMatrixNativeReader(path), _assert_type=_assert_type)) @classmethod @typecheck_method(uri=str, n_rows=int, n_cols=int, - block_size=nullable(int)) - def fromfile(cls, uri, n_rows, n_cols, block_size=None): + block_size=nullable(int), + _assert_type=nullable(tblockmatrix)) + def fromfile(cls, uri, n_rows, n_cols, block_size=None, *, _assert_type=None): """Creates a block matrix from a binary file. Examples @@ -301,7 +303,7 @@ def fromfile(cls, uri, n_rows, n_cols, block_size=None): if not block_size: block_size = BlockMatrix.default_block_size() - return cls(BlockMatrixRead(BlockMatrixBinaryReader(uri, [n_rows, n_cols], block_size))) + return cls(BlockMatrixRead(BlockMatrixBinaryReader(uri, [n_rows, n_cols], block_size), _assert_type=_assert_type)) @classmethod @typecheck_method(ndarray=np.ndarray, @@ -335,6 +337,8 @@ def from_numpy(cls, ndarray, block_size=None): ------- :class:`.BlockMatrix` """ + from hail.backend.service_backend import ServiceBackend + if not block_size: block_size = BlockMatrix.default_block_size() @@ -345,9 +349,14 @@ def from_numpy(cls, ndarray, block_size=None): nd = _ndarray_as_float64(nd) n_rows, n_cols = nd.shape - path = new_local_temp_file() - uri = local_path_uri(path) - nd.tofile(path) + if isinstance(hl.current_backend(), ServiceBackend): + path = hl.TemporaryFilename().name + hl.current_backend().fs.open(path, mode='wb').write(nd.tobytes()) + uri = path + else: + path = new_local_temp_file() + nd.tofile(path) + uri = local_path_uri(path) return cls.fromfile(uri, n_rows, n_cols, block_size) @classmethod @@ -638,7 +647,7 @@ def checkpoint(self, path, overwrite=False, force_row_major=False, stage_locally before being copied to ``output``. 
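The service-backend branches of `from_numpy` and `to_numpy` both lean on the same byte-level round trip, sketched below with plain NumPy (no Hail objects involved); both sides assume C-ordered float64 data:

    import numpy as np

    nd = np.arange(6, dtype=np.float64).reshape(2, 3)

    raw = nd.tobytes()                           # what from_numpy writes to the temp file
    restored = np.frombuffer(raw).reshape(2, 3)  # np.frombuffer defaults to float64
    assert np.array_equal(nd, restored)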
""" self.write(path, overwrite, force_row_major, stage_locally) - return BlockMatrix.read(path) + return BlockMatrix.read(path, _assert_type=self._bmir._type) @staticmethod @typecheck(entry_expr=expr_float64, @@ -1203,12 +1212,20 @@ def to_numpy(self, _force_blocking=False): ------- :class:`numpy.ndarray` """ + from hail.backend.service_backend import ServiceBackend if self.n_rows * self.n_cols > 1 << 31 or _force_blocking: path = new_temp_file() self.export_blocks(path, binary=True) return BlockMatrix.rectangles_to_numpy(path, binary=True) + if isinstance(hl.current_backend(), ServiceBackend): + with hl.TemporaryFilename() as path: + self.tofile(path) + return np.frombuffer( + hl.current_backend().fs.open(path, mode='rb').read() + ).reshape((self.n_rows, self.n_cols)) + with with_local_temp_file() as path: uri = local_path_uri(path) self.tofile(uri) @@ -1315,7 +1332,7 @@ def persist(self, storage_level='MEMORY_AND_DISK'): """ id = Env.get_uid() Env.backend().execute(BlockMatrixWrite(self._bmir, BlockMatrixPersistWriter(id, storage_level))) - return BlockMatrix(BlockMatrixRead(BlockMatrixPersistReader(id, self._bmir))) + return BlockMatrix(BlockMatrixRead(BlockMatrixPersistReader(id, self._bmir), _assert_type=self._bmir._type)) def unpersist(self): """Unpersists this block matrix from memory/disk. diff --git a/hail/python/hail/matrixtable.py b/hail/python/hail/matrixtable.py index a3fcac35a26..4a14ac56c16 100644 --- a/hail/python/hail/matrixtable.py +++ b/hail/python/hail/matrixtable.py @@ -2500,8 +2500,20 @@ def checkpoint(self, output: str, overwrite: bool = False, stage_locally: bool = }""" if not _read_if_exists or not hl.hadoop_exists(f'{output}/_SUCCESS'): self.write(output=output, overwrite=overwrite, stage_locally=stage_locally, _codec_spec=_codec_spec) - return hl.read_matrix_table(output, _intervals=_intervals, _filter_intervals=_filter_intervals, - _drop_cols=_drop_cols, _drop_rows=_drop_rows) + _assert_type = self._type + _load_refs = False + else: + _assert_type = None + _load_refs = True + return hl.read_matrix_table( + output, + _intervals=_intervals, + _filter_intervals=_filter_intervals, + _drop_cols=_drop_cols, + _drop_rows=_drop_rows, + _assert_type=_assert_type, + _load_refs=_load_refs + ) @typecheck_method(output=str, overwrite=bool, @@ -3326,6 +3338,20 @@ def repartition(self, n_partitions: int, shuffle: bool = True) -> 'MatrixTable': :class:`.MatrixTable` Repartitioned dataset. """ + if hl.current_backend().requires_lowering: + tmp = hl.utils.new_temp_file() + + if len(self.row_key) == 0: + uid = Env.get_uid() + tmp2 = hl.utils.new_temp_file() + self.checkpoint(tmp2) + ht = hl.read_matrix_table(tmp2).add_row_index(uid).key_rows_by(uid) + ht.checkpoint(tmp) + return hl.read_matrix_table(tmp, _n_partitions=n_partitions).drop(uid) + else: + # checkpoint rather than write to use fast codec + self.checkpoint(tmp) + return hl.read_matrix_table(tmp, _n_partitions=n_partitions) return MatrixTable(ir.MatrixRepartition( self._mir, n_partitions, @@ -3361,6 +3387,9 @@ def naive_coalesce(self, max_partitions: int) -> 'MatrixTable': Matrix table with at most `max_partitions` partitions. 
""" + if hl.current_backend().requires_lowering: + return self.repartition(max_partitions) + return MatrixTable(ir.MatrixRepartition( self._mir, max_partitions, ir.RepartitionStrategy.NAIVE_COALESCE)) diff --git a/hail/python/hail/methods/impex.py b/hail/python/hail/methods/impex.py index 32bd2a73c72..fdbc3dbd856 100644 --- a/hail/python/hail/methods/impex.py +++ b/hail/python/hail/methods/impex.py @@ -6,7 +6,6 @@ import avro.schema from avro.datafile import DataFileReader from avro.io import DatumReader - import hail as hl from hail import ir from hail.expr import StructExpression, LocusExpression, \ @@ -16,12 +15,13 @@ from hail.expr.types import hail_type, tarray, tfloat64, tstr, tint32, tstruct, \ tcall, tbool, tint64, tfloat32 from hail.genetics.reference_genome import reference_genome_type +from hail.ir.utils import parse_type from hail.matrixtable import MatrixTable from hail.methods.misc import require_biallelic, require_row_key_variant, require_col_key_str from hail.table import Table from hail.typecheck import typecheck, nullable, oneof, dictof, anytype, \ sequenceof, enumeration, sized_tupleof, numeric, table_key_type, char -from hail.utils import wrap_to_list +from hail.utils.misc import wrap_to_list from hail.utils.java import Env, FatalError, jindexed_seq_args, warning @@ -258,7 +258,6 @@ def export_bgen(mt, output, gp=None, varid=None, rsid=None, parallel=None): def export_plink(dataset, output, call=None, fam_id=None, ind_id=None, pat_id=None, mat_id=None, is_female=None, pheno=None, varid=None, cm_position=None): - """Export a :class:`.MatrixTable` as `PLINK2 `__ BED, BIM and FAM files. @@ -523,7 +522,8 @@ def export_vcf(dataset, output, append_to_header=None, parallel=None, metadata=N invalid_info_fields = [f for f in info_fields if not re.fullmatch(r"^([A-Za-z_][0-9A-Za-z_.]*|1000G)", f)] if invalid_info_fields: invalid_info_str = ''.join(f'\n {f!r}' for f in invalid_info_fields) - warning('export_vcf: the following info field names are invalid in VCF 4.3 and may not work with some tools: ' + invalid_info_str) + warning( + 'export_vcf: the following info field names are invalid in VCF 4.3 and may not work with some tools: ' + invalid_info_str) row_fields_used = {'rsid', 'info', 'filters', 'qual'} @@ -833,7 +833,8 @@ def recode_contig(x): reference_genome, skip_invalid_intervals)) - elif len(t.row) >= 4 and tstruct(**dict([(n, typ) for n, typ in t.row.dtype._field_types.items()][:4])) == tstruct(f0=tstr, f1=tint32, f2=tint32, f3=tstr): + elif len(t.row) >= 4 and tstruct(**dict([(n, typ) for n, typ in t.row.dtype._field_types.items()][:4])) == tstruct( + f0=tstr, f1=tint32, f2=tint32, f3=tstr): t = t.select(interval=locus_interval_expr(recode_contig(t['f0']), t['f1'] + 1, t['f2'] + 1, @@ -1159,20 +1160,23 @@ def import_bgen(path, variants = hl.struct(locus=variants) if len(variants.dtype) == 0 or not variants.dtype._is_prefix_of(expected_vtype): - raise TypeError("'import_bgen' requires the expression type for 'variants' is a non-empty prefix of the BGEN key type: \n" - + f"\tFound: {repr(variants.dtype)}\n" - + f"\tExpected: {repr(expected_vtype)}\n") + raise TypeError( + "'import_bgen' requires the expression type for 'variants' is a non-empty prefix of the BGEN key type: \n" + + f"\tFound: {repr(variants.dtype)}\n" + + f"\tExpected: {repr(expected_vtype)}\n") uid = Env.get_uid() fnames = list(variants.dtype) - name, variants = variants._to_table(uid) # This will add back the other key fields of the source, which we don't want + name, variants = variants._to_table( + 
uid) # This will add back the other key fields of the source, which we don't want variants = variants.key_by(**{fname: variants[name][fname] for fname in fnames}) variants = variants.select() elif isinstance(variants, Table): if len(variants.key) == 0 or not variants.key.dtype._is_prefix_of(expected_vtype): - raise TypeError("'import_bgen' requires the row key type for 'variants' is a non-empty prefix of the BGEN key type: \n" - + f"\tFound: {repr(variants.key.dtype)}\n" - + f"\tExpected: {repr(expected_vtype)}\n") + raise TypeError( + "'import_bgen' requires the row key type for 'variants' is a non-empty prefix of the BGEN key type: \n" + + f"\tFound: {repr(variants.key.dtype)}\n" + + f"\tExpected: {repr(expected_vtype)}\n") variants = variants.select() else: assert isinstance(variants, list) @@ -1198,7 +1202,8 @@ def import_bgen(path, schema=expected_vtype, key=['locus', 'alleles']) except Exception: - raise TypeError(f"'import_bgen' requires all elements in 'variants' are a non-empty prefix of the BGEN key type: {repr(expected_vtype)}") + raise TypeError( + f"'import_bgen' requires all elements in 'variants' are a non-empty prefix of the BGEN key type: {repr(expected_vtype)}") reader = ir.MatrixBGENReader(path, sample_file, index_file_map, n_partitions, block_size, variants) @@ -1493,9 +1498,9 @@ def import_table(paths, .. code-block: text $cat data/table_with_json.tsv - id json_field - 1 {"foo": "bar", "x": 7} - 4 {"foo": "baz", "x": 100} + id json_field + 1 {"foo": "bar", "x": 7} + 4 {"foo": "baz", "x": 100} To import, we need to specify the types argument. @@ -1602,26 +1607,98 @@ def import_table(paths, ------- :class:`.Table` """ + if len(delimiter) < 1: + raise ValueError('import_table: empty delimiter is not supported') + + def split_lines(row, fields): + split_array = row.text._split_line(delimiter, missing=missing, quote=quote, regex=len(delimiter) > 1) + return hl.case().when(hl.len(split_array) == len(fields), split_array)\ + .or_error(hl.str("error in number of fields found: in file ") + hl.str(row.file) + + hl.str(f"\nExpected {len(fields)} {'fields' if len(fields) > 1 else 'field' }, found ") + + hl.str(hl.len(split_array)) + hl.if_else(hl.len(split_array) > 1, hl.str(" fields"), + hl.str(" field")) + hl.str("\nfor line consisting of '") + hl.str(row.text) + "'") + + def should_filter_line(hl_str): + to_filter = hl_str.matches(filter) if filter is not None else hl.bool(False) + if len(comment) > 0: + hl_comment = hl.array(comment) + filter_comment = hl_comment.any(lambda com: hl.if_else(hl.len(com) == 1, + hl_str.startswith(com), + hl_str.matches(com, True))) + else: + filter_comment = hl.bool(False) + filter_blank_line = hl.len(hl_str) == 0 if skip_blank_lines else hl.bool(False) + return hl.array([to_filter, filter_comment, filter_blank_line]).any(lambda filt: filt) + + def check_fields_for_duplicates(fields_to_check): + changed_fields = [] + unique_fields = {} + for field_idx, field_to_check in enumerate(fields_to_check): + field_copy = field_to_check + suffix = 1 + while unique_fields.get(field_copy) is not None: + field_copy = field_to_check + str(suffix) + suffix += 1 + if field_copy is not field_to_check: + changed_fields.append((field_copy, field_to_check)) + unique_fields[field_copy] = field_idx + for new_field_name in changed_fields: + fields_to_check[unique_fields[new_field_name[0]]] = new_field_name[0] + if len(changed_fields) > 0: + from itertools import starmap + print_changed_fields = list(starmap(lambda post, pre: f"{pre} -> {post}", changed_fields)) + 
hl.utils.warning(f"Found {len(changed_fields)} duplicate" + f" {'row field' if len(changed_fields) == 1 else 'row fields'}. Changed row fields as " + f"follows:\n" + "\n".join(print_changed_fields)) + return fields_to_check + + if len(delimiter) == 0: + raise ValueError("Hail does not currently support 0-character separators") + paths = wrap_to_list(paths) comment = wrap_to_list(comment) missing = wrap_to_list(missing) - tr = ir.TextTableReader(paths, min_partitions, types, comment, - delimiter, missing, no_header, quote, - skip_blank_lines, force_bgz, filter, find_replace, - force, source_file_field) - ht = Table(ir.TableRead(tr)) + ht = hl.import_lines(paths, min_partitions, force_bgz, force) + if skip_blank_lines is not None or len(comment) > 0 or filter is not None: + ht = ht.filter(should_filter_line(ht.text), keep=False) - strs = [] + if find_replace is not None: + ht = ht.annotate(text=ht['text'].replace(*find_replace)) + + first_row = ht.head(1) + first_row_value = first_row.annotate( + header=first_row.text._split_line(delimiter, missing=hl.empty_array(hl.tstr), quote=quote, regex=len(delimiter) > 1)).collect()[0] + + if first_row_value is None: + raise ValueError(f"Invalid file: no lines remaining after filters\n Offending file: {first_row.file}") + if not no_header: + unchecked_fields = first_row_value.header + fields = check_fields_for_duplicates(unchecked_fields) + ht = ht.filter(ht.text == first_row_value.text, keep=False) + else: + num_of_fields = list(range(0, len(first_row_value.header))) + fields = list(map(lambda f_num: "f" + str(f_num), num_of_fields)) + + ht = ht.annotate(split_text=hl.case().when(hl.len(ht.text) > 0, split_lines(ht, fields)) + .or_error(hl.str("Blank line found in file ") + ht.file)).drop('text') + + fields_to_value = {} + strs = [] if impute: - fields_to_guess = [f for f in ht.row if f not in types] + fields_to_impute_idx = [] + fields_to_guess = [] + for idx, field in enumerate(fields): + if types.get(field) is None: + fields_to_impute_idx.append(idx) + fields_to_guess.append(field) hl.utils.info('Reading table to impute column types') - guessed = ht.aggregate(hl.agg.array_agg(lambda x: hl.agg._impute_type(x), [ht[f] for f in fields_to_guess])) + guessed = ht.aggregate(hl.agg.array_agg(lambda x: hl.agg._impute_type(x), + [ht.split_text[i] for i in fields_to_impute_idx])) reasons = {f: 'user-supplied type' for f in types} - imputed_types = dict() for field, s in zip(fields_to_guess, guessed): if not s['anyNonMissing']: @@ -1643,28 +1720,31 @@ def import_table(paths, strs.append('Finished type imputation') all_types = dict(**types, **imputed_types) - for field in ht.row: - strs.append(f' Loading field {field!r} as type {all_types[field]} ({reasons[field]})') - tr = ir.TextTableReader(paths, min_partitions, all_types, comment, - delimiter, missing, no_header, quote, - skip_blank_lines, force_bgz, filter, find_replace, - force, source_file_field) - ht = Table(ir.TableRead(tr)) + for f_idx, field in enumerate(fields): + strs.append(f' Loading field {field!r} as type {all_types[field]} ({reasons[field]})') + fields_to_value[field] = parse_type(ht.split_text[f_idx], all_types[field]) else: strs.append('Reading table without type imputation') - for field in ht.row: + for f_idx, field in enumerate(fields): reason = 'user-supplied' if field in types else 'not specified' t = types.get(field, hl.tstr) + fields_to_value[field] = parse_type(ht.split_text[f_idx], t) strs.append(f' Loading field {field!r} as type {t} ({reason})') - if len(ht.row) < 30: + ht = 
ht.annotate(**fields_to_value).drop('split_text') + if source_file_field is not None: + source_file = {source_file_field: ht.file} + ht = ht.annotate(**source_file) + ht = ht.drop('file') + + if len(fields) < 30: hl.utils.info('\n'.join(strs)) else: from collections import Counter - strs2 = [f'Loading {len(ht.row)} fields. Counts by type:'] - for name, count in Counter(ht[f].dtype for f in ht.row).most_common(): + strs2 = [f'Loading {ht.row} fields. Counts by type:'] + for name, count in Counter(ht[f].dtype for f in fields).most_common(): strs2.append(f' {name}: {count}') hl.utils.info('\n'.join(strs2)) @@ -1674,8 +1754,9 @@ def import_table(paths, return ht -@typecheck(paths=oneof(str, sequenceof(str)), min_partitions=nullable(int)) -def import_lines(paths, min_partitions=None) -> Table: +@typecheck(paths=oneof(str, sequenceof(str)), min_partitions=nullable(int), force_bgz=bool, + force=bool, file_per_partition=bool) +def import_lines(paths, min_partitions=None, force_bgz=False, force=False, file_per_partition=False) -> Table: """Import lines of file(s) as a :class:`.Table` of strings. Examples @@ -1702,15 +1783,35 @@ def import_lines(paths, min_partitions=None) -> Table: Files to import. min_partitions: :obj:`int` or :obj:`None` Minimum number of partitions. + force_bgz : :obj:`bool` + If ``True``, load files as blocked gzip files, assuming + that they were actually compressed using the BGZ codec. This option is + useful when the file extension is not ``'.bgz'``, but the file is + blocked gzip, so that the file can be read in parallel and not on a + single node. + force : :obj:`bool` + If ``True``, load gzipped files serially on one core. This should + be used only when absolutely necessary, as processing time will be + increased due to lack of parallelism. + file_per_partition : :obj:`bool` + If ``True``, each file will be in a seperate partition. Not recommended + for most uses. Error thrown if ``True`` and `min_partitions` is less than + the number of files Returns ------- :class:`.Table` Table constructed from imported data. """ + paths = wrap_to_list(paths) - st_reader = ir.StringTableReader(paths, min_partitions) + if file_per_partition and min_partitions is not None: + if min_partitions > len(paths): + raise FatalError(f'file_per_partition is True while min partitions is {min_partitions} ,which is greater' + f' than the number of files, {len(paths)}') + + st_reader = ir.StringTableReader(paths, min_partitions, force_bgz, force, file_per_partition) string_table = Table(ir.TableRead(st_reader)) return string_table @@ -1893,6 +1994,110 @@ def import_matrix_table(paths, :class:`.MatrixTable` MatrixTable constructed from imported data. 
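An illustrative call of `import_lines` with the new options documented above (the paths are placeholders): each gzip-as-bgzip file lands in its own partition.

    import hail as hl

    ht = hl.import_lines(
        ['gs://my-bucket/shard-0.tsv.gz', 'gs://my-bucket/shard-1.tsv.gz'],
        force_bgz=True,           # treat .gz files as block-gzipped so they can be split
        file_per_partition=True,  # one partition per input file
    )
    ht.show(5)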
""" + row_key = wrap_to_list(row_key) + comment = wrap_to_list(comment) + paths = [hl.current_backend().fs.canonicalize_path(p) for p in wrap_to_list(paths)] + missing_list = wrap_to_list(missing) + + def comment_filter(table): + return hl.rbind(hl.array(comment), + lambda hl_comment: hl_comment.any(lambda com: hl.if_else(hl.len(com) == 1, + table.text.startswith(com), + table.text.matches(com, False)))) \ + if len(comment) > 0 else False + + def truncate(string_array, delim=", "): + if len(string_array) > 10: + string_array = string_array[:10] + string_array.append("...") + return delim.join(string_array) + + path_to_index = {path: idx for idx, path in enumerate(paths)} + + def format_file(file_name, hl_value=False): + if hl_value: + return hl.rbind(file_name.split('/'), lambda split_file: + hl.if_else(hl.len(split_file) <= 4, hl.str("/").join(file_name.split('/')[-4:]), + hl.str("/") + hl.str("/").join(file_name.split('/')[-4:]))) + else: + return "/".join(file_name.split('/')[-3:]) if len(file_name) <= 4 else \ + "/" + "/".join(file_name.split('/')[-3:]) + + def get_file_start(row): + first_lines = first_lines_table.collect() + if first_lines: + file_start_array = hl.array(list(map(lambda line: (line.file, line.idx), first_lines))) + match_file_idx = file_start_array.index(lambda line_tuple: line_tuple[0] == row.file) + return file_start_array[match_file_idx][1] + else: + return 0 + + def validate_row_fields(): + unique_fields = {} + duplicates = [] + header_idx = 0 + for header_rowf in header_dict['row_fields']: + rowf_type = row_fields.get(header_rowf) + if rowf_type is None: + import itertools as it + row_fields_string = '\n'.join(list(it.starmap( + lambda row_field, row_type: f" '{row_field}': {str(row_type)}", row_fields.items()))) + header_fields_string = "\n ".join(map(lambda field: f"'{field}'", header_dict['row_fields'])) + raise FatalError(f"in file {format_file(header_dict['path'])} found row field '{header_rowf}' that's" + f" not in 'row fields'\nrow fields found in file:\n {header_fields_string}" + f"\n'row fields':\n{row_fields_string}") + if header_rowf in unique_fields: + duplicates.append(header_rowf) + else: + unique_fields[header_rowf] = True + header_idx += 1 + if len(duplicates) > 0: + raise FatalError("Found following duplicate row fields in header:\n" + '\n'.join(duplicates)) + + def parse_entries(row): + return hl.range(num_of_row_fields, len(header_dict['column_ids']) + num_of_row_fields).map( + lambda entry_idx: parse_type_or_error(entry_type, row, entry_idx, not_entries=False)) + + def parse_rows(row): + rows_list = list(row_fields.items()) + return {rows_list[idx][0]: + parse_type_or_error(rows_list[idx][1], row, idx) for idx in range(num_of_row_fields)} + + def error_msg(row, idx, msg): + return (hl.str("in file ") + hl.str(format_file(row.file, True)) + + hl.str(" on line ") + hl.str(row.row_id - get_file_start(row) + 1) + + hl.str(" at value '") + hl.str(row.split_array[idx]) + hl.str("':\n") + hl.str(msg)) + + def parse_type_or_error(hail_type, row, idx, not_entries=True): + value = row.split_array[idx] + if hail_type == hl.tint32: + parsed_type = hl.parse_int32(value) + elif hail_type == hl.tint64: + parsed_type = hl.parse_int64(value) + elif hail_type == hl.tfloat32: + parsed_type = hl.parse_float32(value) + elif hail_type == hl.tfloat64: + parsed_type = hl.parse_float64(value) + else: + parsed_type = value + + if not_entries: + error_clarify_msg = hl.str(" at row field '") + hl.str(hl_row_fields[idx]) + hl.str("'") + else: + error_clarify_msg = 
(hl.str(" at column id '") + hl.str(hl_columns[idx - num_of_row_fields]) + + hl.str("' for entry field 'x' ")) + + return hl.if_else(hl.is_missing(value), hl.missing(hail_type), + hl.case().when(~hl.is_missing(parsed_type), parsed_type) + .or_error( + error_msg(row, idx, f"error parsing value into {str(hail_type)}" + error_clarify_msg))) + + num_of_row_fields = len(row_fields.keys()) + add_row_id = False + if len(row_key) == 0: + add_row_id = True + row_key = ['row_id'] + if sep is not None: if delimiter is not None: raise ValueError( @@ -1906,37 +2111,173 @@ def import_matrix_table(paths, if len(delimiter) != 1: raise FatalError('delimiter or sep must be a single character') - add_row_id = False - if isinstance(row_key, list) and len(row_key) == 0: - add_row_id = True - row_key = ['row_id'] - - if 'row_id' in row_fields and add_row_id: - raise FatalError( - "import_matrix_table reserves the field name 'row_id' for" - 'its own use, please use a different name') + if add_row_id: + if 'row_id' in row_fields: + raise FatalError( + "import_matrix_table reserves the field name 'row_id' for" + 'its own use, please use a different name') for k, v in row_fields.items(): if v not in {tint32, tint64, tfloat32, tfloat64, tstr}: raise FatalError( f'import_matrix_table expects field types to be one of:' f"'int32', 'int64', 'float32', 'float64', 'str': field {repr(k)} had type '{v}'") + if entry_type not in {tint32, tint64, tfloat32, tfloat64, tstr}: raise FatalError("""import_matrix_table expects entry types to be one of: 'int32', 'int64', 'float32', 'float64', 'str': found '{}'""".format(entry_type)) - reader = ir.TextMatrixReader(paths, - min_partitions, - row_fields, - entry_type, - missing, - not no_header, - delimiter, - force_bgz, - add_row_id, - wrap_to_list(comment)) - - mt = MatrixTable(ir.MatrixRead(reader)).key_rows_by(*wrap_to_list(row_key)) + if missing in delimiter: + raise FatalError(f"Missing value {missing} contains delimiter {delimiter}") + + ht = import_lines(paths, min_partitions, force_bgz=force_bgz).add_index(name='row_id') + # for checking every header matches + file_per_partition = import_lines(paths, force_bgz=force_bgz, file_per_partition=True) + file_per_partition = file_per_partition.filter(hl.bool(hl.len(file_per_partition.text) == 0) + | comment_filter(file_per_partition), False) + first_lines_table = file_per_partition._map_partitions(lambda rows: rows[:1]) + first_lines_table = first_lines_table.annotate(split_array=first_lines_table.text.split(delimiter)).add_index() + + if not no_header: + def validate_header_get_info_dict(): + two_first_lines = file_per_partition.head(2) + two_first_lines = two_first_lines.annotate(split_array=two_first_lines.text.split(delimiter)).collect() + header_line = two_first_lines[0] if two_first_lines else None + first_data_line = two_first_lines[1] if len(two_first_lines) > 1 else None + num_of_data_line_values = len(first_data_line.split_array) if len(two_first_lines) > 1 else 0 + num_of_header_values = len(header_line.split_array) if two_first_lines else 0 + if header_line is None or path_to_index[header_line.file] != 0: + raise ValueError(f"Expected header in every file but found empty file: {format_file(paths[0])}") + elif not first_data_line or first_data_line.file != header_line.file: + hl.utils.warning(f"File {format_file(header_line.file)} contains a header, but no lines of data") + if num_of_header_values < num_of_data_line_values: + raise ValueError(f"File {format_file(header_line.file)} contains one line assumed to be the 
header." + f"The header had a length of {num_of_header_values} while the number" + f"of row fields is {num_of_row_fields}") + user_row_fields = header_line.split_array[:num_of_row_fields] + column_ids = header_line.split_array[num_of_row_fields:] + elif num_of_data_line_values != num_of_header_values: + if num_of_data_line_values == num_of_header_values + num_of_row_fields: + user_row_fields = ["f" + str(f_idx) for f_idx in list(range(0, num_of_row_fields))] + column_ids = header_line.split_array + else: + raise ValueError( + f"In file {format_file(header_line.file)}, expected the header line to match either:\n" + f"rowField0 rowField1 ... rowField${num_of_row_fields} colId0 colId1 ...\nor\n" + f" colId0 colId1 ...\nInstead the first two lines were:\nInstead the first two lin" + f"es were:\n{header_line.text}\n{first_data_line.text}\nThe first line contained" + f" {num_of_header_values} separated values and the second line" + f" contained {num_of_data_line_values}") + else: + user_row_fields = header_line.split_array[:num_of_row_fields] + column_ids = header_line.split_array[num_of_row_fields:] + return {'text': header_line.text, 'header_values': header_line.split_array, 'path': header_line.file, + 'row_fields': user_row_fields, 'column_ids': column_ids} + + def warn_if_duplicate_col_ids(): + time_col_id_encountered_dict = {} + duplicate_cols = [] + for item in header_dict['column_ids']: + if time_col_id_encountered_dict.get(item) is not None: + duplicate_cols.append(item) + time_col_id_encountered_dict[item] = time_col_id_encountered_dict[item] + 1 + time_col_id_encountered_dict[item] = 1 + if len(duplicate_cols) == 0: + return + + import itertools as it + duplicates_to_print = sorted( + [('"' + dup_field + '"', '(' + str(time_col_id_encountered_dict[dup_field]) + ')') + for dup_field in duplicate_cols], key=lambda dup_values: dup_values[1]) + + duplicates_to_print = truncate(duplicates_to_print) + duplicates_to_print_formatted = it.starmap(lambda dup, time_found: time_found + + " " + dup, duplicates_to_print) + ht.utils.warning(f"Found {len(duplicate_cols)} duplicate column id" + + f"{'s' if len(duplicate_cols) > 1 else ''}\n" + '\n'.join(duplicates_to_print_formatted)) + + def validate_all_headers(): + all_headers = first_lines_table.collect() + for header in all_headers: + if header_dict['text'] != header.text: + if len(header_dict['header_values']) == len(header.split_array): + zipped_headers = list(zip(header_dict['header_values'], header.split_array)) + for header_idx, header_values in enumerate(zipped_headers): + main_header_value = header_values[0] + error_header_value = header_values[1] + if main_header_value != error_header_value: + raise ValueError("invalid header: expected elements to be identical for all input paths" + f". Found different elements at position {header_idx + 1}" + f"\n in file {format_file(header.file)} with value " + f"'{error_header_value}' when expecting value '{main_header_value}'") + else: + raise ValueError(f"invalid header: lengths of headers differ. 
\n" + f"{len(header_dict['header_values'])} elements in " + f"{format_file(header_dict['path'])}:\n" + + truncate(["'{}'".format(value) for value in header_dict['header_values']]) + + f" {len(header.split_array)} elements in {format_file(header.file)}:\n" + + truncate(["'{}'".format(value) for value in header.split_array])) + + header_dict = validate_header_get_info_dict() + warn_if_duplicate_col_ids() + validate_all_headers() + + else: + first_line = first_lines_table.head(1).collect() + if not first_line or path_to_index[first_line[0].file] != 0: + hl.utils.warning( + f"File {format_file(paths[0])} is empty and has no header, so we assume no columns") + header_dict = {'header_values': [], + 'row_fields': ["f" + str(f_idx) for f_idx in list(range(0, num_of_row_fields))], + 'column_ids': [] + } + else: + first_line = first_line[0] + header_dict = {'header_values': [], + 'row_fields': ["f" + str(f_idx) for f_idx in list(range(0, num_of_row_fields))], + 'column_ids': + [col_id for col_id in list(range(0, len(first_line.split_array) - num_of_row_fields))] + } + + validate_row_fields() + header_filter = ht.text == header_dict['text'] if not no_header else False + + ht = ht.filter(hl.bool(hl.len(ht.text) == 0) | comment_filter(ht) | header_filter, False) + + hl_columns = hl.array(header_dict['column_ids']) if len(header_dict['column_ids']) > 0 else hl.empty_array(hl.tstr) + hl_row_fields = hl.array(header_dict['row_fields']) if len(header_dict['row_fields']) > 0 \ + else hl.empty_array(hl.tstr) + ht = ht.annotate(split_array=ht.text._split_line(delimiter, missing_list, quote=None, regex=False)).add_index( + 'row_id') + + ht = ht.annotate(split_array=hl.case().when(hl.len(ht.split_array) >= num_of_row_fields, ht.split_array) + .or_error(error_msg(ht, hl.len(ht.split_array) - 1, + " unexpected end of line while reading row field"))) + + n_column_ids = len(header_dict['column_ids']) + n_in_split_array = hl.len(ht.split_array[num_of_row_fields:(num_of_row_fields + n_column_ids)]) + ht = ht.annotate(split_array=hl.case().when( + n_column_ids <= n_in_split_array, + ht.split_array + ).or_error( + error_msg( + ht, + hl.len(ht.split_array) - 1, + " unexpected end of line while reading entries" + ) + )) + + ht = ht.annotate(**parse_rows(ht), entries=parse_entries(ht).map(lambda entry: hl.struct(x=entry)))\ + .drop('text', 'split_array', 'file') + + ht = ht.annotate_globals(cols=hl.range(0, len(header_dict['column_ids'])) + .map(lambda col_idx: hl.struct(col_id=hl_columns[col_idx]))) + + if not add_row_id: + ht = ht.drop('row_id') + + mt = ht._unlocalize_entries('entries', 'cols', ['col_id']) + mt = mt.key_rows_by(*row_key) return mt @@ -2109,9 +2450,11 @@ def import_plink(bed, bim, fam, _filter_intervals=bool, _drop_cols=bool, _drop_rows=bool, - _n_partitions=nullable(int)) + _n_partitions=nullable(int), + _assert_type=nullable(hl.tmatrix), + _load_refs=bool) def read_matrix_table(path, *, _intervals=None, _filter_intervals=False, _drop_cols=False, - _drop_rows=False, _n_partitions=None) -> MatrixTable: + _drop_rows=False, _n_partitions=None, _assert_type=None, _load_refs=True) -> MatrixTable: """Read in a :class:`.MatrixTable` written with :meth:`.MatrixTable.write`. 
Parameters @@ -2123,17 +2466,27 @@ def read_matrix_table(path, *, _intervals=None, _filter_intervals=False, _drop_c ------- :class:`.MatrixTable` """ - for rg_config in Env.backend().load_references_from_dataset(path): - hl.ReferenceGenome._from_config(rg_config) + if _load_refs: + for rg_config in Env.backend().load_references_from_dataset(path): + hl.ReferenceGenome._from_config(rg_config) if _intervals is not None and _n_partitions is not None: raise ValueError("'read_matrix_table' does not support both _intervals and _n_partitions") mt = MatrixTable(ir.MatrixRead(ir.MatrixNativeReader(path, _intervals, _filter_intervals), - _drop_cols, _drop_rows)) + _drop_cols, + _drop_rows, + _assert_type=_assert_type)) if _n_partitions: intervals = mt._calculate_new_partitions(_n_partitions) - return read_matrix_table(path, _drop_rows=_drop_rows, _drop_cols=_drop_cols, _intervals=intervals) + return read_matrix_table( + path, + _drop_rows=_drop_rows, + _drop_cols=_drop_cols, + _intervals=intervals, + _assert_type=_assert_type, + _load_refs=_load_refs + ) return mt @@ -2436,7 +2789,8 @@ def import_gvcfs(path, rg = reference_genome.name if reference_genome else None if partitions is not None: - partitions, partitions_type = hl.utils._dumps_partitions(partitions, hl.tstruct(locus=hl.tlocus(rg), alleles=hl.tarray(hl.tstr))) + partitions, partitions_type = hl.utils._dumps_partitions(partitions, hl.tstruct(locus=hl.tlocus(rg), + alleles=hl.tarray(hl.tstr))) else: partitions_type = None @@ -2554,8 +2908,16 @@ def index_bgen(path, @typecheck(path=str, _intervals=nullable(sequenceof(anytype)), _filter_intervals=bool, - _n_partitions=nullable(int)) -def read_table(path, *, _intervals=None, _filter_intervals=False, _n_partitions=None) -> Table: + _n_partitions=nullable(int), + _assert_type=nullable(hl.ttable), + _load_refs=bool) +def read_table(path, + *, + _intervals=None, + _filter_intervals=False, + _n_partitions=None, + _assert_type=None, + _load_refs=True) -> Table: """Read in a :class:`.Table` written with :meth:`.Table.write`. 
Parameters @@ -2567,17 +2929,18 @@ def read_table(path, *, _intervals=None, _filter_intervals=False, _n_partitions= ------- :class:`.Table` """ - for rg_config in Env.backend().load_references_from_dataset(path): - hl.ReferenceGenome._from_config(rg_config) + if _load_refs: + for rg_config in Env.backend().load_references_from_dataset(path): + hl.ReferenceGenome._from_config(rg_config) if _intervals is not None and _n_partitions is not None: raise ValueError("'read_table' does not support both _intervals and _n_partitions") tr = ir.TableNativeReader(path, _intervals, _filter_intervals) - ht = Table(ir.TableRead(tr, False)) + ht = Table(ir.TableRead(tr, False, _assert_type=_assert_type)) if _n_partitions: intervals = ht._calculate_new_partitions(_n_partitions) - return read_table(path, _intervals=intervals) + return read_table(path, _intervals=intervals, _assert_type=_assert_type, _load_refs=_load_refs) return ht diff --git a/hail/python/hail/methods/statgen.py b/hail/python/hail/methods/statgen.py index 8ff5d520e49..6b32a452a09 100644 --- a/hail/python/hail/methods/statgen.py +++ b/hail/python/hail/methods/statgen.py @@ -821,6 +821,9 @@ def logistic_regression_rows(test, y, x, covariates, pass_through=()) -> hail.Ta ------- :class:`.Table` """ + if not isinstance(Env.backend(), SparkBackend): + return _logistic_regression_rows_nd(test, y, x, covariates, pass_through) + if len(covariates) == 0: raise ValueError('logistic regression requires at least one covariate expression') @@ -3492,6 +3495,7 @@ def ld_prune(call_expr, r2=0.2, bp_window_size=1000000, memory_per_core=256, kee :class:`.Table` Table of a maximal independent set of variants. """ + hl.utils.no_service_backend('ld_prune') if block_size is None: block_size = BlockMatrix.default_block_size() diff --git a/hail/python/hail/stats/linear_mixed_model.py b/hail/python/hail/stats/linear_mixed_model.py index 0368dc4501c..62e03b864a5 100644 --- a/hail/python/hail/stats/linear_mixed_model.py +++ b/hail/python/hail/stats/linear_mixed_model.py @@ -282,6 +282,7 @@ class LinearMixedModel(object): x=nullable(np.ndarray), p_path=nullable(str)) def __init__(self, py, px, s, y=None, x=None, p_path=None): + hl.utils.no_service_backend('linear_mixed_model') if y is None and x is None: low_rank = False elif y is not None and x is not None: diff --git a/hail/python/hail/table.py b/hail/python/hail/table.py index 3b99b73c841..842516daf8d 100644 --- a/hail/python/hail/table.py +++ b/hail/python/hail/table.py @@ -834,7 +834,7 @@ def annotate(self, **named_exprs) -> 'Table': @typecheck_method(expr=expr_bool, keep=bool) - def filter(self, expr, keep=True) -> 'Table': + def filter(self, expr, keep: bool = True) -> 'Table': """Filter rows. 
Examples @@ -1289,7 +1289,18 @@ def checkpoint(self, output: str, overwrite: bool = False, stage_locally: bool = if not _read_if_exists or not hl.hadoop_exists(f'{output}/_SUCCESS'): self.write(output=output, overwrite=overwrite, stage_locally=stage_locally, _codec_spec=_codec_spec) - return hl.read_table(output, _intervals=_intervals, _filter_intervals=_filter_intervals) + _assert_type = self._type + _load_refs = False + else: + _assert_type = None + _load_refs = True + return hl.read_table( + output, + _intervals=_intervals, + _filter_intervals=_filter_intervals, + _assert_type=_assert_type, + _load_refs=_load_refs + ) @typecheck_method(output=str, overwrite=bool, @@ -1745,6 +1756,8 @@ def rekey_f(t): return t if is_interval: + if all_matches: + hl.utils.no_service_backend('interval join with all_matches=True') left = Table(ir.TableIntervalJoin(left._tir, self._tir, uid, all_matches)) else: left = Table(ir.TableLeftJoinRightDistinct(left._tir, self._tir, uid)) @@ -2315,6 +2328,20 @@ def repartition(self, n, shuffle=True) -> 'Table': :class:`.Table` Repartitioned table. """ + if hl.current_backend().requires_lowering: + tmp = hl.utils.new_temp_file() + + if len(self.key) == 0: + uid = Env.get_uid() + tmp2 = hl.utils.new_temp_file() + self.checkpoint(tmp2) + ht = hl.read_table(tmp2).add_index(uid).key_by(uid) + ht.checkpoint(tmp) + return hl.read_table(tmp, _n_partitions=n).key_by().drop(uid) + else: + # checkpoint rather than write to use fast codec + self.checkpoint(tmp) + return hl.read_table(tmp, _n_partitions=n) return Table(ir.TableRepartition( self._tir, n, ir.RepartitionStrategy.SHUFFLE if shuffle else ir.RepartitionStrategy.COALESCE)) @@ -2348,6 +2375,8 @@ def naive_coalesce(self, max_partitions: int) -> 'Table': :class:`.Table` Table with at most `max_partitions` partitions. """ + if hl.current_backend().requires_lowering: + return self.repartition(max_partitions) return Table(ir.TableRepartition( self._tir, max_partitions, ir.RepartitionStrategy.NAIVE_COALESCE)) diff --git a/hail/python/hail/utils/__init__.py b/hail/python/hail/utils/__init__.py index 8db153d0b31..db24c963fc7 100644 --- a/hail/python/hail/utils/__init__.py +++ b/hail/python/hail/utils/__init__.py @@ -1,7 +1,7 @@ from .misc import (wrap_to_list, get_env_or_default, uri_path, local_path_uri, new_temp_file, new_local_temp_dir, new_local_temp_file, with_local_temp_file, storage_level, range_matrix_table, range_table, run_command, HailSeedGenerator, timestamp_path, - _dumps_partitions, default_handler, guess_cloud_spark_provider) + _dumps_partitions, default_handler, guess_cloud_spark_provider, no_service_backend) from .hadoop_utils import (hadoop_copy, hadoop_open, hadoop_exists, hadoop_is_dir, hadoop_is_file, hadoop_ls, hadoop_scheme_supported, hadoop_stat, copy_log) from .struct import Struct @@ -52,5 +52,6 @@ 'deduplicate', 'with_local_temp_file', 'guess_cloud_spark_provider', + 'no_service_backend', 'JSONEncoder', ] diff --git a/hail/python/hail/utils/hadoop_utils.py b/hail/python/hail/utils/hadoop_utils.py index 8e8c1a7ea3c..6ea5cac3ad0 100644 --- a/hail/python/hail/utils/hadoop_utils.py +++ b/hail/python/hail/utils/hadoop_utils.py @@ -1,4 +1,8 @@ -from typing import Dict, List +import gzip +import io +import os.path +from typing import Any, Dict, List + from hail.fs.hadoop_fs import HadoopFS from hail.utils.java import Env from hail.typecheck import typecheck, enumeration @@ -77,11 +81,20 @@ def hadoop_open(path: str, mode: str = 'r', buffer_size: int = 8192): ------- Readable or writable file handle. 
""" - # legacy hack + # pile of hacks to preserve some legacy behavior, like auto gzip fs = Env.fs() if isinstance(fs, HadoopFS): return fs.legacy_open(path, mode, buffer_size) - return fs.open(path, mode, buffer_size) + _, ext = os.path.splitext(path) + if ext in ('.gz', '.bgz'): + binary_mode = 'wb' if mode[0] == 'w' else 'rb' + file = fs.open(path, binary_mode, buffer_size) + file = gzip.GzipFile(fileobj=file, mode=mode) + if 'b' not in mode: + file = io.TextIOWrapper(file, encoding='utf-8') + else: + file = fs.open(path, mode, buffer_size) + return file @typecheck(src=str, @@ -161,7 +174,7 @@ def hadoop_is_dir(path: str) -> bool: return Env.fs().is_dir(path) -def hadoop_stat(path: str) -> Dict: +def hadoop_stat(path: str) -> Dict[str, Any]: """Returns information about the file or directory at a given path. Notes @@ -188,7 +201,7 @@ def hadoop_stat(path: str) -> Dict: return Env.fs().stat(path).to_legacy_dict() -def hadoop_ls(path: str) -> List[Dict]: +def hadoop_ls(path: str) -> List[Dict[str, Any]]: """Returns information about files at `path`. Notes diff --git a/hail/python/hail/utils/java.py b/hail/python/hail/utils/java.py index 6175d0f2104..4f20829d0f9 100644 --- a/hail/python/hail/utils/java.py +++ b/hail/python/hail/utils/java.py @@ -1,9 +1,19 @@ -import warnings +from typing import Optional import os import sys import re import hail +from hailtop.config import get_user_config + + +def choose_backend(backend: Optional[str] = None) -> str: + return ( + backend + or os.environ.get('HAIL_QUERY_BACKEND', None) + or get_user_config().get('query', 'backend', fallback=None) + or 'spark' + ) class FatalError(Exception): @@ -46,21 +56,8 @@ def hc() -> 'hail.context.HailContext': if not Env._hc: sys.stderr.write("Initializing Hail with default parameters...\n") sys.stderr.flush() - - backend_name = os.environ.get('HAIL_QUERY_BACKEND', 'spark') - if backend_name == 'service': - from hail.context import init_service - import asyncio - warnings.warn('When using the query service backend, use `await Env._async_hc()\'') - asyncio.get_event_loop().run_until_complete(init_service()) - elif backend_name == 'spark': - from hail.context import init - init() - elif backend_name == 'local': - from hail.context import init_local - init_local() - else: - raise ValueError(f'unknown Hail Query backend: {backend_name}') + from ..context import init + init() assert Env._hc is not None return Env._hc @@ -71,10 +68,10 @@ async def _async_hc() -> 'hail.context.HailContext': sys.stderr.write("Initializing Hail with default parameters...\n") sys.stderr.flush() - backend_name = os.environ.get('HAIL_QUERY_BACKEND', 'spark') + backend_name = choose_backend() if backend_name == 'service': - from hail.context import init_service - await init_service() + from hail.context import init_batch + await init_batch() else: return Env.hc() assert Env._hc is not None diff --git a/hail/python/hail/utils/misc.py b/hail/python/hail/utils/misc.py index e8a00051478..cdfbbf72d67 100644 --- a/hail/python/hail/utils/misc.py +++ b/hail/python/hail/utils/misc.py @@ -1,20 +1,21 @@ -from typing import Optional -from typing_extensions import Literal -import os import atexit import datetime -import string import difflib +import json +import os +import re +import secrets import shutil +import string import tempfile -import secrets from collections import defaultdict, Counter +from contextlib import contextmanager +from io import StringIO from random import Random -import json -import re +from typing import Optional from urllib.parse import 
urlparse -from io import StringIO -from contextlib import contextmanager + +from typing_extensions import Literal import hail import hail as hl @@ -66,7 +67,17 @@ def range_matrix_table(n_rows, n_cols, n_partitions=None) -> 'hail.MatrixTable': check_nonnegative_and_in_range('range_matrix_table', 'n_cols', n_cols) if n_partitions is not None: check_positive_and_in_range('range_matrix_table', 'n_partitions', n_partitions) - return hail.MatrixTable(hail.ir.MatrixRead(hail.ir.MatrixRangeReader(n_rows, n_cols, n_partitions))) + return hail.MatrixTable(hail.ir.MatrixRead( + hail.ir.MatrixRangeReader(n_rows, n_cols, n_partitions), + _assert_type=hl.tmatrix( + hl.tstruct(), + hl.tstruct(col_idx=hl.tint32), + ['col_idx'], + hl.tstruct(row_idx=hl.tint32), + ['row_idx'], + hl.tstruct() + ) + )) @typecheck(n=int, n_partitions=nullable(int)) @@ -628,3 +639,12 @@ def guess_cloud_spark_provider() -> Optional[Literal['dataproc', 'hdinsight']]: if 'AZURE_SPARK' in os.environ or 'hdinsight' in os.getenv('CLASSPATH', ''): return 'hdinsight' return None + + +def no_service_backend(unsupported_feature): + from hail import current_backend + from hail.backend.service_backend import ServiceBackend + if isinstance(current_backend(), ServiceBackend): + raise NotImplementedError(f'{unsupported_feature!r} is not yet supported on the service backend.' + f'\n If this is a pressing need, please alert the team on the discussion' + f'\n forum to aid in prioritization: https://discuss.hail.is') diff --git a/hail/python/hail/vds/combiner/variant_dataset_combiner.py b/hail/python/hail/vds/combiner/variant_dataset_combiner.py index 6708af3bd3f..388c21dd510 100644 --- a/hail/python/hail/vds/combiner/variant_dataset_combiner.py +++ b/hail/python/hail/vds/combiner/variant_dataset_combiner.py @@ -100,6 +100,7 @@ def __init__(self, gvcf_info_to_keep: Optional[Collection[str]] = None, gvcf_reference_entry_fields_to_keep: Optional[Collection[str]] = None, ): + hl.utils.no_service_backend('VariantDatasetCombiner') if not (vdses or gvcfs): raise ValueError("one of 'vdses' or 'gvcfs' must be nonempty") if not gvcf_import_intervals: diff --git a/hail/python/hail/vds/variant_dataset.py b/hail/python/hail/vds/variant_dataset.py index 62981c87902..b6d13df69f2 100644 --- a/hail/python/hail/vds/variant_dataset.py +++ b/hail/python/hail/vds/variant_dataset.py @@ -25,6 +25,7 @@ def read_vds(path, *, intervals=None, n_partitions=None) -> 'VariantDataset': assert n_partitions is not None reference_data = hl.read_matrix_table(VariantDataset._reference_path(path)) intervals = reference_data._calculate_new_partitions(n_partitions) + assert len(intervals) > 0 reference_data = hl.read_matrix_table(VariantDataset._reference_path(path), _intervals=intervals) variant_data = hl.read_matrix_table(VariantDataset._variants_path(path), _intervals=intervals) return VariantDataset(reference_data, variant_data) diff --git a/hail/python/hailtop/aiotools/copy.py b/hail/python/hailtop/aiotools/copy.py index fcf29708f9c..9c47445f723 100644 --- a/hail/python/hailtop/aiotools/copy.py +++ b/hail/python/hailtop/aiotools/copy.py @@ -1,8 +1,9 @@ from typing import List, Optional, Dict -import json +import argparse import asyncio +import json import logging -import argparse +import sys import uvloop from concurrent.futures import ThreadPoolExecutor @@ -18,7 +19,7 @@ async def copy(*, gcs_kwargs: Optional[dict] = None, azure_kwargs: Optional[dict] = None, s3_kwargs: Optional[dict] = None, - transfers: List[Transfer] + transfers: List[Transfer], ) -> None: with 
ThreadPoolExecutor() as thread_pool: if max_simultaneous_transfers is None: @@ -81,8 +82,8 @@ async def main() -> None: parser = argparse.ArgumentParser(description='Hail copy tool') parser.add_argument('requester_pays_project', type=str, help='a JSON string indicating the Google project to which to charge egress costs') - parser.add_argument('files', type=str, - help='a JSON array of JSON objects indicating from where and to where to copy files') + parser.add_argument('files', type=str, nargs='?', + help='a JSON array of JSON objects indicating from where and to where to copy files. If empty or "-", read the array from standard input instead') parser.add_argument('--max-simultaneous-transfers', type=int, help='The limit on the number of simultaneous transfers. Large files are uploaded as multiple transfers. This parameter sets an upper bound on the number of open source and destination files.') parser.add_argument('-v', '--verbose', action='store_const', @@ -95,6 +96,8 @@ async def main() -> None: logging.root.setLevel(logging.INFO) requester_pays_project = json.loads(args.requester_pays_project) + if args.files is None or args.files == '-': + args.files = sys.stdin.read() files = json.loads(args.files) gcs_kwargs = {'project': requester_pays_project} diff --git a/hail/python/hailtop/batch_client/aioclient.py b/hail/python/hailtop/batch_client/aioclient.py index 9fb1d9e0aa0..f3611aa1637 100644 --- a/hail/python/hailtop/batch_client/aioclient.py +++ b/hail/python/hailtop/batch_client/aioclient.py @@ -431,8 +431,8 @@ def create_job(self, image: str, command: List[str], *, mount_docker_socket: boo {'command': command, 'image': image, 'mount_docker_socket': mount_docker_socket, 'type': 'docker'}, **kwargs ) - def create_jvm_job(self, command: List[str], **kwargs): - return self._create_job({'command': command, 'type': 'jvm'}, **kwargs) + def create_jvm_job(self, jar_spec: Dict[str, str], argv: List[str], **kwargs): + return self._create_job({'type': 'jvm', 'jar_spec': jar_spec, 'command': argv}, **kwargs) def _create_job(self, process: dict, diff --git a/hail/python/hailtop/config/user_config.py b/hail/python/hailtop/config/user_config.py index a7d00ec33cd..809d580a3d5 100644 --- a/hail/python/hailtop/config/user_config.py +++ b/hail/python/hailtop/config/user_config.py @@ -31,15 +31,12 @@ def get_user_config() -> configparser.ConfigParser: if user_config is None: user_config = configparser.ConfigParser() config_file = get_user_config_path() - os.makedirs(config_file.parent, exist_ok=True) # in older versions, the config file was accidentally named # config.yaml, if the new config does not exist, and the old # one does, silently rename it old_path = config_file.with_name('config.yaml') if old_path.exists() and not config_file.exists(): old_path.rename(config_file) - else: - config_file.touch(exist_ok=True) user_config.read(config_file) return user_config diff --git a/hail/python/hailtop/hailctl/config/cli.py b/hail/python/hailtop/hailctl/config/cli.py index 67d35ea5af9..fb209a7f831 100644 --- a/hail/python/hailtop/hailctl/config/cli.py +++ b/hail/python/hailtop/hailctl/config/cli.py @@ -1,3 +1,4 @@ +import os import sys import argparse import re @@ -125,7 +126,12 @@ def main(args): if section not in config: config[section] = {} config[section][key] = args.value - with open(config_file, 'w', encoding='utf-8') as f: + try: + f = open(config_file, 'w', encoding='utf-8') + except FileNotFoundError: + os.makedirs(config_file.parent, exist_ok=True) + f = open(config_file, 'w', encoding='utf-8') + 
with f: config.write(f) sys.exit(0) if args.module == 'unset': diff --git a/hail/python/hailtop/utils/__init__.py b/hail/python/hailtop/utils/__init__.py index 2906ee619a9..fb31c3cd644 100644 --- a/hail/python/hailtop/utils/__init__.py +++ b/hail/python/hailtop/utils/__init__.py @@ -1,4 +1,6 @@ -from .time import time_msecs, time_msecs_str, humanize_timedelta_msecs, parse_timestamp_msecs +from .time import ( + time_msecs, time_msecs_str, humanize_timedelta_msecs, parse_timestamp_msecs, + time_ns) from .utils import ( unzip, async_to_blocking, blocking_to_async, AsyncWorkerPool, bounded_gather, grouped, sync_sleep_and_backoff, sleep_and_backoff, is_transient_error, @@ -89,4 +91,5 @@ 'retry_all_errors_n_times', 'parse_timestamp_msecs', 'Timings', + 'time_ns', ] diff --git a/hail/python/hailtop/utils/time.py b/hail/python/hailtop/utils/time.py index 56a2448f6ce..8a1ca5b3118 100644 --- a/hail/python/hailtop/utils/time.py +++ b/hail/python/hailtop/utils/time.py @@ -7,6 +7,10 @@ def time_msecs() -> int: return int(time.time() * 1000 + 0.5) +def time_ns() -> int: + return time.monotonic_ns() + + def time_msecs_str(t) -> str: return datetime.datetime.utcfromtimestamp(t / 1000).strftime( '%Y-%m-%dT%H:%M:%SZ') diff --git a/hail/python/hailtop/utils/utils.py b/hail/python/hailtop/utils/utils.py index 29d603090e3..834c7a52681 100644 --- a/hail/python/hailtop/utils/utils.py +++ b/hail/python/hailtop/utils/utils.py @@ -388,9 +388,10 @@ def call(self, f, *args, **kwargs) -> asyncio.Task: self._counter += 1 async def run_and_cleanup(): + retval = None try: async with self._sema: - await f(*args, **kwargs) + retval = await f(*args, **kwargs) except asyncio.CancelledError: pass except: @@ -402,10 +403,11 @@ async def run_and_cleanup(): log.info('discarding exception', exc_info=True) if self._pending is None: - return + return retval del self._pending[id] if not self._pending: self._done_event.set() + return retval t = asyncio.create_task(run_and_cleanup()) self._pending[id] = t @@ -837,6 +839,13 @@ async def run_if_changed(changed, f, *args, **kwargs): while True: changed.clear() should_wait = await f(*args, **kwargs) + # 0.5 is arbitrary, but should be short enough not to greatly + # increase latency and long enough to reduce the impact of + # wasteful spinning when `should_wait` is always true and the + # event is constantly being set. This was instated to + # avoid wasteful repetition of scheduling loops, but + # might not always be desirable, especially in very low-latency batches. 
+        await asyncio.sleep(0.5)
         if should_wait:
             await changed.wait()
diff --git a/hail/python/requirements.txt b/hail/python/requirements.txt
index da378786689..67ccbee0986 100644
--- a/hail/python/requirements.txt
+++ b/hail/python/requirements.txt
@@ -10,12 +10,12 @@ botocore>=1.20,<2.0
 decorator<5
 Deprecated>=1.2.10,<1.3
 dill>=0.3.1.1,<0.4
-gcsfs==2021.*
 google-auth==1.27.0
 google-cloud-storage==1.25.*
 humanize==1.0.0
 hurry.filesize==0.9
 janus>=0.6,<1.1
+Jinja2==3.0.3
 nest_asyncio==1.5.4
 numpy<2
 orjson==3.6.4
diff --git a/hail/python/setup.py b/hail/python/setup.py
index 527cae199f1..a00cc756e64 100755
--- a/hail/python/setup.py
+++ b/hail/python/setup.py
@@ -47,6 +47,7 @@
     package_data={
         'hail': ['hail_pip_version',
                  'hail_version',
+                 'hail_revision',
                  'experimental/datasets.json'],
         'hail.backend': ['hail-all-spark.jar'],
         'hailtop': ['hail_version', 'py.typed'],
diff --git a/hail/python/test/hail/backend/__init__.py b/hail/python/test/hail/backend/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/hail/python/test/hail/backend/test_service_backend.py b/hail/python/test/hail/backend/test_service_backend.py
new file mode 100644
index 00000000000..ecc3b50e69d
--- /dev/null
+++ b/hail/python/test/hail/backend/test_service_backend.py
@@ -0,0 +1,28 @@
+import hail as hl
+
+from ..helpers import skip_unless_service_backend
+
+@skip_unless_service_backend()
+def test_tiny_driver_has_tiny_memory():
+    try:
+        hl.utils.range_table(100_000_000, 50).to_pandas()
+    except Exception as exc:
+        assert 'java.lang.OutOfMemoryError: Java heap space' in exc.args[0]
+    else:
+        assert False, 'expected an OutOfMemoryError'
+
+@skip_unless_service_backend()
+def test_big_driver_has_big_memory():
+    old_driver_cores = hl.current_backend().driver_cores
+    old_driver_memory = hl.current_backend().driver_memory
+    try:
+        hl.current_backend().driver_cores = 8
+        hl.current_backend().driver_memory = 'highmem'
+        t = hl.utils.range_table(100_000_000, 50)
+        # The pytest (client-side) worker dies if we try to realize all 100M rows in memory.
+        # Instead, we realize the 100M rows in memory on the driver and then take just the first 10M
+        # rows back to the client.
+ hl.eval(t.aggregate(hl.agg.collect(t.idx), _localize=False)[:10_000_000]) + finally: + hl.current_backend().driver_cores = old_driver_cores + hl.current_backend().driver_memory = old_driver_memory diff --git a/hail/python/test/hail/experimental/test_experimental.py b/hail/python/test/hail/experimental/test_experimental.py index b8b45aa9c6c..8995bd71978 100644 --- a/hail/python/test/hail/experimental/test_experimental.py +++ b/hail/python/test/hail/experimental/test_experimental.py @@ -11,6 +11,7 @@ class Tests(unittest.TestCase): @fails_service_backend() + @fails_local_backend def test_ld_score(self): ht = hl.import_table(doctest_resource('ldsc.annot'), @@ -117,7 +118,6 @@ def test_import_keyby_count_ldsc_lowered_shuffle(self): @pytest.mark.unchecked_allocator - @skip_when_service_backend('hangs >5 minutes; last message is "all results compelte" in ServiceBackend.parallelizeAndComputeWithIndex') def test_ld_score_regression(self): ht_scores = hl.import_table( @@ -294,7 +294,6 @@ def test_sparse(self): .drop('a_index', 'was_split').select_entries(*expected_split_mt.entry.keys())) assert mt._same(expected_split_mt) - @fails_service_backend() def test_define_function(self): f1 = hl.experimental.define_function( lambda a, b: (a + 7) * b, hl.tint32, hl.tint32) diff --git a/hail/python/test/hail/expr/test_expr.py b/hail/python/test/hail/expr/test_expr.py index d726eefe197..accb1b17325 100644 --- a/hail/python/test/hail/expr/test_expr.py +++ b/hail/python/test/hail/expr/test_expr.py @@ -64,7 +64,6 @@ def test_random_function(rand_f): test_random_function(lambda: hl.rand_cat(hl.array([1, 1, 1, 1]))) test_random_function(lambda: hl.rand_dirichlet(hl.array([1, 1, 1, 1]))) - @fails_service_backend(reason='need to convert errors to HailUserError') def test_range(self): def same_as_python(*args): self.assertEqual(hl.eval(hl.range(*args)), list(range(*args))) @@ -199,7 +198,6 @@ def test_operators(self): else: self.assertEqual(v, result[k], msg=k) - @fails_service_backend(reason='need to convert errors to HailUserError') def test_array_slicing(self): schema = hl.tstruct(a=hl.tarray(hl.tint32)) rows = [{'a': [1, 2, 3, 4, 5]}] @@ -267,7 +265,6 @@ def test_dict_methods(self): self.assertDictEqual(result, expected) - @fails_service_backend(reason='need to convert errors to HailUserError') def test_dict_missing_error(self): d = hl.dict({'a': 2, 'b': 3}) with pytest.raises(hl.utils.HailUserError, match='Key NA not found in dictionary'): @@ -668,7 +665,6 @@ def test_agg_densify(self): None]), ] - @fails_service_backend(reason='service backend needs to support flags') @with_flags('distributed_scan_comb_op') def test_densify_table(self): ht = hl.utils.range_table(100, n_partitions=33) @@ -1381,13 +1377,11 @@ def test_aggregators_sum_product(self): self.assertTrue(r.sum_x == -15 and r.sum_y == 10 and r.sum_empty == 0 and r.prod_x == -120 and r.prod_y == 0 and r.prod_empty == 1) - @fails_service_backend def test_aggregators_hist(self): table = hl.utils.range_table(11) r = table.aggregate(hl.agg.hist(table.idx - 1, 0, 8, 4)) self.assertTrue(r.bin_edges == [0, 2, 4, 6, 8] and r.bin_freq == [2, 2, 2, 3] and r.n_smaller == 1 and r.n_larger == 1) - @fails_service_backend() def test_aggregators_hist_neg0(self): table = hl.utils.range_table(32) table = table.annotate(d=hl.if_else(table.idx == 11, -0.0, table.idx / 3)) @@ -1397,7 +1391,6 @@ def test_aggregators_hist_neg0(self): self.assertEqual(r.n_smaller, 0) self.assertEqual(r.n_larger, 1) - @fails_service_backend() def test_aggregators_hist_nan(self): ht = 
hl.utils.range_table(3).annotate(x=hl.float('nan')) r = ht.aggregate(hl.agg.hist(ht.x, 0, 10, 2)) @@ -1433,7 +1426,6 @@ def test_aggregator_cse(self): # r2adj = sumfit$adj.r.squared # f = sumfit$fstatistic # p = pf(f[1],f[2],f[3],lower.tail=F) - @fails_service_backend() def test_aggregators_linreg(self): t = hl.Table.parallelize([ {"y": None, "x": 1.0}, @@ -1491,7 +1483,6 @@ def test_aggregators_linreg(self): self.assertAlmostEqual(r.multiple_p_value, 0.56671386) self.assertAlmostEqual(r.n, 5) - @fails_service_backend() def test_linreg_no_data(self): ht = hl.utils.range_table(1).filter(False) r = ht.aggregate(hl.agg.linreg(ht.idx, 0)) @@ -1607,7 +1598,6 @@ def test_joins_inside_aggregators(self): table2 = hl.utils.range_table(10) self.assertEqual(table.aggregate(hl.agg.count_where(hl.is_defined(table2[table.idx]))), 10) - @fails_service_backend() def test_switch(self): x = hl.literal('1') na = hl.missing(tint32) @@ -1652,7 +1642,6 @@ def test_switch(self): hl.eval(hl.switch(x).when('0', 0).or_error("foo")) assert '.or_error("foo")' in str(exc.value) - @fails_service_backend() def test_case(self): def make_case(x): x = hl.literal(x) @@ -2565,7 +2554,6 @@ def test_int_typecheck(self): (hl.literal(None, dtype='int32'), None), (hl.literal(None, dtype='int64'), None)]) - @fails_service_backend() def test_is_transition(self): _test_many_equal([ (hl.is_transition("A", "G"), True), @@ -2575,7 +2563,6 @@ def test_is_transition(self): (hl.is_transition("ACA", "AGA"), False), (hl.is_transition("A", "T"), False)]) - @fails_service_backend() def test_is_transversion(self): _test_many_equal([ (hl.is_transversion("A", "T"), True), @@ -2584,7 +2571,6 @@ def test_is_transversion(self): (hl.is_transversion("AA", "T"), False), (hl.is_transversion("ACCC", "ACCT"), False)]) - @fails_service_backend() def test_is_snp(self): _test_many_equal([ (hl.is_snp("A", "T"), True), @@ -2594,36 +2580,30 @@ def test_is_snp(self): (hl.is_snp("AT", "AG"), True), (hl.is_snp("ATCCC", "AGCCC"), True)]) - @fails_service_backend() def test_is_mnp(self): _test_many_equal([ (hl.is_mnp("ACTGAC", "ATTGTT"), True), (hl.is_mnp("CA", "TT"), True)]) - @fails_service_backend() def test_is_insertion(self): _test_many_equal([ (hl.is_insertion("A", "ATGC"), True), (hl.is_insertion("ATT", "ATGCTT"), True)]) - @fails_service_backend() def test_is_deletion(self): self.assertTrue(hl.eval(hl.is_deletion("ATGC", "A"))) self.assertTrue(hl.eval(hl.is_deletion("GTGTA", "GTA"))) - @fails_service_backend() def test_is_indel(self): self.assertTrue(hl.eval(hl.is_indel("A", "ATGC"))) self.assertTrue(hl.eval(hl.is_indel("ATT", "ATGCTT"))) self.assertTrue(hl.eval(hl.is_indel("ATGC", "A"))) self.assertTrue(hl.eval(hl.is_indel("GTGTA", "GTA"))) - @fails_service_backend() def test_is_complex(self): self.assertTrue(hl.eval(hl.is_complex("CTA", "ATTT"))) self.assertTrue(hl.eval(hl.is_complex("A", "TATGC"))) - @fails_service_backend() def test_is_star(self): self.assertTrue(hl.eval(hl.is_star("ATC", "*"))) self.assertTrue(hl.eval(hl.is_star("A", "*"))) @@ -2632,7 +2612,6 @@ def test_is_strand_ambiguous(self): self.assertTrue(hl.eval(hl.is_strand_ambiguous("A", "T"))) self.assertFalse(hl.eval(hl.is_strand_ambiguous("G", "T"))) - @fails_service_backend() def test_allele_type(self): self.assertEqual( hl.eval(hl.tuple(( @@ -3182,8 +3161,6 @@ def test_show_expression(self): +---------+ ''' - @fails_service_backend() - @fails_local_backend() def test_export(self): for delimiter in ['\t', ',', '@']: for missing in ['NA', 'null']: @@ -3651,7 +3628,6 @@ def 
test_set_operators(self): self.assert_evals_to(hl.set([1, 2, 3]) ^ set([3, 4, 5]), set([1, 2, 4, 5])) self.assert_evals_to(set([1, 2, 3]) ^ hl.set([3, 4, 5]), set([1, 2, 4, 5])) - @fails_service_backend() def test_uniroot(self): tol = 1.220703e-4 @@ -4129,7 +4105,6 @@ def test_bit_shift_edge_cases(self): assert hl.eval(hl.bit_rshift(hl.int64(-1), 64)) == -1 assert hl.eval(hl.bit_rshift(hl.int64(-11), 64, logical=True)) == 0 - @fails_service_backend() def test_bit_shift_errors(self): with pytest.raises(hl.utils.HailUserError): hl.eval(hl.bit_lshift(1, -1)) @@ -4273,7 +4248,6 @@ def test_parse_json(self): ] assert hl.eval(hl._compare(hl.tuple(values), hl.tuple(hl.parse_json(hl.json(v), v.dtype) for v in values)) == 0) - @fails_service_backend() def test_expr_persist(self): # need to test laziness, so we will overwrite a file ht2 = hl.utils.range_table(100) @@ -4335,3 +4309,14 @@ def test_enumerate(self): [('foo', 10), ('bar', 11), ('baz', 12)], [] ) + + def test_split_line(self): + s1 = '1 2 3 4 5 6 7' + s2 = '1 2 "3 4" "a b c d"' + s3 = '"1" "2"' + + assert hl.eval(hl.str(s1)._split_line(' ', ['NA'], quote=None, regex=False)) == s1.split(' ') + assert hl.eval(hl.str(s1)._split_line(r'\s+', ['NA'], quote=None, regex=True)) == s1.split(' ') + assert hl.eval(hl.str(s3)._split_line(' ', ['1'], quote='"', regex=False)) == [None, '2'] + assert hl.eval(hl.str(s2)._split_line(' ', ['1', '2'], quote='"', regex=False)) == [None, None, '3 4', 'a b c d'] + assert hl.eval(hl.str(s2)._split_line(r'\s+', ['1', '2'], quote='"', regex=True)) == [None, None, '3 4', 'a b c d'] diff --git a/hail/python/test/hail/expr/test_ndarrays.py b/hail/python/test/hail/expr/test_ndarrays.py index cef5d401a04..3fd572cfa39 100644 --- a/hail/python/test/hail/expr/test_ndarrays.py +++ b/hail/python/test/hail/expr/test_ndarrays.py @@ -26,7 +26,6 @@ def assert_ndarrays_almost_eq(*expr_and_expected): assert_ndarrays(np.allclose, expr_and_expected) -@fails_service_backend() def test_ndarray_ref(): scalar = 5.0 @@ -62,7 +61,6 @@ def test_ndarray_ref(): assert "Index 4 is out of bounds for axis 0 with size 3" in str(exc.value) -@skip_when_service_backend('slow >800s') def test_ndarray_slice(): np_rect_prism = np.arange(24).reshape((2, 3, 4)) rect_prism = hl.nd.array(np_rect_prism) @@ -204,7 +202,6 @@ def test_ndarray_transposed_slice(): ) -@fails_service_backend() def test_ndarray_eval(): data_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] mishapen_data_list1 = [[4], [1, 2, 3]] @@ -288,7 +285,6 @@ def test_ndarray_shape(): ) -@fails_service_backend(reason='need to convert errors to HailUserError') def test_ndarray_reshape(): np_single = np.array([8]) single = hl.nd.array([8]) @@ -555,7 +551,6 @@ def test_ndarray_transpose(): cube.transpose((1, 1, 1)) assert "Axes cannot contain duplicates" in str(exc.value) -@fails_service_backend(reason='need to convert errors to HailUserError') def test_ndarray_matmul(): np_v = np.array([1, 2]) np_y = np.array([1, 1, 1]) @@ -681,7 +676,6 @@ def test_ndarray_full(): assert hl.eval(hl.nd.full((5, 6, 7), hl.int32(3), dtype=hl.tfloat64)).dtype, np.float64 -@fails_service_backend(reason='need to convert errors to HailUserError') def test_ndarray_arange(): assert_ndarrays_eq( (hl.nd.arange(40), np.arange(40)), @@ -723,7 +717,6 @@ def test_ndarray_diagonal(): assert "2 dimensional" in str(exc.value) -@fails_service_backend(reason='need to convert errors to HailUserError') def test_ndarray_solve_triangular(): a = hl.nd.array([[1, 1], [0, 1]]) b = hl.nd.array([2, 1]) @@ -742,7 +735,6 @@ def 
test_ndarray_solve_triangular(): hl.eval(hl.nd.solve_triangular(a_sing, b_sing)) assert "singular" in str(exc.value), str(exc.value) -@fails_service_backend(reason='need to convert errors to HailUserError') def test_ndarray_solve(): a = hl.nd.array([[1, 2], [3, 5]]) b = hl.nd.array([1, 2]) @@ -1201,4 +1193,4 @@ def test_ndarray_broadcasting_with_decorator(): nd = hl.nd.array([[1.2, 2.3, 3.3], [4.3, 5.3, 6.3]]) nd_floor = hl.eval(hl.nd.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])) nd = hl.eval(hl.floor(nd)) - assert(np.array_equal(nd, nd_floor)) \ No newline at end of file + assert(np.array_equal(nd, nd_floor)) diff --git a/hail/python/test/hail/genetics/test_pedigree.py b/hail/python/test/hail/genetics/test_pedigree.py index ca592a8ac9a..c03297cbe35 100644 --- a/hail/python/test/hail/genetics/test_pedigree.py +++ b/hail/python/test/hail/genetics/test_pedigree.py @@ -29,7 +29,7 @@ def test_trios(self): self.assertEqual(t1.is_female, True) self.assertEqual(t1.is_female, True) self.assertEqual(t1.is_male, False) - + self.assertEqual(t1.is_complete(), False) self.assertEqual(t4.is_complete(), True) self.assertEqual(t5.is_complete(), False) diff --git a/hail/python/test/hail/ggplot/test_ggplot.py b/hail/python/test/hail/ggplot/test_ggplot.py index 9478d9d3b73..b568f3fc5f7 100644 --- a/hail/python/test/hail/ggplot/test_ggplot.py +++ b/hail/python/test/hail/ggplot/test_ggplot.py @@ -1,4 +1,3 @@ -# These tests only check that the functions don't error out, they don't check what the output plot looks like. import hail as hl from hail.ggplot import * import numpy as np @@ -34,7 +33,6 @@ def test_manhattan_plot(): expected_ticks = ('1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', 'X', 'Y') assert pfig.layout.xaxis.ticktext == expected_ticks -@fails_service_backend() def test_histogram(): num_rows = 101 num_groups = 5 @@ -68,3 +66,37 @@ def test_geom_ribbon(): ht = hl.utils.range_table(20) fig = ggplot(ht, aes(x=ht.idx, ymin=ht.idx * 2, ymax=ht.idx * 3)) + geom_ribbon() fig.to_plotly() + + +def test_default_scale_no_repeat_colors(): + num_rows = 20 + ht = hl.utils.range_table(num_rows) + fig = ggplot(ht, aes(x=ht.idx, y=ht.idx, color=hl.str(ht.idx))) + geom_point() + pfig = fig.to_plotly() + + scatter_colors = [scatter['marker']['color'] for scatter in pfig['data']] + num_unique_colors = len(set(scatter_colors)) + assert num_unique_colors == num_rows + + +def test_scale_color_manual(): + num_rows = 4 + colors = set(["red", "blue"]) + ht = hl.utils.range_table(num_rows) + fig = ggplot(ht, aes(x=ht.idx, y=ht.idx, color=hl.str(ht.idx % 2))) + geom_point() + scale_color_manual(values=list(colors)) + pfig = fig.to_plotly() + + assert set([scatter.marker.color for scatter in pfig.data]) == colors + + +def test_weighted_bar(): + x = hl.array([2, 3, 3, 3, 4, 5, 2]) + w = hl.array([1, 2, 3, 4, 5, 6, 7]) + ht = hl.utils.range_table(7) + ht = ht.annotate(x=x[ht.idx], w=w[ht.idx]) + fig = ggplot(ht) + geom_bar(aes(x=ht.x, weight=ht.w)) + + result = [8, 9, 5, 6] + for idx, y in enumerate(fig.to_plotly().data[0].y): + assert(y == result[idx]) + diff --git a/hail/python/test/hail/helpers.py b/hail/python/test/hail/helpers.py index 906c2dc1df3..3b4508ff173 100644 --- a/hail/python/test/hail/helpers.py +++ b/hail/python/test/hail/helpers.py @@ -1,13 +1,11 @@ -import asyncio import os from timeit import default_timer as timer import unittest import pytest from decorator import decorator -from hail.utils.java import Env +from hail.utils.java import 
Env, choose_backend import hail as hl -from hail.backend.local_backend import LocalBackend _initialized = False @@ -15,7 +13,7 @@ def startTestHailContext(): global _initialized if not _initialized: - backend_name = os.environ.get('HAIL_QUERY_BACKEND', 'spark') + backend_name = choose_backend() if backend_name == 'spark': hl.init(master='local[2]', min_block_size=0, quiet=True) else: @@ -142,23 +140,32 @@ def wrapper(func, *args, **kwargs): return wrapper +def skip_unless_service_backend(message='only relevant to service backend'): + from hail.backend.service_backend import ServiceBackend + @decorator + def wrapper(func, *args, **kwargs): + if not isinstance(hl.utils.java.Env.backend(), ServiceBackend): + raise unittest.SkipTest(message) + else: + return func(*args, **kwargs) + + return wrapper + + fails_local_backend = pytest.mark.xfail( - os.environ.get('HAIL_QUERY_BACKEND') == 'local', + choose_backend() == 'local', reason="doesn't yet work on local backend", strict=True) fails_service_backend = pytest.mark.xfail( - os.environ.get('HAIL_QUERY_BACKEND') == 'service', + choose_backend() == 'batch', reason="doesn't yet work on service backend", strict=True) -def check_spark(): - backend_name = os.environ.get('HAIL_QUERY_BACKEND', 'spark') - return backend_name == 'spark' fails_spark_backend = pytest.mark.xfail( - check_spark(), + choose_backend() == 'spark', reason="doesn't yet work on spark backend", strict=True) @@ -191,7 +198,7 @@ def wrapper(func, *args, **kwargs): def lower_only(): @decorator def wrapper(func, *args, **kwargs): - flags = hl._get_flags() + flags = hl._get_flags('lower', 'lower_only') prev_lower = flags.get('lower') prev_lower_only = flags.get('lower_only') diff --git a/hail/python/test/hail/linalg/test_linalg.py b/hail/python/test/hail/linalg/test_linalg.py index 369916c64ae..98b235d4645 100644 --- a/hail/python/test/hail/linalg/test_linalg.py +++ b/hail/python/test/hail/linalg/test_linalg.py @@ -164,9 +164,12 @@ def test_random_uniform(self): for entry in row: assert entry > 0 - @fails_service_backend() - @fails_local_backend() - def test_to_from_numpy(self): + def test_bm_to_numpy(self): + bm = BlockMatrix.from_ndarray(hl.nd.arange(20).map(lambda x: hl.float64(x)).reshape((4, 5))) + np_bm = bm.to_numpy() + self._assert_eq(np_bm, np.arange(20, dtype=np.float64).reshape((4, 5))) + + def test_numpy_round_trip(self): n_rows = 10 n_cols = 11 data = np.random.rand(n_rows * n_cols) @@ -176,7 +179,7 @@ def test_to_from_numpy(self): with hl.TemporaryFilename() as bm_f, hl.TemporaryFilename() as a_f: bm.tofile(bm_f) - a.tofile(a_f) + hl.current_backend().fs.open(a_f, mode='wb').write(a.tobytes()) a1 = bm.to_numpy() a2 = BlockMatrix.from_numpy(a, block_size=5).to_numpy() @@ -197,7 +200,7 @@ def test_to_from_numpy(self): with hl.TemporaryFilename() as bmt_f, hl.TemporaryFilename() as at_f: bmt.tofile(bmt_f) - at.tofile(at_f) + hl.current_backend().fs.open(at_f, mode='wb').write(at.tobytes()) at1 = bmt.to_numpy() at2 = BlockMatrix.from_numpy(at).to_numpy() @@ -213,6 +216,15 @@ def test_to_from_numpy(self): self._assert_eq(at4, at) self._assert_eq(at5, at) + @fails_service_backend() + @fails_local_backend() + def test_numpy_round_trip_force_blocking(self): + n_rows = 10 + n_cols = 11 + data = np.random.rand(n_rows * n_cols) + a = data.reshape((n_rows, n_cols)) + + bm = BlockMatrix._create(n_rows, n_cols, data.tolist(), block_size=4) self._assert_eq(bm.to_numpy(_force_blocking=True), a) @fails_service_backend() @@ -947,6 +959,7 @@ def test_export_blocks(self): self._assert_eq(nd, 
actual) @fails_service_backend() + @fails_local_backend() def test_rectangles_to_numpy(self): nd = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], @@ -1054,7 +1067,6 @@ def test_locus_windows_per_contig(self): f = hl._locus_windows_per_contig([[1.0, 3.0, 4.0], [2.0, 2.0], [5.0]], 1.0) assert hl.eval(f) == ([0, 1, 1, 3, 3, 5], [1, 3, 3, 5, 5, 6]) - @fails_service_backend() def test_locus_windows(self): def assert_eq(a, b): assert np.array_equal(a, np.array(b)), f"a={a}, b={b}" @@ -1134,8 +1146,6 @@ def assert_eq(a, b): hl.linalg.utils.locus_windows(ht.locus, 1.0, coord_expr=ht.cm) assert "missing value for 'coord_expr'" in str(cm.exception) - @fails_service_backend() - @fails_local_backend() def test_write_overwrite(self): with hl.TemporaryDirectory(ensure_exists=False) as path: bm = BlockMatrix.from_numpy(np.array([[0]])) @@ -1215,8 +1225,6 @@ def assert_same_columns_up_to_sign(a, b): s = x.svd(compute_uv=False, complexity_bound=0) assert np.all(s >= 0) - @fails_service_backend() - @fails_local_backend() def test_filtering(self): np_square = np.arange(16, dtype=np.float64).reshape((4, 4)) bm = BlockMatrix.from_numpy(np_square) @@ -1294,8 +1302,6 @@ def test_sparse_transposition(self): sparse_np = sparsify_numpy(np_square, block_size, block_list).T assert np.array_equal(sparse_bm.to_numpy(), sparse_np) - - @fails_service_backend() def test_row_blockmatrix_sum(self): row = BlockMatrix.from_numpy(np.arange(10)) diff --git a/hail/python/test/hail/matrixtable/test_file_formats.py b/hail/python/test/hail/matrixtable/test_file_formats.py index a86e0c21fa0..9d1922524d7 100644 --- a/hail/python/test/hail/matrixtable/test_file_formats.py +++ b/hail/python/test/hail/matrixtable/test_file_formats.py @@ -37,26 +37,7 @@ class Tests(unittest.TestCase): def test_write(self): create_backward_compatibility_files() - @skip_when_service_backend('''intermittent worker failure: -> assert backward_compatible_same(all_values_table, ds) - -Caused by: java.lang.AssertionError: assertion failed - at scala.Predef$.assert(Predef.scala:208) - at is.hail.io.BlockingInputBuffer.ensure(InputBuffers.scala:389) - at is.hail.io.BlockingInputBuffer.readInt(InputBuffers.scala:412) - at __C1210collect_distributed_array.__m1218INPLACE_DECODE_r_binary_TO_r_binary(Unknown Source) - at __C1210collect_distributed_array.__m1217INPLACE_DECODE_r_struct_of_r_binaryEND_TO_r_tuple_of_r_binaryEND(Unknown Source) - at __C1210collect_distributed_array.__m1216INPLACE_DECODE_r_struct_of_r_struct_of_r_binaryENDEND_TO_r_struct_of_r_tuple_of_r_binaryENDEND(Unknown Source) - at __C1210collect_distributed_array.__m1215DECODE_r_struct_of_r_struct_of_r_struct_of_r_binaryENDENDEND_TO_SBaseStructPointer(Unknown Source) - at __C1210collect_distributed_array.apply(Unknown Source) - at __C1210collect_distributed_array.apply(Unknown Source) - at is.hail.backend.BackendUtils.$anonfun$collectDArray$2(BackendUtils.scala:31) - at is.hail.utils.package$.using(package.scala:627) - at is.hail.annotations.RegionPool.scopedRegion(RegionPool.scala:144) - at is.hail.backend.BackendUtils.$anonfun$collectDArray$1(BackendUtils.scala:30) - at is.hail.backend.service.Worker$.main(Worker.scala:120) - at is.hail.backend.service.Worker.main(Worker.scala) - ... 
11 more''') + @fails_service_backend() def test_backward_compatability(self): import os @@ -99,4 +80,4 @@ def backward_compatible_same(current, old): f = os.path.join(matrix_table_dir, '{}.hmt'.format(i)) n += 1 - assert n == 88 + assert n == 88, f'{resource_dir!r} {versions!r}' diff --git a/hail/python/test/hail/matrixtable/test_matrix_table.py b/hail/python/test/hail/matrixtable/test_matrix_table.py index 7019477ad7b..d9cd1d87586 100644 --- a/hail/python/test/hail/matrixtable/test_matrix_table.py +++ b/hail/python/test/hail/matrixtable/test_matrix_table.py @@ -639,7 +639,6 @@ def test_table_product_join(self): rows = left.rows() self.assertTrue(rows.all(rows.matches.map(lambda x: x.idx) == hl.range(0, rows.row_idx))) - @fails_service_backend() def test_naive_coalesce(self): mt = self.get_mt(min_partitions=8) self.assertEqual(mt.n_partitions(), 8) @@ -1006,8 +1005,6 @@ def test_field_groups(self): (df.GT == df.entry_struct.GT)) & (df.AD == df.entry_struct.AD)))) - @fails_service_backend() - @fails_local_backend() def test_filter_partitions(self): ds = self.get_mt(min_partitions=8) self.assertEqual(ds.n_partitions(), 8) @@ -1957,8 +1954,6 @@ def test_read_write_balding_nichols_model(): mt.write(tmp_file) assert hl.read_matrix_table(tmp_file)._same(mt) -@fails_service_backend() -@fails_local_backend() def test_read_partitions(): ht = hl.utils.range_matrix_table(n_rows=100, n_cols=10, n_partitions=3) path = new_temp_file() diff --git a/hail/python/test/hail/methods/relatedness/test_pc_relate.py b/hail/python/test/hail/methods/relatedness/test_pc_relate.py index dbc8d4cd350..3148a41e412 100644 --- a/hail/python/test/hail/methods/relatedness/test_pc_relate.py +++ b/hail/python/test/hail/methods/relatedness/test_pc_relate.py @@ -28,6 +28,7 @@ def test_pc_relate_against_R_truth(): @fails_service_backend() +@fails_local_backend() def test_pc_relate_simple_example(): gs = hl.literal( [[0, 0, 0, 0, 1, 1, 1, 1], diff --git a/hail/python/test/hail/methods/test_family_methods.py b/hail/python/test/hail/methods/test_family_methods.py index 13bd829b340..db19d0706a0 100644 --- a/hail/python/test/hail/methods/test_family_methods.py +++ b/hail/python/test/hail/methods/test_family_methods.py @@ -8,7 +8,6 @@ class Tests(unittest.TestCase): - @fails_service_backend() def test_trio_matrix(self): """ This test depends on certain properties of the trio matrix VCF and @@ -81,7 +80,6 @@ def test_trio_matrix(self): self.assertEqual(e_cols.row.dtype, t_cols.row.dtype) self.assertTrue(e_cols._same(t_cols)) - @fails_service_backend() def test_trio_matrix_null_keys(self): ped = hl.Pedigree.read(resource('triomatrix.fam')) ht = hl.import_fam(resource('triomatrix.fam')) @@ -101,7 +99,6 @@ def test_trio_matrix_incomplete_trios(self): hl.trio_matrix(mt, ped, complete_trios=False) - @fails_service_backend() def test_mendel_errors(self): mt = hl.import_vcf(resource('mendel.vcf')) ped = hl.Pedigree.read(resource('mendel.fam')) @@ -217,7 +214,6 @@ def test_tdt(self): bad.order_by(hl.asc(bad.v)).show() self.fail('Found rows in violation of the predicate (see show output)') - @fails_service_backend() def test_de_novo(self): mt = hl.import_vcf(resource('denovo.vcf')) mt = mt.filter_rows(mt.locus.in_y_par(), keep=False) # de_novo_finder doesn't know about y PAR diff --git a/hail/python/test/hail/methods/test_impex.py b/hail/python/test/hail/methods/test_impex.py index 8d2b7cc607f..5b27d3b360e 100644 --- a/hail/python/test/hail/methods/test_impex.py +++ b/hail/python/test/hail/methods/test_impex.py @@ -2,6 +2,7 @@ import os import 
shutil import unittest + from unittest import mock from avro.datafile import DataFileReader @@ -11,7 +12,7 @@ import pytest import hail as hl from ..helpers import * -from hail.utils import new_temp_file, FatalError, run_command, uri_path +from hail.utils import new_temp_file, FatalError, run_command, uri_path, HailUserError setUpModule = startTestHailContext tearDownModule = stopTestHailContext @@ -39,7 +40,6 @@ class VCFTests(unittest.TestCase): def test_info_char(self): self.assertEqual(hl.import_vcf(resource('infochar.vcf')).count_rows(), 1) - @fails_service_backend() def test_import_export_same(self): for i in range(10): mt = hl.import_vcf(resource(f'random_vcfs/{i}.vcf.bgz')) @@ -80,14 +80,11 @@ def test_undeclared_info(self): def test_can_import_bad_number_flag(self): hl.import_vcf(resource('bad_flag_number.vcf')).rows()._force_count() - @fails_service_backend() def test_malformed(self): with self.assertRaisesRegex(FatalError, "invalid character"): mt = hl.import_vcf(resource('malformed.vcf')) mt._force_count_rows() - @fails_service_backend() - @fails_local_backend() def test_not_identical_headers(self): t = new_temp_file(extension='vcf') mt = hl.import_vcf(resource('sample.vcf')) @@ -228,7 +225,6 @@ def test_vcf_unsorted_alleles(self): mt = hl.import_vcf(resource('sample.pksorted.vcf'), n_partitions=4) mt.rows()._force_count() - @fails_service_backend() def test_import_vcf_skip_invalid_loci(self): mt = hl.import_vcf(resource('skip_invalid_loci.vcf'), reference_genome='GRCh37', skip_invalid_loci=True) @@ -262,8 +258,6 @@ def test_import_vcf_invalid_float_type(self): with self.assertRaises(TypeError): mt = hl.import_vcf(resource('small-ds.vcf'), entry_float_type=hl.tint64) - @fails_service_backend() - @fails_local_backend() def test_export_vcf(self): dataset = hl.import_vcf(resource('sample.vcf.bgz')) vcf_metadata = hl.get_vcf_metadata(resource('sample.vcf.bgz')) @@ -282,8 +276,6 @@ def test_export_vcf(self): # are py4 JavaMaps, not dicts, so can't use assertDictEqual self.assertEqual(vcf_metadata, metadata_imported) - @fails_service_backend() - @fails_local_backend() def test_export_vcf_empty_format(self): mt = hl.import_vcf(resource('sample.vcf.bgz')).select_entries() tmp = new_temp_file(extension="vcf") @@ -291,8 +283,6 @@ def test_export_vcf_empty_format(self): assert hl.import_vcf(tmp)._same(mt) - @fails_service_backend() - @fails_local_backend() def test_export_vcf_no_gt(self): mt = hl.import_vcf(resource('sample.vcf.bgz')).drop('GT') tmp = new_temp_file(extension="vcf") @@ -300,7 +290,6 @@ def test_export_vcf_no_gt(self): assert hl.import_vcf(tmp)._same(mt) - @fails_service_backend() def test_export_vcf_no_alt_alleles(self): mt = hl.import_vcf(resource('gvcfs/HG0096_excerpt.g.vcf'), reference_genome='GRCh38') self.assertEqual(mt.filter_rows(hl.len(mt.alleles) == 1).count_rows(), 5) @@ -310,8 +299,6 @@ def test_export_vcf_no_alt_alleles(self): mt2 = hl.import_vcf(tmp, reference_genome='GRCh38') self.assertTrue(mt._same(mt2)) - @fails_service_backend() - @fails_local_backend() def test_export_sites_only_from_table(self): mt = hl.import_vcf(resource('sample.vcf.bgz'))\ .select_entries()\ @@ -361,6 +348,7 @@ def test_tabix_export(self): self.import_gvcfs_sample_vcf(tmp) @fails_service_backend() + @fails_local_backend() def test_import_gvcfs(self): path = resource('sample.vcf.bgz') self.import_gvcfs_sample_vcf(path) @@ -472,7 +460,6 @@ def test_combiner_works(self): self.assertEqual(len(parts), comb.n_partitions()) comb._force_count_rows() - @fails_service_backend() def 
test_haploid_combiner_ok(self): from hail.experimental.vcf_combiner.vcf_combiner import transform_gvcf # make a combiner table @@ -540,39 +527,33 @@ def test_missing_float_entries(self): assert gl_gp == [hl.Struct(GL=[None, None, None], GP=[0.22, 0.5, 0.27]), hl.Struct(GL=[None, None, None], GP=[None, None, None])] - @fails_service_backend() - @fails_local_backend() def test_same_bgzip(self): mt = hl.import_vcf(resource('sample.vcf'), min_partitions=4) f = new_temp_file(extension='vcf.bgz') hl.export_vcf(mt, f) assert hl.import_vcf(f)._same(mt) - @fails_service_backend() - @fails_local_backend() - def test_vcf_parallel_export(self): - import glob + def test_vcf_parallel_separate_header_export(self): + fs = hl.current_backend().fs def concat_files(outpath, inpaths): - with open(outpath, 'wb') as outfile: + with fs.open(outpath, 'wb') as outfile: for path in inpaths: - with open(path, 'rb') as infile: + with fs.open(path, 'rb') as infile: shutil.copyfileobj(infile, outfile) mt = hl.import_vcf(resource('sample.vcf'), min_partitions=4) f = new_temp_file(extension='vcf.bgz') hl.export_vcf(mt, f, parallel='separate_header') - shard_paths = glob.glob(f + "/*.bgz") + stat = fs.stat(f) + assert stat + assert stat.is_dir() + shard_paths = [info.path for info in fs.ls(f) + if os.path.splitext(info.path)[-1] == '.bgz'] + assert shard_paths shard_paths.sort() nf = new_temp_file(extension='vcf.bgz') concat_files(nf, shard_paths) - assert hl.import_vcf(nf)._same(mt) - f = new_temp_file(extension='vcf.bgz') - hl.export_vcf(mt, f, parallel='composable') - shard_paths = glob.glob(f + "/*.bgz") - shard_paths.sort() - nf = new_temp_file(extension='vcf.bgz') - concat_files(nf, shard_paths) assert hl.import_vcf(nf)._same(mt) @fails_service_backend() @@ -581,8 +562,6 @@ def test_custom_rg_import(self): mt = hl.import_vcf(resource('custom_rg.vcf'), reference_genome=rg) assert mt.locus.collect() == [hl.Locus('D', 123, reference_genome=rg)] - @fails_service_backend() - @fails_local_backend() def test_sorted(self): mt = hl.utils.range_matrix_table(10, 10, n_partitions=4).filter_cols(False) mt = mt.key_cols_by(s='dummy') @@ -592,7 +571,7 @@ def test_sorted(self): hl.export_vcf(mt, f) last = 0 - with open(uri_path(f), 'r') as i: + with hl.current_backend().fs.open(f, 'r') as i: for line in i: if line.startswith('#'): continue @@ -601,8 +580,6 @@ def test_sorted(self): assert pos >= last last = pos - @fails_service_backend() - @fails_local_backend() def test_empty_read_write(self): mt = hl.import_vcf(resource('sample.vcf'), min_partitions=4).filter_rows(False) @@ -612,8 +589,8 @@ def test_empty_read_write(self): hl.export_vcf(mt, out1) hl.export_vcf(mt, out2) - assert os.stat(uri_path(out1)).st_size > 0 - assert os.stat(uri_path(out2)).st_size > 0 + assert hl.current_backend().fs.stat(out1).size > 0 + assert hl.current_backend().fs.stat(out2).size > 0 assert hl.import_vcf(out1)._same(mt) assert hl.import_vcf(out2)._same(mt) @@ -624,8 +601,6 @@ def test_empty_import_vcf_group_by_collect(self): groups = ht.group_by(the_key=ht.key).aggregate(values=hl.agg.collect(ht.row_value)).collect() assert not groups - @fails_service_backend() - @fails_local_backend() def test_format_header(self): mt = hl.import_vcf(resource('sample2.vcf')) metadata = hl.get_vcf_metadata(resource('sample2.vcf')) @@ -633,7 +608,7 @@ def test_format_header(self): hl.export_vcf(mt, f, metadata=metadata) s = set() - with open(uri_path(f), 'r') as i: + with hl.current_backend().fs.open(f, 'r') as i: for line in i: if line.startswith('##FORMAT'): 
s.add(line.strip()) @@ -646,13 +621,11 @@ def test_format_header(self): '##FORMAT=', } - @fails_service_backend() - @fails_local_backend() def test_format_genotypes(self): mt = hl.import_vcf(resource('sample.vcf')) f = new_temp_file(extension='vcf') hl.export_vcf(mt, f) - with open(uri_path(f), 'r') as i: + with hl.current_backend().fs.open(f, 'r') as i: for line in i: if line.startswith('20\t13029920'): expected = "GT:AD:DP:GQ:PL\t1/1:0,6:6:18:234,18,0\t1/1:0,4:4:12:159,12,0\t" \ @@ -665,13 +638,11 @@ def test_format_genotypes(self): else: assert False, 'expected pattern not found' - @fails_service_backend() - @fails_local_backend() def test_contigs_header(self): mt = hl.import_vcf(resource('sample.vcf')).filter_cols(False) f = new_temp_file(extension='vcf') hl.export_vcf(mt, f) - with open(uri_path(f), 'r') as i: + with hl.current_backend().fs.open(f, 'r') as i: for line in i: if line.startswith('##contig=' @@ -679,8 +650,6 @@ def test_contigs_header(self): else: assert False, 'expected pattern not found' - @fails_service_backend() - @fails_local_backend() def test_metadata_argument(self): mt = hl.import_vcf(resource('multipleChromosomes.vcf')) f = new_temp_file(extension='vcf') @@ -693,7 +662,7 @@ def test_metadata_argument(self): saw_gt = False saw_lq = False - with open(uri_path(f), 'r') as f: + with hl.current_backend().fs.open(f, 'r') as f: for line in f: print(line[:25]) if line.startswith('##FORMAT==3.6,<3.7|aiohttp_session>=2.7,<2.8|asyncinit>=0.2.4,<0.3|bokeh>1.1,<1.3|decorator<5|gcsfs==0.2.1|humanize==1.0.0|hurry.filesize==0.9|nest_asyncio|numpy<2|pandas>0.24,<0.26|parsimonious<0.9|PyJWT|python-json-logger==0.1.11|requests>=2.21.0,<2.21.1|scipy>1.2,<1.4|tabulate==0.8.9|tqdm==4.42.1|", + "pip_dependencies": "aiohttp>=3.6,<3.7|aiohttp_session>=2.7,<2.8|asyncinit>=0.2.4,<0.3|bokeh>1.1,<1.3|decorator<5|humanize==1.0.0|hurry.filesize==0.9|nest_asyncio|numpy<2|pandas>0.24,<0.26|parsimonious<0.9|PyJWT|python-json-logger==0.1.11|requests>=2.21.0,<2.21.1|scipy>1.2,<1.4|tabulate==0.8.9|tqdm==4.42.1|", "vep-GRCh37.sh": "gs://hail-common/hailctl/dataproc/test-version/vep-GRCh37.sh", "vep-GRCh38.sh": "gs://hail-common/hailctl/dataproc/test-version/vep-GRCh38.sh", } diff --git a/hail/src/main/scala/is/hail/HailContext.scala b/hail/src/main/scala/is/hail/HailContext.scala index 01301a6b8a9..e2be3c864c0 100644 --- a/hail/src/main/scala/is/hail/HailContext.scala +++ b/hail/src/main/scala/is/hail/HailContext.scala @@ -50,10 +50,6 @@ object HailContext { def backend: Backend = get.backend - def getFlag(flag: String): String = get.flags.get(flag) - - def setFlag(flag: String, value: String): Unit = get.flags.set(flag, value) - def sparkBackend(op: String): SparkBackend = get.sparkBackend(op) def configureLogging(logFile: String, quiet: Boolean, append: Boolean, skipLoggingConfiguration: Boolean): Unit = { @@ -407,8 +403,6 @@ class HailContext private( def sparkBackend(op: String): SparkBackend = backend.asSpark(op) - val flags: HailFeatureFlags = new HailFeatureFlags() - var checkRVDKeys: Boolean = false private var nextVectorId: Int = 0 @@ -464,54 +458,3 @@ class HailContext private( JsonMethods.compact(Extraction.decompose(metadata)) } } - -object HailFeatureFlags { - val defaults: Map[String, (String, String)] = Map[String, (String, String)]( - ("no_whole_stage_codegen", ("HAIL_DEV_NO_WHOLE_STAGE_CODEGEN" -> null)), - ("no_ir_logging", ("HAIL_DEV_NO_IR_LOG" -> null)), - ("lower", ("HAIL_DEV_LOWER" -> null)), - ("lower_only", ("HAIL_DEV_LOWER_ONLY" -> null)), - ("lower_bm", ("HAIL_DEV_LOWER_BM" 
-> null)),
-    ("max_leader_scans", ("HAIL_DEV_MAX_LEADER_SCANS" -> "1000")),
-    ("distributed_scan_comb_op", ("HAIL_DEV_DISTRIBUTED_SCAN_COMB_OP" -> null)),
-    ("jvm_bytecode_dump", ("HAIL_DEV_JVM_BYTECODE_DUMP" -> null)),
-    ("use_packed_int_encoding", ("HAIL_DEV_USE_PACKED_INT_ENCODING" -> null)),
-    ("use_column_encoding", ("HAIL_DEV_USE_COLUMN_ENCODING" -> null)),
-    ("use_spicy_ptypes", ("HAIL_USE_SPICY_PTYPES" -> null)),
-    ("log_service_timing", ("HAIL_DEV_LOG_SERVICE_TIMING" -> null)),
-    ("cache_service_input", ("HAIL_DEV_CACHE_SERVICE_INPUT" -> null)),
-    ("write_ir_files", ("HAIL_WRITE_IR_FILES" -> null)),
-    ("method_split_ir_limit", ("HAIL_DEV_METHOD_SPLIT_LIMIT" -> "16")),
-    ("use_new_shuffle", ("HAIL_USE_NEW_SHUFFLE" -> null)),
-    ("shuffle_cutoff_to_local_sort", ("HAIL_SHUFFLE_CUTOFF" -> null)),
-    ("grouped_aggregate_buffer_size", ("HAIL_GROUPED_AGGREGATE_BUFFER_SIZE" -> "50")),
-    ("use_ssa_logs", "HAIL_USE_SSA_LOGS" -> null)
-  )
-}
-
-class HailFeatureFlags {
-  private[this] val flags: mutable.Map[String, String] =
-    mutable.Map[String, String](HailFeatureFlags.defaults.mapValues { case (env, default) =>
-      sys.env.getOrElse(env, default)
-    }.toFastSeq: _*)
-
-  val available: java.util.ArrayList[String] =
-    new java.util.ArrayList[String](java.util.Arrays.asList[String](flags.keys.toSeq: _*))
-
-  def set(flag: String, value: String): Unit = {
-    flags.update(flag, value)
-  }
-
-  def get(flag: String): String = flags(flag)
-
-  def exists(flag: String): Boolean = flags.contains(flag)
-
-  def toJSONEnv: JArray =
-    JArray(flags.filter { case (_, v) =>
-      v != null
-    }.map{ case (name, v) =>
-      JObject(
-        "name" -> JString(HailFeatureFlags.defaults(name)._1),
-        "value" -> JString(v))
-    }.toList)
-}
diff --git a/hail/src/main/scala/is/hail/HailFeatureFlags.scala b/hail/src/main/scala/is/hail/HailFeatureFlags.scala
new file mode 100644
index 00000000000..70c0a896289
--- /dev/null
+++ b/hail/src/main/scala/is/hail/HailFeatureFlags.scala
@@ -0,0 +1,73 @@
+package is.hail
+
+import is.hail.utils._
+import org.json4s.JsonAST.{JArray, JObject, JString}
+
+import scala.collection.mutable
+
+object HailFeatureFlags {
+  val defaults: Map[String, (String, String)] = Map[String, (String, String)](
+    ("no_whole_stage_codegen", ("HAIL_DEV_NO_WHOLE_STAGE_CODEGEN" -> null)),
+    ("no_ir_logging", ("HAIL_DEV_NO_IR_LOG" -> null)),
+    ("lower", ("HAIL_DEV_LOWER" -> null)),
+    ("lower_only", ("HAIL_DEV_LOWER_ONLY" -> null)),
+    ("lower_bm", ("HAIL_DEV_LOWER_BM" -> null)),
+    ("max_leader_scans", ("HAIL_DEV_MAX_LEADER_SCANS" -> "1000")),
+    ("distributed_scan_comb_op", ("HAIL_DEV_DISTRIBUTED_SCAN_COMB_OP" -> null)),
+    ("jvm_bytecode_dump", ("HAIL_DEV_JVM_BYTECODE_DUMP" -> null)),
+    ("use_packed_int_encoding", ("HAIL_DEV_USE_PACKED_INT_ENCODING" -> null)),
+    ("use_column_encoding", ("HAIL_DEV_USE_COLUMN_ENCODING" -> null)),
+    ("use_spicy_ptypes", ("HAIL_USE_SPICY_PTYPES" -> null)),
+    ("log_service_timing", ("HAIL_DEV_LOG_SERVICE_TIMING" -> null)),
+    ("cache_service_input", ("HAIL_DEV_CACHE_SERVICE_INPUT" -> null)),
+    ("write_ir_files", ("HAIL_WRITE_IR_FILES" -> null)),
+    ("method_split_ir_limit", ("HAIL_DEV_METHOD_SPLIT_LIMIT" -> "16")),
+    ("use_new_shuffle", ("HAIL_USE_NEW_SHUFFLE" -> null)),
+    ("shuffle_max_branch_factor", ("HAIL_SHUFFLE_MAX_BRANCH" -> "64")),
+    ("shuffle_cutoff_to_local_sort", ("HAIL_SHUFFLE_CUTOFF" -> "32000000")), // This is in bytes
+    ("grouped_aggregate_buffer_size", ("HAIL_GROUPED_AGGREGATE_BUFFER_SIZE" -> "50")),
+    ("use_ssa_logs", "HAIL_USE_SSA_LOGS" -> null)
+  )
+
+  def fromEnv(): HailFeatureFlags =
+    new HailFeatureFlags(
+      mutable.Map(
+        HailFeatureFlags.defaults.mapValues { case (env, default) =>
+          sys.env.getOrElse(env, default)
+        }.toFastSeq: _*
+      )
+    )
+
+  def fromMap(m: mutable.Map[String, String]): HailFeatureFlags =
+    new HailFeatureFlags(
+      mutable.Map(
+        HailFeatureFlags.defaults.map {
+          case (flagName, (_, default)) => (flagName, m.getOrElse(flagName, default))
+        }.toFastSeq: _*
+      )
+    )
+}
+
+class HailFeatureFlags(
+  private[this] val flags: mutable.Map[String, String]
+) {
+  val available: java.util.ArrayList[String] =
+    new java.util.ArrayList[String](java.util.Arrays.asList[String](flags.keys.toSeq: _*))
+
+  def set(flag: String, value: String): Unit = {
+    flags.update(flag, value)
+  }
+
+  def get(flag: String): String = flags(flag)
+
+  def exists(flag: String): Boolean = flags.contains(flag)
+
+  def toJSONEnv: JArray =
+    JArray(flags.filter { case (_, v) =>
+      v != null
+    }.map{ case (name, v) =>
+      JObject(
+        "name" -> JString(HailFeatureFlags.defaults(name)._1),
+        "value" -> JString(v))
+    }.toList)
+}
diff --git a/hail/src/main/scala/is/hail/asm4s/ClassBuilder.scala b/hail/src/main/scala/is/hail/asm4s/ClassBuilder.scala
index 59c11ad9d87..22a2c10253a 100644
--- a/hail/src/main/scala/is/hail/asm4s/ClassBuilder.scala
+++ b/hail/src/main/scala/is/hail/asm4s/ClassBuilder.scala
@@ -95,7 +95,7 @@ trait WrappedModuleBuilder {
 
   def genClass[C](baseName: String)(implicit cti: TypeInfo[C]): ClassBuilder[C] = modb.genClass[C](baseName)
 
-  def classesBytes(print: Option[PrintWriter] = None): ClassesBytes = modb.classesBytes(print)
+  def classesBytes(writeIRs: Boolean, print: Option[PrintWriter] = None): ClassesBytes = modb.classesBytes(writeIRs, print)
 }
 
 class ModuleBuilder() {
@@ -143,12 +143,12 @@ class ModuleBuilder() {
 
   var classesBytes: ClassesBytes = _
 
-  def classesBytes(print: Option[PrintWriter] = None): ClassesBytes = {
+  def classesBytes(writeIRs: Boolean, print: Option[PrintWriter] = None): ClassesBytes = {
     if (classesBytes == null) {
       classesBytes = new ClassesBytes(
         classes
           .iterator
-          .flatMap(c => c.classBytes(print))
+          .flatMap(c => c.classBytes(writeIRs, print))
           .toArray)
     }
 
@@ -203,7 +203,7 @@ trait WrappedClassBuilder[C] extends WrappedModuleBuilder {
   )(body: MethodBuilder[C] => Unit): MethodBuilder[C] =
     cb.getOrGenMethod(baseName, key, argsInfo, returnInfo)(body)
 
-  def result(print: Option[PrintWriter] = None): (HailClassLoader) => C = cb.result(print)
+  def result(writeIRs: Boolean, print: Option[PrintWriter] = None): (HailClassLoader) => C = cb.result(writeIRs, print)
 
   def _this: Value[C] = cb._this
 
@@ -241,7 +241,8 @@ class ClassBuilder[C](
 
   val lazyFieldMemo: mutable.Map[Any, Value[_]] = mutable.Map.empty
 
-  val lInit = lclass.newMethod("<init>", FastIndexedSeq(), UnitInfo)
+  val lInitBuilder = new MethodBuilder[C](this, "<init>", FastIndexedSeq(), UnitInfo)
+  val lInit = lInitBuilder.lmethod
 
   var initBody: Code[Unit] = {
     val L = new lir.Block()
@@ -265,6 +266,11 @@ class ClassBuilder[C](
     initBody = Code(initBody, c)
   }
 
+  def emitInitI(f: CodeBuilder => Unit): Unit = {
+    val body = CodeBuilder.scopedVoid(lInitBuilder)(f)
+    emitInit(body)
+  }
+
   def emitClinit(c: Code[Unit]): Unit = {
     clinitBody match {
       case None =>
@@ -334,7 +340,7 @@ class ClassBuilder[C](
     }
   }
 
-  def classBytes(print: Option[PrintWriter] = None): Array[(String, Array[Byte])] = {
+  def classBytes(writeIRs: Boolean, print: Option[PrintWriter] = None): Array[(String, Array[Byte])] = {
     assert(initBody.start != null)
     lInit.setEntry(initBody.start)
 
@@ -348,12 +354,12 @@
lClinit.setEntry(nbody.start) } - lclass.asBytes(print) + lclass.asBytes(writeIRs, print) } - def result(print: Option[PrintWriter] = None): (HailClassLoader) => C = { + def result(writeIRs: Boolean, print: Option[PrintWriter] = None): (HailClassLoader) => C = { val n = className.replace("/", ".") - val classesBytes = modb.classesBytes() + val classesBytes = modb.classesBytes(writeIRs) assert(TaskContext.get() == null, "FunctionBuilder emission should happen on master, but happened on worker") diff --git a/hail/src/main/scala/is/hail/asm4s/Code.scala b/hail/src/main/scala/is/hail/asm4s/Code.scala index ed91a971e92..a1808d9b902 100644 --- a/hail/src/main/scala/is/hail/asm4s/Code.scala +++ b/hail/src/main/scala/is/hail/asm4s/Code.scala @@ -1073,9 +1073,9 @@ class CodeChar(val lhs: Code[Char]) extends AnyVal { def <=(rhs: Code[Int]): Code[Boolean] = lhs.toI <= rhs - def ceq(rhs: Code[Int]): Code[Boolean] = lhs.toI.ceq(rhs) + def ceq(rhs: Code[Char]): Code[Boolean] = lhs.toI.ceq(rhs.toI) - def cne(rhs: Code[Int]): Code[Boolean] = lhs.toI.cne(rhs) + def cne(rhs: Code[Char]): Code[Boolean] = lhs.toI.cne(rhs.toI) def toI: Code[Int] = lhs.asInstanceOf[Code[Int]] diff --git a/hail/src/main/scala/is/hail/backend/ExecuteContext.scala b/hail/src/main/scala/is/hail/backend/ExecuteContext.scala index 92bac782c8c..1f663230e78 100644 --- a/hail/src/main/scala/is/hail/backend/ExecuteContext.scala +++ b/hail/src/main/scala/is/hail/backend/ExecuteContext.scala @@ -1,7 +1,7 @@ package is.hail.backend import is.hail.asm4s.HailClassLoader -import is.hail.HailContext +import is.hail.{HailContext, HailFeatureFlags} import is.hail.annotations.{Region, RegionPool} import is.hail.io.fs.FS import is.hail.utils.{ExecutionTimer, using} @@ -43,9 +43,30 @@ object ExecuteContext { result } - def scoped[T](tmpdir: String, localTmpdir: String, backend: Backend, fs: FS, timer: ExecutionTimer, tempFileManager: TempFileManager, theHailClassLoader: HailClassLoader)(f: ExecuteContext => T): T = { + def scoped[T]( + tmpdir: String, + localTmpdir: String, + backend: Backend, + fs: FS, + timer: ExecutionTimer, + tempFileManager: TempFileManager, + theHailClassLoader: HailClassLoader, + flags: HailFeatureFlags, + )( + f: ExecuteContext => T + ): T = { RegionPool.scoped { pool => - using(new ExecuteContext(tmpdir, localTmpdir, backend, fs, Region(pool = pool), timer, tempFileManager, theHailClassLoader))(f(_)) + using(new ExecuteContext( + tmpdir, + localTmpdir, + backend, + fs, + Region(pool = pool), + timer, + tempFileManager, + theHailClassLoader, + flags + ))(f(_)) } } @@ -80,12 +101,11 @@ class ExecuteContext( var r: Region, val timer: ExecutionTimer, _tempFileManager: TempFileManager, - val theHailClassLoader: HailClassLoader + val theHailClassLoader: HailClassLoader, + private[this] val flags: HailFeatureFlags ) extends Closeable { var backendContext: BackendContext = _ - val printIRs: Boolean = HailContext.getFlag("no_ir_logging") == null - private val tempFileManager: TempFileManager = if (_tempFileManager != null) _tempFileManager else @@ -113,6 +133,14 @@ class ExecuteContext( cleanupFunctions += cleanupFunction } + def getFlag(name: String): String = flags.get(name) + + def shouldWriteIRFiles(): Boolean = getFlag("write_ir_files") != null + + def shouldNotLogIR(): Boolean = flags.get("no_ir_logging") != null + + def shouldLogIR(): Boolean = !shouldNotLogIR() + def close(): Unit = { tempFileManager.cleanup() diff --git a/hail/src/main/scala/is/hail/backend/local/LocalBackend.scala 
b/hail/src/main/scala/is/hail/backend/local/LocalBackend.scala index 3e434c60493..5fefd5b2854 100644 --- a/hail/src/main/scala/is/hail/backend/local/LocalBackend.scala +++ b/hail/src/main/scala/is/hail/backend/local/LocalBackend.scala @@ -1,6 +1,6 @@ package is.hail.backend.local -import is.hail.HailContext +import is.hail.{HailContext, HailFeatureFlags} import is.hail.annotations.{Region, SafeRow, UnsafeRow} import is.hail.asm4s._ import is.hail.backend._ @@ -62,12 +62,19 @@ class LocalBackend( + "is.hail.io.compress.BGzipCodecTbi," + "org.apache.hadoop.io.compress.GzipCodec") + private[this] val flags = HailFeatureFlags.fromEnv() private[this] val theHailClassLoader = new HailClassLoader(getClass().getClassLoader()) + def getFlag(name: String): String = flags.get(name) + + def setFlag(name: String, value: String) = flags.set(name, value) + + val availableFlags: java.util.ArrayList[String] = flags.available + val fs: FS = new HadoopFS(new SerializableHadoopConfiguration(hadoopConf)) def withExecuteContext[T](timer: ExecutionTimer)(f: ExecuteContext => T): T = { - ExecuteContext.scoped(tmpdir, tmpdir, this, fs, timer, null, theHailClassLoader)(f) + ExecuteContext.scoped(tmpdir, tmpdir, this, fs, timer, null, theHailClassLoader, flags)(f) } def broadcast[T: ClassTag](value: T): BroadcastValue[T] = new LocalBroadcastValue[T](value) @@ -98,7 +105,7 @@ class LocalBackend( val ir = LoweringPipeline.darrayLowerer(true)(DArrayLowering.All).apply(ctx, ir0).asInstanceOf[IR] if (!Compilable(ir)) - throw new LowererUnsupportedOperation(s"lowered to uncompilable IR: ${ Pretty(ir) }") + throw new LowererUnsupportedOperation(s"lowered to uncompilable IR: ${ Pretty(ctx, ir) }") if (ir.typ == TVoid) { val (pt, f) = ctx.timer.time("Compile") { @@ -129,7 +136,7 @@ class LocalBackend( } private[this] def _execute(ctx: ExecuteContext, ir: IR): (Option[SingleCodeType], Long) = { - TypeCheck(ir) + TypeCheck(ctx, ir) Validate(ir) val queryID = Backend.nextID() log.info(s"starting execution of query $queryID of initial size ${ IRSize(ir) }") @@ -284,7 +291,7 @@ class LocalBackend( rowTypeRequiredness: RStruct ): TableStage = { - if (HailContext.getFlag("shuffle_cutoff_to_local_sort") != null) { + if (getFlag("use_new_shuffle") != null) { LowerDistributedSort.distributedSort(ctx, stage, sortFields, relationalLetsAbove, rowTypeRequiredness) } else { LowerDistributedSort.localSort(ctx, stage, sortFields, relationalLetsAbove) diff --git a/hail/src/main/scala/is/hail/backend/service/Main.scala b/hail/src/main/scala/is/hail/backend/service/Main.scala index daf63fa3a82..94db679c2bc 100644 --- a/hail/src/main/scala/is/hail/backend/service/Main.scala +++ b/hail/src/main/scala/is/hail/backend/service/Main.scala @@ -28,7 +28,7 @@ object Main { val logFile = argv(1) configureLogging(logFile) - argv(2) match { + argv(3) match { case WORKER => Worker.main(argv) case DRIVER => ServiceBackendSocketAPI2.main(argv) case kind => throw new RuntimeException(s"unknown kind: ${kind}") diff --git a/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala b/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala index fd5003eb666..6533df64040 100644 --- a/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala +++ b/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala @@ -6,14 +6,20 @@ import java.net._ import java.nio.charset.StandardCharsets import java.util.concurrent._ -import is.hail.{HAIL_REVISION, HailContext} +import is.hail.{HAIL_REVISION, HailContext, HailFeatureFlags} import 
is.hail.annotations._ import is.hail.asm4s._ import is.hail.backend.{Backend, BackendContext, BroadcastValue, ExecuteContext, HailTaskContext} import is.hail.expr.JSONAnnotationImpex import is.hail.expr.ir.lowering._ import is.hail.expr.ir.{Compile, IR, IRParser, MakeTuple, SortField} +import is.hail.expr.ir.functions.IRFunctionRegistry +import is.hail.io.{BufferSpec, TypedCodecSpec} +import is.hail.io.bgen.IndexBgen import is.hail.io.fs._ +import is.hail.io.bgen.IndexBgen +import is.hail.io.plink.LoadPlink +import is.hail.io.vcf.LoadVCF import is.hail.linalg.BlockMatrix import is.hail.services._ import is.hail.services.batch_client.BatchClient @@ -21,24 +27,21 @@ import is.hail.types._ import is.hail.types.physical._ import is.hail.types.physical.stypes.PTypeReferenceSingleCodeType import is.hail.types.virtual._ +import is.hail.types.encoded._ import is.hail.utils._ import is.hail.variant.ReferenceGenome import org.apache.commons.io.IOUtils import org.apache.log4j.Logger +import org.json4s.Extraction import org.json4s.JsonAST._ import org.json4s.jackson.JsonMethods import org.json4s.{DefaultFormats, Formats} import org.newsclub.net.unix.{AFUNIXServerSocket, AFUNIXSocketAddress} -import java.io._ -import java.net._ -import java.nio.charset.StandardCharsets -import java.util.concurrent._ import scala.annotation.switch import scala.reflect.ClassTag import scala.{concurrent => scalaConcurrent} -import is.hail.io.vcf.LoadVCF -import org.json4s.Extraction +import scala.collection.mutable class ServiceBackendContext( @@ -54,13 +57,7 @@ object ServiceBackend { private val log = Logger.getLogger(getClass.getName()) } -class User( - val username: String, - val tmpdir: String, - val fs: GoogleStorageFS) - class ServiceBackend( - val revision: String, val jarLocation: String, var name: String, val theHailClassLoader: HailClassLoader, @@ -69,20 +66,10 @@ class ServiceBackend( import ServiceBackend.log private[this] var batchCount = 0 - private[this] val users = new ConcurrentHashMap[String, User]() private[this] implicit val ec = scalaConcurrent.ExecutionContext.fromExecutorService( Executors.newCachedThreadPool()) - - def addUser(username: String, key: String): Unit = synchronized { - val previous = users.put(username, new User(username, "/tmp", new GoogleStorageFS(Some(key)))) - assert(previous == null) - } - - def userContext[T](username: String, timer: ExecutionTimer, theHailClassLoader: HailClassLoader)(f: (ExecuteContext) => T): T = { - val user = users.get(username) - assert(user != null, username) - ExecuteContext.scoped(user.tmpdir, "file:///tmp", this, user.fs, timer, null, theHailClassLoader)(f) - } + private[this] val MAX_AVAILABLE_GCS_CONNECTIONS = 100 + private[this] val availableGCSConnections = new Semaphore(MAX_AVAILABLE_GCS_CONNECTIONS, true) def defaultParallelism: Int = 10 @@ -100,6 +87,13 @@ class ServiceBackend( } } + private[this] def readString(in: DataInputStream): String = { + val n = in.readInt() + val bytes = new Array[Byte](n) + in.read(bytes) + new String(bytes, StandardCharsets.UTF_8) + } + def parallelizeAndComputeWithIndex( _backendContext: BackendContext, _fs: FS, @@ -113,12 +107,19 @@ class ServiceBackend( val token = tokenUrlSafe(32) val root = s"${ backendContext.remoteTmpDir }parallelizeAndComputeWithIndex/$token" + // FIXME: HACK + val (open, create) = if (n <= 50) { + (fs.openCachedNoCompression _, fs.createCachedNoCompression _) + } else { + (fs.openNoCompression _, fs.createNoCompression _) + } + log.info(s"parallelizeAndComputeWithIndex: $token: nPartitions 
$n") log.info(s"parallelizeAndComputeWithIndex: $token: writing f and contexts") val uploadFunction = scalaConcurrent.Future { retryTransientErrors { - using(new ObjectOutputStream(fs.createCachedNoCompression(s"$root/f"))) { os => + using(new ObjectOutputStream(create(s"$root/f"))) { os => os.writeObject(f) } } @@ -126,7 +127,7 @@ class ServiceBackend( val uploadContexts = scalaConcurrent.Future { retryTransientErrors { - using(fs.createCachedNoCompression(s"$root/contexts")) { os => + using(create(s"$root/contexts")) { os => var o = 12L * n var i = 0 while (i < n) { @@ -156,12 +157,15 @@ class ServiceBackend( "job_id" -> JInt(i + 1), "parent_ids" -> JArray(List()), "process" -> JObject( + "jar_spec" -> JObject( + "type" -> JString("jar_url"), + "value" -> JString(jarLocation) + ), "command" -> JArray(List( JString(Main.WORKER), - JString(revision), - JString(jarLocation), JString(root), - JString(s"$i"))), + JString(s"$i"), + JString(s"$n"))), "type" -> JString("jvm")), "mount_tokens" -> JBool(true), "resources" -> JObject("preemptible" -> JBool(true)) @@ -192,13 +196,30 @@ class ServiceBackend( val r = new Array[Array[Byte]](n) + def resultOrHailException(is: DataInputStream): Array[Byte] = { + val success = is.readBoolean() + if (success) { + IOUtils.toByteArray(is) + } else { + val shortMessage = readString(is) + val expandedMessage = readString(is) + val errorId = is.readInt() + throw new HailWorkerException(shortMessage, expandedMessage, errorId) + } + } + def readResult(i: Int): scalaConcurrent.Future[Unit] = scalaConcurrent.Future { - r(i) = retryTransientErrors { - using(fs.openCachedNoCompression(s"$root/result.$i")) { is => - IOUtils.toByteArray(is) + availableGCSConnections.acquire() + try { + r(i) = retryTransientErrors { + using(open(s"$root/result.$i")) { is => + resultOrHailException(new DataInputStream(is)) + } } + log.info(s"result $i complete") + } finally { + availableGCSConnections.release() } - log.info(s"result $i complete") } scalaConcurrent.Await.result( @@ -213,35 +234,17 @@ class ServiceBackend( def stop(): Unit = () def valueType( - tmpdir: String, - sessionId: String, - billingProject: String, - remoteTmpDir: String, + ctx: ExecuteContext, s: String - ): String = serviceBackendExecuteContext( - "ServiceBackend.valueType", - tmpdir, - sessionId, - billingProject, - remoteTmpDir - ) { ctx => + ): String = { val x = IRParser.parse_value_ir(ctx, s) x.typ.toString } def tableType( - tmpdir: String, - sessionId: String, - billingProject: String, - remoteTmpDir: String, + ctx: ExecuteContext, s: String - ): String = serviceBackendExecuteContext( - "ServiceBackend.tableType", - tmpdir, - sessionId, - billingProject, - remoteTmpDir - ) { ctx => + ): String = { val x = IRParser.parse_table_ir(ctx, s) val t = x.typ val jv = JObject("global" -> JString(t.globalType.toString), @@ -251,18 +254,9 @@ class ServiceBackend( } def matrixTableType( - tmpdir: String, - sessionId: String, - billingProject: String, - remoteTmpDir: String, + ctx: ExecuteContext, s: String - ): String = serviceBackendExecuteContext( - "ServiceBackend.matrixTableType", - tmpdir, - sessionId, - billingProject, - remoteTmpDir - ) { ctx => + ): String = { val x = IRParser.parse_matrix_ir(ctx, s) val t = x.typ val jv = JObject("global" -> JString(t.globalType.toString), @@ -275,18 +269,9 @@ class ServiceBackend( } def blockMatrixType( - tmpdir: String, - sessionId: String, - billingProject: String, - remoteTmpDir: String, + ctx: ExecuteContext, s: String - ): String = serviceBackendExecuteContext( - 
"ServiceBackend.blockMatrixType", - tmpdir, - sessionId, - billingProject, - remoteTmpDir - ) { ctx => + ): String = { val x = IRParser.parse_blockmatrix_ir(ctx, s) val t = x.typ val jv = JObject("element_type" -> JString(t.elementType.toString), @@ -297,22 +282,14 @@ class ServiceBackend( } def referenceGenome( - tmpdir: String, - sessionId: String, - billingProject: String, - remoteTmpDir: String, + ctx: ExecuteContext, name: String - ): String = serviceBackendExecuteContext( - "ServiceBackend.referenceGenome", - tmpdir, - sessionId, - billingProject, - remoteTmpDir - ) { ctx => + ): String = { ReferenceGenome.getReference(name).toJSONString } - private[this] def execute(ctx: ExecuteContext, _x: IR): Option[(Annotation, PType)] = { + private[this] def execute(ctx: ExecuteContext, _x: IR, bufferSpecString: String): Array[Byte] = { + // FIXME: do we need Validate(_x)? val x = LoweringPipeline.darrayLowerer(true)(DArrayLowering.All).apply(ctx, _x) .asInstanceOf[IR] if (x.typ == TVoid) { @@ -323,45 +300,33 @@ class ServiceBackend( optimize = true) f(ctx.theHailClassLoader, ctx.fs, 0, ctx.r)(ctx.r) - None + Array() } else { val (Some(PTypeReferenceSingleCodeType(pt)), f) = Compile[AsmFunction1RegionLong](ctx, FastIndexedSeq(), FastIndexedSeq[TypeInfo[_]](classInfo[Region]), LongInfo, MakeTuple.ordered(FastIndexedSeq(x)), optimize = true) - - val a = f(ctx.theHailClassLoader, ctx.fs, 0, ctx.r)(ctx.r) val retPType = pt.asInstanceOf[PBaseStruct] - Some((new UnsafeRow(retPType, ctx.r, a).get(0), retPType.types(0))) + val off = f(ctx.theHailClassLoader, ctx.fs, 0, ctx.r)(ctx.r) + val codec = TypedCodecSpec( + EType.fromTypeAllOptional(retPType.virtualType), + retPType.virtualType, + BufferSpec.parseOrDefault(bufferSpecString) + ) + codec.encode(ctx, retPType, off) } } def execute( - tmpdir: String, - sessionId: String, - billingProject: String, - remoteTmpDir: String, + ctx: ExecuteContext, code: String, - token: String - ): String = serviceBackendExecuteContext( - "ServiceBackend.execute", - tmpdir, - sessionId, - billingProject, - remoteTmpDir - ) { ctx => + token: String, + bufferSpecString: String + ): Array[Byte] = { log.info(s"executing: ${token}") - execute(ctx, IRParser.parse_value_ir(ctx, code)) match { - case Some((v, t)) => - JsonMethods.compact( - JObject(List("value" -> JSONAnnotationImpex.exportAnnotation(v, t.virtualType), - "type" -> JString(t.virtualType.toString)))) - case None => - JsonMethods.compact( - JObject(List("value" -> null, "type" -> JString(TVoid.toString)))) - } + execute(ctx, IRParser.parse_value_ir(ctx, code), bufferSpecString) } def lowerDistributedSort( @@ -371,7 +336,11 @@ class ServiceBackend( relationalLetsAbove: Map[String, IR], rowTypeRequiredness: RStruct ): TableStage = { - LowerDistributedSort.localSort(ctx, stage, sortFields, relationalLetsAbove) + if (ctx.getFlag("use_new_shuffle") != null) { + LowerDistributedSort.distributedSort(ctx, stage, sortFields, relationalLetsAbove, rowTypeRequiredness) + } else { + LowerDistributedSort.localSort(ctx, stage, sortFields, relationalLetsAbove) + } } def persist(backendContext: BackendContext, id: String, value: BlockMatrix, storageLevel: String): Unit = ??? @@ -383,55 +352,40 @@ class ServiceBackend( def getPersistedBlockMatrixType(backendContext: BackendContext, id: String): BlockMatrixType = ??? 
def loadReferencesFromDataset( - tmpdir: String, - sessionId: String, - billingProject: String, - remoteTmpDir: String, + ctx: ExecuteContext, path: String - ): String = serviceBackendExecuteContext( - "ServiceBackend.loadReferencesFromDataset", - tmpdir, - sessionId, - billingProject, - remoteTmpDir - ) { ctx => - ReferenceGenome.fromHailDataset(ctx.fs, path) - } + ): String = ReferenceGenome.fromHailDataset(ctx.fs, path) def parseVCFMetadata( - tmpdir: String, - sessionId: String, - billingProject: String, - remoteTmpDir: String, - path: String, - ): String = serviceBackendExecuteContext( - "ServiceBackend.parseVCFMetadata", - tmpdir, - sessionId, - billingProject, - remoteTmpDir - ) { ctx => + ctx: ExecuteContext, + path: String + ): String = { val metadata = LoadVCF.parseHeaderMetadata(ctx.fs, Set.empty, TFloat64, path) implicit val formats = defaultJSONFormats JsonMethods.compact(Extraction.decompose(metadata)) } - private[this] def serviceBackendExecuteContext[T]( - methodName: String, - tmpdir: String, - sessionId: String, - billingProject: String, - remoteTmpDir: String - )(body: ExecuteContext => T): T = ExecutionTimer.logTime(methodName) { timer => - val fs = retryTransientErrors { - using(new FileInputStream(s"$scratchDir/secrets/gsa-key/key.json")) { is => - new GoogleStorageFS(Some(IOUtils.toString(is, Charset.defaultCharset().toString()))).asCacheable() - } - } - ExecuteContext.scoped(tmpdir, "file:///tmp", this, fs, timer, null, theHailClassLoader) { ctx => - ctx.backendContext = new ServiceBackendContext(sessionId, billingProject, remoteTmpDir) - body(ctx) - } + def importFam( + ctx: ExecuteContext, + path: String, + quantPheno: Boolean, + delimiter: String, + missing: String + ): String = { + LoadPlink.importFamJSON(ctx.fs, path, quantPheno, delimiter, missing) + } + + def indexBgen( + ctx: ExecuteContext, + files: Array[String], + indexFileMap: Map[String, String], + referenceGenomeName: Option[String], + contigRecoding: Map[String, String], + skipInvalidLoci: Boolean + ): String = { + IndexBgen(ctx, files, indexFileMap, referenceGenomeName, contigRecoding, skipInvalidLoci) + info(s"Number of BGEN files indexed: ${ files.size }") + "null" } } @@ -440,21 +394,20 @@ class HailBatchFailure(message: String) extends RuntimeException(message) object ServiceBackendSocketAPI2 { def main(argv: Array[String]): Unit = { - assert(argv.length == 8, argv.toFastIndexedSeq) + assert(argv.length == 7, argv.toFastIndexedSeq) val scratchDir = argv(0) val logFile = argv(1) - val kind = argv(2) + val jarLocation = argv(2) + val kind = argv(3) assert(kind == Main.DRIVER) - val revision = argv(3) - val jarLocation = argv(4) - val name = argv(5) - val input = argv(6) - val output = argv(7) + val name = argv(4) + val input = argv(5) + val output = argv(6) // FIXME: when can the classloader be shared? (optimizer benefits!) 
val backend = new ServiceBackend( - revision, jarLocation, name, new HailClassLoader(getClass().getClassLoader()), scratchDir) + jarLocation, name, new HailClassLoader(getClass().getClassLoader()), scratchDir) if (HailContext.isInitialized) { HailContext.get.backend = backend } else { @@ -492,8 +445,6 @@ class ServiceBackendSocketAPI2( private[this] val out: OutputStream, private[this] val sessionId: String ) extends Thread { - import ServiceBackendSocketAPI2._ - private[this] val LOAD_REFERENCES_FROM_DATASET = 1 private[this] val VALUE_TYPE = 2 private[this] val TABLE_TYPE = 3 @@ -504,7 +455,6 @@ class ServiceBackendSocketAPI2( private[this] val PARSE_VCF_METADATA = 8 private[this] val INDEX_BGEN = 9 private[this] val IMPORT_FAM = 10 - private[this] val GOODBYE = 254 private[this] val dummy = new Array[Byte](8) @@ -521,6 +471,11 @@ class ServiceBackendSocketAPI2( } } + def readBool(): Boolean = { + read(dummy, 0, 1) + Memory.loadByte(dummy, 0) != 0.toByte + } + def readInt(): Int = { read(dummy, 0, 4) Memory.loadInt(dummy, 0) @@ -562,132 +517,216 @@ class ServiceBackendSocketAPI2( def writeString(s: String): Unit = writeBytes(s.getBytes(StandardCharsets.UTF_8)) def executeOneCommand(): Unit = { + var nFlagsRemaining = readInt() + val flags = mutable.Map[String, String]() + while (nFlagsRemaining > 0) { + val flagName = readString() + val flagValue = readString() + flags.update(flagName, flagValue) + nFlagsRemaining -= 1 + } + val cmd = readInt() - (cmd: @switch) match { - case LOAD_REFERENCES_FROM_DATASET => - val tmpdir = readString() - val billingProject = readString() - val remoteTmpDir = readString() - val path = readString() - try { - val result = backend.loadReferencesFromDataset(tmpdir, sessionId, billingProject, remoteTmpDir, path) - writeBool(true) - writeString(result) - } catch { - case t: Throwable => - writeBool(false) - writeString(formatException(t)) - } + val tmpdir = readString() + val billingProject = readString() + val remoteTmpDir = readString() - case VALUE_TYPE => - val tmpdir = readString() - val billingProject = readString() - val remoteTmpDir = readString() - val s = readString() - try { - val result = backend.valueType(tmpdir, sessionId, billingProject, remoteTmpDir, s) - writeBool(true) - writeString(result) - } catch { - case t: Throwable => - writeBool(false) - writeString(formatException(t)) + def withExecuteContext(methodName: String, method: ExecuteContext => Array[Byte]): Array[Byte] = ExecutionTimer.logTime(methodName) { timer => + val fs = retryTransientErrors { + using(new FileInputStream(s"${backend.scratchDir}/secrets/gsa-key/key.json")) { is => + new GoogleStorageFS(Some(IOUtils.toString(is, Charset.defaultCharset().toString()))).asCacheable() } + } + ExecuteContext.scoped( + tmpdir, + "file:///tmp", + backend, + fs, + timer, + null, + backend.theHailClassLoader, + HailFeatureFlags.fromMap(flags) + ) { ctx => + ctx.backendContext = new ServiceBackendContext(sessionId, billingProject, remoteTmpDir) + method(ctx) + } + } - case TABLE_TYPE => - val tmpdir = readString() - val billingProject = readString() - val remoteTmpDir = readString() - val s = readString() - try { - val result = backend.tableType(tmpdir, sessionId, billingProject, remoteTmpDir, s) - writeBool(true) - writeString(result) - } catch { - case t: Throwable => - writeBool(false) - writeString(formatException(t)) - } + try { + val result = (cmd: @switch) match { + case LOAD_REFERENCES_FROM_DATASET => + val path = readString() + withExecuteContext( + 
"ServiceBackend.loadReferencesFromDataset", + backend.loadReferencesFromDataset(_, path).getBytes(StandardCharsets.UTF_8) + ) + case VALUE_TYPE => + val s = readString() + withExecuteContext( + "ServiceBackend.valueType", + backend.valueType(_, s).getBytes(StandardCharsets.UTF_8) + ) + case TABLE_TYPE => + val s = readString() + withExecuteContext( + "ServiceBackend.tableType", + backend.tableType(_, s).getBytes(StandardCharsets.UTF_8) + ) + case MATRIX_TABLE_TYPE => + val s = readString() + withExecuteContext( + "ServiceBackend.matrixTableType", + backend.matrixTableType(_, s).getBytes(StandardCharsets.UTF_8) + ) + case BLOCK_MATRIX_TYPE => + val s = readString() + withExecuteContext( + "ServiceBackend.blockMatrixType", + backend.blockMatrixType(_, s).getBytes(StandardCharsets.UTF_8) + ) + case REFERENCE_GENOME => + val name = readString() + withExecuteContext( + "ServiceBackend.referenceGenome", + backend.referenceGenome(_, name).getBytes(StandardCharsets.UTF_8) + ) + case EXECUTE => + val code = readString() + val token = readString() + withExecuteContext( + "ServiceBackend.execute", + { ctx => + withIRFunctionsReadFromInput(ctx) { () => + val bufferSpecString = readString() + backend.execute(ctx, code, token, bufferSpecString) + } + } + ) + case PARSE_VCF_METADATA => + val path = readString() + withExecuteContext( + "ServiceBackend.parseVCFMetadata", + backend.parseVCFMetadata(_, path).getBytes(StandardCharsets.UTF_8) + ) + case IMPORT_FAM => + val path = readString() + val quantPheno = readBool() + val delimiter = readString() + val missing = readString() + withExecuteContext( + "ServiceBackend.importFam", + backend.importFam(_, path, quantPheno, delimiter, missing).getBytes(StandardCharsets.UTF_8) + ) + case INDEX_BGEN => + val nFiles = readInt() + val files = new Array[String](nFiles) + var i = 0 + while (i < nFiles) { + files(i) = readString() + i += 1 + } + val nIndexFiles = readInt() + val indexFileMap = mutable.Map[String, String]() + i = 0 + while (i < nIndexFiles) { + val k = readString() + val v = readString() + indexFileMap(k) = v + i += 1 + } + val hasReferenceGenome = readBool() + val referenceGenomeName = hasReferenceGenome match { + case true => Some(readString()) + case false => None + } + val nContigRecoding = readInt() + val contigRecoding = mutable.Map[String, String]() + i = 0 + while (i < nContigRecoding) { + val k = readString() + val v = readString() + contigRecoding(k) = v + i += 1 + } + val skipInvalidLoci = readBool() + withExecuteContext( + "ServiceBackend.indexBgen", + backend.indexBgen( + _, + files, + indexFileMap.toMap, + referenceGenomeName, + contigRecoding.toMap, + skipInvalidLoci + ).getBytes(StandardCharsets.UTF_8) + ) + } + writeBool(true) + writeBytes(result) + } catch { + case exc: HailWorkerException => + writeBool(false) + writeString(exc.shortMessage) + writeString(exc.expandedMessage) + writeInt(exc.errorId) + case t: Throwable => + val (shortMessage, expandedMessage, errorId) = handleForPython(t) + writeBool(false) + writeString(shortMessage) + writeString(expandedMessage) + writeInt(errorId) + } + } - case MATRIX_TABLE_TYPE => - val tmpdir = readString() - val billingProject = readString() - val remoteTmpDir = readString() - val s = readString() - try { - val result = backend.matrixTableType(tmpdir, sessionId, billingProject, remoteTmpDir, s) - writeBool(true) - writeString(result) - } catch { - case t: Throwable => - writeBool(false) - writeString(formatException(t)) - } + def withIRFunctionsReadFromInput(ctx: ExecuteContext)(body: () => 
Array[Byte]): Array[Byte] = { + try { + var nFunctionsRemaining = readInt() + while (nFunctionsRemaining > 0) { + val name = readString() - case BLOCK_MATRIX_TYPE => - val tmpdir = readString() - val billingProject = readString() - val remoteTmpDir = readString() - val s = readString() - try { - val result = backend.blockMatrixType(tmpdir, sessionId, billingProject, remoteTmpDir, s) - writeBool(true) - writeString(result) - } catch { - case t: Throwable => - writeBool(false) - writeString(formatException(t)) + val nTypeParametersRemaining = readInt() + val typeParameters = new Array[String](nTypeParametersRemaining) + var i = 0 + while (i < nTypeParametersRemaining) { + typeParameters(i) = readString() + i += 1 } - case REFERENCE_GENOME => - val tmpdir = readString() - val billingProject = readString() - val remoteTmpDir = readString() - val name = readString() - try { - val result = backend.referenceGenome(tmpdir, sessionId, billingProject, remoteTmpDir, name) - writeBool(true) - writeString(result) - } catch { - case t: Throwable => - writeBool(false) - writeString(formatException(t)) + val nValueParameterNamesRemaining = readInt() + val valueParameterNames = new Array[String](nValueParameterNamesRemaining) + i = 0 + while (i < nValueParameterNamesRemaining) { + valueParameterNames(i) = readString() + i += 1 } - case EXECUTE => - val tmpdir = readString() - val billingProject = readString() - val remoteTmpDir = readString() - val code = readString() - val token = readString() - try { - val result = backend.execute(tmpdir, sessionId, billingProject, remoteTmpDir, code, token) - writeBool(true) - writeString(result) - } catch { - case t: Throwable => - writeBool(false) - writeString(formatException(t)) + val nValueParameterTypesRemaining = readInt() + val valueParameterTypes = new Array[String](nValueParameterTypesRemaining) + i = 0 + while (i < nValueParameterTypesRemaining) { + valueParameterTypes(i) = readString() + i += 1 } - case PARSE_VCF_METADATA => - val tmpdir = readString() - val billingProject = readString() - val remoteTmpDir = readString() - val path = readString() - try { - val result = backend.parseVCFMetadata(tmpdir, sessionId, billingProject, remoteTmpDir, path) - writeBool(true) - writeString(result) - } catch { - case t: Throwable => - writeBool(false) - writeString(formatException(t)) - } + val returnType = readString() - case GOODBYE => - writeInt(GOODBYE) + val renderedBody = readString() + + IRFunctionRegistry.pyRegisterIRForServiceBackend( + ctx, + name, + typeParameters, + valueParameterNames, + valueParameterTypes, + returnType, + renderedBody + ) + nFunctionsRemaining -= 1 + } + body() + } finally { + IRFunctionRegistry.clearUserFunctions() } } } diff --git a/hail/src/main/scala/is/hail/backend/service/Worker.scala b/hail/src/main/scala/is/hail/backend/service/Worker.scala index 93693561ec3..7edbc55d2b8 100644 --- a/hail/src/main/scala/is/hail/backend/service/Worker.scala +++ b/hail/src/main/scala/is/hail/backend/service/Worker.scala @@ -51,6 +51,12 @@ object Worker { private[this] implicit val ec = ExecutionContext.fromExecutorService( javaConcurrent.Executors.newCachedThreadPool()) + private[this] def writeString(out: DataOutputStream, s: String): Unit = { + val bytes = s.getBytes(StandardCharsets.UTF_8) + out.writeInt(bytes.length) + out.write(bytes) + } + def main(argv: Array[String]): Unit = { val theHailClassLoader = new HailClassLoader(getClass().getClassLoader()) @@ -59,12 +65,12 @@ object Worker { } val scratchDir = argv(0) val logFile = argv(1) - val 
kind = argv(2) + var jarLocation = argv(2) + val kind = argv(3) assert(kind == Main.WORKER) - val revision = argv(3) - val jarGCSPath = argv(4) - val root = argv(5) - val i = argv(6).toInt + val root = argv(4) + val i = argv(5).toInt + val n = argv(6).toInt val timer = new WorkerTimer() val deployConfig = DeployConfig.fromConfigFile( @@ -75,9 +81,9 @@ object Worker { tls.setSSLConfigFromDir(s"$scratchDir/secrets/ssl-config") log.info(s"is.hail.backend.service.Worker $myRevision") - log.info(s"running job $i at root $root with scratch directory '$scratchDir'") + log.info(s"running job $i/$n at root $root with scratch directory '$scratchDir'") - timer.start(s"Job $i") + timer.start(s"Job $i/$n") timer.start("readInputs") val fs = retryTransientErrors { @@ -86,9 +92,16 @@ object Worker { } } + // FIXME: HACK + val (open, create) = if (n <= 50) { + (fs.openCachedNoCompression _, fs.createCachedNoCompression _) + } else { + (fs.openNoCompression _, fs.createNoCompression _) + } + val fFuture = Future { retryTransientErrors { - using(new ObjectInputStream(fs.openCachedNoCompression(s"$root/f"))) { is => + using(new ObjectInputStream(open(s"$root/f"))) { is => is.readObject().asInstanceOf[(Array[Byte], HailTaskContext, HailClassLoader, FS) => Array[Byte]] } } @@ -96,7 +109,7 @@ object Worker { val contextFuture = Future { retryTransientErrors { - using(fs.openCachedNoCompression(s"$root/contexts")) { is => + using(open(s"$root/contexts")) { is => is.seek(i * 12) val offset = is.readLong() val length = is.readInt() @@ -115,21 +128,41 @@ object Worker { timer.start("executeFunction") if (HailContext.isInitialized) { - HailContext.get.backend = new ServiceBackend(null, null, null, new HailClassLoader(getClass().getClassLoader())) + HailContext.get.backend = new ServiceBackend(null, null, new HailClassLoader(getClass().getClassLoader())) } else { HailContext( // FIXME: workers should not have backends, but some things do need hail contexts - new ServiceBackend(null, null, null, new HailClassLoader(getClass().getClassLoader())), skipLoggingConfiguration = true, quiet = true) + new ServiceBackend(null, null, new HailClassLoader(getClass().getClassLoader())), skipLoggingConfiguration = true, quiet = true) } val htc = new ServiceTaskContext(i) - val result = f(context, htc, theHailClassLoader, fs) + var result: Array[Byte] = null + var userError: HailException = null + try { + result = f(context, htc, theHailClassLoader, fs) + } catch { + case err: HailException => userError = err + } htc.finish() timer.end("executeFunction") timer.start("writeOutputs") - using(fs.createCachedNoCompression(s"$root/result.$i")) { os => - os.write(result) + using(create(s"$root/result.$i")) { os => + val dos = new DataOutputStream(os) + if (result != null) { + assert(userError == null) + + dos.writeBoolean(true) + dos.write(result) + } else { + assert(userError != null) + val (shortMessage, expandedMessage, errorId) = handleForPython(userError) + + dos.writeBoolean(false) + writeString(dos, shortMessage) + writeString(dos, expandedMessage) + dos.writeInt(errorId) + } } timer.end("writeOutputs") timer.end(s"Job $i") diff --git a/hail/src/main/scala/is/hail/backend/spark/SparkBackend.scala b/hail/src/main/scala/is/hail/backend/spark/SparkBackend.scala index 5ac8826bfae..62b92c30766 100644 --- a/hail/src/main/scala/is/hail/backend/spark/SparkBackend.scala +++ b/hail/src/main/scala/is/hail/backend/spark/SparkBackend.scala @@ -5,7 +5,7 @@ import is.hail.asm4s._ import is.hail.expr.ir.IRParser import is.hail.types.encoded.EType 
import is.hail.io.{BufferSpec, StreamBufferSpec, TypedCodecSpec} -import is.hail.HailContext +import is.hail.{HailContext, HailFeatureFlags} import is.hail.expr.{JSONAnnotationImpex, SparkAnnotationImpex, Validate} import is.hail.expr.ir.lowering._ import is.hail.expr.ir._ @@ -261,6 +261,14 @@ class SparkBackend( val bmCache: SparkBlockMatrixCache = SparkBlockMatrixCache() + private[this] val flags = HailFeatureFlags.fromEnv() + + def getFlag(name: String): String = flags.get(name) + + def setFlag(name: String, value: String) = flags.set(name, value) + + val availableFlags: java.util.ArrayList[String] = flags.available + def persist(backendContext: BackendContext, id: String, value: BlockMatrix, storageLevel: String): Unit = bmCache.persistBlockMatrix(id, value, storageLevel) def unpersist(backendContext: BackendContext, id: String): Unit = unpersist(id) @@ -271,10 +279,33 @@ class SparkBackend( def unpersist(id: String): Unit = bmCache.unpersistBlockMatrix(id) + def createExecuteContextForTests( + timer: ExecutionTimer, + region: Region, + selfContainedExecution: Boolean = true + ): ExecuteContext = new ExecuteContext( + tmpdir, + localTmpdir, + this, + fs, + region, + timer, + if (selfContainedExecution) null else new NonOwningTempFileManager(longLifeTempFileManager), + theHailClassLoader, + flags + ) + def withExecuteContext[T](timer: ExecutionTimer, selfContainedExecution: Boolean = true)(f: ExecuteContext => T): T = { - ExecuteContext.scoped(tmpdir, localTmpdir, this, fs, timer, + ExecuteContext.scoped( + tmpdir, + localTmpdir, + this, + fs, + timer, if (selfContainedExecution) null else new NonOwningTempFileManager(longLifeTempFileManager), - theHailClassLoader)(f) + theHailClassLoader, + flags + )(f) } def broadcast[T : ClassTag](value: T): BroadcastValue[T] = new SparkBroadcastValue[T](sc.broadcast(value)) @@ -333,7 +364,7 @@ class SparkBackend( val ir = LoweringPipeline.darrayLowerer(optimize)(typesToLower).apply(ctx, ir0).asInstanceOf[IR] if (!Compilable(ir)) - throw new LowererUnsupportedOperation(s"lowered to uncompilable IR: ${ Pretty(ir) }") + throw new LowererUnsupportedOperation(s"lowered to uncompilable IR: ${ Pretty(ctx, ir) }") val res = ir.typ match { case TVoid => @@ -371,14 +402,14 @@ class SparkBackend( } private[this] def _execute(ctx: ExecuteContext, ir: IR, optimize: Boolean): Either[Unit, (PTuple, Long)] = { - TypeCheck(ir) + TypeCheck(ctx, ir) Validate(ir) try { - val lowerTable = HailContext.getFlag("lower") != null - val lowerBM = HailContext.getFlag("lower_bm") != null + val lowerTable = getFlag("lower") != null + val lowerBM = getFlag("lower_bm") != null _jvmLowerAndExecute(ctx, ir, optimize, lowerTable, lowerBM) } catch { - case e: LowererUnsupportedOperation if HailContext.getFlag("lower_only") != null => throw e + case e: LowererUnsupportedOperation if getFlag("lower_only") != null => throw e case _: LowererUnsupportedOperation => CompileAndEvaluate._apply(ctx, ir, optimize = optimize) } diff --git a/hail/src/main/scala/is/hail/expr/ir/BlockMatrixIR.scala b/hail/src/main/scala/is/hail/expr/ir/BlockMatrixIR.scala index 23efc2a2bec..e5ecff4b9dd 100644 --- a/hail/src/main/scala/is/hail/expr/ir/BlockMatrixIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/BlockMatrixIR.scala @@ -11,9 +11,9 @@ import is.hail.annotations.{NDArray, Region} import is.hail.backend.{BackendContext, ExecuteContext} import is.hail.expr.Nat import is.hail.expr.ir.lowering.{BlockMatrixStage, LowererUnsupportedOperation} -import is.hail.io.TypedCodecSpec +import 
is.hail.io.{StreamBufferSpec, TypedCodecSpec} import is.hail.io.fs.FS -import is.hail.types.encoded.{EBlockMatrixNDArray, EFloat64} +import is.hail.types.encoded.{EBlockMatrixNDArray, EFloat64, ENumpyBinaryNDArray} import scala.collection.mutable.ArrayBuffer import is.hail.utils.richUtils.RichDenseMatrixDouble @@ -64,7 +64,7 @@ abstract sealed class BlockMatrixIR extends BaseIR { def typ: BlockMatrixType protected[ir] def execute(ctx: ExecuteContext): BlockMatrix = - fatal("tried to execute unexecutable IR:\n" + Pretty(this)) + fatal("tried to execute unexecutable IR:\n" + Pretty(ctx, this)) def copy(newChildren: IndexedSeq[BaseIR]): BlockMatrixIR @@ -165,7 +165,7 @@ class BlockMatrixNativeReader( val spec = TypedCodecSpec(EBlockMatrixNDArray(EFloat64(required = true), required = true), vType, BlockMatrix.bufferSpec) - new BlockMatrixStage(Array(), TString) { + new BlockMatrixStage(IndexedSeq(), Array(), TString) { def blockContext(idx: (Int, Int)): IR = { if (!fullType.hasBlock(idx)) fatal(s"trying to read nonexistent block $idx from path ${ params.path }") @@ -203,6 +203,24 @@ case class BlockMatrixBinaryReader(path: String, shape: IndexedSeq[Long], blockS val breezeMatrix = RichDenseMatrixDouble.importFromDoubles(ctx.fs, path, nRows.toInt, nCols.toInt, rowMajor = true) BlockMatrix.fromBreezeMatrix(breezeMatrix, blockSize) } + + override def lower(ctx: ExecuteContext): BlockMatrixStage = { + val readFromNumpyEType = ENumpyBinaryNDArray(nRows, nCols, true) + val readFromNumpySpec = TypedCodecSpec(readFromNumpyEType, TNDArray(TFloat64, Nat(2)), new StreamBufferSpec()) + val nd = ReadValue(Str(path), readFromNumpySpec, TNDArray(TFloat64, nDimsBase = Nat(2))) + val ndRef = Ref(genUID(), nd.typ) + + new BlockMatrixStage(IndexedSeq(ndRef.name -> nd), Array(), nd.typ) { + def blockContext(idx: (Int, Int)): IR = { + val (r, c) = idx + NDArraySlice(ndRef, MakeTuple.ordered(FastSeq( + MakeTuple.ordered(FastSeq(I64(r.toLong * blockSize), I64(java.lang.Math.min((r.toLong + 1) * blockSize, nRows)), I64(1))), + MakeTuple.ordered(FastSeq(I64(c.toLong * blockSize), I64(java.lang.Math.min((c.toLong + 1) * blockSize, nCols)), I64(1)))))) + } + + def blockBody(ctxRef: Ref): IR = ctxRef + } + } } case class BlockMatrixNativePersistParameters(id: String) @@ -292,7 +310,7 @@ case class BlockMatrixMap(child: BlockMatrixIR, eltName: String, f: IR, needsDen val i = evalIR(ctx, l) ("/", binaryOp(evalIR(ctx, l), BlockMatrix.reverseScalarDiv)) - case _ => fatal(s"Unsupported operation on BlockMatrices: ${Pretty(f)}") + case _ => fatal(s"Unsupported operation on BlockMatrices: ${Pretty(ctx, f)}") } prev.blockMap(breezeF, name, reqDense = needsDense) @@ -962,4 +980,4 @@ case class RelationalLetBlockMatrix(name: String, value: IR, body: BlockMatrixIR val IndexedSeq(newValue: IR, newBody: BlockMatrixIR) = newChildren RelationalLetBlockMatrix(name, newValue, newBody) } -} \ No newline at end of file +} diff --git a/hail/src/main/scala/is/hail/expr/ir/BlockMatrixWriter.scala b/hail/src/main/scala/is/hail/expr/ir/BlockMatrixWriter.scala index 511585b8d86..3459f6c6983 100644 --- a/hail/src/main/scala/is/hail/expr/ir/BlockMatrixWriter.scala +++ b/hail/src/main/scala/is/hail/expr/ir/BlockMatrixWriter.scala @@ -6,11 +6,11 @@ import is.hail.asm4s._ import is.hail.backend.ExecuteContext import is.hail.expr.Nat import is.hail.expr.ir.lowering.{BlockMatrixStage, LowererUnsupportedOperation} -import is.hail.io.TypedCodecSpec +import is.hail.io.{StreamBufferSpec, TypedCodecSpec} import is.hail.io.fs.FS import 
is.hail.linalg.{BlockMatrix, BlockMatrixMetadata} -import is.hail.types.encoded.{EBlockMatrixNDArray, EType} -import is.hail.types.virtual.{TArray, TNDArray, TString, Type} +import is.hail.types.encoded.{EBlockMatrixNDArray, ENumpyBinaryNDArray, EType} +import is.hail.types.virtual.{TArray, TNDArray, TString, Type, TVoid} import is.hail.types.{BlockMatrixType, TypeWithRequiredness} import is.hail.utils._ import is.hail.utils.richUtils.RichDenseMatrixDouble @@ -30,8 +30,8 @@ object BlockMatrixWriter { abstract class BlockMatrixWriter { def pathOpt: Option[String] - def apply(ctx: ExecuteContext, bm: BlockMatrix): Unit - + def apply(ctx: ExecuteContext, bm: BlockMatrix): Any + def loweredTyp: Type def lower(ctx: ExecuteContext, s: BlockMatrixStage, bm: BlockMatrixIR, relationalBindings: Map[String, IR], eltR: TypeWithRequiredness): IR = throw new LowererUnsupportedOperation(s"unimplemented writer: \n${ this.getClass }") } @@ -45,6 +45,8 @@ case class BlockMatrixNativeWriter( def apply(ctx: ExecuteContext, bm: BlockMatrix): Unit = bm.write(ctx, path, overwrite, forceRowMajor, stageLocally) + def loweredTyp: Type = TVoid + override def lower(ctx: ExecuteContext, s: BlockMatrixStage, bm: BlockMatrixIR, relationalBindings: Map[String, IR], eltR: TypeWithRequiredness): IR = { if (stageLocally) throw new LowererUnsupportedOperation(s"stageLocally not supported in BlockMatrixWrite lowering") @@ -105,14 +107,27 @@ case class BlockMatrixNativeMetadataWriter(path: String, stageLocally: Boolean, }) cb += cb.emb.getObject(metaHelper).invoke[FS, Array[String], Unit]("write", cb.emb.getFS, partFiles) } + + def loweredTyp: Type = TVoid } case class BlockMatrixBinaryWriter(path: String) extends BlockMatrixWriter { def pathOpt: Option[String] = Some(path) - def apply(ctx: ExecuteContext, bm: BlockMatrix): Unit = { + def apply(ctx: ExecuteContext, bm: BlockMatrix): String = { RichDenseMatrixDouble.exportToDoubles(ctx.fs, path, bm.toBreezeMatrix(), forceRowMajor = true) + path + } + + def loweredTyp: Type = TString + + override def lower(ctx: ExecuteContext, s: BlockMatrixStage, bm: BlockMatrixIR, relationalBindings: Map[String, IR], eltR: TypeWithRequiredness): IR = { + val nd = s.collectLocal(relationalBindings, bm.typ) + + val etype = ENumpyBinaryNDArray(bm.typ.nRows, bm.typ.nCols, true) + val spec = TypedCodecSpec(etype, TNDArray(bm.typ.elementType, Nat(2)), new StreamBufferSpec()) + WriteValue(nd, Str(path), spec) } } @@ -120,6 +135,7 @@ case class BlockMatrixPersistWriter(id: String, storageLevel: String) extends Bl def pathOpt: Option[String] = None def apply(ctx: ExecuteContext, bm: BlockMatrix): Unit = HailContext.backend.persist(ctx.backendContext, id, bm, storageLevel) + def loweredTyp: Type = TVoid } case class BlockMatrixRectanglesWriter( @@ -133,6 +149,8 @@ case class BlockMatrixRectanglesWriter( def apply(ctx: ExecuteContext, bm: BlockMatrix): Unit = { bm.exportRectangles(ctx, path, rectangles, delimiter, binary) } + + def loweredTyp: Type = TVoid } abstract class BlockMatrixMultiWriter { @@ -145,6 +163,8 @@ case class BlockMatrixBinaryMultiWriter( def apply(ctx: ExecuteContext, bms: IndexedSeq[BlockMatrix]): Unit = BlockMatrix.binaryWriteBlockMatrices(ctx.fs, bms, prefix, overwrite) + + def loweredTyp: Type = TVoid } case class BlockMatrixTextMultiWriter( @@ -158,6 +178,8 @@ case class BlockMatrixTextMultiWriter( def apply(ctx: ExecuteContext, bms: IndexedSeq[BlockMatrix]): Unit = BlockMatrix.exportBlockMatrices(ctx.fs, bms, prefix, overwrite, delimiter, header, addIndex, compression, 
customFilenames) + + def loweredTyp: Type = TVoid } case class BlockMatrixNativeMultiWriter( @@ -168,4 +190,6 @@ case class BlockMatrixNativeMultiWriter( def apply(ctx: ExecuteContext, bms: IndexedSeq[BlockMatrix]): Unit = { BlockMatrix.writeBlockMatrices(ctx, bms, prefix, overwrite, forceRowMajor) } + + def loweredTyp: Type = TVoid } diff --git a/hail/src/main/scala/is/hail/expr/ir/Compile.scala b/hail/src/main/scala/is/hail/expr/ir/Compile.scala index 5dcdd9f0a32..6600a5cc81c 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Compile.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Compile.scala @@ -29,6 +29,7 @@ object Compile { expectedCodeParamTypes: IndexedSeq[TypeInfo[_]], expectedCodeReturnType: TypeInfo[_], body: IR, optimize: Boolean = true, + writeIRs: Boolean = false, print: Option[PrintWriter] = None ): (Option[SingleCodeType], (HailClassLoader, FS, Int, Region) => F) = { @@ -48,7 +49,7 @@ object Compile { .foldLeft(Env.empty[IR]) { case (e, ((n, t), i)) => e.bind(n, In(i, t)) })) ir = LoweringPipeline.compileLowerer(optimize).apply(ctx, ir).asInstanceOf[IR].noSharing - TypeCheck(ir, BindingEnv.empty) + TypeCheck(ctx, ir, BindingEnv.empty) val returnParam = CodeParamType(SingleCodeType.typeInfoFromType(ir.typ)) @@ -76,7 +77,7 @@ object Compile { val emitContext = EmitContext.analyze(ctx, ir) val rt = Emit(emitContext, ir, fb, expectedCodeReturnType, params.length) - val f = fb.resultWithIndex(print) + val f = fb.resultWithIndex(writeIRs, print) codeCache += k -> CodeCacheValue(rt, f) (rt, f) @@ -110,7 +111,7 @@ object CompileWithAggregators { .foldLeft(Env.empty[IR]) { case (e, ((n, t), i)) => e.bind(n, In(i, t)) })) ir = LoweringPipeline.compileLowerer(optimize).apply(ctx, ir).asInstanceOf[IR].noSharing - TypeCheck(ir, BindingEnv(Env.fromSeq[Type](params.map { case (name, t) => name -> t.virtualType }))) + TypeCheck(ctx, ir, BindingEnv(Env.fromSeq[Type](params.map { case (name, t) => name -> t.virtualType }))) val fb = EmitFunctionBuilder[F](ctx, "CompiledWithAggs", CodeParamType(typeInfo[Region]) +: params.map { case (_, pt) => pt }, @@ -183,6 +184,7 @@ object CompileIterator { ctx: ExecuteContext, body: IR, argTypeInfo: Array[ParamType], + writeIRs: Boolean, printWriter: Option[PrintWriter] ): (PType, (HailClassLoader, FS, Int, Region) => F) = { @@ -198,7 +200,7 @@ object CompileIterator { val outerRegion = outerRegionField val ir = LoweringPipeline.compileLowerer(true)(ctx, body).asInstanceOf[IR].noSharing - TypeCheck(ir) + TypeCheck(ctx, ir) var elementAddress: Settable[Long] = null var returnType: PType = null @@ -261,7 +263,7 @@ object CompileIterator { val getMB = fb.newEmitMethod("loadAddress", FastIndexedSeq(), LongInfo) getMB.emit(elementAddress.load()) - (returnType, fb.resultWithIndex(printWriter)) + (returnType, fb.resultWithIndex(writeIRs, printWriter)) } def forTableMapPartitions( @@ -277,6 +279,7 @@ object CompileIterator { CodeParamType(typeInfo[Object]), SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(typ0)), SingleCodeEmitParamType(true, StreamSingleCodeType(true, streamElementType))), + false, None) (eltPType, (theHailClassLoader, fs, idx, consumerCtx, v0, part) => { val stepper = makeStepper(theHailClassLoader, fs, idx, consumerCtx.partitionRegion) @@ -302,6 +305,7 @@ object CompileIterator { CodeParamType(typeInfo[Object]), SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(ctxType)), SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(bcValsType))), + false, None) (eltPType, (theHailClassLoader, fs, idx, consumerCtx, v0, v1) => { 
val stepper = makeStepper(theHailClassLoader, fs, idx, consumerCtx.partitionRegion) diff --git a/hail/src/main/scala/is/hail/expr/ir/Emit.scala b/hail/src/main/scala/is/hail/expr/ir/Emit.scala index 20747e51aa8..86d1314e6f5 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Emit.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Emit.scala @@ -32,7 +32,7 @@ object EmitContext { val usesAndDefs = ComputeUsesAndDefs(ir, errorIfFreeVariables = false) val requiredness = Requiredness.apply(ir, usesAndDefs, null, pTypeEnv) val inLoopCriticalPath = ControlFlowPreventsSplit(ir, ParentPointers(ir), usesAndDefs) - val methodSplits = ComputeMethodSplits(ir,inLoopCriticalPath) + val methodSplits = ComputeMethodSplits(ctx, ir, inLoopCriticalPath) new EmitContext(ctx, requiredness, usesAndDefs, methodSplits, inLoopCriticalPath, Memo.empty[Unit]) } } @@ -55,7 +55,7 @@ case class EmitEnv(bindings: Env[EmitValue], inputValues: IndexedSeq[(EmitCodeBu object Emit { def apply[C](ctx: EmitContext, ir: IR, fb: EmitFunctionBuilder[C], rti: TypeInfo[_], nParams: Int, aggs: Option[Array[AggStateSig]] = None): Option[SingleCodeType] = { - TypeCheck(ir) + TypeCheck(ctx.executeContext, ir) val mb = fb.apply_method val container = aggs.map { a => @@ -614,6 +614,9 @@ class Emit[C]( this.emitI(ir, cb, region, env, container, loopEnv) (ir: @unchecked) match { + case Literal(TVoid, ()) => + Code._empty + case Void() => Code._empty @@ -2402,7 +2405,7 @@ class Emit[C]( ctx.req.lookupOpt(ir) match { case Some(r) => if (result.required != r.required) { - throw new RuntimeException(s"requiredness mismatch: EC=${ result.required } / Analysis=${ r.required }\n${ result.st }\n${ Pretty(ir) }") + throw new RuntimeException(s"requiredness mismatch: EC=${ result.required } / Analysis=${ r.required }\n${ result.st }\n${ Pretty(ctx.executeContext, ir) }") } case _ => @@ -2410,7 +2413,7 @@ class Emit[C]( } if (result.st.virtualType != ir.typ) - throw new RuntimeException(s"type mismatch:\n EC=${ result.st.virtualType }\n IR=${ ir.typ }\n node: ${ Pretty(ir).take(50) }") + throw new RuntimeException(s"type mismatch:\n EC=${ result.st.virtualType }\n IR=${ ir.typ }\n node: ${ Pretty(ctx.executeContext, ir).take(50) }") result } @@ -2565,7 +2568,7 @@ class Emit[C]( ctx.req.lookupOpt(ir) match { case Some(r) => if (result.required != r.required) { - throw new RuntimeException(s"requiredness mismatch: EC=${ result.required } / Analysis=${ r.required }\n${ result.emitType }\n${ Pretty(ir) }") + throw new RuntimeException(s"requiredness mismatch: EC=${ result.required } / Analysis=${ r.required }\n${ result.emitType }\n${ Pretty(ctx.executeContext, ir) }") } case _ => diff --git a/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala b/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala index ee54b73647b..1a5538e1c4e 100644 --- a/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala +++ b/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala @@ -116,7 +116,10 @@ trait WrappedEmitClassBuilder[C] extends WrappedEmitModuleBuilder { def fieldBuilder: SettableBuilder = cb.fieldBuilder - def result(print: Option[PrintWriter] = None): (HailClassLoader) => C = cb.result(print) + def result( + ctx: ExecuteContext, + print: Option[PrintWriter] = None + ): (HailClassLoader) => C = cb.result(ctx.shouldWriteIRFiles(), print) def getHailClassLoader: Code[HailClassLoader] = ecb.getHailClassLoader @@ -141,9 +144,9 @@ trait WrappedEmitClassBuilder[C] extends WrappedEmitModuleBuilder { def addEncodedLiteral(encodedLiteral: EncodedLiteral) = 
ecb.addEncodedLiteral(encodedLiteral) - def getPType(t: PType): Code[PType] = ecb.getPType(t) + def getPType[T <: PType : TypeInfo](t: T): Code[T] = ecb.getPType(t) - def getType(t: Type): Code[Type] = ecb.getType(t) + def getType[T <: Type : TypeInfo](t: T): Code[T] = ecb.getType(t) def newEmitMethod(name: String, argsInfo: IndexedSeq[ParamType], returnInfo: ParamType): EmitMethodBuilder[C] = ecb.newEmitMethod(name, argsInfo, returnInfo) @@ -164,7 +167,7 @@ trait WrappedEmitClassBuilder[C] extends WrappedEmitModuleBuilder { def newRNG(seed: Long): Value[IRRandomness] = ecb.newRNG(seed) - def resultWithIndex(print: Option[PrintWriter] = None): (HailClassLoader, FS, Int, Region) => C = ecb.resultWithIndex(print) + def resultWithIndex(writeIRs: Boolean = false, print: Option[PrintWriter] = None): (HailClassLoader, FS, Int, Region) => C = ecb.resultWithIndex(writeIRs, print) def getOrGenEmitMethod( baseName: String, key: Any, argsInfo: IndexedSeq[ParamType], returnInfo: ParamType @@ -218,7 +221,7 @@ class EmitClassBuilder[C]( def fieldBuilder: SettableBuilder = cb.fieldBuilder - def result(print: Option[PrintWriter] = None): (HailClassLoader) => C = cb.result(print) + def result(writeIRs: Boolean, print: Option[PrintWriter] = None): (HailClassLoader) => C = cb.result(writeIRs, print) // EmitClassBuilder methods @@ -661,7 +664,10 @@ class EmitClassBuilder[C]( rng } - def resultWithIndex(print: Option[PrintWriter] = None): (HailClassLoader, FS, Int, Region) => C = { + def resultWithIndex( + writeIRs: Boolean, + print: Option[PrintWriter] = None + ): (HailClassLoader, FS, Int, Region) => C = { makeRNGs() makeAddPartitionRegion() makeAddHailClassLoader() @@ -699,7 +705,7 @@ class EmitClassBuilder[C]( "FunctionBuilder emission should happen on master, but happened on worker") val n = cb.className.replace("/", ".") - val classesBytes = modb.classesBytes(print) + val classesBytes = modb.classesBytes(writeIRs, print) new ((HailClassLoader, FS, Int, Region) => C) with java.io.Serializable { @transient @volatile private var theClass: Class[_] = null diff --git a/hail/src/main/scala/is/hail/expr/ir/ExtractIntervalFilters.scala b/hail/src/main/scala/is/hail/expr/ir/ExtractIntervalFilters.scala index 01b45fd3469..40598f7ab34 100644 --- a/hail/src/main/scala/is/hail/expr/ir/ExtractIntervalFilters.scala +++ b/hail/src/main/scala/is/hail/expr/ir/ExtractIntervalFilters.scala @@ -1,5 +1,6 @@ package is.hail.expr.ir +import is.hail.backend.ExecuteContext import is.hail.annotations.IntervalEndpointOrdering import is.hail.types.virtual._ import is.hail.utils.{FastSeq, Interval, IntervalEndpoint, _} @@ -273,7 +274,7 @@ object ExtractIntervalFilters { extractAndRewrite(cond, ExtractionState(ref, key)) } - def apply(ir0: BaseIR): BaseIR = { + def apply(ctx: ExecuteContext, ir0: BaseIR): BaseIR = { MapIR.mapBaseIR(ir0, (ir: BaseIR) => { (ir match { case TableFilter(child, pred) => @@ -281,8 +282,8 @@ object ExtractIntervalFilters { .map { case (newCond, intervals) => log.info(s"generated TableFilterIntervals node with ${ intervals.length } intervals:\n " + s"Intervals: ${ intervals.mkString(", ") }\n " + - s"Predicate: ${ Pretty(pred) }\n " + - s"Post: ${ Pretty(newCond) }") + s"Predicate: ${ Pretty(ctx, pred) }\n " + + s"Post: ${ Pretty(ctx, newCond) }") TableFilter( TableFilterIntervals(child, intervals, keep = true), newCond) @@ -292,8 +293,8 @@ object ExtractIntervalFilters { .map { case (newCond, intervals) => log.info(s"generated MatrixFilterIntervals node with ${ intervals.length } intervals:\n " + 
s"Intervals: ${ intervals.mkString(", ") }\n " + - s"Predicate: ${ Pretty(pred) }\n " + - s"Post: ${ Pretty(newCond) }") + s"Predicate: ${ Pretty(ctx, pred) }\n " + + s"Post: ${ Pretty(ctx, newCond) }") MatrixFilterRows( MatrixFilterIntervals(child, intervals, keep = true), newCond) @@ -303,4 +304,4 @@ object ExtractIntervalFilters { }).getOrElse(ir) }) } -} \ No newline at end of file +} diff --git a/hail/src/main/scala/is/hail/expr/ir/GenericLines.scala b/hail/src/main/scala/is/hail/expr/ir/GenericLines.scala index 4a0cfcb64ba..15cd8a39b57 100644 --- a/hail/src/main/scala/is/hail/expr/ir/GenericLines.scala +++ b/hail/src/main/scala/is/hail/expr/ir/GenericLines.scala @@ -16,7 +16,7 @@ import scala.annotation.meta.param trait CloseableIterator[T] extends Iterator[T] with AutoCloseable object GenericLines { - def read(fs: FS, contexts: IndexedSeq[Any], gzAsBGZ: Boolean): GenericLines = { + def read(fs: FS, contexts: IndexedSeq[Any], gzAsBGZ: Boolean, filePerPartition: Boolean): GenericLines = { val body: (FS, Any) => CloseableIterator[GenericLine] = { (fs: FS, context: Any) => val contextRow = context.asInstanceOf[Row] @@ -32,18 +32,18 @@ object GenericLines { val rawIS = fs.openNoCompression(file) val codec = fs.getCodecFromPath(file, gzAsBGZ) if (codec == null) { - assert(split) + assert(split || filePerPartition) rawIS.seek(start) rawIS } else if (codec == BGZipCompressionCodec) { - assert(split) + assert(split || filePerPartition) splitCompressed = true val bgzIS = new BGzipInputStream(rawIS, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK) new ProxyInputStream(bgzIS) with Positioned { def getPosition: Long = bgzIS.getVirtualOffset } } else { - assert(!split) + assert(!split || filePerPartition) new CountingInputStream(codec.makeInputStream(rawIS)) with Positioned { def getPosition: Long = getByteCount } @@ -244,7 +244,8 @@ object GenericLines { blockSizeInMB: Option[Int], minPartitions: Option[Int], gzAsBGZ: Boolean, - allowSerialRead: Boolean + allowSerialRead: Boolean, + filePerPartition: Boolean = false ): GenericLines = { val fileStatuses = fileStatuses0.filter(_.getLen > 0) val totalSize = fileStatuses.map(_.getLen).sum @@ -267,7 +268,7 @@ object GenericLines { val codec = fs.getCodecFromPath(status.getPath, gzAsBGZ) val splittable = codec == null || codec == BGZipCompressionCodec - if (splittable) { + if (splittable && !filePerPartition) { var fileNParts = ((totalPartitions.toDouble * size) / totalSize + 0.5).toInt if (fileNParts == 0) fileNParts = 1 @@ -283,7 +284,7 @@ object GenericLines { Row(i, status.getPath, start, end, true) } } else { - if (!allowSerialRead) + if (!allowSerialRead && !filePerPartition) fatal(s"Cowardly refusing to read file serially: ${ status.getPath }.") Iterator.single { @@ -292,7 +293,7 @@ object GenericLines { } } - GenericLines.read(fs, contexts, gzAsBGZ) + GenericLines.read(fs, contexts, gzAsBGZ, filePerPartition) } def collect(fs: FS, lines: GenericLines): IndexedSeq[String] = { diff --git a/hail/src/main/scala/is/hail/expr/ir/IR.scala b/hail/src/main/scala/is/hail/expr/ir/IR.scala index 16cc84e607f..10132aacba7 100644 --- a/hail/src/main/scala/is/hail/expr/ir/IR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/IR.scala @@ -196,7 +196,7 @@ object MakeArray { MakeArray(args, TArray(args.head.typ)) } - def unify(args: Seq[IR], requestedType: TArray = null): MakeArray = { + def unify(ctx: ExecuteContext, args: Seq[IR], requestedType: TArray = null): MakeArray = { assert(requestedType != null || args.nonEmpty) if(args.nonEmpty) @@ -204,7 
+204,7 @@ object MakeArray { return MakeArray(args, TArray(args.head.typ)) MakeArray(args.map { arg => - val upcast = PruneDeadFields.upcast(arg, requestedType.elementType) + val upcast = PruneDeadFields.upcast(ctx, arg, requestedType.elementType) assert(upcast.typ == requestedType.elementType) upcast }, requestedType) @@ -214,7 +214,7 @@ object MakeArray { final case class MakeArray(args: Seq[IR], _typ: TArray) extends IR object MakeStream { - def unify(args: Seq[IR], requiresMemoryManagementPerElement: Boolean = false, requestedType: TStream = null): MakeStream = { + def unify(ctx: ExecuteContext, args: Seq[IR], requiresMemoryManagementPerElement: Boolean = false, requestedType: TStream = null): MakeStream = { assert(requestedType != null || args.nonEmpty) if (args.nonEmpty) @@ -222,7 +222,7 @@ object MakeStream { return MakeStream(args, TStream(args.head.typ), requiresMemoryManagementPerElement) MakeStream(args.map { arg => - val upcast = PruneDeadFields.upcast(arg, requestedType.elementType) + val upcast = PruneDeadFields.upcast(ctx, arg, requestedType.elementType) assert(upcast.typ == requestedType.elementType) upcast }, requestedType, requiresMemoryManagementPerElement) @@ -747,6 +747,7 @@ object PartitionWriter { override val typeHints = ShortTypeHints(List( classOf[PartitionNativeWriter], classOf[TableTextPartitionWriter], + classOf[VCFPartitionWriter], classOf[AbstractTypedCodecSpec], classOf[TypedCodecSpec]), typeHintFieldName = "name" ) + BufferSpec.shortTypeHints @@ -765,6 +766,7 @@ object MetadataWriter { classOf[TableSpecWriter], classOf[RelationalWriter], classOf[TableTextFinalizer], + classOf[VCFExportFinalizer], classOf[RVDSpecMaker], classOf[AbstractTypedCodecSpec], classOf[TypedCodecSpec]), diff --git a/hail/src/main/scala/is/hail/expr/ir/InferType.scala b/hail/src/main/scala/is/hail/expr/ir/InferType.scala index 27f633cae7f..0a74106c0e3 100644 --- a/hail/src/main/scala/is/hail/expr/ir/InferType.scala +++ b/hail/src/main/scala/is/hail/expr/ir/InferType.scala @@ -260,7 +260,7 @@ object InferType { case _: MatrixWrite => TVoid case _: MatrixMultiWrite => TVoid case _: BlockMatrixCollect => TNDArray(TFloat64, Nat(2)) - case _: BlockMatrixWrite => TVoid + case BlockMatrixWrite(_, writer) => writer.loweredTyp case _: BlockMatrixMultiWrite => TVoid case TableGetGlobals(child) => child.typ.globalType case TableCollect(child) => TStruct("rows" -> TArray(child.typ.rowType), "global" -> child.typ.globalType) diff --git a/hail/src/main/scala/is/hail/expr/ir/LowerMatrixIR.scala b/hail/src/main/scala/is/hail/expr/ir/LowerMatrixIR.scala index 21103cfe72e..ddda5d12698 100644 --- a/hail/src/main/scala/is/hail/expr/ir/LowerMatrixIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/LowerMatrixIR.scala @@ -1,5 +1,6 @@ package is.hail.expr.ir +import is.hail.backend.ExecuteContext import is.hail.expr.ir.functions.{WrappedMatrixToTableFunction, WrappedMatrixToValueFunction} import is.hail.expr.ir._ import is.hail.types._ @@ -12,40 +13,44 @@ object LowerMatrixIR { val colsField: Symbol = Symbol(colsFieldName) val entriesField: Symbol = Symbol(entriesFieldName) - def apply(ir: IR): IR = { + def apply(ctx: ExecuteContext, ir: IR): IR = { val ab = new BoxedArrayBuilder[(String, IR)] - val l1 = lower(ir, ab) + val l1 = lower(ctx, ir, ab) ab.result().foldRight[IR](l1) { case ((ident, value), body) => RelationalLet(ident, value, body) } } - def apply(tir: TableIR): TableIR = { + def apply(ctx: ExecuteContext, tir: TableIR): TableIR = { val ab = new BoxedArrayBuilder[(String, IR)] - val l1 = 
lower(tir, ab) + val l1 = lower(ctx, tir, ab) ab.result().foldRight[TableIR](l1) { case ((ident, value), body) => RelationalLetTable(ident, value, body) } } - def apply(mir: MatrixIR): TableIR = { + def apply(ctx: ExecuteContext, mir: MatrixIR): TableIR = { val ab = new BoxedArrayBuilder[(String, IR)] - val l1 = lower(mir, ab) + val l1 = lower(ctx, mir, ab) ab.result().foldRight[TableIR](l1) { case ((ident, value), body) => RelationalLetTable(ident, value, body) } } - def apply(bmir: BlockMatrixIR): BlockMatrixIR = { + def apply(ctx: ExecuteContext, bmir: BlockMatrixIR): BlockMatrixIR = { val ab = new BoxedArrayBuilder[(String, IR)] - val l1 = lower(bmir, ab) + val l1 = lower(ctx, bmir, ab) ab.result().foldRight[BlockMatrixIR](l1) { case ((ident, value), body) => RelationalLetBlockMatrix(ident, value, body) } } - private[this] def lowerChildren(ir: BaseIR, ab: BoxedArrayBuilder[(String, IR)]): BaseIR = { + private[this] def lowerChildren( + ctx: ExecuteContext, + ir: BaseIR, + ab: BoxedArrayBuilder[(String, IR)] + ): BaseIR = { val loweredChildren = ir.children.map { - case tir: TableIR => lower(tir, ab) + case tir: TableIR => lower(ctx, tir, ab) case mir: MatrixIR => throw new RuntimeException(s"expect specialized lowering rule for " + s"${ ir.getClass.getName }\n Found MatrixIR child $mir") - case bmir: BlockMatrixIR => lower(bmir, ab) - case vir: IR => lower(vir, ab) + case bmir: BlockMatrixIR => lower(ctx, bmir, ab) + case vir: IR => lower(ctx, vir, ab) } if ((ir.children, loweredChildren).zipped.forall(_ eq _)) ir @@ -86,13 +91,17 @@ object LowerMatrixIR { } - private[this] def lower(mir: MatrixIR, ab: BoxedArrayBuilder[(String, IR)]): TableIR = { + private[this] def lower( + ctx: ExecuteContext, + mir: MatrixIR, + ab: BoxedArrayBuilder[(String, IR)] + ): TableIR = { val lowered = mir match { case RelationalLetMatrixTable(name, value, body) => - RelationalLetTable(name, lower(value, ab), lower(body, ab)) + RelationalLetTable(name, lower(ctx, value, ab), lower(ctx, body, ab)) case CastTableToMatrix(child, entries, cols, colKey) => - val lc = lower(child, ab) + val lc = lower(ctx, child, ab) lc.mapRows( irIf('row (Symbol(entries)).isNA) { irDie("missing entry array unsupported in 'to_matrix_table_row_major'", lc.typ.rowType) @@ -106,11 +115,11 @@ object LowerMatrixIR { ).rename(Map(entries -> entriesFieldName), Map(cols -> colsFieldName)) case MatrixToMatrixApply(child, function) => - val loweredChild = lower(child, ab) + val loweredChild = lower(ctx, child, ab) TableToTableApply(loweredChild, function.lower()) case MatrixRename(child, globalMap, colMap, rowMap, entryMap) => - var t = lower(child, ab).rename(rowMap, globalMap) + var t = lower(ctx, child, ab).rename(rowMap, globalMap) if (colMap.nonEmpty) { val newColsType = TArray(child.typ.colType.rename(colMap)) @@ -125,19 +134,19 @@ object LowerMatrixIR { t case MatrixKeyRowsBy(child, keys, isSorted) => - lower(child, ab).keyBy(keys, isSorted) + lower(ctx, child, ab).keyBy(keys, isSorted) case MatrixFilterRows(child, pred) => - lower(child, ab) - .filter(subst(lower(pred, ab), matrixSubstEnv(child))) + lower(ctx, child, ab) + .filter(subst(lower(ctx, pred, ab), matrixSubstEnv(child))) case MatrixFilterCols(child, pred) => - lower(child, ab) + lower(ctx, child, ab) .mapGlobals('global.insertFields('newColIdx -> irRange(0, 'global (colsField).len) .filter('i ~> (let(sa = 'global (colsField)('i)) - in subst(lower(pred, ab), matrixGlobalSubstEnv(child)))))) + in subst(lower(ctx, pred, ab), matrixGlobalSubstEnv(child)))))) 
.mapRows('row.insertFields(entriesField -> 'global ('newColIdx).map('i ~> 'row (entriesField)('i)))) .mapGlobals('global .insertFields(colsField -> @@ -147,12 +156,12 @@ object LowerMatrixIR { case MatrixAnnotateRowsTable(child, table, root, product) => val kt = table.typ.keyType if (kt.size == 1 && kt.types(0) == TInterval(child.typ.rowKeyStruct.types(0))) - TableIntervalJoin(lower(child, ab), lower(table, ab), root, product) + TableIntervalJoin(lower(ctx, child, ab), lower(ctx, table, ab), root, product) else - TableLeftJoinRightDistinct(lower(child, ab), lower(table, ab), root) + TableLeftJoinRightDistinct(lower(ctx, child, ab), lower(ctx, table, ab), root) case MatrixChooseCols(child, oldIndices) => - lower(child, ab) + lower(ctx, child, ab) .mapGlobals('global.insertFields('newColIdx -> oldIndices.map(I32))) .mapRows('row.insertFields(entriesField -> 'global ('newColIdx).map('i ~> 'row (entriesField)('i)))) .mapGlobals('global @@ -162,8 +171,8 @@ object LowerMatrixIR { case MatrixAnnotateColsTable(child, table, root) => val col = Symbol(genUID()) val colKey = makeStruct(table.typ.key.zip(child.typ.colKey).map { case (tk, mck) => Symbol(tk) -> col(Symbol(mck)) }: _*) - lower(child, ab) - .mapGlobals(let(__dictfield = lower(table, ab) + lower(ctx, child, ab) + .mapGlobals(let(__dictfield = lower(ctx, table, ab) .keyBy(FastIndexedSeq()) .collect() .apply('rows) @@ -173,9 +182,9 @@ object LowerMatrixIR { }) case MatrixMapGlobals(child, newGlobals) => - lower(child, ab) + lower(ctx, child, ab) .mapGlobals( - subst(lower(newGlobals, ab), BindingEnv(Env[IRProxy]( + subst(lower(ctx, newGlobals, ab), BindingEnv(Env[IRProxy]( "global" -> 'global.selectFields(child.typ.globalType.fieldNames: _*)))) .insertFields(colsField -> 'global (colsField))) @@ -277,14 +286,14 @@ object LowerMatrixIR { } - val lc = lower(child, ab) + val lc = lower(ctx, child, ab) lc.mapRows(let(n_cols = 'global(colsField).len) { - liftScans(Subst(lower(newRow, ab), matrixSubstEnvIR(child, lc))) + liftScans(Subst(lower(ctx, newRow, ab), matrixSubstEnvIR(child, lc))) .insertFields(entriesField -> 'row(entriesField)) }) case MatrixMapCols(child, newCol, _) => - val loweredChild = lower(child, ab) + val loweredChild = lower(ctx, child, ab) def lift(ir: IR, scanBindings: BoxedArrayBuilder[(String, IR)], aggBindings: BoxedArrayBuilder[(String, IR)]): IR = ir match { case a: ApplyScanOp => @@ -386,7 +395,7 @@ object LowerMatrixIR { val scanBuilder = new BoxedArrayBuilder[(String, IR)] val aggBuilder = new BoxedArrayBuilder[(String, IR)] - val b0 = lift(Subst(lower(newCol, ab), matrixSubstEnvIR(child, loweredChild)), scanBuilder, aggBuilder) + val b0 = lift(Subst(lower(ctx, newCol, ab), matrixSubstEnvIR(child, loweredChild)), scanBuilder, aggBuilder) val aggs = aggBuilder.result() val scans = scanBuilder.result() @@ -462,13 +471,13 @@ object LowerMatrixIR { )) case MatrixFilterEntries(child, pred) => - val lc = lower(child, ab) + val lc = lower(ctx, child, ab) lc.mapRows('row.insertFields(entriesField -> irRange(0, 'global (colsField).len).map { 'i ~> let(g = 'row (entriesField)('i)) { irIf(let(sa = 'global (colsField)('i)) - in !subst(lower(pred, ab), matrixSubstEnv(child))) { + in !subst(lower(ctx, pred, ab), matrixSubstEnv(child))) { NA(child.typ.entryType) } { 'g @@ -479,7 +488,7 @@ object LowerMatrixIR { case MatrixUnionCols(left, right, joinType) => val rightEntries = genUID() val rightCols = genUID() - val ll = lower(left, ab).distinct() + val ll = lower(ctx, left, ab).distinct() def handleMissingEntriesArray(entries: 
Symbol, cols: Symbol): IRProxy = if (joinType == "inner") 'row(entries) @@ -492,7 +501,7 @@ object LowerMatrixIR { } TableJoin( ll, - lower(right, ab).distinct() + lower(ctx, right, ab).distinct() .mapRows('row .insertFields(Symbol(rightEntries) -> 'row(entriesField)) .selectFields(right.typ.rowKey :+ rightEntries: _*)) @@ -514,7 +523,7 @@ object LowerMatrixIR { .dropFields(Symbol(rightCols))) case MatrixMapEntries(child, newEntries) => - val loweredChild = lower(child, ab) + val loweredChild = lower(ctx, child, ab) val rt = loweredChild.typ.rowType val gt = loweredChild.typ.globalType TableMapRows( @@ -526,39 +535,39 @@ object LowerMatrixIR { ToStream(GetField(Ref("row", rt), entriesFieldName)), ToStream(GetField(Ref("global", gt), colsFieldName))), FastIndexedSeq("g", "sa"), - Subst(lower(newEntries, ab), BindingEnv(Env( + Subst(lower(ctx, newEntries, ab), BindingEnv(Env( "global" -> SelectFields(Ref("global", gt), child.typ.globalType.fieldNames), "va" -> SelectFields(Ref("row", rt), child.typ.rowType.fieldNames)))), ArrayZipBehavior.AssumeSameLength ))))) ) - case MatrixRepartition(child, n, shuffle) => TableRepartition(lower(child, ab), n, shuffle) + case MatrixRepartition(child, n, shuffle) => TableRepartition(lower(ctx, child, ab), n, shuffle) - case MatrixFilterIntervals(child, intervals, keep) => TableFilterIntervals(lower(child, ab), intervals, keep) + case MatrixFilterIntervals(child, intervals, keep) => TableFilterIntervals(lower(ctx, child, ab), intervals, keep) case MatrixUnionRows(children) => // FIXME: this should check that all children have the same column keys. - val first = lower(children.head, ab) + val first = lower(ctx, children.head, ab) TableUnion(FastIndexedSeq(first) ++ - children.tail.map(lower(_, ab) + children.tail.map(lower(ctx, _, ab) .mapRows('row.selectFields(first.typ.rowType.fieldNames: _*)))) - case MatrixDistinctByRow(child) => TableDistinct(lower(child, ab)) + case MatrixDistinctByRow(child) => TableDistinct(lower(ctx, child, ab)) - case MatrixRowsHead(child, n) => TableHead(lower(child, ab), n) - case MatrixRowsTail(child, n) => TableTail(lower(child, ab), n) + case MatrixRowsHead(child, n) => TableHead(lower(ctx, child, ab), n) + case MatrixRowsTail(child, n) => TableTail(lower(ctx, child, ab), n) - case MatrixColsHead(child, n) => lower(child, ab) + case MatrixColsHead(child, n) => lower(ctx, child, ab) .mapGlobals('global.insertFields(colsField -> 'global (colsField).arraySlice(0, Some(n), 1))) .mapRows('row.insertFields(entriesField -> 'row (entriesField).arraySlice(0, Some(n), 1))) - case MatrixColsTail(child, n) => lower(child, ab) + case MatrixColsTail(child, n) => lower(ctx, child, ab) .mapGlobals('global.insertFields(colsField -> 'global (colsField).arraySlice(-n, None, 1))) .mapRows('row.insertFields(entriesField -> 'row (entriesField).arraySlice(-n, None, 1))) case MatrixExplodeCols(child, path) => - val loweredChild = lower(child, ab) + val loweredChild = lower(ctx, child, ab) val lengths = Symbol(genUID()) val colIdx = Symbol(genUID()) val nestedIdx = Symbol(genUID()) @@ -598,9 +607,9 @@ object LowerMatrixIR { case MatrixAggregateRowsByKey(child, entryExpr, rowExpr) => val substEnv = matrixSubstEnv(child) - val eeSub = subst(lower(entryExpr, ab), substEnv) - val reSub = subst(lower(rowExpr, ab), substEnv) - lower(child, ab) + val eeSub = subst(lower(ctx, entryExpr, ab), substEnv) + val reSub = subst(lower(ctx, rowExpr, ab), substEnv) + lower(ctx, child, ab) .aggregateByKey( reSub.insertFields(entriesField -> irRange(0, 'global 
(colsField).len) .aggElements('__element_idx, '__result_idx, Some('global (colsField).len))( @@ -612,7 +621,7 @@ object LowerMatrixIR { }))) case MatrixCollectColsByKey(child) => - lower(child, ab) + lower(ctx, child, ab) .mapGlobals('global.insertFields('newColIdx -> irRange(0, 'global (colsField).len).map { 'i ~> @@ -640,7 +649,7 @@ object LowerMatrixIR { .dropFields('newColIdx) ) - case MatrixExplodeRows(child, path) => TableExplode(lower(child, ab), path) + case MatrixExplodeRows(child, path) => TableExplode(lower(ctx, child, ab), path) case mr: MatrixRead => mr.lower() @@ -655,11 +664,11 @@ object LowerMatrixIR { val aggElementIdx = Symbol(genUID()) val substEnv = matrixSubstEnv(child) - val ceSub = subst(lower(colExpr, ab), substEnv) + val ceSub = subst(lower(ctx, colExpr, ab), substEnv) val vaBinding = 'row.selectFields(child.typ.rowType.fieldNames: _*) - val eeSub = subst(lower(entryExpr, ab), substEnv.bindEval("va", vaBinding).bindAgg("va", vaBinding)) + val eeSub = subst(lower(ctx, entryExpr, ab), substEnv.bindEval("va", vaBinding).bindAgg("va", vaBinding)) - lower(child, ab) + lower(ctx, child, ab) .mapGlobals('global.insertFields(keyMap -> let(__cols_field = 'global (colsField)) { irRange(0, '__cols_field.len) @@ -698,20 +707,24 @@ object LowerMatrixIR { } if (!mir.typ.isCompatibleWith(lowered.typ)) - throw new RuntimeException(s"Lowering changed type:\n BEFORE: ${ Pretty(mir) }\n ${ mir.typ }\n ${ mir.typ.canonicalTableType}\n AFTER: ${ Pretty(lowered) }\n ${ lowered.typ }") + throw new RuntimeException(s"Lowering changed type:\n BEFORE: ${ Pretty(ctx, mir) }\n ${ mir.typ }\n ${ mir.typ.canonicalTableType}\n AFTER: ${ Pretty(ctx, lowered) }\n ${ lowered.typ }") lowered } - private[this] def lower(tir: TableIR, ab: BoxedArrayBuilder[(String, IR)]): TableIR = { + private[this] def lower( + ctx: ExecuteContext, + tir: TableIR, + ab: BoxedArrayBuilder[(String, IR)] + ): TableIR = { val lowered = tir match { case CastMatrixToTable(child, entries, cols) => - lower(child, ab) + lower(ctx, child, ab) .mapRows('row.selectFields(child.typ.rowType.fieldNames ++ Array(entriesFieldName): _*)) .rename(Map(entriesFieldName -> entries), Map(colsFieldName -> cols)) case x@MatrixEntriesTable(child) => - val lc = lower(child, ab) + val lc = lower(ctx, child, ab) if (child.typ.rowKey.nonEmpty && child.typ.colKey.nonEmpty) { val oldColIdx = Symbol(genUID()) @@ -770,19 +783,19 @@ object LowerMatrixIR { } case MatrixToTableApply(child, function) => - val loweredChild = lower(child, ab) + val loweredChild = lower(ctx, child, ab) TableToTableApply(loweredChild, function.lower() .getOrElse(WrappedMatrixToTableFunction(function, colsFieldName, entriesFieldName, child.typ.colKey))) case MatrixRowsTable(child) => - lower(child, ab) + lower(ctx, child, ab) .mapGlobals('global.dropFields(colsField)) .mapRows('row.dropFields(entriesField)) case MatrixColsTable(child) => val colKey = child.typ.colKey - let(__cols_and_globals = lower(child, ab).getGlobals) { + let(__cols_and_globals = lower(ctx, child, ab).getGlobals) { val sortedCols = if (colKey.isEmpty) '__cols_and_globals (colsField) else @@ -798,34 +811,42 @@ object LowerMatrixIR { makeStruct('rows -> sortedCols, 'global -> '__cols_and_globals.dropFields(colsField)) }.parallelize(None).keyBy(child.typ.colKey) - case table => lowerChildren(table, ab).asInstanceOf[TableIR] + case table => lowerChildren(ctx, table, ab).asInstanceOf[TableIR] } assertTypeUnchanged(tir, lowered) lowered } - private[this] def lower(bmir: BlockMatrixIR, ab: 
BoxedArrayBuilder[(String, IR)]): BlockMatrixIR = { + private[this] def lower( + ctx: ExecuteContext, + bmir: BlockMatrixIR, + ab: BoxedArrayBuilder[(String, IR)] + ): BlockMatrixIR = { val lowered = bmir match { - case noMatrixChildren => lowerChildren(noMatrixChildren, ab).asInstanceOf[BlockMatrixIR] + case noMatrixChildren => lowerChildren(ctx, noMatrixChildren, ab).asInstanceOf[BlockMatrixIR] } assertTypeUnchanged(bmir, lowered) lowered } - private[this] def lower(ir: IR, ab: BoxedArrayBuilder[(String, IR)]): IR = { + private[this] def lower( + ctx: ExecuteContext, + ir: IR, + ab: BoxedArrayBuilder[(String, IR)] + ): IR = { val lowered = ir match { - case MatrixToValueApply(child, function) => TableToValueApply(lower(child, ab), function.lower() + case MatrixToValueApply(child, function) => TableToValueApply(lower(ctx, child, ab), function.lower() .getOrElse(WrappedMatrixToValueFunction(function, colsFieldName, entriesFieldName, child.typ.colKey))) case MatrixWrite(child, writer) => - TableWrite(lower(child, ab), WrappedMatrixWriter(writer, colsFieldName, entriesFieldName, child.typ.colKey)) + TableWrite(lower(ctx, child, ab), WrappedMatrixWriter(writer, colsFieldName, entriesFieldName, child.typ.colKey)) case MatrixMultiWrite(children, writer) => - TableMultiWrite(children.map(lower(_, ab)), WrappedMatrixNativeMultiWriter(writer, children.head.typ.colKey)) + TableMultiWrite(children.map(lower(ctx, _, ab)), WrappedMatrixNativeMultiWriter(writer, children.head.typ.colKey)) case MatrixCount(child) => - lower(child, ab) + lower(ctx, child, ab) .aggregate(makeTuple(applyAggOp(Count(), FastIndexedSeq(), FastIndexedSeq()), 'global(colsField).len)) case MatrixAggregate(child, query) => - val lc = lower(child, ab) + val lc = lower(ctx, child, ab) val idx = Symbol(genUID()) TableAggregate(lc, aggExplodeIR( @@ -844,7 +865,7 @@ object LowerMatrixIR { isScan = false), isScan = false) }) - case _ => lowerChildren(ir, ab).asInstanceOf[IR] + case _ => lowerChildren(ctx, ir, ab).asInstanceOf[IR] } assertTypeUnchanged(ir, lowered) lowered diff --git a/hail/src/main/scala/is/hail/expr/ir/LowerOrInterpretNonCompilable.scala b/hail/src/main/scala/is/hail/expr/ir/LowerOrInterpretNonCompilable.scala index c257396b4d8..4660d35d0f0 100644 --- a/hail/src/main/scala/is/hail/expr/ir/LowerOrInterpretNonCompilable.scala +++ b/hail/src/main/scala/is/hail/expr/ir/LowerOrInterpretNonCompilable.scala @@ -13,7 +13,7 @@ object LowerOrInterpretNonCompilable { def evaluate(value: IR): IR = { val preTime = System.nanoTime() - val result = CanLowerEfficiently(value) match { + val result = CanLowerEfficiently(ctx, value) match { case Some(failReason) => log.info(s"LowerOrInterpretNonCompilable: cannot efficiently lower query: $failReason") log.info(s"interpreting non-compilable result: ${ value.getClass.getSimpleName }") diff --git a/hail/src/main/scala/is/hail/expr/ir/MatrixIR.scala b/hail/src/main/scala/is/hail/expr/ir/MatrixIR.scala index 51377192da8..72ad21f70ad 100644 --- a/hail/src/main/scala/is/hail/expr/ir/MatrixIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/MatrixIR.scala @@ -8,7 +8,6 @@ import is.hail.expr.ir.IRBuilder._ import is.hail.expr.ir.functions.MatrixToMatrixFunction import is.hail.types._ import is.hail.types.virtual._ -import is.hail.io.TextMatrixReader import is.hail.io.bgen.MatrixBGENReader import is.hail.io.fs.FS import is.hail.io.plink.MatrixPLINKReader @@ -86,7 +85,6 @@ object MatrixReader { case "MatrixRangeReader" => MatrixRangeReader.fromJValue(env.ctx, jv) case "MatrixNativeReader" => 
MatrixNativeReader.fromJValue(env.ctx.fs, jv) case "MatrixBGENReader" => MatrixBGENReader.fromJValue(env, jv) - case "TextMatrixReader" => TextMatrixReader.fromJValue(env.ctx, jv) case "MatrixPLINKReader" => MatrixPLINKReader.fromJValue(env.ctx, jv) case "MatrixVCFReader" => MatrixVCFReader.fromJValue(env.ctx, jv) } diff --git a/hail/src/main/scala/is/hail/expr/ir/MatrixWriter.scala b/hail/src/main/scala/is/hail/expr/ir/MatrixWriter.scala index 8d32374aac6..58ea87d42bb 100644 --- a/hail/src/main/scala/is/hail/expr/ir/MatrixWriter.scala +++ b/hail/src/main/scala/is/hail/expr/ir/MatrixWriter.scala @@ -5,7 +5,7 @@ import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.backend.ExecuteContext import is.hail.expr.ir.functions.MatrixWriteBlockMatrix -import is.hail.expr.ir.lowering.{LowererUnsupportedOperation, TableStage} +import is.hail.expr.ir.lowering.{LowererUnsupportedOperation, RVDToTableStage, TableStage} import is.hail.expr.ir.streams.StreamProducer import is.hail.expr.{JSONAnnotationImpex, Nat} import is.hail.io._ @@ -13,19 +13,22 @@ import is.hail.io.fs.FS import is.hail.io.gen.{ExportBGEN, ExportGen} import is.hail.io.index.StagedIndexWriter import is.hail.io.plink.ExportPlink -import is.hail.io.vcf.ExportVCF +import is.hail.io.vcf.{ExportVCF, TabixVCF} import is.hail.linalg.BlockMatrix import is.hail.rvd.{IndexSpec, RVDPartitioner, RVDSpecMaker} import is.hail.types.encoded.{EBaseStruct, EBlockMatrixNDArray, EType} -import is.hail.types.physical.stypes.{EmitType, SCode} +import is.hail.types.physical.stypes.{EmitType, SValue} import is.hail.types.physical.stypes.interfaces._ +import is.hail.types.physical.stypes.primitives._ import is.hail.types.physical.{PBooleanRequired, PCanonicalBaseStruct, PCanonicalString, PCanonicalStruct, PInt64, PStruct, PType} import is.hail.types.virtual._ import is.hail.types._ -import is.hail.types.physical.stypes.concrete.SStackStruct +import is.hail.types.physical.stypes.concrete.{SJavaString, SJavaArrayString, SJavaArrayStringValue, SStackStruct} +import is.hail.types.physical.stypes.interfaces.{SIndexableValue, SBaseStructValue} import is.hail.types.physical.stypes.primitives.{SBooleanValue, SInt64Value} import is.hail.utils._ import is.hail.utils.richUtils.ByteTrackingOutputStream +import is.hail.variant.{ReferenceGenome, Call} import org.apache.spark.sql.Row import org.json4s.jackson.JsonMethods import org.json4s.{DefaultFormats, Formats, ShortTypeHints} @@ -385,7 +388,418 @@ case class MatrixVCFWriter( metadata: Option[VCFMetadata] = None, tabix: Boolean = false ) extends MatrixWriter { - def apply(ctx: ExecuteContext, mv: MatrixValue): Unit = ExportVCF(ctx, mv, path, append, exportType, metadata, tabix) + def apply(ctx: ExecuteContext, mv: MatrixValue): Unit = { + val appendStr = getAppendHeaderValue(ctx.fs) + val tv = mv.toTableValue + val ts = RVDToTableStage(tv.rvd, tv.globals.toEncodedLiteral(ctx.theHailClassLoader)) + val tl = TableLiteral(tv, ctx.theHailClassLoader) + CompileAndEvaluate(ctx, + lower(LowerMatrixIR.colsFieldName, MatrixType.entriesIdentifier, mv.typ.colKey, + ctx, ts, tl, BaseTypeWithRequiredness(tv.typ).asInstanceOf[RTable], Map())) + } + + override def canLowerEfficiently: Boolean = true + override def lower(colsFieldName: String, entriesFieldName: String, colKey: IndexedSeq[String], + ctx: ExecuteContext, ts: TableStage, t: TableIR, r: RTable, relationalLetsAbove: Map[String, IR]): IR = { + require(exportType != ExportType.PARALLEL_COMPOSABLE) + + val tm = MatrixType.fromTableType(t.typ, colsFieldName, 
entriesFieldName, colKey) + tm.requireRowKeyVariant() + tm.requireColKeyString() + ExportVCF.checkFormatSignature(tm.entryType) + + val ext = ctx.fs.getCodecExtension(path) + + val folder = if (exportType == ExportType.CONCATENATED) + ctx.createTmpPath("write-vcf-concatenated") + else + path + + val appendStr = getAppendHeaderValue(ctx.fs) + + val writeHeader = exportType == ExportType.PARALLEL_HEADER_IN_SHARD + val partAppend = appendStr.filter(_ => writeHeader) + val partMetadata = metadata.filter(_ => writeHeader) + val lineWriter = VCFPartitionWriter(tm, entriesFieldName, writeHeader = exportType == ExportType.PARALLEL_HEADER_IN_SHARD, + partAppend, partMetadata, tabix && exportType != ExportType.CONCATENATED) + + ts.mapContexts { oldCtx => + val d = digitsNeeded(ts.numPartitions) + val partFiles = Literal(TArray(TString), Array.tabulate(ts.numPartitions)(i => s"$folder/${ partFile(d, i) }$ext").toFastIndexedSeq) + + zip2(oldCtx, ToStream(partFiles), ArrayZipBehavior.AssertSameLength) { (ctxElt, pf) => + MakeStruct(FastSeq( + "oldCtx" -> ctxElt, + "partFile" -> pf)) + } + }(GetField(_, "oldCtx")).mapCollectWithContextsAndGlobals(relationalLetsAbove) { (rows, ctxRef) => + val ctx = MakeStruct(FastSeq( + "cols" -> GetField(ts.globals, colsFieldName), + "partFile" -> GetField(ctxRef, "partFile"))) + WritePartition(rows, ctx, lineWriter) + }{ (parts, globals) => + val ctx = MakeStruct(FastSeq("cols" -> GetField(globals, colsFieldName), "partFiles" -> parts)) + val commit = VCFExportFinalizer(tm, path, appendStr, metadata, exportType, tabix) + Begin(FastIndexedSeq(WriteMetadata(ctx, commit))) + } + } + + private def getAppendHeaderValue(fs: FS): Option[String] = append.map { f => + using(fs.open(f)) { s => + val sb = new StringBuilder + scala.io.Source.fromInputStream(s) + .getLines() + .filterNot(_.isEmpty) + .foreach { line => + sb.append(line) + sb += '\n' + } + sb.result() + } + } +} + +case class VCFPartitionWriter(typ: MatrixType, entriesFieldName: String, writeHeader: Boolean, + append: Option[String], metadata: Option[VCFMetadata], tabix: Boolean) extends PartitionWriter { + val ctxType: Type = TStruct("cols" -> TArray(typ.colType), "partFile" -> TString) + + if (typ.rowType.hasField("info")) { + typ.rowType.field("info").typ match { + case _: TStruct => + case t => + warn(s"export_vcf found row field 'info' of type $t, but expected type 'Struct'. Emitting no INFO fields.") + } + } else { + warn(s"export_vcf found no row field 'info'. Emitting no INFO fields.") + } + + val formatFieldOrder: Array[Int] = typ.entryType.fieldIdx.get("GT") match { + case Some(i) => (i +: typ.entryType.fields.filter(fd => fd.name != "GT").map(_.index)).toArray + case None => typ.entryType.fields.indices.toArray + } + val formatFieldString = formatFieldOrder.map(i => typ.entryType.fields(i).name).mkString(":") + val missingFormatStr = if (typ.entryType.size > 0 && typ.entryType.types(formatFieldOrder(0)) == TCall) + "./." + else + "." 
+ + val locusIdx = typ.rowType.fieldIdx("locus") + val allelesIdx = typ.rowType.fieldIdx("alleles") + val (idExists, idIdx) = ExportVCF.lookupVAField(typ.rowType, "rsid", "ID", Some(TString)) + val (qualExists, qualIdx) = ExportVCF.lookupVAField(typ.rowType, "qual", "QUAL", Some(TFloat64)) + val (filtersExists, filtersIdx) = ExportVCF.lookupVAField(typ.rowType, "filters", "FILTERS", Some(TSet(TString))) + val (infoExists, infoIdx) = ExportVCF.lookupVAField(typ.rowType, "info", "INFO", None) + + def returnType: Type = TString + def unionTypeRequiredness(r: TypeWithRequiredness, ctxType: TypeWithRequiredness, streamType: RIterable): Unit = { + r.union(ctxType.required) + r.union(streamType.required) + } + + final def consumeStream(ctx: ExecuteContext, cb: EmitCodeBuilder, stream: StreamProducer, + context: EmitCode, region: Value[Region]): IEmitCode = { + val mb = cb.emb + context.toI(cb).map(cb) { case ctx: SBaseStructValue => + val filename = ctx.loadField(cb, "partFile").get(cb, "partFile can't be missing").asString.loadString(cb) + + val os = cb.memoize(cb.emb.create(filename)) + if (writeHeader) { + val sampleIds = ctx.loadField(cb, "cols").get(cb).asIndexable + val stringSampleIds = cb.memoize(Code.newArray[String](sampleIds.loadLength())) + sampleIds.forEachDefined(cb) { case (cb, i, colv: SBaseStructValue) => + val s = colv.subset(typ.colKey: _*).loadField(cb, 0).get(cb).asString + cb += (stringSampleIds(i) = s.loadString(cb)) + } + + val headerStr = Code.invokeScalaObject6[TStruct, TStruct, ReferenceGenome, Option[String], Option[VCFMetadata], Array[String], String]( + ExportVCF.getClass, "makeHeader", + mb.getType[TStruct](typ.rowType), mb.getType[TStruct](typ.entryType), + mb.getReferenceGenome(typ.referenceGenome), mb.getObject(append), + mb.getObject(metadata), stringSampleIds) + cb += os.invoke[Array[Byte], Unit]("write", headerStr.invoke[Array[Byte]]("getBytes")) + cb += os.invoke[Int, Unit]("write", '\n') + } + + stream.memoryManagedConsume(region, cb) { cb => + consumeElement(cb, stream.element, os, stream.elementRegion) + } + + cb += os.invoke[Unit]("close") + + if (tabix) { + cb += Code.invokeScalaObject2[FS, String, Unit](TabixVCF.getClass, "apply", cb.emb.getFS, filename) + } + + SJavaString.construct(cb, filename) + } + } + + def consumeElement(cb: EmitCodeBuilder, element: EmitCode, os: Value[OutputStream], region: Value[Region]): Unit = { + def _writeC(cb: EmitCodeBuilder, code: Code[Int]) = { cb += os.invoke[Int, Unit]("write", code) } + def _writeB(cb: EmitCodeBuilder, code: Code[Array[Byte]]) = { cb += os.invoke[Array[Byte], Unit]("write", code) } + def _writeS(cb: EmitCodeBuilder, code: Code[String]) = { _writeB(cb, code.invoke[Array[Byte]]("getBytes")) } + def writeValue(cb: EmitCodeBuilder, value: SValue) = value match { + case v: SInt32Value => _writeS(cb, v.value.toS) + case v: SInt64Value => + cb.ifx(v.value > Int.MaxValue || v.value < Int.MinValue, cb._fatal( + "Cannot convert Long to Int if value is greater than Int.MaxValue (2^31 - 1) ", + "or less than Int.MinValue (-2^31). 
Found ", v.value.toS)) + _writeS(cb, v.value.toS) + case v: SFloat32Value => + cb.ifx(Code.invokeStatic1[java.lang.Float, Float, Boolean]("isNaN", v.value), + _writeC(cb, '.'), + _writeS(cb, Code.invokeScalaObject2[String, Float, String](ExportVCF.getClass, "fmtFloat", "%.6g", v.value))) + case v: SFloat64Value => + cb.ifx(Code.invokeStatic1[java.lang.Double, Double, Boolean]("isNaN", v.value), + _writeC(cb, '.'), + _writeS(cb, Code.invokeScalaObject2[String, Double, String](ExportVCF.getClass, "fmtDouble", "%.6g", v.value))) + case v: SStringValue => + _writeB(cb, v.toBytes(cb).loadBytes(cb)) + case v: SCallValue => + val ploidy = v.ploidy(cb) + val phased = v.isPhased(cb) + cb.ifx(ploidy.ceq(0), cb._fatal("VCF spec does not support 0-ploid calls.")) + cb.ifx(ploidy.ceq(1) , cb._fatal("VCF spec does not support phased haploid calls.")) + val c = v.canonicalCall(cb) + _writeS(cb, Code.invokeScalaObject1[Int, String](Call.getClass, "toString", c)) + case _ => + fatal(s"VCF does not support ${value.st}") + } + + def writeIterable(cb: EmitCodeBuilder, it: SIndexableValue, delim: Int) = + it.forEachDefinedOrMissing(cb)({ (cb, i) => + cb.ifx(i.cne(0), _writeC(cb, delim)) + _writeC(cb, '.') + }, { (cb, i, value) => + cb.ifx(i.cne(0), _writeC(cb, delim)) + writeValue(cb, value) + }) + + def writeGenotype(cb: EmitCodeBuilder, gt: SBaseStructValue) = { + val end = cb.newLocal[Int]("lastDefined", -1) + val Lend = CodeLabel() + formatFieldOrder.zipWithIndex.reverse.foreach { case (idx, pos) => + cb.ifx(!gt.isFieldMissing(cb, idx), { + cb.assign(end, pos) + cb.goto(Lend) + }) + } + + cb.define(Lend) + + val Lout = CodeLabel() + + cb.ifx(end < 0, { + _writeS(cb, missingFormatStr) + cb.goto(Lout) + }) + + formatFieldOrder.zipWithIndex.foreach { case (idx, pos) => + if (pos != 0) + _writeC(cb, ':') + + gt.loadField(cb, idx).consume(cb, { + if (gt.st.fieldTypes(idx).virtualType == TCall) + _writeS(cb, "./.") + else + _writeC(cb, '.') + }, { + case value: SIndexableValue => + writeIterable(cb, value, ',') + case value => + writeValue(cb, value) + }) + + cb.ifx(end.ceq(pos), cb.goto(Lout)) + } + + cb.define(Lout) + } + + def writeC(code: Code[Int]) = _writeC(cb, code) + def writeB(code: Code[Array[Byte]]) = _writeB(cb, code) + def writeS(code: Code[String]) = _writeS(cb, code) + + val elt = element.toI(cb).get(cb).asBaseStruct + val locus = elt.loadField(cb, locusIdx).get(cb).asLocus + // CHROM + writeB(locus.contig(cb).toBytes(cb).loadBytes(cb)) + // POS + writeC('\t') + writeS(locus.position(cb).toS) + + // ID + writeC('\t') + if (idExists) + elt.loadField(cb, idIdx).consume(cb, writeC('.'), { case id: SStringValue => + writeB(id.toBytes(cb).loadBytes(cb)) + }) + else + writeC('.') + + // REF + writeC('\t') + val alleles = elt.loadField(cb, allelesIdx).get(cb).asIndexable + writeB(alleles.loadElement(cb, 0).get(cb).asString.toBytes(cb).loadBytes(cb)) + + // ALT + writeC('\t') + cb.ifx(alleles.loadLength() > 1, + { + val i = cb.newLocal[Int]("i") + cb.forLoop(cb.assign(i, 1), i < alleles.loadLength(), cb.assign(i, i + 1), { + cb.ifx(i.cne(1), writeC(',')) + writeB(alleles.loadElement(cb, i).get(cb).asString.toBytes(cb).loadBytes(cb)) + }) + }, + writeC('.')) + + // QUAL + writeC('\t') + if (qualExists) + elt.loadField(cb, qualIdx).consume(cb, writeC('.'), { qual => + writeS(Code.invokeScalaObject2[String, Double, String](ExportVCF.getClass, "fmtDouble", "%.2f", qual.asDouble.value)) + }) + else + writeC('.') + + // FILTER + writeC('\t') + if (filtersExists) + elt.loadField(cb, filtersIdx).consume(cb, 
writeC('.'), { case filters: SIndexableValue =>
+            cb.ifx(filters.loadLength().ceq(0), writeS("PASS"), {
+              writeIterable(cb, filters, ';')
+            })
+          })
+        else
+          writeC('.')
+
+        // INFO
+        writeC('\t')
+        if (infoExists) {
+          val wroteInfo = cb.newLocal[Boolean]("wroteInfo", false)
+
+          elt.loadField(cb, infoIdx).consume(cb, { /* do nothing */ }, { case info: SBaseStructValue =>
+            var idx = 0
+            while (idx < info.st.size) {
+              val field = info.st.virtualType.fields(idx)
+              info.loadField(cb, idx).consume(cb, { /* do nothing */ }, {
+                case infoArray: SIndexableValue if infoArray.st.elementType.virtualType != TBoolean =>
+                  cb.ifx(infoArray.loadLength() > 0, {
+                    cb.ifx(wroteInfo, writeC(';'))
+                    writeS(field.name)
+                    writeC('=')
+                    writeIterable(cb, infoArray, ',')
+                    cb.assign(wroteInfo, true)
+                  })
+                case infoFlag: SBooleanValue =>
+                  cb.ifx(infoFlag.value, {
+                    cb.ifx(wroteInfo, writeC(';'))
+                    writeS(field.name)
+                    cb.assign(wroteInfo, true)
+                  })
+                case info =>
+                  cb.ifx(wroteInfo, writeC(';'))
+                  writeS(field.name)
+                  writeC('=')
+                  writeValue(cb, info)
+                  cb.assign(wroteInfo, true)
+              })
+              idx += 1
+            }
+          })
+
+          cb.ifx(!wroteInfo, writeC('.'))
+        } else {
+          writeC('.')
+        }
+
+        // FORMAT
+        val genotypes = elt.loadField(cb, entriesFieldName).get(cb).asIndexable
+        cb.ifx(genotypes.loadLength() > 0, {
+          writeC('\t')
+          writeS(formatFieldString)
+          genotypes.forEachDefinedOrMissing(cb)({ (cb, _) =>
+            _writeC(cb, '\t')
+            _writeS(cb, missingFormatStr)
+          }, { case (cb, _, gt: SBaseStructValue) =>
+            _writeC(cb, '\t')
+            writeGenotype(cb, gt)
+          })
+        })
+
+        writeC('\n')
+      }
+}
+
+case class VCFExportFinalizer(typ: MatrixType, outputPath: String, append: Option[String],
+    metadata: Option[VCFMetadata], exportType: String, tabix: Boolean) extends MetadataWriter {
+  def annotationType: Type = TStruct("cols" -> TArray(typ.colType), "partFiles" -> TArray(TString))
+  private def header(cb: EmitCodeBuilder, annotations: SBaseStructValue): Code[String] = {
+    val mb = cb.emb
+    val sampleIds = annotations.loadField(cb, "cols").get(cb).asIndexable
+    val stringSampleIds = cb.memoize(Code.newArray[String](sampleIds.loadLength()))
+    sampleIds.forEachDefined(cb) { case (cb, i, colv: SBaseStructValue) =>
+      val s = colv.subset(typ.colKey: _*).loadField(cb, 0).get(cb).asString
+      cb += (stringSampleIds(i) = s.loadString(cb))
+    }
+    Code.invokeScalaObject6[TStruct, TStruct, ReferenceGenome, Option[String], Option[VCFMetadata], Array[String], String](
+      ExportVCF.getClass, "makeHeader",
+      mb.getType[TStruct](typ.rowType), mb.getType[TStruct](typ.entryType),
+      mb.getReferenceGenome(typ.referenceGenome), mb.getObject(append),
+      mb.getObject(metadata), stringSampleIds)
+  }
+
+  def writeMetadata(writeAnnotations: => IEmitCode, cb: EmitCodeBuilder, region: Value[Region]): Unit = {
+    val ctx: ExecuteContext = cb.emb.ctx
+    val ext = ctx.fs.getCodecExtension(outputPath)
+
+    val annotations = writeAnnotations.get(cb).asBaseStruct
+
+    exportType match {
+      case ExportType.CONCATENATED =>
+        val headerStr = header(cb, annotations)
+
+        val partPaths = annotations.loadField(cb, "partFiles").get(cb)
+        val files = partPaths.castTo(cb, region, SJavaArrayString(true), false)
+        val headerFilePath = ctx.createTmpPath("header", ext)
+        val os = cb.memoize(cb.emb.create(const(headerFilePath)))
+        cb += os.invoke[Array[Byte], Unit]("write", headerStr.invoke[Array[Byte]]("getBytes"))
+        cb += os.invoke[Int, Unit]("write", '\n')
+        cb += os.invoke[Unit]("close")
+
+        val partFiles = files.asInstanceOf[SJavaArrayStringValue].array
+        val jFiles = cb.memoize(Code.newArray[String](partFiles.length + 1))
+        cb += (jFiles(0) = const(headerFilePath))
+        cb += Code.invokeStatic5[System, Any, Int, Any, Int, Int, Unit](
+          "arraycopy", partFiles /*src*/, 0 /*srcPos*/, jFiles /*dest*/, 1 /*destPos*/, partFiles.length /*len*/)
+
+        cb += cb.emb.getFS.invoke[Array[String], String, Unit]("concatenateFiles", jFiles, const(outputPath))
+
+        val i = cb.newLocal[Int]("i")
+        cb.forLoop(cb.assign(i, 0), i < jFiles.length, cb.assign(i, i + 1), {
+          cb += cb.emb.getFS.invoke[String, Boolean, Unit]("delete", jFiles(i), const(false))
+        })
+
+        if (tabix) {
+          cb += Code.invokeScalaObject2[FS, String, Unit](TabixVCF.getClass, "apply", cb.emb.getFS, const(outputPath))
+        }
+
+      case ExportType.PARALLEL_HEADER_IN_SHARD =>
+        cb += cb.emb.getFS.invoke[String, Unit]("touch", const(outputPath).concat("/_SUCCESS"))
+
+      case ExportType.PARALLEL_SEPARATE_HEADER =>
+        val headerFilePath = s"$outputPath/header$ext"
+        val headerStr = header(cb, annotations)
+
+        val os = cb.memoize(cb.emb.create(const(headerFilePath)))
+        cb += os.invoke[Array[Byte], Unit]("write", headerStr.invoke[Array[Byte]]("getBytes"))
+        cb += os.invoke[Int, Unit]("write", '\n')
+        cb += os.invoke[Unit]("close")
+
+        cb += cb.emb.getFS.invoke[String, Unit]("touch", const(outputPath).concat("/_SUCCESS"))
+    }
+  }
+}
 
 case class MatrixGENWriter(
diff --git a/hail/src/main/scala/is/hail/expr/ir/Optimize.scala b/hail/src/main/scala/is/hail/expr/ir/Optimize.scala
index 1ba313d14db..9e5a7f3599d 100644
--- a/hail/src/main/scala/is/hail/expr/ir/Optimize.scala
+++ b/hail/src/main/scala/is/hail/expr/ir/Optimize.scala
@@ -6,8 +6,8 @@ import is.hail.utils._
 
 object Optimize {
   def apply[T <: BaseIR](ir0: T, context: String, ctx: ExecuteContext): T = {
-    if (ctx.printIRs)
-      log.info(s"optimize $context: before: IR size ${ IRSize(ir0) }: \n" + Pretty(ir0, elideLiterals = true))
+    if (ctx.shouldLogIR())
+      log.info(s"optimize $context: before: IR size ${ IRSize(ir0) }: \n" + Pretty(ctx, ir0, elideLiterals = true))
 
     var ir = ir0
     var last: BaseIR = null
@@ -22,11 +22,11 @@ object Optimize {
     while (iter < maxIter && ir != last) {
       last = ir
       runOpt(FoldConstants(ctx, _), iter, "FoldConstants")
-      runOpt(ExtractIntervalFilters(_), iter, "ExtractIntervalFilters")
-      runOpt(Simplify(_), iter, "Simplify")
+      runOpt(ExtractIntervalFilters(ctx, _), iter, "ExtractIntervalFilters")
+      runOpt(Simplify(ctx, _), iter, "Simplify")
       runOpt(ForwardLets(_), iter, "ForwardLets")
       runOpt(ForwardRelationalLets(_), iter, "ForwardRelationalLets")
-      runOpt(PruneDeadFields(_), iter, "PruneDeadFields")
+      runOpt(PruneDeadFields(ctx, _), iter, "PruneDeadFields")
 
       iter += 1
     }
@@ -36,11 +36,11 @@ object Optimize {
       throw new RuntimeException(s"optimization changed type!" +
        s"\n  before: ${ ir0.typ.parsableString() }" +
        s"\n  after: ${ ir.typ.parsableString() }" +
-        s"\n  Before IR:\n  ----------\n${ Pretty(ir0) }" +
-        s"\n  After IR:\n  ---------\n${ Pretty(ir) }")
+        s"\n  Before IR:\n  ----------\n${ Pretty(ctx, ir0) }" +
+        s"\n  After IR:\n  ---------\n${ Pretty(ctx, ir) }")
 
-    if (ctx.printIRs)
-      log.info(s"optimize $context: after: IR size ${ IRSize(ir) }:\n" + Pretty(ir, elideLiterals = true))
+    if (ctx.shouldLogIR())
+      log.info(s"optimize $context: after: IR size ${ IRSize(ir) }:\n" + Pretty(ctx, ir, elideLiterals = true))
 
     ir
   }
diff --git a/hail/src/main/scala/is/hail/expr/ir/Parser.scala b/hail/src/main/scala/is/hail/expr/ir/Parser.scala
index ed34874b4c0..bd5471b9eef 100644
--- a/hail/src/main/scala/is/hail/expr/ir/Parser.scala
+++ b/hail/src/main/scala/is/hail/expr/ir/Parser.scala
@@ -869,7 +869,7 @@ object IRParser {
     case "MakeArray" =>
       val typ = opt(it, type_expr(env.typEnv)).map(_.asInstanceOf[TArray]).orNull
       ir_value_children(env)(it).map { args =>
-        MakeArray.unify(args, typ)
+        MakeArray.unify(env.ctx, args, typ)
       }
     case "MakeStream" =>
       val typ = opt(it, type_expr(env.typEnv)).map(_.asInstanceOf[TStream]).orNull
diff --git a/hail/src/main/scala/is/hail/expr/ir/Pretty.scala b/hail/src/main/scala/is/hail/expr/ir/Pretty.scala
index 88c3aef048e..cffc70f7775 100644
--- a/hail/src/main/scala/is/hail/expr/ir/Pretty.scala
+++ b/hail/src/main/scala/is/hail/expr/ir/Pretty.scala
@@ -1,6 +1,7 @@
 package is.hail.expr.ir
 
 import is.hail.HailContext
+import is.hail.backend.ExecuteContext
 import is.hail.expr.JSONAnnotationImpex
 import is.hail.expr.ir.agg._
 import is.hail.expr.ir.functions.RelationalFunctions
@@ -14,8 +15,8 @@ import org.json4s.jackson.{JsonMethods, Serialization}
 import scala.collection.mutable
 
 object Pretty {
-  def apply(ir: BaseIR, width: Int = 100, ribbonWidth: Int = 50, elideLiterals: Boolean = true, maxLen: Int = -1, allowUnboundRefs: Boolean = false): String = {
-    val useSSA = HailContext.getFlag("use_ssa_logs") != null
+  def apply(ctx: ExecuteContext, ir: BaseIR, width: Int = 100, ribbonWidth: Int = 50, elideLiterals: Boolean = true, maxLen: Int = -1, allowUnboundRefs: Boolean = false): String = {
+    val useSSA = ctx != null && ctx.getFlag("use_ssa_logs") != null
     val pretty = new Pretty(width, ribbonWidth, elideLiterals, maxLen, allowUnboundRefs, useSSA)
     pretty(ir)
   }
diff --git a/hail/src/main/scala/is/hail/expr/ir/PruneDeadFields.scala b/hail/src/main/scala/is/hail/expr/ir/PruneDeadFields.scala
index 70dacc98dd9..5cd3fd54aa7 100644
--- a/hail/src/main/scala/is/hail/expr/ir/PruneDeadFields.scala
+++ b/hail/src/main/scala/is/hail/expr/ir/PruneDeadFields.scala
@@ -1,5 +1,6 @@
 package is.hail.expr.ir
 
+import is.hail.backend.ExecuteContext
 import is.hail.annotations._
 import is.hail.types._
 import is.hail.types.virtual._
@@ -58,7 +59,7 @@ object PruneDeadFields {
         }
       case (t1: TTuple, t2: TTuple) =>
         var idx = -1
-        t1.fields.forall { f =>
+        t1._types.forall { f =>
          val t2field = t2.fields(t2.fieldIndex(f.index))
          if (t2field.index > idx) {
            idx = t2field.index
@@ -76,26 +77,26 @@ object PruneDeadFields {
     }
   }
 
-  def apply(ir: BaseIR): BaseIR = {
+  def apply(ctx: ExecuteContext, ir: BaseIR): BaseIR = {
     try {
       val irCopy = ir.deepCopy()
       val ms = ComputeMutableState(Memo.empty[BaseType], mutable.HashMap.empty)
       irCopy match {
         case mir: MatrixIR =>
-          memoizeMatrixIR(mir, mir.typ, ms)
-          rebuild(mir, ms.rebuildState)
+          memoizeMatrixIR(ctx, mir, mir.typ, ms)
+          rebuild(ctx, mir, ms.rebuildState)
         case tir: TableIR =>
-          memoizeTableIR(tir, tir.typ, ms)
-          rebuild(tir,
ms.rebuildState) + memoizeTableIR(ctx, tir, tir.typ, ms) + rebuild(ctx, tir, ms.rebuildState) case bmir: BlockMatrixIR => - memoizeBlockMatrixIR(bmir, bmir.typ, ms) - rebuild(bmir, ms.rebuildState) + memoizeBlockMatrixIR(ctx, bmir, bmir.typ, ms) + rebuild(ctx, bmir, ms.rebuildState) case vir: IR => - memoizeValueIR(vir, vir.typ, ms) - rebuildIR(vir, BindingEnv(Env.empty, Some(Env.empty), Some(Env.empty)), ms.rebuildState) + memoizeValueIR(ctx, vir, vir.typ, ms) + rebuildIR(ctx, vir, BindingEnv(Env.empty, Some(Env.empty), Some(Env.empty)), ms.rebuildState) } } catch { - case e: Throwable => fatal(s"error trying to rebuild IR:\n${ Pretty(ir, elideLiterals = true) }", e) + case e: Throwable => fatal(s"error trying to rebuild IR:\n${ Pretty(ctx, ir, elideLiterals = true) }", e) } } @@ -320,20 +321,25 @@ object PruneDeadFields { BindingEnv(e, Some(e), Some(e)) } - def memoizeTableIR(tir: TableIR, requestedType: TableType, memo: ComputeMutableState) { + def memoizeTableIR( + ctx: ExecuteContext, + tir: TableIR, + requestedType: TableType, + memo: ComputeMutableState + ) { memo.requestedType.bind(tir, requestedType) tir match { case TableRead(_, _, _) => case TableLiteral(_, _, _, _) => case TableParallelize(rowsAndGlobal, _) => - memoizeValueIR(rowsAndGlobal, TStruct("rows" -> TArray(requestedType.rowType), "global" -> requestedType.globalType), memo) + memoizeValueIR(ctx, rowsAndGlobal, TStruct("rows" -> TArray(requestedType.rowType), "global" -> requestedType.globalType), memo) case TableRange(_, _) => - case TableRepartition(child, _, _) => memoizeTableIR(child, requestedType, memo) - case TableHead(child, _) => memoizeTableIR(child, TableType( + case TableRepartition(child, _, _) => memoizeTableIR(ctx, child, requestedType, memo) + case TableHead(child, _) => memoizeTableIR(ctx, child, TableType( key = child.typ.key, rowType = unify(child.typ.rowType, selectKey(child.typ.rowType, child.typ.key), requestedType.rowType), globalType = requestedType.globalType), memo) - case TableTail(child, _) => memoizeTableIR(child, TableType( + case TableTail(child, _) => memoizeTableIR(ctx, child, TableType( key = child.typ.key, rowType = unify(child.typ.rowType, selectKey(child.typ.rowType, child.typ.key), requestedType.rowType), globalType = requestedType.globalType), memo) @@ -349,7 +355,7 @@ object PruneDeadFields { requestedType.rowType.fieldOption(f).map(reqF => f -> reqF.typ)): _*), globalType = TStruct(left.typ.globalType.fieldNames.flatMap(f => requestedType.globalType.fieldOption(f).map(reqF => f -> reqF.typ)): _*)) - memoizeTableIR(left, leftDep, memo) + memoizeTableIR(ctx, left, leftDep, memo) val rk = right.typ.key.take(joinKey + math.max(0, requestedType.key.length - left.typ.key.length)) val rightKeyFields = rk.toSet @@ -362,7 +368,7 @@ object PruneDeadFields { requestedType.rowType.fieldOption(f).map(reqF => f -> reqF.typ)): _*), globalType = TStruct(right.typ.globalType.fieldNames.flatMap(f => requestedType.globalType.fieldOption(f).map(reqF => f -> reqF.typ)): _*)) - memoizeTableIR(right, rightDep, memo) + memoizeTableIR(ctx, right, rightDep, memo) case TableLeftJoinRightDistinct(left, right, root) => val fieldDep = requestedType.rowType.fieldOption(root).map(_.typ.asInstanceOf[TStruct]) fieldDep match { @@ -374,7 +380,7 @@ object PruneDeadFields { FastIndexedSeq[TStruct](right.typ.rowType.filterSet(right.typ.key.toSet, true)._1) ++ FastIndexedSeq(struct): _*), globalType = minimal(right.typ.globalType)) - memoizeTableIR(right, rightDep, memo) + memoizeTableIR(ctx, right, rightDep, memo) 
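+          memoizeTableIR(ctx, right, rightDep, memo)

// Editorial aside, kept as Scala comments so it stays distinct from the patch text above.
// A minimal, self-contained sketch of the context-threading pattern this patch applies
// throughout PruneDeadFields: every recursive entry point gains an explicit `ctx` first
// parameter instead of reaching for global state. The names here (Ctx, Expr, simplify,
// the "trace_simplify" flag) are invented for illustration and are not Hail APIs.
object CtxThreadingSketch {
  // Stand-in for an execution context: carries flags that previously lived in a global.
  final case class Ctx(flags: Map[String, String]) {
    def getFlag(name: String): Option[String] = flags.get(name)
  }

  sealed trait Expr
  final case class Lit(n: Int) extends Expr
  final case class Add(l: Expr, r: Expr) extends Expr

  // Every recursive call takes `ctx` as its first argument, mirroring the
  // memoize*(ctx, ...) / rebuild(ctx, ...) calls in the surrounding diff.
  def simplify(ctx: Ctx, e: Expr): Expr = {
    if (ctx.getFlag("trace_simplify").isDefined)
      println(s"simplify: $e")
    e match {
      case Add(Lit(0), r) => simplify(ctx, r)
      case Add(l, Lit(0)) => simplify(ctx, l)
      case Add(l, r)      => Add(simplify(ctx, l), simplify(ctx, r))
      case lit: Lit       => lit
    }
  }

  def main(args: Array[String]): Unit = {
    val ctx = Ctx(Map("trace_simplify" -> "1"))
    // Prints Lit(2): the zero operands are folded away while `ctx` is threaded through.
    println(simplify(ctx, Add(Lit(0), Add(Lit(2), Lit(0)))))
  }
}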
val lk = unifyKey(FastSeq(left.typ.key.take(right.typ.key.length), requestedType.key)) val leftDep = TableType( @@ -382,10 +388,10 @@ object PruneDeadFields { rowType = unify(left.typ.rowType, requestedType.rowType.filterSet(Set(root), include = false)._1, selectKey(left.typ.rowType, lk)), globalType = requestedType.globalType) - memoizeTableIR(left, leftDep, memo) + memoizeTableIR(ctx, left, leftDep, memo) case None => // don't memoize right if we are going to elide it during rebuild - memoizeTableIR(left, requestedType, memo) + memoizeTableIR(ctx, left, requestedType, memo) } case TableIntervalJoin(left, right, root, product) => val fieldDep = requestedType.rowType.fieldOption(root).map { field => @@ -403,7 +409,7 @@ object PruneDeadFields { FastIndexedSeq[TStruct](right.typ.rowType.filterSet(right.typ.key.toSet, true)._1) ++ FastIndexedSeq(struct): _*), globalType = minimal(right.typ.globalType)) - memoizeTableIR(right, rightDep, memo) + memoizeTableIR(ctx, right, rightDep, memo) val lk = unifyKey(FastSeq(left.typ.key.take(right.typ.key.length), requestedType.key)) val leftDep = TableType( @@ -411,10 +417,10 @@ object PruneDeadFields { rowType = unify(left.typ.rowType, requestedType.rowType.filterSet(Set(root), include = false)._1, selectKey(left.typ.rowType, lk)), globalType = requestedType.globalType) - memoizeTableIR(left, leftDep, memo) + memoizeTableIR(ctx, left, leftDep, memo) case None => // don't memoize right if we are going to elide it during rebuild - memoizeTableIR(left, requestedType, memo) + memoizeTableIR(ctx, left, requestedType, memo) } case TableMultiWayZipJoin(children, fieldName, globalName) => val gType = requestedType.globalType.fieldOption(globalName) @@ -430,7 +436,7 @@ object PruneDeadFields { child1.typ.keyType.fieldOption(f).orElse(rType.fieldOption(f)).map(reqF => f -> reqF.typ) ): _*), globalType = gType) - children.foreach(memoizeTableIR(_, dep, memo)) + children.foreach(memoizeTableIR(ctx, _, dep, memo)) case TableExplode(child, path) => def getExplodedField(typ: TableType): Type = typ.rowType.queryTyped(path.toList)._1 @@ -446,10 +452,10 @@ object PruneDeadFields { } val dep = requestedType.copy(rowType = unify(child.typ.rowType, requestedType.rowType.insert(prunedPreExlosionFieldType, path.toList)._1.asInstanceOf[TStruct])) - memoizeTableIR(child, dep, memo) + memoizeTableIR(ctx, child, dep, memo) case TableFilter(child, pred) => - val irDep = memoizeAndGetDep(pred, pred.typ, child.typ, memo) - memoizeTableIR(child, unify(child.typ, requestedType, irDep), memo) + val irDep = memoizeAndGetDep(ctx, pred, pred.typ, child.typ, memo) + memoizeTableIR(ctx, child, unify(child.typ, requestedType, irDep), memo) case TableKeyBy(child, _, isSorted) => val reqKey = requestedType.key val isPrefix = reqKey.zip(child.typ.key).forall { case (l, r) => l == r } @@ -459,7 +465,7 @@ object PruneDeadFields { if (reqKey.length <= child.typ.key.length) reqKey else child.typ.key else FastIndexedSeq() - memoizeTableIR(child, TableType( + memoizeTableIR(ctx, child, TableType( key = childReqKey, rowType = unify(child.typ.rowType, selectKey(child.typ.rowType, childReqKey), requestedType.rowType), globalType = requestedType.globalType), memo) @@ -468,7 +474,7 @@ object PruneDeadFields { child.typ.key else FastIndexedSeq() - memoizeTableIR(child, TableType( + memoizeTableIR(ctx, child, TableType( key = k, rowType = unify(child.typ.rowType, selectKey(child.typ.rowType, sortFields.map(_.field) ++ k), @@ -478,10 +484,10 @@ object PruneDeadFields { val dep = TableType(key = 
child.typ.key, rowType = unify(child.typ.rowType, requestedType.rowType, selectKey(child.typ.rowType, child.typ.key)), globalType = requestedType.globalType) - memoizeTableIR(child, dep, memo) + memoizeTableIR(ctx, child, dep, memo) case TableMapPartitions(child, gName, pName, body) => val reqRowsType = TStream(requestedType.rowType) - val bodyDep = memoizeValueIR(body, reqRowsType, memo) + val bodyDep = memoizeValueIR(ctx, body, reqRowsType, memo) val depGlobalType = unifySeq(child.typ.globalType, bodyDep.eval.lookupOption(gName).map(_.result()).getOrElse(Array()) :+ requestedType.globalType) val depRowType = unifySeq(child.typ.rowType, @@ -492,28 +498,28 @@ object PruneDeadFields { key = requestedType.key, rowType = depRowType.asInstanceOf[TStruct], globalType = depGlobalType.asInstanceOf[TStruct]) - memoizeTableIR(child, dep, memo) + memoizeTableIR(ctx, child, dep, memo) case TableMapRows(child, newRow) => - val rowDep = memoizeAndGetDep(newRow, requestedType.rowType, child.typ, memo) + val rowDep = memoizeAndGetDep(ctx, newRow, requestedType.rowType, child.typ, memo) val dep = TableType( key = requestedType.key, rowType = unify(child.typ.rowType, selectKey(requestedType.rowType, requestedType.key), rowDep.rowType), globalType = unify(child.typ.globalType, requestedType.globalType, rowDep.globalType) ) - memoizeTableIR(child, dep, memo) + memoizeTableIR(ctx, child, dep, memo) case TableMapGlobals(child, newGlobals) => - val globalDep = memoizeAndGetDep(newGlobals, requestedType.globalType, child.typ, memo) - memoizeTableIR(child, unify(child.typ, requestedType.copy(globalType = globalDep.globalType), globalDep), memo) + val globalDep = memoizeAndGetDep(ctx, newGlobals, requestedType.globalType, child.typ, memo) + memoizeTableIR(ctx, child, unify(child.typ, requestedType.copy(globalType = globalDep.globalType), globalDep), memo) case TableAggregateByKey(child, expr) => val exprRequestedType = requestedType.rowType.filter(f => expr.typ.asInstanceOf[TStruct].hasField(f.name))._1 - val aggDep = memoizeAndGetDep(expr, exprRequestedType, child.typ, memo) - memoizeTableIR(child, TableType(key = child.typ.key, + val aggDep = memoizeAndGetDep(ctx, expr, exprRequestedType, child.typ, memo) + memoizeTableIR(ctx, child, TableType(key = child.typ.key, rowType = unify(child.typ.rowType, aggDep.rowType, selectKey(child.typ.rowType, child.typ.key)), globalType = unify(child.typ.globalType, aggDep.globalType, requestedType.globalType)), memo) case TableKeyByAndAggregate(child, expr, newKey, _, _) => - val keyDep = memoizeAndGetDep(newKey, newKey.typ, child.typ, memo) - val exprDep = memoizeAndGetDep(expr, requestedType.valueType, child.typ, memo) - memoizeTableIR(child, + val keyDep = memoizeAndGetDep(ctx, newKey, newKey.typ, child.typ, memo) + val exprDep = memoizeAndGetDep(ctx, expr, requestedType.valueType, child.typ, memo) + memoizeTableIR(ctx, child, TableType( key = FastIndexedSeq(), // note: this can deoptimize if prune runs before Simplify rowType = unify(child.typ.rowType, keyDep.rowType, exprDep.rowType), @@ -525,14 +531,14 @@ object PruneDeadFields { entryType = TStruct.empty, colType = requestedType.rowType, colKey = requestedType.key) - memoizeMatrixIR(child, mtDep, memo) + memoizeMatrixIR(ctx, child, mtDep, memo) case MatrixRowsTable(child) => val minChild = minimal(child.typ) val mtDep = minChild.copy( globalType = requestedType.globalType, rowType = unify(child.typ.rowType, selectKey(child.typ.rowType, requestedType.key), requestedType.rowType), rowKey = requestedType.key) - 
memoizeMatrixIR(child, mtDep, memo) + memoizeMatrixIR(ctx, child, mtDep, memo) case MatrixEntriesTable(child) => val mtDep = MatrixType( rowKey = requestedType.key.take(child.typ.rowKey.length), @@ -545,9 +551,9 @@ object PruneDeadFields { entryType = TStruct( child.typ.entryType.fields.flatMap(f => requestedType.rowType.fieldOption(f.name).map(f2 => f.name -> f2.typ)): _*) ) - memoizeMatrixIR(child, mtDep, memo) + memoizeMatrixIR(ctx, child, mtDep, memo) case TableUnion(children) => - children.foreach(memoizeTableIR(_, requestedType, memo)) + children.foreach(memoizeTableIR(ctx, _, requestedType, memo)) case CastMatrixToTable(child, entriesFieldName, colsFieldName) => val childDep = MatrixType( rowKey = requestedType.key, @@ -568,7 +574,7 @@ object PruneDeadFields { requestedType.rowType.deleteKey(entriesFieldName) else requestedType.rowType) - memoizeMatrixIR(child, childDep, memo) + memoizeMatrixIR(ctx, child, childDep, memo) case TableRename(child, rowMap, globalMap) => val rowMapRev = rowMap.map { case (k, v) => (v, k) } val globalMapRev = globalMap.map { case (k, v) => (v, k) } @@ -576,37 +582,42 @@ object PruneDeadFields { rowType = requestedType.rowType.rename(rowMapRev), globalType = requestedType.globalType.rename(globalMapRev), key = requestedType.key.map(k => rowMapRev.getOrElse(k, k))) - memoizeTableIR(child, childDep, memo) + memoizeTableIR(ctx, child, childDep, memo) case TableFilterIntervals(child, _, _) => - memoizeTableIR(child, requestedType.copy(key = child.typ.key, + memoizeTableIR(ctx, child, requestedType.copy(key = child.typ.key, rowType = PruneDeadFields.unify(child.typ.rowType, requestedType.rowType, PruneDeadFields.selectKey(child.typ.rowType, child.typ.key))), memo) - case TableToTableApply(child, f) => memoizeTableIR(child, child.typ, memo) - case MatrixToTableApply(child, _) => memoizeMatrixIR(child, child.typ, memo) + case TableToTableApply(child, f) => memoizeTableIR(ctx, child, child.typ, memo) + case MatrixToTableApply(child, _) => memoizeMatrixIR(ctx, child, child.typ, memo) case BlockMatrixToTableApply(bm, aux, _) => - memoizeBlockMatrixIR(bm, bm.typ, memo) - memoizeValueIR(aux, aux.typ, memo) - case BlockMatrixToTable(child) => memoizeBlockMatrixIR(child, child.typ, memo) + memoizeBlockMatrixIR(ctx, bm, bm.typ, memo) + memoizeValueIR(ctx, aux, aux.typ, memo) + case BlockMatrixToTable(child) => memoizeBlockMatrixIR(ctx, child, child.typ, memo) case RelationalLetTable(name, value, body) => - memoizeTableIR(body, requestedType, memo) + memoizeTableIR(ctx, body, requestedType, memo) val usages = memo.relationalRefs.get(name).map(_.result()).getOrElse(Array()) - memoizeValueIR(value, unifySeq(value.typ, usages), memo) + memoizeValueIR(ctx, value, unifySeq(value.typ, usages), memo) } } - def memoizeMatrixIR(mir: MatrixIR, requestedType: MatrixType, memo: ComputeMutableState) { + def memoizeMatrixIR( + ctx: ExecuteContext, + mir: MatrixIR, + requestedType: MatrixType, + memo: ComputeMutableState + ) { memo.requestedType.bind(mir, requestedType) mir match { case MatrixFilterCols(child, pred) => - val irDep = memoizeAndGetDep(pred, pred.typ, child.typ, memo) - memoizeMatrixIR(child, unify(child.typ, requestedType, irDep), memo) + val irDep = memoizeAndGetDep(ctx, pred, pred.typ, child.typ, memo) + memoizeMatrixIR(ctx, child, unify(child.typ, requestedType, irDep), memo) case MatrixFilterRows(child, pred) => - val irDep = memoizeAndGetDep(pred, pred.typ, child.typ, memo) - memoizeMatrixIR(child, unify(child.typ, requestedType, irDep), memo) + val irDep = 
memoizeAndGetDep(ctx, pred, pred.typ, child.typ, memo) + memoizeMatrixIR(ctx, child, unify(child.typ, requestedType, irDep), memo) case MatrixFilterEntries(child, pred) => - val irDep = memoizeAndGetDep(pred, pred.typ, child.typ, memo) - memoizeMatrixIR(child, unify(child.typ, requestedType, irDep), memo) + val irDep = memoizeAndGetDep(ctx, pred, pred.typ, child.typ, memo) + memoizeMatrixIR(ctx, child, unify(child.typ, requestedType, irDep), memo) case MatrixUnionCols(left, right, joinType) => val leftRequestedType = requestedType.copy( rowKey = left.typ.rowKey, @@ -616,38 +627,38 @@ object PruneDeadFields { globalType = TStruct.empty, rowKey = right.typ.rowKey, rowType = selectKey(right.typ.rowType, right.typ.rowKey)) - memoizeMatrixIR(left, leftRequestedType, memo) - memoizeMatrixIR(right, rightRequestedType, memo) + memoizeMatrixIR(ctx, left, leftRequestedType, memo) + memoizeMatrixIR(ctx, right, rightRequestedType, memo) case MatrixMapEntries(child, newEntries) => - val irDep = memoizeAndGetDep(newEntries, requestedType.entryType, child.typ, memo) + val irDep = memoizeAndGetDep(ctx, newEntries, requestedType.entryType, child.typ, memo) val depMod = requestedType.copy(entryType = TStruct.empty) - memoizeMatrixIR(child, unify(child.typ, depMod, irDep), memo) + memoizeMatrixIR(ctx, child, unify(child.typ, depMod, irDep), memo) case MatrixKeyRowsBy(child, _, isSorted) => val reqKey = requestedType.rowKey val childReqKey = if (isSorted) child.typ.rowKey.take(reqKey.length) else FastIndexedSeq() - memoizeMatrixIR(child, requestedType.copy( + memoizeMatrixIR(ctx, child, requestedType.copy( rowKey = childReqKey, rowType = unify(child.typ.rowType, requestedType.rowType, selectKey(child.typ.rowType, childReqKey))), memo) case MatrixMapRows(child, newRow) => - val irDep = memoizeAndGetDep(newRow, requestedType.rowType, child.typ, memo) + val irDep = memoizeAndGetDep(ctx, newRow, requestedType.rowType, child.typ, memo) val depMod = requestedType.copy(rowType = selectKey(child.typ.rowType, child.typ.rowKey)) - memoizeMatrixIR(child, unify(child.typ, depMod, irDep), memo) + memoizeMatrixIR(ctx, child, unify(child.typ, depMod, irDep), memo) case MatrixMapCols(child, newCol, newKey) => - val irDep = memoizeAndGetDep(newCol, requestedType.colType, child.typ, memo) + val irDep = memoizeAndGetDep(ctx, newCol, requestedType.colType, child.typ, memo) val reqKey = newKey match { case Some(_) => FastIndexedSeq() case None => requestedType.colKey } val depMod = requestedType.copy(colType = selectKey(child.typ.colType, reqKey), colKey = reqKey) - memoizeMatrixIR(child, unify(child.typ, depMod, irDep), memo) + memoizeMatrixIR(ctx, child, unify(child.typ, depMod, irDep), memo) case MatrixMapGlobals(child, newGlobals) => - val irDep = memoizeAndGetDep(newGlobals, requestedType.globalType, child.typ, memo) - memoizeMatrixIR(child, unify(child.typ, requestedType.copy(globalType = irDep.globalType), irDep), memo) + val irDep = memoizeAndGetDep(ctx, newGlobals, requestedType.globalType, child.typ, memo) + memoizeMatrixIR(ctx, child, unify(child.typ, requestedType.copy(globalType = irDep.globalType), irDep), memo) case MatrixRead(_, _, _, _) => case MatrixLiteral(_, _) => case MatrixChooseCols(child, _) => - memoizeMatrixIR(child, unify(child.typ, requestedType), memo) + memoizeMatrixIR(ctx, child, unify(child.typ, requestedType), memo) case MatrixCollectColsByKey(child) => val colKeySet = child.typ.colKey.toSet val requestedColType = requestedType.colType @@ -663,10 +674,10 @@ object PruneDeadFields { }: _*), rowType 
= requestedType.rowType, entryType = TStruct(requestedType.entryType.fields.map(f => f.copy(typ = f.typ.asInstanceOf[TArray].elementType)))) - memoizeMatrixIR(child, explodedDep, memo) + memoizeMatrixIR(ctx, child, explodedDep, memo) case MatrixAggregateRowsByKey(child, entryExpr, rowExpr) => - val irDepEntry = memoizeAndGetDep(entryExpr, requestedType.entryType, child.typ, memo) - val irDepRow = memoizeAndGetDep(rowExpr, requestedType.rowValueStruct, child.typ, memo) + val irDepEntry = memoizeAndGetDep(ctx, entryExpr, requestedType.entryType, child.typ, memo) + val irDepRow = memoizeAndGetDep(ctx, rowExpr, requestedType.rowValueStruct, child.typ, memo) val childDep = MatrixType( rowKey = child.typ.rowKey, colKey = requestedType.colKey, @@ -674,10 +685,10 @@ object PruneDeadFields { rowType = unify(child.typ.rowType, selectKey(child.typ.rowType, child.typ.rowKey), irDepRow.rowType, irDepEntry.rowType), colType = unify(child.typ.colType, requestedType.colType, irDepEntry.colType, irDepRow.colType), globalType = unify(child.typ.globalType, requestedType.globalType, irDepEntry.globalType, irDepRow.globalType)) - memoizeMatrixIR(child, childDep, memo) + memoizeMatrixIR(ctx, child, childDep, memo) case MatrixAggregateColsByKey(child, entryExpr, colExpr) => - val irDepEntry = memoizeAndGetDep(entryExpr, requestedType.entryType, child.typ, memo) - val irDepCol = memoizeAndGetDep(colExpr, requestedType.colValueStruct, child.typ, memo) + val irDepEntry = memoizeAndGetDep(ctx, entryExpr, requestedType.entryType, child.typ, memo) + val irDepCol = memoizeAndGetDep(ctx, colExpr, requestedType.colValueStruct, child.typ, memo) val childDep: MatrixType = MatrixType( rowKey = requestedType.rowKey, colKey = child.typ.colKey, @@ -685,7 +696,7 @@ object PruneDeadFields { globalType = unify(child.typ.globalType, requestedType.globalType, irDepEntry.globalType, irDepCol.globalType), rowType = unify(child.typ.rowType, irDepEntry.rowType, irDepCol.rowType, requestedType.rowType), entryType = irDepEntry.entryType) - memoizeMatrixIR(child, childDep, memo) + memoizeMatrixIR(ctx, child, childDep, memo) case MatrixAnnotateRowsTable(child, table, root, product) => val fieldDep = requestedType.rowType.fieldOption(root).map { field => if (product) @@ -700,7 +711,7 @@ object PruneDeadFields { key = tk, rowType = unify(table.typ.rowType, struct, selectKey(table.typ.rowType, tk)), globalType = minimal(table.typ.globalType)) - memoizeTableIR(table, tableDep, memo) + memoizeTableIR(ctx, table, tableDep, memo) val mk = unifyKey(FastSeq(child.typ.rowKey.take(tk.length), requestedType.rowKey)) val matDep = requestedType.copy( @@ -709,10 +720,10 @@ object PruneDeadFields { unify(child.typ.rowType, selectKey(child.typ.rowType, mk), requestedType.rowType.filterSet(Set(root), include = false)._1)) - memoizeMatrixIR(child, matDep, memo) + memoizeMatrixIR(ctx, child, matDep, memo) case None => // don't depend on key IR dependencies if we are going to elide the node anyway - memoizeMatrixIR(child, requestedType, memo) + memoizeMatrixIR(ctx, child, requestedType, memo) } case MatrixAnnotateColsTable(child, table, uid) => val fieldDep = requestedType.colType.fieldOption(uid).map(_.typ.asInstanceOf[TStruct]) @@ -723,17 +734,17 @@ object PruneDeadFields { key = tk, rowType = unify(table.typ.rowType, struct, selectKey(table.typ.rowType, tk)), globalType = minimal(table.typ.globalType)) - memoizeTableIR(table, tableDep, memo) + memoizeTableIR(ctx, table, tableDep, memo) val mk = unifyKey(FastSeq(child.typ.colKey.take(table.typ.key.length), 
requestedType.colKey)) val matDep = requestedType.copy( colKey = mk, colType = unify(child.typ.colType, requestedType.colType.filterSet(Set(uid), include = false)._1, selectKey(child.typ.colType, mk))) - memoizeMatrixIR(child, matDep, memo) + memoizeMatrixIR(ctx, child, matDep, memo) case None => // don't depend on key IR dependencies if we are going to elide the node anyway - memoizeMatrixIR(child, requestedType, memo) + memoizeMatrixIR(ctx, child, requestedType, memo) } case MatrixExplodeRows(child, path) => def getExplodedField(typ: MatrixType): Type = typ.rowType.queryTyped(path.toList)._1 @@ -750,7 +761,7 @@ object PruneDeadFields { } val dep = requestedType.copy(rowType = unify(child.typ.rowType, requestedType.rowType.insert(prunedPreExlosionFieldType, path.toList)._1.asInstanceOf[TStruct])) - memoizeMatrixIR(child, dep, memo) + memoizeMatrixIR(ctx, child, dep, memo) case MatrixExplodeCols(child, path) => def getExplodedField(typ: MatrixType): Type = typ.colType.queryTyped(path.toList)._1 @@ -766,31 +777,31 @@ object PruneDeadFields { } val dep = requestedType.copy(colType = unify(child.typ.colType, requestedType.colType.insert(prunedPreExplosionFieldType, path.toList)._1.asInstanceOf[TStruct])) - memoizeMatrixIR(child, dep, memo) + memoizeMatrixIR(ctx, child, dep, memo) case MatrixRepartition(child, _, _) => - memoizeMatrixIR(child, requestedType, memo) + memoizeMatrixIR(ctx, child, requestedType, memo) case MatrixUnionRows(children) => - children.foreach(memoizeMatrixIR(_, requestedType, memo)) + children.foreach(memoizeMatrixIR(ctx, _, requestedType, memo)) case MatrixDistinctByRow(child) => val dep = requestedType.copy( rowKey = child.typ.rowKey, rowType = unify(child.typ.rowType, requestedType.rowType, selectKey(child.typ.rowType, child.typ.rowKey)) ) - memoizeMatrixIR(child, dep, memo) + memoizeMatrixIR(ctx, child, dep, memo) case MatrixRowsHead(child, n) => val dep = requestedType.copy( rowKey = child.typ.rowKey, rowType = unify(child.typ.rowType, requestedType.rowType, selectKey(child.typ.rowType, child.typ.rowKey)) ) - memoizeMatrixIR(child, dep, memo) - case MatrixColsHead(child, n) => memoizeMatrixIR(child, requestedType, memo) + memoizeMatrixIR(ctx, child, dep, memo) + case MatrixColsHead(child, n) => memoizeMatrixIR(ctx, child, requestedType, memo) case MatrixRowsTail(child, n) => val dep = requestedType.copy( rowKey = child.typ.rowKey, rowType = unify(child.typ.rowType, requestedType.rowType, selectKey(child.typ.rowType, child.typ.rowKey)) ) - memoizeMatrixIR(child, dep, memo) - case MatrixColsTail(child, n) => memoizeMatrixIR(child, requestedType, memo) + memoizeMatrixIR(ctx, child, dep, memo) + case MatrixColsTail(child, n) => memoizeMatrixIR(ctx, child, requestedType, memo) case CastTableToMatrix(child, entriesFieldName, colsFieldName, _) => val m = Map(MatrixType.entriesIdentifier -> entriesFieldName) val childDep = child.typ.copy( @@ -798,13 +809,13 @@ object PruneDeadFields { globalType = unify(child.typ.globalType, requestedType.globalType, TStruct((colsFieldName, TArray(requestedType.colType)))), rowType = unify(child.typ.rowType, requestedType.rowType, TStruct((entriesFieldName, TArray(requestedType.entryType)))) ) - memoizeTableIR(child, childDep, memo) + memoizeTableIR(ctx, child, childDep, memo) case MatrixFilterIntervals(child, _, _) => - memoizeMatrixIR(child, requestedType.copy(rowKey = child.typ.rowKey, + memoizeMatrixIR(ctx, child, requestedType.copy(rowKey = child.typ.rowKey, rowType = unify(child.typ.rowType, requestedType.rowType, 
selectKey(child.typ.rowType, child.typ.rowKey))), memo) - case MatrixToMatrixApply(child, f) => memoizeMatrixIR(child, child.typ, memo) + case MatrixToMatrixApply(child, f) => memoizeMatrixIR(ctx, child, child.typ, memo) case MatrixRename(child, globalMap, colMap, rowMap, entryMap) => val globalMapRev = globalMap.map { case (k, v) => (v, k) } val colMapRev = colMap.map { case (k, v) => (v, k) } @@ -817,33 +828,44 @@ object PruneDeadFields { colKey = requestedType.colKey.map(k => colMapRev.getOrElse(k, k)), rowType = requestedType.rowType.rename(rowMapRev), entryType = requestedType.entryType.rename(entryMapRev)) - memoizeMatrixIR(child, childDep, memo) + memoizeMatrixIR(ctx, child, childDep, memo) case RelationalLetMatrixTable(name, value, body) => - memoizeMatrixIR(body, requestedType, memo) + memoizeMatrixIR(ctx, body, requestedType, memo) val usages = memo.relationalRefs.get(name).map(_.result()).getOrElse(Array()) - memoizeValueIR(value, unifySeq(value.typ, usages), memo) + memoizeValueIR(ctx, value, unifySeq(value.typ, usages), memo) } } - def memoizeBlockMatrixIR(bmir: BlockMatrixIR, requestedType: BlockMatrixType, memo: ComputeMutableState): Unit = { + def memoizeBlockMatrixIR( + ctx: ExecuteContext, + bmir: BlockMatrixIR, + requestedType: BlockMatrixType, + memo: ComputeMutableState + ): Unit = { memo.requestedType.bind(bmir, requestedType) bmir match { case RelationalLetBlockMatrix(name, value, body) => - memoizeBlockMatrixIR(body, requestedType, memo) + memoizeBlockMatrixIR(ctx, body, requestedType, memo) val usages = memo.relationalRefs.get(name).map(_.result()).getOrElse(Array()) - memoizeValueIR(value, unifySeq(value.typ, usages), memo) + memoizeValueIR(ctx, value, unifySeq(value.typ, usages), memo) case _ => bmir.children.foreach { - case mir: MatrixIR => memoizeMatrixIR(mir, mir.typ, memo) - case tir: TableIR => memoizeTableIR(tir, tir.typ, memo) - case bmir: BlockMatrixIR => memoizeBlockMatrixIR(bmir, bmir.typ, memo) - case ir: IR => memoizeValueIR(ir, ir.typ, memo) + case mir: MatrixIR => memoizeMatrixIR(ctx, mir, mir.typ, memo) + case tir: TableIR => memoizeTableIR(ctx, tir, tir.typ, memo) + case bmir: BlockMatrixIR => memoizeBlockMatrixIR(ctx, bmir, bmir.typ, memo) + case ir: IR => memoizeValueIR(ctx, ir, ir.typ, memo) } } } - def memoizeAndGetDep(ir: IR, requestedType: Type, base: TableType, memo: ComputeMutableState): TableType = { - val depEnv = memoizeValueIR(ir, requestedType, memo) + def memoizeAndGetDep( + ctx: ExecuteContext, + ir: IR, + requestedType: Type, + base: TableType, + memo: ComputeMutableState + ): TableType = { + val depEnv = memoizeValueIR(ctx, ir, requestedType, memo) val depEnvUnified = concatEnvs(FastIndexedSeq(depEnv.eval) ++ FastIndexedSeq(depEnv.agg, depEnv.scan).flatten) val expectedBindingSet = Set("row", "global") @@ -851,7 +873,7 @@ object PruneDeadFields { if (!expectedBindingSet.contains(k)) throw new RuntimeException(s"found unexpected free variable in pruning: $k\n" + s" ${ depEnv.pretty(_.result().mkString(",")) }\n" + - s" ${ Pretty(ir) }") + s" ${ Pretty(ctx, ir) }") } val min = minimal(base) @@ -864,14 +886,20 @@ object PruneDeadFields { globalType = globalType.asInstanceOf[TStruct]) } - def memoizeAndGetDep(ir: IR, requestedType: Type, base: MatrixType, memo: ComputeMutableState): MatrixType = { - val depEnv = memoizeValueIR(ir, requestedType, memo) + def memoizeAndGetDep( + ctx: ExecuteContext, + ir: IR, + requestedType: Type, + base: MatrixType, + memo: ComputeMutableState + ): MatrixType = { + val depEnv = memoizeValueIR(ctx, 
ir, requestedType, memo) val depEnvUnified = concatEnvs(FastIndexedSeq(depEnv.eval) ++ FastIndexedSeq(depEnv.agg, depEnv.scan).flatten) val expectedBindingSet = Set("va", "sa", "g", "global", "n_rows", "n_cols") depEnvUnified.m.keys.foreach { k => if (!expectedBindingSet.contains(k)) - throw new RuntimeException(s"found unexpected free variable in pruning: $k\n ${ Pretty(ir) }") + throw new RuntimeException(s"found unexpected free variable in pruning: $k\n ${ Pretty(ctx, ir) }") } val min = minimal(base) @@ -889,7 +917,7 @@ object PruneDeadFields { .asInstanceOf[TStruct] if (rowType.hasField(MatrixType.entriesIdentifier)) - throw new RuntimeException(s"prune: found dependence on entry array in row binding:\n${ Pretty(ir) }") + throw new RuntimeException(s"prune: found dependence on entry array in row binding:\n${ Pretty(ctx, ir) }") MatrixType( rowKey = FastIndexedSeq(), @@ -911,10 +939,15 @@ object PruneDeadFields { * any of the "b" dependencies in order to create its own requested type, * which only contains "a". */ - def memoizeValueIR(ir: IR, requestedType: Type, memo: ComputeMutableState): BindingEnv[BoxedArrayBuilder[Type]] = { + def memoizeValueIR( + ctx: ExecuteContext, + ir: IR, + requestedType: Type, + memo: ComputeMutableState + ): BindingEnv[BoxedArrayBuilder[Type]] = { memo.requestedType.bind(ir, requestedType) ir match { - case IsNA(value) => memoizeValueIR(value, minimal(value.typ), memo) + case IsNA(value) => memoizeValueIR(ctx, value, minimal(value.typ), memo) case CastRename(v, _typ) => def recur(reqType: Type, castType: Type, baseType: Type): Type = { ((reqType, castType, baseType): @unchecked) match { @@ -940,33 +973,33 @@ object PruneDeadFields { } } - memoizeValueIR(v, recur(requestedType, _typ, v.typ), memo) + memoizeValueIR(ctx, v, recur(requestedType, _typ, v.typ), memo) case If(cond, cnsq, alt) => unifyEnvs( - memoizeValueIR(cond, cond.typ, memo), - memoizeValueIR(cnsq, requestedType, memo), - memoizeValueIR(alt, requestedType, memo) + memoizeValueIR(ctx, cond, cond.typ, memo), + memoizeValueIR(ctx, cnsq, requestedType, memo), + memoizeValueIR(ctx, alt, requestedType, memo) ) - case Coalesce(values) => unifyEnvsSeq(values.map(memoizeValueIR(_, requestedType, memo))) - case Consume(value) => memoizeValueIR(value, value.typ, memo) + case Coalesce(values) => unifyEnvsSeq(values.map(memoizeValueIR(ctx, _, requestedType, memo))) + case Consume(value) => memoizeValueIR(ctx, value, value.typ, memo) case Let(name, value, body) => - val bodyEnv = memoizeValueIR(body, requestedType, memo) + val bodyEnv = memoizeValueIR(ctx, body, requestedType, memo) val valueType = bodyEnv.eval.lookupOption(name) match { case Some(ab) => unifySeq(value.typ, ab.result()) case None => minimal(value.typ) } unifyEnvs( bodyEnv.deleteEval(name), - memoizeValueIR(value, valueType, memo) + memoizeValueIR(ctx, value, valueType, memo) ) case AggLet(name, value, body, isScan) => - val bodyEnv = memoizeValueIR(body, requestedType, memo) + val bodyEnv = memoizeValueIR(ctx, body, requestedType, memo) if (isScan) { val valueType = unifySeq( value.typ, bodyEnv.scanOrEmpty.lookupOption(name).map(_.result()).getOrElse(Array())) - val valueEnv = memoizeValueIR(value, valueType, memo) + val valueEnv = memoizeValueIR(ctx, value, valueType, memo) unifyEnvs( bodyEnv.copy(scan = bodyEnv.scan.map(_.delete(name))), valueEnv.copy(eval = Env.empty, scan = Some(valueEnv.eval)) @@ -976,7 +1009,7 @@ object PruneDeadFields { value.typ, bodyEnv.aggOrEmpty.lookupOption(name).map(_.result()).getOrElse(Array())) - val 
valueEnv = memoizeValueIR(value, valueType, memo) + val valueEnv = memoizeValueIR(ctx, value, valueType, memo) unifyEnvs( bodyEnv.copy(agg = bodyEnv.agg.map(_.delete(name))), valueEnv.copy(eval = Env.empty, agg = Some(valueEnv.eval)) @@ -987,38 +1020,38 @@ object PruneDeadFields { ab += requestedType BindingEnv.empty.bindEval(name -> ab) case RelationalLet(name, value, body) => - val e = memoizeValueIR(body, requestedType, memo) + val e = memoizeValueIR(ctx, body, requestedType, memo) val usages = memo.relationalRefs.get(name).map(_.result()).getOrElse(Array()) - memoizeValueIR(value, unifySeq(value.typ, usages), memo) + memoizeValueIR(ctx, value, unifySeq(value.typ, usages), memo) e case RelationalRef(name, _) => memo.relationalRefs.getOrElseUpdate(name, new BoxedArrayBuilder[Type]) += requestedType BindingEnv.empty case MakeArray(args, _) => val eltType = requestedType.asInstanceOf[TArray].elementType - unifyEnvsSeq(args.map(a => memoizeValueIR(a, eltType, memo))) + unifyEnvsSeq(args.map(a => memoizeValueIR(ctx, a, eltType, memo))) case MakeStream(args, _, _) => val eltType = requestedType.asInstanceOf[TStream].elementType - unifyEnvsSeq(args.map(a => memoizeValueIR(a, eltType, memo))) + unifyEnvsSeq(args.map(a => memoizeValueIR(ctx, a, eltType, memo))) case ArrayRef(a, i, s) => unifyEnvs( - memoizeValueIR(a, TArray(requestedType), memo), - memoizeValueIR(i, i.typ, memo), - memoizeValueIR(s, s.typ, memo) + memoizeValueIR(ctx, a, TArray(requestedType), memo), + memoizeValueIR(ctx, i, i.typ, memo), + memoizeValueIR(ctx, s, s.typ, memo) ) case ArrayLen(a) => - memoizeValueIR(a, minimal(a.typ), memo) + memoizeValueIR(ctx, a, minimal(a.typ), memo) case StreamTake(a, len) => unifyEnvs( - memoizeValueIR(a, requestedType, memo), - memoizeValueIR(len, len.typ, memo)) + memoizeValueIR(ctx, a, requestedType, memo), + memoizeValueIR(ctx, len, len.typ, memo)) case StreamDrop(a, len) => unifyEnvs( - memoizeValueIR(a, requestedType, memo), - memoizeValueIR(len, len.typ, memo)) + memoizeValueIR(ctx, a, requestedType, memo), + memoizeValueIR(ctx, len, len.typ, memo)) case StreamMap(a, name, body) => val aType = a.typ.asInstanceOf[TStream] - val bodyEnv = memoizeValueIR(body, + val bodyEnv = memoizeValueIR(ctx, body, requestedType.asInstanceOf[TStream].elementType, memo) val valueType = unifySeq( @@ -1026,25 +1059,25 @@ object PruneDeadFields { bodyEnv.eval.lookupOption(name).map(_.result()).getOrElse(Array())) unifyEnvs( bodyEnv.deleteEval(name), - memoizeValueIR(a, TStream(valueType), memo) + memoizeValueIR(ctx, a, TStream(valueType), memo) ) case StreamGrouped(a, size) => unifyEnvs( - memoizeValueIR(a, requestedType.asInstanceOf[TStream].elementType, memo), - memoizeValueIR(size, size.typ, memo)) + memoizeValueIR(ctx, a, requestedType.asInstanceOf[TStream].elementType, memo), + memoizeValueIR(ctx, size, size.typ, memo)) case StreamGroupByKey(a, key) => val reqStructT = coerce[TStruct](coerce[TStream](coerce[TStream](requestedType).elementType).elementType) val origStructT = coerce[TStruct](coerce[TStream](a.typ).elementType) - memoizeValueIR(a, TStream(unify(origStructT, reqStructT, selectKey(origStructT, key))), memo) + memoizeValueIR(ctx, a, TStream(unify(origStructT, reqStructT, selectKey(origStructT, key))), memo) case StreamZip(as, names, body, behavior, _) => - val bodyEnv = memoizeValueIR(body, + val bodyEnv = memoizeValueIR(ctx, body, requestedType.asInstanceOf[TStream].elementType, memo) val valueTypes = (names, as).zipped.map { (name, a) => bodyEnv.eval.lookupOption(name).map(ab => 
unifySeq(coerce[TStream](a.typ).elementType, ab.result())) } if (behavior == ArrayZipBehavior.AssumeSameLength && valueTypes.forall(_.isEmpty)) { - unifyEnvs(memoizeValueIR(as.head, TStream(minimal(coerce[TStream](as.head.typ).elementType)), memo) +: + unifyEnvs(memoizeValueIR(ctx, as.head, TStream(minimal(coerce[TStream](as.head.typ).elementType)), memo) +: Array(bodyEnv.deleteEval(names)): _*) } else { unifyEnvs( @@ -1052,73 +1085,73 @@ object PruneDeadFields { val at = coerce[TStream](a.typ) if (behavior == ArrayZipBehavior.AssumeSameLength) { vtOption.map { vt => - memoizeValueIR(a, TStream(vt), memo) + memoizeValueIR(ctx, a, TStream(vt), memo) }.getOrElse(BindingEnv.empty) } else - memoizeValueIR(a, TStream(vtOption.getOrElse(minimal(at.elementType))), memo) + memoizeValueIR(ctx, a, TStream(vtOption.getOrElse(minimal(at.elementType))), memo) } ++ Array(bodyEnv.deleteEval(names)): _*) } case StreamZipJoin(as, key, curKey, curVals, joinF) => val eltType = coerce[TStruct](coerce[TStream](as.head.typ).elementType) val requestedEltType = coerce[TStream](requestedType).elementType - val bodyEnv = memoizeValueIR(joinF, requestedEltType, memo) + val bodyEnv = memoizeValueIR(ctx, joinF, requestedEltType, memo) val childRequestedEltType = unifySeq( eltType, bodyEnv.eval.lookupOption(curVals).map(_.result().map(_.asInstanceOf[TArray].elementType)).getOrElse(Array()) :+ selectKey(eltType, key)) - unifyEnvsSeq(as.map(memoizeValueIR(_, TStream(childRequestedEltType), memo))) + unifyEnvsSeq(as.map(memoizeValueIR(ctx, _, TStream(childRequestedEltType), memo))) case StreamMultiMerge(as, key) => val eltType = coerce[TStruct](coerce[TStream](as.head.typ).elementType) val requestedEltType = coerce[TStream](requestedType).elementType val childRequestedEltType = unify(eltType, requestedEltType, selectKey(eltType, key)) - unifyEnvsSeq(as.map(memoizeValueIR(_, TStream(childRequestedEltType), memo))) + unifyEnvsSeq(as.map(memoizeValueIR(ctx, _, TStream(childRequestedEltType), memo))) case StreamFilter(a, name, cond) => val aType = a.typ.asInstanceOf[TStream] - val bodyEnv = memoizeValueIR(cond, cond.typ, memo) + val bodyEnv = memoizeValueIR(ctx, cond, cond.typ, memo) val valueType = unifySeq( aType.elementType, FastIndexedSeq(requestedType.asInstanceOf[TStream].elementType) ++ bodyEnv.eval.lookupOption(name).map(_.result()).getOrElse(Array())) unifyEnvs( bodyEnv.deleteEval(name), - memoizeValueIR(a, TStream(valueType), memo) + memoizeValueIR(ctx, a, TStream(valueType), memo) ) case StreamTakeWhile(a, name, cond) => val aType = a.typ.asInstanceOf[TStream] - val bodyEnv = memoizeValueIR(cond, cond.typ, memo) + val bodyEnv = memoizeValueIR(ctx, cond, cond.typ, memo) val valueType = unifySeq( aType.elementType, FastIndexedSeq(requestedType.asInstanceOf[TStream].elementType) ++ bodyEnv.eval.lookupOption(name).map(_.result()).getOrElse(Array())) unifyEnvs( bodyEnv.deleteEval(name), - memoizeValueIR(a, TStream(valueType), memo) + memoizeValueIR(ctx, a, TStream(valueType), memo) ) case StreamDropWhile(a, name, cond) => val aType = a.typ.asInstanceOf[TStream] - val bodyEnv = memoizeValueIR(cond, cond.typ, memo) + val bodyEnv = memoizeValueIR(ctx, cond, cond.typ, memo) val valueType = unifySeq( aType.elementType, FastIndexedSeq(requestedType.asInstanceOf[TStream].elementType) ++ bodyEnv.eval.lookupOption(name).map(_.result()).getOrElse(Array())) unifyEnvs( bodyEnv.deleteEval(name), - memoizeValueIR(a, TStream(valueType), memo) + memoizeValueIR(ctx, a, TStream(valueType), memo) ) case StreamFlatMap(a, name, body) => val 
aType = a.typ.asInstanceOf[TStream] - val bodyEnv = memoizeValueIR(body, requestedType, memo) + val bodyEnv = memoizeValueIR(ctx, body, requestedType, memo) val valueType = unifySeq( aType.elementType, bodyEnv.eval.lookupOption(name).map(_.result()).getOrElse(Array())) unifyEnvs( bodyEnv.deleteEval(name), - memoizeValueIR(a, TStream(valueType), memo) + memoizeValueIR(ctx, a, TStream(valueType), memo) ) case StreamFold(a, zero, accumName, valueName, body) => val aType = a.typ.asInstanceOf[TStream] - val zeroEnv = memoizeValueIR(zero, zero.typ, memo) - val bodyEnv = memoizeValueIR(body, body.typ, memo) + val zeroEnv = memoizeValueIR(ctx, zero, zero.typ, memo) + val bodyEnv = memoizeValueIR(ctx, body, body.typ, memo) val valueType = unifySeq( aType.elementType, bodyEnv.eval.lookupOption(valueName).map(_.result()).getOrElse(Array())) @@ -1126,13 +1159,13 @@ object PruneDeadFields { unifyEnvs( zeroEnv, bodyEnv.deleteEval(valueName).deleteEval(accumName), - memoizeValueIR(a, TStream(valueType), memo) + memoizeValueIR(ctx, a, TStream(valueType), memo) ) case StreamFold2(a, accum, valueName, seq, res) => val aType = a.typ.asInstanceOf[TStream] - val zeroEnvs = accum.map { case (name, zval) => memoizeValueIR(zval, zval.typ, memo) } - val seqEnvs = seq.map { seq => memoizeValueIR(seq, seq.typ, memo) } - val resEnv = memoizeValueIR(res, requestedType, memo) + val zeroEnvs = accum.map { case (name, zval) => memoizeValueIR(ctx, zval, zval.typ, memo) } + val seqEnvs = seq.map { seq => memoizeValueIR(ctx, seq, seq.typ, memo) } + val resEnv = memoizeValueIR(ctx, res, requestedType, memo) val valueType = unifySeq( aType.elementType, resEnv.eval.lookupOption(valueName).map(_.result()).getOrElse(Array()) ++ @@ -1144,25 +1177,25 @@ object PruneDeadFields { zeroEnvs ++ Array(resEnv.copy(eval = resEnv.eval.delete(accumNames))) ++ seqEnvs.map(e => e.copy(eval = e.eval.delete(seqNames))) - ++ Array(memoizeValueIR(a, TStream(valueType), memo)) + ++ Array(memoizeValueIR(ctx, a, TStream(valueType), memo)) ) case StreamScan(a, zero, accumName, valueName, body) => val aType = a.typ.asInstanceOf[TStream] - val zeroEnv = memoizeValueIR(zero, zero.typ, memo) - val bodyEnv = memoizeValueIR(body, body.typ, memo) + val zeroEnv = memoizeValueIR(ctx, zero, zero.typ, memo) + val bodyEnv = memoizeValueIR(ctx, body, body.typ, memo) val valueType = unifySeq( aType.elementType, bodyEnv.eval.lookupOption(valueName).map(_.result()).getOrElse(Array())) unifyEnvs( zeroEnv, bodyEnv.deleteEval(valueName).deleteEval(accumName), - memoizeValueIR(a, TStream(valueType), memo) + memoizeValueIR(ctx, a, TStream(valueType), memo) ) case StreamJoinRightDistinct(left, right, lKey, rKey, l, r, join, joinType) => val lType = left.typ.asInstanceOf[TStream] val rType = right.typ.asInstanceOf[TStream] - val joinEnv = memoizeValueIR(join, requestedType.asInstanceOf[TStream].elementType, memo) + val joinEnv = memoizeValueIR(ctx, join, requestedType.asInstanceOf[TStream].elementType, memo) val lRequested = unifySeq( lType.elementType, @@ -1176,10 +1209,10 @@ object PruneDeadFields { unifyEnvs( joinEnv.deleteEval(l).deleteEval(r), - memoizeValueIR(left, TStream(lRequested), memo), - memoizeValueIR(right, TStream(rRequested), memo)) + memoizeValueIR(ctx, left, TStream(lRequested), memo), + memoizeValueIR(ctx, right, TStream(rRequested), memo)) case ArraySort(a, left, right, lessThan) => - val compEnv = memoizeValueIR(lessThan, lessThan.typ, memo) + val compEnv = memoizeValueIR(ctx, lessThan, lessThan.typ, memo) val aType = a.typ.asInstanceOf[TStream] val 
requestedElementType = unifySeq( @@ -1188,7 +1221,7 @@ object PruneDeadFields { compEnv.eval.lookupOption(left).map(_.result()).getOrElse(Array()) ++ compEnv.eval.lookupOption(right).map(_.result()).getOrElse(Array())) - val aEnv = memoizeValueIR(a, TStream(requestedElementType), memo) + val aEnv = memoizeValueIR(ctx, a, TStream(requestedElementType), memo) unifyEnvs( compEnv.deleteEval(left).deleteEval(right), @@ -1197,37 +1230,37 @@ object PruneDeadFields { case StreamFor(a, valueName, body) => assert(requestedType == TVoid) val aType = a.typ.asInstanceOf[TStream] - val bodyEnv = memoizeValueIR(body, body.typ, memo) + val bodyEnv = memoizeValueIR(ctx, body, body.typ, memo) val valueType = unifySeq( aType.elementType, bodyEnv.eval.lookupOption(valueName).map(_.result()).getOrElse(Array())) unifyEnvs( bodyEnv.deleteEval(valueName), - memoizeValueIR(a, TStream(valueType), memo) + memoizeValueIR(ctx, a, TStream(valueType), memo) ) case MakeNDArray(data, shape, rowMajor, errorId) => val elementType = requestedType.asInstanceOf[TNDArray].elementType val dataType = if (data.typ.isInstanceOf[TArray]) TArray(elementType) else TStream(elementType) unifyEnvs( - memoizeValueIR(data, dataType, memo), - memoizeValueIR(shape, shape.typ, memo), - memoizeValueIR(rowMajor, rowMajor.typ, memo) + memoizeValueIR(ctx, data, dataType, memo), + memoizeValueIR(ctx, shape, shape.typ, memo), + memoizeValueIR(ctx, rowMajor, rowMajor.typ, memo) ) case NDArrayMap(nd, valueName, body) => val ndType = nd.typ.asInstanceOf[TNDArray] - val bodyEnv = memoizeValueIR(body, requestedType.asInstanceOf[TNDArray].elementType, memo) + val bodyEnv = memoizeValueIR(ctx, body, requestedType.asInstanceOf[TNDArray].elementType, memo) val valueType = unifySeq( ndType.elementType, bodyEnv.eval.lookupOption(valueName).map(_.result()).getOrElse(Array()) ) unifyEnvs( bodyEnv.deleteEval(valueName), - memoizeValueIR(nd, ndType.copy(elementType = valueType), memo) + memoizeValueIR(ctx, nd, ndType.copy(elementType = valueType), memo) ) case NDArrayMap2(left, right, leftName, rightName, body, _) => val leftType = left.typ.asInstanceOf[TNDArray] val rightType = right.typ.asInstanceOf[TNDArray] - val bodyEnv = memoizeValueIR(body, requestedType.asInstanceOf[TNDArray].elementType, memo) + val bodyEnv = memoizeValueIR(ctx, body, requestedType.asInstanceOf[TNDArray].elementType, memo) val leftValueType = unify( leftType.elementType, @@ -1241,12 +1274,12 @@ object PruneDeadFields { unifyEnvs( bodyEnv.deleteEval(leftName).deleteEval(rightName), - memoizeValueIR(left, leftType.copy(elementType = leftValueType), memo), - memoizeValueIR(right, rightType.copy(elementType = rightValueType), memo) + memoizeValueIR(ctx, left, leftType.copy(elementType = leftValueType), memo), + memoizeValueIR(ctx, right, rightType.copy(elementType = rightValueType), memo) ) case AggExplode(a, name, body, isScan) => val aType = a.typ.asInstanceOf[TStream] - val bodyEnv = memoizeValueIR(body, + val bodyEnv = memoizeValueIR(ctx, body, requestedType, memo) if (isScan) { @@ -1254,7 +1287,7 @@ object PruneDeadFields { aType.elementType, bodyEnv.scanOrEmpty.lookupOption(name).map(_.result()).getOrElse(Array())) - val aEnv = memoizeValueIR(a, TStream(valueType), memo) + val aEnv = memoizeValueIR(ctx, a, TStream(valueType), memo) unifyEnvs( BindingEnv(scan = bodyEnv.scan.map(_.delete(name))), BindingEnv(scan = Some(aEnv.eval)) @@ -1264,33 +1297,33 @@ object PruneDeadFields { aType.elementType, bodyEnv.aggOrEmpty.lookupOption(name).map(_.result()).getOrElse(Array())) - val aEnv = 
memoizeValueIR(a, TStream(valueType), memo) + val aEnv = memoizeValueIR(ctx, a, TStream(valueType), memo) unifyEnvs( BindingEnv(agg = bodyEnv.agg.map(_.delete(name))), BindingEnv(agg = Some(aEnv.eval)) ) } case AggFilter(cond, aggIR, isScan) => - val condEnv = memoizeValueIR(cond, cond.typ, memo) + val condEnv = memoizeValueIR(ctx, cond, cond.typ, memo) unifyEnvs( if (isScan) BindingEnv(scan = Some(condEnv.eval)) else BindingEnv(agg = Some(condEnv.eval)), - memoizeValueIR(aggIR, requestedType, memo) + memoizeValueIR(ctx, aggIR, requestedType, memo) ) case AggGroupBy(key, aggIR, isScan) => - val keyEnv = memoizeValueIR(key, requestedType.asInstanceOf[TDict].keyType, memo) + val keyEnv = memoizeValueIR(ctx, key, requestedType.asInstanceOf[TDict].keyType, memo) unifyEnvs( if (isScan) BindingEnv(scan = Some(keyEnv.eval)) else BindingEnv(agg = Some(keyEnv.eval)), - memoizeValueIR(aggIR, requestedType.asInstanceOf[TDict].valueType, memo) + memoizeValueIR(ctx, aggIR, requestedType.asInstanceOf[TDict].valueType, memo) ) case AggArrayPerElement(a, elementName, indexName, aggBody, knownLength, isScan) => val aType = a.typ.asInstanceOf[TArray] - val bodyEnv = memoizeValueIR(aggBody, + val bodyEnv = memoizeValueIR(ctx, aggBody, requestedType.asInstanceOf[TArray].elementType, memo) if (isScan) { @@ -1298,36 +1331,36 @@ object PruneDeadFields { aType.elementType, bodyEnv.scanOrEmpty.lookupOption(elementName).map(_.result()).getOrElse(Array())) - val aEnv = memoizeValueIR(a, TArray(valueType), memo) + val aEnv = memoizeValueIR(ctx, a, TArray(valueType), memo) unifyEnvsSeq(FastSeq( bodyEnv.copy(eval = bodyEnv.eval.delete(indexName), scan = bodyEnv.scan.map(_.delete(elementName))), BindingEnv(scan = Some(aEnv.eval)) - ) ++ knownLength.map(x => memoizeValueIR(x, x.typ, memo))) + ) ++ knownLength.map(x => memoizeValueIR(ctx, x, x.typ, memo))) } else { val valueType = unifySeq( aType.elementType, bodyEnv.aggOrEmpty.lookupOption(elementName).map(_.result()).getOrElse(Array())) - val aEnv = memoizeValueIR(a, TArray(valueType), memo) + val aEnv = memoizeValueIR(ctx, a, TArray(valueType), memo) unifyEnvsSeq(FastSeq( bodyEnv.copy(eval = bodyEnv.eval.delete(indexName), agg = bodyEnv.agg.map(_.delete(elementName))), BindingEnv(agg = Some(aEnv.eval)) - ) ++ knownLength.map(x => memoizeValueIR(x, x.typ, memo))) + ) ++ knownLength.map(x => memoizeValueIR(ctx, x, x.typ, memo))) } case ApplyAggOp(initOpArgs, seqOpArgs, sig) => val prunedSig = AggSignature.prune(sig, requestedType) - val initEnv = unifyEnvsSeq((initOpArgs, prunedSig.initOpArgs).zipped.map { (arg, req) => memoizeValueIR(arg, req, memo) }) - val seqOpEnv = unifyEnvsSeq((seqOpArgs, prunedSig.seqOpArgs).zipped.map { (arg, req) => memoizeValueIR(arg, req, memo) }) + val initEnv = unifyEnvsSeq((initOpArgs, prunedSig.initOpArgs).zipped.map { (arg, req) => memoizeValueIR(ctx, arg, req, memo) }) + val seqOpEnv = unifyEnvsSeq((seqOpArgs, prunedSig.seqOpArgs).zipped.map { (arg, req) => memoizeValueIR(ctx, arg, req, memo) }) BindingEnv(eval = initEnv.eval, agg = Some(seqOpEnv.eval)) case ApplyScanOp(initOpArgs, seqOpArgs, sig) => val prunedSig = AggSignature.prune(sig, requestedType) - val initEnv = unifyEnvsSeq((initOpArgs, prunedSig.initOpArgs).zipped.map { (arg, req) => memoizeValueIR(arg, req, memo) }) - val seqOpEnv = unifyEnvsSeq((seqOpArgs, prunedSig.seqOpArgs).zipped.map { (arg, req) => memoizeValueIR(arg, req, memo) }) + val initEnv = unifyEnvsSeq((initOpArgs, prunedSig.initOpArgs).zipped.map { (arg, req) => memoizeValueIR(ctx, arg, req, memo) }) + val 
seqOpEnv = unifyEnvsSeq((seqOpArgs, prunedSig.seqOpArgs).zipped.map { (arg, req) => memoizeValueIR(ctx, arg, req, memo) }) BindingEnv(eval = initEnv.eval, scan = Some(seqOpEnv.eval)) case AggFold(zero, seqOp, combOp, accumName, otherAccumName, isScan) => - val initEnv = memoizeValueIR(zero, zero.typ, memo) - val seqEnv = memoizeValueIR(seqOp, seqOp.typ, memo) - memoizeValueIR(combOp, combOp.typ, memo) + val initEnv = memoizeValueIR(ctx, zero, zero.typ, memo) + val seqEnv = memoizeValueIR(ctx, seqOp, seqOp.typ, memo) + memoizeValueIR(ctx, combOp, combOp.typ, memo) if (isScan) BindingEnv(eval = initEnv.eval, scan = Some(seqEnv.eval.delete(accumName))) @@ -1335,47 +1368,47 @@ object PruneDeadFields { BindingEnv(eval = initEnv.eval, agg = Some(seqEnv.eval.delete(accumName))) case StreamAgg(a, name, query) => val aType = a.typ.asInstanceOf[TStream] - val queryEnv = memoizeValueIR(query, requestedType, memo) + val queryEnv = memoizeValueIR(ctx, query, requestedType, memo) val requestedElemType = unifySeq( aType.elementType, queryEnv.aggOrEmpty.lookupOption(name).map(_.result()).getOrElse(Array())) - val aEnv = memoizeValueIR(a, TStream(requestedElemType), memo) + val aEnv = memoizeValueIR(ctx, a, TStream(requestedElemType), memo) unifyEnvs( BindingEnv(eval = concatEnvs(Array(queryEnv.eval, queryEnv.aggOrEmpty.delete(name)))), aEnv) case StreamAggScan(a, name, query) => val aType = a.typ.asInstanceOf[TStream] - val queryEnv = memoizeValueIR(query, requestedType.asInstanceOf[TStream].elementType, memo) + val queryEnv = memoizeValueIR(ctx, query, requestedType.asInstanceOf[TStream].elementType, memo) val requestedElemType = unifySeq( aType.elementType, queryEnv.scanOrEmpty.lookupOption(name).map(_.result()).getOrElse(Array()) ++ queryEnv.eval.lookupOption(name).map(_.result()).getOrElse(Array())) - val aEnv = memoizeValueIR(a, TStream(requestedElemType), memo) + val aEnv = memoizeValueIR(ctx, a, TStream(requestedElemType), memo) unifyEnvs( BindingEnv(eval = concatEnvs(Array(queryEnv.eval.delete(name), queryEnv.scanOrEmpty.delete(name)))), aEnv) case RunAgg(body, result, _) => unifyEnvs( - memoizeValueIR(body, body.typ, memo), - memoizeValueIR(result, requestedType, memo) + memoizeValueIR(ctx, body, body.typ, memo), + memoizeValueIR(ctx, result, requestedType, memo) ) case RunAggScan(array, name, init, seqs, result, signature) => val aType = array.typ.asInstanceOf[TStream] - val resultEnv = memoizeValueIR(result, requestedType.asInstanceOf[TStream].elementType, memo) - val seqEnv = memoizeValueIR(seqs, seqs.typ, memo) + val resultEnv = memoizeValueIR(ctx, result, requestedType.asInstanceOf[TStream].elementType, memo) + val seqEnv = memoizeValueIR(ctx, seqs, seqs.typ, memo) val elemEnv = unifyEnvs(resultEnv, seqEnv) val requestedElemType = unifySeq(aType.elementType, elemEnv.eval.lookupOption(name).map(_.result()).getOrElse(Array())) unifyEnvs( elemEnv, - memoizeValueIR(array, TStream(requestedElemType), memo), - memoizeValueIR(init, init.typ, memo) + memoizeValueIR(ctx, array, TStream(requestedElemType), memo), + memoizeValueIR(ctx, init, init.typ, memo) ) case MakeStruct(fields) => val sType = requestedType.asInstanceOf[TStruct] unifyEnvsSeq(fields.flatMap { case (fname, fir) => // ignore unreachable fields, these are eliminated on the upwards pass - sType.fieldOption(fname).map(f => memoizeValueIR(fir, f.typ, memo)) + sType.fieldOption(fname).map(f => memoizeValueIR(ctx, fir, f.typ, memo)) }) case InsertFields(old, fields, _) => val sType = requestedType.asInstanceOf[TStruct] @@ -1392,10 +1425,10 
@@ object PruneDeadFields { sType.fieldOption(f.name).map(f.name -> _.typ) }: _*) unifyEnvsSeq( - FastSeq(memoizeValueIR(old, leftDep, memo)) ++ + FastSeq(memoizeValueIR(ctx, old, leftDep, memo)) ++ // ignore unreachable fields, these are eliminated on the upwards pass fields.flatMap { case (fname, fir) => - rightDep.fieldOption(fname).map(f => memoizeValueIR(fir, f.typ, memo)) + rightDep.fieldOption(fname).map(f => memoizeValueIR(ctx, fir, f.typ, memo)) } ) case SelectFields(old, fields) => @@ -1403,9 +1436,9 @@ object PruneDeadFields { val oldReqType = TStruct(old.typ.asInstanceOf[TStruct] .fieldNames .flatMap(fn => sType.fieldOption(fn).map(fd => (fd.name, fd.typ))): _*) - memoizeValueIR(old, oldReqType, memo) + memoizeValueIR(ctx, old, oldReqType, memo) case GetField(o, name) => - memoizeValueIR(o, TStruct(name -> requestedType), memo) + memoizeValueIR(ctx, o, TStruct(name -> requestedType), memo) case MakeTuple(fields) => val tType = requestedType.asInstanceOf[TTuple] @@ -1414,29 +1447,29 @@ object PruneDeadFields { // ignore unreachable fields, these are eliminated on the upwards pass tType.fieldIndex.get(i) .map { idx => - memoizeValueIR(value, tType.types(idx), memo) + memoizeValueIR(ctx, value, tType.types(idx), memo) }}) case GetTupleElement(o, idx) => val childTupleType = o.typ.asInstanceOf[TTuple] val tupleDep = TTuple(FastIndexedSeq(TupleField(idx, requestedType))) - memoizeValueIR(o, tupleDep, memo) + memoizeValueIR(ctx, o, tupleDep, memo) case ConsoleLog(message, result) => unifyEnvs( - memoizeValueIR(message, TString, memo), - memoizeValueIR(result, result.typ, memo) + memoizeValueIR(ctx, message, TString, memo), + memoizeValueIR(ctx, result, result.typ, memo) ) case MatrixCount(child) => - memoizeMatrixIR(child, minimal(child.typ), memo) + memoizeMatrixIR(ctx, child, minimal(child.typ), memo) BindingEnv.empty case TableCount(child) => - memoizeTableIR(child, minimal(child.typ), memo) + memoizeTableIR(ctx, child, minimal(child.typ), memo) BindingEnv.empty case TableGetGlobals(child) => - memoizeTableIR(child, minimal(child.typ).copy(globalType = requestedType.asInstanceOf[TStruct]), memo) + memoizeTableIR(ctx, child, minimal(child.typ).copy(globalType = requestedType.asInstanceOf[TStruct]), memo) BindingEnv.empty case TableCollect(child) => val rStruct = requestedType.asInstanceOf[TStruct] - memoizeTableIR(child, TableType( + memoizeTableIR(ctx, child, TableType( key = child.typ.key, rowType = unify(child.typ.rowType, rStruct.fieldOption("rows").map(_.typ.asInstanceOf[TArray].elementType.asInstanceOf[TStruct]).getOrElse(TStruct.empty)), @@ -1444,23 +1477,23 @@ object PruneDeadFields { memo) BindingEnv.empty case TableToValueApply(child, _) => - memoizeTableIR(child, child.typ, memo) + memoizeTableIR(ctx, child, child.typ, memo) BindingEnv.empty - case MatrixToValueApply(child, _) => memoizeMatrixIR(child, child.typ, memo) + case MatrixToValueApply(child, _) => memoizeMatrixIR(ctx, child, child.typ, memo) BindingEnv.empty - case BlockMatrixToValueApply(child, _) => memoizeBlockMatrixIR(child, child.typ, memo) + case BlockMatrixToValueApply(child, _) => memoizeBlockMatrixIR(ctx, child, child.typ, memo) BindingEnv.empty case TableAggregate(child, query) => - val queryDep = memoizeAndGetDep(query, query.typ, child.typ, memo) + val queryDep = memoizeAndGetDep(ctx, query, query.typ, child.typ, memo) val dep = TableType( key = child.typ.key, rowType = unify(child.typ.rowType, queryDep.rowType, selectKey(child.typ.rowType, child.typ.key)), globalType = queryDep.globalType ) - 
memoizeTableIR(child, dep, memo) + memoizeTableIR(ctx, child, dep, memo) BindingEnv.empty case MatrixAggregate(child, query) => - val queryDep = memoizeAndGetDep(query, query.typ, child.typ, memo) + val queryDep = memoizeAndGetDep(ctx, query, query.typ, child.typ, memo) val dep = MatrixType( rowKey = child.typ.rowKey, colKey = FastIndexedSeq(), @@ -1469,10 +1502,10 @@ object PruneDeadFields { colType = queryDep.colType, globalType = queryDep.globalType ) - memoizeMatrixIR(child, dep, memo) + memoizeMatrixIR(ctx, child, dep, memo) BindingEnv.empty case TailLoop(name, params, body) => - val bodyEnv = memoizeValueIR(body, body.typ, memo) + val bodyEnv = memoizeValueIR(ctx, body, body.typ, memo) val paramTypes = params.map{ case (paramName, paramIR) => bodyEnv.eval.lookupOption(paramName) match { case Some(ab) => unifySeq(paramIR.typ, ab.result()) @@ -1482,12 +1515,12 @@ object PruneDeadFields { unifyEnvsSeq( IndexedSeq(bodyEnv.deleteEval(params.map(_._1))) ++ (params, paramTypes).zipped.map{ case ((paramName, paramIR), paramType) => - memoizeValueIR(paramIR, paramType, memo) + memoizeValueIR(ctx, paramIR, paramType, memo) } ) case CollectDistributedArray(contexts, globals, cname, gname, body, tsd) => val rArray = requestedType.asInstanceOf[TArray] - val bodyEnv = memoizeValueIR(body, rArray.elementType, memo) + val bodyEnv = memoizeValueIR(ctx, body, rArray.elementType, memo) assert(bodyEnv.scan.isEmpty) assert(bodyEnv.agg.isEmpty) @@ -1502,32 +1535,36 @@ object PruneDeadFields { } unifyEnvs( - memoizeValueIR(contexts, cDep, memo), - memoizeValueIR(globals, gDep, memo) + memoizeValueIR(ctx, contexts, cDep, memo), + memoizeValueIR(ctx, globals, gDep, memo) ) case _: IR => val envs = ir.children.flatMap { case mir: MatrixIR => - memoizeMatrixIR(mir, mir.typ, memo) + memoizeMatrixIR(ctx, mir, mir.typ, memo) None case tir: TableIR => - memoizeTableIR(tir, tir.typ, memo) + memoizeTableIR(ctx, tir, tir.typ, memo) None case bmir: BlockMatrixIR => //NOTE Currently no BlockMatrixIRs would have dead fields None case ir: IR => - Some(memoizeValueIR(ir, ir.typ, memo)) + Some(memoizeValueIR(ctx, ir, ir.typ, memo)) } unifyEnvsSeq(envs) } } - def rebuild(tir: TableIR, memo: RebuildMutableState): TableIR = { + def rebuild( + ctx: ExecuteContext, + tir: TableIR, + memo: RebuildMutableState + ): TableIR = { val requestedType = memo.requestedType.lookup(tir).asInstanceOf[TableType] tir match { case TableParallelize(rowsAndGlobal, nPartitions) => TableParallelize( - upcast(rebuildIR(rowsAndGlobal, BindingEnv.empty, memo), + upcast(ctx, rebuildIR(ctx, rowsAndGlobal, BindingEnv.empty, memo), memo.requestedType.lookup(rowsAndGlobal).asInstanceOf[TStruct]), nPartitions) case TableRead(typ, dropRows, tr) => @@ -1538,12 +1575,12 @@ object PruneDeadFields { globalType = requestedType.globalType) TableRead(requestedTypeWithKey, dropRows, tr) case TableFilter(child, pred) => - val child2 = rebuild(child, memo) - val pred2 = rebuildIR(pred, BindingEnv(child2.typ.rowEnv), memo) + val child2 = rebuild(ctx, child, memo) + val pred2 = rebuildIR(ctx, pred, BindingEnv(child2.typ.rowEnv), memo) TableFilter(child2, pred2) case TableMapPartitions(child, gName, pName, body) => - val child2 = rebuild(child, memo) - val body2 = rebuildIR(body, BindingEnv(Env( + val child2 = rebuild(ctx, child, memo) + val body2 = rebuildIR(ctx, body, BindingEnv(Env( gName -> child2.typ.globalType, pName -> TStream(child2.typ.rowType))), memo) val body2ElementType = body2.typ.asInstanceOf[TStream].elementType.asInstanceOf[TStruct] @@ -1553,8 +1590,8 
@@ object PruneDeadFields { child2 TableMapPartitions(child2Keyed, gName, pName, body2) case TableMapRows(child, newRow) => - val child2 = rebuild(child, memo) - val newRow2 = rebuildIR(newRow, BindingEnv(child2.typ.rowEnv, scan = Some(child2.typ.rowEnv)), memo) + val child2 = rebuild(ctx, child, memo) + val newRow2 = rebuildIR(ctx, newRow, BindingEnv(child2.typ.rowEnv, scan = Some(child2.typ.rowEnv)), memo) val newRowType = newRow2.typ.asInstanceOf[TStruct] val child2Keyed = if (child2.typ.key.exists(k => !newRowType.hasField(k))) TableKeyBy(child2, child2.typ.key.takeWhile(newRowType.hasField)) @@ -1562,43 +1599,43 @@ object PruneDeadFields { child2 TableMapRows(child2Keyed, newRow2) case TableMapGlobals(child, newGlobals) => - val child2 = rebuild(child, memo) - TableMapGlobals(child2, rebuildIR(newGlobals, BindingEnv(child2.typ.globalEnv), memo)) + val child2 = rebuild(ctx, child, memo) + TableMapGlobals(child2, rebuildIR(ctx, newGlobals, BindingEnv(child2.typ.globalEnv), memo)) case TableKeyBy(child, _, isSorted) => - var child2 = rebuild(child, memo) + var child2 = rebuild(ctx, child, memo) val keys2 = requestedType.key // fully upcast before shuffle if (!isSorted && keys2.nonEmpty) - child2 = upcastTable(child2, memo.requestedType.lookup(child).asInstanceOf[TableType], upcastGlobals = false) + child2 = upcastTable(ctx, child2, memo.requestedType.lookup(child).asInstanceOf[TableType], upcastGlobals = false) TableKeyBy(child2, keys2, isSorted) case TableOrderBy(child, sortFields) => // fully upcast before shuffle - val child2 = upcastTable(rebuild(child, memo), memo.requestedType.lookup(child).asInstanceOf[TableType], upcastGlobals = false) + val child2 = upcastTable(ctx, rebuild(ctx, child, memo), memo.requestedType.lookup(child).asInstanceOf[TableType], upcastGlobals = false) TableOrderBy(child2, sortFields) case TableLeftJoinRightDistinct(left, right, root) => if (requestedType.rowType.hasField(root)) - TableLeftJoinRightDistinct(rebuild(left, memo), rebuild(right, memo), root) + TableLeftJoinRightDistinct(rebuild(ctx, left, memo), rebuild(ctx, right, memo), root) else - rebuild(left, memo) + rebuild(ctx, left, memo) case TableIntervalJoin(left, right, root, product) => if (requestedType.rowType.hasField(root)) - TableIntervalJoin(rebuild(left, memo), rebuild(right, memo), root, product) + TableIntervalJoin(rebuild(ctx, left, memo), rebuild(ctx, right, memo), root, product) else - rebuild(left, memo) + rebuild(ctx, left, memo) case TableMultiWayZipJoin(children, fieldName, globalName) => - val rebuilt = children.map { c => rebuild(c, memo) } - val upcasted = rebuilt.map { t => upcastTable(t, memo.requestedType.lookup(children(0)).asInstanceOf[TableType]) } + val rebuilt = children.map { c => rebuild(ctx, c, memo) } + val upcasted = rebuilt.map { t => upcastTable(ctx, t, memo.requestedType.lookup(children(0)).asInstanceOf[TableType]) } TableMultiWayZipJoin(upcasted, fieldName, globalName) case TableAggregateByKey(child, expr) => - val child2 = rebuild(child, memo) - TableAggregateByKey(child2, rebuildIR(expr, BindingEnv(child2.typ.globalEnv, agg = Some(child2.typ.rowEnv)), memo)) + val child2 = rebuild(ctx, child, memo) + TableAggregateByKey(child2, rebuildIR(ctx, expr, BindingEnv(child2.typ.globalEnv, agg = Some(child2.typ.rowEnv)), memo)) case TableKeyByAndAggregate(child, expr, newKey, nPartitions, bufferSize) => - val child2 = rebuild(child, memo) - val expr2 = rebuildIR(expr, BindingEnv(child2.typ.globalEnv, agg = Some(child2.typ.rowEnv)), memo) - val newKey2 = 
rebuildIR(newKey, BindingEnv(child2.typ.rowEnv), memo) + val child2 = rebuild(ctx, child, memo) + val expr2 = rebuildIR(ctx, expr, BindingEnv(child2.typ.globalEnv, agg = Some(child2.typ.rowEnv)), memo) + val newKey2 = rebuildIR(ctx, newKey, BindingEnv(child2.typ.rowEnv), memo) TableKeyByAndAggregate(child2, expr2, newKey2, nPartitions, bufferSize) case TableRename(child, rowMap, globalMap) => - val child2 = rebuild(child, memo) + val child2 = rebuild(ctx, child, memo) TableRename( child2, rowMap.filterKeys(child2.typ.rowType.hasField), @@ -1606,27 +1643,31 @@ object PruneDeadFields { case TableUnion(children) => val requestedType = memo.requestedType.lookup(tir).asInstanceOf[TableType] val rebuilt = children.map { c => - upcastTable(rebuild(c, memo), requestedType, upcastGlobals = false) + upcastTable(ctx, rebuild(ctx, c, memo), requestedType, upcastGlobals = false) } TableUnion(rebuilt) case RelationalLetTable(name, value, body) => - val value2 = rebuildIR(value, BindingEnv.empty, memo) + val value2 = rebuildIR(ctx, value, BindingEnv.empty, memo) memo.relationalRefs += name -> value2.typ - RelationalLetTable(name, value2, rebuild(body, memo)) + RelationalLetTable(name, value2, rebuild(ctx, body, memo)) case BlockMatrixToTableApply(bmir, aux, function) => - val bmir2 = rebuild(bmir, memo) - val aux2 = rebuildIR(aux, BindingEnv.empty, memo) + val bmir2 = rebuild(ctx, bmir, memo) + val aux2 = rebuildIR(ctx, aux, BindingEnv.empty, memo) BlockMatrixToTableApply(bmir2, aux2, function) case _ => tir.copy(tir.children.map { // IR should be a match error - all nodes with child value IRs should have a rule - case childT: TableIR => rebuild(childT, memo) - case childM: MatrixIR => rebuild(childM, memo) - case childBm: BlockMatrixIR => rebuild(childBm, memo) + case childT: TableIR => rebuild(ctx, childT, memo) + case childM: MatrixIR => rebuild(ctx, childM, memo) + case childBm: BlockMatrixIR => rebuild(ctx, childBm, memo) }) } } - def rebuild(mir: MatrixIR, memo: RebuildMutableState): MatrixIR = { + def rebuild( + ctx: ExecuteContext, + mir: MatrixIR, + memo: RebuildMutableState + ): MatrixIR = { val requestedType = memo.requestedType.lookup(mir).asInstanceOf[MatrixType] mir match { case x@MatrixRead(typ, dropCols, dropRows, reader) => @@ -1641,20 +1682,20 @@ object PruneDeadFields { ) MatrixRead(requestedTypeWithKeys, dropCols, dropRows, reader) case MatrixFilterCols(child, pred) => - val child2 = rebuild(child, memo) - MatrixFilterCols(child2, rebuildIR(pred, BindingEnv(child2.typ.colEnv), memo)) + val child2 = rebuild(ctx, child, memo) + MatrixFilterCols(child2, rebuildIR(ctx, pred, BindingEnv(child2.typ.colEnv), memo)) case MatrixFilterRows(child, pred) => - val child2 = rebuild(child, memo) - MatrixFilterRows(child2, rebuildIR(pred, BindingEnv(child2.typ.rowEnv), memo)) + val child2 = rebuild(ctx, child, memo) + MatrixFilterRows(child2, rebuildIR(ctx, pred, BindingEnv(child2.typ.rowEnv), memo)) case MatrixFilterEntries(child, pred) => - val child2 = rebuild(child, memo) - MatrixFilterEntries(child2, rebuildIR(pred, BindingEnv(child2.typ.entryEnv), memo)) + val child2 = rebuild(ctx, child, memo) + MatrixFilterEntries(child2, rebuildIR(ctx, pred, BindingEnv(child2.typ.entryEnv), memo)) case MatrixMapEntries(child, newEntries) => - val child2 = rebuild(child, memo) - MatrixMapEntries(child2, rebuildIR(newEntries, BindingEnv(child2.typ.entryEnv), memo)) + val child2 = rebuild(ctx, child, memo) + MatrixMapEntries(child2, rebuildIR(ctx, newEntries, BindingEnv(child2.typ.entryEnv), memo)) case 
MatrixMapRows(child, newRow) => - val child2 = rebuild(child, memo) - val newRow2 = rebuildIR(newRow, + val child2 = rebuild(ctx, child, memo) + val newRow2 = rebuildIR(ctx, newRow, BindingEnv(child2.typ.rowEnv, agg = Some(child2.typ.entryEnv), scan = Some(child2.typ.rowEnv)), memo) val newRowType = newRow2.typ.asInstanceOf[TStruct] val child2Keyed = if (child2.typ.rowKey.exists(k => !newRowType.hasField(k))) @@ -1663,8 +1704,8 @@ object PruneDeadFields { child2 MatrixMapRows(child2Keyed, newRow2) case MatrixMapCols(child, newCol, newKey) => - val child2 = rebuild(child, memo) - val newCol2 = rebuildIR(newCol, + val child2 = rebuild(ctx, child, memo) + val newCol2 = rebuildIR(ctx, newCol, BindingEnv(child2.typ.colEnv, agg = Some(child2.typ.entryEnv), scan = Some(child2.typ.colEnv)), memo) val newColType = newCol2.typ.asInstanceOf[TStruct] val newKey2 = newKey match { @@ -1676,32 +1717,32 @@ object PruneDeadFields { } MatrixMapCols(child2, newCol2, newKey2) case MatrixMapGlobals(child, newGlobals) => - val child2 = rebuild(child, memo) - MatrixMapGlobals(child2, rebuildIR(newGlobals, BindingEnv(child2.typ.globalEnv), memo)) + val child2 = rebuild(ctx, child, memo) + MatrixMapGlobals(child2, rebuildIR(ctx, newGlobals, BindingEnv(child2.typ.globalEnv), memo)) case MatrixKeyRowsBy(child, keys, isSorted) => - val child2 = rebuild(child, memo) + val child2 = rebuild(ctx, child, memo) val keys2 = keys.takeWhile(child2.typ.rowType.hasField) MatrixKeyRowsBy(child2, keys2, isSorted) case MatrixAggregateRowsByKey(child, entryExpr, rowExpr) => - val child2 = rebuild(child, memo) + val child2 = rebuild(ctx, child, memo) MatrixAggregateRowsByKey(child2, - rebuildIR(entryExpr, BindingEnv(child2.typ.colEnv, agg = Some(child2.typ.entryEnv)), memo), - rebuildIR(rowExpr, BindingEnv(child2.typ.globalEnv, agg = Some(child2.typ.rowEnv)), memo)) + rebuildIR(ctx, entryExpr, BindingEnv(child2.typ.colEnv, agg = Some(child2.typ.entryEnv)), memo), + rebuildIR(ctx, rowExpr, BindingEnv(child2.typ.globalEnv, agg = Some(child2.typ.rowEnv)), memo)) case MatrixAggregateColsByKey(child, entryExpr, colExpr) => - val child2 = rebuild(child, memo) + val child2 = rebuild(ctx, child, memo) MatrixAggregateColsByKey(child2, - rebuildIR(entryExpr, BindingEnv(child2.typ.rowEnv, agg = Some(child2.typ.entryEnv)), memo), - rebuildIR(colExpr, BindingEnv(child2.typ.globalEnv, agg = Some(child2.typ.colEnv)), memo)) + rebuildIR(ctx, entryExpr, BindingEnv(child2.typ.rowEnv, agg = Some(child2.typ.entryEnv)), memo), + rebuildIR(ctx, colExpr, BindingEnv(child2.typ.globalEnv, agg = Some(child2.typ.colEnv)), memo)) case MatrixUnionRows(children) => val requestedType = memo.requestedType.lookup(mir).asInstanceOf[MatrixType] MatrixUnionRows(children.map { child => - upcast(rebuild(child, memo), requestedType, + upcast(ctx, rebuild(ctx, child, memo), requestedType, upcastGlobals = false) }) case MatrixUnionCols(left, right, joinType) => val requestedType = memo.requestedType.lookup(mir).asInstanceOf[MatrixType] - val left2 = rebuild(left, memo) - val right2 = rebuild(right, memo) + val left2 = rebuild(ctx, left, memo) + val right2 = rebuild(ctx, right, memo) if (left2.typ.colType == right2.typ.colType && left2.typ.entryType == right2.typ.entryType) { MatrixUnionCols( @@ -1711,31 +1752,31 @@ object PruneDeadFields { ) } else { MatrixUnionCols( - upcast(left2, requestedType, upcastRows=false, upcastGlobals = false), - upcast(right2, requestedType, upcastRows=false, upcastGlobals = false), + upcast(ctx, left2, requestedType, upcastRows=false, 
upcastGlobals = false), + upcast(ctx, right2, requestedType, upcastRows=false, upcastGlobals = false), joinType ) } case MatrixAnnotateRowsTable(child, table, root, product) => // if the field is not used, this node can be elided entirely if (!requestedType.rowType.hasField(root)) - rebuild(child, memo) + rebuild(ctx, child, memo) else { - val child2 = rebuild(child, memo) - val table2 = rebuild(table, memo) + val child2 = rebuild(ctx, child, memo) + val table2 = rebuild(ctx, table, memo) MatrixAnnotateRowsTable(child2, table2, root, product) } case MatrixAnnotateColsTable(child, table, uid) => // if the field is not used, this node can be elided entirely if (!requestedType.colType.hasField(uid)) - rebuild(child, memo) + rebuild(ctx, child, memo) else { - val child2 = rebuild(child, memo) - val table2 = rebuild(table, memo) + val child2 = rebuild(ctx, child, memo) + val table2 = rebuild(ctx, table, memo) MatrixAnnotateColsTable(child2, table2, uid) } case MatrixRename(child, globalMap, colMap, rowMap, entryMap) => - val child2 = rebuild(child, memo) + val child2 = rebuild(ctx, child, memo) MatrixRename( child2, globalMap.filterKeys(child2.typ.globalType.hasField), @@ -1743,41 +1784,50 @@ object PruneDeadFields { rowMap.filterKeys(child2.typ.rowType.hasField), entryMap.filterKeys(child2.typ.entryType.hasField)) case RelationalLetMatrixTable(name, value, body) => - val value2 = rebuildIR(value, BindingEnv.empty, memo) + val value2 = rebuildIR(ctx, value, BindingEnv.empty, memo) memo.relationalRefs += name -> value2.typ - RelationalLetMatrixTable(name, value2, rebuild(body, memo)) + RelationalLetMatrixTable(name, value2, rebuild(ctx, body, memo)) case CastTableToMatrix(child, entriesFieldName, colsFieldName, _) => - CastTableToMatrix(rebuild(child, memo), entriesFieldName, colsFieldName, requestedType.colKey) + CastTableToMatrix(rebuild(ctx, child, memo), entriesFieldName, colsFieldName, requestedType.colKey) case _ => mir.copy(mir.children.map { // IR should be a match error - all nodes with child value IRs should have a rule - case childT: TableIR => rebuild(childT, memo) - case childM: MatrixIR => rebuild(childM, memo) + case childT: TableIR => rebuild(ctx, childT, memo) + case childM: MatrixIR => rebuild(ctx, childM, memo) }) } } - def rebuild(bmir: BlockMatrixIR, memo: RebuildMutableState): BlockMatrixIR = bmir match { + def rebuild( + ctx: ExecuteContext, + bmir: BlockMatrixIR, + memo: RebuildMutableState + ): BlockMatrixIR = bmir match { case RelationalLetBlockMatrix(name, value, body) => - val value2 = rebuildIR(value, BindingEnv.empty, memo) + val value2 = rebuildIR(ctx, value, BindingEnv.empty, memo) memo.relationalRefs += name -> value2.typ - RelationalLetBlockMatrix(name, value2, rebuild(body, memo)) + RelationalLetBlockMatrix(name, value2, rebuild(ctx, body, memo)) case _ => bmir.copy( bmir.children.map { - case tir: TableIR => rebuild(tir, memo) - case mir: MatrixIR => rebuild(mir, memo) - case ir: IR => rebuildIR(ir, BindingEnv.empty[Type], memo) - case bmir: BlockMatrixIR => rebuild(bmir, memo) + case tir: TableIR => rebuild(ctx, tir, memo) + case mir: MatrixIR => rebuild(ctx, mir, memo) + case ir: IR => rebuildIR(ctx, ir, BindingEnv.empty[Type], memo) + case bmir: BlockMatrixIR => rebuild(ctx, bmir, memo) } ) } - def rebuildIR(ir: IR, env: BindingEnv[Type], memo: RebuildMutableState): IR = { + def rebuildIR( + ctx: ExecuteContext, + ir: IR, + env: BindingEnv[Type], + memo: RebuildMutableState + ): IR = { val requestedType = memo.requestedType.lookup(ir).asInstanceOf[Type] ir 
match { case NA(_) => NA(requestedType) case CastRename(v, _typ) => - val v2 = rebuildIR(v, env, memo) + val v2 = rebuildIR(ctx, v, env, memo) def recur(rebuildType: Type, castType: Type, baseType: Type): Type = { ((rebuildType, castType, baseType): @unchecked) match { @@ -1805,100 +1855,100 @@ object PruneDeadFields { CastRename(v2, recur(v2.typ, _typ, v.typ)) case If(cond, cnsq, alt) => - val cond2 = rebuildIR(cond, env, memo) - val cnsq2 = rebuildIR(cnsq, env, memo) - val alt2 = rebuildIR(alt, env, memo) + val cond2 = rebuildIR(ctx, cond, env, memo) + val cnsq2 = rebuildIR(ctx, cnsq, env, memo) + val alt2 = rebuildIR(ctx, alt, env, memo) if (cnsq2.typ == alt2.typ) If(cond2, cnsq2, alt2) else If(cond2, - upcast(cnsq2, requestedType), - upcast(alt2, requestedType) + upcast(ctx, cnsq2, requestedType), + upcast(ctx, alt2, requestedType) ) case Coalesce(values) => - val values2 = values.map(rebuildIR(_, env, memo)) + val values2 = values.map(rebuildIR(ctx, _, env, memo)) require(values2.nonEmpty) if (values2.forall(_.typ == values2.head.typ)) Coalesce(values2) else - Coalesce(values2.map(upcast(_, requestedType))) + Coalesce(values2.map(upcast(ctx, _, requestedType))) case Consume(value) => - val value2 = rebuildIR(value, env, memo) + val value2 = rebuildIR(ctx, value, env, memo) Consume(value2) case Let(name, value, body) => - val value2 = rebuildIR(value, env, memo) + val value2 = rebuildIR(ctx, value, env, memo) Let( name, value2, - rebuildIR(body, env.bindEval(name, value2.typ), memo) + rebuildIR(ctx, body, env.bindEval(name, value2.typ), memo) ) case AggLet(name, value, body, isScan) => - val value2 = rebuildIR(value, if (isScan) env.promoteScan else env.promoteAgg, memo) + val value2 = rebuildIR(ctx, value, if (isScan) env.promoteScan else env.promoteAgg, memo) AggLet( name, value2, - rebuildIR(body, if (isScan) env.bindScan(name, value2.typ) else env.bindAgg(name, value2.typ), memo), + rebuildIR(ctx, body, if (isScan) env.bindScan(name, value2.typ) else env.bindAgg(name, value2.typ), memo), isScan ) case Ref(name, t) => Ref(name, env.eval.lookupOption(name).getOrElse(t)) case RelationalLet(name, value, body) => - val value2 = rebuildIR(value, BindingEnv.empty, memo) + val value2 = rebuildIR(ctx, value, BindingEnv.empty, memo) memo.relationalRefs += name -> value2.typ - RelationalLet(name, value2, rebuildIR(body, env, memo)) + RelationalLet(name, value2, rebuildIR(ctx, body, env, memo)) case RelationalRef(name, _) => RelationalRef(name, memo.relationalRefs(name)) case MakeArray(args, _) => val dep = requestedType.asInstanceOf[TArray] - val args2 = args.map(a => rebuildIR(a, env, memo)) - MakeArray.unify(args2, TArray(dep.elementType)) + val args2 = args.map(a => rebuildIR(ctx, a, env, memo)) + MakeArray.unify(ctx, args2, TArray(dep.elementType)) case MakeStream(args, _, requiresMemoryManagementPerElement) => val dep = requestedType.asInstanceOf[TStream] - val args2 = args.map(a => rebuildIR(a, env, memo)) - MakeStream.unify(args2, requiresMemoryManagementPerElement, requestedType = TStream(dep.elementType)) + val args2 = args.map(a => rebuildIR(ctx, a, env, memo)) + MakeStream.unify(ctx, args2, requiresMemoryManagementPerElement, requestedType = TStream(dep.elementType)) case StreamMap(a, name, body) => - val a2 = rebuildIR(a, env, memo) - StreamMap(a2, name, rebuildIR(body, env.bindEval(name, a2.typ.asInstanceOf[TStream].elementType), memo)) + val a2 = rebuildIR(ctx, a, env, memo) + StreamMap(a2, name, rebuildIR(ctx, body, env.bindEval(name, a2.typ.asInstanceOf[TStream].elementType), 
memo)) case StreamZip(as, names, body, b, errorID) => val (newAs, newNames) = (as, names) .zipped - .flatMap { case (a, name) => if (memo.requestedType.contains(a)) Some((rebuildIR(a, env, memo), name)) else None } + .flatMap { case (a, name) => if (memo.requestedType.contains(a)) Some((rebuildIR(ctx, a, env, memo), name)) else None } .unzip - StreamZip(newAs, newNames, rebuildIR(body, + StreamZip(newAs, newNames, rebuildIR(ctx, body, env.bindEval(newNames.zip(newAs.map(a => a.typ.asInstanceOf[TStream].elementType)): _*), memo), b, errorID) case StreamZipJoin(as, key, curKey, curVals, joinF) => - val newAs = as.map(a => rebuildIR(a, env, memo)) + val newAs = as.map(a => rebuildIR(ctx, a, env, memo)) val newEltType = as.head.typ.asInstanceOf[TStream].elementType.asInstanceOf[TStruct] - val newJoinF = rebuildIR( + val newJoinF = rebuildIR(ctx, joinF, env.bindEval(curKey -> selectKey(newEltType, key), curVals -> TArray(newEltType)), memo) StreamZipJoin(newAs, key, curKey, curVals, newJoinF) case StreamFilter(a, name, cond) => - val a2 = rebuildIR(a, env, memo) - StreamFilter(a2, name, rebuildIR(cond, env.bindEval(name, a2.typ.asInstanceOf[TStream].elementType), memo)) + val a2 = rebuildIR(ctx, a, env, memo) + StreamFilter(a2, name, rebuildIR(ctx, cond, env.bindEval(name, a2.typ.asInstanceOf[TStream].elementType), memo)) case StreamTakeWhile(a, name, cond) => - val a2 = rebuildIR(a, env, memo) - StreamTakeWhile(a2, name, rebuildIR(cond, env.bindEval(name, a2.typ.asInstanceOf[TStream].elementType), memo)) + val a2 = rebuildIR(ctx, a, env, memo) + StreamTakeWhile(a2, name, rebuildIR(ctx, cond, env.bindEval(name, a2.typ.asInstanceOf[TStream].elementType), memo)) case StreamDropWhile(a, name, cond) => - val a2 = rebuildIR(a, env, memo) - StreamDropWhile(a2, name, rebuildIR(cond, env.bindEval(name, a2.typ.asInstanceOf[TStream].elementType), memo)) + val a2 = rebuildIR(ctx, a, env, memo) + StreamDropWhile(a2, name, rebuildIR(ctx, cond, env.bindEval(name, a2.typ.asInstanceOf[TStream].elementType), memo)) case StreamFlatMap(a, name, body) => - val a2 = rebuildIR(a, env, memo) - StreamFlatMap(a2, name, rebuildIR(body, env.bindEval(name, a2.typ.asInstanceOf[TStream].elementType), memo)) + val a2 = rebuildIR(ctx, a, env, memo) + StreamFlatMap(a2, name, rebuildIR(ctx, body, env.bindEval(name, a2.typ.asInstanceOf[TStream].elementType), memo)) case StreamFold(a, zero, accumName, valueName, body) => - val a2 = rebuildIR(a, env, memo) - val z2 = rebuildIR(zero, env, memo) + val a2 = rebuildIR(ctx, a, env, memo) + val z2 = rebuildIR(ctx, zero, env, memo) StreamFold( a2, z2, accumName, valueName, - rebuildIR(body, env.bindEval(accumName -> z2.typ, valueName -> a2.typ.asInstanceOf[TStream].elementType), memo) + rebuildIR(ctx, body, env.bindEval(accumName -> z2.typ, valueName -> a2.typ.asInstanceOf[TStream].elementType), memo) ) case StreamFold2(a: IR, accum, valueName, seqs, result) => - val a2 = rebuildIR(a, env, memo) - val newAccum = accum.map { case (n, z) => n -> rebuildIR(z, env, memo) } + val a2 = rebuildIR(ctx, a, env, memo) + val newAccum = accum.map { case (n, z) => n -> rebuildIR(ctx, z, env, memo) } val newEnv = env .bindEval(valueName -> a2.typ.asInstanceOf[TStream].elementType) .bindEval(newAccum.map { case (n, z) => n -> z.typ }: _*) @@ -1906,49 +1956,49 @@ object PruneDeadFields { a2, newAccum, valueName, - seqs.map(rebuildIR(_, newEnv, memo)), - rebuildIR(result, newEnv, memo)) + seqs.map(rebuildIR(ctx, _, newEnv, memo)), + rebuildIR(ctx, result, newEnv, memo)) case StreamScan(a, zero, 
accumName, valueName, body) => - val a2 = rebuildIR(a, env, memo) - val z2 = rebuildIR(zero, env, memo) + val a2 = rebuildIR(ctx, a, env, memo) + val z2 = rebuildIR(ctx, zero, env, memo) StreamScan( a2, z2, accumName, valueName, - rebuildIR(body, env.bindEval(accumName -> z2.typ, valueName -> a2.typ.asInstanceOf[TStream].elementType), memo) + rebuildIR(ctx, body, env.bindEval(accumName -> z2.typ, valueName -> a2.typ.asInstanceOf[TStream].elementType), memo) ) case StreamJoinRightDistinct(left, right, lKey, rKey, l, r, join, joinType) => - val left2 = rebuildIR(left, env, memo) - val right2 = rebuildIR(right, env, memo) + val left2 = rebuildIR(ctx, left, env, memo) + val right2 = rebuildIR(ctx, right, env, memo) val ltyp = left2.typ.asInstanceOf[TStream] val rtyp = right2.typ.asInstanceOf[TStream] StreamJoinRightDistinct( left2, right2, lKey, rKey, l, r, - rebuildIR(join, env.bindEval(l -> ltyp.elementType, r -> rtyp.elementType), memo), + rebuildIR(ctx, join, env.bindEval(l -> ltyp.elementType, r -> rtyp.elementType), memo), joinType) case StreamFor(a, valueName, body) => - val a2 = rebuildIR(a, env, memo) - val body2 = rebuildIR(body, env.bindEval(valueName -> a2.typ.asInstanceOf[TStream].elementType), memo) + val a2 = rebuildIR(ctx, a, env, memo) + val body2 = rebuildIR(ctx, body, env.bindEval(valueName -> a2.typ.asInstanceOf[TStream].elementType), memo) StreamFor(a2, valueName, body2) case ArraySort(a, left, right, lessThan) => - val a2 = rebuildIR(a, env, memo) + val a2 = rebuildIR(ctx, a, env, memo) val et = a2.typ.asInstanceOf[TStream].elementType - val lessThan2 = rebuildIR(lessThan, env.bindEval(left -> et, right -> et), memo) + val lessThan2 = rebuildIR(ctx, lessThan, env.bindEval(left -> et, right -> et), memo) ArraySort(a2, left, right, lessThan2) case MakeNDArray(data, shape, rowMajor, errorId) => - val data2 = rebuildIR(data, env, memo) - val shape2 = rebuildIR(shape, env, memo) - val rowMajor2 = rebuildIR(rowMajor, env, memo) + val data2 = rebuildIR(ctx, data, env, memo) + val shape2 = rebuildIR(ctx, shape, env, memo) + val rowMajor2 = rebuildIR(ctx, rowMajor, env, memo) MakeNDArray(data2, shape2, rowMajor2, errorId) case NDArrayMap(nd, valueName, body) => - val nd2 = rebuildIR(nd, env, memo) - NDArrayMap(nd2, valueName, rebuildIR(body, env.bindEval(valueName, nd2.typ.asInstanceOf[TNDArray].elementType), memo)) + val nd2 = rebuildIR(ctx, nd, env, memo) + NDArrayMap(nd2, valueName, rebuildIR(ctx, body, env.bindEval(valueName, nd2.typ.asInstanceOf[TNDArray].elementType), memo)) case NDArrayMap2(left, right, leftName, rightName, body, errorID) => - val left2 = rebuildIR(left, env, memo) - val right2 = rebuildIR(right, env, memo) - val body2 = rebuildIR(body, + val left2 = rebuildIR(ctx, left, env, memo) + val right2 = rebuildIR(ctx, right, env, memo) + val body2 = rebuildIR(ctx, body, env.bindEval(leftName, left2.typ.asInstanceOf[TNDArray].elementType).bindEval(rightName, right2.typ.asInstanceOf[TNDArray].elementType), memo) NDArrayMap2(left2, right2, leftName, rightName, body2, errorID) @@ -1958,7 +2008,7 @@ object PruneDeadFields { val depFields = depStruct.fieldNames.toSet MakeStruct(fields.flatMap { case (f, fir) => if (depFields.contains(f)) - Some(f -> rebuildIR(fir, env, memo)) + Some(f -> rebuildIR(ctx, fir, env, memo)) else { log.info(s"Prune: MakeStruct: eliminating field '$f'") None @@ -1970,14 +2020,14 @@ object PruneDeadFields { val depFieldIndices = depTuple.fieldIndex.keySet MakeTuple(fields.flatMap { case (i, f) => if (depFieldIndices(i)) - Some(i -> rebuildIR(f, 
env, memo)) + Some(i -> rebuildIR(ctx, f, env, memo)) else None }) case InsertFields(old, fields, fieldOrder) => val depStruct = requestedType.asInstanceOf[TStruct] val depFields = depStruct.fieldNames.toSet - val rebuiltChild = rebuildIR(old, env, memo) + val rebuiltChild = rebuildIR(ctx, old, env, memo) val preservedChildFields = rebuiltChild.typ.asInstanceOf[TStruct].fieldNames.toSet val insertOverwritesUnrequestedButPreservedField = fields.exists{ case (fieldName, _) => @@ -1994,7 +2044,7 @@ object PruneDeadFields { InsertFields(wrappedChild, fields.flatMap { case (f, fir) => if (depFields.contains(f)) - Some(f -> rebuildIR(fir, env, memo)) + Some(f -> rebuildIR(ctx, fir, env, memo)) else { log.info(s"Prune: InsertFields: eliminating field '$f'") None @@ -2002,104 +2052,104 @@ object PruneDeadFields { }, fieldOrder.map(fds => fds.filter(f => depFields.contains(f) || wrappedChild.typ.asInstanceOf[TStruct].hasField(f)))) case SelectFields(old, fields) => val depStruct = requestedType.asInstanceOf[TStruct] - val old2 = rebuildIR(old, env, memo) + val old2 = rebuildIR(ctx, old, env, memo) SelectFields(old2, fields.filter(f => old2.typ.asInstanceOf[TStruct].hasField(f) && depStruct.hasField(f))) case ConsoleLog(message, result) => - val message2 = rebuildIR(message, env, memo) - val result2 = rebuildIR(result, env, memo) + val message2 = rebuildIR(ctx, message, env, memo) + val result2 = rebuildIR(ctx, result, env, memo) ConsoleLog(message2, result2) case TableAggregate(child, query) => - val child2 = rebuild(child, memo) - val query2 = rebuildIR(query, BindingEnv(child2.typ.globalEnv, agg = Some(child2.typ.rowEnv)), memo) + val child2 = rebuild(ctx, child, memo) + val query2 = rebuildIR(ctx, query, BindingEnv(child2.typ.globalEnv, agg = Some(child2.typ.rowEnv)), memo) TableAggregate(child2, query2) case MatrixAggregate(child, query) => - val child2 = rebuild(child, memo) - val query2 = rebuildIR(query, BindingEnv(child2.typ.globalEnv, agg = Some(child2.typ.entryEnv)), memo) + val child2 = rebuild(ctx, child, memo) + val query2 = rebuildIR(ctx, query, BindingEnv(child2.typ.globalEnv, agg = Some(child2.typ.entryEnv)), memo) MatrixAggregate(child2, query2) case TableCollect(child) => val rStruct = requestedType.asInstanceOf[TStruct] if (!rStruct.hasField("rows")) if (rStruct.hasField("global")) - MakeStruct(FastSeq("global" -> TableGetGlobals(rebuild(child, memo)))) + MakeStruct(FastSeq("global" -> TableGetGlobals(rebuild(ctx, child, memo)))) else MakeStruct(FastSeq()) else - TableCollect(rebuild(child, memo)) + TableCollect(rebuild(ctx, child, memo)) case AggExplode(array, name, aggBody, isScan) => - val a2 = rebuildIR(array, if (isScan) env.promoteScan else env.promoteAgg, memo) + val a2 = rebuildIR(ctx, array, if (isScan) env.promoteScan else env.promoteAgg, memo) val a2t = a2.typ.asInstanceOf[TStream].elementType - val body2 = rebuildIR(aggBody, if (isScan) env.bindScan(name, a2t) else env.bindAgg(name, a2t), memo) + val body2 = rebuildIR(ctx, aggBody, if (isScan) env.bindScan(name, a2t) else env.bindAgg(name, a2t), memo) AggExplode(a2, name, body2, isScan) case AggFilter(cond, aggIR, isScan) => - val cond2 = rebuildIR(cond, if (isScan) env.promoteScan else env.promoteAgg, memo) - val aggIR2 = rebuildIR(aggIR, env, memo) + val cond2 = rebuildIR(ctx, cond, if (isScan) env.promoteScan else env.promoteAgg, memo) + val aggIR2 = rebuildIR(ctx, aggIR, env, memo) AggFilter(cond2, aggIR2, isScan) case AggGroupBy(key, aggIR, isScan) => - val key2 = rebuildIR(key, if (isScan) env.promoteScan else 
env.promoteAgg, memo) - val aggIR2 = rebuildIR(aggIR, env, memo) + val key2 = rebuildIR(ctx, key, if (isScan) env.promoteScan else env.promoteAgg, memo) + val aggIR2 = rebuildIR(ctx, aggIR, env, memo) AggGroupBy(key2, aggIR2, isScan) case AggArrayPerElement(a, elementName, indexName, aggBody, knownLength, isScan) => val aEnv = if (isScan) env.promoteScan else env.promoteAgg - val a2 = rebuildIR(a, aEnv, memo) + val a2 = rebuildIR(ctx, a, aEnv, memo) val a2t = a2.typ.asInstanceOf[TArray].elementType val env_ = env.bindEval(indexName -> TInt32) - val aggBody2 = rebuildIR(aggBody, if (isScan) env_.bindScan(elementName, a2t) else env_.bindAgg(elementName, a2t), memo) - AggArrayPerElement(a2, elementName, indexName, aggBody2, knownLength.map(rebuildIR(_, aEnv, memo)), isScan) + val aggBody2 = rebuildIR(ctx, aggBody, if (isScan) env_.bindScan(elementName, a2t) else env_.bindAgg(elementName, a2t), memo) + AggArrayPerElement(a2, elementName, indexName, aggBody2, knownLength.map(rebuildIR(ctx, _, aEnv, memo)), isScan) case StreamAgg(a, name, query) => - val a2 = rebuildIR(a, env, memo) + val a2 = rebuildIR(ctx, a, env, memo) val newEnv = env.copy(agg = Some(env.eval.bind(name -> a2.typ.asInstanceOf[TStream].elementType))) - val query2 = rebuildIR(query, newEnv, memo) + val query2 = rebuildIR(ctx, query, newEnv, memo) StreamAgg(a2, name, query2) case StreamAggScan(a, name, query) => - val a2 = rebuildIR(a, env, memo) - val query2 = rebuildIR(query, env.copy(scan = Some(env.eval.bind(name -> a2.typ.asInstanceOf[TStream].elementType))), memo) + val a2 = rebuildIR(ctx, a, env, memo) + val query2 = rebuildIR(ctx, query, env.copy(scan = Some(env.eval.bind(name -> a2.typ.asInstanceOf[TStream].elementType))), memo) StreamAggScan(a2, name, query2) case RunAgg(body, result, signatures) => - val body2 = rebuildIR(body, env, memo) - val result2 = rebuildIR(result, env, memo) + val body2 = rebuildIR(ctx, body, env, memo) + val result2 = rebuildIR(ctx, result, env, memo) RunAgg(body2, result2, signatures) case RunAggScan(array, name, init, seqs, result, signature) => - val array2 = rebuildIR(array, env, memo) - val init2 = rebuildIR(init, env, memo) + val array2 = rebuildIR(ctx, array, env, memo) + val init2 = rebuildIR(ctx, init, env, memo) val eltEnv = env.bindEval(name, array2.typ.asInstanceOf[TStream].elementType) - val seqs2 = rebuildIR(seqs, eltEnv, memo) - val result2 = rebuildIR(result, eltEnv, memo) + val seqs2 = rebuildIR(ctx, seqs, eltEnv, memo) + val result2 = rebuildIR(ctx, result, eltEnv, memo) RunAggScan(array2, name, init2, seqs2, result2, signature) case ApplyAggOp(initOpArgs, seqOpArgs, aggSig) => - val initOpArgs2 = initOpArgs.map(rebuildIR(_, env, memo)) - val seqOpArgs2 = seqOpArgs.map(rebuildIR(_, env.promoteAgg, memo)) + val initOpArgs2 = initOpArgs.map(rebuildIR(ctx, _, env, memo)) + val seqOpArgs2 = seqOpArgs.map(rebuildIR(ctx, _, env.promoteAgg, memo)) ApplyAggOp(initOpArgs2, seqOpArgs2, aggSig.copy( initOpArgs = initOpArgs2.map(_.typ), seqOpArgs = seqOpArgs2.map(_.typ))) case ApplyScanOp(initOpArgs, seqOpArgs, aggSig) => - val initOpArgs2 = initOpArgs.map(rebuildIR(_, env, memo)) - val seqOpArgs2 = seqOpArgs.map(rebuildIR(_, env.promoteScan, memo)) + val initOpArgs2 = initOpArgs.map(rebuildIR(ctx, _, env, memo)) + val seqOpArgs2 = seqOpArgs.map(rebuildIR(ctx, _, env.promoteScan, memo)) ApplyScanOp(initOpArgs2, seqOpArgs2, aggSig.copy( initOpArgs = initOpArgs2.map(_.typ), seqOpArgs = seqOpArgs2.map(_.typ))) case AggFold(zero, seqOp, combOp, accumName, otherAccumName, isScan) => - val 
zero2 = rebuildIR(zero, env, memo) - val seqOp2 = rebuildIR(seqOp, if (isScan) env.promoteScan else env.promoteAgg, memo) - val combOp2 = rebuildIR(combOp, env, memo) + val zero2 = rebuildIR(ctx, zero, env, memo) + val seqOp2 = rebuildIR(ctx, seqOp, if (isScan) env.promoteScan else env.promoteAgg, memo) + val combOp2 = rebuildIR(ctx, combOp, env, memo) AggFold(zero2, seqOp2, combOp2, accumName, otherAccumName, isScan) case CollectDistributedArray(contexts, globals, cname, gname, body, tsd) => - val contexts2 = upcast(rebuildIR(contexts, env, memo), memo.requestedType.lookup(contexts).asInstanceOf[Type]) - val globals2 = upcast(rebuildIR(globals, env, memo), memo.requestedType.lookup(globals).asInstanceOf[Type]) - val body2 = rebuildIR(body, BindingEnv(Env(cname -> contexts2.typ.asInstanceOf[TStream].elementType, gname -> globals2.typ)), memo) + val contexts2 = upcast(ctx, rebuildIR(ctx, contexts, env, memo), memo.requestedType.lookup(contexts).asInstanceOf[Type]) + val globals2 = upcast(ctx, rebuildIR(ctx, globals, env, memo), memo.requestedType.lookup(globals).asInstanceOf[Type]) + val body2 = rebuildIR(ctx, body, BindingEnv(Env(cname -> contexts2.typ.asInstanceOf[TStream].elementType, gname -> globals2.typ)), memo) CollectDistributedArray(contexts2, globals2, cname, gname, body2, tsd) case _ => ir.copy(ir.children.map { - case valueIR: IR => rebuildIR(valueIR, env, memo) // FIXME: assert IR does not bind or change env - case mir: MatrixIR => rebuild(mir, memo) - case tir: TableIR => rebuild(tir, memo) + case valueIR: IR => rebuildIR(ctx, valueIR, env, memo) // FIXME: assert IR does not bind or change env + case mir: MatrixIR => rebuild(ctx, mir, memo) + case tir: TableIR => rebuild(ctx, tir, memo) case bmir: BlockMatrixIR => bmir //NOTE Currently no BlockMatrixIRs would have dead fields }) } } - def upcast(ir: IR, rType: Type): IR = { + def upcast(ctx: ExecuteContext, ir: IR, rType: Type): IR = { if (ir.typ == rType) ir else { @@ -2110,7 +2160,7 @@ object PruneDeadFields { val ref = Ref(uid, ir.typ) val ms = MakeStruct( rs.fields.map { f => - f.name -> upcast(GetField(ref, f.name), f.typ) + f.name -> upcast(ctx, GetField(ref, f.name), f.typ) } ) Let(uid, ir, If(IsNA(ref), NA(ms.typ), ms)) @@ -2118,40 +2168,43 @@ object PruneDeadFields { val ra = rType.asInstanceOf[TStream] val uid = genUID() val ref = Ref(uid, ts.elementType) - StreamMap(ir, uid, upcast(ref, ra.elementType)) + StreamMap(ir, uid, upcast(ctx, ref, ra.elementType)) case ts: TArray => val ra = rType.asInstanceOf[TArray] val uid = genUID() val ref = Ref(uid, ts.elementType) - ToArray(StreamMap(ToStream(ir), uid, upcast(ref, ra.elementType))) + ToArray(StreamMap(ToStream(ir), uid, upcast(ctx, ref, ra.elementType))) case _: TTuple => val rt = rType.asInstanceOf[TTuple] val uid = genUID() val ref = Ref(uid, ir.typ) val mt = MakeTuple(rt._types.map { tupleField => - tupleField.index -> upcast(GetTupleElement(ref, tupleField.index), tupleField.typ) + tupleField.index -> upcast(ctx, GetTupleElement(ref, tupleField.index), tupleField.typ) }) Let(uid, ir, If(IsNA(ref), NA(mt.typ), mt)) case _: TDict => val rd = rType.asInstanceOf[TDict] - ToDict(upcast(ToStream(ir), TArray(rd.elementType))) + ToDict(upcast(ctx, ToStream(ir), TArray(rd.elementType))) case _: TSet => val rs = rType.asInstanceOf[TSet] - ToSet(upcast(ToStream(ir), TSet(rs.elementType))) + ToSet(upcast(ctx, ToStream(ir), TSet(rs.elementType))) case _ => ir } - assert(result.typ == rType, s"${ Pretty(result) }, ${ result.typ }, $rType") + assert(result.typ == rType, s"${ 
Pretty(ctx, result) }, ${ result.typ }, $rType") result } } - def upcast(ir: MatrixIR, rType: MatrixType, + def upcast( + ctx: ExecuteContext, + ir: MatrixIR, + rType: MatrixType, upcastRows: Boolean = true, upcastCols: Boolean = true, upcastGlobals: Boolean = true, - upcastEntries: Boolean = true): MatrixIR = { - + upcastEntries: Boolean = true + ): MatrixIR = { if (ir.typ == rType || !(upcastRows || upcastCols || upcastGlobals || upcastEntries)) ir else { @@ -2163,24 +2216,25 @@ object PruneDeadFields { } if (upcastEntries && mt.typ.entryType != rType.entryType) - mt = MatrixMapEntries(mt, upcast(Ref("g", mt.typ.entryType), rType.entryType)) + mt = MatrixMapEntries(mt, upcast(ctx, Ref("g", mt.typ.entryType), rType.entryType)) if (upcastRows && mt.typ.rowType != rType.rowType) - mt = MatrixMapRows(mt, upcast(Ref("va", mt.typ.rowType), rType.rowType)) + mt = MatrixMapRows(mt, upcast(ctx, Ref("va", mt.typ.rowType), rType.rowType)) if (upcastCols && (mt.typ.colType != rType.colType || mt.typ.colKey != rType.colKey)) { - mt = MatrixMapCols(mt, upcast(Ref("sa", mt.typ.colType), rType.colType), + mt = MatrixMapCols(mt, upcast(ctx, Ref("sa", mt.typ.colType), rType.colType), if (rType.colKey == mt.typ.colKey) None else Some(rType.colKey)) } if (upcastGlobals && mt.typ.globalType != rType.globalType) - mt = MatrixMapGlobals(mt, upcast(Ref("global", ir.typ.globalType), rType.globalType)) + mt = MatrixMapGlobals(mt, upcast(ctx, Ref("global", ir.typ.globalType), rType.globalType)) mt } } def upcastTable( + ctx: ExecuteContext, ir: TableIR, rType: TableType, upcastRow: Boolean = true, @@ -2195,11 +2249,11 @@ object PruneDeadFields { table = TableKeyBy(table, rType.key) } if (upcastRow && ir.typ.rowType != rType.rowType) { - table = TableMapRows(table, upcast(Ref("row", table.typ.rowType), rType.rowType)) + table = TableMapRows(table, upcast(ctx, Ref("row", table.typ.rowType), rType.rowType)) } if (upcastGlobals && ir.typ.globalType != rType.globalType) { table = TableMapGlobals(table, - upcast(Ref("global", table.typ.globalType), rType.globalType)) + upcast(ctx, Ref("global", table.typ.globalType), rType.globalType)) } table } diff --git a/hail/src/main/scala/is/hail/expr/ir/Random.scala b/hail/src/main/scala/is/hail/expr/ir/Random.scala new file mode 100644 index 00000000000..085d6873d70 --- /dev/null +++ b/hail/src/main/scala/is/hail/expr/ir/Random.scala @@ -0,0 +1,481 @@ +package is.hail.expr.ir + +import is.hail.asm4s._ +import is.hail.types.physical.stypes.concrete.SRNGState +import is.hail.utils.FastIndexedSeq +import net.sourceforge.jdistlib.rng.RandomEngine + +object Threefry { + val keyConst = 0x1BD11BDAA9FC1A22L + + val rotConsts = Array( + Array(14, 16), + Array(52, 57), + Array(23, 40), + Array( 5, 37), + Array(25, 33), + Array(46, 12), + Array(58, 22), + Array(32, 32)) + + val defaultNumRounds = 20 + + def expandKey(k: IndexedSeq[Long]): IndexedSeq[Long] = { + assert(k.length == 4) + val k4 = k(0) ^ k(1) ^ k(2) ^ k(3) ^ keyConst + k :+ k4 + } + + def rotL(i: Value[Long], n: Value[Int]): Code[Long] = { + (i << n) | (i >>> -n) + } + + def mix(cb: CodeBuilderLike, x0: Settable[Long], x1: Settable[Long], n: Int): Unit = { + cb.assign(x0, x0 + x1) + cb.assign(x1, rotL(x1, n)) + cb.assign(x1, x0 ^ x1) + } + + def injectKey(key: IndexedSeq[Long], tweak: Long, block: Array[Long], s: Int): Unit = { + val tweakExt = Array[Long](tweak, 0, tweak) + block(0) += key(s % 5) + block(1) += key((s + 1) % 5) + tweakExt(s % 3) + block(2) += key((s + 2) % 5) + tweakExt((s + 1) % 3) + block(3) += key((s + 3) % 
5) + s.toLong + } + + def injectKey(cb: CodeBuilderLike, + key: IndexedSeq[Long], + tweak: Value[Long], + block: IndexedSeq[Settable[Long]], + s: Int + ): Unit = { + val tweakExt = Array[Value[Long]](tweak, const(0), tweak) + cb.assign(block(0), block(0) + key(s % 5)) + cb.assign(block(1), block(1) + const(key((s + 1) % 5)) + tweakExt(s % 3)) + cb.assign(block(2), block(2) + const(key((s + 2) % 5)) + tweakExt((s + 1) % 3)) + cb.assign(block(3), block(3) + const(key((s + 3) % 5)) + const(s.toLong)) + } + + def permute(x: Array[Settable[Long]]): Unit = { + val tmp = x(1) + x(1) = x(3) + x(3) = tmp + } + + def encryptUnrolled(k0: Long, k1: Long, k2: Long, k3: Long, t: Long, _x0: Long, _x1: Long, _x2: Long, _x3: Long): Unit = { + import java.lang.Long.rotateLeft + var x0 = _x0 + var x1 = _x1 + var x2 = _x2 + var x3 = _x3 + val k4 = k0 ^ k1 ^ k2 ^ k3 ^ keyConst + // d = 0 + // injectKey s = 0 + x0 += k0; x1 += k1 + t; x2 += k2; x3 += k3 + x0 += x1; x1 = rotateLeft(x1, 14); x1 ^= x0 + x2 += x3; x3 = rotateLeft(x3, 16); x3 ^= x2 + // d = 1 + x0 += x3; x3 = rotateLeft(x3, 52); x3 ^= x0 + x2 += x1; x1 = rotateLeft(x1, 57); x1 ^= x2 + // d = 2 + x0 += x1; x1 = rotateLeft(x1, 23); x1 ^= x0 + x2 += x3; x3 = rotateLeft(x3, 40); x3 ^= x2 + // d = 3 + x0 += x3; x3 = rotateLeft(x3, 5); x3 ^= x0 + x2 += x1; x1 = rotateLeft(x1, 37); x1 ^= x2 + // d = 4 + // injectKey s = 1 + x0 += k1; x1 += k2; x2 += k3 + t; x3 += k4 + 1 + x0 += x1; x1 = rotateLeft(x1, 25); x1 ^= x0 + x2 += x3; x3 = rotateLeft(x3, 33); x3 ^= x2 + // d = 5 + x0 += x3; x3 = rotateLeft(x3, 46); x3 ^= x0 + x2 += x1; x1 = rotateLeft(x1, 12); x1 ^= x2 + // d = 6 + x0 += x1; x1 = rotateLeft(x1, 58); x1 ^= x0 + x2 += x3; x3 = rotateLeft(x3, 22); x3 ^= x2 + // d = 7 + x0 += x3; x3 = rotateLeft(x3, 32); x3 ^= x0 + x2 += x1; x1 = rotateLeft(x1, 32); x1 ^= x2 + // d = 8 + // injectKey s = 2 + x0 += k2; x1 += k3 + t; x2 += k4 + t; x3 += k0 + 2 + x0 += x1; x1 = rotateLeft(x1, 14); x1 ^= x0 + x2 += x3; x3 = rotateLeft(x3, 16); x3 ^= x2 + // d = 9 + x0 += x3; x3 = rotateLeft(x3, 52); x3 ^= x0 + x2 += x1; x1 = rotateLeft(x1, 57); x1 ^= x2 + // d = 10 + x0 += x1; x1 = rotateLeft(x1, 23); x1 ^= x0 + x2 += x3; x3 = rotateLeft(x3, 40); x3 ^= x2 + // d = 11 + x0 += x3; x3 = rotateLeft(x3, 5); x3 ^= x0 + x2 += x1; x1 = rotateLeft(x1, 37); x1 ^= x2 + // d = 12 + // injectKey s = 3 + x0 += k3; x1 += k4 + t; x2 += k0; x3 += k1 + 3 + x0 += x1; x1 = rotateLeft(x1, 25); x1 ^= x0 + x2 += x3; x3 = rotateLeft(x3, 33); x3 ^= x2 + // d = 13 + x0 += x3; x3 = rotateLeft(x3, 46); x3 ^= x0 + x2 += x1; x1 = rotateLeft(x1, 12); x1 ^= x2 + // d = 14 + x0 += x1; x1 = rotateLeft(x1, 58); x1 ^= x0 + x2 += x3; x3 = rotateLeft(x3, 22); x3 ^= x2 + // d = 15 + x0 += x3; x3 = rotateLeft(x3, 32); x3 ^= x0 + x2 += x1; x1 = rotateLeft(x1, 32); x1 ^= x2 + // d = 16 + // injectKey s = 4 + x0 += k4; x1 += k0; x2 += k1 + t; x3 += k2 + 4 + x0 += x1; x1 = rotateLeft(x1, 14); x1 ^= x0 + x2 += x3; x3 = rotateLeft(x3, 16); x3 ^= x2 + // d = 17 + x0 += x3; x3 = rotateLeft(x3, 52); x3 ^= x0 + x2 += x1; x1 = rotateLeft(x1, 57); x1 ^= x2 + // d = 18 + x0 += x1; x1 = rotateLeft(x1, 23); x1 ^= x0 + x2 += x3; x3 = rotateLeft(x3, 40); x3 ^= x2 + // d = 19 + x0 += x3; x3 = rotateLeft(x3, 5); x3 ^= x0 + x2 += x1; x1 = rotateLeft(x1, 37); x1 ^= x2 + // d = 20 + // injectKey s = 5 + x0 += k0; x1 += k1 + t; x2 += k2 + t; x3 += k3 + 5 + } + + def encrypt(k: IndexedSeq[Long], t: Long, x: Array[Long]): Unit = + encrypt(k, t, x, defaultNumRounds) + + def encrypt(k: IndexedSeq[Long], t: Long, x: Array[Long], rounds: 
Int): Unit = { + assert(k.length == 5) + assert(x.length == 4) + + for (d <- 0 until rounds) { + if (d % 4 == 0) + injectKey(k, t, x, d / 4) + + x(0) += x(1) + x(1) = java.lang.Long.rotateLeft(x(1), rotConsts(d % 8)(0)) + x(1) ^= x(0) + x(2) += x(3) + x(3) = java.lang.Long.rotateLeft(x(3), rotConsts(d % 8)(1)) + x(3) ^= x(2) + + val tmp = x(1) + x(1) = x(3) + x(3) = tmp + } + + if (rounds % 4 == 0) + injectKey(k, t, x, rounds / 4) + } + + def encrypt(cb: CodeBuilderLike, + k: IndexedSeq[Long], + t: Value[Long], + x: IndexedSeq[Settable[Long]] + ): Unit = + encrypt(cb, k, t, x, defaultNumRounds) + + def encrypt(cb: CodeBuilderLike, + k: IndexedSeq[Long], + t: Value[Long], + _x: IndexedSeq[Settable[Long]], + rounds: Int + ): Unit = { + assert(k.length == 5) + assert(_x.length == 4) + val x = _x.toArray + + for (d <- 0 until rounds) { + if (d % 4 == 0) + injectKey(cb, k, t, x, d / 4) + + for (j <- 0 until 2) + mix(cb, x(2*j), x(2*j+1), rotConsts(d % 8)(j)) + + permute(x) + } + + if (rounds % 4 == 0) + injectKey(cb, k, t, x, rounds / 4) + } + + def debugPrint(cb: EmitCodeBuilder, x: IndexedSeq[Settable[Long]], info: String) { + cb.println(s"[$info]=\n\t", x(0).toString, " ", x(1).toString, " ", x(2).toString, " ", x(3).toString) + } + + def apply(k: IndexedSeq[Long]): AsmFunction2[Array[Long], Long, Unit] = { + val f = FunctionBuilder[Array[Long], Long, Unit]("Threefry") + f.mb.emitWithBuilder { cb => + val xArray = f.mb.getArg[Array[Long]](1) + val t = f.mb.getArg[Long](2) + val x = Array.tabulate[Settable[Long]](4)(i => cb.newLocal[Long](s"x$i", xArray(i))) + encrypt(cb, expandKey(k), t, x) + for (i <- 0 until 4) cb += (xArray(i) = x(i)) + Code._empty + } + f.result(false)(new HailClassLoader(getClass.getClassLoader)) + } +} + +class RNGState { + val staticAcc: Array[Long] = Array.fill(4)(0) + val staticIdx: Int = 0 + val staticOpen: Array[Long] = Array.fill(4)(0) + val staticOpenLen: Int = 0 + val dynAcc: Array[Long] = Array.fill(4)(0) + val dynIdx: Int = 0 + val dynOpen: Array[Long] = Array.fill(4)(0) + val dynOpenLen: Int = 0 +} + +object ThreefryRandomEngine { + def apply( + k1: Long, k2: Long, k3: Long, k4: Long, + h1: Long, h2: Long, h3: Long, h4: Long, + x1: Long, x2: Long, x3: Long + ): ThreefryRandomEngine = { + new ThreefryRandomEngine( + Threefry.expandKey(FastIndexedSeq(k1, k2, k3, k4)), + Array(h1 ^ x1, h2 ^ x2, h3 ^ x3, h4), + 0) + } + + def apply(): ThreefryRandomEngine = { + val rand = new java.util.Random() + new ThreefryRandomEngine( + Threefry.expandKey(Array.fill(4)(rand.nextLong())), + Array.fill(4)(rand.nextLong()), + 0) + } +} + +class ThreefryRandomEngine( + val key: IndexedSeq[Long], + val state: Array[Long], + var counter: Long, + val tweak: Long = SRNGState.finalBlockNoPadTweak +) extends RandomEngine { + val buffer: Array[Long] = Array.ofDim[Long](4) + var usedInts: Int = 8 + var hasBufferedGaussian: Boolean = false + var bufferedGaussian: Double = 0.0 + + override def clone(): ThreefryRandomEngine = ??? 
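// A minimal, self-contained sketch (editorial aside, not part of the patch) of how the
// Threefry-4x64 pieces above fit together as a counter-based generator: the 4-word key is
// expanded with one parity word, a key injection happens every four rounds, each round mixes
// the two word pairs with the rotation constants, and words 1 and 3 are swapped. Names such
// as ThreefrySketch and toyFillBlock are illustrative only; the real code path is
// Threefry.encrypt / ThreefryRandomEngine.fillBuffer, which takes a precomputed 5-word key
// and uses the SRNGState.finalBlockNoPadTweak tweak rather than the 0 tweak used here.
object ThreefrySketch {
  val keyConst = 0x1BD11BDAA9FC1A22L
  val rotConsts = Array(
    Array(14, 16), Array(52, 57), Array(23, 40), Array(5, 37),
    Array(25, 33), Array(46, 12), Array(58, 22), Array(32, 32))

  def encrypt(k: IndexedSeq[Long], t: Long, x: Array[Long], rounds: Int = 20): Unit = {
    val key = k :+ (k(0) ^ k(1) ^ k(2) ^ k(3) ^ keyConst)   // expandKey: extra parity word
    val tweakExt = Array(t, 0L, t)
    def injectKey(s: Int): Unit = {
      x(0) += key(s % 5)
      x(1) += key((s + 1) % 5) + tweakExt(s % 3)
      x(2) += key((s + 2) % 5) + tweakExt((s + 1) % 3)
      x(3) += key((s + 3) % 5) + s.toLong
    }
    for (d <- 0 until rounds) {
      if (d % 4 == 0) injectKey(d / 4)
      // mix both word pairs, then permute (swap words 1 and 3)
      x(0) += x(1); x(1) = java.lang.Long.rotateLeft(x(1), rotConsts(d % 8)(0)); x(1) ^= x(0)
      x(2) += x(3); x(3) = java.lang.Long.rotateLeft(x(3), rotConsts(d % 8)(1)); x(3) ^= x(2)
      val tmp = x(1); x(1) = x(3); x(3) = tmp
    }
    if (rounds % 4 == 0) injectKey(rounds / 4)
  }

  // Counter mode: encrypting (state, counter) yields a fresh 256-bit block, and the same key,
  // state and counter always reproduce the same block, which makes the stream replayable.
  def toyFillBlock(key: IndexedSeq[Long], state: IndexedSeq[Long], counter: Long): Array[Long] = {
    val x = Array(state(0), state(1), state(2), state(3) ^ counter)
    encrypt(key, 0L, x)
    x
  }
}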
+ + private def fillBuffer(): Unit = { + import java.lang.Long.rotateLeft + var x0 = state(0) + var x1 = state(1) + var x2 = state(2) + var x3 = state(3) ^ counter + val k0 = key(0); val k1 = key(1); val k2 = key(2); val k3 = key(3) + val k4 = k0 ^ k1 ^ k2 ^ k3 ^ Threefry.keyConst + val t = tweak + // d = 0 + // injectKey s = 0 + x0 += k0; x1 += k1 + t; x2 += k2; x3 += k3 + x0 += x1; x1 = rotateLeft(x1, 14); x1 ^= x0 + x2 += x3; x3 = rotateLeft(x3, 16); x3 ^= x2 + // d = 1 + x0 += x3; x3 = rotateLeft(x3, 52); x3 ^= x0 + x2 += x1; x1 = rotateLeft(x1, 57); x1 ^= x2 + // d = 2 + x0 += x1; x1 = rotateLeft(x1, 23); x1 ^= x0 + x2 += x3; x3 = rotateLeft(x3, 40); x3 ^= x2 + // d = 3 + x0 += x3; x3 = rotateLeft(x3, 5); x3 ^= x0 + x2 += x1; x1 = rotateLeft(x1, 37); x1 ^= x2 + // d = 4 + // injectKey s = 1 + x0 += k1; x1 += k2; x2 += k3 + t; x3 += k4 + 1 + x0 += x1; x1 = rotateLeft(x1, 25); x1 ^= x0 + x2 += x3; x3 = rotateLeft(x3, 33); x3 ^= x2 + // d = 5 + x0 += x3; x3 = rotateLeft(x3, 46); x3 ^= x0 + x2 += x1; x1 = rotateLeft(x1, 12); x1 ^= x2 + // d = 6 + x0 += x1; x1 = rotateLeft(x1, 58); x1 ^= x0 + x2 += x3; x3 = rotateLeft(x3, 22); x3 ^= x2 + // d = 7 + x0 += x3; x3 = rotateLeft(x3, 32); x3 ^= x0 + x2 += x1; x1 = rotateLeft(x1, 32); x1 ^= x2 + // d = 8 + // injectKey s = 2 + x0 += k2; x1 += k3 + t; x2 += k4 + t; x3 += k0 + 2 + x0 += x1; x1 = rotateLeft(x1, 14); x1 ^= x0 + x2 += x3; x3 = rotateLeft(x3, 16); x3 ^= x2 + // d = 9 + x0 += x3; x3 = rotateLeft(x3, 52); x3 ^= x0 + x2 += x1; x1 = rotateLeft(x1, 57); x1 ^= x2 + // d = 10 + x0 += x1; x1 = rotateLeft(x1, 23); x1 ^= x0 + x2 += x3; x3 = rotateLeft(x3, 40); x3 ^= x2 + // d = 11 + x0 += x3; x3 = rotateLeft(x3, 5); x3 ^= x0 + x2 += x1; x1 = rotateLeft(x1, 37); x1 ^= x2 + // d = 12 + // injectKey s = 3 + x0 += k3; x1 += k4 + t; x2 += k0; x3 += k1 + 3 + x0 += x1; x1 = rotateLeft(x1, 25); x1 ^= x0 + x2 += x3; x3 = rotateLeft(x3, 33); x3 ^= x2 + // d = 13 + x0 += x3; x3 = rotateLeft(x3, 46); x3 ^= x0 + x2 += x1; x1 = rotateLeft(x1, 12); x1 ^= x2 + // d = 14 + x0 += x1; x1 = rotateLeft(x1, 58); x1 ^= x0 + x2 += x3; x3 = rotateLeft(x3, 22); x3 ^= x2 + // d = 15 + x0 += x3; x3 = rotateLeft(x3, 32); x3 ^= x0 + x2 += x1; x1 = rotateLeft(x1, 32); x1 ^= x2 + // d = 16 + // injectKey s = 4 + x0 += k4; x1 += k0; x2 += k1 + t; x3 += k2 + 4 + x0 += x1; x1 = rotateLeft(x1, 14); x1 ^= x0 + x2 += x3; x3 = rotateLeft(x3, 16); x3 ^= x2 + // d = 17 + x0 += x3; x3 = rotateLeft(x3, 52); x3 ^= x0 + x2 += x1; x1 = rotateLeft(x1, 57); x1 ^= x2 + // d = 18 + x0 += x1; x1 = rotateLeft(x1, 23); x1 ^= x0 + x2 += x3; x3 = rotateLeft(x3, 40); x3 ^= x2 + // d = 19 + x0 += x3; x3 = rotateLeft(x3, 5); x3 ^= x0 + x2 += x1; x1 = rotateLeft(x1, 37); x1 ^= x2 + // d = 20 + // injectKey s = 5 + x0 += k0; x1 += k1 + t; x2 += k2 + t; x3 += k3 + 5 + + buffer(0) = x0; buffer(1) = x1; buffer(2) = x2; buffer(3) = x3 + counter += 1 + usedInts = 0 + } + + override def setSeed(seed: Long): Unit = ??? + + override def getSeed: Long = ??? 
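// A usage sketch (editorial aside, not part of the patch): because fillBuffer encrypts
// (state, counter), the key, state and counter fully determine the output stream, so two
// engines built with identical arguments replay exactly the same values. The literal
// arguments and the name demoDeterminism are arbitrary illustrations; the consumption
// methods (nextLong and friends) are defined just below.
def demoDeterminism(): Unit = {
  val a = ThreefryRandomEngine(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L)
  val b = ThreefryRandomEngine(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L)
  // identical key/state/counter => identical stream
  assert(Seq.fill(16)(a.nextLong()) == Seq.fill(16)(b.nextLong()))
}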
+ + override def nextLong(): Long = { + usedInts += usedInts & 1 // round up to multiple of 2 + if (usedInts >= 8) fillBuffer() + val result = buffer(usedInts >> 1) + usedInts += 2 + result + } + + override def nextInt(): Int = { + if (usedInts >= 8) fillBuffer() + val result = buffer(usedInts >> 1) + usedInts += 1 + val parity = usedInts & 1 + val shift = parity << 5 // either 0 or 32 + (result >>> shift).toInt // either first or second 32 bits + } + + // Uses approach from https://github.com/apple/swift/pull/39143 + override def nextInt(n: Int): Int = { + val nL = n.toLong + val mult = nL * (nextInt().toLong & 0xFFFFFFFFL) + val result = (mult >>> 32).toInt + val fraction = mult & 0xFFFFFFFFL + + // optional early return, benchmark to decide if it helps + if (fraction < ((1L << 32) - nL)) return result + + val multHigh = (((nL * (nextInt().toLong & 0xFFFFFFFFL)) >>> 32) + (nL * (nextInt().toLong & 0xFFFFFFFFL))) >>> 32 + val sum = fraction + multHigh + val carry = (sum >>> 32).toInt + result + carry + } + + // Uses standard Java approach. We could use the same approach as for ints, + // but that requires full-width multiplication of two longs, which adds some + // complexity. + override def nextLong(l: Long): Long = { + var x = nextLong() >>> 1 + var r = x % l + while (x - r + (l - 1) < 0) { + x = nextLong() >>> 1 + r = x % l + } + r + } + + override def nextGaussian(): Double = { + if (hasBufferedGaussian) { + hasBufferedGaussian = false + return bufferedGaussian + } + + var v1 = 2 * nextDouble() - 1 // between -1 and 1 + var v2 = 2 * nextDouble() - 1 + var s = v1 * v1 + v2 * v2 + while (s >= 1 || s == 0) { + v1 = 2 * nextDouble() - 1 // between -1 and 1 + v2 = 2 * nextDouble() - 1 + s = v1 * v1 + v2 * v2 + } + val multiplier = StrictMath.sqrt(-2 * StrictMath.log(s) / s) + bufferedGaussian = v2 * multiplier + hasBufferedGaussian = true + v1 * multiplier + } + + // Equivalent to generating an infinite-precision real number in [0, 1), + // represented as an infinitely long bitstream, and rounding down to the + // nearest representable floating point number. + // In contrast, the standard Java and jdistlib generators sample uniformly + // from a sequence of equidistant floating point numbers in [0, 1), using + // (nextLong() >>> 11).toDouble / (1L << 53) + // + // Intuitively, the algorithm is: + // * lazily generate an infinite string of random bits, interpreted as + // the binary expansion of a real number in [0, 1), i.e. `0.${bits}` + // * convert to floating point representation: the exponent is -n, where n is + // the number of 0s before the first 1, and the significand is the first 1 + // followed by the next 52 bits. 
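// A small worked illustration (editorial aside, not part of the patch) of the float assembly
// the comment above describes, assuming `zeros` zero bits were seen before the first 1 and a
// 52-bit `mantissa` was drawn afterwards: the assembled double falls in [2^-(zeros+1), 2^-zeros).
// The name `assemble` is illustrative; the actual implementation is nextDouble below.
def assemble(zeros: Int, mantissa: Long): Double = {
  require(zeros >= 0 && zeros < 1022 && (mantissa >>> 52) == 0L)
  val exponent = 1022 - zeros   // biased IEEE 754 exponent; 1022 encodes 2^(-1), i.e. [0.5, 1)
  java.lang.Double.longBitsToDouble((exponent.toLong << 52) | mantissa)
}
assert(assemble(0, 0L) == 0.5)                 // no leading zeros: interval [0.5, 1)
assert(assemble(1, 0L) == 0.25)                // one leading zero: interval [0.25, 0.5)
assert(assemble(0, (1L << 52) - 1) < 1.0)      // largest mantissa still stays below 1.0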
+ override def nextDouble(): Double = { + // first generate random bits until we get the first 1, counting the number + // of zeroes + var bits: Long = nextLong() + // the exponent starts at 1022 and subtracts the number of leading zeroes, + // to account for the exponent bias in IEE754 + var exponent: Int = 1022 + while (bits == 0) { + bits = nextLong() + exponent -= 64 + } + // use trailing zeroes instead of leading zeroes as slight optimization, + // but probabilistically equivalent + val e = java.lang.Long.numberOfTrailingZeros(bits) + exponent -= e + // If there are at least 52 bits before the trailing 1, use those + val significand = (if (e < 12) bits else nextLong()) >>> 12 + val result = (exponent.toLong << 52) | significand + java.lang.Double.longBitsToDouble(result) + } + + override def nextFloat(): Float = { + // first generate random bits until we get the first 1, counting the number + // of zeroes + var bits: Int = nextInt() + // the exponent starts at 126 and subtracts the number of leading zeroes, + // to account for the exponent bias in IEE754 + var exponent: Int = 126 + while (bits == 0) { + bits = nextInt() + exponent -= 32 + } + // use trailing zeroes instead of leading zeroes as slight optimization, + // but probabilistically equivalent + val e = java.lang.Long.numberOfTrailingZeros(bits) + exponent -= e + // If there are at least 23 bits before the trailing 1, use those + val significand = (if (e < 9) bits else nextInt()) >>> 9 + val result = (exponent << 23) | significand + java.lang.Float.intBitsToFloat(result) + } +} \ No newline at end of file diff --git a/hail/src/main/scala/is/hail/expr/ir/Requiredness.scala b/hail/src/main/scala/is/hail/expr/ir/Requiredness.scala index af5b747149e..077237ddc6d 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Requiredness.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Requiredness.scala @@ -289,7 +289,7 @@ class Requiredness(val usesAndDefs: UsesAndDefs, ctx: ExecuteContext) { refMap(partitionStreamName).foreach { u => defs.bind(u, Array[BaseTypeWithRequiredness](RIterable(lookup(child).rowType))) } val refs = refMap.getOrElse(globalName, FastIndexedSeq()) ++ refMap.getOrElse(partitionStreamName, FastIndexedSeq()) dependents.getOrElseUpdate(child, mutable.Set[RefEquality[BaseIR]]()) ++= refs - case _ => fatal(Pretty(node)) + case _ => fatal(Pretty(ctx, node)) } } @@ -758,6 +758,7 @@ class Requiredness(val usesAndDefs: UsesAndDefs, ctx: ExecuteContext) { coerce[RStruct](requiredness).field("global").unionFrom(lookup(c).globalType) case BlockMatrixToValueApply(child, GetElement(_)) => // BlockMatrix elements are all required case BlockMatrixCollect(child) => // BlockMatrix elements are all required + case BlockMatrixWrite(child, writer) => // write result is required } requiredness.probeChangedAndReset() } diff --git a/hail/src/main/scala/is/hail/expr/ir/Simplify.scala b/hail/src/main/scala/is/hail/expr/ir/Simplify.scala index 06646653420..1891e894941 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Simplify.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Simplify.scala @@ -1,6 +1,7 @@ package is.hail.expr.ir import is.hail.HailContext +import is.hail.backend.ExecuteContext import is.hail.types.virtual._ import is.hail.io.bgen.MatrixBGENReader import is.hail.rvd.{PartitionBoundOrdering, RVDPartitionInfo} @@ -10,18 +11,18 @@ object Simplify { /** Transform 'ir' using simplification rules until none apply. 
*/ - def apply(ir: BaseIR): BaseIR = Simplify(ir, allowRepartitioning = true) + def apply(ctx: ExecuteContext, ir: BaseIR): BaseIR = Simplify(ctx, ir, allowRepartitioning = true) /** Use 'allowRepartitioning'=false when in a context where simplification * should not change the partitioning of the result of 'ast', such as when * some parent (downstream) node of 'ast' uses seeded randomness. */ - private[ir] def apply(ast: BaseIR, allowRepartitioning: Boolean): BaseIR = + private[ir] def apply(ctx: ExecuteContext, ast: BaseIR, allowRepartitioning: Boolean): BaseIR = ast match { - case ir: IR => simplifyValue(ir) - case tir: TableIR => simplifyTable(allowRepartitioning)(tir) - case mir: MatrixIR => simplifyMatrix(allowRepartitioning)(mir) - case bmir: BlockMatrixIR => simplifyBlockMatrix(bmir) + case ir: IR => simplifyValue(ctx)(ir) + case tir: TableIR => simplifyTable(ctx, allowRepartitioning)(tir) + case mir: MatrixIR => simplifyMatrix(ctx, allowRepartitioning)(mir) + case bmir: BlockMatrixIR => simplifyBlockMatrix(ctx)(bmir) } private[this] def visitNode[T <: BaseIR]( @@ -33,38 +34,38 @@ object Simplify { transform(t1).map(post).getOrElse(t1) } - private[this] def simplifyValue: IR => IR = + private[this] def simplifyValue(ctx: ExecuteContext): IR => IR = visitNode( - Simplify(_), + Simplify(ctx, _), rewriteValueNode, - simplifyValue) + simplifyValue(ctx)) - private[this] def simplifyTable(allowRepartitioning: Boolean)(tir: TableIR): TableIR = + private[this] def simplifyTable(ctx: ExecuteContext, allowRepartitioning: Boolean)(tir: TableIR): TableIR = visitNode( - Simplify(_, allowRepartitioning && isDeterministicallyRepartitionable(tir)), - rewriteTableNode(allowRepartitioning), - simplifyTable(allowRepartitioning) + Simplify(ctx, _, allowRepartitioning && isDeterministicallyRepartitionable(tir)), + rewriteTableNode(ctx, allowRepartitioning), + simplifyTable(ctx, allowRepartitioning) )(tir) - private[this] def simplifyMatrix(allowRepartitioning: Boolean)(mir: MatrixIR): MatrixIR = + private[this] def simplifyMatrix(ctx: ExecuteContext, allowRepartitioning: Boolean)(mir: MatrixIR): MatrixIR = visitNode( - Simplify(_, allowRepartitioning && isDeterministicallyRepartitionable(mir)), + Simplify(ctx, _, allowRepartitioning && isDeterministicallyRepartitionable(mir)), rewriteMatrixNode(allowRepartitioning), - simplifyMatrix(allowRepartitioning) + simplifyMatrix(ctx, allowRepartitioning) )(mir) - private[this] def simplifyBlockMatrix(bmir: BlockMatrixIR): BlockMatrixIR = { + private[this] def simplifyBlockMatrix(ctx: ExecuteContext)(bmir: BlockMatrixIR): BlockMatrixIR = { visitNode( - Simplify(_), + Simplify(ctx, _), rewriteBlockMatrixNode, - simplifyBlockMatrix + simplifyBlockMatrix(ctx) )(bmir) } private[this] def rewriteValueNode: IR => Option[IR] = valueRules.lift - private[this] def rewriteTableNode(allowRepartitioning: Boolean)(tir: TableIR): Option[TableIR] = - tableRules(allowRepartitioning && isDeterministicallyRepartitionable(tir)).lift(tir) + private[this] def rewriteTableNode(ctx: ExecuteContext, allowRepartitioning: Boolean)(tir: TableIR): Option[TableIR] = + tableRules(ctx, allowRepartitioning && isDeterministicallyRepartitionable(tir)).lift(tir) private[this] def rewriteMatrixNode(allowRepartitioning: Boolean)(mir: MatrixIR): Option[MatrixIR] = matrixRules(allowRepartitioning && isDeterministicallyRepartitionable(mir)).lift(mir) @@ -594,7 +595,7 @@ object Simplify { case LiftMeOut(child) if IsConstant(child) => child } - private[this] def tableRules(canRepartition: Boolean): 
PartialFunction[TableIR, TableIR] = { + private[this] def tableRules(ctx: ExecuteContext, canRepartition: Boolean): PartialFunction[TableIR, TableIR] = { case TableRename(child, m1, m2) if m1.isTrivial && m2.isTrivial => child @@ -782,10 +783,16 @@ object Simplify { TableAggregateByKey(child, expr) case TableAggregateByKey(x@TableKeyBy(child, keys, false), expr) if canRepartition && !x.definitelyDoesNotShuffle => - TableKeyByAndAggregate(child, expr, MakeStruct(keys.map(k => k -> GetField(Ref("row", child.typ.rowType), k))), bufferSize = HailContext.getFlag("grouped_aggregate_buffer_size").toInt) + TableKeyByAndAggregate(child, expr, MakeStruct(keys.map(k => k -> GetField(Ref("row", child.typ.rowType), k))), bufferSize = ctx.getFlag("grouped_aggregate_buffer_size").toInt) case TableParallelize(TableCollect(child), _) if isDeterministicallyRepartitionable(child) => child + case TableFilterIntervals(child, intervals, keep) if intervals.isEmpty => + if (keep) + TableFilter(child, False()) + else + child + // push down filter intervals nodes case TableFilterIntervals(TableFilter(child, pred), intervals, keep) => TableFilter(TableFilterIntervals(child, intervals, keep), pred) diff --git a/hail/src/main/scala/is/hail/expr/ir/SpecializedArrayBuilders.scala b/hail/src/main/scala/is/hail/expr/ir/SpecializedArrayBuilders.scala index a6e724a0619..2376f2f618c 100644 --- a/hail/src/main/scala/is/hail/expr/ir/SpecializedArrayBuilders.scala +++ b/hail/src/main/scala/is/hail/expr/ir/SpecializedArrayBuilders.scala @@ -806,6 +806,69 @@ final class BooleanArrayBuilder(initialCapacity: Int = 16) { } } +final class StringArrayBuilder(initialCapacity: Int = 16) { + var size_ : Int = 0 + var b: Array[String] = new Array[String](initialCapacity) + + def size: Int = size_ + + def setSize(n: Int) { + require(n >= 0 && n <= size) + size_ = n + } + + def apply(i: Int): String = { + require(i >= 0 && i < size) + b(i) + } + + def ensureCapacity(n: Int): Unit = { + if (b.length < n) { + val newCapacity = math.max(n, b.length * 2) + val newb = new Array[String](newCapacity) + Array.copy(b, 0, newb, 0, size_) + b = newb + } + } + + def +=(x: String) = add(x) + + def add(x: String): Unit = { + ensureCapacity(size_ + 1) + b(size_) = x + size_ += 1 + } + + def update(i: Int, x: String): Unit = { + require(i >= 0 && i < size) + b(i) = x + } + + def clear() { size_ = 0 } + + def result(): Array[String] = { + val a = new Array[String](size_) + System.arraycopy(b, 0, a, 0, size_) + a + } + + def clearAndResize(): Unit = { + size_ = 0 + if (b.length > initialCapacity) + b = new Array[String](initialCapacity) + } + def appendFrom(ab2: StringArrayBuilder): Unit = { + ensureCapacity(size_ + ab2.size_) + System.arraycopy(ab2.b, 0, b, size_, ab2.size_) + size_ = size_ + ab2.size_ + } + + def pop(): String = { + size_ -= 1 + b(size) + } +} + final class AnyRefArrayBuilder[T <: AnyRef](initialCapacity: Int = 16)(implicit ct: ClassTag[T]) { var size_ : Int = 0 diff --git a/hail/src/main/scala/is/hail/expr/ir/StringTableReader.scala b/hail/src/main/scala/is/hail/expr/ir/StringTableReader.scala index 50946b0bd60..7ce9b95a40b 100644 --- a/hail/src/main/scala/is/hail/expr/ir/StringTableReader.scala +++ b/hail/src/main/scala/is/hail/expr/ir/StringTableReader.scala @@ -12,16 +12,21 @@ import is.hail.types.physical.stypes.interfaces.{SBaseStructValue, SStreamValue} import is.hail.types.physical.{PCanonicalString, PCanonicalStruct, PField, PStruct} import is.hail.types.virtual.{TArray, TString, TStruct, Type} import 
is.hail.types.{BaseTypeWithRequiredness, RStruct, TableType, TypeWithRequiredness} -import is.hail.utils.{FastIndexedSeq, FastSeq, fatal} +import is.hail.types.physical.{PCanonicalString, PCanonicalStruct, PField, PStruct, PType} +import is.hail.types.virtual.{Field, TArray, TStream, TString, TStruct, Type} +import is.hail.utils.{FastIndexedSeq, FastSeq, checkGzippedFile, fatal} import org.json4s.{Extraction, Formats, JValue} case class StringTableReaderParameters( files: Array[String], - minPartitions: Option[Int]) + minPartitions: Option[Int], + forceBGZ: Boolean, + forceGZ: Boolean, + filePerPartition: Boolean) object StringTableReader { def apply(fs: FS, params: StringTableReaderParameters): StringTableReader = { - val fileStatuses = getFileStatuses(fs, params.files) + val fileStatuses = getFileStatuses(fs, params.files, params.forceBGZ, params.forceGZ) new StringTableReader(params, fileStatuses) } def fromJValue(fs: FS, jv: JValue): StringTableReader = { @@ -30,10 +35,17 @@ object StringTableReader { StringTableReader(fs, params) } - def getFileStatuses(fs: FS, files: Array[String]): Array[FileStatus] = { + def getFileStatuses(fs: FS, files: Array[String], forceBGZ: Boolean, forceGZ: Boolean): Array[FileStatus] = { val status = fs.globAllStatuses(files) if (status.isEmpty) fatal(s"arguments refer to no files: ${files.toIndexedSeq}.") + if (!forceBGZ) { + status.foreach { status => + val file = status.getPath + if (file.endsWith(".gz")) + checkGzippedFile(fs, file, forceGZ, forceBGZ) + } + } status } } @@ -122,7 +134,8 @@ class StringTableReader( override def lower(ctx: ExecuteContext, requestedType: TableType): TableStage = { val fs = ctx.fs - val lines = GenericLines.read(fs, fileStatuses, None, None, params.minPartitions, false, true) + val lines = GenericLines.read(fs, fileStatuses, None, None, params.minPartitions, false, true, + params.filePerPartition) TableStage(globals = MakeStruct(FastSeq()), partitioner = RVDPartitioner.unkeyed(lines.nPartitions), dependency = TableStageDependency.none, @@ -137,12 +150,10 @@ class StringTableReader( val (broadCastRow, rVD) = TableStageToRVD.apply(ctx, ts, Map[String, IR]()) TableValue(ctx, tr.typ, broadCastRow, rVD) } - override def partitionCounts: Option[IndexedSeq[Long]] = None override def rowAndGlobalPTypes(ctx: ExecuteContext, requestedType: TableType): (PStruct, PStruct) = (PCanonicalStruct(IndexedSeq(PField("file", PCanonicalString(true), 0), PField("text", PCanonicalString(true), 1)), true).subsetTo(requestedType.rowType).asInstanceOf[PStruct], PCanonicalStruct.empty(required = true)) - } diff --git a/hail/src/main/scala/is/hail/expr/ir/TableIR.scala b/hail/src/main/scala/is/hail/expr/ir/TableIR.scala index 84f8c6ad3b1..78d57898cca 100644 --- a/hail/src/main/scala/is/hail/expr/ir/TableIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/TableIR.scala @@ -58,7 +58,7 @@ abstract sealed class TableIR extends BaseIR { } protected[ir] def execute(ctx: ExecuteContext, r: TableRunContext): TableExecuteIntermediate = - fatal("tried to execute unexecutable IR:\n" + Pretty(this)) + fatal("tried to execute unexecutable IR:\n" + Pretty(ctx, this)) override def copy(newChildren: IndexedSeq[BaseIR]): TableIR @@ -107,7 +107,6 @@ object TableReader { def fromJValue(fs: FS, jv: JValue): TableReader = { (jv \ "name").extract[String] match { case "TableNativeReader" => TableNativeReader.fromJValue(fs, jv) - case "TextTableReader" => TextTableReader.fromJValue(fs, jv) case "TableFromBlockMatrixNativeReader" => 
TableFromBlockMatrixNativeReader.fromJValue(fs, jv) case "StringTableReader" => StringTableReader.fromJValue(fs, jv) case "AvroTableReader" => AvroTableReader.fromJValue(jv) @@ -124,7 +123,7 @@ object LoweredTableReader { contextType: Type, contexts: IndexedSeq[Any], keyType: TStruct, - keyPType: (TStruct) => PStruct, + bodyPType: (TStruct) => PStruct, keys: (TStruct) => (Region, HailClassLoader, FS, Any) => Iterator[Long] ): LoweredTableReaderCoercer = { assert(key.nonEmpty) @@ -173,7 +172,7 @@ object LoweredTableReader { ReadPartition(ctx, keyType, new PartitionIteratorLongReader( keyType, contextType, - (requestedType: Type) => keyPType(requestedType.asInstanceOf[TStruct]), + (requestedType: Type) => bodyPType(requestedType.asInstanceOf[TStruct]), (requestedType: Type) => keys(requestedType.asInstanceOf[TStruct]))), "key", MakeStruct(FastIndexedSeq( @@ -398,7 +397,7 @@ object LoweredTableReader { ToStream(Literal(TArray(contextType), partOrigIndex.map(i => contexts(i)))), body) - val rowRType = TypeWithRequiredness(tableStage.rowType).asInstanceOf[RStruct] + val rowRType = VirtualTypeWithReq(bodyPType(tableStage.rowType)).r.asInstanceOf[RStruct] ctx.backend.lowerDistributedSort(ctx, tableStage, @@ -495,7 +494,7 @@ case class PartitionRVDReader(rvd: RVD) extends PartitionReader { FastIndexedSeq(("elt", SingleCodeEmitParamType(true, PTypeReferenceSingleCodeType(rvd.rowPType)))), FastIndexedSeq(classInfo[Region], LongInfo), LongInfo, - PruneDeadFields.upcast(Ref("elt", rvd.rowType), requestedType)) + PruneDeadFields.upcast(ctx, Ref("elt", rvd.rowType), requestedType)) val upcastCode = mb.getObject[Function4[HailClassLoader, FS, Int, Region, AsmFunction2RegionLongLong]](upcast) @@ -2104,7 +2103,7 @@ case class TableMapRows(child: TableIR, newRow: IR) extends TableIR { } } - if (HailContext.getFlag("distributed_scan_comb_op") != null && extracted.shouldTreeAggregate) { + if (ctx.getFlag("distributed_scan_comb_op") != null && extracted.shouldTreeAggregate) { val fsBc = ctx.fs.broadcast val tmpBase = ctx.createTmpPath("table-map-rows-distributed-scan") val d = digitsNeeded(tv.rvd.getNumPartitions) @@ -2236,7 +2235,7 @@ case class TableMapRows(child: TableIR, newRow: IR) extends TableIR { } Iterator.single(write(aggRegion, seq.getAggOffset())) } - }, HailContext.getFlag("max_leader_scans").toInt) + }, ctx.getFlag("max_leader_scans").toInt) // 3. load in partition aggregations, comb op as necessary, write back out. 
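
[A minimal illustrative sketch, not from the patch itself: the prefix-combination step performed by the scanLeft on the following line, with plain Longs standing in for serialized aggregator states and addition standing in for the comb op. Each partition starts from the combined state of everything before it; the final element is the grand total.]

    // illustrative values only
    val perPartitionTotals = Seq(3L, 5L, 2L)
    val prefixStates = perPartitionTotals.scanLeft(0L)(_ + _)  // Seq(0, 3, 8, 10)
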
val partAggs = scanPartitionAggs.scanLeft(initAgg)(combOpFNeedsPool(() => ctx.r.pool)) diff --git a/hail/src/main/scala/is/hail/expr/ir/TypeCheck.scala b/hail/src/main/scala/is/hail/expr/ir/TypeCheck.scala index fb09854cbe5..270ddf85daa 100644 --- a/hail/src/main/scala/is/hail/expr/ir/TypeCheck.scala +++ b/hail/src/main/scala/is/hail/expr/ir/TypeCheck.scala @@ -1,33 +1,34 @@ package is.hail.expr.ir +import is.hail.backend.ExecuteContext import is.hail.expr.ir.streams.StreamUtils import is.hail.types.virtual._ import is.hail.utils._ object TypeCheck { - def apply(ir: BaseIR): Unit = { + def apply(ctx: ExecuteContext, ir: BaseIR): Unit = { try { - check(ir, BindingEnv.empty) + check(ctx, ir, BindingEnv.empty) } catch { - case e: Throwable => fatal(s"Error while typechecking IR:\n${ Pretty(ir) }", e) + case e: Throwable => fatal(s"Error while typechecking IR:\n${ Pretty(ctx, ir) }", e) } } - def apply(ir: IR, env: BindingEnv[Type]): Unit = { + def apply(ctx: ExecuteContext, ir: IR, env: BindingEnv[Type]): Unit = { try { - check(ir, env) + check(ctx, ir, env) } catch { - case e: Throwable => fatal(s"Error while typechecking IR:\n${ Pretty(ir) }", e) + case e: Throwable => fatal(s"Error while typechecking IR:\n${ Pretty(ctx, ir) }", e) } } - private def check(ir: BaseIR, env: BindingEnv[Type]): Unit = { + private def check(ctx: ExecuteContext, ir: BaseIR, env: BindingEnv[Type]): Unit = { ir.children .iterator .zipWithIndex .foreach { case (child, i) => - check(child, ChildBindings(ir, i, env)) + check(ctx, child, ChildBindings(ir, i, env)) if (child.typ == TVoid) { ir match { @@ -44,7 +45,7 @@ object TypeCheck { case _: WriteMetadata => case _ => throw new RuntimeException(s"unexpected void-typed IR at child $i of ${ ir.getClass.getSimpleName }" + - s"\n IR: ${ Pretty(ir) }") + s"\n IR: ${ Pretty(ctx, ir) }") } } } @@ -323,7 +324,7 @@ object TypeCheck { assert(body.typ.isInstanceOf[TStream]) case x@StreamFold(a, zero, accumName, valueName, body) => assert(a.typ.isInstanceOf[TStream]) - assert(a.typ.asInstanceOf[TStream].elementType.isRealizable, Pretty(x)) + assert(a.typ.asInstanceOf[TStream].elementType.isRealizable, Pretty(ctx, x)) assert(body.typ == zero.typ) assert(x.typ == zero.typ) case x@StreamFold2(a, accum, valueName, seq, res) => @@ -472,7 +473,7 @@ object TypeCheck { case MatrixToValueApply(_, _) => case BlockMatrixToValueApply(_, _) => case BlockMatrixCollect(_) => - case BlockMatrixWrite(_, _) => + case BlockMatrixWrite(_, writer) => writer.loweredTyp case BlockMatrixMultiWrite(_, _) => case ValueToBlockMatrix(child, _, _) => assert(child.typ.isInstanceOf[TArray] || child.typ.isInstanceOf[TNDArray] || child.typ == TFloat64) diff --git a/hail/src/main/scala/is/hail/expr/ir/analyses/ComputeMethodSplits.scala b/hail/src/main/scala/is/hail/expr/ir/analyses/ComputeMethodSplits.scala index ce9f1eb6c15..5e26262d44d 100644 --- a/hail/src/main/scala/is/hail/expr/ir/analyses/ComputeMethodSplits.scala +++ b/hail/src/main/scala/is/hail/expr/ir/analyses/ComputeMethodSplits.scala @@ -1,13 +1,14 @@ package is.hail.expr.ir.analyses import is.hail.HailContext +import is.hail.backend.ExecuteContext import is.hail.expr.ir._ object ComputeMethodSplits { - def apply(ir: IR, controlFlowPreventsSplit: Memo[Unit]): Memo[Unit] = { + def apply(ctx: ExecuteContext, ir: IR, controlFlowPreventsSplit: Memo[Unit]): Memo[Unit] = { val m = Memo.empty[Unit] - val splitThreshold = HailContext.getFlag("method_split_ir_limit").toInt + val splitThreshold = ctx.getFlag("method_split_ir_limit").toInt 
require(splitThreshold > 0, s"invalid method_split_ir_limit") def recurAndComputeSizeUnderneath(x: IR): Int = { diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/Functions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/Functions.scala index 8df34cb93b1..46db3559b2f 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/Functions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/Functions.scala @@ -6,6 +6,7 @@ import is.hail.expr.ir._ import is.hail.types._ import is.hail.utils._ import is.hail.asm4s.coerce +import is.hail.backend.ExecuteContext import is.hail.experimental.ExperimentalFunctions import is.hail.types.physical._ import is.hail.types.physical.stypes.{EmitType, SCode, SType, SValue} @@ -82,6 +83,39 @@ object IRFunctionRegistry { }) } + def pyRegisterIRForServiceBackend( + ctx: ExecuteContext, + name: String, + typeParamStrs: Array[String], + argNames: Array[String], + argTypeStrs: Array[String], + returnType: String, + bodyStr: String + ): Unit = { + requireJavaIdentifier(name) + + val typeParameters = typeParamStrs.map(IRParser.parseType).toFastIndexedSeq + val valueParameterTypes = argTypeStrs.map(IRParser.parseType).toFastIndexedSeq + val refMap = argNames.zip(valueParameterTypes).toMap + val body = IRParser.parse_value_ir( + bodyStr, + IRParserEnvironment(ctx, refMap, Map()) + ) + + userAddedFunctions += ((name, (body.typ, typeParameters, valueParameterTypes))) + addIR( + name, + typeParameters, + valueParameterTypes, + IRParser.parseType(returnType), + false, + { (_, args, _) => + Subst(body, + BindingEnv(Env[IR](argNames.zip(args): _*))) + } + ) + } + def removeIRFunction( name: String, returnType: Type, @@ -488,6 +522,10 @@ abstract class RegistryFunctions { pt: (Type, SType, SType, SType) => SType)(cls: Class[_], method: String): Unit = registerWrappedScalaFunction(name, Array(a1, a2, a3), returnType, unwrappedApply(pt))(cls, method) + def registerWrappedScalaFunction4(name: String, a1: Type, a2: Type, a3: Type, a4: Type, returnType: Type, + pt: (Type, SType, SType, SType, SType) => SType)(cls: Class[_], method: String): Unit = + registerWrappedScalaFunction(name, Array(a1, a2, a3, a4), returnType, unwrappedApply(pt))(cls, method) + def registerJavaStaticFunction(name: String, valueParameterTypes: Array[Type], returnType: Type, pt: (Type, Seq[SType]) => SType)(cls: Class[_], method: String) { registerCode(name, valueParameterTypes, returnType, pt) { case (r, cb, rt, _, args) => val cts = valueParameterTypes.map(PrimitiveTypeToIRIntermediateClassTag(_).runtimeClass) diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/RelationalFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/RelationalFunctions.scala index ec62ed1ff93..da7274f51b3 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/RelationalFunctions.scala +++ b/hail/src/main/scala/is/hail/expr/ir/functions/RelationalFunctions.scala @@ -1,6 +1,7 @@ package is.hail.expr.ir.functions import is.hail.backend.ExecuteContext +import is.hail.expr.ir.lowering.TableStage import is.hail.expr.ir.{LowerMatrixIR, MatrixValue, RelationalSpec, TableReader, TableValue} import is.hail.types.virtual.Type import is.hail.types.{BlockMatrixType, MatrixType, RTable, TableType, TypeWithRequiredness} diff --git a/hail/src/main/scala/is/hail/expr/ir/functions/StringFunctions.scala b/hail/src/main/scala/is/hail/expr/ir/functions/StringFunctions.scala index c2d22891715..e9f508a9e17 100644 --- a/hail/src/main/scala/is/hail/expr/ir/functions/StringFunctions.scala +++ 
b/hail/src/main/scala/is/hail/expr/ir/functions/StringFunctions.scala @@ -1,15 +1,11 @@ package is.hail.expr.ir.functions -import java.time.temporal.ChronoField -import java.time.{Instant, ZoneId} -import java.util.Locale import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.JSONAnnotationImpex import is.hail.expr.ir._ -import is.hail.types.physical._ import is.hail.types.physical.stypes._ -import is.hail.types.physical.stypes.concrete.{SIndexablePointer, SJavaArrayString, SJavaString, SStringPointer} +import is.hail.types.physical.stypes.concrete.{SJavaArrayString, SJavaArrayStringSettable, SJavaArrayStringValue, SJavaString} import is.hail.types.physical.stypes.interfaces._ import is.hail.types.physical.stypes.primitives.{SBoolean, SInt32, SInt64} import is.hail.types.virtual._ @@ -18,10 +14,13 @@ import org.apache.spark.sql.Row import org.json4s.JValue import org.json4s.jackson.JsonMethods +import java.time.temporal.ChronoField +import java.time.{Instant, ZoneId} +import java.util.Locale +import java.util.regex.{Matcher, Pattern} import scala.collection.mutable object StringFunctions extends RegistryFunctions { - def reverse(s: String): String = { val sb = new StringBuilder sb.append(s) @@ -46,11 +45,13 @@ object StringFunctions extends RegistryFunctions { def regexMatch(regex: String, s: String): Boolean = regex.r.findFirstIn(s).isDefined + def regexFullMatch(regex: String, s: String): Boolean = s.matches(regex) + def concat(s: String, t: String): String = s + t def replace(str: String, pattern1: String, pattern2: String): String = str.replaceAll(pattern1, pattern2) - + def split(s: String, p: String): Array[String] = s.split(p, -1) def translate(s: String, d: Map[String, String]): String = { @@ -82,10 +83,142 @@ object StringFunctions extends RegistryFunctions { def escapeString(s: String): String = StringEscapeUtils.escapeString(s) + def addValueOrNull(ab: StringArrayBuilder, value: String, missingValues: Array[String]): Unit = { + var i = 0 + while (i < missingValues.length) { + if (missingValues(i) == value) { + ab += null + return + } + i += 1 + } + ab += value + } + + def matchPattern(s: String, i: Int, m: Matcher): Int = { + m.region(i, s.length) + if (m.lookingAt()) + m.end() - m.start() + else + -1 + } + + def generateSplitQuotedRegex( + cb: EmitCodeBuilder, + string: Value[String], + separator: Either[Value[Char], Value[String]], + quoteChar: Option[Value[Char]], + missingSV: SIndexableValue, + errorID: Value[Int] + ): Value[Array[String]] = { + + // note: it will be inefficient to convert a SIndexablePointer to SJavaArrayString to split each line. 
+ // We should really choose SJavaArrayString as the stype for a literal if used in a place like this, + // but this is a non-local stype decision that is hard in the current system + val missing: Value[Array[String]] = missingSV.st match { + case SJavaArrayString(elementRequired) => missingSV.asInstanceOf[SJavaArrayStringSettable].array + case _ => + val mb = cb.emb.ecb.newEmitMethod("convert_region_to_str_array", FastIndexedSeq(missingSV.st.paramType), arrayInfo[String]) + mb.emitWithBuilder[Array[String]] { cb => + val sv = mb.getSCodeParam(1).asIndexable + val m = cb.newLocal[Array[String]]("missingvals", Code.newArray[String](sv.loadLength())) + sv.forEachDefined(cb) { case (cb, idx, sc) => cb += (m(idx) = sc.asString.loadString(cb)) } + m + } + cb.newLocal[Array[String]]("missing_arr", cb.invokeCode(mb, missingSV)) + } + + // lazy field reused across calls to split functions + val ab = cb.emb.getOrDefineLazyField[StringArrayBuilder](Code.newInstance[StringArrayBuilder, Int](16), "generate_split_quoted_regex_ab") + cb += ab.invoke[Unit]("clear") + + // takes the current position and current char value, returns the number of matching chars + // in the separator, or -1 if not a separator + val getPatternMatch: (Value[Int], Value[Char]) => Value[Int] = { + val x = cb.newLocal[Int]("sepCharMatch"); + separator match { + case Left(sepChar) => + (_: Value[Int], char: Value[Char]) => { + cb.ifx(char.ceq(sepChar), cb.assign(x, 1), cb.assign(x, -1)); + x + } + case Right(regex) => + val m = cb.newLocal[Matcher]("matcher", + Code.invokeStatic1[Pattern, String, Pattern]("compile", regex) + .invoke[CharSequence, Matcher]("matcher", string)); + (idx: Value[Int], _: Value[Char]) => { + cb.assign(x, Code.invokeScalaObject3[String, Int, Matcher, Int]( + StringFunctions.getClass, "matchPattern", string, idx, m)); + x + } + } + } + + val i = cb.newLocal[Int]("i", 0) + val lastFieldStart = cb.newLocal[Int]("lastfieldstart", 0) + + def addValueOrNA(cb: EmitCodeBuilder, endIdx: Code[Int]): Unit = { + cb += Code.invokeScalaObject3[StringArrayBuilder, String, Array[String], Unit]( + StringFunctions.getClass, "addValueOrNull", ab, string.invoke[Int, Int, String]("substring", lastFieldStart, endIdx), missing) + } + + val LreturnWithoutAppending = CodeLabel() + + cb.whileLoop(i < string.length(), { + val c = cb.newLocal[Char]("c", string(i)) + + val l = getPatternMatch(i, c) + cb.ifx(l.cne(-1), { + addValueOrNA(cb, i) + cb.assign(i, i + l) // skip delim + cb.assign(lastFieldStart, i) + }, { + quoteChar match { + case Some(qc) => + cb.ifx(c.ceq(qc), { + cb.ifx(i.cne(lastFieldStart), + cb._fatalWithError(errorID, "opening quote character '", qc.toS, "' not at start of field")) + cb.assign(i, i + 1) // skip quote + cb.assign(lastFieldStart, i) + + cb.whileLoop(i < string.length() && string(i).cne(qc), { + cb.assign(i, i + 1) + }) + + addValueOrNA(cb, i) + + cb.ifx(i.ceq(string.length()), + cb._fatalWithError(errorID, "missing terminating quote character '", qc.toS, "'")) + cb.assign(i, i + 1) // skip quote + + cb.ifx(i < string.length, { + cb.assign(c, string(i)) + val l = getPatternMatch(i, c) + cb.ifx(l.ceq(-1), { + cb._fatalWithError(errorID, "terminating quote character '", qc.toS, "' not at end of field") + }) + cb.assign(i, i + l) // skip delim + cb.assign(lastFieldStart, i) + }, { + cb.goto(LreturnWithoutAppending) + }) + }, { + cb.assign(i, i + 1) + }) + case None => + cb.assign(i, i + 1) + } + }) + }) + + addValueOrNA(cb, string.length()) + cb.define(LreturnWithoutAppending) + 
cb.memoize(ab.invoke[Array[String]]("result"), "generateSplitQuotedRegexResult") + } + def softBounds(i: IR, len: IR): IR = If(i < -len, 0, If(i < 0, i + len, If(i >= len, len, i))) - private val locale: Locale = Locale.US def strftime(fmtStr: String, epochSeconds: Long, zoneId: String): String = @@ -189,6 +322,7 @@ object StringFunctions extends RegistryFunctions { IEmitCode.present(cb, st.construct(cb, str)) } + registerWrappedScalaFunction1("reverse", TString, TString, (_: Type, _: SType) => SJavaString)(thisClass, "reverse") registerWrappedScalaFunction1("upper", TString, TString, (_: Type, _: SType) => SJavaString)(thisClass, "upper") registerWrappedScalaFunction1("lower", TString, TString, (_: Type, _: SType) => SJavaString)(thisClass, "lower") @@ -208,6 +342,9 @@ object StringFunctions extends RegistryFunctions { registerWrappedScalaFunction2("regexMatch", TString, TString, TBoolean, { case (_: Type, _: SType, _: SType) => SBoolean })(thisClass, "regexMatch") + registerWrappedScalaFunction2("regexFullMatch", TString, TString, TBoolean, { + case (_: Type, _: SType, _: SType) => SBoolean + })(thisClass, "regexFullMatch") registerWrappedScalaFunction2("concat", TString, TString, TString, { case (_: Type, _: SType, _: SType) => SJavaString })(thisClass, "concat") @@ -230,6 +367,61 @@ object StringFunctions extends RegistryFunctions { case (_: Type, _: SType, _: SType) => SJavaString })(thisClass, "setMkString") + registerSCode4("splitQuotedRegex", TString, TString, TArray(TString), TString, TArray(TString), { + case (_: Type, _: SType, _: SType, _: SType, _: SType) => SJavaArrayString(false) + }) { case (r, cb, st: SJavaArrayString, s, separator, missing, quote, errorID) => + val quoteStr = cb.newLocal[String]("quoteStr", quote.asString.loadString(cb)) + val quoteChar = cb.newLocal[Char]("quoteChar") + cb.ifx(quoteStr.length().cne(1), cb._fatalWithError(errorID, "quote must be a single character")) + cb.assign(quoteChar, quoteStr(0)) + + val string = cb.newLocal[String]("string", s.asString.loadString(cb)) + val sep = cb.newLocal[String]("sep", separator.asString.loadString(cb)) + val mv = missing.asIndexable + + new SJavaArrayStringValue(st, generateSplitQuotedRegex(cb, string, Right(sep), Some(quoteChar), mv, errorID)) + } + + registerSCode4("splitQuotedChar", TString, TString, TArray(TString), TString, TArray(TString), { + case (_: Type, _: SType, _: SType, _: SType, _: SType) => SJavaArrayString(false) + }) { case (r, cb, st: SJavaArrayString, s, separator, missing, quote, errorID) => + val quoteStr = cb.newLocal[String]("quoteStr", quote.asString.loadString(cb)) + val quoteChar = cb.newLocal[Char]("quoteChar") + cb.ifx(quoteStr.length().cne(1), cb._fatalWithError(errorID, "quote must be a single character")) + cb.assign(quoteChar, quoteStr(0)) + + val string = cb.newLocal[String]("string", s.asString.loadString(cb)) + val sep = cb.newLocal[String]("sep", separator.asString.loadString(cb)) + val sepChar = cb.newLocal[Char]("sepChar") + cb.ifx(sep.length().cne(1), cb._fatalWithError(errorID, "splitQuotedChar expected a single character for separator")) + cb.assign(sepChar, sep(0)) + val mv = missing.asIndexable + + new SJavaArrayStringValue(st, generateSplitQuotedRegex(cb, string, Left(sepChar), Some(quoteChar), mv, errorID)) + } + + registerSCode3("splitRegex", TString, TString, TArray(TString), TArray(TString), { + case (_: Type, _: SType, _: SType, _: SType) => SJavaArrayString(false) + }) { case (r, cb, st: SJavaArrayString, s, separator, missing, errorID) => + val string = 
cb.newLocal[String]("string", s.asString.loadString(cb)) + val sep = cb.newLocal[String]("sep", separator.asString.loadString(cb)) + val mv = missing.asIndexable + new SJavaArrayStringValue(st, generateSplitQuotedRegex(cb, string, Right(sep), None, mv, errorID)) + } + + registerSCode3("splitChar", TString, TString, TArray(TString), TArray(TString), { + case (_: Type, _: SType, _: SType, _: SType) => SJavaArrayString(false) + }) { case (r, cb, st: SJavaArrayString, s, separator, missing, errorID) => + val string = cb.newLocal[String]("string", s.asString.loadString(cb)) + val sep = cb.newLocal[String]("sep", separator.asString.loadString(cb)) + val sepChar = cb.newLocal[Char]("sepChar") + cb.ifx(sep.length().cne(1), cb._fatalWithError(errorID, "splitChar expected a single character for separator")) + cb.assign(sepChar, sep(0)) + val mv = missing.asIndexable + + new SJavaArrayStringValue(st, generateSplitQuotedRegex(cb, string, Left(sepChar), None, mv, errorID)) + } + registerWrappedScalaFunction2("mkString", TArray(TString), TString, TString, { case (_: Type, _: SType, _: SType) => SJavaString })(thisClass, "arrayMkString") diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/CanLowerEfficiently.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/CanLowerEfficiently.scala index b3697ad1a82..05ee96b5a8f 100644 --- a/hail/src/main/scala/is/hail/expr/ir/lowering/CanLowerEfficiently.scala +++ b/hail/src/main/scala/is/hail/expr/ir/lowering/CanLowerEfficiently.scala @@ -1,19 +1,19 @@ package is.hail.expr.ir.lowering import is.hail.HailContext +import is.hail.backend.ExecuteContext import is.hail.expr.ir._ -import is.hail.expr.ir.functions.TableToValueFunction -import is.hail.io.TextMatrixReader +import is.hail.expr.ir.functions.{TableCalculateNewPartitions, TableToValueFunction} import is.hail.io.avro.AvroTableReader import is.hail.io.bgen.MatrixBGENReader import is.hail.io.plink.MatrixPLINKReader import is.hail.io.vcf.MatrixVCFReader -import is.hail.methods.{ForceCountTable, NPartitionsTable} +import is.hail.methods.{ForceCountTable, NPartitionsTable, TableFilterPartitions} object CanLowerEfficiently { - def apply(ir0: BaseIR): Option[String] = { + def apply(ctx: ExecuteContext, ir0: BaseIR): Option[String] = { - if (HailContext.getFlag("no_whole_stage_codegen") != null) + if (ctx.getFlag("no_whole_stage_codegen") != null) return Some("flag 'no_whole_stage_codegen' is enabled") var prohibitiveReason: Option[String] = None @@ -28,8 +28,6 @@ object CanLowerEfficiently { ir match { case TableRead(_, _, _: TableNativeReader) => case TableRead(_, _, _: TableNativeZippedReader) => - case TableRead(_, _, _: TextTableReader) => - case TableRead(_, _, _: TextMatrixReader) => case TableRead(_, _, _: StringTableReader) => case TableRead(_, _, _: MatrixPLINKReader) => case TableRead(_, _, _: MatrixVCFReader) => @@ -64,6 +62,7 @@ object CanLowerEfficiently { case t: TableAggregateByKey => case t: TableRename => case t: TableFilterIntervals => + case TableToTableApply(_, TableFilterPartitions(_, _)) => case t: TableToTableApply => fail(s"TableToTableApply") case t: BlockMatrixToTableApply => fail(s"BlockMatrixToTableApply") case t: BlockMatrixToTable => fail(s"BlockMatrixToTable has no lowered implementation") @@ -79,6 +78,7 @@ object CanLowerEfficiently { case TableCount(_) => case TableToValueApply(_, ForceCountTable()) => case TableToValueApply(_, NPartitionsTable()) => + case TableToValueApply(_, TableCalculateNewPartitions(_)) => case TableToValueApply(_, f: TableToValueFunction) => 
fail(s"TableToValueApply: no lowering for ${ f.getClass.getName }") case TableAggregate(_, _) => case TableCollect(_) => diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerBlockMatrixIR.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerBlockMatrixIR.scala index d4a1ed9c957..eefb8d06faf 100644 --- a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerBlockMatrixIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerBlockMatrixIR.scala @@ -15,7 +15,7 @@ object BlockMatrixStage { def broadcastVector(vector: IR, typ: BlockMatrixType, asRowVector: Boolean): BlockMatrixStage = { val v = Ref(genUID(), vector.typ) - new BlockMatrixStage(Array(v.name -> vector), TStruct("start" -> TInt32, "shape" -> TTuple(TInt32, TInt32))) { + new BlockMatrixStage(IndexedSeq(), Array(v.name -> vector), TStruct("start" -> TInt32, "shape" -> TTuple(TInt32, TInt32))) { def blockContext(idx: (Int, Int)): IR = { val (i, j) = typ.blockShape(idx._1, idx._2) val start = (if (asRowVector) idx._2 else idx._1) * typ.blockSize @@ -38,7 +38,7 @@ object BlockMatrixStage { } } -case class EmptyBlockMatrixStage(eltType: Type) extends BlockMatrixStage(Array(), TInt32) { +case class EmptyBlockMatrixStage(eltType: Type) extends BlockMatrixStage(IndexedSeq(), Array(), TInt32) { def blockContext(idx: (Int, Int)): IR = throw new LowererUnsupportedOperation("empty stage has no block contexts!") @@ -50,24 +50,31 @@ case class EmptyBlockMatrixStage(eltType: Type) extends BlockMatrixStage(Array() } } -abstract class BlockMatrixStage(val globalVals: Array[(String, IR)], val ctxType: Type) { +// Scope structure: +// letBindings are available in blockContext and broadcastVals. +// broadcastVals are available in the blockContext and the blockBody +abstract class BlockMatrixStage(val letBindings: IndexedSeq[(String, IR)], val broadcastVals: Array[(String, IR)], val ctxType: Type) { def blockContext(idx: (Int, Int)): IR def blockBody(ctxRef: Ref): IR + def wrapLetsAndBroadcasts(ctxIR: IR): IR = { + (letBindings ++ broadcastVals).foldRight[IR](ctxIR) { case ((f, v), accum) => Let(f, v, accum) } + } + def collectBlocks(relationalBindings: Map[String, IR])(f: (IR, IR) => IR, blocksToCollect: Array[(Int, Int)]): IR = { val ctxRef = Ref(genUID(), ctxType) val body = f(ctxRef, blockBody(ctxRef)) val ctxs = MakeStream(blocksToCollect.map(idx => blockContext(idx)), TStream(ctxRef.typ)) val bodyFreeVars = FreeVariables(body, supportsAgg = false, supportsScan = false) - val bcFields = globalVals.filter { case (f, _) => bodyFreeVars.eval.lookupOption(f).isDefined } + val bcFields = broadcastVals.filter { case (f, _) => bodyFreeVars.eval.lookupOption(f).isDefined } val bcVals = MakeStruct(bcFields.map { case (f, v) => f -> Ref(f, v.typ) }) val bcRef = Ref(genUID(), bcVals.typ) val wrappedBody = bcFields.foldLeft(body) { case (accum, (f, _)) => Let(f, GetField(bcRef, f), accum) } - val collect = CollectDistributedArray(ctxs, bcVals, ctxRef.name, bcRef.name, wrappedBody) - LowerToCDA.substLets(globalVals.foldRight[IR](collect) { case ((f, v), accum) => Let(f, v, accum) }, relationalBindings) + val collect = wrapLetsAndBroadcasts(CollectDistributedArray(ctxs, bcVals, ctxRef.name, bcRef.name, wrappedBody)) + LowerToCDA.substLets(collect, relationalBindings) } def collectLocal(relationalBindings: Map[String, IR], typ: BlockMatrixType): IR = { @@ -101,9 +108,17 @@ abstract class BlockMatrixStage(val globalVals: Array[(String, IR)], val ctxType Let(blockResults.name, cda, NDArrayConcat(rows, 0)) } + def addLets(newLets: (String, IR)*): 
BlockMatrixStage = { + val outer = this + new BlockMatrixStage(outer.letBindings ++ newLets, outer.broadcastVals, ctxType) { + override def blockContext(idx: (Int, Int)): IR = outer.blockContext(idx) + override def blockBody(ctxRef: Ref): IR = outer.blockBody(ctxRef) + } + } + def addGlobals(newGlobals: (String, IR)*): BlockMatrixStage = { val outer = this - new BlockMatrixStage(globalVals ++ newGlobals, ctxType) { + new BlockMatrixStage(outer.letBindings, broadcastVals ++ newGlobals, ctxType) { def blockContext(idx: (Int, Int)): IR = outer.blockContext(idx) def blockBody(ctxRef: Ref): IR = outer.blockBody(ctxRef) } @@ -112,7 +127,7 @@ abstract class BlockMatrixStage(val globalVals: Array[(String, IR)], val ctxType def addContext(newTyp: Type)(newCtx: ((Int, Int)) => IR): BlockMatrixStage = { val outer = this val newCtxType = TStruct("old" -> ctxType, "new" -> newTyp) - new BlockMatrixStage(globalVals, newCtxType) { + new BlockMatrixStage(outer.letBindings, broadcastVals, newCtxType) { def blockContext(idx: (Int, Int)): IR = makestruct("old" -> outer.blockContext(idx), "new" -> newCtx(idx)) @@ -121,7 +136,7 @@ abstract class BlockMatrixStage(val globalVals: Array[(String, IR)], val ctxType } def mapBody(f: (IR, IR) => IR): BlockMatrixStage = { val outer = this - new BlockMatrixStage(globalVals, outer.ctxType) { + new BlockMatrixStage(outer.letBindings, broadcastVals, outer.ctxType) { def blockContext(idx: (Int, Int)): IR = outer.blockContext(idx) def blockBody(ctxRef: Ref): IR = f(ctxRef, outer.blockBody(ctxRef)) @@ -131,7 +146,7 @@ abstract class BlockMatrixStage(val globalVals: Array[(String, IR)], val ctxType def condenseBlocks(typ: BlockMatrixType, rowBlocks: Array[Array[Int]], colBlocks: Array[Array[Int]]): BlockMatrixStage = { val outer = this val ctxType = TArray(TArray(TTuple(TTuple(TInt64, TInt64), outer.ctxType))) - new BlockMatrixStage(outer.globalVals, ctxType) { + new BlockMatrixStage(outer.letBindings, outer.broadcastVals, ctxType) { def blockContext(idx: (Int, Int)): IR = { val i = idx._1 val j = idx._2 @@ -190,15 +205,15 @@ object LowerBlockMatrixIR { NDArrayRef(lowered.blockBody(ctx), FastIndexedSeq(I64(iInBlock), I64(jInBlock)), -1) } - lowered.globalVals.foldRight[IR](elt) { case ((f, v), accum) => Let(f, v, accum) } + lowered.wrapLetsAndBroadcasts(elt) case BlockMatrixWrite(child, writer) => writer.lower(ctx, lower(child), child, relationalLetsAbove, TypeWithRequiredness(child.typ.elementType)) //FIXME: BlockMatrixIR is currently ignored in Requiredness inference since all eltTypes are +TFloat64 - case BlockMatrixMultiWrite(blockMatrices, writer) => unimplemented(node) + case BlockMatrixMultiWrite(blockMatrices, writer) => unimplemented(ctx, node) case node if node.children.exists(_.isInstanceOf[BlockMatrixIR]) => - throw new LowererUnsupportedOperation(s"IR nodes with BlockMatrixIR children need explicit rules: \n${ Pretty(node) }") + throw new LowererUnsupportedOperation(s"IR nodes with BlockMatrixIR children need explicit rules: \n${ Pretty(ctx, node) }") case node => - throw new LowererUnsupportedOperation(s"Value IRs with no BlockMatrixIR children must be lowered through LowerIR: \n${ Pretty(node) }") + throw new LowererUnsupportedOperation(s"Value IRs with no BlockMatrixIR children must be lowered through LowerIR: \n${ Pretty(ctx, node) }") } } @@ -215,13 +230,13 @@ object LowerBlockMatrixIR { } val emptyGlobals = MakeStruct(Seq()) val globalsId = genUID() - val letBindings = bmsWithCtx.globalVals :+ globalsId -> emptyGlobals + val letBindings = 
bmsWithCtx.letBindings ++ bmsWithCtx.broadcastVals :+ globalsId -> emptyGlobals val contextsIR = MakeStream(blocksRowMajor.map{ case (i, j) => bmsWithCtx.blockContext((i, j)) }, TStream(bmsWithCtx.ctxType)) val ctxRef = Ref(genUID(), bmsWithCtx.ctxType) val body = bmsWithCtx.blockBody(ctxRef) val bodyFreeVars = FreeVariables(body, supportsAgg = false, supportsScan = false) - val bcFields = bmsWithCtx.globalVals.filter { case (f, _) => bodyFreeVars.eval.lookupOption(f).isDefined } :+ globalsId -> Ref(globalsId, emptyGlobals.typ) + val bcFields = bmsWithCtx.broadcastVals.filter { case (f, _) => bodyFreeVars.eval.lookupOption(f).isDefined } :+ globalsId -> Ref(globalsId, emptyGlobals.typ) def tsPartitionFunction(ctxRef: Ref): IR = { val s = MakeStruct(Seq("blockRow" -> GetTupleElement(GetField(ctxRef, "new"), 0), "blockCol" -> GetTupleElement(GetField(ctxRef, "new"), 1), "block" -> bmsWithCtx.blockBody(ctxRef))) @@ -233,15 +248,15 @@ object LowerBlockMatrixIR { ts } - private def unimplemented[T](node: BaseIR): T = - throw new LowererUnsupportedOperation(s"unimplemented: \n${ Pretty(node) }") + private def unimplemented[T](ctx: ExecuteContext, node: BaseIR): T = + throw new LowererUnsupportedOperation(s"unimplemented: \n${ Pretty(ctx, node) }") def lower(bmir: BlockMatrixIR, typesToLower: DArrayLowering.Type, ctx: ExecuteContext, analyses: Analyses, relationalLetsAbove: Map[String, IR]): BlockMatrixStage = { if (!DArrayLowering.lowerBM(typesToLower)) throw new LowererUnsupportedOperation("found BlockMatrixIR in lowering; lowering only TableIRs.") bmir.children.foreach { case c: BlockMatrixIR if c.typ.blockSize != bmir.typ.blockSize => - throw new LowererUnsupportedOperation(s"Can't lower node with mismatched block sizes: ${ bmir.typ.blockSize } vs child ${ c.typ.blockSize }\n\n ${ Pretty(bmir) }") + throw new LowererUnsupportedOperation(s"Can't lower node with mismatched block sizes: ${ bmir.typ.blockSize } vs child ${ c.typ.blockSize }\n\n ${ Pretty(ctx, bmir) }") case _ => } if (bmir.typ.nDefinedBlocks == 0) @@ -258,7 +273,7 @@ object LowerBlockMatrixIR { case BlockMatrixRead(reader) => reader.lower(ctx) case x@BlockMatrixRandom(seed, gaussian, shape, blockSize) => val generator = invokeSeeded(if (gaussian) "rand_norm" else "rand_unif", seed, TFloat64, F64(0.0), F64(1.0)) - new BlockMatrixStage(Array(), TTuple(TInt64, TInt64)) { + new BlockMatrixStage(IndexedSeq(), Array(), TTuple(TInt64, TInt64)) { def blockContext(idx: (Int, Int)): IR = { val (i, j) = x.typ.blockShape(idx._1, idx._2) MakeTuple.ordered(FastSeq(i, j)) @@ -276,19 +291,20 @@ object LowerBlockMatrixIR { val loweredLeft = lower(left) val loweredRight = lower(right) loweredLeft - .addGlobals(loweredRight.globalVals: _*) + .addLets(loweredRight.letBindings: _*) + .addGlobals(loweredRight.broadcastVals: _*) .addContext(loweredRight.ctxType)(loweredRight.blockContext).mapBody { (ctx, leftBody) => NDArrayMap2(leftBody, bindIR(GetField(ctx, "new"))(loweredRight.blockBody), lname, rname, f, ErrorIDs.NO_ERROR) } case x@BlockMatrixBroadcast(child, IndexedSeq(), _, _) => val lowered = lower(child) - val eltValue = lowered.globalVals.foldRight[IR](bindIR(lowered.blockContext(0 -> 0)) { ctx => + val eltValue = lowered.wrapLetsAndBroadcasts(bindIR(lowered.blockContext(0 -> 0)) { ctx => NDArrayRef(lowered.blockBody(ctx), FastIndexedSeq(I64(0L), I64(0L)), -1) - }) { case ((f, v), accum) => Let(f, v, accum) } + }) val elt = Ref(genUID(), eltValue.typ) - new BlockMatrixStage(Array(elt.name -> eltValue), TTuple(TInt64, TInt64)) { + new 
BlockMatrixStage(lowered.letBindings, Array(elt.name -> eltValue), TTuple(TInt64, TInt64)) { def blockContext(idx: (Int, Int)): IR = { val (i, j) = x.typ.blockShape(idx._1, idx._2) MakeTuple.ordered(FastSeq(I64(i.toInt), I64(j.toInt))) @@ -332,7 +348,7 @@ object LowerBlockMatrixIR { case BlockMatrixBroadcast(child, IndexedSeq(1, 0), _, _) => //transpose val lowered = lower(child) - new BlockMatrixStage(lowered.globalVals, lowered.ctxType) { + new BlockMatrixStage(lowered.letBindings, lowered.broadcastVals, lowered.ctxType) { def blockContext(idx: (Int, Int)): IR = lowered.blockContext(idx.swap) def blockBody(ctxRef: Ref): IR = NDArrayReindex(lowered.blockBody(ctxRef), FastIndexedSeq(1, 0)) } @@ -348,12 +364,12 @@ object LowerBlockMatrixIR { } val summedChildType = BlockMatrixType(child.typ.elementType, IndexedSeq[Long](child.typ.nRowBlocks, child.typ.nColBlocks), child.typ.nRowBlocks == 1, 1, BlockMatrixSparsity.dense) val res = NDArrayAgg(summedChild.collectLocal(relationalLetsAbove, summedChildType), IndexedSeq[Int](0, 1)) - new BlockMatrixStage(summedChild.globalVals, TStruct.empty) { + new BlockMatrixStage(loweredChild.letBindings, summedChild.broadcastVals, TStruct.empty) { override def blockContext(idx: (Int, Int)): IR = makestruct() override def blockBody(ctxRef: Ref): IR = NDArrayReshape(res, MakeTuple.ordered(Seq(I64(1L), I64(1L))), ErrorIDs.NO_ERROR) } case IndexedSeq(0) => { // Number of rows goes to 1. Number of cols remains the same. - new BlockMatrixStage(loweredChild.globalVals, TArray(loweredChild.ctxType)) { + new BlockMatrixStage(loweredChild.letBindings, loweredChild.broadcastVals, TArray(loweredChild.ctxType)) { override def blockContext(idx: (Int, Int)): IR = { val (row, col) = idx assert(row == 0, s"Asked for idx ${idx}") @@ -372,7 +388,7 @@ object LowerBlockMatrixIR { } } case IndexedSeq(1) => { // Number of cols goes to 1. Number of rows remains the same. 
- new BlockMatrixStage(loweredChild.globalVals, TArray(loweredChild.ctxType)) { + new BlockMatrixStage(loweredChild.letBindings, loweredChild.broadcastVals, TArray(loweredChild.ctxType)) { override def blockContext(idx: (Int, Int)): IR = { val (row, col) = idx assert(col == 0, s"Asked for idx ${idx}") @@ -451,11 +467,11 @@ object LowerBlockMatrixIR { case BlockMatrixDensify(child) => lower(child) case BlockMatrixSparsify(child, sparsifier) => lower(child) - case RelationalLetBlockMatrix(name, value, body) => unimplemented(bmir) + case RelationalLetBlockMatrix(name, value, body) => unimplemented(ctx, bmir) case ValueToBlockMatrix(child, shape, blockSize) if !child.typ.isInstanceOf[TArray] && !child.typ.isInstanceOf[TNDArray] => { val element = lowerIR(child) - new BlockMatrixStage(Array(), TStruct()) { + new BlockMatrixStage(IndexedSeq(), Array(), TStruct()) { override def blockContext(idx: (Int, Int)): IR = MakeStruct(Seq()) override def blockBody(ctxRef: Ref): IR = MakeNDArray(MakeArray(element), MakeTuple(Seq((0, I64(1)), (1, I64(1)))), False(), ErrorIDs.NO_ERROR) @@ -467,9 +483,7 @@ object LowerBlockMatrixIR { case _: TNDArray => lowerIR(child) } val v = Ref(genUID(), nd.typ) - new BlockMatrixStage( - Array(v.name -> nd), - nd.typ) { + new BlockMatrixStage(IndexedSeq(v.name -> nd), Array(), nd.typ) { def blockContext(idx: (Int, Int)): IR = { val (r, c) = idx NDArraySlice(v, MakeTuple.ordered(FastSeq( @@ -483,7 +497,7 @@ object LowerBlockMatrixIR { val left = lower(leftIR) val right = lower(rightIR) val newCtxType = TArray(TTuple(left.ctxType, right.ctxType)) - new BlockMatrixStage(left.globalVals ++ right.globalVals, newCtxType) { + new BlockMatrixStage(left.letBindings ++ right.letBindings, left.broadcastVals ++ right.broadcastVals, newCtxType) { def blockContext(idx: (Int, Int)): IR = { val (i, j) = idx MakeArray(Array.tabulate[Option[IR]](leftIR.typ.nColBlocks) { k => diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerDistributedSort.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerDistributedSort.scala index 49cbfd7d8d1..d18c5a06047 100644 --- a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerDistributedSort.scala +++ b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerDistributedSort.scala @@ -103,13 +103,14 @@ object LowerDistributedSort { inputStage: TableStage, sortFields: IndexedSeq[SortField], relationalLetsAbove: Map[String, IR], - rowTypeRequiredness: RStruct + rowTypeRequiredness: RStruct, + optTargetNumPartitions: Option[Int] = None ): TableStage = { val oversamplingNum = 3 val seed = 7L - val defaultBranchingFactor = 4 - val sizeCutoff = HailContext.getFlag("shuffle_cutoff_to_local_sort").toInt + val defaultBranchingFactor = ctx.getFlag("shuffle_max_branch_factor").toInt + val sizeCutoff = ctx.getFlag("shuffle_cutoff_to_local_sort").toInt val (keyToSortBy, _) = inputStage.rowType.select(sortFields.map(sf => sf.field)) @@ -120,31 +121,47 @@ object LowerDistributedSort { val initialStageDataRow = CompileAndEvaluate[Annotation](ctx, inputStage.mapCollectWithGlobals(relationalLetsAbove) { part => WritePartition(part, UUID4(), writer) - }{ case (part, globals) => MakeTuple.ordered(Seq(part, globals))}).asInstanceOf[Row] - val (initialPartInfo, initialGlobals) = (initialStageDataRow(0).asInstanceOf[IndexedSeq[Row]], initialStageDataRow(1).asInstanceOf[Row]) + }{ case (part, globals) => + val streamElement = Ref(genUID(), part.typ.asInstanceOf[TArray].elementType) + bindIR(StreamAgg(ToStream(part), streamElement.name, + MakeStruct(FastSeq( + "min" -> 
AggFold.min(GetField(streamElement, "firstKey"), sortFields),
+            "max" -> AggFold.max(GetField(streamElement, "lastKey"), sortFields)
+          ))
+        )) { intervalRange => MakeTuple.ordered(Seq(part, globals, intervalRange)) }
+      }).asInstanceOf[Row]
+    val (initialPartInfo, initialGlobals, intervalRange) = (initialStageDataRow(0).asInstanceOf[IndexedSeq[Row]], initialStageDataRow(1).asInstanceOf[Row], initialStageDataRow(2).asInstanceOf[Row])
     val initialGlobalsLiteral = Literal(inputStage.globalType, initialGlobals)
     val initialChunks = initialPartInfo.map(row => Chunk(initialTmpPath + row(0).asInstanceOf[String], row(1).asInstanceOf[Long].toInt, None))
-    val initialSegment = SegmentResult(IndexedSeq(0), inputStage.partitioner.range.get, initialChunks)
+
+    val initialInterval = Interval(intervalRange(0), intervalRange(1), true, true)
+    val initialSegment = SegmentResult(IndexedSeq(0), initialInterval, initialChunks)
     val totalNumberOfRows = initialChunks.map(_.size).sum
-    val idealNumberOfRowsPerPart = Math.max(1, totalNumberOfRows / inputStage.numPartitions)
+    optTargetNumPartitions.foreach(i => assert(i >= 1, s"Must request positive number of partitions. Requested ${i}"))
+    val targetNumPartitions = optTargetNumPartitions.getOrElse(inputStage.numPartitions)
+
+    val idealNumberOfRowsPerPart = if (targetNumPartitions == 0) 1 else {
+      Math.max(1, totalNumberOfRows / targetNumPartitions)
+    }
 
-    var loopState = LoopState(IndexedSeq(initialSegment), IndexedSeq.empty[SegmentResult], IndexedSeq.empty[SegmentResult])
+    var loopState = LoopState(IndexedSeq(initialSegment), IndexedSeq.empty[SegmentResult], IndexedSeq.empty[OutputPartition])
     var i = 0
     val rand = new IRRandomness(seed)
 
     /*
-    There are three categories of segments. largeUnsortedSegments are too big to sort locally so have to broken up.
-    largeSortedSegments were identified as being already sorted, so no reason to recur on them. smallSegments are small
-    enough to be sorted locally.
+    Loop state keeps track of three things. largeSegments are too big to sort locally so have to be broken up.
+    smallSegments are small enough to be sorted locally. readyOutputParts are any partitions that we noticed were
+    sorted already during the course of the recursion. Loop continues until there are no largeSegments left. Then we
+    sort the small segments and combine them with readyOutputParts to get the final table.
     */
-    while (!loopState.largeUnsortedSegments.isEmpty) {
-      val partitionDataPerSegment = segmentsToPartitionData(loopState.largeUnsortedSegments, idealNumberOfRowsPerPart)
+    while (!loopState.largeSegments.isEmpty) {
+      val partitionDataPerSegment = segmentsToPartitionData(loopState.largeSegments, idealNumberOfRowsPerPart)
 
       val partitionCountsPerSegment = partitionDataPerSegment.map(oneSegment => oneSegment.map(_.currentPartSize))
-      assert(partitionCountsPerSegment.size == loopState.largeUnsortedSegments.size)
+      assert(partitionCountsPerSegment.size == loopState.largeSegments.size)
 
       val numSamplesPerPartitionPerSegment = partitionCountsPerSegment.map { partitionCountsForOneSegment =>
         val recordsInSegment = partitionCountsForOneSegment.sum
@@ -170,6 +187,7 @@ object LowerDistributedSort {
       /*
         Aggregate over the segments, to compute the pivots, whether it's already sorted, and what key interval is contained in that segment.
+        Also get the min and max of each individual partition. That way if it's sorted already, we know the partitioning to use.
*/ val pivotsPerSegmentAndSortedCheck = ToArray(bindIR(perPartStatsIR) { perPartStats => mapIR(StreamGroupByKey(ToStream(perPartStats), IndexedSeq("segmentIdx"))) { oneGroup => @@ -178,8 +196,10 @@ object LowerDistributedSort { bindIR(StreamAgg(oneGroup, streamElementRef.name, { AggLet(dataRef.name, GetField(streamElementRef, "partData"), MakeStruct(Seq( - ("min", AggFold.min(GetField(dataRef, "min"), sortFields)), - ("max", AggFold.max(GetField(dataRef, "max"), sortFields)), + ("min", AggFold.min(GetField(dataRef, "min"), sortFields)), // Min of the mins + ("max", AggFold.max(GetField(dataRef, "max"), sortFields)), // Max of the maxes + ("perPartMins", ApplyAggOp(Collect())(GetField(dataRef, "min"))), // All the mins + ("perPartMaxes", ApplyAggOp(Collect())(GetField(dataRef, "max"))), // All the maxes ("samples", ApplyAggOp(Collect())(GetField(dataRef, "samples"))), ("eachPartSorted", AggFold.all(GetField(dataRef, "isSorted"))), ("perPartIntervalTuples", ApplyAggOp(Collect())(MakeTuple.ordered(Seq(GetField(dataRef, "min"), GetField(dataRef, "max"))))) @@ -200,7 +220,9 @@ object LowerDistributedSort { MakeStruct(Seq( "pivotsWithEndpoints" -> ArrayFunctions.extend(ArrayFunctions.extend(minArray, sortedSampling), maxArray), "isSorted" -> ApplySpecial("land", Seq.empty[Type], Seq(GetField(aggResults, "eachPartSorted"), tuplesInSortedOrder), TBoolean, ErrorIDs.NO_ERROR), - "intervalTuple" -> MakeTuple.ordered(Seq(GetField(aggResults, "min"), GetField(aggResults, "max"))) + "intervalTuple" -> MakeTuple.ordered(Seq(GetField(aggResults, "min"), GetField(aggResults, "max"))), + "perPartMins" -> GetField(aggResults, "perPartMins"), + "perPartMaxes" -> GetField(aggResults, "perPartMaxes") )) } } @@ -210,12 +232,22 @@ object LowerDistributedSort { // Going to check now if it's fully sorted, as well as collect and sort all the samples. 
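
[A minimal illustrative sketch, not from the patch itself: the intuition for turning sorted per-partition samples into pivots that split one segment into `branching` key ranges, using made-up Int keys. The patch additionally oversamples, keeps each segment's own min/max as endpoints, and takes the branching factor from the shuffle_max_branch_factor flag introduced earlier.]

    // illustrative values only
    val samples   = Seq(42, 7, 19, 3, 88, 56).sorted  // Seq(3, 7, 19, 42, 56, 88)
    val branching = 3
    val pivots    = (1 until branching).map(i => samples(i * samples.length / branching))
    // pivots == Vector(19, 56): keys bounding the `branching` sub-ranges of the segment
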
val pivotsWithEndpointsAndInfoGroupedBySegmentNumber = CompileAndEvaluate[Annotation](ctx, pivotsPerSegmentAndSortedCheck) - .asInstanceOf[IndexedSeq[Row]].map(x => (x(0).asInstanceOf[IndexedSeq[Row]], x(1).asInstanceOf[Boolean], x(2).asInstanceOf[Row])) + .asInstanceOf[IndexedSeq[Row]].map(x => (x(0).asInstanceOf[IndexedSeq[Row]], x(1).asInstanceOf[Boolean], x(2).asInstanceOf[Row], x(3).asInstanceOf[IndexedSeq[Row]], x(4).asInstanceOf[IndexedSeq[Row]])) + + val (sortedSegmentsTuples, unsortedPivotsWithEndpointsAndInfoGroupedBySegmentNumber) = pivotsWithEndpointsAndInfoGroupedBySegmentNumber.zipWithIndex.partition { case ((_, isSorted, _, _, _), _) => isSorted} - val (sortedSegmentsTuples, unsortedPivotsWithEndpointsAndInfoGroupedBySegmentNumber) = pivotsWithEndpointsAndInfoGroupedBySegmentNumber.zipWithIndex.partition { case ((_, isSorted, _), _) => isSorted} + val outputPartitions = sortedSegmentsTuples.flatMap { case ((_, _, _, partMins, partMaxes), originalSegmentIdx) => + val segmentToBreakUp = loopState.largeSegments(originalSegmentIdx) + val currentSegmentPartitionData = partitionDataPerSegment(originalSegmentIdx) + val partRanges = partMins.zip(partMaxes) + assert(partRanges.size == currentSegmentPartitionData.size) - val sortedSegments = sortedSegmentsTuples.map { case (_, idx) => loopState.largeUnsortedSegments(idx)} - val remainingUnsortedSegments = unsortedPivotsWithEndpointsAndInfoGroupedBySegmentNumber.map {case (_, idx) => loopState.largeUnsortedSegments(idx)} + currentSegmentPartitionData.zip(partRanges).zipWithIndex.map { case ((pi, (intervalStart, intervalEnd)), idx) => + OutputPartition(segmentToBreakUp.indices :+ idx, Interval(intervalStart, intervalEnd, true, true), pi.files) + } + } + + val remainingUnsortedSegments = unsortedPivotsWithEndpointsAndInfoGroupedBySegmentNumber.map {case (_, idx) => loopState.largeSegments(idx)} val (newBigUnsortedSegments, newSmallSegments) = if (unsortedPivotsWithEndpointsAndInfoGroupedBySegmentNumber.size > 0) { @@ -284,7 +316,7 @@ object LowerDistributedSort { isBig && (sr.interval.left.point != sr.interval.right.point) && (sr.chunks.map(_.size).sum > 1) } } else { (IndexedSeq.empty[SegmentResult], IndexedSeq.empty[SegmentResult]) } - loopState = LoopState(newBigUnsortedSegments, loopState.largeSortedSegments ++ sortedSegments, loopState.smallSegments ++ newSmallSegments) + loopState = LoopState(newBigUnsortedSegments, loopState.smallSegments ++ newSmallSegments, loopState.readyOutputParts ++ outputPartitions) i = i + 1 } @@ -305,28 +337,21 @@ object LowerDistributedSort { val sortedFilenames = CompileAndEvaluate[Annotation](ctx, sortedFilenamesIR).asInstanceOf[IndexedSeq[Row]].map(_(0).asInstanceOf[String]) val newlySortedSegments = loopState.smallSegments.zip(sortedFilenames).map { case (sr, newFilename) => - // "Small" segments are small because we decided they had a small enough number of bytes, so it must be present. 
- val totalNumRows = sr.chunks.map(_.size).sum - val totalByteSize = sr.chunks.map(_.byteSize.get).sum - SegmentResult(sr.indices, sr.interval, IndexedSeq(Chunk(initialTmpPath + newFilename, totalNumRows, Some(totalByteSize)))) + OutputPartition(sr.indices, sr.interval, IndexedSeq(initialTmpPath + newFilename)) } - val unorderedSegments = newlySortedSegments ++ loopState.largeSortedSegments - val orderedSegments = unorderedSegments.sortWith{ (srt1, srt2) => lessThanForSegmentIndices(srt1.indices, srt2.indices)} - - // Now let's treat the whole thing as one segment that can be partitioned by the segmentToPartitionData method. - val megaSegment = SegmentResult(IndexedSeq(), null, orderedSegments.flatMap(sr => sr.chunks)) - val partitioned = segmentsToPartitionData(IndexedSeq(megaSegment), idealNumberOfRowsPerPart).flatten + val unorderedOutputPartitions = newlySortedSegments ++ loopState.readyOutputParts + val orderedOutputPartitions = unorderedOutputPartitions.sortWith{ (srt1, srt2) => lessThanForSegmentIndices(srt1.indices, srt2.indices)} - val contextData = partitioned.map { part => Row(part.files) } + val contextData = orderedOutputPartitions.map { segment => Row(segment.files) } val contexts = ToStream(Literal(TArray(TStruct("files" -> TArray(TString))), contextData)) // Note: If all of the sort fields are not ascending, the the resulting table is sorted, but not keyed. val keyed = sortFields.forall(sf => sf.sortOrder == Ascending) val (partitionerKey, intervals) = if (keyed) { - (keyToSortBy, orderedSegments.map{ segment => segment.interval}) + (keyToSortBy, orderedOutputPartitions.map{ segment => segment.interval}) } else { - (TStruct(), orderedSegments.map{ _ => Interval(Row(), Row(), true, false)}) + (TStruct(), orderedOutputPartitions.map{ _ => Interval(Row(), Row(), true, false)}) } val partitioner = new RVDPartitioner(partitionerKey, intervals) @@ -494,4 +519,5 @@ object LowerDistributedSort { case class Chunk(filename: String, size: Int, byteSize: Option[Long]) case class SegmentResult(indices: IndexedSeq[Int], interval: Interval, chunks: IndexedSeq[Chunk]) -case class LoopState(largeUnsortedSegments: IndexedSeq[SegmentResult], largeSortedSegments: IndexedSeq[SegmentResult], smallSegments: IndexedSeq[SegmentResult]) +case class OutputPartition(indices: IndexedSeq[Int], interval: Interval, files: IndexedSeq[String]) +case class LoopState(largeSegments: IndexedSeq[SegmentResult], smallSegments: IndexedSeq[SegmentResult], readyOutputParts: IndexedSeq[OutputPartition]) diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala index d939c85f591..4579eaa2245 100644 --- a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala @@ -2,13 +2,14 @@ package is.hail.expr.ir.lowering import is.hail.HailContext import is.hail.backend.ExecuteContext +import is.hail.expr.ir.functions.TableCalculateNewPartitions import is.hail.expr.ir.{agg, _} import is.hail.io.{BufferSpec, TypedCodecSpec} -import is.hail.methods.{ForceCountTable, NPartitionsTable} +import is.hail.methods.{ForceCountTable, NPartitionsTable, TableFilterPartitions} import is.hail.rvd.{PartitionBoundOrdering, RVDPartitioner} import is.hail.types.physical.{PCanonicalBinary, PCanonicalTuple} import is.hail.types.virtual._ -import is.hail.types.{RField, RStruct, RTable, TableType} +import is.hail.types.{RField, RPrimitive, RStruct, RTable, TableType, TypeWithRequiredness} 
import is.hail.utils.{partition, _} import org.apache.spark.sql.Row @@ -77,8 +78,10 @@ class TableStage( // useful for debugging, but should be disabled in production code due to N^2 complexity // typecheckPartition() - def typecheckPartition(): Unit = { - TypeCheck(partitionIR, + def typecheckPartition(ctx: ExecuteContext): Unit = { + TypeCheck( + ctx, + partitionIR, BindingEnv(Env[Type](((letBindings ++ broadcastVals).map { case (s, x) => (s, x.typ) }) ++ FastIndexedSeq[(String, Type)]((ctxRefName, contexts.typ.asInstanceOf[TStream].elementType)): _*))) @@ -425,11 +428,12 @@ class TableStage( } } } + val rightRowRTypeWithPartNum = RStruct(IndexedSeq(RField("__partNum", TypeWithRequiredness(TInt32), 0)) ++ rightRowRType.fields.map(rField => RField(rField.name, rField.typ, rField.index + 1))) val sorted = ctx.backend.lowerDistributedSort(ctx, rightWithPartNums, SortField("__partNum", Ascending) +: right.key.map(k => SortField(k, Ascending)), relationalLetsAbove, - rightRowRType) + rightRowRTypeWithPartNum) assert(sorted.kType.fieldNames.sameElements("__partNum" +: right.key)) val newRightPartitioner = new RVDPartitioner( Some(1), @@ -464,6 +468,67 @@ object LowerTableIR { invoke("sum", TInt64, stage.mapCollect(relationalLetsAbove)(rows => foldIR(mapIR(rows)(row => Consume(row)), 0L)(_ + _))) + case TableToValueApply(child, TableCalculateNewPartitions(nPartitions)) => + val stage = lower(child) + val sampleSize = math.min(nPartitions * 20, 1000000) + val samplesPerPartition = sampleSize / math.max(1, stage.numPartitions) + val keyType = child.typ.keyType + val samplekey = AggSignature(TakeBy(), + FastIndexedSeq(TInt32), + FastIndexedSeq(keyType, TFloat64)) + + val minkey = AggSignature(TakeBy(), + FastIndexedSeq(TInt32), + FastIndexedSeq(keyType, keyType)) + + val maxkey = AggSignature(TakeBy(Descending), + FastIndexedSeq(TInt32), + FastIndexedSeq(keyType, keyType)) + + + bindIR(flatten(stage.mapCollect(relationalLetsAbove) { rows => + streamAggIR(rows) { elt => + ToArray(flatMapIR(ToStream( + MakeArray( + ApplyAggOp( + FastIndexedSeq(I32(samplesPerPartition)), + FastIndexedSeq(SelectFields(elt, keyType.fieldNames), invokeSeeded("rand_unif", 1, TFloat64, F64(0.0), F64(1.0))), + samplekey), + ApplyAggOp( + FastIndexedSeq(I32(1)), + FastIndexedSeq(elt, elt), + minkey), + ApplyAggOp( + FastIndexedSeq(I32(1)), + FastIndexedSeq(elt, elt), + maxkey) + ) + )) { inner => ToStream(inner) }) + } + })) { partData => + + val sorted = sortIR(partData) { (l, r) => ApplyComparisonOp(LT(keyType, keyType), l, r) } + bindIR(ToArray(flatMapIR(StreamGroupByKey(ToStream(sorted), keyType.fieldNames)) { groupRef => + StreamTake(groupRef, 1) + })) { boundsArray => + + bindIR(ArrayLen(boundsArray)) { nBounds => + bindIR(minIR(nBounds, nPartitions)) { nParts => + If(nParts.ceq(0), + MakeArray(Seq(), TArray(TInterval(keyType))), + bindIR((nBounds + (nParts - 1)) floorDiv nParts) { stepSize => + ToArray(mapIR(StreamRange(0, nBounds, stepSize)) { i => + If((i + stepSize) < (nBounds - 1), + invoke("Interval", TInterval(keyType), ArrayRef(boundsArray, i), ArrayRef(boundsArray, i + stepSize), True(), False()), + invoke("Interval", TInterval(keyType), ArrayRef(boundsArray, i), ArrayRef(boundsArray, nBounds - 1), True(), True()) + )}) + } + ) + } + } + } + } + case TableGetGlobals(child) => lower(child).getGlobals() @@ -606,7 +671,7 @@ object LowerTableIR { writer.lower(ctx, lower(child), child, coerce[RTable](analyses.requirednessAnalysis.lookup(child)), relationalLetsAbove) case node if 
node.children.exists(_.isInstanceOf[TableIR]) => - throw new LowererUnsupportedOperation(s"IR nodes with TableIR children must be defined explicitly: \n${ Pretty(node) }") + throw new LowererUnsupportedOperation(s"IR nodes with TableIR children must be defined explicitly: \n${ Pretty(ctx, node) }") } lowered } @@ -1538,6 +1603,32 @@ object LowerTableIR { case TableLiteral(typ, rvd, enc, encodedGlobals) => RVDToTableStage(rvd, EncodedLiteral(enc, encodedGlobals)) + case TableToTableApply(child, TableFilterPartitions(seq, keep)) => + val lc = lower(child) + + val arr = seq.sorted.toArray + val keptSet = seq.toSet + val lit = Literal(TSet(TInt32), keptSet) + if (keep) { + lc.copy( + partitioner = lc.partitioner.copy(rangeBounds = arr.map(idx => lc.partitioner.rangeBounds(idx))), + contexts = mapIR( + filterIR( + zipWithIndex(lc.contexts)) { t => + invoke("contains", TBoolean, lit, GetField(t, "idx")) }) { t => + GetField(t, "elt") } + ) + } else { + lc.copy( + partitioner = lc.partitioner.copy(rangeBounds = lc.partitioner.rangeBounds.zipWithIndex.filter { case (_, idx) => !keptSet.contains(idx) }.map(_._1)), + contexts = mapIR( + filterIR( + zipWithIndex(lc.contexts)) { t => + !invoke("contains", TBoolean, lit, GetField(t, "idx")) }) { t => + GetField(t, "elt") } + ) + } + case bmtt@BlockMatrixToTable(bmir) => val ts = LowerBlockMatrixIR.lowerToTableStage(bmir, typesToLower, ctx, analyses, relationalLetsAbove) // I now have an unkeyed table of (blockRow, blockCol, block). @@ -1562,7 +1653,7 @@ object LowerTableIR { ctx.backend.lowerDistributedSort(ctx, entriesUnkeyed, IndexedSeq(SortField("i", Ascending), SortField("j", Ascending)), relationalLetsAbove, rowR) case node => - throw new LowererUnsupportedOperation(s"undefined: \n${ Pretty(node) }") + throw new LowererUnsupportedOperation(s"undefined: \n${ Pretty(ctx, node) }") } assert(tir.typ.globalType == lowered.globalType, s"\n ir global: ${tir.typ.globalType}\n lowered global: ${lowered.globalType}") diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerToCDA.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerToCDA.scala index 52a0aa1a8b8..7743047508e 100644 --- a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerToCDA.scala +++ b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerToCDA.scala @@ -38,7 +38,7 @@ object LowerToCDA { val loweredValue = substLets(lower(value, typesToLower, ctx, analyses, relationalLetsAbove), relationalLetsAbove) if (!Compilable(loweredValue)) - throw new LowererUnsupportedOperation(s"lowered to uncompilable IR: ${ Pretty(ir) }") + throw new LowererUnsupportedOperation(s"lowered to uncompilable IR: ${ Pretty(ctx, ir) }") val (Some(PTypeReferenceSingleCodeType(pt: PTuple)), f) = ctx.timer.time("Compile") { Compile[AsmFunction1RegionLong](ctx, @@ -72,10 +72,10 @@ object LowerToCDA { LowerBlockMatrixIR(ir, typesToLower, ctx, analyses, relationalLetsAbove) case node if node.children.exists(_.isInstanceOf[MatrixIR]) => - throw new LowererUnsupportedOperation(s"MatrixIR nodes must be lowered to TableIR nodes separately: \n${ Pretty(node) }") + throw new LowererUnsupportedOperation(s"MatrixIR nodes must be lowered to TableIR nodes separately: \n${ Pretty(ctx, node) }") case node => - throw new LowererUnsupportedOperation(s"Cannot lower: \n${ Pretty(node) }") + throw new LowererUnsupportedOperation(s"Cannot lower: \n${ Pretty(ctx, node) }") } } diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/LoweringPass.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/LoweringPass.scala index 
efff62a5214..af7aa11599b 100644 --- a/hail/src/main/scala/is/hail/expr/ir/lowering/LoweringPass.scala +++ b/hail/src/main/scala/is/hail/expr/ir/lowering/LoweringPass.scala @@ -36,10 +36,10 @@ case object LowerMatrixToTablePass extends LoweringPass { val context: String = "LowerMatrixToTable" def transform(ctx: ExecuteContext, ir: BaseIR): BaseIR = ir match { - case x: IR => LowerMatrixIR(x) - case x: TableIR => LowerMatrixIR(x) - case x: MatrixIR => LowerMatrixIR(x) - case x: BlockMatrixIR => LowerMatrixIR(x) + case x: IR => LowerMatrixIR(ctx, x) + case x: TableIR => LowerMatrixIR(ctx, x) + case x: MatrixIR => LowerMatrixIR(ctx, x) + case x: BlockMatrixIR => LowerMatrixIR(ctx, x) } } diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/LoweringPipeline.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/LoweringPipeline.scala index b92dbb5234b..3ec015862f5 100644 --- a/hail/src/main/scala/is/hail/expr/ir/lowering/LoweringPipeline.scala +++ b/hail/src/main/scala/is/hail/expr/ir/lowering/LoweringPipeline.scala @@ -19,7 +19,7 @@ case class LoweringPipeline(lowerings: LoweringPass*) { throw e } try { - TypeCheck(x) + TypeCheck(ctx, x) } catch { case e: Throwable => fatal(s"error after applying ${ l.context }", e) diff --git a/hail/src/main/scala/is/hail/expr/ir/package.scala b/hail/src/main/scala/is/hail/expr/ir/package.scala index 037af8b7fcc..b9368741400 100644 --- a/hail/src/main/scala/is/hail/expr/ir/package.scala +++ b/hail/src/main/scala/is/hail/expr/ir/package.scala @@ -121,6 +121,11 @@ package object ir { If(a < b, a, b) } + def streamAggIR(stream: IR)(f: Ref => IR): IR = { + val ref = Ref(genUID(), coerce[TStream](stream.typ).elementType) + StreamAgg(stream, ref.name, f(ref)) + } + def forIR(stream: IR)(f: Ref => IR): IR = { val ref = Ref(genUID(), coerce[TStream](stream.typ).elementType) StreamFor(stream, ref.name, f(ref)) @@ -141,9 +146,9 @@ package object ir { StreamFlatMap(stream, ref.name, f(ref)) } - def flatten(stream: IR): IR = flatMapIR(stream) { elt => - if (elt.typ.isInstanceOf[TStream]) elt else ToStream(elt) - } + def flatten(stream: IR): IR = flatMapIR(if (stream.typ.isInstanceOf[TStream]) stream else ToStream(stream)) { elt => + if (elt.typ.isInstanceOf[TStream]) elt else ToStream(elt) + } def foldIR(stream: IR, zero: IR)(f: (Ref, Ref) => IR): IR = { val elt = Ref(genUID(), coerce[TStream](stream.typ).elementType) diff --git a/hail/src/main/scala/is/hail/io/TextMatrixReader.scala b/hail/src/main/scala/is/hail/io/TextMatrixReader.scala deleted file mode 100644 index 53d77b6bd6c..00000000000 --- a/hail/src/main/scala/is/hail/io/TextMatrixReader.scala +++ /dev/null @@ -1,702 +0,0 @@ -package is.hail.io - -import is.hail.annotations._ -import is.hail.asm4s._ -import is.hail.backend.ExecuteContext -import is.hail.expr.ir.lowering.TableStage -import is.hail.expr.ir.{EmitCode, EmitCodeBuilder, EmitFunctionBuilder, GenericLine, GenericLines, GenericTableValue, IEmitCode, IRParser, IntArrayBuilder, LowerMatrixIR, MatrixHybridReader, TableRead, TableValue, TextReaderOptions} -import is.hail.io.fs.FS -import is.hail.rvd.RVDPartitioner -import is.hail.types._ -import is.hail.types.physical._ -import is.hail.types.physical.stypes.concrete.{SIndexablePointerValue, SStackStruct, SStringPointer} -import is.hail.types.physical.stypes.interfaces._ -import is.hail.types.physical.stypes.{SCode, SValue} -import is.hail.types.virtual._ -import is.hail.utils._ -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.Row -import org.json4s.{DefaultFormats, Formats, JValue} - -import 
scala.collection.mutable -import scala.io.Source -import scala.language.{existentials, implicitConversions} - -case class TextMatrixHeaderInfo( - headerValues: Array[String], - rowFieldNames: Array[String], - columnIdentifiers: Array[_] // String or Int -) { - val nCols: Int = columnIdentifiers.length -} - -object TextMatrixReader { - - def warnDuplicates(ids: Array[String]) { - val duplicates = ids.counter().filter(_._2 > 1) - if (duplicates.nonEmpty) { - warn(s"Found ${ duplicates.size } duplicate ${ plural(duplicates.size, "sample ID") }:\n @1", - duplicates.toArray.sortBy(-_._2).map { case (id, count) => s"""($count) "$id"""" }.truncatable("\n ")) - } - } - - private def parseHeader( - fs: FS, - file: String, - sep: Char, - nRowFields: Int, - opts: TextMatrixReaderOptions - ): TextMatrixHeaderInfo = { - val maybeFirstTwoLines = using(fs.open(file)) { s => - Source.fromInputStream(s).getLines().filter(!opts.isComment(_)).take(2).toArray.toSeq - } - - (opts.hasHeader, maybeFirstTwoLines) match { - case (true, Seq()) => - fatal(s"Expected header in every file, but found empty file: $file") - case (true, Seq(header)) => - warn(s"File $file contains a header, but no lines of data.") - val headerValues = header.split(sep) - if (headerValues.length < nRowFields) { - fatal( - s"""File ${ file } contains one line and you told me it had a header, - |so I expected to see at least the ${ nRowFields } row field names - |on the header line, but instead I only saw ${ headerValues.length } - |separated values. The header was: - | ${ header }""".stripMargin) - } - TextMatrixHeaderInfo( - headerValues, - headerValues.slice(0, nRowFields), - headerValues.drop(nRowFields)) - case (true, Seq(header, dataLine)) => - val headerValues = header.split(sep) - val nHeaderValues = headerValues.length - val nSeparatedValues = dataLine.split(sep).length - if (nHeaderValues + nRowFields == nSeparatedValues) { - TextMatrixHeaderInfo( - headerValues, - rowFieldNames = Array.tabulate(nRowFields)(i => s"f$i"), - columnIdentifiers = headerValues) - } else if (nHeaderValues == nSeparatedValues) { - TextMatrixHeaderInfo( - headerValues, - rowFieldNames = headerValues.slice(0, nRowFields), - columnIdentifiers = headerValues.drop(nRowFields)) - } else { - fatal( - s"""In file $file, expected the header line to match either: - | rowField0 rowField1 ... rowField${ nRowFields } colId0 colId1 ... - |or - | colId0 colId1 ... 
- |Instead the first two lines were: - | ${ header.truncate } - | ${ dataLine.truncate } - |The first line contained ${ nHeaderValues } separated values and the - |second line contained ${ nSeparatedValues } separated values.""".stripMargin) - } - case (false, Seq()) => - warn(s"File $file is empty and has no header, so we assume no columns.") - TextMatrixHeaderInfo(Array(), Array.tabulate(nRowFields)(i => s"f$i"), Array()) - case (false, firstLine +: _) => - val nSeparatedValues = firstLine.split(sep).length - TextMatrixHeaderInfo( - Array(), - Array.tabulate(nRowFields)(i => s"f$i"), - Array.range(0, nSeparatedValues - nRowFields)) - } - } - - def makePartitionerFromCounts(partitionCounts: Array[Long], kType: TStruct): (RVDPartitioner, Array[Int]) = { - var includesStart = true - val keepPartitions = new IntArrayBuilder() - val rangeBoundIntervals = partitionCounts.zip(partitionCounts.tail).zipWithIndex.flatMap { case ((s, e), i) => - val interval = Interval.orNone(kType.ordering, - Row(if (includesStart) s else s - 1), - Row(e - 1), - includesStart, true) - includesStart = false - if (interval.isDefined) - keepPartitions.add(i) - interval - } - val ranges = rangeBoundIntervals - (new RVDPartitioner(Array(kType.fieldNames(0)), kType, ranges), keepPartitions.result()) - } - - def verifyRowFields( - fileName: String, - fieldNames: Array[String], - fieldTypes: Map[String, Type] - ): TStruct = { - val headerDups = fieldNames.duplicates() - if (headerDups.nonEmpty) - fatal(s"Found following duplicate row fields in header: \n ${ headerDups.mkString("\n ") }") - - val fields: Array[(String, Type)] = fieldNames.map { name => - fieldTypes.get(name) match { - case Some(t) => (name, t) - case None => - val rowFieldsAsPython = fieldTypes - .map { case (fieldName, typ) => s"'${ fieldName }': ${ typ.toString }" } - .mkString("{", ",\n ", "}") - fatal( - s"""In file $fileName, found a row field, $name, that is not in `row_fields': - | row fields found in file: - | ${ fieldNames.mkString("\n ") } - | row_fields: - | ${ rowFieldsAsPython } - """.stripMargin) - } - } - TStruct(fields: _*) - } - - def checkHeaders( - header1Path: String, - header1: Array[String], - headerPartitions: mutable.Set[Int], - partitionPaths: Array[String], - lines: RDD[GenericLine], - separator: Char - ): Unit = { - lines - .mapPartitionsWithIndex { (i, it) => - if (headerPartitions.contains(i)) { - val hd = it.next().toString.split(separator) - if (!header1.sameElements(hd)) { - if (header1.length != hd.length) { - fatal( - s"""invalid header: lengths of headers differ. - | ${ header1.length } elements in $header1Path - | ${ header1.truncate } - | ${ hd.length } elements in ${ partitionPaths(i) } - | ${ hd.truncate }""".stripMargin - ) - } - header1.zip(hd).zipWithIndex.foreach { case ((s1, s2), j) => - if (s1 != s2) { - fatal( - s"""invalid header: expected elements to be identical for all input paths. Found different elements at position $j. - | ${ header1Path }: $s1 - | ${ partitionPaths(i) }: $s2""". 
- stripMargin) - } - } - } - } - it - }.foreachPartition { _ => () } - } - - def fromJValue(ctx: ExecuteContext, jv: JValue): TextMatrixReader = { - val fs = ctx.fs - - implicit val formats: Formats = DefaultFormats - val params = jv.extract[TextMatrixReaderParameters] - - assert(params.separatorStr.length == 1) - val separator = params.separatorStr.charAt(0) - val rowFields = params.rowFieldsStr.mapValues(IRParser.parseType(_)) - val entryType = TStruct("x" -> IRParser.parseType(params.entryTypeStr)) - val fileStatuses = fs.globAllStatuses(params.paths) - require(entryType.size == 1, "entryType can only have 1 field") - if (fileStatuses.isEmpty) - fatal("no paths specified for import_matrix_table.") - assert((rowFields.values ++ entryType.types).forall { t => - t == TString || - t == TInt32 || - t == TInt64 || - t == TFloat32 || - t == TFloat64 - }) - - val opts = TextMatrixReaderOptions(params.comment, params.hasHeader) - - val headerInfo = parseHeader(fs, fileStatuses.head.getPath, separator, rowFields.size, opts) - if (params.addRowId && headerInfo.rowFieldNames.contains("row_id")) { - fatal( - s"""If no key is specified, `import_matrix_table`, uses 'row_id' - |as the key, please provide a key or choose a different row field name.\n - | Row field names: ${ headerInfo.rowFieldNames }""".stripMargin) - } - val rowFieldTypeWithoutRowId = verifyRowFields( - fileStatuses.head.getPath, headerInfo.rowFieldNames, rowFields) - val rowFieldType = - if (params.addRowId) - TStruct("row_id" -> TInt64) ++ rowFieldTypeWithoutRowId - else - rowFieldTypeWithoutRowId - if (params.hasHeader) - warnDuplicates(headerInfo.columnIdentifiers.asInstanceOf[Array[String]]) - - val lines = GenericLines.read(fs, fileStatuses, params.nPartitions, None, None, params.gzipAsBGZip, false) - - val linesRDD = lines.toRDD(fs) - .filter { line => - val l = line.toString - l.nonEmpty && !opts.isComment(l) - } - - val linesPartitionCounts = linesRDD.countPerPartition() - val partitionPaths = lines.contexts.map(a => a.asInstanceOf[Row].getAs[String](1)).toArray - - val headerPartitions = mutable.Set[Int]() - val partitionLineIndexWithinFile = new Array[Long](linesRDD.getNumPartitions) - - var indexWithinFile = 0L - var i = 0 - var prevPartitionPath: String = null - while (i < linesRDD.getNumPartitions) { - if (linesPartitionCounts(i) > 0) { - val partPath = partitionPaths(i) - if (prevPartitionPath == null - || prevPartitionPath != partPath) { - prevPartitionPath = partPath - indexWithinFile = 0 - if (opts.hasHeader) { - linesPartitionCounts(i) -= 1 - headerPartitions += i - } - } - } - partitionLineIndexWithinFile(i) = indexWithinFile - indexWithinFile += linesPartitionCounts(i) - i += 1 - } - - if (params.hasHeader) - checkHeaders(fileStatuses.head.getPath, headerInfo.headerValues, headerPartitions, partitionPaths, linesRDD, separator) - - val fullMatrixType = MatrixType( - TStruct.empty, - colType = TStruct("col_id" -> (if (params.hasHeader) TString else TInt32)), - colKey = Array("col_id"), - rowType = rowFieldType, - rowKey = Array().toFastIndexedSeq, - entryType = entryType) - - new TextMatrixReader(params, opts, lines, separator, rowFieldType, fullMatrixType, headerInfo, headerPartitions, linesPartitionCounts, partitionLineIndexWithinFile, partitionPaths) - } -} - -case class TextMatrixReaderParameters( - paths: Array[String], - nPartitions: Option[Int], - rowFieldsStr: Map[String, String], - entryTypeStr: String, - missingValue: String, - hasHeader: Boolean, - separatorStr: String, - gzipAsBGZip: Boolean, - addRowId: 
Boolean, - comment: Array[String]) - -case class TextMatrixReaderOptions(comment: Array[String], hasHeader: Boolean) extends TextReaderOptions - -class TextMatrixReader( - val params: TextMatrixReaderParameters, - opts: TextMatrixReaderOptions, - lines: GenericLines, - separator: Char, - rowFieldType: TStruct, - val fullMatrixType: MatrixType, - headerInfo: TextMatrixHeaderInfo, - headerPartitions: mutable.Set[Int], - _partitionCounts: Array[Long], - partitionLineIndexWithinFile: Array[Long], - partitionPaths: Array[String] -) extends MatrixHybridReader { - def pathsUsed: Seq[String] = params.paths - - def columnCount = Some(headerInfo.nCols) - - def partitionCounts = Some(_partitionCounts) - - def rowAndGlobalPTypes(context: ExecuteContext, requestedType: TableType): (PStruct, PStruct) = { - PType.canonical(requestedType.rowType, required = true).asInstanceOf[PStruct] -> - PType.canonical(requestedType.globalType, required = true).asInstanceOf[PStruct] - } - - def executeGeneric(ctx: ExecuteContext): GenericTableValue = { - val tt = fullMatrixType.toTableType(LowerMatrixIR.entriesFieldName, LowerMatrixIR.colsFieldName) - - val globals = Row(headerInfo.columnIdentifiers.map(Row(_)).toFastIndexedSeq) - - val bodyPType = (requestedRowType: TStruct) => PType.canonical(requestedRowType, required = true).asInstanceOf[PCanonicalStruct] - - val body = { (requestedType: TStruct) => - val linesBody = lines.body - val requestedPType = bodyPType(requestedType) - val localOpts = opts - - val partitionRowIdxGlobal = (0 until _partitionCounts.length - 1).scanLeft(0L) { case (acc, i) => acc + _partitionCounts(i) }.toArray - - val compiledLineParser = new CompiledLineParser(ctx, - rowFieldType, - requestedPType, - headerInfo.nCols, - params.missingValue, - separator, - headerPartitions, - _partitionCounts, - partitionPaths, - partitionRowIdxGlobal, - partitionLineIndexWithinFile, - params.hasHeader) - - { (region: Region, theHailClassLoader: HailClassLoader, fs: FS, context: Any) => - val Row(lc, partitionIdx: Int) = context - compiledLineParser.apply(partitionIdx, region, theHailClassLoader, - linesBody(fs, lc).filter { line => - val l = line.toString - l.nonEmpty && !localOpts.isComment(l) - } - ) - } - } - - new GenericTableValue( - tt, - None, - { (requestedGlobalsType: Type) => - val subset = tt.globalType.valueSubsetter(requestedGlobalsType) - subset(globals).asInstanceOf[Row] - }, - TTuple(lines.contextType, TInt32), - lines.contexts.zipWithIndex.map { case (x, i) => Row(x, i) }, - bodyPType, - body) - - } - - override def lower(ctx: ExecuteContext, requestedType: TableType): TableStage = - executeGeneric(ctx).toTableStage(ctx, requestedType) - - def apply(tr: TableRead, ctx: ExecuteContext): TableValue = { - executeGeneric(ctx).toTableValue(ctx, tr.typ) - } - - override def toJValue: JValue = { - implicit val formats: Formats = DefaultFormats - decomposeWithName(params, "TextMatrixReader") - } - - override def renderShort(): String = defaultRender() - - override def hashCode(): Int = params.hashCode() - - override def equals(that: Any): Boolean = that match { - case that: TextMatrixReader => params == that.params - case _ => false - } -} - -class MatrixParseError( - val msg: String, - val filename: String, - val line: Long, - val posStart: Int, - val posEnd: Int -) extends RuntimeException(s"${ filename }:${ posStart }-${ posEnd }, ${ msg }") - -class CompiledLineParser( - ctx: ExecuteContext, - onDiskRowFieldsType: TStruct, - rowPType: PCanonicalStruct, - nCols: Int, - missingValue: String, - 
separator: Char, - headerPartitions: mutable.Set[Int], - partitionCounts: Array[Long], - partitionPaths: Array[String], - partitionRowIndexGlobal: Array[Long], - partitionRowIndexFile: Array[Long], - hasHeader: Boolean -) extends ((Int, Region, HailClassLoader, Iterator[GenericLine]) => Iterator[Long]) with Serializable { - assert(!missingValue.contains(separator)) - @transient private[this] val entriesType = rowPType - .selfField(MatrixType.entriesIdentifier) - .map(f => f.typ.asInstanceOf[PCanonicalArray]) - @transient private[this] val rowFieldsType = rowPType - .dropFields(Set(MatrixType.entriesIdentifier)) - @transient private[this] val fb = EmitFunctionBuilder[Region, String, Long, String, Long](ctx, "text_matrix_reader") - @transient private[this] val mb = fb.apply_method - @transient private[this] val region = fb.getCodeParam[Region](1) - @transient private[this] val _filename = fb.getCodeParam[String](2) - @transient private[this] val _lineNumber = fb.getCodeParam[Long](3) - @transient private[this] val _line = fb.getCodeParam[String](4) - @transient private[this] val filename = mb.genFieldThisRef[String]("filename") - @transient private[this] val lineNumber = mb.genFieldThisRef[Long]("lineNumber") - @transient private[this] val line = mb.genFieldThisRef[String]("line") - @transient private[this] val pos = mb.genFieldThisRef[Int]("pos") - - fb.cb.emitInit(Code( - pos := 0, - filename := Code._null[String], - lineNumber := 0L, - line := Code._null[String])) - - - @transient private[this] val parseStringMb = fb.genEmitMethod[Region, String]("parseString") - parseStringMb.emitWithBuilder(parseString(_)) - @transient private[this] val parseIntMb = fb.genEmitMethod[Region, Int]("parseInt") - parseIntMb.emitWithBuilder(parseInt(_)) - @transient private[this] val parseLongMb = fb.genEmitMethod[Region, Long]("parseLong") - parseLongMb.emitWithBuilder(parseLong(_)) - - @transient private[this] def parseEntriesOpt(cb: EmitCodeBuilder): Option[EmitCode] = entriesType.map { entriesType => - val sc = parseEntries(cb, entriesType) - EmitCode.present(cb.emb, sc) - } - - mb.emitWithBuilder[Long] { cb => - cb.assign(pos, 0) - cb.assign(filename, _filename) - cb.assign(lineNumber, _lineNumber) - cb.assign(line, _line) - val rowFields = parseRowFields(cb) - val entries = parseEntriesOpt(cb) - rowPType.constructFromFields(cb, region, rowFields ++ entries, deepCopy = false).a - } - - private[this] val loadParserOnWorker = fb.result() - - private[this] def parseError(cb: EmitCodeBuilder, msg: Code[String]): Unit = - cb += Code._throw[MatrixParseError, Unit](Code.newInstance[MatrixParseError, String, String, Long, Int, Int]( - msg, filename, lineNumber, pos, pos + 1)) - - private[this] def numericValue(cb: EmitCodeBuilder, cCode: Code[Char]): Code[Int] = { - val c = cb.newLocal[Char]("clp_numeric_val_c", cCode) - cb.ifx(c < const('0') || c > const('9'), - parseError(cb, const("invalid character '") - .concat(c.toS) - .concat("' in integer literal"))) - (c - const('0')).toI - } - - private[this] def endField(cb: EmitCodeBuilder, p: Value[Int]): Code[Boolean] = { - p.ceq(line.length()) || line(p).ceq(const(separator)) - } - - private[this] def endField(cb: EmitCodeBuilder): Code[Boolean] = - endField(cb, pos) - - private[this] def parseOptionalValue( - cb: EmitCodeBuilder, - parse: EmitCodeBuilder => SValue - ): IEmitCode = { - assert(missingValue.size > 0) - val end = cb.newLocal[Int]("parse_optional_value_end", pos + missingValue.size) - - val Lmissing = CodeLabel() - - cb.ifx(end <= line.length, - 
cb.ifx(endField(cb, end), - cb.ifx(line.invoke[Int, String, Int, Int, Boolean]("regionMatches", - pos, missingValue, 0, missingValue.size), - { - cb.assign(pos, end) - cb.goto(Lmissing) - }))) - - val pc = parse(cb) - val Ldefined = CodeLabel() - cb.goto(Ldefined) - - IEmitCode(Lmissing, Ldefined, pc, false) - } - - private[this] def skipOptionalValue(cb: EmitCodeBuilder, skip: EmitCodeBuilder => Unit): Unit = { - assert(missingValue.size > 0) - val end = cb.newLocal[Int]("skip_optional_value_end", pos + missingValue.size) - - val Lfinished = CodeLabel() - - cb.ifx(end <= line.length, - cb.ifx(endField(cb, end), - cb.ifx(line.invoke[Int, String, Int, Int, Boolean]("regionMatches", - pos, missingValue, 0, missingValue.size), - { - cb.assign(pos, end) - cb.goto(Lfinished) - }))) - - skip(cb) - - cb.define(Lfinished) - } - - private[this] def parseInt(cb: EmitCodeBuilder): Code[Int] = { - cb.ifx(endField(cb), parseError(cb, "empty integer literal")) - - val mul = cb.newLocal[Int]("mul", 1) - cb.ifx(line(pos).ceq(const('-')), { - cb.assign(mul, -1) - cb.assign(pos, pos + 1) - }) - val c = cb.newLocal[Char]("c", line(pos)) - val v = cb.newLocal[Int]("v", numericValue(cb, c)) - cb.assign(pos, pos + 1) - - cb.whileLoop(!endField(cb), { - cb.assign(c, line(pos)) - cb.assign(v, v * const(10) + numericValue(cb, c)) - cb.assign(pos, pos + 1) - }) - v * mul - } - - private[this] def parseLong(cb: EmitCodeBuilder): Code[Long] = { - cb.ifx(endField(cb), parseError(cb, "empty integer literal")) - - val mul = cb.newLocal[Long]("mulL", 1L) - cb.ifx(line(pos).ceq(const('-')), { - cb.assign(mul, -1L) - cb.assign(pos, pos + 1) - }) - val c = cb.newLocal[Char]("cL", line(pos)) - val v = cb.newLocal[Long]("vL", numericValue(cb, c).toL) - cb.assign(pos, pos + 1) - - cb.whileLoop(!endField(cb), { - cb.assign(c, line(pos)) - cb.assign(v, v * const(10L) + numericValue(cb, c).toL) - cb.assign(pos, pos + 1) - }) - v * mul - } - - private[this] def parseString(cb: EmitCodeBuilder): Code[String] = { - val start = cb.newLocal[Int]("start", pos) - cb.whileLoop(!endField(cb), - cb.assign(pos, pos + 1)) - line.invoke[Int, Int, String]("substring", start, pos) - } - - private[this] def parseValueOfType(cb: EmitCodeBuilder, t: PType): IEmitCode = { - def parseDefinedValue(cb: EmitCodeBuilder): SValue = t match { - case t: PInt32 => - primitive(cb.memoize(cb.invokeCode[Int](parseIntMb, region))) - case t: PInt64 => - primitive(cb.memoize(cb.invokeCode[Long](parseLongMb, region))) - case t: PFloat32 => - primitive(cb.memoize(Code.invokeStatic1[java.lang.Float, String, Float]("parseFloat", cb.invokeCode(parseStringMb, region)))) - case t: PFloat64 => - primitive(cb.memoize(Code.invokeStatic1[java.lang.Double, String, Double]("parseDouble", cb.invokeCode(parseStringMb, region)))) - case t: PString => - val st = SStringPointer(t) - st.constructFromString(cb, region, cb.invokeCode[String](parseStringMb, region)) - } - if (t.required) - IEmitCode.present(cb, parseDefinedValue(cb)) - else - parseOptionalValue(cb, parseDefinedValue) - } - - private[this] def skipValueOfType(cb: EmitCodeBuilder, t: PType): Unit = { - def skipDefinedValue(cb: EmitCodeBuilder): Unit = { - cb.whileLoop(!endField(cb), cb.assign(pos, pos + 1)) - } - - if (t.required) skipDefinedValue(cb) else skipOptionalValue(cb, skipDefinedValue) - } - - private[this] def parseRowFields(cb: EmitCodeBuilder): Array[EmitCode] = { - assert(onDiskRowFieldsType.size >= rowFieldsType.size) - - // need to be careful to ensure parsing code is directly appended to code builder, 
not EmitCode block - val fieldEmitCodes = new Array[EmitCode](rowFieldsType.size) - - onDiskRowFieldsType.fields.foreach { onDiskField => - rowPType.selfField(onDiskField.name) match { - - case Some(requestedField) => - val reqFieldType = requestedField.typ - val reqIndex = requestedField.index - - - val ec = if (onDiskField.name == "row_id") - EmitCode.present(cb.emb, primitive(lineNumber)) - else { - cb.ifx(pos >= line.length, - parseError(cb, const("unexpected end of line while reading row field ") - .concat(onDiskField.name))) - val ev = parseValueOfType(cb, reqFieldType).memoize(cb, s"field_${onDiskField.name}") - cb.assign(pos, pos + 1) - ev.load - } - - fieldEmitCodes(reqIndex) = ec - - case None => - if (onDiskField.name != "row_id") { - skipValueOfType(cb, PType.canonical(onDiskField.typ)) // will always be optional - cb.assign(pos, pos + 1) - } - } - } - fieldEmitCodes - } - - private[this] def parseEntries(cb: EmitCodeBuilder, entriesType: PCanonicalArray): SIndexablePointerValue = { - val entryType = entriesType.elementType.asInstanceOf[PCanonicalStruct] - assert(entryType.fields.size == 1) - val (push, finish) = entriesType.constructFromFunctions(cb, region, nCols, false) - - val i = cb.newLocal[Int]("i", 0) - cb.whileLoop(i < nCols, { - cb.ifx(pos >= line.length, parseError(cb, const("unexpected end of line while reading entry ").concat(i.toS))) - - val ec = EmitCode.fromI(cb.emb)(cb => parseValueOfType(cb, entryType.fields(0).typ)) - push(cb, IEmitCode.present(cb, SStackStruct.constructFromArgs(cb, region, entryType.virtualType, ec))) - cb.assign(pos, pos + 1) - cb.assign(i, i + 1) - }) - finish(cb) - } - - def apply( - partition: Int, - r: Region, - theHailClassLoader: HailClassLoader, - it: Iterator[GenericLine] - ): Iterator[Long] = { - val filename = partitionPaths(partition) - if (hasHeader && headerPartitions.contains(partition)) - it.next() - - val parse = loadParserOnWorker(theHailClassLoader) - val fileLineIndex = partitionRowIndexFile(partition) - val globalLineIndex = partitionRowIndexGlobal(partition) - - var idxWithinPartition = 0L - it.map { line => - val x = line.toString - try { - val res = - parse( - r, - filename, - globalLineIndex + idxWithinPartition, - x) - idxWithinPartition += 1 - res - } catch { - case e: MatrixParseError => - fatal( - s"""""Error parse line ${ fileLineIndex + idxWithinPartition }:${ e.posStart }-${ e.posEnd }: - | File: $filename - | Line: - | ${ x.truncate }""".stripMargin, - e) - case e: Exception => fatal( - s"""""Error parse line ${ fileLineIndex + idxWithinPartition }: - | File: $filename - | Line: - | ${ x.truncate }""".stripMargin, - e) - } - } - } -} diff --git a/hail/src/main/scala/is/hail/io/bgen/LoadBgen.scala b/hail/src/main/scala/is/hail/io/bgen/LoadBgen.scala index 5ce75847c5e..27fe23eb195 100644 --- a/hail/src/main/scala/is/hail/io/bgen/LoadBgen.scala +++ b/hail/src/main/scala/is/hail/io/bgen/LoadBgen.scala @@ -421,7 +421,11 @@ case class MatrixBGENReaderParameters( }.toList), "nPartitions" -> nPartitions.map(JInt(_)).getOrElse(JNull), "blockSizeInMB" -> blockSizeInMB.map(JInt(_)).getOrElse(JNull), - "includedVariants" -> includedVariants.map(t => JString(Pretty(t))).getOrElse(JNull))) + // FIXME: feels like a hack that Pretty needs execute context + // FIXME: feels like a hack that I use null here + // FIXME: feels like a hack that toJValue uses Pretty? + // Q: can we parse SSA'ed pretty table IR? 
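The FIXMEs above touch a refactor that runs through this patch: Pretty, TypeCheck, LowerMatrixIR, and fb.result now take an explicit ExecuteContext, so call sites without one (like this serializer) end up passing a placeholder. A generic sketch of the pattern, with hypothetical names that are not Hail's API:

final case class Ctx(maxLiteralLength: Int)

sealed trait Node
final case class Lit(value: String) extends Node
final case class Op(name: String, children: List[Node]) extends Node

// the context only affects presentation, but every recursive call must thread it
def pretty(ctx: Ctx, node: Node): String = node match {
  case Lit(v) =>
    if (ctx != null && v.length > ctx.maxLiteralLength) v.take(ctx.maxLiteralLength) + "..." else v
  case Op(name, children) =>
    children.map(pretty(ctx, _)).mkString(s"($name ", " ", ")")
}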
+ "includedVariants" -> includedVariants.map(t => JString(Pretty(null, t))).getOrElse(JNull))) } } diff --git a/hail/src/main/scala/is/hail/io/fs/GoogleStorageFS.scala b/hail/src/main/scala/is/hail/io/fs/GoogleStorageFS.scala index 92e834d4d49..e9cb2cf41f3 100644 --- a/hail/src/main/scala/is/hail/io/fs/GoogleStorageFS.scala +++ b/hail/src/main/scala/is/hail/io/fs/GoogleStorageFS.scala @@ -104,19 +104,26 @@ class GoogleStorageFileStatus(path: String, modificationTime: java.lang.Long, si class GoogleStorageFS(val serviceAccountKey: Option[String] = None) extends FS { import GoogleStorageFS._ - @transient private lazy val storage: Storage = serviceAccountKey match { - case None => - log.info("Initializing google storage client from latent credentials") - StorageOptions.newBuilder() - .build() - .getService - case Some(keyData) => - log.info("Initializing google storage client from service account key") - StorageOptions.newBuilder() - .setCredentials( - ServiceAccountCredentials.fromStream(new ByteArrayInputStream(keyData.getBytes))) - .build() - .getService + @transient private lazy val storage: Storage = { + val transportOptions = StorageOptions.getDefaultHttpTransportOptions().toBuilder() + .setConnectTimeout(5000) + .setReadTimeout(5000) + .build() + serviceAccountKey match { + case None => + log.info("Initializing google storage client from latent credentials") + StorageOptions.newBuilder() + .build() + .getService + case Some(keyData) => + log.info("Initializing google storage client from service account key") + StorageOptions.newBuilder() + .setCredentials( + ServiceAccountCredentials.fromStream(new ByteArrayInputStream(keyData.getBytes))) + .setTransportOptions(transportOptions) + .build() + .getService + } } def asCacheable(): CacheableGoogleStorageFS = new CacheableGoogleStorageFS(serviceAccountKey, null) diff --git a/hail/src/main/scala/is/hail/io/plink/LoadPlink.scala b/hail/src/main/scala/is/hail/io/plink/LoadPlink.scala index 95fe5307ac9..2fcd584d46b 100644 --- a/hail/src/main/scala/is/hail/io/plink/LoadPlink.scala +++ b/hail/src/main/scala/is/hail/io/plink/LoadPlink.scala @@ -289,7 +289,8 @@ class PlinkVariant( val index: Int, val locusAlleles: Any, val cmPos: Double, - val rsid: String) + val rsid: String +) extends Serializable class MatrixPLINKReader( val params: MatrixPLINKReaderParameters, @@ -315,8 +316,6 @@ class MatrixPLINKReader( } def executeGeneric(ctx: ExecuteContext): GenericTableValue = { - val fsBc = ctx.fsBc - val localA2Reference = params.a2Reference val variantsBc = ctx.backend.broadcast(variants) val localNSamples = nSamples @@ -361,9 +360,12 @@ class MatrixPLINKReader( val rvb = new RegionValueBuilder(region) - val is = fsBc.value.open(bed) - TaskContext.get.addTaskCompletionListener[Unit] { (context: TaskContext) => - is.close() + val is = fs.open(bed) + if (TaskContext.get != null) { + // FIXME: need to close InputStream for other backends too + TaskContext.get.addTaskCompletionListener[Unit] { (context: TaskContext) => + is.close() + } } var offset: Long = 0 diff --git a/hail/src/main/scala/is/hail/io/vcf/ExportVCF.scala b/hail/src/main/scala/is/hail/io/vcf/ExportVCF.scala index 5d39aa0162d..340379d2f42 100644 --- a/hail/src/main/scala/is/hail/io/vcf/ExportVCF.scala +++ b/hail/src/main/scala/is/hail/io/vcf/ExportVCF.scala @@ -10,12 +10,11 @@ import is.hail.expr.ir.MatrixValue import is.hail.io.compress.{BGzipLineReader, BGzipOutputStream} import is.hail.io.fs.FS import is.hail.io.{VCFAttributes, VCFFieldAttributes, VCFMetadata} +import 
is.hail.types.MatrixType import is.hail.types.physical._ import is.hail.types.virtual._ import is.hail.utils._ -import is.hail.variant.{Call, RegionValueVariant} - -import scala.io.Source +import is.hail.variant.{Call, ReferenceGenome, RegionValueVariant} object ExportVCF { def infoNumber(t: Type): String = t match { @@ -25,87 +24,8 @@ object ExportVCF { case _ => "1" } - def strVCF(sb: StringBuilder, elementType: PType, offset: Long) { - elementType match { - case PInt32(_) => - val x = Region.loadInt(offset) - sb.append(x) - case PInt64(_) => - val x = Region.loadLong(offset) - if (x > Int.MaxValue || x < Int.MinValue) - fatal(s"Cannot convert Long to Int if value is greater than Int.MaxValue (2^31 - 1) " + - s"or less than Int.MinValue (-2^31). Found $x.") - sb.append(x) - case PFloat32(_) => - val x = Region.loadFloat(offset) - if (x.isNaN) - sb += '.' - else - sb.append(x.formatted("%.6g")) - case PFloat64(_) => - val x = Region.loadDouble(offset) - if (x.isNaN) - sb += '.' - else - sb.append(x.formatted("%.6g")) - case t@PCanonicalString(_) => - sb.append(t.loadString(offset)) - case _: PCall => - val c = Region.loadInt(offset) - Call.vcfString(c, sb) - case _ => - fatal(s"VCF does not support type $elementType") - } - } - - def iterableVCF(sb: StringBuilder, t: PContainer, length: Int, offset: Long, delim: Char) { - if (length > 0) { - var i = 0 - while (i < length) { - if (i > 0) - sb += delim - if (t.isElementDefined(offset, i)) { - val eOffset = t.loadElement(offset, length, i) - strVCF(sb, t.elementType, eOffset) - } else - sb += '.' - i += 1 - } - } else - sb += '.' - } - - def emitInfo(sb: StringBuilder, f: PField, offset: Long, wroteLast: Boolean): Boolean = { - f.typ match { - case it: PContainer if it.elementType.virtualType != TBoolean => - val length = it.loadLength(offset) - if (length == 0) - wroteLast - else { - if (wroteLast) - sb += ';' - sb.append(f.name) - sb += '=' - iterableVCF(sb, it, length, offset, ',') - true - } - case PBoolean(_) => - if (Region.loadBoolean(offset)) { - if (wroteLast) - sb += ';' - sb.append(f.name) - true - } else - wroteLast - case t => - if (wroteLast) - sb += ';' - sb.append(f.name) - sb += '=' - strVCF(sb, t, offset) - true - } - } + def fmtFloat(fmt: String, value: Float): String = value.formatted(fmt) + def fmtDouble(fmt: String, value: Double): String = value.formatted(fmt) def infoType(t: Type): Option[String] = t match { case TInt32 | TInt64 => Some("Integer") @@ -172,309 +92,102 @@ object ExportVCF { } } - def emitGenotype(sb: StringBuilder, formatFieldOrder: Array[Int], tg: PStruct, offset: Long, fieldDefined: Array[Boolean], missingFormat: String) { - var i = 0 - while (i < formatFieldOrder.length) { - fieldDefined(i) = tg.isFieldDefined(offset, formatFieldOrder(i)) - i += 1 - } - - var end = i - while (end > 0 && !fieldDefined(end - 1)) - end -= 1 - - if (end == 0) - sb.append(missingFormat) - else { - i = 0 - while (i < end) { - if (i > 0) - sb += ':' - val j = formatFieldOrder(i) - val fIsDefined = fieldDefined(i) - val fOffset = tg.loadField(offset, j) - - tg.fields(j).typ match { - case it: PContainer => - val pt = it - if (fIsDefined) { - val fLength = pt.loadLength(fOffset) - iterableVCF(sb, pt, fLength, fOffset, ',') - } else - sb += '.' - case t => - if (fIsDefined) - strVCF(sb, t, fOffset) - else if (t.virtualType == TCall) - sb.append("./.") - else - sb += '.' 
- } - i += 1 - } - } - } - def getAttributes(k1: String, attributes: Option[VCFMetadata]): Option[VCFAttributes] = attributes.flatMap(_.get(k1)) def getAttributes(k1: String, k2: String, attributes: Option[VCFMetadata]): Option[VCFFieldAttributes] = getAttributes(k1, attributes).flatMap(_.get(k2)) - def apply(ctx: ExecuteContext, mv: MatrixValue, path: String, append: Option[String], - exportType: String, metadata: Option[VCFMetadata], tabix: Boolean = false) { - val fs = ctx.fs - - mv.typ.requireColKeyString() - mv.typ.requireRowKeyVariant() - - val typ = mv.typ - - val tg = mv.entryPType + def makeHeader(rowType: TStruct, entryType: TStruct, rg: ReferenceGenome, append: Option[String], + metadata: Option[VCFMetadata], sampleIds: Array[String]): String = { + val sb = new StringBuilder() + + sb.append("##fileformat=VCFv4.2\n") + sb.append(s"##hailversion=${ hail.HAIL_PRETTY_VERSION }\n") + + entryType.fields.foreach { f => + val attrs = getAttributes("format", f.name, metadata).getOrElse(Map.empty[String, String]) + sb.append("##FORMAT=\n") + } - checkFormatSignature(tg.virtualType) + val filters = getAttributes("filter", metadata).getOrElse(Map.empty[String, Any]).keys.toArray.sorted + filters.foreach { id => + val attrs = getAttributes("filter", id, metadata).getOrElse(Map.empty[String, String]) + sb.append("##FILTER=\n") + } - val formatFieldOrder: Array[Int] = tg.fieldIdx.get("GT") match { - case Some(i) => (i +: tg.fields.filter(fd => fd.name != "GT").map(_.index)).toArray - case None => tg.fields.indices.toArray + val tinfo = rowType.selfField("info") match { + case Some(fld) if fld.typ.isInstanceOf[TStruct] => + fld.typ.asInstanceOf[TStruct] + case _ => + TStruct() } - val formatFieldString = formatFieldOrder.map(i => tg.fields(i).name).mkString(":") - val missingFormatStr = if (typ.entryType.size > 0 && typ.entryType.types(formatFieldOrder(0)) == TCall) - "./." - else "." + tinfo.fields.foreach { f => + val attrs = getAttributes("info", f.name, metadata).getOrElse(Map.empty[String, String]) + sb.append("##INFO=\n") + } - val tinfo = - if (typ.rowType.hasField("info")) { - typ.rowType.field("info").typ match { - case _: TStruct => mv.rvRowPType.field("info").typ.asInstanceOf[PStruct] - case t => - warn(s"export_vcf found row field 'info' of type $t, but expected type 'Struct'. Emitting no INFO fields.") - PCanonicalStruct.empty() - } - } else { - warn(s"export_vcf found no row field 'info'. 
Emitting no INFO fields.") - PCanonicalStruct.empty() - } + append.foreach { append => + sb.append(append) + } - val rg = mv.referenceGenome val assembly = rg.name - - val localNSamples = mv.nCols - val hasSamples = localNSamples > 0 - - def header: String = { - val sb = new StringBuilder() - - sb.append("##fileformat=VCFv4.2\n") - sb.append(s"##hailversion=${ hail.HAIL_PRETTY_VERSION }\n") - - tg.fields.foreach { f => - val attrs = getAttributes("format", f.name, metadata).getOrElse(Map.empty[String, String]) - sb.append("##FORMAT=\n") - } - - val filters = getAttributes("filter", metadata).getOrElse(Map.empty[String, Any]).keys.toArray.sorted - filters.foreach { id => - val attrs = getAttributes("filter", id, metadata).getOrElse(Map.empty[String, String]) - sb.append("##FILTER=\n") - } - - tinfo.virtualType.fields.foreach { f => - val attrs = getAttributes("info", f.name, metadata).getOrElse(Map.empty[String, String]) - sb.append("##INFO=\n") - } - - append.foreach { f => - using(fs.open(f)) { s => - Source.fromInputStream(s) - .getLines() - .filterNot(_.isEmpty) - .foreach { line => - sb.append(line) - sb += '\n' - } - } - } - - rg.contigs.foreachBetween { c => - sb.append("##contig= + rg.contigs.foreachBetween { c => + sb.append("##contig= sb += '\t' sb.append(id) } - sb.result() } + sb.result() + } - val fieldIdx = typ.rowType.fieldIdx - - def lookupVAField(fieldName: String, vcfColName: String, expectedTypeOpt: Option[Type]): (Boolean, Int) = { - fieldIdx.get(fieldName) match { - case Some(idx) => - val t = typ.rowType.types(idx) - if (expectedTypeOpt.forall(t == _)) // FIXME: make sure this is right - (true, idx) - else { - warn(s"export_vcf found row field $fieldName with type '$t', but expected type ${ expectedTypeOpt.get }. " + - s"Emitting missing $vcfColName.") - (false, 0) - } - case None => (false, 0) - } - } - val filtersType = TSet(TString) - val filtersPType = if (typ.rowType.hasField("filters")) { - assert(typ.rowType.fieldType("filters") == TSet(TString)) - mv.rvRowPType.field("filters").typ.asInstanceOf[PSet] - } else null - - val (idExists, idIdx) = lookupVAField("rsid", "ID", Some(TString)) - val (qualExists, qualIdx) = lookupVAField("qual", "QUAL", Some(TFloat64)) - val (filtersExists, filtersIdx) = lookupVAField("filters", "FILTERS", Some(filtersType)) - val (infoExists, infoIdx) = lookupVAField("info", "INFO", None) - - val fullRowType = mv.rvRowPType - val localEntriesIndex = mv.entriesIdx - val localEntriesType = mv.entryArrayPType - - mv.rvd.mapPartitions { (_, it) => - val sb = new StringBuilder - - val formatDefinedArray = new Array[Boolean](formatFieldOrder.length) - - val rvv = new RegionValueVariant(fullRowType) - it.map { ptr => - sb.clear() - - rvv.set(ptr) - - sb.append(rvv.contig()) - sb += '\t' - sb.append(rvv.position()) - sb += '\t' - - if (idExists && fullRowType.isFieldDefined(ptr, idIdx)) { - val idOffset = fullRowType.loadField(ptr, idIdx) - sb.append(fullRowType.types(idIdx).asInstanceOf[PString].loadString(idOffset)) - } else - sb += '.' - - sb += '\t' - sb.append(rvv.alleles()(0)) - sb += '\t' - if (rvv.alleles().length > 1) { - rvv.alleles().tail.foreachBetween(aa => - sb.append(aa))(sb += ',') - } else { - sb += '.' - } - sb += '\t' - - if (qualExists && fullRowType.isFieldDefined(ptr, qualIdx)) { - val qualOffset = fullRowType.loadField(ptr, qualIdx) - sb.append(Region.loadDouble(qualOffset).formatted("%.2f")) - } else - sb += '.' 
- - sb += '\t' - - if (filtersExists && fullRowType.isFieldDefined(ptr, filtersIdx)) { - val filtersOffset = fullRowType.loadField(ptr, filtersIdx) - val filtersLength = filtersPType.loadLength(filtersOffset) - if (filtersLength == 0) - sb.append("PASS") - else - iterableVCF(sb, filtersPType, filtersLength, filtersOffset, ';') - } else - sb += '.' - - sb += '\t' - - var wroteAnyInfo: Boolean = false - if (infoExists && fullRowType.isFieldDefined(ptr, infoIdx)) { - var wrote: Boolean = false - val infoOffset = fullRowType.loadField(ptr, infoIdx) - var i = 0 - while (i < tinfo.size) { - if (tinfo.isFieldDefined(infoOffset, i)) { - wrote = emitInfo(sb, tinfo.fields(i), tinfo.loadField(infoOffset, i), wrote) - wroteAnyInfo = wroteAnyInfo || wrote - } - i += 1 - } - } - if (!wroteAnyInfo) - sb += '.' - - if (hasSamples) { - sb += '\t' - sb.append(formatFieldString) - - val gsOffset = fullRowType.loadField(ptr, localEntriesIndex) - var i = 0 - while (i < localNSamples) { - sb += '\t' - if (localEntriesType.isElementDefined(gsOffset, i)) - emitGenotype(sb, formatFieldOrder, tg, localEntriesType.loadElement(gsOffset, localNSamples, i), formatDefinedArray, missingFormatStr) - else - sb.append(missingFormatStr) - - i += 1 - } + def lookupVAField(rowType: TStruct, fieldName: String, vcfColName: String, expectedTypeOpt: Option[Type]): (Boolean, Int) = { + rowType.fieldIdx.get(fieldName) match { + case Some(idx) => + val t = rowType.types(idx) + if (expectedTypeOpt.forall(t == _)) // FIXME: make sure this is right + (true, idx) + else { + warn(s"export_vcf found row field $fieldName with type '$t', but expected type ${ expectedTypeOpt.get }. " + + s"Emitting missing $vcfColName.") + (false, 0) } - - sb.result() - } - }.writeTable(ctx, path, Some(header), exportType = exportType) - - if (tabix) { - exportType match { - case ExportType.CONCATENATED => - info(s"Writing tabix index for $path") - TabixVCF(fs, path) - case ExportType.PARALLEL_SEPARATE_HEADER | ExportType.PARALLEL_HEADER_IN_SHARD => - val files = fs.glob(path + "/part-*").map(_.getPath.getBytes) - info(s"Writing tabix index for ${ files.length } in $path") - ctx.backend.parallelizeAndComputeWithIndex(ctx.backendContext, ctx.fs, files)({ (pathBytes, _, _, fs) => - TabixVCF(fs, new String(pathBytes)) - Array.empty - }) - case ExportType.PARALLEL_COMPOSABLE => - warn("Writing tabix index for `parallel=composable` is not supported. 
No index will be written.") - } + case None => (false, 0) } } } diff --git a/hail/src/main/scala/is/hail/lir/X.scala b/hail/src/main/scala/is/hail/lir/X.scala index 4f8b1ffb212..cd6cec20189 100644 --- a/hail/src/main/scala/is/hail/lir/X.scala +++ b/hail/src/main/scala/is/hail/lir/X.scala @@ -56,7 +56,7 @@ class Classx[C](val name: String, val superName: String, var sourceFile: Option[ sourceFile = Some(path) } - def asBytes(print: Option[PrintWriter]): Array[(String, Array[Byte])] = { + def asBytes(writeIRs: Boolean, print: Option[PrintWriter]): Array[(String, Array[Byte])] = { val classes = new mutable.ArrayBuffer[Classx[_]]() classes += this @@ -65,8 +65,6 @@ class Classx[C](val name: String, val superName: String, var sourceFile: Option[ SimplifyControl(m) } - val writeIRs = HailContext.isInitialized && HailContext.getFlag("write_ir_files") != null - if (writeIRs) saveToFile(s"/tmp/hail/${name}.lir") for (m <- methods) { diff --git a/hail/src/main/scala/is/hail/types/encoded/ENumpyBinaryNDArray.scala b/hail/src/main/scala/is/hail/types/encoded/ENumpyBinaryNDArray.scala new file mode 100644 index 00000000000..4b3fdb125aa --- /dev/null +++ b/hail/src/main/scala/is/hail/types/encoded/ENumpyBinaryNDArray.scala @@ -0,0 +1,77 @@ +package is.hail.types.encoded + +import is.hail.annotations.Region +import is.hail.asm4s._ +import is.hail.expr.ir.EmitCodeBuilder +import is.hail.io.{InputBuffer, OutputBuffer} +import is.hail.types.physical.PCanonicalNDArray +import is.hail.types.physical.stypes.{SType, SValue} +import is.hail.types.physical.stypes.concrete.SNDArrayPointer +import is.hail.types.physical.stypes.interfaces.SNDArrayValue +import is.hail.types.physical.stypes.primitives.SFloat64 +import is.hail.types.virtual.{TNDArray, Type} +import is.hail.utils.FastIndexedSeq + +final case class ENumpyBinaryNDArray(nRows: Long, nCols: Long, required: Boolean) extends EType { + type DecodedPType = PCanonicalNDArray + val elementType = EFloat64(true) + + def setRequired(newRequired: Boolean): ENumpyBinaryNDArray = ENumpyBinaryNDArray(nRows, nCols, newRequired) + + def _decodedSType(requestedType: Type): SType = { + val elementPType = elementType.decodedPType(requestedType.asInstanceOf[TNDArray].elementType) + SNDArrayPointer(PCanonicalNDArray(elementPType, 2, false)) + } + + override def _buildEncoder(cb: EmitCodeBuilder, v: SValue, out: Value[OutputBuffer]): Unit = { + val ndarray = v.asInstanceOf[SNDArrayValue] + assert(ndarray.st.elementType == SFloat64) + val i = cb.newLocal[Long]("i") + val j = cb.newLocal[Long]("j") + val writeElemF = elementType.buildEncoder(ndarray.st.elementType, cb.emb.ecb) + + cb.forLoop(cb.assign(i, 0L), i < nRows, cb.assign(i, i + 1L), { + cb.forLoop(cb.assign(j, 0L), j < nCols, cb.assign(j, j + 1L), { + writeElemF(cb, ndarray.loadElement(FastIndexedSeq(i, j), cb), out) + }) + }) + + } + + override def _buildDecoder(cb: EmitCodeBuilder, t: Type, region: Value[Region], in: Value[InputBuffer]): SValue = { + val st = decodedSType(t).asInstanceOf[SNDArrayPointer] + val pt = st.pType + val readElemF = elementType.buildInplaceDecoder(pt.elementType, cb.emb.ecb) + + val stride0 = cb.newLocal[Long]("stride0", nCols * pt.elementType.byteSize) + val stride1 = cb.newLocal[Long]("stride1", pt.elementType.byteSize) + + val n = cb.newLocal[Long]("length", nRows * nCols) + + val (tFirstElementAddress, tFinisher) = pt.constructDataFunction(IndexedSeq(nRows, nCols), IndexedSeq(stride0, stride1), cb, region) + val currElementAddress = 
cb.newLocal[Long]("eblockmatrix_ndarray_currElementAddress", tFirstElementAddress) + + val i = cb.newLocal[Long]("i") + cb.forLoop(cb.assign(i, 0L), i < n, cb.assign(i, i + 1L), { + readElemF(cb, region, currElementAddress, in) + cb.assign(currElementAddress, currElementAddress + pt.elementType.byteSize) + }) + + tFinisher(cb) + } + + def _buildSkip(cb: EmitCodeBuilder, r: Value[Region], in: Value[InputBuffer]): Unit = { + ??? + } + + def _asIdent = s"ndarray_of_${ elementType.asIdent }" + + def _toPretty = s"ENDArray[$elementType]" + + override def _pretty(sb: StringBuilder, indent: Int, compact: Boolean = false) { + sb.append("ENDArray[") + elementType.pretty(sb, indent, compact) + sb.append("]") + } + +} diff --git a/hail/src/main/scala/is/hail/types/encoded/EType.scala b/hail/src/main/scala/is/hail/types/encoded/EType.scala index 04978a408d7..48897b9eea7 100644 --- a/hail/src/main/scala/is/hail/types/encoded/EType.scala +++ b/hail/src/main/scala/is/hail/types/encoded/EType.scala @@ -210,7 +210,7 @@ object EType { val f = et.buildEncoder(pc.st, mb.ecb) f(cb, pc, out) } - val func = fb.result() + val func = fb.result(ctx) encoderCache.put(k, func) func } @@ -247,7 +247,7 @@ object EType { pt.store(cb, region, pc, false) } - val r = (pt, fb.result()) + val r = (pt, fb.result(ctx)) decoderCache.put(k, r) r } diff --git a/hail/src/main/scala/is/hail/types/physical/PType.scala b/hail/src/main/scala/is/hail/types/physical/PType.scala index 53bc962820c..911da7c13ee 100644 --- a/hail/src/main/scala/is/hail/types/physical/PType.scala +++ b/hail/src/main/scala/is/hail/types/physical/PType.scala @@ -334,7 +334,7 @@ object PType { val srcAddr = fb.apply_method.getCodeParam[Long](2) cpt.store(cb, region, t.loadCheapSCode(cb, srcAddr), deepCopy = false) } - Some(fb.result()) + Some(fb.result(ctx)) } } } diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/SCode.scala b/hail/src/main/scala/is/hail/types/physical/stypes/SCode.scala index a9de6f56bc5..d2e0fb362a7 100644 --- a/hail/src/main/scala/is/hail/types/physical/stypes/SCode.scala +++ b/hail/src/main/scala/is/hail/types/physical/stypes/SCode.scala @@ -3,6 +3,7 @@ package is.hail.types.physical.stypes import is.hail.annotations.Region import is.hail.asm4s._ import is.hail.expr.ir.EmitCodeBuilder +import is.hail.types.physical.stypes.concrete.SRNGStateValue import is.hail.types.physical.stypes.interfaces._ import is.hail.types.physical.stypes.primitives._ @@ -89,6 +90,8 @@ trait SValue { def asStream: SStreamValue = asInstanceOf[SStreamValue] + def asRNGState: SRNGStateValue = asInstanceOf[SRNGStateValue] + def castTo(cb: EmitCodeBuilder, region: Value[Region], destType: SType): SValue = castTo(cb, region, destType, false) diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SRNGState.scala b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SRNGState.scala new file mode 100644 index 00000000000..43bcafa65b1 --- /dev/null +++ b/hail/src/main/scala/is/hail/types/physical/stypes/concrete/SRNGState.scala @@ -0,0 +1,117 @@ +package is.hail.types.physical.stypes.concrete + +import is.hail.annotations.Region +import is.hail.asm4s._ +import is.hail.expr.ir.{EmitCodeBuilder, Threefry} +import is.hail.types.TypeWithRequiredness +import is.hail.types.physical.PType +import is.hail.types.physical.stypes.{SSettable, SType, SValue} +import is.hail.types.virtual.{TRNGState, Type} +import is.hail.utils.{Bitstring, toRichIterable} + +import scala.collection.mutable + +import scala.collection.mutable + +object SRNGState { + val 
staticTweak = -1L + val finalBlockNoPadTweak = -2L + val finalBlockPaddedTweak = -3L +} + +final case class SRNGState( + key: IndexedSeq[Long], + numWordsInLastDynBlock: Int +) extends SType { + assert(key.length == 4) + assert(numWordsInLastDynBlock <= 4 && numWordsInLastDynBlock >= 0) + + def virtualType: Type = TRNGState + + override protected[stypes] def _coerceOrCopy(cb: EmitCodeBuilder, region: Value[Region], value: SValue, deepCopy: Boolean): SValue = ??? + + override def settableTupleTypes(): IndexedSeq[TypeInfo[_]] = + Array.fill(4 + numWordsInLastDynBlock)(typeInfo[Long]) + + override def fromSettables(settables: IndexedSeq[Settable[_]]): SSettable = ??? + + override def fromValues(values: IndexedSeq[Value[_]]): SValue = ??? + + override def storageType(): PType = ??? + + override def copiedType: SType = ??? + + override def castRename(t: Type): SType = ??? + + override protected[stypes] def _typeWithRequiredness: TypeWithRequiredness = ??? + + override def containsPointers: Boolean = false +} + +object SRNGStateValue { + def apply(cb: EmitCodeBuilder, key: IndexedSeq[Long]): SRNGStateValue = { + val typ = SRNGState(key, 0) + new SRNGStateValue( + typ, + Array.fill[Value[Long]](4)(0), + Array[Value[Long]](), + false, + 0) + } +} + +final case class SRNGStateValue( + st: SRNGState, + runningSum: IndexedSeq[Value[Long]], + lastDynBlock: IndexedSeq[Value[Long]], + hasStaticSplit: Boolean, + numDynBlocks: Int, +) extends SValue { + assert(runningSum.length == 4) + assert(lastDynBlock.length == st.numWordsInLastDynBlock) + + override def valueTuple: IndexedSeq[Value[_]] = + runningSum ++ lastDynBlock + + override def sizeToStoreInBytes(cb: EmitCodeBuilder) = ??? + + def splitStatic(cb: EmitCodeBuilder, idx: Long): SRNGStateValue = { + assert(!hasStaticSplit) + val x = Array.ofDim[Long](4) + x(0) = idx + Threefry.encrypt(st.key, SRNGState.staticTweak, x) + val newDynBlocksSum = Array.tabulate[Value[Long]](4)(i => cb.memoize(runningSum(i) ^ x(i))) + copy( + runningSum = newDynBlocksSum, + hasStaticSplit = true) + } + + def splitDyn(cb: EmitCodeBuilder, idx: Value[Long]): SRNGStateValue = { + if (st.numWordsInLastDynBlock < 4) { + return copy( + st = st.copy(numWordsInLastDynBlock = st.numWordsInLastDynBlock + 1), + lastDynBlock = lastDynBlock :+ idx) + } + val x = Array.tabulate[Settable[Long]](4)(i => cb.newLocal[Long](s"splitDyn_x$i", lastDynBlock(i))) + Threefry.encrypt(cb, st.key, numDynBlocks.toLong, x) + for (i <- 0 until 4) cb.assign(x(i), x(i) ^ runningSum(i)) + copy( + st = st.copy(numWordsInLastDynBlock = 1), + runningSum = x, + lastDynBlock = Array(idx), + numDynBlocks = numDynBlocks + 1) + } + + def rand(cb: EmitCodeBuilder): IndexedSeq[Value[Long]] = { + val x = Array.tabulate[Settable[Long]](4)(i => cb.newLocal[Long](s"rand_x$i", runningSum(i))) + if (st.numWordsInLastDynBlock == 4) { + for (i <- lastDynBlock.indices) cb.assign(x(i), x(i) ^ lastDynBlock(i)) + Threefry.encrypt(cb, st.key, SRNGState.finalBlockNoPadTweak, x) + } else { + for (i <- lastDynBlock.indices) cb.assign(x(i), x(i) ^ lastDynBlock(i)) + cb.assign(x(lastDynBlock.size), x(lastDynBlock.size) ^ (1L << 63)) + Threefry.encrypt(cb, st.key, SRNGState.finalBlockPaddedTweak, x) + } + x + } +} diff --git a/hail/src/main/scala/is/hail/types/virtual/TRNGState.scala b/hail/src/main/scala/is/hail/types/virtual/TRNGState.scala new file mode 100644 index 00000000000..e82ba110146 --- /dev/null +++ b/hail/src/main/scala/is/hail/types/virtual/TRNGState.scala @@ -0,0 +1,12 @@ +package is.hail.types.virtual + +case object 
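The SRNGStateValue above stages a splittable, Threefry-based RNG state: each split appends a 64-bit index to a four-word block, completed blocks are encrypted (with the block count as the tweak) and XORed into a running sum, and rand finalizes by folding in the last, possibly padded block under a distinct tweak. A minimal unstaged model of that bookkeeping is sketched below; it assumes only the Threefry.encrypt(key, tweak, block) overload already used in splitStatic, omits the staticTweak split, and RNGStateModel itself is illustrative rather than part of this patch.

import is.hail.expr.ir.Threefry
import is.hail.types.physical.stypes.concrete.SRNGState

final case class RNGStateModel(
  key: IndexedSeq[Long],          // four-word Threefry key
  runningSum: IndexedSeq[Long],   // XOR of all encrypted, completed blocks
  lastBlock: IndexedSeq[Long],    // pending split indices, at most four
  numBlocks: Long                 // number of completed blocks folded in so far
) {
  require(key.length == 4 && runningSum.length == 4 && lastBlock.length <= 4)

  // Mirrors splitDyn: append until the block is full, then encrypt the full
  // block with its index as the tweak and XOR it into the running sum.
  def split(idx: Long): RNGStateModel =
    if (lastBlock.length < 4)
      copy(lastBlock = lastBlock :+ idx)
    else {
      val block = lastBlock.toArray
      Threefry.encrypt(key, numBlocks, block)
      copy(
        runningSum = runningSum.zip(block).map { case (s, b) => s ^ b },
        lastBlock = IndexedSeq(idx),
        numBlocks = numBlocks + 1)
    }

  // Mirrors rand: fold the partial block into the sum, mark padding with a
  // high bit, and encrypt under a tweak that records whether padding occurred.
  def rand(): IndexedSeq[Long] = {
    val x = runningSum.toArray
    for (i <- lastBlock.indices) x(i) = x(i) ^ lastBlock(i)
    if (lastBlock.length == 4)
      Threefry.encrypt(key, SRNGState.finalBlockNoPadTweak, x)
    else {
      x(lastBlock.length) = x(lastBlock.length) ^ (1L << 63)
      Threefry.encrypt(key, SRNGState.finalBlockPaddedTweak, x)
    }
    x.toIndexedSeq
  }
}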
TRNGState extends Type { + override def _toPretty = "RNGState" + + override def pyString(sb: StringBuilder): Unit = { + sb.append("rng_state") + } + def _typeCheck(a: Any): Boolean = ??? + def mkOrdering(missingEqual: Boolean): is.hail.annotations.ExtendedOrdering = ??? + def scalaClassTag: scala.reflect.ClassTag[_ <: AnyRef] = ??? +} diff --git a/hail/src/main/scala/is/hail/utils/Bitstring.scala b/hail/src/main/scala/is/hail/utils/Bitstring.scala new file mode 100644 index 00000000000..09427161b62 --- /dev/null +++ b/hail/src/main/scala/is/hail/utils/Bitstring.scala @@ -0,0 +1,90 @@ +package is.hail.utils + +import scala.collection.mutable + +object Bitstring { + def apply(string: String): Bitstring = { + assert(string.forall(c => c == '0' || c == '1')) + val bitstring = mutable.ArrayBuilder.make[Long]() + var pos: Int = 0 + while (string.length - pos > 64) { + bitstring += java.lang.Long.parseUnsignedLong(string.slice(pos, pos + 64), 2) + pos += 64 + } + val lastWord = java.lang.Long.parseUnsignedLong(string.slice(pos, string.length)) + val bitsInLastWord = string.length - pos + bitstring += (lastWord << (64 - bitsInLastWord)) + new Bitstring(bitstring.result(), bitsInLastWord) + } +} + +case class Bitstring(contents: IndexedSeq[Long], bitsInLastWord: Int) { + def numWords = contents.length + def length = (contents.length - 1) * 64 + bitsInLastWord + + override def toString: String = { + if (contents.isEmpty) return "Bitstring()" + val result = new mutable.StringBuilder("Bitstring(") + var i = 0 + while (i < contents.length - 1) { + result ++= contents(i).toBinaryString + i += 1 + } + i = 0 + var lastWord = contents.last + val bits = Array('0', '1') + while (i < bitsInLastWord) { + result += bits((lastWord >>> 63).toInt) + lastWord <<= 1 + i += 1 + } + result += ')' + result.result + } + + def ++(rhs: Bitstring): Bitstring = { + if (length == 0) return rhs + if (rhs.length == 0) return this + if (bitsInLastWord < 64) { + val newNumWords = (length + rhs.length + 63) >> 6 + val newContents = Array.ofDim[Long](newNumWords) + for (i <- 0 until (numWords - 2)) { + newContents(i) = contents(i) + } + newContents(numWords - 1) = contents.last & (rhs.contents.head >>> bitsInLastWord) + for (i <- 0 until (rhs.numWords - 2)) { + newContents(numWords + i) = + (rhs.contents(i) << (64 - bitsInLastWord)) & + (rhs.contents(i + 1) >>> bitsInLastWord) + } + var newBitsInLastWord = bitsInLastWord + rhs.bitsInLastWord + if (newBitsInLastWord > 64) { + newContents(numWords + rhs.numWords - 1) = rhs.contents.last << (64 - bitsInLastWord) + newBitsInLastWord = newBitsInLastWord - 64 + } + new Bitstring(newContents, newBitsInLastWord) + } else { + new Bitstring(contents ++ rhs.contents, rhs.bitsInLastWord) + } + } + + def popWords(n: Int): (Array[Long], Bitstring) = { + assert(n < numWords || (n == numWords && bitsInLastWord == 64)) + val result = contents.slice(0, n).toArray + val newContents = contents.slice(n, numWords) + val newBitsInLastWord = if (n < numWords) bitsInLastWord else 0 + (result, new Bitstring(newContents, newBitsInLastWord)) + } + + def padTo(n: Int): Array[Long] = { + assert(n > numWords || (n == numWords && bitsInLastWord < 64)) + val result = Array.ofDim[Long](n) + Array.copy(contents, 0, result, 0, numWords) + if (bitsInLastWord == 64) { + result(numWords) = 1L << 63 + } else { + result(numWords - 1) = result(numWords - 1) & (1L << (63 - bitsInLastWord)) + } + result + } +} diff --git a/hail/src/main/scala/is/hail/utils/ErrorHandling.scala 
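The new Bitstring stores a binary string big-endian within each 64-bit word: Bitstring.apply left-justifies the final word (lastWord << (64 - bitsInLastWord)), so only the top bitsInLastWord bits of the last word are significant and length is (numWords - 1) * 64 + bitsInLastWord. A small, hypothetical helper following the same packing convention, shown only to make that layout concrete:

// packBits("101") yields one word 0xA000000000000000L with 3 significant bits;
// concatenation (++ above) continues the packing directly below those bits,
// spilling into a new word as needed.
def packBits(bits: String): (IndexedSeq[Long], Int) = {
  require(bits.nonEmpty && bits.forall(c => c == '0' || c == '1'))
  val words = bits.grouped(64).map { chunk =>
    // parse the chunk as base 2, then left-justify a partial final word
    java.lang.Long.parseUnsignedLong(chunk, 2) << (64 - chunk.length)
  }.toIndexedSeq
  val bitsInLastWord = if (bits.length % 64 == 0) 64 else bits.length % 64
  (words, bitsInLastWord)
}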
b/hail/src/main/scala/is/hail/utils/ErrorHandling.scala index f6b92450b54..b5d1c3433b8 100644 --- a/hail/src/main/scala/is/hail/utils/ErrorHandling.scala +++ b/hail/src/main/scala/is/hail/utils/ErrorHandling.scala @@ -7,6 +7,12 @@ class HailException(val msg: String, val logMsg: Option[String], cause: Throwabl def this(msg: String, errorId: Int) = this(msg, None, null, errorId) } +class HailWorkerException( + val shortMessage: String, + val expandedMessage: String, + val errorId: Int +) extends RuntimeException(shortMessage) + trait ErrorHandling { def fatal(msg: String): Nothing = throw new HailException(msg) diff --git a/hail/src/main/scala/is/hail/utils/TextTableReader.scala b/hail/src/main/scala/is/hail/utils/TextTableReader.scala deleted file mode 100644 index b983e71f45b..00000000000 --- a/hail/src/main/scala/is/hail/utils/TextTableReader.scala +++ /dev/null @@ -1,444 +0,0 @@ -package is.hail.expr.ir - -import java.util.regex.Pattern -import is.hail.asm4s.HailClassLoader -import is.hail.HailContext -import is.hail.annotations.{Region, RegionValueBuilder} -import is.hail.backend.ExecuteContext -import is.hail.backend.spark.SparkBackend -import is.hail.expr.TableAnnotationImpex -import is.hail.expr.ir.lowering.TableStage -import is.hail.io.fs.{FS, FileStatus} -import is.hail.rvd.RVDPartitioner -import is.hail.types._ -import is.hail.types.physical.{PCanonicalStringRequired, PCanonicalStruct, PStruct, PType} -import is.hail.types.virtual._ -import is.hail.utils.StringEscapeUtils._ -import is.hail.utils._ -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.Row -import org.json4s.{DefaultFormats, Formats, JValue} - -import scala.collection.mutable -import scala.util.matching.Regex - -abstract class TextReaderOptions { - val comment: Array[String] - val hasHeader: Boolean - - private lazy val commentStartsWith: Array[String] = comment.filter(_.length == 1) - private lazy val commentRegexes: Array[Regex] = comment.filter(_.length > 1).map(_.r) - - final def isComment(line: String): Boolean = - commentStartsWith.exists(pattern => line.startsWith(pattern)) || commentRegexes.exists(pattern => pattern.matches(line)) -} - -case class TextTableReaderParameters( - files: Array[String], - typeMapStr: Map[String, String], - comment: Array[String], - separator: String, - missing: Set[String], - hasHeader: Boolean, - nPartitionsOpt: Option[Int], - quoteStr: String, - skipBlankLines: Boolean, - forceBGZ: Boolean, - filterAndReplace: TextInputFilterAndReplace, - forceGZ: Boolean, - sourceFileField: Option[String]) extends TextReaderOptions { - @transient val typeMap: Map[String, Type] = typeMapStr.mapValues(s => IRParser.parseType(s)).map(identity) - - val quote: java.lang.Character = if (quoteStr != null) quoteStr(0) else null - - def nPartitions: Int = nPartitionsOpt.getOrElse(HailContext.backend.defaultParallelism) -} - -case class TextTableReaderMetadata(fileStatuses: Array[FileStatus], header: String, rowPType: PStruct) { - def fullType: TableType = TableType(rowType = rowPType.virtualType, globalType = TStruct(), key = FastIndexedSeq()) -} - -object TextTableReader { - - def splitLine(s: String, separator: String, quote: java.lang.Character): Array[String] = - splitLine(s, separator, quote, new BoxedArrayBuilder[String], new StringBuilder) - - def splitLine( - s: String, - separator: String, - quote: java.lang.Character, - ab: BoxedArrayBuilder[String], - sb: StringBuilder): Array[String] = { - ab.clear() - sb.clear() - - val matchSep: Int => Int = separator.length match { - case 0 => 
fatal("Hail does not currently support 0-character separators") - case 1 => - val sepChar = separator(0) - (i: Int) => if (s(i) == sepChar) 1 else -1 - case _ => - val p = Pattern.compile(separator) - val m = p.matcher(s) - - { (i: Int) => - m.region(i, s.length) - if (m.lookingAt()) - m.end() - m.start() - else - -1 - } - } - - var i = 0 - while (i < s.length) { - val c = s(i) - - val l = matchSep(i) - if (l != -1) { - i += l - ab += sb.result() - sb.clear() - } else if (quote != null && c == quote) { - if (sb.nonEmpty) - fatal(s"opening quote character '$quote' not at start of field") - i += 1 // skip quote - - while (i < s.length && s(i) != quote) { - sb += s(i) - i += 1 - } - - if (i == s.length) - fatal(s"missing terminating quote character '$quote'") - i += 1 // skip quote - - // full field must be quoted - if (i < s.length) { - val l = matchSep(i) - if (l == -1) - fatal(s"terminating quote character '$quote' not at end of field") - i += l - ab += sb.result() - sb.clear() - } - } else { - sb += c - i += 1 - } - } - ab += sb.result() - - ab.result() - } - - type Matcher = String => Boolean - val booleanMatcher: Matcher = x => try { - x.toBoolean - true - } catch { - case e: IllegalArgumentException => false - } - val int32Matcher: Matcher = x => try { - Integer.parseInt(x) - true - } catch { - case e: NumberFormatException => false - } - val int64Matcher: Matcher = x => try { - java.lang.Long.parseLong(x) - true - } catch { - case e: NumberFormatException => false - } - val float64Matcher: Matcher = x => try { - java.lang.Double.parseDouble(x) - true - } catch { - case e: NumberFormatException => false - } - - def imputeTypes( - fs: FS, - fileStatuses: Array[FileStatus], - params: TextTableReaderParameters, - headerLine: String, - columns: Array[String], - delimiter: String, - missing: Set[String], - quote: java.lang.Character - ): Array[(Option[Type], Boolean)] = { - val nFields = columns.length - - val matchTypes: Array[Type] = Array(TBoolean, TInt32, TInt64, TFloat64) - val matchers: Array[String => Boolean] = Array( - booleanMatcher, - int32Matcher, - int64Matcher, - float64Matcher) - val nMatchers = matchers.length - - val lines = GenericLines.read(fs, fileStatuses, nPartitions = params.nPartitionsOpt, - blockSizeInMB = None, minPartitions = None, gzAsBGZ = params.forceBGZ, allowSerialRead = params.forceGZ) - - val linesRDD: RDD[GenericLine] = lines.toRDD(fs) - - val (imputation, allDefined) = linesRDD.mapPartitions { it => - val allDefined = Array.fill(nFields)(true) - val ma = MultiArray2.fill[Boolean](nFields, nMatchers + 1)(true) - val ab = new BoxedArrayBuilder[String] - val sb = new StringBuilder - it.foreach { genericLine => - val line = genericLine.toString - - if (!params.isComment(line) && - (!params.hasHeader || line != headerLine) && - !(params.skipBlankLines && line.isEmpty)) { - - try { - val split = splitLine(line, delimiter, quote, ab, sb) - if (split.length != nFields) - fatal(s"expected $nFields fields, but found ${ split.length }") - - var i = 0 - while (i < nFields) { - val field = split(i) - if (!missing.contains(field)) { - var j = 0 - while (j < nMatchers) { - ma.update(i, j, ma(i, j) && matchers(j)(field)) - j += 1 - } - ma.update(i, nMatchers, false) - } else - allDefined(i) = false - i += 1 - } - } catch { - case e: Throwable => - fatal( - s"""Caught exception while reading ${ genericLine.file }: ${ e.getMessage } - | offending line: @1""".stripMargin, line, e) - } - } - } - Iterator.single((ma, allDefined)) - } - .reduce({ case ((ma1, allDefined1), 
(ma2, allDefined2)) => - var i = 0 - while (i < nFields) { - var j = 0 - while (j < nMatchers) { - ma1.update(i, j, ma1(i, j) && ma2(i, j)) - j += 1 - } - ma1.update(i, nMatchers, ma1(i, nMatchers) && ma2(i, nMatchers)) - i += 1 - } - (ma1, Array.tabulate(allDefined1.length)(i => (allDefined1(i) && allDefined2(i)))) - }) - - imputation.rowIndices.map { i => - someIf(!imputation(i, nMatchers), - (0 until nMatchers).find(imputation(i, _)) - .map(matchTypes) - .getOrElse(TString)) - }.zip(allDefined).toArray - } - - def readMetadata(fs: FS, options: TextTableReaderParameters): TextTableReaderMetadata = { - val TextTableReaderParameters(files, _, _, separator, missing, hasHeader, _, _, skipBlankLines, forceBGZ, filterAndReplace, forceGZ, sourceFileField) = options - - val fileStatuses: Array[FileStatus] = { - val status = fs.globAllStatuses(files) - if (status.isEmpty) - fatal("arguments refer to no files") - if (!forceBGZ) { - status.foreach { status => - val file = status.getPath - if (file.endsWith(".gz")) - checkGzippedFile(fs, file, forceGZ, forceBGZ) - } - } - status - } - - val types = options.typeMap - val quote = options.quote - - val firstFile = fileStatuses.head.getPath - val header = fs.readLines(firstFile, filterAndReplace) { lines => - val filt = lines.filter(line => !options.isComment(line.value) && !(skipBlankLines && line.value.isEmpty)) - - if (filt.isEmpty) - fatal( - s"""invalid file: no lines remaining after comment filter - | Offending file: $firstFile""".stripMargin) - else - filt.next().value - } - - val splitHeader = splitLine(header, separator, quote) - val preColumns = if (!hasHeader) { - splitHeader - .indices - .map(i => s"f$i") - .toArray - } else splitHeader.map(unescapeString) - - val (columns, duplicates) = mangle(preColumns) - if (duplicates.nonEmpty) { - warn(s"Found ${ duplicates.length } duplicate ${ plural(duplicates.length, "column") }. 
Mangled columns follows:\n @1", - duplicates.map { case (pre, post) => s"'$pre' -> '$post'" }.truncatable("\n ")) - } - - val sourceTypeOption = sourceFileField.map(f => (f, PCanonicalStringRequired)).toIndexedSeq - val namesAndTypes = - columns.map { c => - types.get(c) match { - case Some(t) => - (c, PType.canonical(t)) - case None => - (c, PType.canonical(TString)) - } - } - TextTableReaderMetadata(fileStatuses, header, PCanonicalStruct(true, (namesAndTypes ++ sourceTypeOption): _*)) - } - - def apply(fs: FS, params: TextTableReaderParameters): TextTableReader = { - val metadata = TextTableReader.readMetadata(fs, params) - new TextTableReader(params, metadata.header, metadata.fileStatuses, metadata.rowPType) - } - - def fromJValue(fs: FS, jv: JValue): TextTableReader = { - implicit val formats: Formats = TableReader.formats - val params = jv.extract[TextTableReaderParameters] - TextTableReader(fs, params) - } -} - -class TextTableReader( - val params: TextTableReaderParameters, - header: String, - fileStatuses: IndexedSeq[FileStatus], - fullRowPType: PStruct -) extends TableReader { - val fullType: TableType = TableType(fullRowPType.virtualType, FastIndexedSeq.empty, TStruct()) - - def pathsUsed: Seq[String] = params.files - - val partitionCounts: Option[IndexedSeq[Long]] = None - - def rowAndGlobalPTypes(ctx: ExecuteContext, requestedType: TableType): (PStruct, PStruct) = { - PType.canonical(requestedType.rowType, required = true).asInstanceOf[PStruct] -> - PCanonicalStruct.empty(required = true) - } - - def renderShort(): String = defaultRender() - - def executeGeneric(ctx: ExecuteContext): GenericTableValue = { - val fs = ctx.fs - - val lines = GenericLines.read(fs, fileStatuses, nPartitions = params.nPartitionsOpt, - blockSizeInMB = None, minPartitions = None, gzAsBGZ = params.forceBGZ, allowSerialRead = params.forceGZ) - val partitioner: Option[RVDPartitioner] = None - val globals: TStruct => Row = _ => Row.empty - - val localParams = params - val localHeader = header - val localFullRowType = fullRowPType - val bodyPType: TStruct => PStruct = (requestedRowType: TStruct) => localFullRowType.subsetTo(requestedRowType).asInstanceOf[PStruct] - val linesBody = lines.body - val nFieldOrig = localFullRowType.size - (params.sourceFileField.isDefined).toInt - - val transformer = localParams.filterAndReplace.transformer() - val body = { (requestedRowType: TStruct) => - - val includeFileName = localParams.sourceFileField.exists(requestedRowType.hasField) - val dataFieldNames = if (includeFileName) requestedRowType.fieldNames.init else requestedRowType.fieldNames - val useColIndices = dataFieldNames.map(localFullRowType.virtualType.fieldIdx) - val rowFields = requestedRowType.fields.toArray - val requestedPType = bodyPType(requestedRowType) - - { (region: Region, theHailClassLoader: HailClassLoader, fs: FS, context: Any) => - - val rvb = new RegionValueBuilder(region) - val ab = new BoxedArrayBuilder[String] - val sb = new StringBuilder - linesBody(fs, context) - .filter { bline => - val line = transformer(bline.toString) - if (line == null || localParams.isComment(line) || - (localParams.hasHeader && localHeader == line) || - (localParams.skipBlankLines && line.isEmpty)) - false - else { - try { - val sp = TextTableReader.splitLine(line, localParams.separator, localParams.quote, ab, sb) - if (sp.length != nFieldOrig) - fatal(s"expected $nFieldOrig fields, but found ${ sp.length } fields") - - rvb.start(requestedPType) - rvb.startStruct() - - var i = 0 - while (i < useColIndices.length) { - 
val f = rowFields(i) - val name = f.name - val typ = f.typ - val field = sp(useColIndices(i)) - try { - if (localParams.missing.contains(field)) - rvb.setMissing() - else - rvb.addAnnotation(typ, TableAnnotationImpex.importAnnotation(field, typ)) - } catch { - case e: Exception => - fatal(s"""${ e.getClass.getName }: could not convert "$field" to $typ in column "$name" """, e) - } - i += 1 - } - - if (includeFileName) - rvb.addString(bline.file) - - rvb.endStruct() - rvb.end() - true - } catch { - case e: Throwable => - fatal( - s"""Caught exception while reading ${ bline.file }: ${ e.getMessage } - | offending line: @1""".stripMargin, line, e) - } - } - }.map(_ => rvb.result().offset) - } - } - new GenericTableValue(partitioner = partitioner, - fullTableType = fullType, - globals = globals, - contextType = lines.contextType, - contexts = lines.contexts, - bodyPType = bodyPType, - body = body) - } - - override def lower(ctx: ExecuteContext, requestedType: TableType): TableStage = - executeGeneric(ctx).toTableStage(ctx, requestedType) - - def apply(tr: TableRead, ctx: ExecuteContext): TableValue = - executeGeneric(ctx).toTableValue(ctx, tr.typ) - - override def toJValue: JValue = { - implicit val formats: Formats = DefaultFormats - decomposeWithName(params, "TextTableReader") - } - - override def hashCode(): Int = params.hashCode() - - override def equals(that: Any): Boolean = that match { - case that: TextTableReader => params == that.params - case _ => false - } -} diff --git a/hail/src/main/scala/is/hail/utils/richUtils/RichContextRDD.scala b/hail/src/main/scala/is/hail/utils/richUtils/RichContextRDD.scala index 07320e2b245..27d69292413 100644 --- a/hail/src/main/scala/is/hail/utils/richUtils/RichContextRDD.scala +++ b/hail/src/main/scala/is/hail/utils/richUtils/RichContextRDD.scala @@ -93,7 +93,6 @@ class RichContextRDD[T: ClassTag](crdd: ContextRDD[T]) { ): Array[FileWriteMetadata] = { val localTmpdir = ctx.localTmpdir val fs = ctx.fs - val fsBc = ctx.fsBc fs.mkDir(path + "/parts") if (idxRelPath != null) diff --git a/hail/src/test/resources/sampleheaderdiffelem.txt b/hail/src/test/resources/sampleheaderdiffelem.txt new file mode 100644 index 00000000000..8cedbe05608 --- /dev/null +++ b/hail/src/test/resources/sampleheaderdiffelem.txt @@ -0,0 +1,11 @@ +col000000 col000009 col000002 col000003 col000004 +row000000 0 0 0 0 0 +row000001 0 0 0 0 0 +row000002 0 0 0 0 0 +row000003 0 0 0 0 0 +row000004 0 0 0 0 0 +row000005 0 0 0 0 0 +row000006 0 0 0 0 0 +row000007 0 0 0 0 0 +row000008 0 0 0 0 0 +row000009 0 0 0 0 0 \ No newline at end of file diff --git a/hail/src/test/resources/samplenonintentries.txt b/hail/src/test/resources/samplenonintentries.txt new file mode 100644 index 00000000000..247a1cd0741 --- /dev/null +++ b/hail/src/test/resources/samplenonintentries.txt @@ -0,0 +1,6 @@ +col000000 col000001 col000002 col000003 col000004 col000005 +row000000 0 0 0 abc 0 0 +row000001 0 0 0 0 0 0 +row000002 0 0 0 0 0 0 +row000003 0 0 0 0 0 0 +row000004 0 0 0 0 0 0 \ No newline at end of file diff --git a/hail/src/test/scala/is/hail/HailSuite.scala b/hail/src/test/scala/is/hail/HailSuite.scala index 8ee2e12d88b..0f5e51c0ede 100644 --- a/hail/src/test/scala/is/hail/HailSuite.scala +++ b/hail/src/test/scala/is/hail/HailSuite.scala @@ -1,12 +1,20 @@ package is.hail +import java.io.{File, PrintWriter} + +import breeze.linalg.{DenseMatrix, Matrix, Vector} +import is.hail.ExecStrategy.ExecStrategy +import is.hail.annotations._ import is.hail.asm4s.HailClassLoader -import is.hail.annotations.{Region, 
RegionPool} +import is.hail.expr.ir._ import is.hail.backend.{BroadcastValue, ExecuteContext} import is.hail.backend.spark.SparkBackend -import is.hail.utils.{ExecutionTimer, using} +import is.hail.types.virtual._ +import is.hail.utils._ import is.hail.io.fs.FS +import is.hail.TestUtils._ import org.apache.spark.SparkContext +import org.apache.spark.sql.Row import org.scalatest.testng.TestNGSuite import org.testng.ITestContext import org.testng.annotations.{AfterMethod, BeforeClass, BeforeMethod} @@ -30,7 +38,7 @@ object HailSuite { lazy val hc: HailContext = { val hc = withSparkBackend() - hc.flags.set("lower", "1") + hc.sparkBackend("HailSuite.hc").setFlag("lower", "1") hc.checkRVDKeys = true hc } @@ -63,7 +71,7 @@ class HailSuite extends TestNGSuite { timer = new ExecutionTimer("HailSuite") assert(ctx == null) pool = RegionPool() - ctx = new ExecuteContext(backend.tmpdir, backend.localTmpdir, backend, fs, Region(pool=pool), timer, null, HailSuite.theHailClassLoader) + ctx = backend.createExecuteContextForTests(timer, Region(pool=pool)) } @AfterMethod @@ -83,4 +91,205 @@ class HailSuite extends TestNGSuite { hc.sparkBackend("HailSuite.withExecuteContext").withExecuteContext(timer)(f) } } + + def assertEvalsTo( + x: IR, + env: Env[(Any, Type)], + args: IndexedSeq[(Any, Type)], + agg: Option[(IndexedSeq[Row], TStruct)], + expected: Any + )( + implicit execStrats: Set[ExecStrategy] + ) { + + TypeCheck(ctx, x, BindingEnv(env.mapValues(_._2), agg = agg.map(_._2.toEnv))) + + val t = x.typ + assert(t == TVoid || t.typeCheck(expected), s"$t, $expected") + + ExecuteContext.scoped() { ctx => + val filteredExecStrats: Set[ExecStrategy] = + if (HailContext.backend.isInstanceOf[SparkBackend]) + execStrats + else { + info("skipping interpret and non-lowering compile steps on non-spark backend") + execStrats.intersect(ExecStrategy.backendOnly) + } + + filteredExecStrats.foreach { strat => + try { + val res = strat match { + case ExecStrategy.Interpret => + assert(agg.isEmpty) + Interpret[Any](ctx, x, env, args) + case ExecStrategy.InterpretUnoptimized => + assert(agg.isEmpty) + Interpret[Any](ctx, x, env, args, optimize = false) + case ExecStrategy.JvmCompile => + assert(Forall(x, node => Compilable(node))) + eval(x, env, args, agg, bytecodePrinter = + Option(ctx.getFlag("jvm_bytecode_dump")) + .map { path => + val pw = new PrintWriter(new File(path)) + pw.print(s"/* JVM bytecode dump for IR:\n${Pretty(ctx, x)}\n */\n\n") + pw + }, true, ctx) + case ExecStrategy.JvmCompileUnoptimized => + assert(Forall(x, node => Compilable(node))) + eval(x, env, args, agg, bytecodePrinter = + Option(ctx.getFlag("jvm_bytecode_dump")) + .map { path => + val pw = new PrintWriter(new File(path)) + pw.print(s"/* JVM bytecode dump for IR:\n${Pretty(ctx, x)}\n */\n\n") + pw + }, + optimize = false, ctx) + case ExecStrategy.LoweredJVMCompile => + loweredExecute(ctx, x, env, args, agg) + } + if (t != TVoid) { + assert(t.typeCheck(res), s"\n t=$t\n result=$res\n strategy=$strat") + assert(t.valuesSimilar(res, expected), s"\n result=$res\n expect=$expected\n strategy=$strat)") + } + } catch { + case e: Exception => + error(s"error from strategy $strat") + if (execStrats.contains(strat)) throw e + } + } + } + } + + def assertNDEvals(nd: IR, expected: Any) + (implicit execStrats: Set[ExecStrategy]) { + assertNDEvals(nd, Env.empty, FastIndexedSeq(), None, expected) + } + + def assertNDEvals(nd: IR, expected: (Any, IndexedSeq[Long])) + (implicit execStrats: Set[ExecStrategy]) { + if (expected == null) + assertNDEvals(nd, Env.empty, 
FastIndexedSeq(), None, null, null) + else + assertNDEvals(nd, Env.empty, FastIndexedSeq(), None, expected._2, expected._1) + } + + def assertNDEvals(nd: IR, args: IndexedSeq[(Any, Type)], expected: Any) + (implicit execStrats: Set[ExecStrategy]) { + assertNDEvals(nd, Env.empty, args, None, expected) + } + + def assertNDEvals(nd: IR, agg: (IndexedSeq[Row], TStruct), expected: Any) + (implicit execStrats: Set[ExecStrategy]) { + assertNDEvals(nd, Env.empty, FastIndexedSeq(), Some(agg), expected) + } + + def assertNDEvals( + nd: IR, + env: Env[(Any, Type)], + args: IndexedSeq[(Any, Type)], + agg: Option[(IndexedSeq[Row], TStruct)], + expected: Any + )( + implicit execStrats: Set[ExecStrategy] + ): Unit = { + var e: IndexedSeq[Any] = expected.asInstanceOf[IndexedSeq[Any]] + val dims = Array.fill(nd.typ.asInstanceOf[TNDArray].nDims) { + val n = e.length + if (n != 0 && e.head.isInstanceOf[IndexedSeq[_]]) + e = e.head.asInstanceOf[IndexedSeq[Any]] + n.toLong + } + assertNDEvals(nd, Env.empty, FastIndexedSeq(), agg, dims, expected) + } + + def assertNDEvals( + nd: IR, + env: Env[(Any, Type)], + args: IndexedSeq[(Any, Type)], + agg: Option[(IndexedSeq[Row], TStruct)], + dims: IndexedSeq[Long], + expected: Any + )( + implicit execStrats: Set[ExecStrategy] + ): Unit = { + val arrayIR = if (expected == null) nd else { + val refs = Array.fill(nd.typ.asInstanceOf[TNDArray].nDims) { Ref(genUID(), TInt32) } + Let("nd", nd, + dims.zip(refs).foldRight[IR](NDArrayRef(Ref("nd", nd.typ), refs.map(Cast(_, TInt64)), -1)) { + case ((n, ref), accum) => + ToArray(StreamMap(rangeIR(n.toInt), ref.name, accum)) + }) + } + assertEvalsTo(arrayIR, env, args, agg, expected) + } + + def assertBMEvalsTo( + bm: BlockMatrixIR, + expected: DenseMatrix[Double] + )( + implicit execStrats: Set[ExecStrategy] + ): Unit = { + ExecuteContext.scoped() { ctx => + val filteredExecStrats: Set[ExecStrategy] = + if (HailContext.backend.isInstanceOf[SparkBackend]) execStrats + else { + info("skipping interpret and non-lowering compile steps on non-spark backend") + execStrats.intersect(ExecStrategy.backendOnly) + } + filteredExecStrats.filter(ExecStrategy.interpretOnly).foreach { strat => + try { + val res = strat match { + case ExecStrategy.Interpret => + Interpret(bm, ctx, optimize = true) + case ExecStrategy.InterpretUnoptimized => + Interpret(bm, ctx, optimize = false) + } + assert(res.toBreezeMatrix() == expected) + } catch { + case e: Exception => + error(s"error from strategy $strat") + if (execStrats.contains(strat)) throw e + } + } + val expectedArray = Array.tabulate(expected.rows)(i => Array.tabulate(expected.cols)(j => expected(i, j)).toFastIndexedSeq).toFastIndexedSeq + assertNDEvals(BlockMatrixCollect(bm), expectedArray)(filteredExecStrats.filterNot(ExecStrategy.interpretOnly)) + } + } + + def assertAllEvalTo( + xs: (IR, Any)* + )( + implicit execStrats: Set[ExecStrategy] + ): Unit = { + assertEvalsTo(MakeTuple.ordered(xs.map(_._1)), Row.fromSeq(xs.map(_._2))) + } + + def assertEvalsTo( + x: IR, + expected: Any + )( + implicit execStrats: Set[ExecStrategy] + ) { + assertEvalsTo(x, Env.empty, FastIndexedSeq(), None, expected) + } + + def assertEvalsTo( + x: IR, + args: IndexedSeq[(Any, Type)], + expected: Any + )( + implicit execStrats: Set[ExecStrategy] + ) { + assertEvalsTo(x, Env.empty, args, None, expected) + } + + def assertEvalsTo( + x: IR, + agg: (IndexedSeq[Row], TStruct), + expected: Any + )( + implicit execStrats: Set[ExecStrategy] + ) { + assertEvalsTo(x, Env.empty, FastIndexedSeq(), Some(agg), expected) + } } diff 
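With this change the assertEvalsTo, assertAllEvalTo, assertNDEvals and assertBMEvalsTo helpers live on HailSuite itself, so a test picks them up together with the per-test ctx, pool and timer the suite sets up. A minimal usage sketch follows; ArithmeticSuite and testAdd are hypothetical names, and the IR constructors are assumed to behave as they do elsewhere in this patch:

import is.hail.{ExecStrategy, HailSuite}
import is.hail.expr.ir._
import org.testng.annotations.Test

class ArithmeticSuite extends HailSuite {
  @Test def testAdd(): Unit = {
    // restrict to the interpreter strategies, as other suites in this patch do
    implicit val execStrats: Set[ExecStrategy.ExecStrategy] = ExecStrategy.interpretOnly
    // evaluates the IR under each requested strategy and compares to the expected value
    assertEvalsTo(ApplyBinaryPrimOp(Add(), I32(2), I32(3)), 5)
    // tuple form: each IR is evaluated and checked against its paired result
    assertAllEvalTo(I32(1) -> 1, I32(2) -> 2)
  }
}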
--git a/hail/src/test/scala/is/hail/TestUtils.scala b/hail/src/test/scala/is/hail/TestUtils.scala index d93521e7ec5..92b8e8c15bd 100644 --- a/hail/src/test/scala/is/hail/TestUtils.scala +++ b/hail/src/test/scala/is/hail/TestUtils.scala @@ -309,90 +309,6 @@ object TestUtils { assert(t.valuesSimilar(i2, c), s"interpret (optimize = false) $i vs compile $c") } - def assertAllEvalTo(xs: (IR, Any)*)(implicit execStrats: Set[ExecStrategy]): Unit = { - assertEvalsTo(MakeTuple.ordered(xs.map(_._1)), Row.fromSeq(xs.map(_._2))) - } - - def assertEvalsTo(x: IR, expected: Any) - (implicit execStrats: Set[ExecStrategy]) { - assertEvalsTo(x, Env.empty, FastIndexedSeq(), None, expected) - } - - def assertEvalsTo(x: IR, args: IndexedSeq[(Any, Type)], expected: Any) - (implicit execStrats: Set[ExecStrategy]) { - assertEvalsTo(x, Env.empty, args, None, expected) - } - - def assertEvalsTo(x: IR, agg: (IndexedSeq[Row], TStruct), expected: Any) - (implicit execStrats: Set[ExecStrategy]) { - assertEvalsTo(x, Env.empty, FastIndexedSeq(), Some(agg), expected) - } - - def assertEvalsTo(x: IR, - env: Env[(Any, Type)], - args: IndexedSeq[(Any, Type)], - agg: Option[(IndexedSeq[Row], TStruct)], - expected: Any) - (implicit execStrats: Set[ExecStrategy]) { - - TypeCheck(x, BindingEnv(env.mapValues(_._2), agg = agg.map(_._2.toEnv))) - - val t = x.typ - assert(t == TVoid || t.typeCheck(expected), s"$t, $expected") - - ExecuteContext.scoped() { ctx => - val filteredExecStrats: Set[ExecStrategy] = - if (HailContext.backend.isInstanceOf[SparkBackend]) - execStrats - else { - info("skipping interpret and non-lowering compile steps on non-spark backend") - execStrats.intersect(ExecStrategy.backendOnly) - } - - filteredExecStrats.foreach { strat => - try { - val res = strat match { - case ExecStrategy.Interpret => - assert(agg.isEmpty) - Interpret[Any](ctx, x, env, args) - case ExecStrategy.InterpretUnoptimized => - assert(agg.isEmpty) - Interpret[Any](ctx, x, env, args, optimize = false) - case ExecStrategy.JvmCompile => - assert(Forall(x, node => Compilable(node))) - eval(x, env, args, agg, bytecodePrinter = - Option(HailContext.getFlag("jvm_bytecode_dump")) - .map { path => - val pw = new PrintWriter(new File(path)) - pw.print(s"/* JVM bytecode dump for IR:\n${Pretty(x)}\n */\n\n") - pw - }, true, ctx) - case ExecStrategy.JvmCompileUnoptimized => - assert(Forall(x, node => Compilable(node))) - eval(x, env, args, agg, bytecodePrinter = - Option(HailContext.getFlag("jvm_bytecode_dump")) - .map { path => - val pw = new PrintWriter(new File(path)) - pw.print(s"/* JVM bytecode dump for IR:\n${Pretty(x)}\n */\n\n") - pw - }, - optimize = false, ctx) - case ExecStrategy.LoweredJVMCompile => - loweredExecute(ctx, x, env, args, agg) - } - if (t != TVoid) { - assert(t.typeCheck(res), s"\n t=$t\n result=$res\n strategy=$strat") - assert(t.valuesSimilar(res, expected), s"\n result=$res\n expect=$expected\n strategy=$strat)") - } - } catch { - case e: Exception => - error(s"error from strategy $strat") - if (execStrats.contains(strat)) throw e - } - } - } - } - def assertThrows[E <: Throwable : Manifest](x: IR, regex: String) { assertThrows[E](x, Env.empty[(Any, Type)], FastIndexedSeq.empty[(Any, Type)], regex) } @@ -431,85 +347,6 @@ object TestUtils { assertCompiledThrows[HailException](x, regex) } - def assertNDEvals(nd: IR, expected: Any) - (implicit execStrats: Set[ExecStrategy]) { - assertNDEvals(nd, Env.empty, FastIndexedSeq(), None, expected) - } - - def assertNDEvals(nd: IR, expected: (Any, IndexedSeq[Long])) - (implicit 
execStrats: Set[ExecStrategy]) { - if (expected == null) - assertNDEvals(nd, Env.empty, FastIndexedSeq(), None, null, null) - else - assertNDEvals(nd, Env.empty, FastIndexedSeq(), None, expected._2, expected._1) - } - - def assertNDEvals(nd: IR, args: IndexedSeq[(Any, Type)], expected: Any) - (implicit execStrats: Set[ExecStrategy]) { - assertNDEvals(nd, Env.empty, args, None, expected) - } - - def assertNDEvals(nd: IR, agg: (IndexedSeq[Row], TStruct), expected: Any) - (implicit execStrats: Set[ExecStrategy]) { - assertNDEvals(nd, Env.empty, FastIndexedSeq(), Some(agg), expected) - } - - def assertNDEvals(nd: IR, env: Env[(Any, Type)], args: IndexedSeq[(Any, Type)], - agg: Option[(IndexedSeq[Row], TStruct)], expected: Any) - (implicit execStrats: Set[ExecStrategy]): Unit = { - var e: IndexedSeq[Any] = expected.asInstanceOf[IndexedSeq[Any]] - val dims = Array.fill(nd.typ.asInstanceOf[TNDArray].nDims) { - val n = e.length - if (n != 0 && e.head.isInstanceOf[IndexedSeq[_]]) - e = e.head.asInstanceOf[IndexedSeq[Any]] - n.toLong - } - assertNDEvals(nd, Env.empty, FastIndexedSeq(), agg, dims, expected) - } - - def assertNDEvals(nd: IR, env: Env[(Any, Type)], args: IndexedSeq[(Any, Type)], - agg: Option[(IndexedSeq[Row], TStruct)], dims: IndexedSeq[Long], expected: Any) - (implicit execStrats: Set[ExecStrategy]): Unit = { - val arrayIR = if (expected == null) nd else { - val refs = Array.fill(nd.typ.asInstanceOf[TNDArray].nDims) { Ref(genUID(), TInt32) } - Let("nd", nd, - dims.zip(refs).foldRight[IR](NDArrayRef(Ref("nd", nd.typ), refs.map(Cast(_, TInt64)), -1)) { - case ((n, ref), accum) => - ToArray(StreamMap(rangeIR(n.toInt), ref.name, accum)) - }) - } - assertEvalsTo(arrayIR, env, args, agg, expected) - } - - def assertBMEvalsTo(bm: BlockMatrixIR, expected: DenseMatrix[Double]) - (implicit execStrats: Set[ExecStrategy]): Unit = { - ExecuteContext.scoped() { ctx => - val filteredExecStrats: Set[ExecStrategy] = - if (HailContext.backend.isInstanceOf[SparkBackend]) execStrats - else { - info("skipping interpret and non-lowering compile steps on non-spark backend") - execStrats.intersect(ExecStrategy.backendOnly) - } - filteredExecStrats.filter(ExecStrategy.interpretOnly).foreach { strat => - try { - val res = strat match { - case ExecStrategy.Interpret => - Interpret(bm, ctx, optimize = true) - case ExecStrategy.InterpretUnoptimized => - Interpret(bm, ctx, optimize = false) - } - assert(res.toBreezeMatrix() == expected) - } catch { - case e: Exception => - error(s"error from strategy $strat") - if (execStrats.contains(strat)) throw e - } - } - val expectedArray = Array.tabulate(expected.rows)(i => Array.tabulate(expected.cols)(j => expected(i, j)).toFastIndexedSeq).toFastIndexedSeq - assertNDEvals(BlockMatrixCollect(bm), expectedArray)(filteredExecStrats.filterNot(ExecStrategy.interpretOnly)) - } - } - def importVCF(ctx: ExecuteContext, file: String, force: Boolean = false, forceBGZ: Boolean = false, headerFile: Option[String] = None, diff --git a/hail/src/test/scala/is/hail/annotations/StagedConstructorSuite.scala b/hail/src/test/scala/is/hail/annotations/StagedConstructorSuite.scala index 6ae59b84572..9774a965a2d 100644 --- a/hail/src/test/scala/is/hail/annotations/StagedConstructorSuite.scala +++ b/hail/src/test/scala/is/hail/annotations/StagedConstructorSuite.scala @@ -31,7 +31,7 @@ class StagedConstructorSuite extends HailSuite { val region = Region(pool=pool) val rv = RegionValue(region) - rv.setOffset(fb.result()(theHailClassLoader)(region, input)) + 
rv.setOffset(fb.result(ctx)(theHailClassLoader)(region, input)) if (showRVInfo) { printRegion(region, "string") @@ -68,7 +68,7 @@ class StagedConstructorSuite extends HailSuite { val region = Region(pool=pool) val rv = RegionValue(region) - rv.setOffset(fb.result()(theHailClassLoader)(region, input)) + rv.setOffset(fb.result(ctx)(theHailClassLoader)(region, input)) if (showRVInfo) { printRegion(region, "int") @@ -105,7 +105,7 @@ class StagedConstructorSuite extends HailSuite { val region = Region(pool=pool) val rv = RegionValue(region) - rv.setOffset(fb.result()(theHailClassLoader)(region, input)) + rv.setOffset(fb.result(ctx)(theHailClassLoader)(region, input)) if (showRVInfo) { printRegion(region, "array") @@ -149,7 +149,7 @@ class StagedConstructorSuite extends HailSuite { val region = Region(pool=pool) val rv = RegionValue(region) - rv.setOffset(fb.result()(theHailClassLoader)(region, input)) + rv.setOffset(fb.result(ctx)(theHailClassLoader)(region, input)) if (showRVInfo) { printRegion(region, "struct") @@ -193,7 +193,7 @@ class StagedConstructorSuite extends HailSuite { val region = Region(pool=pool) val rv = RegionValue(region) - rv.setOffset(fb.result()(theHailClassLoader)(region, input)) + rv.setOffset(fb.result(ctx)(theHailClassLoader)(region, input)) if (showRVInfo) { printRegion(region, "array of struct") @@ -326,7 +326,7 @@ class StagedConstructorSuite extends HailSuite { val region = Region(pool=pool) val rv = RegionValue(region) - rv.setOffset(fb.result()(theHailClassLoader)(region, input)) + rv.setOffset(fb.result(ctx)(theHailClassLoader)(region, input)) if (showRVInfo) { printRegion(region, "struct with array") @@ -374,7 +374,7 @@ class StagedConstructorSuite extends HailSuite { val region = Region(pool=pool) val rv = RegionValue(region) - rv.setOffset(fb.result()(theHailClassLoader)(region, input)) + rv.setOffset(fb.result(ctx)(theHailClassLoader)(region, input)) if (showRVInfo) { printRegion(region, "missing array") @@ -415,7 +415,7 @@ class StagedConstructorSuite extends HailSuite { } val region = Region(pool=pool) - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx)(theHailClassLoader) def run(i: Int, b: Boolean, d: Double): (Int, Boolean, Double) = { val off = f(region, i, b, d) (Region.loadInt(t.loadField(off, 0)), diff --git a/hail/src/test/scala/is/hail/asm4s/ASM4SSuite.scala b/hail/src/test/scala/is/hail/asm4s/ASM4SSuite.scala index 88ee0daa94d..2bf3b9d77ce 100644 --- a/hail/src/test/scala/is/hail/asm4s/ASM4SSuite.scala +++ b/hail/src/test/scala/is/hail/asm4s/ASM4SSuite.scala @@ -2,6 +2,7 @@ package is.hail.asm4s import java.io.PrintWriter +import is.hail.HailSuite import is.hail.asm4s.Code._ import is.hail.asm4s.FunctionBuilder._ import is.hail.check.{Gen, Prop} @@ -13,13 +14,13 @@ import scala.language.postfixOps trait Z2Z { def apply(z:Boolean): Boolean } -class ASM4SSuite extends TestNGSuite { +class ASM4SSuite extends HailSuite { private[this] val theHailClassLoader = new HailClassLoader(getClass().getClassLoader()) @Test def not(): Unit = { val notb = FunctionBuilder[Z2Z]("is/hail/asm4s/Z2Z", Array(NotGenericTypeInfo[Boolean]), NotGenericTypeInfo[Boolean]) notb.emit(!notb.getArg[Boolean](1)) - val not = notb.result()(theHailClassLoader) + val not = notb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) assert(!not(true)) assert(not(false)) } @@ -27,7 +28,7 @@ class ASM4SSuite extends TestNGSuite { @Test def mux(): Unit = { val gb = FunctionBuilder[Boolean, Int]("G") gb.emit(gb.getArg[Boolean](1).mux(11, -1)) - val g = 
gb.result()(theHailClassLoader) + val g = gb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) assert(g(true) == 11) assert(g(false) == -1) } @@ -35,7 +36,7 @@ class ASM4SSuite extends TestNGSuite { @Test def add(): Unit = { val fb = FunctionBuilder[Int, Int]("F") fb.emit(fb.getArg[Int](1) + 5) - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) assert(f(-2) == 3) } @@ -43,7 +44,7 @@ class ASM4SSuite extends TestNGSuite { val fb = FunctionBuilder[Int]("F") val l = fb.newLocal[Int]() fb.emit(Code(l := 0, l++, l += 2, l)) - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) assert(f() == 3) } @@ -57,7 +58,7 @@ class ASM4SSuite extends TestNGSuite { arr(2) = -6, arr(hb.getArg[Int](1)) )) - val h = hb.result()(theHailClassLoader) + val h = hb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) assert(h(0) == 6) assert(h(1) == 7) assert(h(2) == -6) @@ -66,7 +67,7 @@ class ASM4SSuite extends TestNGSuite { @Test def get(): Unit = { val fb = FunctionBuilder[A, Int]("F") fb.emit(fb.getArg[A](1).getField[Int]("i")) - val i = fb.result()(theHailClassLoader) + val i = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) val a = new A assert(i(a) == 5) @@ -75,7 +76,7 @@ class ASM4SSuite extends TestNGSuite { @Test def invoke(): Unit = { val fb = FunctionBuilder[A, Int]("F") fb.emit(fb.getArg[A](1).invoke[Int]("f")) - val i = fb.result()(theHailClassLoader) + val i = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) val a = new A assert(i(a) == 6) @@ -84,7 +85,7 @@ class ASM4SSuite extends TestNGSuite { @Test def invoke2(): Unit = { val fb = FunctionBuilder[A, Int]("F") fb.emit(fb.getArg[A](1).invoke[Int, Int]("g", 6)) - val j = fb.result()(theHailClassLoader) + val j = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) val a = new A assert(j(a) == 11) @@ -93,7 +94,7 @@ class ASM4SSuite extends TestNGSuite { @Test def newInstance(): Unit = { val fb = FunctionBuilder[Int]("F") fb.emit(Code.newInstance[A]().invoke[Int]("f")) - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) assert(f() == 6) } @@ -104,7 +105,7 @@ class ASM4SSuite extends TestNGSuite { inst.store(Code.newInstance[A]()), inst.put("i", -2), inst.getField[Int]("i"))) - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) assert(f() == -2) } @@ -115,21 +116,21 @@ class ASM4SSuite extends TestNGSuite { inst.store(Code.newInstance[A]()), inst.put("j", -2), Code.getStatic[A, Int]("j"))) - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) assert(f() == -2) } @Test def f2(): Unit = { val fb = FunctionBuilder[Int, Int, Int]("F") fb.emit(fb.getArg[Int](1) + fb.getArg[Int](2)) - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) assert(f(3, 5) == 8) } @Test def compare(): Unit = { val fb = FunctionBuilder[Int, Int, Boolean]("F") fb.emit(fb.getArg[Int](1) > fb.getArg[Int](2)) - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) assert(f(5, 2)) assert(!f(-1, -1)) assert(!f(2, 5)) @@ -147,7 +148,7 @@ class ASM4SSuite extends TestNGSuite { r.store(r * i), i.store(i - 1))), r)) - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) assert(f(3) == 6) assert(f(4) == 24) @@ -156,7 +157,7 @@ class ASM4SSuite 
extends TestNGSuite { @Test def dcmp(): Unit = { val fb = FunctionBuilder[Double, Double, Boolean]("F") fb.emit(fb.getArg[Double](1) > fb.getArg[Double](2)) - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) assert(f(5.2, 2.3)) val d = -2.3 @@ -173,7 +174,7 @@ class ASM4SSuite extends TestNGSuite { arr(1) = Code.newInstance[A](), arr(0).getField[Int]("i") + arr(1).getField[Int]("i") )) - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) assert(f() == 10) } @@ -204,7 +205,7 @@ class ASM4SSuite extends TestNGSuite { ) ), vn_2 + vn_1))) - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) Prop.forAll(Gen.choose(0, 100)) { i => fibonacciReference(i) == f(i) @@ -216,37 +217,37 @@ class ASM4SSuite extends TestNGSuite { { val fb = FunctionBuilder[Boolean]("F") fb.emit(Double.NaN < x) - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) assert(!f()) } { val fb = FunctionBuilder[Boolean]("F") fb.emit(Double.NaN <= x) - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) assert(!f()) } { val fb = FunctionBuilder[Boolean]("F") fb.emit(Double.NaN > x) - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) assert(!f()) } { val fb = FunctionBuilder[Boolean]("F") fb.emit(Double.NaN >= x) - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) assert(!f()) } { val fb = FunctionBuilder[Boolean]("F") fb.emit(new CodeDouble(Double.NaN).ceq(x)) - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) assert(!f()) } { val fb = FunctionBuilder[Boolean]("F") fb.emit(new CodeDouble(Double.NaN).cne(x)) - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) assert(f()) } @@ -259,37 +260,37 @@ class ASM4SSuite extends TestNGSuite { { val fb = FunctionBuilder[Boolean]("F") fb.emit(Float.NaN < x) - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) assert(!f()) } { val fb = FunctionBuilder[Boolean]("F") fb.emit(Float.NaN <= x) - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) assert(!f()) } { val fb = FunctionBuilder[Boolean]("F") fb.emit(Float.NaN > x) - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) assert(!f()) } { val fb = FunctionBuilder[Boolean]("F") fb.emit(Float.NaN >= x) - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) assert(!f()) } { val fb = FunctionBuilder[Boolean]("F") fb.emit(new CodeFloat(Float.NaN).ceq(x)) - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) assert(!f()) } { val fb = FunctionBuilder[Boolean]("F") fb.emit(new CodeFloat(Float.NaN).cne(x)) - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) assert(f()) } @@ -321,7 +322,7 @@ class ASM4SSuite extends TestNGSuite { }) res } - val f = fb.result(Some(new PrintWriter(System.out)))(theHailClassLoader) + val f = fb.result(ctx.shouldWriteIRFiles(), Some(new PrintWriter(System.out)))(theHailClassLoader) assert(f(0, 1, 1) == 2) 
assert(f(1, 5, 1) == 4) assert(f(2, 2, 8) == 16) @@ -340,7 +341,7 @@ class ASM4SSuite extends TestNGSuite { v1 + v2)) fb.emitWithBuilder(add.invoke(_, fb.getArg[Int](1), fb.getArg[Int](2))) - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) assert(f(1, 1) == 2) } @@ -361,7 +362,7 @@ class ASM4SSuite extends TestNGSuite { case LongInfo => fb.emit(Code(c, longField.load())) case BooleanInfo => fb.emit(Code(c, booleanField.load())) } - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) f(arg1, arg2, arg3) } @@ -388,7 +389,7 @@ class ASM4SSuite extends TestNGSuite { case BooleanInfo => mb.emit(Code(c, booleanField.load())) } fb.emitWithBuilder(mb.invoke(_, fb.getArg[Int](1), fb.getArg[Long](2), fb.getArg[Boolean](3))) - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) f(arg1, arg2, arg3) } @@ -408,7 +409,7 @@ class ASM4SSuite extends TestNGSuite { v2 := v1, v1)) - assert(fb.result()(theHailClassLoader)() == 1) + assert(fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader)() == 1) } @Test def testInitialize(): Unit = { @@ -417,7 +418,7 @@ class ASM4SSuite extends TestNGSuite { fb.emit(Code( fb.getArg[Boolean](1).mux(Code._empty, l := 5), l)) - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx.shouldWriteIRFiles())(theHailClassLoader) assert(f(true) == 0) assert(f(false) == 5) } diff --git a/hail/src/test/scala/is/hail/asm4s/CodeSuite.scala b/hail/src/test/scala/is/hail/asm4s/CodeSuite.scala index 1d61ffee487..58ba5a573b1 100644 --- a/hail/src/test/scala/is/hail/asm4s/CodeSuite.scala +++ b/hail/src/test/scala/is/hail/asm4s/CodeSuite.scala @@ -40,7 +40,7 @@ class CodeSuite extends HailSuite { mb.emit(EmitCodeBuilder.scopedCode(mb) { cb => v.sizeToStoreInBytes(cb).value }) - fb.result()(theHailClassLoader)() + fb.result(ctx)(theHailClassLoader)() } assert(testSizeHelper(int64) == 8L) @@ -61,7 +61,7 @@ class CodeSuite extends HailSuite { } sarray.sizeToStoreInBytes(cb).value }) - assert(fb.result()(theHailClassLoader)(ctx.r) == 28L) // 2 missing bytes 4 byte aligned + 4 header bytes + 5 elements * 4 bytes for ints. + assert(fb.result(ctx)(theHailClassLoader)(ctx.r) == 28L) // 2 missing bytes 4 byte aligned + 4 header bytes + 5 elements * 4 bytes for ints. } @Test def testIntervalSizeInBytes(): Unit = { @@ -86,7 +86,7 @@ class CodeSuite extends HailSuite { true, true) sval.sizeToStoreInBytes(cb).value }) - assert(fb.result()(theHailClassLoader)(ctx.r) == 72L) // 2 28 byte structs, plus 2 1 byte booleans that get 8 byte for an extra 8 bytes, plus missing bytes. + assert(fb.result(ctx)(theHailClassLoader)(ctx.r) == 72L) // 2 28 byte structs, plus 2 1 byte booleans that get 8 byte for an extra 8 bytes, plus missing bytes. 
} @Test def testHash() { @@ -109,7 +109,7 @@ class CodeSuite extends HailSuite { val hash = v.hash(cb) hash.value }) - fb.result()(theHailClassLoader)() + fb.result(ctx)(theHailClassLoader)() } def hashTestStringHelper(toHash: String): Int = { @@ -125,7 +125,7 @@ class CodeSuite extends HailSuite { hash.value }) val region = Region(pool=pool) - fb.result()(theHailClassLoader)(region) + fb.result(ctx)(theHailClassLoader)(region) } def hashTestArrayHelper(toHash: IndexedSeq[Int]): Int = { @@ -140,7 +140,7 @@ class CodeSuite extends HailSuite { }) val region = Region(pool=pool) val arrayPointer = pArray.unstagedStoreJavaObject(toHash, region) - fb.result()(theHailClassLoader)(arrayPointer) + fb.result(ctx)(theHailClassLoader)(arrayPointer) } def hashTestStructHelper(toHash: Row, fields : IndexedSeq[PField]): Int = { @@ -155,6 +155,6 @@ class CodeSuite extends HailSuite { }) val region = Region(pool=pool) val structPointer = pStruct.unstagedStoreJavaObject(toHash, region) - fb.result()(theHailClassLoader)(structPointer) + fb.result(ctx)(theHailClassLoader)(structPointer) } } diff --git a/hail/src/test/scala/is/hail/expr/ir/BlockMatrixIRSuite.scala b/hail/src/test/scala/is/hail/expr/ir/BlockMatrixIRSuite.scala index fa7976e1468..b7067828b44 100644 --- a/hail/src/test/scala/is/hail/expr/ir/BlockMatrixIRSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/BlockMatrixIRSuite.scala @@ -69,7 +69,6 @@ class BlockMatrixIRSuite extends HailSuite { } @Test def testBlockMatrixBroadcastValue_Scalars() { - implicit val execStrats: Set[ExecStrategy] = ExecStrategy.interpretOnly val broadcastTwo = BlockMatrixBroadcast( ValueToBlockMatrix(MakeArray(Seq[F64](F64(2)), TArray(TFloat64)), Array[Long](1, 1), ones.typ.blockSize), FastIndexedSeq(), shape, ones.typ.blockSize) diff --git a/hail/src/test/scala/is/hail/expr/ir/EmitStreamSuite.scala b/hail/src/test/scala/is/hail/expr/ir/EmitStreamSuite.scala index 97becfa9426..56473e4d87b 100644 --- a/hail/src/test/scala/is/hail/expr/ir/EmitStreamSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/EmitStreamSuite.scala @@ -27,7 +27,7 @@ class EmitStreamSuite extends HailSuite { val fb = EmitFunctionBuilder[T, R](ctx, "stream_test") val mb = fb.apply_method mb.emit(f(mb, mb.getCodeParam[T](1))) - val asmFn = fb.result()(theHailClassLoader) + val asmFn = fb.result(ctx)(theHailClassLoader) asmFn.apply } @@ -35,7 +35,7 @@ class EmitStreamSuite extends HailSuite { val fb = EmitFunctionBuilder[T, U, R](ctx, "F") val mb = fb.apply_method mb.emit(f(mb, mb.getCodeParam[T](1), mb.getCodeParam[U](2))) - val asmFn = fb.result()(theHailClassLoader) + val asmFn = fb.result(ctx)(theHailClassLoader) asmFn.apply } @@ -43,7 +43,7 @@ class EmitStreamSuite extends HailSuite { val fb = EmitFunctionBuilder[T, U, V, R](ctx, "F") val mb = fb.apply_method mb.emit(f(mb, mb.getCodeParam[T](1), mb.getCodeParam[U](2), mb.getCodeParam[V](3))) - val asmFn = fb.result()(theHailClassLoader) + val asmFn = fb.result(ctx)(theHailClassLoader) asmFn.apply } @@ -67,7 +67,7 @@ class EmitStreamSuite extends HailSuite { case ToArray(s) => s case s => s } - TypeCheck(s) + TypeCheck(ctx, s) EmitStream.produce(new Emit(emitContext, fb.ecb), s, cb, region, EmitEnv(Env.empty, inputTypes.indices.map(i => mb.storeEmitParam(i + 2, cb))), None) .consumeCode[Long](cb, 0L, { s => val arr = StreamUtils.toArray(cb, s.asStream.producer, region) @@ -131,7 +131,7 @@ class EmitStreamSuite extends HailSuite { val emitContext = EmitContext.analyze(ctx, ir) fb.emitWithBuilder { cb => - TypeCheck(ir) + TypeCheck(ctx, ir) val len 
= cb.newLocal[Int]("len", 0) val len2 = cb.newLocal[Int]("len2", -1) @@ -170,8 +170,8 @@ class EmitStreamSuite extends HailSuite { IndexedSeq("hi", "world") ) for ((ir, v) <- tests) { - assert(evalStream(ir) == v, Pretty(ir)) - assert(evalStreamLen(ir) == Some(v.length), Pretty(ir)) + assert(evalStream(ir) == v, Pretty(ctx, ir)) + assert(evalStreamLen(ir) == Some(v.length), Pretty(ctx, ir)) } } @@ -237,8 +237,8 @@ class EmitStreamSuite extends HailSuite { ) for ((ir, v) <- tests) { val expectedLen = Option(v).map(_.length) - assert(evalStream(ir) == v, Pretty(ir)) - assert(evalStreamLen(ir) == expectedLen, Pretty(ir)) + assert(evalStream(ir) == v, Pretty(ctx, ir)) + assert(evalStreamLen(ir) == expectedLen, Pretty(ctx, ir)) } } @@ -251,8 +251,8 @@ class EmitStreamSuite extends HailSuite { "i", MakeStream(Seq(Ref("i", TInt32), Ref("end", TInt32)), TStream(TInt32))) ) - assert(evalStream(ir) == (3 until 10).flatMap { i => Seq(i, 10) }, Pretty(ir)) - assert(evalStreamLen(ir).isEmpty, Pretty(ir)) + assert(evalStream(ir) == (3 until 10).flatMap { i => Seq(i, 10) }, Pretty(ctx, ir)) + assert(evalStreamLen(ir).isEmpty, Pretty(ctx, ir)) } @Test def testEmitMap() { @@ -269,8 +269,8 @@ class EmitStreamSuite extends HailSuite { StreamMap(ten, "x", NA(TInt32)) -> IndexedSeq.tabulate(10) { _ => null } ) for ((ir, v) <- tests) { - assert(evalStream(ir) == v, Pretty(ir)) - assert(evalStreamLen(ir) == Some(v.length), Pretty(ir)) + assert(evalStream(ir) == v, Pretty(ctx, ir)) + assert(evalStreamLen(ir) == Some(v.length), Pretty(ctx, ir)) } } @@ -288,8 +288,8 @@ class EmitStreamSuite extends HailSuite { StreamFilter(StreamMap(ten, "x", NA(TInt32)), "z", True()) -> IndexedSeq.tabulate(10) { _ => null } ) for ((ir, v) <- tests) { - assert(evalStream(ir) == v, Pretty(ir)) - assert(evalStreamLen(ir).isEmpty, Pretty(ir)) + assert(evalStream(ir) == v, Pretty(ctx, ir)) + assert(evalStreamLen(ir).isEmpty, Pretty(ctx, ir)) } } @@ -317,9 +317,9 @@ class EmitStreamSuite extends HailSuite { IndexedSeq(0, 0, 1, 1, 2, 2, 3, 3) ) for ((ir, v) <- tests) { - assert(evalStream(ir) == v, Pretty(ir)) + assert(evalStream(ir) == v, Pretty(ctx, ir)) if (v != null) - assert(evalStreamLen(ir) == None, Pretty(ir)) + assert(evalStreamLen(ir) == None, Pretty(ctx, ir)) } } @@ -486,10 +486,10 @@ class EmitStreamSuite extends HailSuite { for ((lstream, rstream, expectedLeft, expectedOuter) <- tests) { val l = leftjoin(lstream, rstream) val o = outerjoin(lstream, rstream) - assert(evalStream(l) == expectedLeft, Pretty(l)) - assert(evalStream(o) == expectedOuter, Pretty(o)) - assert(evalStreamLen(l) == Some(expectedLeft.length), Pretty(l)) - assert(evalStreamLen(o) == None, Pretty(o)) + assert(evalStream(l) == expectedLeft, Pretty(ctx, l)) + assert(evalStream(o) == expectedOuter, Pretty(ctx, o)) + assert(evalStreamLen(l) == Some(expectedLeft.length), Pretty(ctx, l)) + assert(evalStreamLen(o) == None, Pretty(ctx, o)) } } @@ -553,10 +553,10 @@ class EmitStreamSuite extends HailSuite { for ((lstream, rstream, expectedLeft, expectedInner) <- tests) { val l = leftjoin(lstream, rstream) val i = innerjoin(lstream, rstream) - assert(evalStream(l) == expectedLeft, Pretty(l)) - assert(evalStream(i) == expectedInner, Pretty(i)) - assert(evalStreamLen(l) == Some(expectedLeft.length), Pretty(l)) - assert(evalStreamLen(i) == None, Pretty(i)) + assert(evalStream(l) == expectedLeft, Pretty(ctx, l)) + assert(evalStream(i) == expectedInner, Pretty(ctx, i)) + assert(evalStreamLen(l) == Some(expectedLeft.length), Pretty(ctx, l)) + assert(evalStreamLen(i) == 
None, Pretty(ctx, i)) } } @@ -618,8 +618,8 @@ class EmitStreamSuite extends HailSuite { 1, "a", "v", a + v) -> IndexedSeq(1, 1 /*1+0*0*/ , 2 /*1+1*1*/ , 6 /*2+2*2*/ , 15 /*6+3*3*/) ) for ((ir, v) <- tests) { - assert(evalStream(ir) == v, Pretty(ir)) - assert(evalStreamLen(ir) == Some(v.length), Pretty(ir)) + assert(evalStream(ir) == v, Pretty(ctx, ir)) + assert(evalStreamLen(ir) == Some(v.length), Pretty(ctx, ir)) } } @@ -628,7 +628,7 @@ class EmitStreamSuite extends HailSuite { val aggregate = compileStream(LoweringPipeline.compileLowerer(false).apply(ctx, ir).asInstanceOf[IR], PType.canonical(inType)) for ((inp, expected) <- tests) - assert(aggregate(inp) == expected, Pretty(ir)) + assert(aggregate(inp) == expected, Pretty(ctx, ir)) } def scanOp(op: AggOp, initArgs: Seq[IR], opArgs: Seq[IR]): ApplyScanOp = @@ -718,8 +718,8 @@ class EmitStreamSuite extends HailSuite { ) val lens: Array[Option[Int]] = Array(Some(3), Some(4), Some(3), None, None, None) for (((ir, v), len) <- tests zip lens) { - assert(evalStream(ir) == v, Pretty(ir)) - assert(evalStreamLen(ir) == len, Pretty(ir)) + assert(evalStream(ir) == v, Pretty(ctx, ir)) + assert(evalStreamLen(ir) == len, Pretty(ctx, ir)) } } @@ -865,7 +865,7 @@ class EmitStreamSuite extends HailSuite { StreamScan(StreamMap(target, "i", i), 0, "a", "i", i) -> 1, StreamScan(StreamScan(target, 0, "a", "i", i), 0, "a", "i", i) -> 1 )) { - assert(StreamUtils.multiplicity(ir, "target") == v, Pretty(ir)) + assert(StreamUtils.multiplicity(ir, "target") == v, Pretty(ctx, ir)) } } @@ -884,7 +884,7 @@ class EmitStreamSuite extends HailSuite { throw new RuntimeException(s"memory usage scales with stream size!" + s"\n at size=$lowSize, memory=$memUsed1" + s"\n at size=$highSize, memory=$memUsed2" + - s"\n IR: ${ Pretty(f(lowSize)) }") + s"\n IR: ${ Pretty(ctx, f(lowSize)) }") } diff --git a/hail/src/test/scala/is/hail/expr/ir/ExtractIntervalFiltersSuite.scala b/hail/src/test/scala/is/hail/expr/ir/ExtractIntervalFiltersSuite.scala index ca3a1c307d0..74cf4cfea6a 100644 --- a/hail/src/test/scala/is/hail/expr/ir/ExtractIntervalFiltersSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/ExtractIntervalFiltersSuite.scala @@ -278,7 +278,7 @@ class ExtractIntervalFiltersSuite extends HailSuite { invoke("lor", TBoolean, Ref("acc", TBoolean), invoke("contains", TBoolean, Ref("elt", TInterval(TInt32)), k1))) - TypeCheck(ir, BindingEnv(Env(ref1.name -> ref1.typ))) + TypeCheck(ctx, ir, BindingEnv(Env(ref1.name -> ref1.typ))) val (rw, intervals) = ExtractIntervalFilters.extractPartitionFilters(ir, ref1, ref1Key).get assert(rw == True()) @@ -329,7 +329,7 @@ class ExtractIntervalFiltersSuite extends HailSuite { ApplyComparisonOp(LTEQ(TInt32), k, I32(9)) ), False()))) - assert(ExtractIntervalFilters(tf).asInstanceOf[TableFilter].child.isInstanceOf[TableFilterIntervals]) + assert(ExtractIntervalFilters(ctx, tf).asInstanceOf[TableFilter].child.isInstanceOf[TableFilterIntervals]) assertEvalsTo(TableCount(tf), 6L)(ExecStrategy.interpretOnly) } } diff --git a/hail/src/test/scala/is/hail/expr/ir/ForwardLetsSuite.scala b/hail/src/test/scala/is/hail/expr/ir/ForwardLetsSuite.scala index a4db25ff2ce..45ec68779dc 100644 --- a/hail/src/test/scala/is/hail/expr/ir/ForwardLetsSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/ForwardLetsSuite.scala @@ -132,6 +132,7 @@ class ForwardLetsSuite extends HailSuite { .apply(aggEnv) TypeCheck( + ctx, ForwardLets(ir0).asInstanceOf[IR], BindingEnv(Env.empty, agg = Some(aggEnv))) } @@ -142,8 +143,8 @@ class ForwardLetsSuite extends HailSuite { 'x + 'x 
+ 'y }(env) - TypeCheck(ir, BindingEnv(env)) - TypeCheck(ForwardLets(ir).asInstanceOf[IR], BindingEnv(env)) + TypeCheck(ctx, ir, BindingEnv(env)) + TypeCheck(ctx, ForwardLets(ir).asInstanceOf[IR], BindingEnv(env)) } @Test def testLetsDoNotForwardInsideArrayAggWithNoOps(): Unit = { @@ -159,7 +160,7 @@ class ForwardLetsSuite extends HailSuite { Ref("y", TInt32) + Ref("x", TInt32 ))) - TypeCheck(x, BindingEnv(Env("y" -> TInt32))) - TypeCheck(ForwardLets(x).asInstanceOf[IR], BindingEnv(Env("y" -> TInt32))) + TypeCheck(ctx, x, BindingEnv(Env("y" -> TInt32))) + TypeCheck(ctx, ForwardLets(x).asInstanceOf[IR], BindingEnv(Env("y" -> TInt32))) } } diff --git a/hail/src/test/scala/is/hail/expr/ir/IRSuite.scala b/hail/src/test/scala/is/hail/expr/ir/IRSuite.scala index bffdf48c706..10502f0fe37 100644 --- a/hail/src/test/scala/is/hail/expr/ir/IRSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/IRSuite.scala @@ -2135,8 +2135,8 @@ class IRSuite extends HailSuite { def joinRows(left: IndexedSeq[Integer], right: IndexedSeq[Integer], joinType: String): IR = { join( - MakeStream.unify(left.zipWithIndex.map { case (n, idx) => MakeStruct(FastIndexedSeq("lk1" -> (if (n == null) NA(TInt32) else I32(n)), "lk2" -> Str("x"), "a" -> I64(idx))) }), - MakeStream.unify(right.zipWithIndex.map { case (n, idx) => MakeStruct(FastIndexedSeq("b" -> I32(idx), "rk2" -> Str("x"), "rk1" -> (if (n == null) NA(TInt32) else I32(n)), "c" -> Str("foo"))) }), + MakeStream.unify(ctx, left.zipWithIndex.map { case (n, idx) => MakeStruct(FastIndexedSeq("lk1" -> (if (n == null) NA(TInt32) else I32(n)), "lk2" -> Str("x"), "a" -> I64(idx))) }), + MakeStream.unify(ctx, right.zipWithIndex.map { case (n, idx) => MakeStruct(FastIndexedSeq("b" -> I32(idx), "rk2" -> Str("x"), "rk1" -> (if (n == null) NA(TInt32) else I32(n)), "c" -> Str("foo"))) }), FastIndexedSeq("lk1", "lk2"), FastIndexedSeq("rk1", "rk2"), rightDistinct = true, @@ -2150,7 +2150,7 @@ class IRSuite extends HailSuite { assertEvalsTo( join( NA(TStream(TStruct("k1" -> TInt32, "k2" -> TString, "a" -> TInt64))), - MakeStream.unify(Seq(MakeStruct(FastIndexedSeq("b" -> I32(0), "k2" -> Str("x"), "k1" -> I32(3), "c" -> Str("foo"))))), + MakeStream.unify(ctx, Seq(MakeStruct(FastIndexedSeq("b" -> I32(0), "k2" -> Str("x"), "k1" -> I32(3), "c" -> Str("foo"))))), FastIndexedSeq("k1", "k2"), FastIndexedSeq("k1", "k2"), true, @@ -2159,7 +2159,7 @@ class IRSuite extends HailSuite { assertEvalsTo( join( - MakeStream.unify(Seq(MakeStruct(FastIndexedSeq("k1" -> I32(0), "k2" -> Str("x"), "a" -> I64(3))))), + MakeStream.unify(ctx, Seq(MakeStruct(FastIndexedSeq("k1" -> I32(0), "k2" -> Str("x"), "a" -> I64(3))))), NA(TStream(TStruct("b" -> TInt32, "k2" -> TString, "k1" -> TInt32, "c" -> TString))), FastIndexedSeq("k1", "k2"), FastIndexedSeq("k1", "k2"), @@ -2199,8 +2199,8 @@ class IRSuite extends HailSuite { def joinRows(left: IndexedSeq[Integer], right: IndexedSeq[Integer], joinType: String): IR = { join( - MakeStream.unify(left.zipWithIndex.map { case (n, idx) => MakeStruct(FastIndexedSeq("lk" -> (if (n == null) NA(TInt32) else I32(n)), "l" -> I32(idx))) }), - MakeStream.unify(right.zipWithIndex.map { case (n, idx) => MakeStruct(FastIndexedSeq("rk" -> (if (n == null) NA(TInt32) else I32(n)), "r" -> I32(idx))) }), + MakeStream.unify(ctx, left.zipWithIndex.map { case (n, idx) => MakeStruct(FastIndexedSeq("lk" -> (if (n == null) NA(TInt32) else I32(n)), "l" -> I32(idx))) }), + MakeStream.unify(ctx, right.zipWithIndex.map { case (n, idx) => MakeStruct(FastIndexedSeq("rk" -> (if (n == null) NA(TInt32) 
else I32(n)), "r" -> I32(idx))) }), FastIndexedSeq("lk"), FastIndexedSeq("rk"), false, diff --git a/hail/src/test/scala/is/hail/expr/ir/LocusFunctionsSuite.scala b/hail/src/test/scala/is/hail/expr/ir/LocusFunctionsSuite.scala index 555fb21fcdf..8d299449a3d 100644 --- a/hail/src/test/scala/is/hail/expr/ir/LocusFunctionsSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/LocusFunctionsSuite.scala @@ -2,7 +2,6 @@ package is.hail.expr.ir import is.hail.ExecStrategy import is.hail.HailSuite -import is.hail.TestUtils.assertEvalsTo import is.hail.types.physical.{PCanonicalLocus, PInterval} import is.hail.types.virtual._ import is.hail.utils.{FastIndexedSeq, FastSeq, Interval} diff --git a/hail/src/test/scala/is/hail/expr/ir/PruneSuite.scala b/hail/src/test/scala/is/hail/expr/ir/PruneSuite.scala index d924c50fc42..00581bd9494 100644 --- a/hail/src/test/scala/is/hail/expr/ir/PruneSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/PruneSuite.scala @@ -48,15 +48,22 @@ class PruneSuite extends HailSuite { assert(PruneDeadFields.isSupertype(tuple2IntsFirstRemoved, tuple2Ints)) } + @Test def testIsSupertypeWithDistinctFieldTypes(): Unit = { + val tuple2Ints = TTuple(TInt32, TFloat64) + val tuple2IntsFirstRemoved = TTuple(IndexedSeq(TupleField(1, TFloat64))) + + assert(PruneDeadFields.isSupertype(tuple2IntsFirstRemoved, tuple2Ints)) + } + def checkMemo(ir: BaseIR, requestedType: BaseType, expected: Array[BaseType]) { val irCopy = ir.deepCopy() assert(PruneDeadFields.isSupertype(requestedType, irCopy.typ), s"not supertype:\n super: ${ requestedType.parsableString() }\n sub: ${ irCopy.typ.parsableString() }") val ms = PruneDeadFields.ComputeMutableState(Memo.empty[BaseType], mutable.HashMap.empty) irCopy match { - case mir: MatrixIR => PruneDeadFields.memoizeMatrixIR(mir, requestedType.asInstanceOf[MatrixType], ms) - case tir: TableIR => PruneDeadFields.memoizeTableIR(tir, requestedType.asInstanceOf[TableType], ms) - case ir: IR => PruneDeadFields.memoizeValueIR(ir, requestedType.asInstanceOf[Type], ms) + case mir: MatrixIR => PruneDeadFields.memoizeMatrixIR(ctx, mir, requestedType.asInstanceOf[MatrixType], ms) + case tir: TableIR => PruneDeadFields.memoizeTableIR(ctx, tir, requestedType.asInstanceOf[TableType], ms) + case ir: IR => PruneDeadFields.memoizeValueIR(ctx, ir, requestedType.asInstanceOf[Type], ms) } irCopy.children.zipWithIndex.foreach { case (child, i) => if (expected(i) != null && expected(i) != ms.requestedType.lookup(child)) { @@ -73,14 +80,14 @@ class PruneSuite extends HailSuite { val ms = PruneDeadFields.ComputeMutableState(Memo.empty[BaseType], mutable.HashMap.empty) val rebuilt = (irCopy match { case mir: MatrixIR => - PruneDeadFields.memoizeMatrixIR(mir, requestedType.asInstanceOf[MatrixType], ms) - PruneDeadFields.rebuild(mir, ms.rebuildState) + PruneDeadFields.memoizeMatrixIR(ctx, mir, requestedType.asInstanceOf[MatrixType], ms) + PruneDeadFields.rebuild(ctx, mir, ms.rebuildState) case tir: TableIR => - PruneDeadFields.memoizeTableIR(tir, requestedType.asInstanceOf[TableType], ms) - PruneDeadFields.rebuild(tir, ms.rebuildState) + PruneDeadFields.memoizeTableIR(ctx, tir, requestedType.asInstanceOf[TableType], ms) + PruneDeadFields.rebuild(ctx, tir, ms.rebuildState) case ir: IR => - PruneDeadFields.memoizeValueIR(ir, requestedType.asInstanceOf[Type], ms) - PruneDeadFields.rebuildIR(ir, BindingEnv(Env.empty, Some(Env.empty), Some(Env.empty)), ms.rebuildState) + PruneDeadFields.memoizeValueIR(ctx, ir, requestedType.asInstanceOf[Type], ms) + PruneDeadFields.rebuildIR(ctx, ir, 
BindingEnv(Env.empty, Some(Env.empty), Some(Env.empty)), ms.rebuildState) }).asInstanceOf[T] if (!f(ir, rebuilt)) fatal(s"IR did not rebuild the same:\n Base: $ir\n Rebuilt: $rebuilt") @@ -892,7 +899,7 @@ class PruneSuite extends HailSuite { checkRebuild(TableFilter(tr, tableRefBoolean(tr.typ, "row.2")), subsetTable(tr.typ, "row.3"), (_: BaseIR, r: BaseIR) => { val tf = r.asInstanceOf[TableFilter] - TypeCheck(tf.pred, PruneDeadFields.relationalTypeToEnv(tf.typ)) + TypeCheck(ctx, tf.pred, PruneDeadFields.relationalTypeToEnv(tf.typ)) tf.child.typ == subsetTable(tr.typ, "row.3", "row.2") }) } @@ -902,7 +909,7 @@ class PruneSuite extends HailSuite { checkRebuild(tmr, subsetTable(tmr.typ, "row.foo"), (_: BaseIR, r: BaseIR) => { val tmr = r.asInstanceOf[TableMapRows] - TypeCheck(tmr.newRow, PruneDeadFields.relationalTypeToEnv(tmr.child.typ)) + TypeCheck(ctx, tmr.newRow, PruneDeadFields.relationalTypeToEnv(tmr.child.typ)) tmr.child.typ == subsetTable(tr.typ, "row.2", "global.g1", "row.3") }) @@ -910,7 +917,7 @@ class PruneSuite extends HailSuite { checkRebuild(tmr2, subsetTable(tmr2.typ, "row.foo", "NO_KEY"), (_: BaseIR, r: BaseIR) => { val tmr = r.asInstanceOf[TableMapRows] - TypeCheck(tmr.newRow, PruneDeadFields.relationalTypeToEnv(tmr.child.typ)) + TypeCheck(ctx, tmr.newRow, PruneDeadFields.relationalTypeToEnv(tmr.child.typ)) tmr.child.typ == subsetTable(tr.typ, "row.2", "global.g1", "row.3", "NO_KEY") // FIXME: remove row.3 when TableRead is fixed }) @@ -921,7 +928,7 @@ class PruneSuite extends HailSuite { checkRebuild(tmg, subsetTable(tmg.typ, "global.foo"), (_: BaseIR, r: BaseIR) => { val tmg = r.asInstanceOf[TableMapGlobals] - TypeCheck(tmg.newGlobals, PruneDeadFields.relationalTypeToEnv(tmg.child.typ)) + TypeCheck(ctx, tmg.newGlobals, PruneDeadFields.relationalTypeToEnv(tmg.child.typ)) tmg.child.typ == subsetTable(tr.typ, "global.g1") }) } @@ -986,7 +993,7 @@ class PruneSuite extends HailSuite { checkRebuild(mfc, subsetMatrixTable(mfc.typ, "global.g1"), (_: BaseIR, r: BaseIR) => { val mfc = r.asInstanceOf[MatrixFilterCols] - TypeCheck(mfc.pred, PruneDeadFields.relationalTypeToEnv(mfc.child.typ)) + TypeCheck(ctx, mfc.pred, PruneDeadFields.relationalTypeToEnv(mfc.child.typ)) mfc.child.asInstanceOf[MatrixRead].typ == subsetMatrixTable(mr.typ, "global.g1", "sa.c2") } ) @@ -997,7 +1004,7 @@ class PruneSuite extends HailSuite { checkRebuild(mfe, subsetMatrixTable(mfe.typ, "global.g1"), (_: BaseIR, r: BaseIR) => { val mfe = r.asInstanceOf[MatrixFilterEntries] - TypeCheck(mfe.pred, PruneDeadFields.relationalTypeToEnv(mfe.child.typ)) + TypeCheck(ctx, mfe.pred, PruneDeadFields.relationalTypeToEnv(mfe.child.typ)) mfe.child.asInstanceOf[MatrixRead].typ == subsetMatrixTable(mr.typ, "global.g1", "sa.c2", "va.r2", "g.e1") } ) @@ -1010,7 +1017,7 @@ class PruneSuite extends HailSuite { checkRebuild(mmr, subsetMatrixTable(mmr.typ, "global.g1", "g.e1", "va.foo"), (_: BaseIR, r: BaseIR) => { val mmr = r.asInstanceOf[MatrixMapRows] - TypeCheck(mmr.newRow, PruneDeadFields.relationalTypeToEnv(mmr.child.typ)) + TypeCheck(ctx, mmr.newRow, PruneDeadFields.relationalTypeToEnv(mmr.child.typ)) mmr.child.asInstanceOf[MatrixKeyRowsBy].child.asInstanceOf[MatrixRead].typ == subsetMatrixTable(mr.typ, "global.g1", "va.r2", "g.e1") } ) @@ -1022,7 +1029,7 @@ class PruneSuite extends HailSuite { checkRebuild(mmc, subsetMatrixTable(mmc.typ, "global.g1", "g.e1", "sa.foo"), (_: BaseIR, r: BaseIR) => { val mmc = r.asInstanceOf[MatrixMapCols] - TypeCheck(mmc.newCol, PruneDeadFields.relationalTypeToEnv(mmc.child.typ)) + 
TypeCheck(ctx, mmc.newCol, PruneDeadFields.relationalTypeToEnv(mmc.child.typ)) mmc.child.asInstanceOf[MatrixRead].typ == subsetMatrixTable(mr.typ, "global.g1", "sa.c2", "g.e1") } ) @@ -1033,7 +1040,7 @@ class PruneSuite extends HailSuite { checkRebuild(mme, subsetMatrixTable(mme.typ, "global.g1", "g.foo"), (_: BaseIR, r: BaseIR) => { val mme = r.asInstanceOf[MatrixMapEntries] - TypeCheck(mme.newEntries, PruneDeadFields.relationalTypeToEnv(mme.child.typ)) + TypeCheck(ctx, mme.newEntries, PruneDeadFields.relationalTypeToEnv(mme.child.typ)) mme.child.asInstanceOf[MatrixRead].typ == subsetMatrixTable(mr.typ, "global.g1", "sa.c2", "va.r2") } ) @@ -1044,7 +1051,7 @@ class PruneSuite extends HailSuite { checkRebuild(mmg, subsetMatrixTable(mmg.typ, "global.foo", "g.e1", "va.r2"), (_: BaseIR, r: BaseIR) => { val mmg = r.asInstanceOf[MatrixMapGlobals] - TypeCheck(mmg.newGlobals, PruneDeadFields.relationalTypeToEnv(mmg.child.typ)) + TypeCheck(ctx, mmg.newGlobals, PruneDeadFields.relationalTypeToEnv(mmg.child.typ)) mmg.child.asInstanceOf[MatrixRead].typ == subsetMatrixTable(mr.typ, "global.g1", "va.r2", "g.e1") } ) @@ -1055,7 +1062,7 @@ class PruneSuite extends HailSuite { checkRebuild(ma, subsetMatrixTable(ma.typ, "va.foo", "g.foo"), (_: BaseIR, r: BaseIR) => { val ma = r.asInstanceOf[MatrixAggregateRowsByKey] - TypeCheck(ma.entryExpr, PruneDeadFields.relationalTypeToEnv(ma.child.typ)) + TypeCheck(ctx, ma.entryExpr, PruneDeadFields.relationalTypeToEnv(ma.child.typ)) ma.child.asInstanceOf[MatrixRead].typ == subsetMatrixTable(mr.typ, "global.g1", "sa.c2") } ) @@ -1066,7 +1073,7 @@ class PruneSuite extends HailSuite { checkRebuild(ma, subsetMatrixTable(ma.typ, "g.foo", "sa.foo"), (_: BaseIR, r: BaseIR) => { val ma = r.asInstanceOf[MatrixAggregateColsByKey] - TypeCheck(ma.entryExpr, PruneDeadFields.relationalTypeToEnv(ma.child.typ)) + TypeCheck(ctx, ma.entryExpr, PruneDeadFields.relationalTypeToEnv(ma.child.typ)) ma.child.asInstanceOf[MatrixRead].typ == subsetMatrixTable(mr.typ, "global.g1", "va.r2") } ) @@ -1434,7 +1441,7 @@ class PruneSuite extends HailSuite { .bind(ifIR, pruneT) // should run without error! 
- PruneDeadFields.rebuildIR(ifIR, BindingEnv.empty[Type].bindEval("a", t), + PruneDeadFields.rebuildIR(ctx, ifIR, BindingEnv.empty[Type].bindEval("a", t), PruneDeadFields.RebuildMutableState(memo, mutable.HashMap.empty)) } diff --git a/hail/src/test/scala/is/hail/expr/ir/RandomSuite.scala b/hail/src/test/scala/is/hail/expr/ir/RandomSuite.scala new file mode 100644 index 00000000000..1db3c97e1ae --- /dev/null +++ b/hail/src/test/scala/is/hail/expr/ir/RandomSuite.scala @@ -0,0 +1,106 @@ +package is.hail.expr.ir + +import is.hail.HailSuite +import org.apache.commons.math3.distribution.ChiSquaredDistribution +import org.testng.annotations.Test + +class RandomSuite extends HailSuite { + @Test def testThreefry() { + val k = Array.fill[Long](4)(0) + val tf = Threefry(k) + val x = Array.fill[Long](4)(0) + val expected = Array( + 0x09218EBDE6C85537L, + 0x55941F5266D86105L, + 0x4BD25E16282434DCL, + 0xEE29EC846BD2E40BL + ) + tf(x, 0) + assert(x sameElements expected) + + val rand = new ThreefryRandomEngine(k, Array.fill(4)(0L), 0, tweak = 0) + val y = Array.fill(4)(rand.nextLong()) + assert(y sameElements expected) + } + + def runChiSquareTest(samples: Int, buckets: Int)(sample: => Int) { + val chiSquareDist = new ChiSquaredDistribution(buckets - 1) + val expected = samples.toDouble / buckets + var numRuns = 0 + val passThreshold = 0.1 + val failThreshold = 1e-6 + var geometricMean = failThreshold + + while (geometricMean >= failThreshold && geometricMean < passThreshold) { + val counts = Array.ofDim[Int](buckets) + for (_ <- 0 until samples) counts(sample) += 1 + val chisquare = counts.map(observed => math.pow(observed - expected, 2) / expected).sum + val pvalue = 1 - chiSquareDist.cumulativeProbability(chisquare) + numRuns += 1 + geometricMean = math.pow(geometricMean, (numRuns - 1).toDouble / numRuns) * math.pow(pvalue, 1.0 / numRuns) + } + assert(geometricMean >= passThreshold, s"failed after $numRuns runs with pvalue $geometricMean") + println(s"passed after $numRuns runs with pvalue $geometricMean") + } + + @Test def testRandomInt() { + val n = 1 << 25 + val k = 1 << 15 + val rand = ThreefryRandomEngine() + runChiSquareTest(n, k) { + rand.nextInt() & (k - 1) + } + } + + @Test def testBoundedUniformInt() { + var n = 1 << 25 + var k = 1 << 15 + val rand = ThreefryRandomEngine() + runChiSquareTest(n, k) { + rand.nextInt(k) + } + + n = 30000000 + k = math.pow(n, 3.0/5).toInt + runChiSquareTest(n, k) { + rand.nextInt(k) + } + } + + @Test def testBoundedUniformLong() { + var n = 1 << 25 + var k = 1 << 15 + val rand = ThreefryRandomEngine() + runChiSquareTest(n, k) { + rand.nextLong(k).toInt + } + + n = 30000000 + k = math.pow(n, 3.0/5).toInt + runChiSquareTest(n, k) { + rand.nextLong(k).toInt + } + } + + @Test def testUniformDouble() { + val n = 1 << 25 + val k = 1 << 15 + val rand = ThreefryRandomEngine() + runChiSquareTest(n, k) { + val r = rand.nextDouble() + assert(r >= 0.0 && r < 1.0, r) + (r * k).toInt + } + } + + @Test def testUniformFloat() { + val n = 1 << 25 + val k = 1 << 15 + val rand = ThreefryRandomEngine() + runChiSquareTest(n, k) { + val r = rand.nextFloat() + assert(r >= 0.0 && r < 1.0, r) + (r * k).toInt + } + } +} diff --git a/hail/src/test/scala/is/hail/expr/ir/RequirednessSuite.scala b/hail/src/test/scala/is/hail/expr/ir/RequirednessSuite.scala index 1f38e497764..3fd6c228a1d 100644 --- a/hail/src/test/scala/is/hail/expr/ir/RequirednessSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/RequirednessSuite.scala @@ -436,17 +436,17 @@ class RequirednessSuite extends HailSuite { 
valueIR().map(v => v(0) -> v(1)).foreach { case (n: IR, t: PType) => if (n.typ != t.virtualType) - s += s"${ n.typ } != ${ t.virtualType }: \n${ Pretty(n) }" + s += s"${ n.typ } != ${ t.virtualType }: \n${ Pretty(ctx, n) }" case (n: IR, et: EmitType) => if (n.typ != et.virtualType) - s += s"${ n.typ } != ${ et.virtualType }: \n${ Pretty(n) }" + s += s"${ n.typ } != ${ et.virtualType }: \n${ Pretty(ctx, n) }" } tableIR().map(v => (v(0), v(1), v(2))).foreach { case (n: TableIR, row: PType, global: PType) => if (n.typ.rowType != row.virtualType || n.typ.globalType != global.virtualType ) s += s"""row: ${ n.typ.rowType } vs ${ row.virtualType } |global: ${ n.typ.globalType } vs ${ global.virtualType }: - |${ Pretty(n) }" + |${ Pretty(ctx, n) }" |""".stripMargin } assert(s.size == 0, s.result().mkString("\n\n")) @@ -454,20 +454,20 @@ class RequirednessSuite extends HailSuite { def dump(m: Memo[BaseTypeWithRequiredness]): String = { m.m.map { case (node, t) => - s"${Pretty(node.t)}: \n$t" + s"${Pretty(ctx, node.t)}: \n$t" }.mkString("\n\n") } @Test(dataProvider = "valueIR") def testRequiredness(node: IR, expected: Any): Unit = { - TypeCheck(node) + TypeCheck(ctx, node) val et = expected match { case pt: PType => EmitType(pt.sType, pt.required) case et: EmitType => et } val res = Requiredness.apply(node, ctx) val actual = res.r.lookup(node).asInstanceOf[TypeWithRequiredness] - assert(actual.canonicalEmitType(node.typ) == et, s"\n\n${Pretty(node)}: \n$actual\n\n${ dump(res.r) }") + assert(actual.canonicalEmitType(node.typ) == et, s"\n\n${Pretty(ctx, node)}: \n$actual\n\n${ dump(res.r) }") } @Test def sharedNodesWorkCorrectly(): Unit = { @@ -485,8 +485,8 @@ class RequirednessSuite extends HailSuite { def testTableRequiredness(node: TableIR, row: PType, global: PType): Unit = { val res = Requiredness.apply(node, ctx) val actual = res.r.lookup(node).asInstanceOf[RTable] - assert(actual.rowType.canonicalPType(node.typ.rowType) == row, s"\n\n${Pretty(node)}: \n$actual\n\n${ dump(res.r) }") - assert(actual.globalType.canonicalPType(node.typ.globalType) == global, s"\n\n${Pretty(node)}: \n$actual\n\n${ dump(res.r) }") + assert(actual.rowType.canonicalPType(node.typ.rowType) == row, s"\n\n${Pretty(ctx, node)}: \n$actual\n\n${ dump(res.r) }") + assert(actual.globalType.canonicalPType(node.typ.globalType) == global, s"\n\n${Pretty(ctx, node)}: \n$actual\n\n${ dump(res.r) }") } @Test def testTableReader() { @@ -512,8 +512,8 @@ class RequirednessSuite extends HailSuite { val node = TableRead(rType, dropRows = false, reader) val res = Requiredness.apply(node, ctx) val actual = res.r.lookup(node).asInstanceOf[RTable] - assert(actual.rowType.canonicalPType(node.typ.rowType) == row, s"\n\n${ Pretty(node) }: \n$actual\n\n${ dump(res.r) }") - assert(actual.globalType.canonicalPType(node.typ.globalType) == global, s"\n\n${ Pretty(node) }: \n$actual\n\n${ dump(res.r) }") + assert(actual.rowType.canonicalPType(node.typ.rowType) == row, s"\n\n${ Pretty(ctx, node) }: \n$actual\n\n${ dump(res.r) }") + assert(actual.globalType.canonicalPType(node.typ.globalType) == global, s"\n\n${ Pretty(ctx, node) }: \n$actual\n\n${ dump(res.r) }") } } diff --git a/hail/src/test/scala/is/hail/expr/ir/SimplifySuite.scala b/hail/src/test/scala/is/hail/expr/ir/SimplifySuite.scala index 4bfe5abd5b1..c5620545fc9 100644 --- a/hail/src/test/scala/is/hail/expr/ir/SimplifySuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/SimplifySuite.scala @@ -1,7 +1,6 @@ package is.hail.expr.ir import is.hail.{ExecStrategy, HailSuite} -import
is.hail.TestUtils.assertEvalsTo import is.hail.expr.ir.TestUtils.IRAggCount import is.hail.types.virtual._ import is.hail.utils.{FastIndexedSeq, FastSeq, Interval} @@ -42,16 +41,16 @@ class SimplifySuite extends HailSuite { @Test def testInsertFieldsRewriteRules() { val ir1 = InsertFields(InsertFields(base, Seq("1" -> I32(2)), None), Seq("1" -> I32(3)), None) - assert(Simplify(ir1) == InsertFields(base, Seq("1" -> I32(3)), Some(FastIndexedSeq("1", "2")))) + assert(Simplify(ctx, ir1) == InsertFields(base, Seq("1" -> I32(3)), Some(FastIndexedSeq("1", "2")))) val ir2 = InsertFields(InsertFields(base, Seq("3" -> I32(2)), Some(FastIndexedSeq("3", "1", "2"))), Seq("3" -> I32(3)), None) - assert(Simplify(ir2) == InsertFields(base, Seq("3" -> I32(3)), Some(FastIndexedSeq("3", "1", "2")))) + assert(Simplify(ctx, ir2) == InsertFields(base, Seq("3" -> I32(3)), Some(FastIndexedSeq("3", "1", "2")))) val ir3 = InsertFields(InsertFields(base, Seq("3" -> I32(2)), Some(FastIndexedSeq("3", "1", "2"))), Seq("4" -> I32(3)), Some(FastIndexedSeq("3", "1", "2", "4"))) - assert(Simplify(ir3) == InsertFields(base, Seq("3" -> I32(2), "4" -> I32(3)), Some(FastIndexedSeq("3", "1", "2", "4")))) + assert(Simplify(ctx, ir3) == InsertFields(base, Seq("3" -> I32(2), "4" -> I32(3)), Some(FastIndexedSeq("3", "1", "2", "4")))) val ir4 = InsertFields(InsertFields(base, Seq("3" -> I32(0), "4" -> I32(1))), Seq("3" -> I32(5))) - assert(Simplify(ir4) == InsertFields(base, Seq("4" -> I32(1), "3" -> I32(5)), Some(FastIndexedSeq("1", "2", "3", "4")))) + assert(Simplify(ctx, ir4) == InsertFields(base, Seq("4" -> I32(1), "3" -> I32(5)), Some(FastIndexedSeq("1", "2", "3", "4")))) } lazy val base2 = Literal(TStruct("A" -> TInt32, "B" -> TInt32, "C" -> TInt32, "D" -> TInt32), Row(1, 2, 3, 4)) @@ -62,16 +61,16 @@ class SimplifySuite extends HailSuite { IndexedSeq("B" -> GetField(base2, "B")), None ) - val simplify1 = Simplify(ir1) + val simplify1 = Simplify(ctx, ir1) assert(simplify1.typ == ir1.typ) } @Test def testInsertSelectRewriteRules() { val ir1 = SelectFields(InsertFields(base, FastIndexedSeq("3" -> I32(1)), None), FastIndexedSeq("1")) - assert(Simplify(ir1) == SelectFields(base, FastIndexedSeq("1"))) + assert(Simplify(ctx, ir1) == SelectFields(base, FastIndexedSeq("1"))) val ir2 = SelectFields(InsertFields(base, FastIndexedSeq("3" -> I32(1)), None), FastIndexedSeq("3", "1")) - assert(Simplify(ir2) == InsertFields(SelectFields(base, FastIndexedSeq("1")), FastIndexedSeq("3" -> I32(1)), Some(FastIndexedSeq("3", "1")))) + assert(Simplify(ctx, ir2) == InsertFields(SelectFields(base, FastIndexedSeq("1")), FastIndexedSeq("3" -> I32(1)), Some(FastIndexedSeq("3", "1")))) } @Test def testBlockMatrixRewriteRules() { @@ -79,7 +78,7 @@ class SimplifySuite extends HailSuite { FastIndexedSeq(2, 2), 10) val identityBroadcast = BlockMatrixBroadcast(bmir, FastIndexedSeq(0, 1), FastIndexedSeq(2, 2), 10) - assert(Simplify(identityBroadcast) == bmir) + assert(Simplify(ctx, identityBroadcast) == bmir) } @Test def testContainsRewrites() { @@ -112,14 +111,14 @@ class SimplifySuite extends HailSuite { val ir2 = Let("row2", InsertFields(r, FastSeq(("y", F64(0.0)))), InsertFields(r2, FastSeq(("z", GetField(r2, "x").toD + GetField(r2, "y"))))) val ir3 = Let("row2", InsertFields(r, FastSeq(("y", F64(0.0)))), InsertFields(Ref("something_else", TStruct.empty), FastSeq(("z", GetField(r2, "y").toI)))) - assert(Simplify(ir1) == InsertFields(r, FastSeq(("y", F64(0)), ("z", GetField(r, "x").toD)), Some(FastIndexedSeq("x", "y", "z")))) - assert(Simplify(ir2) == 
InsertFields(r, FastSeq(("y", F64(0.0)), ("z", GetField(r, "x").toD + F64(0.0))), Some(FastIndexedSeq("x", "y", "z")))) + assert(Simplify(ctx, ir1) == InsertFields(r, FastSeq(("y", F64(0)), ("z", GetField(r, "x").toD)), Some(FastIndexedSeq("x", "y", "z")))) + assert(Simplify(ctx, ir2) == InsertFields(r, FastSeq(("y", F64(0.0)), ("z", GetField(r, "x").toD + F64(0.0))), Some(FastIndexedSeq("x", "y", "z")))) assert(Optimize[IR](ir3, "direct", ctx) == InsertFields(Ref("something_else", TStruct.empty), FastSeq(("z", I32(0))))) val shouldNotRewrite = Let("row2", InsertFields(r, FastSeq(("y", Ref("other", TFloat64)))), InsertFields(r2, FastSeq(("z", invoke("str", TString, r2))))) - assert(Simplify(shouldNotRewrite) == shouldNotRewrite) + assert(Simplify(ctx, shouldNotRewrite) == shouldNotRewrite) } @Test def testNestedInsertsSimplifyAcrossLets() { @@ -138,7 +137,7 @@ class SimplifySuite extends HailSuite { ) ) ) - val simplified = new NormalizeNames(_.toString, true).apply(Simplify(l)) + val simplified = new NormalizeNames(_.toString, true).apply(Simplify(ctx, l)) val expected = Let("1", I32(1) + Ref("OTHER_1", TInt32), Let("2", I32(1) + Ref("1", TInt32), @@ -160,7 +159,7 @@ class SimplifySuite extends HailSuite { AggLet("bar", In(1, TInt32) * In(1, TInt32), Ref("x", TInt32), true))) doesRewrite.foreach { a => - assert(Simplify(a) == a.query) + assert(Simplify(ctx, a) == a.query) } val doesNotRewrite: Array[StreamAgg] = Array( @@ -172,7 +171,7 @@ class SimplifySuite extends HailSuite { ) doesNotRewrite.foreach { a => - assert(Simplify(a) == a) + assert(Simplify(ctx, a) == a) } } @@ -183,7 +182,7 @@ class SimplifySuite extends HailSuite { AggLet("bar", In(1, TInt32) * In(1, TInt32), Ref("x", TInt32), false))) doesRewrite.foreach { a => - assert(Simplify(a) == a.query) + assert(Simplify(ctx, a) == a.query) } val doesNotRewrite: Array[StreamAggScan] = Array( @@ -195,7 +194,7 @@ class SimplifySuite extends HailSuite { ) doesNotRewrite.foreach { a => - assert(Simplify(a) == a) + assert(Simplify(ctx, a) == a) } } @@ -203,7 +202,7 @@ class SimplifySuite extends HailSuite { val tr = TableRange(10, 10) val a = ArrayLen(GetField(TableCollect(tr), "rows")) assert(a.typ == TInt32) - val s = Simplify(a).asInstanceOf[IR] + val s = Simplify(ctx, a).asInstanceOf[IR] assertEvalsTo(s, 10) assert(s.typ == TInt32) } @@ -215,7 +214,7 @@ class SimplifySuite extends HailSuite { mir = MatrixMapCols(mir, AggLet("foo", I32(1), InsertFields(Ref("sa", colType), FastSeq(("bar", I32(2)))), false), None) val tir = MatrixColsTable(mir) - assert(Simplify(tir) == tir) + assert(Simplify(ctx, tir) == tir) } @Test def testFilterParallelize() { @@ -228,8 +227,8 @@ class SimplifySuite extends HailSuite { val tp = TableParallelize(rowsAndGlobals, None) val tf = TableFilter(tp, GetField(Ref("row", tp.typ.rowType), "x") < 100) - val rw = Simplify(tf) - TypeCheck(rw) + val rw = Simplify(ctx, tf) + TypeCheck(ctx, rw) assert(!Exists(rw, _.isInstanceOf[TableFilter])) } } @@ -239,9 +238,9 @@ class SimplifySuite extends HailSuite { val mapOfRange = mapIR(rangeIR)(range_element => range_element + 5) val mapBlockedByLet = bindIR(I32(5))(ref => mapIR(rangeIR)(range_element => range_element + ref)) - assert(Simplify(StreamLen(rangeIR)) == Simplify(StreamLen(mapOfRange))) - assert(Simplify(StreamLen(mapBlockedByLet)) match { - case Let(name, value, body) => body == Simplify(StreamLen(mapOfRange)) + assert(Simplify(ctx, StreamLen(rangeIR)) == Simplify(ctx, StreamLen(mapOfRange))) + assert(Simplify(ctx, StreamLen(mapBlockedByLet)) match { + case 
Let(name, value, body) => body == Simplify(ctx, StreamLen(mapOfRange)) }) } @@ -252,7 +251,7 @@ class SimplifySuite extends HailSuite { tir = TableKeyBy(tir, FastIndexedSeq("idx", "idx2")) tir = TableFilterIntervals(tir, FastIndexedSeq(Interval(Row(0), Row(1), true, false)), false) tir = TableFilterIntervals(tir, FastIndexedSeq(Interval(Row(8), Row(10), true, false)), false) - assert(Simplify(tir).asInstanceOf[TableFilterIntervals].intervals == FastIndexedSeq(Interval(Row(0), Row(1), true, false), Interval(Row(8), Row(10), true, false))) + assert(Simplify(ctx, tir).asInstanceOf[TableFilterIntervals].intervals == FastIndexedSeq(Interval(Row(0), Row(1), true, false), Interval(Row(8), Row(10), true, false))) } @Test def testSimplifyReadFilterIntervals() { @@ -273,21 +272,21 @@ class SimplifySuite extends HailSuite { val tfi1 = TableFilterIntervals(tr, intervals1, true) val exp1 = TableRead(tnr.fullType, false, TableNativeReader(fs, TableNativeReaderParameters(src + "/rows", Some(NativeReaderOptions(intervals1, tnr.fullType.keyType, true))))) - assert(Simplify(tfi1) == exp1) + assert(Simplify(ctx, tfi1) == exp1) val tfi2 = TableFilterIntervals(exp1, intervals2, true) val exp2 = TableRead(tnr.fullType, false, TableNativeReader(fs, TableNativeReaderParameters(src + "/rows", Some(NativeReaderOptions(intersection, tnr.fullType.keyType, true))))) - assert(Simplify(tfi2) == exp2) + assert(Simplify(ctx, tfi2) == exp2) val ztfi1 = TableFilterIntervals(tzr, intervals1, true) val zexp1 = TableRead(tzr.typ, false, tzrr.copy(options = Some(NativeReaderOptions(intervals1, tnr.fullType.keyType, true)))) - assert(Simplify(ztfi1) == zexp1) + assert(Simplify(ctx, ztfi1) == zexp1) val ztfi2 = TableFilterIntervals(ztfi1, intervals2, true) val zexp2 = TableRead(tzr.typ, false, tzrr.copy(options = Some(NativeReaderOptions(intersection, tnr.fullType.keyType, true)))) - assert(Simplify(ztfi2) == zexp2) + assert(Simplify(ctx, ztfi2) == zexp2) } @Test(enabled = false) def testFilterIntervalsKeyByToFilter() { @@ -296,7 +295,7 @@ class SimplifySuite extends HailSuite { t = TableKeyBy(t, FastIndexedSeq("x")) t = TableFilterIntervals(t, FastIndexedSeq(Interval(Row(-10), Row(10), includesStart = true, includesEnd = false)), keep = true) - val t2 = Simplify(t) + val t2 = Simplify(ctx, t) assert(t2 match { case TableKeyBy(TableFilter(child, _), _, _) => !Exists(child, _.isInstanceOf[TableFilterIntervals]) case _ => false @@ -305,28 +304,28 @@ class SimplifySuite extends HailSuite { @Test def testSimplifyArraySlice(): Unit = { val stream = StreamRange(I32(0), I32(10), I32(1)) - val streamSlice1 = Simplify(ArraySlice(ToArray(stream), I32(0), Some(I32(7)))) + val streamSlice1 = Simplify(ctx, ArraySlice(ToArray(stream), I32(0), Some(I32(7)))) assert(streamSlice1 match { case ToArray(StreamTake(_,_)) => true case _ => false } ) assertEvalsTo(streamSlice1.asInstanceOf[IR], FastSeq(0, 1, 2, 3, 4, 5, 6)) - val streamSlice2 = Simplify(ArraySlice(ToArray(stream), I32(3), Some(I32(5)))) + val streamSlice2 = Simplify(ctx, ArraySlice(ToArray(stream), I32(3), Some(I32(5)))) assert(streamSlice2 match { case ToArray(StreamTake(StreamDrop(_,_), _)) => true case _ => false } ) assertEvalsTo(streamSlice2.asInstanceOf[IR], FastSeq(3, 4)) - val streamSlice3 = Simplify(ArraySlice(ToArray(stream), I32(6), Some(I32(2)))) + val streamSlice3 = Simplify(ctx, ArraySlice(ToArray(stream), I32(6), Some(I32(2)))) assert(streamSlice3 match { case MakeArray(_, _) => true case _ => false } ) assertEvalsTo(streamSlice3.asInstanceOf[IR], FastSeq()) - val 
streamSlice4 = Simplify(ArraySlice(ToArray(stream), I32(0), None)) + val streamSlice4 = Simplify(ctx, ArraySlice(ToArray(stream), I32(0), None)) assert(streamSlice4 match { case ToArray(StreamDrop(_, _)) => true case _ => false diff --git a/hail/src/test/scala/is/hail/expr/ir/TableIRSuite.scala b/hail/src/test/scala/is/hail/expr/ir/TableIRSuite.scala index 80e6b2d2d3f..a6560224d64 100644 --- a/hail/src/test/scala/is/hail/expr/ir/TableIRSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/TableIRSuite.scala @@ -1151,7 +1151,9 @@ class TableIRSuite extends HailSuite { Row((i / 5) * 5) }, Row("Hello"))) - val e = intercept[HailException](TypeCheck(collect(TableMapPartitions(table, "g", "part", StreamFlatMap(StreamRange(0, 2, 1), "_", part))))) + val e = intercept[HailException](TypeCheck( + ctx, + collect(TableMapPartitions(table, "g", "part", StreamFlatMap(StreamRange(0, 2, 1), "_", part))))) assert("must iterate over the partition exactly once".r.findFirstIn(e.getCause.getMessage).isDefined) } } diff --git a/hail/src/test/scala/is/hail/expr/ir/agg/StagedBlockLinkedListSuite.scala b/hail/src/test/scala/is/hail/expr/ir/agg/StagedBlockLinkedListSuite.scala index ac465d71b01..78d1713fd48 100644 --- a/hail/src/test/scala/is/hail/expr/ir/agg/StagedBlockLinkedListSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/agg/StagedBlockLinkedListSuite.scala @@ -31,7 +31,7 @@ class StagedBlockLinkedListSuite extends HailSuite { ptr } - fb.result()(theHailClassLoader)(_) + fb.result(ctx)(theHailClassLoader)(_) } private val pushF: (Region, Long, E) => Unit = { @@ -52,7 +52,7 @@ class StagedBlockLinkedListSuite extends HailSuite { Code._empty } - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx)(theHailClassLoader) ({ (r, ptr, elt) => f(r, ptr, if(elt == null) 0L else ScalaToRegionValue(r, elemPType, elt)) }) @@ -75,7 +75,7 @@ class StagedBlockLinkedListSuite extends HailSuite { Code._empty } - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx)(theHailClassLoader) ({ (r, ptr, other) => assert(other.elemPType.required == elemPType.required) f(r, ptr, other.ptr) @@ -96,7 +96,7 @@ class StagedBlockLinkedListSuite extends HailSuite { sbll.resultArray(cb, rArg, arrayPType).a } - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx)(theHailClassLoader) ({ (r, ptr) => SafeRow.read(arrayPType, f(r, ptr)) .asInstanceOf[IndexedSeq[E]] @@ -119,7 +119,7 @@ class StagedBlockLinkedListSuite extends HailSuite { dstPtr } - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx)(theHailClassLoader) ({ (r, other) => f(r, other.ptr) }) } diff --git a/hail/src/test/scala/is/hail/expr/ir/lowering/BlockMatrixStageSuite.scala b/hail/src/test/scala/is/hail/expr/ir/lowering/BlockMatrixStageSuite.scala index 4c37b43d04f..15c24ad10c2 100644 --- a/hail/src/test/scala/is/hail/expr/ir/lowering/BlockMatrixStageSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/lowering/BlockMatrixStageSuite.scala @@ -21,9 +21,7 @@ class BlockMatrixStageSuite extends HailSuite { val stage = if (ctxs.isEmpty) BlockMatrixStage.empty(TInt32) else { - new BlockMatrixStage( - globalVals, - ctxs.head._2.typ) { + new BlockMatrixStage(IndexedSeq(), globalVals, ctxs.head._2.typ) { private[this] val ctxMap = ctxs.toMap def blockContext(idx: (Int, Int)): IR = ctxMap(idx) def blockBody(ctxRef: Ref): IR = body(ctxRef) diff --git a/hail/src/test/scala/is/hail/expr/ir/lowering/LowerDistributedSortSuite.scala b/hail/src/test/scala/is/hail/expr/ir/lowering/LowerDistributedSortSuite.scala index 27ad08bce34..65f01963fe7 
100644 --- a/hail/src/test/scala/is/hail/expr/ir/lowering/LowerDistributedSortSuite.scala +++ b/hail/src/test/scala/is/hail/expr/ir/lowering/LowerDistributedSortSuite.scala @@ -1,6 +1,5 @@ package is.hail.expr.ir.lowering -import is.hail.TestUtils.assertEvalsTo import is.hail.expr.ir.functions.IRRandomness import is.hail.expr.ir.{Analyses, Apply, ApplyBinaryPrimOp, Ascending, Descending, ErrorIDs, GetField, I32, IR, Literal, MakeStruct, Ref, Requiredness, RequirednessAnalysis, SelectFields, SortField, TableIR, TableMapRows, TableRange, ToArray, ToStream, mapIR} import is.hail.{ExecStrategy, HailContext, HailSuite, TestUtils} @@ -34,37 +33,42 @@ class LowerDistributedSortSuite extends HailSuite { // Only does ascending for now def testDistributedSortHelper(myTable: TableIR, sortFields: IndexedSeq[SortField]): Unit = { - HailContext.setFlag("shuffle_cutoff_to_local_sort", "40") - val analyses: Analyses = Analyses.apply(myTable, ctx) - val rowType = analyses.requirednessAnalysis.lookup(myTable).asInstanceOf[RTable].rowType - val stage = LowerTableIR.applyTable(myTable, DArrayLowering.All, ctx, analyses, Map.empty[String, IR]) + val originalShuffleCutoff = backend.getFlag("shuffle_cutoff_to_local_sort") + try { + backend.setFlag("shuffle_cutoff_to_local_sort", "40") + val analyses: Analyses = Analyses.apply(myTable, ctx) + val rowType = analyses.requirednessAnalysis.lookup(myTable).asInstanceOf[RTable].rowType + val stage = LowerTableIR.applyTable(myTable, DArrayLowering.All, ctx, analyses, Map.empty[String, IR]) - val sortedTs = LowerDistributedSort.distributedSort(ctx, stage, sortFields, Map.empty[String, IR], rowType) - val res = TestUtils.eval(sortedTs.mapCollect(Map.empty[String, IR])(x => ToArray(x))).asInstanceOf[IndexedSeq[IndexedSeq[Row]]].flatten + val sortedTs = LowerDistributedSort.distributedSort(ctx, stage, sortFields, Map.empty[String, IR], rowType) + val res = TestUtils.eval(sortedTs.mapCollect(Map.empty[String, IR])(x => ToArray(x))).asInstanceOf[IndexedSeq[IndexedSeq[Row]]].flatten - val rowFunc = myTable.typ.rowType.select(sortFields.map(_.field))._2 - val unsortedCollect = is.hail.expr.ir.TestUtils.collect(myTable) - val unsortedAnalyses = Analyses.apply(unsortedCollect, ctx) - val unsorted = TestUtils.eval(LowerTableIR.apply(unsortedCollect, DArrayLowering.All, ctx, unsortedAnalyses, Map.empty[String, IR])).asInstanceOf[Row](0).asInstanceOf[IndexedSeq[Row]] - val scalaSorted = unsorted.sortWith{ case (l, r) => - val leftKey = rowFunc(l) - val rightKey = rowFunc(r) - var ans = false - var i = 0 - while (i < sortFields.size) { - if (leftKey(i).asInstanceOf[Int] != rightKey(i).asInstanceOf[Int]) { - if (sortFields(i).sortOrder == Ascending) { - ans = leftKey(i).asInstanceOf[Int] < rightKey(i).asInstanceOf[Int] - } else { - ans = leftKey(i).asInstanceOf[Int] > rightKey(i).asInstanceOf[Int] + val rowFunc = myTable.typ.rowType.select(sortFields.map(_.field))._2 + val unsortedCollect = is.hail.expr.ir.TestUtils.collect(myTable) + val unsortedAnalyses = Analyses.apply(unsortedCollect, ctx) + val unsorted = TestUtils.eval(LowerTableIR.apply(unsortedCollect, DArrayLowering.All, ctx, unsortedAnalyses, Map.empty[String, IR])).asInstanceOf[Row](0).asInstanceOf[IndexedSeq[Row]] + val scalaSorted = unsorted.sortWith{ case (l, r) => + val leftKey = rowFunc(l) + val rightKey = rowFunc(r) + var ans = false + var i = 0 + while (i < sortFields.size) { + if (leftKey(i).asInstanceOf[Int] != rightKey(i).asInstanceOf[Int]) { + if (sortFields(i).sortOrder == Ascending) { + ans = 
leftKey(i).asInstanceOf[Int] < rightKey(i).asInstanceOf[Int] + } else { + ans = leftKey(i).asInstanceOf[Int] > rightKey(i).asInstanceOf[Int] + } + i = sortFields.size } - i = sortFields.size + i += 1 } - i += 1 + ans } - ans + assert(res == scalaSorted) + } finally { + backend.setFlag("shuffle_cutoff_to_local_sort", originalShuffleCutoff) } - assert(res == scalaSorted) } @Test def testDistributedSort(): Unit = { @@ -87,4 +91,9 @@ class LowerDistributedSortSuite extends HailSuite { testDistributedSortHelper(tableWithExtraField, IndexedSeq(SortField("idx", Descending))) testDistributedSortHelper(tableWithExtraField, IndexedSeq(SortField("foo", Descending), SortField("idx", Ascending))) } + + @Test def testDistributedSortEmpty(): Unit = { + val tableRange = TableRange(0, 1) + testDistributedSortHelper(tableRange, IndexedSeq(SortField("idx", Ascending))) + } } diff --git a/hail/src/test/scala/is/hail/io/AvroReaderSuite.scala b/hail/src/test/scala/is/hail/io/AvroReaderSuite.scala index 6d6912fe488..2509bc05107 100644 --- a/hail/src/test/scala/is/hail/io/AvroReaderSuite.scala +++ b/hail/src/test/scala/is/hail/io/AvroReaderSuite.scala @@ -1,7 +1,6 @@ package is.hail.io import is.hail.ExecStrategy.ExecStrategy -import is.hail.TestUtils.assertEvalsTo import is.hail.expr.ir.{ReadPartition, Str, ToArray} import is.hail.io.avro.AvroPartitionReader import is.hail.utils.{FastIndexedSeq, fatal, using} diff --git a/hail/src/test/scala/is/hail/io/compress/BGzipCodecSuite.scala b/hail/src/test/scala/is/hail/io/compress/BGzipCodecSuite.scala index e7b81bb9b0e..ff915e20ed9 100644 --- a/hail/src/test/scala/is/hail/io/compress/BGzipCodecSuite.scala +++ b/hail/src/test/scala/is/hail/io/compress/BGzipCodecSuite.scala @@ -142,7 +142,7 @@ class BGzipCodecSuite extends HailSuite { val end = makeVirtualOffset(splits(i + 1), 0) Row(i, compPath, splits(i), end, true) } - val lines2 = GenericLines.collect(fs, GenericLines.read(fs, contexts, false)) + val lines2 = GenericLines.collect(fs, GenericLines.read(fs, contexts, false, false)) compareLines(lines2, lines) true } diff --git a/hail/src/test/scala/is/hail/types/physical/PContainerTest.scala b/hail/src/test/scala/is/hail/types/physical/PContainerTest.scala index 3c03b854f05..cf8c2a1edc4 100644 --- a/hail/src/test/scala/is/hail/types/physical/PContainerTest.scala +++ b/hail/src/test/scala/is/hail/types/physical/PContainerTest.scala @@ -38,7 +38,7 @@ class PContainerTest extends PhysicalTestUtils { fb.emit(Region.containsNonZeroBits(value + sourceType.lengthHeaderBytes, sourceType.loadLength(value).toL)) - val res = fb.result()(theHailClassLoader)(src) + val res = fb.result(ctx)(theHailClassLoader)(src) res } @@ -53,7 +53,7 @@ class PContainerTest extends PhysicalTestUtils { fb.emit(sourceType.hasMissingValues(value)) - val res = fb.result()(theHailClassLoader)(src) + val res = fb.result(ctx)(theHailClassLoader)(src) res } diff --git a/hail/src/test/scala/is/hail/types/physical/PNDArraySuite.scala b/hail/src/test/scala/is/hail/types/physical/PNDArraySuite.scala index d63eb279223..c563e8be3ca 100644 --- a/hail/src/test/scala/is/hail/types/physical/PNDArraySuite.scala +++ b/hail/src/test/scala/is/hail/types/physical/PNDArraySuite.scala @@ -59,7 +59,7 @@ class PNDArraySuite extends PhysicalTestUtils { throw e } - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx)(theHailClassLoader) val result1 = f(region1, region2, region3) val result1Data = nd.unstagedDataFirstElementPointer(result1) diff --git 
a/hail/src/test/scala/is/hail/types/physical/PhysicalTestUtils.scala b/hail/src/test/scala/is/hail/types/physical/PhysicalTestUtils.scala index d9f2585c088..f7c2dc937da 100644 --- a/hail/src/test/scala/is/hail/types/physical/PhysicalTestUtils.scala +++ b/hail/src/test/scala/is/hail/types/physical/PhysicalTestUtils.scala @@ -76,7 +76,7 @@ abstract class PhysicalTestUtils extends HailSuite { } val copy = try { - val f = fb.result()(theHailClassLoader) + val f = fb.result(ctx)(theHailClassLoader) val copyOff = f(region, srcAddress) UnsafeRow.read(destType, region, copyOff) } catch { diff --git a/hail/src/test/scala/is/hail/utils/TextTableSuite.scala b/hail/src/test/scala/is/hail/utils/TextTableSuite.scala deleted file mode 100644 index 280c49c2b92..00000000000 --- a/hail/src/test/scala/is/hail/utils/TextTableSuite.scala +++ /dev/null @@ -1,35 +0,0 @@ -package is.hail.utils - -import is.hail.HailSuite -import is.hail.expr.ir.TextTableReader -import is.hail.types.virtual._ -import org.testng.annotations.Test - -class TextTableSuite extends HailSuite { - - @Test def testTypeGuessing() { - - val doubleStrings = Seq("1", ".1", "-1", "-.1", "1e1", "-1e1", - "1E1", "-1E1", "1.0e2", "-1.0e2", "1e-1", "-1e-1", "-1.0e-2") - val badDoubleStrings = Seq("1ee1", "1e--2", "1ee2", "1e0.1", "1e-0.1", "1e1.") - val intStrings = Seq("1", "0", "-1", "12312398", "-123092398") - val longStrings = Seq("11010101010101010", "-9223372036854775808") - val booleanStrings = Seq("true", "True", "TRUE", "false", "False", "FALSE") - - doubleStrings.foreach(str => assert(TextTableReader.float64Matcher(str))) - badDoubleStrings.foreach(str => assert(!TextTableReader.float64Matcher(str))) - - intStrings.foreach(str => assert(TextTableReader.int32Matcher(str))) - intStrings.foreach(str => assert(TextTableReader.int64Matcher(str))) - intStrings.foreach(str => assert(TextTableReader.float64Matcher(str))) - - longStrings.foreach(str => assert(TextTableReader.int64Matcher(str), str)) - longStrings.foreach(str => assert(!TextTableReader.int32Matcher(str))) - - booleanStrings.foreach(str => assert(TextTableReader.booleanMatcher(str))) - } - - @Test def testPipeDelimiter() { - assert(TextTableReader.splitLine("a|b", "|", '#').toSeq == Seq("a", "b")) - } -} diff --git a/infra/azure/main.tf b/infra/azure/main.tf index fa13f011647..65e07a3f140 100644 --- a/infra/azure/main.tf +++ b/infra/azure/main.tf @@ -2,12 +2,16 @@ terraform { required_providers { azurerm = { source = "hashicorp/azurerm" - version = "=2.97.0" + version = "=2.99.0" } azuread = { source = "hashicorp/azuread" version = "=2.7.0" } + kubernetes = { + source = "hashicorp/kubernetes" + version = "2.8.0" + } http = { source = "hashicorp/http" version = "2.1.0" @@ -60,7 +64,11 @@ module "vdc" { resource_group = data.azurerm_resource_group.rg container_registry_id = azurerm_container_registry.acr.id - k8s_machine_type = var.k8s_machine_type + + k8s_default_node_pool_machine_type = var.k8s_default_node_pool_machine_type + k8s_user_pool_machine_type = var.k8s_user_pool_machine_type + k8s_preemptible_node_pool_name = var.k8s_preemptible_node_pool_name + k8s_nonpreemptible_node_pool_name = var.k8s_nonpreemptible_node_pool_name } module "db" { diff --git a/infra/azure/modules/batch/main.tf b/infra/azure/modules/batch/main.tf index 648f3a9a31a..10e643b2ef0 100644 --- a/infra/azure/modules/batch/main.tf +++ b/infra/azure/modules/batch/main.tf @@ -55,6 +55,10 @@ resource "azurerm_storage_account" "batch" { location = var.resource_group.location account_tier = "Standard" 
account_replication_type = "LRS" + + blob_properties { + last_access_time_enabled = true + } } resource "azurerm_storage_container" "batch_logs" { @@ -75,6 +79,10 @@ resource "azurerm_storage_account" "test" { location = var.resource_group.location account_tier = "Standard" account_replication_type = "LRS" + + blob_properties { + last_access_time_enabled = true + } } resource "azurerm_storage_container" "test" { @@ -91,7 +99,7 @@ resource "azurerm_storage_management_policy" "test" { enabled = true filters { prefix_match = [azurerm_storage_container.test.name] - blob_types = ["blockBlob", "appendBlob"] + blob_types = ["blockBlob"] } actions { base_blob { diff --git a/infra/azure/modules/ci/main.tf b/infra/azure/modules/ci/main.tf index 8a03bae37a7..3b31d03a182 100644 --- a/infra/azure/modules/ci/main.tf +++ b/infra/azure/modules/ci/main.tf @@ -4,6 +4,10 @@ resource "azurerm_storage_account" "ci" { location = var.resource_group.location account_tier = "Standard" account_replication_type = "LRS" + + blob_properties { + last_access_time_enabled = true + } } resource "azurerm_storage_container" "ci_artifacts" { @@ -20,7 +24,7 @@ resource "azurerm_storage_management_policy" "ci" { enabled = true filters { prefix_match = [azurerm_storage_container.ci_artifacts.name] - blob_types = ["blockBlob", "appendBlob"] + blob_types = ["blockBlob"] } actions { base_blob { diff --git a/infra/azure/modules/vdc/main.tf b/infra/azure/modules/vdc/main.tf index 49bca49e4de..6123942cbce 100644 --- a/infra/azure/modules/vdc/main.tf +++ b/infra/azure/modules/vdc/main.tf @@ -52,13 -52,13 @@ resource "azurerm_kubernetes_cluster" "vdc" { default_node_pool { name = "nonpreempt" - vm_size = var.k8s_machine_type + vm_size = var.k8s_default_node_pool_machine_type vnet_subnet_id = azurerm_subnet.k8s_subnet.id enable_auto_scaling = true min_count = 1 - max_count = 200 + max_count = 5 node_labels = { "preemptible" = "false" @@ -82,10 +82,32 @@ resource "azurerm_kubernetes_cluster" "vdc" { } } +resource "azurerm_kubernetes_cluster_node_pool" "vdc_nonpreemptible_pool" { + name = var.k8s_nonpreemptible_node_pool_name + kubernetes_cluster_id = azurerm_kubernetes_cluster.vdc.id + vm_size = var.k8s_user_pool_machine_type + vnet_subnet_id = azurerm_subnet.k8s_subnet.id + + enable_auto_scaling = true + + min_count = 0 + max_count = 200 + + node_labels = { + "preemptible" = "false" + } + + lifecycle { + # Ignore if the node count has naturally changed since last apply + # due to autoscaling + ignore_changes = [node_count] + } +} + resource "azurerm_kubernetes_cluster_node_pool" "vdc_preemptible_pool" { - name = "preempt" + name = var.k8s_preemptible_node_pool_name kubernetes_cluster_id = azurerm_kubernetes_cluster.vdc.id - vm_size = var.k8s_machine_type + vm_size = var.k8s_user_pool_machine_type vnet_subnet_id = azurerm_subnet.k8s_subnet.id enable_auto_scaling = true diff --git a/infra/azure/modules/vdc/variables.tf b/infra/azure/modules/vdc/variables.tf index d986b5c3987..5f39919f551 100644 --- a/infra/azure/modules/vdc/variables.tf +++ b/infra/azure/modules/vdc/variables.tf @@ -5,7 +5,19 @@ variable resource_group { }) } -variable k8s_machine_type { +variable k8s_default_node_pool_machine_type { + type = string +} + +variable k8s_user_pool_machine_type { + type = string +} + +variable k8s_preemptible_node_pool_name { + type = string +} + +variable k8s_nonpreemptible_node_pool_name { type = string } diff --git a/infra/azure/variables.tf b/infra/azure/variables.tf index ffaa92acbb7..c1b0e0e2b6f 100644 --- a/infra/azure/variables.tf +++
b/infra/azure/variables.tf @@ -7,18 +7,33 @@ variable domain { } variable acr_name { - type = string + type = string default = "" } variable acr_sku { - type = string + type = string default = "Premium" } -variable k8s_machine_type { - type = string - default = "Standard_D2_v2" +variable k8s_default_node_pool_machine_type { + type = string + default = "Standard_D2_v2" # 2 vCPU +} + +variable k8s_user_pool_machine_type { + type = string + default = "Standard_D4_v2" # 8 vCPU +} + +variable k8s_preemptible_node_pool_name { + type = string + default = "preempt1" +} + +variable k8s_nonpreemptible_node_pool_name { + type = string + default = "nonpreempt1" } variable organization_domain { @@ -37,6 +52,6 @@ variable "ci_config" { } variable oauth2_developer_redirect_uris { - type = list(string) + type = list(string) default = [] } diff --git a/infra/gcp/main.tf b/infra/gcp/main.tf index 4f6eb836799..350ffc058b7 100644 --- a/infra/gcp/main.tf +++ b/infra/gcp/main.tf @@ -6,7 +6,7 @@ terraform { } kubernetes = { source = "hashicorp/kubernetes" - version = "1.13.3" + version = "2.8.0" } } backend "gcs" { @@ -14,6 +14,14 @@ terraform { } } +variable "k8s_preemptible_node_pool_name" { + type = string + default = "preemptible-pool" +} +variable "k8s_nonpreemptible_node_pool_name" { + type = string + default = "nonpreemptible-pool" +} variable "batch_gcp_regions" {} variable "gcp_project" {} variable "batch_logs_bucket_location" {} @@ -118,9 +126,9 @@ resource "google_container_cluster" "vdc" { } resource "google_container_node_pool" "vdc_preemptible_pool" { - name = "preemptible-pool" + name = var.k8s_preemptible_node_pool_name location = var.gcp_zone - cluster = google_container_cluster.vdc.name + cluster = google_container_cluster.vdc.name # Allocate at least one node, so that autoscaling can take place. initial_node_count = 1 @@ -132,7 +140,7 @@ resource "google_container_node_pool" "vdc_preemptible_pool" { node_config { preemptible = true - machine_type = "n1-standard-2" + machine_type = "n1-standard-8" labels = { "preemptible" = "true" @@ -155,9 +163,9 @@ resource "google_container_node_pool" "vdc_preemptible_pool" { } resource "google_container_node_pool" "vdc_nonpreemptible_pool" { - name = "nonpreemptible-pool" + name = var.k8s_nonpreemptible_node_pool_name location = var.gcp_zone - cluster = google_container_cluster.vdc.name + cluster = google_container_cluster.vdc.name # Allocate at least one node, so that autoscaling can take place. 
initial_node_count = 1 @@ -169,7 +177,7 @@ resource "google_container_node_pool" "vdc_nonpreemptible_pool" { node_config { preemptible = false - machine_type = "n1-standard-2" + machine_type = "n1-standard-8" labels = { preemptible = "false" diff --git a/internal-gateway/internal-gateway.nginx.conf b/internal-gateway/internal-gateway.nginx.conf index 7389e85bd1d..e1e09e0e285 100644 --- a/internal-gateway/internal-gateway.nginx.conf +++ b/internal-gateway/internal-gateway.nginx.conf @@ -13,7 +13,7 @@ map $service $batch_driver_limit_key { default ""; # no key => no limit } -limit_req_zone $batch_driver_limit_key zone=batch_driver:1m rate=18r/s; +limit_req_zone $batch_driver_limit_key zone=batch_driver:1m rate=60r/s; server { server_name internal.hail; diff --git a/letsencrypt/subdomains.txt b/letsencrypt/subdomains.txt index 336db4be899..7bc1e03642d 100644 --- a/letsencrypt/subdomains.txt +++ b/letsencrypt/subdomains.txt @@ -2,6 +2,9 @@ ci notebook batch batch-driver +benchmark +blog +memory monitoring auth prometheus diff --git a/query/Makefile b/query/Makefile index 8763f9e08cf..11fedfc4a63 100644 --- a/query/Makefile +++ b/query/Makefile @@ -1,5 +1,8 @@ include ../config.mk +QUERY_STORAGE_URI := $(shell kubectl get secret global-config --template={{.data.query_storage_uri}} | base64 --decode) +TEST_STORAGE_URI := $(shell kubectl get secret global-config --template={{.data.test_storage_uri}} | base64 --decode) + EXTRA_PYTHONPATH := ../hail/python:../gear PYTHON := PYTHONPATH=$${PYTHONPATH:+$${PYTHONPATH}:}$(EXTRA_PYTHONPATH) python3 @@ -12,51 +15,57 @@ jar: cp ../hail/build/libs/hail-all-spark.jar ./hail.jar HAIL_TEST_GCS_TOKEN := $(shell whoami) +HAIL_TEST_RESOURCES_PREFIX := $(TEST_STORAGE_URI)/$(HAIL_TEST_GCS_TOKEN)/hail-test-resources +HAIL_TEST_RESOURCES_DIR := $(HAIL_TEST_RESOURCES_PREFIX)/test/resources/ +HAIL_DOCTEST_DATA_DIR := $(HAIL_TEST_RESOURCES_PREFIX)/doctest/data/ HAIL_REVISION := $(shell git rev-parse HEAD) -# JAR_LOCATION := gs://hail-test-dmk9z/$(HAIL_TEST_GCS_TOKEN)/jars/$(HAIL_REVISION).jar -JAR_LOCATION := gs://hail-query/jars/$(HAIL_REVISION).jar +ifeq ($(NAMESPACE),default) +ifeq ($(DEPLOY_JAR_FOR_PUBLIC_USE),true) +# This should only be used if the normal CI deploy process fails and you need to upload a JAR to the +# expected location for our users. +JAR_LOCATION := $(QUERY_STORAGE_URI)/jars/$(HAIL_REVISION).jar +else +JAR_LOCATION := $(QUERY_STORAGE_URI)/jars/$(HAIL_TEST_GCS_TOKEN)/$(HAIL_REVISION).jar +endif +else +JAR_LOCATION := $(TEST_STORAGE_URI)/$(NAMESPACE)/jars/$(HAIL_REVISION).jar +endif -.PHONY: push-jar -push-jar: jar +.PHONY: upload-query-jar +upload-query-jar: jar + ! [ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default gsutil cp ./hail.jar "$(JAR_LOCATION)" echo >last_uploaded_jar "$(JAR_LOCATION)" upload-resources-dir: - python3 -m hailtop.aiotools.copy 'null' '[ \ -{"from": "../hail/src/test/resources/", \ - "to": "gs://hail-test-dmk9z/$(HAIL_TEST_GCS_TOKEN)/hail-test-resources/test/resources/"}, \ -{"from": "../hail/python/hail/docs/data/", \ - "to": "gs://hail-test-dmk9z/$(HAIL_TEST_GCS_TOKEN)/hail-test-resources/doctest/data/"}]' + ! 
[ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default + python3 -m hailtop.aiotools.copy 'null' '[{"from":"../hail/src/test/resources","to":"$(HAIL_TEST_RESOURCES_DIR)"},{"from":"../hail/python/hail/docs/data","to":"$(HAIL_DOCTEST_DATA_DIR)"}]' touch upload-resources-dir .PHONY: test -test: push-jar upload-resources-dir - HAIL_QUERY_BACKEND=service \ - HAIL_TEST_RESOURCES_DIR='gs://hail-test-dmk9z/$(HAIL_TEST_GCS_TOKEN)/hail-test-resources/test/resources/' \ - HAIL_DOCTEST_DATA_DIR='gs://hail-test-dmk9z/$(HAIL_TEST_GCS_TOKEN)/hail-test-resources/doctest/data/' \ - HAIL_SHA='$(HAIL_REVISION)-$(TOKEN)' \ - HAIL_JAR_URL=$$(cat last_uploaded_jar) \ - $(MAKE) -C ../hail pytest +test: upload-query-jar upload-resources-dir + HAIL_QUERY_BACKEND=batch \ + HAIL_TEST_RESOURCES_DIR='$(HAIL_TEST_RESOURCES_DIR)' \ + HAIL_DOCTEST_DATA_DIR='$(HAIL_DOCTEST_DATA_DIR)' \ + HAIL_JAR_URL=$$(cat last_uploaded_jar) \ + $(MAKE) -C ../hail pytest .PHONY: ipython -ipython: push-jar - HAIL_QUERY_BACKEND=service \ - HAIL_SHA='$(HAIL_REVISION)-$(TOKEN)' \ - HAIL_JAR_URL=$$(cat last_uploaded_jar) \ - ipython +ipython: upload-query-jar + HAIL_QUERY_BACKEND=batch \ + HAIL_JAR_URL=$$(cat last_uploaded_jar) \ + ipython .PHONY: test-no-deps test-no-deps: - HAIL_QUERY_BACKEND=service \ - HAIL_TEST_RESOURCES_DIR='gs://hail-test-dmk9z/$(HAIL_TEST_GCS_TOKEN)/hail-test-resources/test/resources/' \ - HAIL_DOCTEST_DATA_DIR='gs://hail-test-dmk9z/$(HAIL_TEST_GCS_TOKEN)/hail-test-resources/doctest/data/' \ - HAIL_SHA='$(HAIL_REVISION)-$(TOKEN)' \ - HAIL_JAR_URL=$$(cat last_uploaded_jar) \ - $(MAKE) -C ../hail pytest + HAIL_QUERY_BACKEND=batch \ + HAIL_TEST_RESOURCES_DIR='$(HAIL_TEST_RESOURCES_DIR)' \ + HAIL_DOCTEST_DATA_DIR='$(HAIL_DOCTEST_DATA_DIR)' \ + HAIL_JAR_URL=$$(cat last_uploaded_jar) \ + $(MAKE) -C ../hail pytest .PHONY: ipython-no-deps ipython-no-deps: - HAIL_QUERY_BACKEND=service \ - HAIL_SHA='$(HAIL_REVISION)-$(TOKEN)' \ - HAIL_JAR_URL=$$(cat last_uploaded_jar) \ - ipython + HAIL_QUERY_BACKEND=batch \ + HAIL_JAR_URL=$$(cat last_uploaded_jar) \ + ipython diff --git a/tls/config.yaml b/tls/config.yaml index 6fea03be255..e0b90ca7142 100644 --- a/tls/config.yaml +++ b/tls/config.yaml @@ -19,6 +19,10 @@ principals: domains: - batch-driver kind: json +- name: batch-driver-nginx + domains: + - batch-driver + kind: nginx - name: ci domains: - ci From db65c33c29100c64405c39ebace90a7c463b4bec Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Tue, 29 Mar 2022 18:38:07 +1100 Subject: [PATCH 350/501] Remove obsolete load_config_file argument (#179) * Revert "Revert "Merge upstream 0.2.93" (#177)" This reverts commit e279414c8766be01ab7da46e7bb91b4a3f52241d. 
* Remove obsolete load_config_file argument --- infra/gcp/main.tf | 2 -- 1 file changed, 2 deletions(-) diff --git a/infra/gcp/main.tf b/infra/gcp/main.tf index 350ffc058b7..c1f9009a917 100644 --- a/infra/gcp/main.tf +++ b/infra/gcp/main.tf @@ -256,8 +256,6 @@ resource "google_compute_address" "internal_gateway" { } provider "kubernetes" { - load_config_file = false - host = "https://${google_container_cluster.vdc.endpoint}" token = data.google_client_config.provider.access_token cluster_ca_certificate = base64decode( From 8ed3bab59111a1a4efcbae3ad5e0f7653d116127 Mon Sep 17 00:00:00 2001 From: Vlad Savelyev Date: Thu, 31 Mar 2022 17:13:26 +1100 Subject: [PATCH 351/501] Infra: remove `globals.tfvars` and describe `ci-config` (#180) * ci tf: parametrize bucket * ci tf: parametrize bucket fix * Infra: remove global.tfvars, add ci/bucket_name var, add ci-config to README * Gitignore .tfvars * Revert parametrising ci bucket name (hail-ci is okay). Add query_gsa_secret * Fix * Update infra/gcp/README.md Co-authored-by: Leonhard Gruenschloss * Update infra/gcp/README.md Co-authored-by: Leonhard Gruenschloss * Update infra/gcp/README.md Co-authored-by: Leonhard Gruenschloss * Remove query_gsa_secret Co-authored-by: Leonhard Gruenschloss --- .gitignore | 1 + infra/gcp/README.md | 28 +++++++++++++++++++++++++++- infra/gcp/ci/variables.tf | 2 ++ infra/global.tfvars | 28 ---------------------------- 4 files changed, 30 insertions(+), 29 deletions(-) delete mode 100644 infra/global.tfvars diff --git a/.gitignore b/.gitignore index ecbdb3355ae..3f4b9befe3f 100644 --- a/.gitignore +++ b/.gitignore @@ -39,3 +39,4 @@ website/website/static/css/ *.dylib # macOS dynamic libraries */hail.jar infra/.terraform.lock.hcl +*.tfvars diff --git a/infra/gcp/README.md b/infra/gcp/README.md index 0f21d2516f1..896a0e6d5c4 100644 --- a/infra/gcp/README.md +++ b/infra/gcp/README.md @@ -75,7 +75,7 @@ Instructions: # batch regions and be compatible with the bucket location. batch_logs_bucket_storage_class = "MULTI_REGIONAL" - # Similarly, bucket locations and storage classess are specified + # Similarly, bucket locations and storage classes are specified # for other services: hail_query_bucket_location = "" hail_query_bucket_storage_class = "MULTI_REGIONAL" @@ -95,6 +95,32 @@ Instructions: use_artifact_registry = false ``` +- You can optionally add a `ci_config` section to `$HOME/.hail/global.tfvars`, + to enable CI triggered by git events: + + ``` + ci_config = { + github_oauth_token = "" + github_user1_oauth_token = "" + # This `false` here will prevent the CI from merging PRs into the repo: + watched_branches = [["broadinstitute/hail:main", true, false]] + deploy_steps = ["deploy_batch", "test_batch_0", "deploy_ci"] + bucket_location = "" + bucket_storage_class = "MULTI_REGIONAL" + github_context = "ci-gcp" + } + ``` + + Because of the sensitive values for `github_oauth_token` and `github_user1_oauth_token`, + you should never put `$HOME/.hail/global.tfvars` unencrypted under the version control. To share + the config, you can instead keep it as a cloud secret: + + ```sh + gcloud secrets create gcp-tfvars --data-file $HOME/.hail/global.tfvars + # To access: + gcloud secrets versions access --secret gcp-tfvars 1 > $HOME/.hail/global.tfvars + ``` + - Run `terraform init`. - Run `terraform apply -var-file="$HOME/.hail/global.tfvars"`. 
At the diff --git a/infra/gcp/ci/variables.tf b/infra/gcp/ci/variables.tf index 324a7441e75..d8b7a7e2d1b 100644 --- a/infra/gcp/ci/variables.tf +++ b/infra/gcp/ci/variables.tf @@ -1,9 +1,11 @@ variable "github_oauth_token" { type = string + sensitive = true } variable "github_user1_oauth_token" { type = string + sensitive = true } variable "bucket_location" { diff --git a/infra/global.tfvars b/infra/global.tfvars deleted file mode 100644 index 91ce09f42e8..00000000000 --- a/infra/global.tfvars +++ /dev/null @@ -1,28 +0,0 @@ -organization_domain = "populationgenomics.org.au" - -# batch_gcp_regions is a JSON array of string, the names of the gcp -# regions to schedule over in Batch. -batch_gcp_regions = "[\"australia-southeast1\"]" - -gcp_project = "hail-295901" -gcp_location = "australia-southeast1" -gcp_region = "australia-southeast1" -gcp_zone = "australia-southeast1-b" -domain = "hail.populationgenomics.org.au" -use_artifact_registry = true - -# This is the bucket location that spans the regions you're going to -# schedule across in Batch. If you are running on one region, it can -# just be that region. E.g. "US" -batch_logs_bucket_location = "australia-southeast1" - -# The storage class for the batch logs bucket. It should span the -# batch regions and be compatible with the bucket location. -batch_logs_bucket_storage_class = "STANDARD" - -# Similarly, bucket locations and storage classess are specified -# for other services: -hail_query_bucket_location = "australia-southeast1" -hail_query_bucket_storage_class = "STANDARD" -hail_test_gcs_bucket_location = "australia-southeast1" -hail_test_gcs_bucket_storage_class = "REGIONAL" From df630e93322667ec9398d6c796903eb4cd7b2b0d Mon Sep 17 00:00:00 2001 From: Vlad Savelyev Date: Thu, 31 Mar 2022 17:52:45 +1100 Subject: [PATCH 352/501] Add query_gsa_secret (#181) --- infra/gcp/main.tf | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/infra/gcp/main.tf b/infra/gcp/main.tf index c1f9009a917..236f8f88c3a 100644 --- a/infra/gcp/main.tf +++ b/infra/gcp/main.tf @@ -470,6 +470,12 @@ module "test_gsa_secret" { ] } +module "query_gsa_secret" { + source = "./gsa_k8s_secret" + name = "query" + iam_roles = ["storage.admin"] +} + resource "google_storage_bucket_iam_member" "test_bucket_admin" { bucket = module.hail_test_gcs_bucket.name role = "roles/storage.admin" From c25bd996658294875e507d04322eed206e46ee9d Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Mon, 4 Apr 2022 12:43:49 +1000 Subject: [PATCH 353/501] Use sops for credentials (#182) * Use sops for credentials * Fix markdown * Fix indentation --- .gitignore | 1 - infra/gcp/README.md | 114 +++++++++++++------ infra/gcp/auth_oauth2_client_secret.enc.json | 36 ++++++ infra/gcp/global.tfvars | 28 +++++ infra/gcp/main.tf | 58 ++++++---- infra/gcp/terraform_sa_key.enc.json | 30 +++++ 6 files changed, 204 insertions(+), 63 deletions(-) create mode 100644 infra/gcp/auth_oauth2_client_secret.enc.json create mode 100644 infra/gcp/global.tfvars create mode 100644 infra/gcp/terraform_sa_key.enc.json diff --git a/.gitignore b/.gitignore index 3f4b9befe3f..ecbdb3355ae 100644 --- a/.gitignore +++ b/.gitignore @@ -39,4 +39,3 @@ website/website/static/css/ *.dylib # macOS dynamic libraries */hail.jar infra/.terraform.lock.hcl -*.tfvars diff --git a/infra/gcp/README.md b/infra/gcp/README.md index 896a0e6d5c4..8b79bda7c91 100644 --- a/infra/gcp/README.md +++ b/infra/gcp/README.md @@ -10,22 +10,13 @@ Instructions: gcloud config set compute/zone ``` -- Create a service account for Terraform with Owner 
role. We use - service account name `terraform`. Create a JSON service account key - and place it in `$HOME/.hail/terraform_sa_key.json`. - - ``` - gcloud iam service-accounts create terraform --display-name="Terraform Account" - gcloud projects add-iam-policy-binding --member='serviceAccount:terraform@.iam.gserviceaccount.com' --role='roles/owner' - gcloud iam service-accounts keys create $HOME/.hail/terraform_sa_key.json --iam-account=terraform@.iam.gserviceaccount.com - ``` - - Enable the GCP services needed by Hail: ``` gcloud services enable \ container.googleapis.com \ compute.googleapis.com \ + cloudkms.googleapis.com \ cloudresourcemanager.googleapis.com \ servicenetworking.googleapis.com \ sqladmin.googleapis.com \ @@ -49,11 +40,9 @@ Instructions: - https://auth./oauth2callback - http://127.0.0.1/oauth2callback - Download the client secret as `~/.hail/auth_oauth2_client_secret.json`. + Download the client secret as `/tmp/auth_oauth2_client_secret.json`. -- Install terraform. - -- Create `$HOME/.hail/global.tfvars` that looks like: +- Create `/tmp/global.tfvars` that looks like this: ``` # organization_domain is a string that is the domain of the organization @@ -95,35 +84,86 @@ Instructions: use_artifact_registry = false ``` -- You can optionally add a `ci_config` section to `$HOME/.hail/global.tfvars`, - to enable CI triggered by git events: - - ``` - ci_config = { - github_oauth_token = "" - github_user1_oauth_token = "" - # This `false` here will prevent the CI from merging PRs into the repo: - watched_branches = [["broadinstitute/hail:main", true, false]] - deploy_steps = ["deploy_batch", "test_batch_0", "deploy_ci"] - bucket_location = "" - bucket_storage_class = "MULTI_REGIONAL" - github_context = "ci-gcp" - } - ``` - - Because of the sensitive values for `github_oauth_token` and `github_user1_oauth_token`, - you should never put `$HOME/.hail/global.tfvars` unencrypted under the version control. To share - the config, you can instead keep it as a cloud secret: +- You can optionally create a `/tmp/ci_config.json` file to enable CI triggered by GitHub events: + + ```json + { + "bucket_location": "", + "bucket_storage_class": "STANDARD", + "deploy_steps": [ + "deploy_batch", + "test_batch_0", + "deploy_ci" + ], + "github_context": "ci-gcp", + "github_oauth_token": "", + "github_user1_oauth_token": "", + "watched_branches": [ + [ + "hail-is/hail:main", + true, + false + ] + ] + } + ``` + +- Install [sops](https://github.com/mozilla/sops). + +- Set up a key for sops to use: + + ```sh + gcloud auth application-default login + + gcloud kms keyrings create sops --location global + + gcloud kms keys create sops-key --location global --keyring sops --purpose encryption + + gcloud kms keys list --location global --keyring sops + ``` + + You should see: ```sh - gcloud secrets create gcp-tfvars --data-file $HOME/.hail/global.tfvars - # To access: - gcloud secrets versions access --secret gcp-tfvars 1 > $HOME/.hail/global.tfvars + NAME PURPOSE PRIMARY_STATE + projects//locations/global/keyRings/sops/cryptoKeys/sops-key ENCRYPT_DECRYPT ENABLED ``` + + This key can be shared with other developers in your team, controlling access through IAM. It needs to be created outside of Terraform to avoid a cyclic dependency: the Terraform configuration needs to decrypt `sops` files. + +- Create a service account for Terraform with Owner role. We use + service account name `terraform`. Create a JSON service account key + and place it in `/tmp/terraform_sa_key.json`. 
+ + ``` + gcloud iam service-accounts create terraform --display-name="Terraform Account" + + gcloud projects add-iam-policy-binding --member='serviceAccount:terraform@.iam.gserviceaccount.com' --role='roles/owner' + + gcloud iam service-accounts keys create /tmp/terraform_sa_key.json --iam-account=terraform@.iam.gserviceaccount.com + ``` + + +- Encrypt the above files and add them to the repository: + + ```sh + sops --encrypt --gcp-kms projects//locations/global/keyRings/sops/cryptoKeys/sops-key /tmp/auth_oauth2_client_secret.json > $HAIL/infra/gcp/auth_oauth2_client_secret.enc.json + + # Optional + sops --encrypt --gcp-kms projects//locations/global/keyRings/sops/cryptoKeys/sops-key /tmp/ci_config.json > $HAIL/infra/gcp/ci_config.enc.json + + sops --encrypt --gcp-kms projects//locations/global/keyRings/sops/cryptoKeys/sops-key /tmp/terraform_sa_key.json > $HAIL/infra/gcp/terraform_sa_key.enc.json + + git add $HAIL/infra/gcp/auth_oauth2_client_secret.enc.json $HAIL/infra/gcp/ci_config.enc.json $HAIL/infra/gcp/terraform_sa_key.enc.json + + # git commit and push as desired. + ``` + +- Install terraform. - Run `terraform init`. -- Run `terraform apply -var-file="$HOME/.hail/global.tfvars"`. At the +- Run `terraform apply -var-file=global.tfvars`. At the time of writing, this takes ~15m. - Terraform created a GKE cluster named `vdc`. Configure `kubectl` diff --git a/infra/gcp/auth_oauth2_client_secret.enc.json b/infra/gcp/auth_oauth2_client_secret.enc.json new file mode 100644 index 00000000000..7b62fea7199 --- /dev/null +++ b/infra/gcp/auth_oauth2_client_secret.enc.json @@ -0,0 +1,36 @@ +{ + "web": { + "client_id": "ENC[AES256_GCM,data:eaAGOdp+oMIi88VQ5G5aE0FIkPmGdPifwGoNKl5IWKZSTDBBn6fSPgcj+1Gnx3JcQcdZUcgokw9ybkc7W3URDg9OGC/is6fQ,iv:I7zdlXunkfdEZ2WlRzkjvpNvuekjVeCMRz6ZP0ay/iQ=,tag:Dr2DcQDJ3/51qvYAkDPsGw==,type:str]", + "project_id": "ENC[AES256_GCM,data:HctVX0c7BSCSMtY=,iv:p1GFV+YY1aAGNvr3KIKT5zMCu/RSH5ToxtNdbsPbDCQ=,tag:PAtBTwOIzqGic4vN/e/o/Q==,type:str]", + "auth_uri": "ENC[AES256_GCM,data:TUuV3pyK2Vc9E3Ro2yaOTB87sJSDh2FoDM/Ah/fJLXD90tvEJ48kWoc=,iv:80OaGayp0FEoMXXmbXFgPrJ+4iw9xTIcDsXDXBIjMHU=,tag:3NL7x5BxcYTik36nFDw+mw==,type:str]", + "token_uri": "ENC[AES256_GCM,data:Npk8MApu9qcfZj/CcNmZqx7DEEfQzS7XXVE98ZTd9cxsyN8=,iv:Fihc8bEk4vUDQgXLw2qxhKkFtA8nYRpvmd1ms8fdGwg=,tag:8qadiwmluCK4X5P4j7vXvg==,type:str]", + "auth_provider_x509_cert_url": "ENC[AES256_GCM,data:zvw76ZGs40zl1Z00oqHQojWmKfTH64lkt8Oo/hBJizjQh6PtZd3KwCjv,iv:RVPKL2M0AklucZbqrEFNu5rD5iAGDrj/G+20yiEgIzU=,tag:7SDfG83I1Vp9upLl4rna9w==,type:str]", + "client_secret": "ENC[AES256_GCM,data:EfS6wVX9jUIOGhpS9EWoS8HhnS8zzF6D,iv:0d5snPds/pHW+f4OJP2x2AlnlvKqBV7DrKmsgMzymfU=,tag:anBpngwRhIZQYKKXlpv6WA==,type:str]", + "redirect_uris": [ + "ENC[AES256_GCM,data:2kQbbmMEuFJGry9LNIrU+/haBXIQPq7QpEjp/ap1AvBT18e2uCow4CyZ+59uA2VMrRwRQgd3V8NfMw==,iv:ijnINANsRY1szF2pT6Rxq8AIKSfu+wPf2fsp1UzKVPI=,tag:YPWACkUWUSyVEjvbKEk25g==,type:str]", + "ENC[AES256_GCM,data:S+B6VnLg/FxsYRBRMb7+jEJdegTdihT//MbG56VsZQ==,iv:tvL2qR+4nASlZykPb8Yx4IusRk76gkGT0HogrVBSdno=,tag:6NY65VlpXmGoEq6wSJzehg==,type:str]", + "ENC[AES256_GCM,data:P0oQHc1wCeSGpE4uLzo1b1NyjXD2ISqEOweFwHxAmdrmF1FuSUlyKrOiwIdrXN5kjYgEXZQgdaC81KaidIUCMoy1GTTQeOd78uArNhQzTuiXmwHF6cIebQ==,iv:UUb9zKtfsRoCamezOLsIDh+75hvrhB/Xj9yv0nRzqTM=,tag:eSRbZ6tVFyHn+njWdCl1ZA==,type:str]", + "ENC[AES256_GCM,data:McbGtcjaRW+RFI48UmAj1Vgmn3TB7CflO4WEo2uLRqm/2v4AHdwkv+Hx5soCsEsH0Fm8qB06QBXednu/VMWzHtzAfYUgCnd5Q61OI+Kgxpl8T93yWQ==,iv:U3rOxXulKoZp+bwmGWza6iOcuDcgWUWDgZCIikGtSAU=,tag:GwVEFlX2qRHxgFpBZfHrKg==,type:str]", + 
"ENC[AES256_GCM,data:vzGz29rRjeuCMGtDhKcUW0nL78HFXqvD2Qe2WbcOabFxL0Pt/4LR2BKx25ecyjpF8C024pKrS4QNaHeuiOgMRQyIGjaoEBHbfpAEekosUOT8IZY=,iv:5g+7fUVgy8GyL2zmOW8gsov0cskvlCs9FCZFElO44V8=,tag:kjFkbUye6v41NKT4AVPZug==,type:str]", + "ENC[AES256_GCM,data:m/zo+RGd4bGXJRtJFyqCnFppmdqEtGXPRto+vkvj8FmfGt8K18heVaCEAtBmdXogok9rmGvPzalWjMkkMm08KxkQ0NL/uetyaqgRtZvoH1ms,iv:XMOmNJWEHzjucp++uduUC0spYlg/kVYB9LSWPXpYU7M=,tag:P/3V9+keQDxV3Y8/efw4dA==,type:str]" + ] + }, + "sops": { + "kms": null, + "gcp_kms": [ + { + "resource_id": "projects/hail-295901/locations/global/keyRings/sops/cryptoKeys/sops-key", + "created_at": "2022-04-01T03:47:32Z", + "enc": "CiQAKYFiUcNvw0DpeI53KNdobbTVVbhR03sz9GlYjm2tHgbgucsSSQAB+Y96sfETf2pJD8vcq5dj2GrGvv/b+YLSBDTH7uBVAqb0d2DnJ29Nukbztf+q6DkLcnOijiMwHLvp0MkTlwVoZjJtjAVTHDk=" + } + ], + "azure_kv": null, + "hc_vault": null, + "age": null, + "lastmodified": "2022-04-01T03:47:33Z", + "mac": "ENC[AES256_GCM,data:IvzhANPGdNY6A9EE9wdxoiPRppggk75In+dUh7QxP25XI0hXe55xPL+ICK3FVko9Dq+IkV0rV5Rpo/vX55gFG6NiJNcBeipOZi1BWN9XBNR8h0bWi99wMz60/1A3fe+52Cb1486xWBsrqxPXMAOW6ajo5OPrfGAi2PSB0DV1yMY=,iv:DPXztjWklw4o6fELV8vXdNUPECgSeLniOTmJDoKSbgs=,tag:CBo2AwmwBy39pCeHFBRt0Q==,type:str]", + "pgp": null, + "unencrypted_suffix": "_unencrypted", + "version": "3.7.2" + } +} \ No newline at end of file diff --git a/infra/gcp/global.tfvars b/infra/gcp/global.tfvars new file mode 100644 index 00000000000..91ce09f42e8 --- /dev/null +++ b/infra/gcp/global.tfvars @@ -0,0 +1,28 @@ +organization_domain = "populationgenomics.org.au" + +# batch_gcp_regions is a JSON array of string, the names of the gcp +# regions to schedule over in Batch. +batch_gcp_regions = "[\"australia-southeast1\"]" + +gcp_project = "hail-295901" +gcp_location = "australia-southeast1" +gcp_region = "australia-southeast1" +gcp_zone = "australia-southeast1-b" +domain = "hail.populationgenomics.org.au" +use_artifact_registry = true + +# This is the bucket location that spans the regions you're going to +# schedule across in Batch. If you are running on one region, it can +# just be that region. E.g. "US" +batch_logs_bucket_location = "australia-southeast1" + +# The storage class for the batch logs bucket. It should span the +# batch regions and be compatible with the bucket location. +batch_logs_bucket_storage_class = "STANDARD" + +# Similarly, bucket locations and storage classess are specified +# for other services: +hail_query_bucket_location = "australia-southeast1" +hail_query_bucket_storage_class = "STANDARD" +hail_test_gcs_bucket_location = "australia-southeast1" +hail_test_gcs_bucket_storage_class = "REGIONAL" diff --git a/infra/gcp/main.tf b/infra/gcp/main.tf index 236f8f88c3a..f61b17dc20c 100644 --- a/infra/gcp/main.tf +++ b/infra/gcp/main.tf @@ -8,6 +8,10 @@ terraform { source = "hashicorp/kubernetes" version = "2.8.0" } + sops = { + source = "carlpett/sops" + version = "0.6.3" + } } backend "gcs" { bucket = "cpg-hail-terraform" @@ -40,19 +44,6 @@ variable "use_artifact_registry" { description = "pull the ubuntu image from Artifact Registry. 
Otherwise, GCR" } -variable "ci_config" { - type = object({ - github_oauth_token = string - github_user1_oauth_token = string - watched_branches = list(tuple([string, bool, bool])) - deploy_steps = list(string) - bucket_location = string - bucket_storage_class = string - github_context = string - }) - default = null -} - variable deploy_ukbb { type = bool description = "Run the UKBB Genetic Correlation browser" @@ -68,8 +59,12 @@ locals { docker_root_image = "${local.docker_prefix}/ubuntu:20.04" } +data "sops_file" "terraform_sa_key_sops" { + source_file = "terraform_sa_key.enc.json" +} + provider "google" { - credentials = file("~/.hail/terraform_sa_key.json") + credentials = data.sops_file.terraform_sa_key_sops.raw project = var.gcp_project region = var.gcp_region @@ -77,7 +72,7 @@ provider "google" { } provider "google-beta" { - credentials = file("~/.hail/terraform_sa_key.json") + credentials = data.sops_file.terraform_sa_key_sops.raw project = var.gcp_project region = var.gcp_region @@ -640,28 +635,41 @@ resource "kubernetes_cluster_role_binding" "batch" { } } +data "sops_file" "auth_oauth2_client_secret_sops" { + source_file = "auth_oauth2_client_secret.enc.json" +} + resource "kubernetes_secret" "auth_oauth2_client_secret" { metadata { name = "auth-oauth2-client-secret" } data = { - "client_secret.json" = file("~/.hail/auth_oauth2_client_secret.json") + "client_secret.json" = data.sops_file.auth_oauth2_client_secret_sops.raw } } +data "sops_file" "ci_config_sops" { + count = fileexists("ci_config.enc.json") ? 1 : 0 + source_file = "ci_config.enc.json" +} + +locals { + ci_config = length(data.sops_file.ci_config_sops) == 1 ? data.sops_file.ci_config_sops[0] : null +} + module "ci" { source = "./ci" - count = var.ci_config != null ? 1 : 0 - - github_oauth_token = var.ci_config.github_oauth_token - github_user1_oauth_token = var.ci_config.github_user1_oauth_token - watched_branches = var.ci_config.watched_branches - deploy_steps = var.ci_config.deploy_steps - bucket_location = var.ci_config.bucket_location - bucket_storage_class = var.ci_config.bucket_storage_class + count = local.ci_config != null ? 
1 : 0 + + github_oauth_token = local.ci_config.data["github_oauth_token"] + github_user1_oauth_token = local.ci_config.data["github_user1_oauth_token"] + watched_branches = jsondecode(local.ci_config.raw).watched_branches + deploy_steps = jsondecode(local.ci_config.raw).deploy_steps + bucket_location = local.ci_config.data["bucket_location"] + bucket_storage_class = local.ci_config.data["bucket_storage_class"] ci_email = module.ci_gsa_secret.email container_registry_id = google_container_registry.registry.id - github_context = var.ci_config.github_context + github_context = local.ci_config.data["github_context"] } diff --git a/infra/gcp/terraform_sa_key.enc.json b/infra/gcp/terraform_sa_key.enc.json new file mode 100644 index 00000000000..31526999278 --- /dev/null +++ b/infra/gcp/terraform_sa_key.enc.json @@ -0,0 +1,30 @@ +{ + "type": "ENC[AES256_GCM,data:tFYK1vfHcr26TQFneTRY,iv:usBwiNPAYJeVyYukEigA+gQR62kEMFVUVtMbxhvBtL4=,tag:le15hVNAv7ydw08plx5FvQ==,type:str]", + "project_id": "ENC[AES256_GCM,data:Bcc3VNlRgPuMmVA=,iv:NwMv2fMhi9eAmfLlhSeRvPWClnfWlxN3seNtTD8EDRg=,tag:crij5ZMRXhfuSo1+Fi8R0Q==,type:str]", + "private_key_id": "ENC[AES256_GCM,data:++bmmwQv5V7elOB3YpeBE3RK20KKV7f+/zxp8dzV1li2FyWQtIQsnw==,iv:eYV0AOH/NQyWLZRvYWnU9A8YdwHwTaM39TDFS6pl5ME=,tag:uoJzFjNR8lkVBd1hrB+m7Q==,type:str]", + "private_key": "ENC[AES256_GCM,data:2xcHP4yDSruXl1MWi8+SL/FbJDWyd86As03i3tBc17/ERpm4KsaznSmCxfNXwzpuQ8B1v+c0hvt5yrFFxhYL9U4+t/BqKMvZGPYuAsHFfGmeMLjBIAy60LVlVNcpoiHo7q79x5OYIcjz+yhjSFGRZLqpfbztXF+GO1jBAG2A3r+U8bwPYaFR7+Hle1P5qeThzvpijFqIzCRns+CoXsuINMUh7dfMELCojORHQNri9yVoYsDtEEZafmdm2B9ILyLIK/IQtqEMjEpMpdj/UKzjDF0pCbQ1PI+dpVgRx6UrDQV7vfmkAXlHwMndf0N31Ap0qqW91RjBiZmcEF7w+GsIbhMOvriULxE7puBzTsSeLsbkketH4FnaDbYXh/bjLMpx81GeYf1PnlqJfcJSZPLN5q+Gnnr5AKZbWgBxFEr1l+kT0tZyRS4bfjTLNcU61VIvlcFfPdPWh50Fuw6+eV/xtYZ12SKAumSqiRJxCAvKm9WXWI2D0R7hSPcLRhN5vNsSK2lN5XvxeNwbcoJqzU36viKpE6/PVWlzGeSx4wjlPS/DKOpOfDvB9kZKhxCK+sfvftUxMUePsi7YyfE+6oVDv3tqxz8wuau9ubSU5YhvosAUeaPsLjkDJRGQnpZ20i38GAXEVxFcdc3aRTv7PxFGbuEQWybWmK0R98lNbmafNS+Kf60xgr0lgTeE9IlFvjuAvp4dpO1oCcOhdHyKPv5ZEmTfAQgmko9UxUqGESQY4j6O81SzWzP+UNXQC/itmRmzUFGleBFwKfHAPjBVvLZ9N8qJ6VP3lQ+umQK0V913CiOrtMecESmMroW1GPPa+CU9fbu/vOv9ZUcehVYFclqBcXx2i2LdgFtAgfv5SEPYndZBRMiIw0Iv8KxE90m2bXBf516z2lpRqPQ8kHAspKgHHb9Cv05Yp2/V9d6lSNUPJlHx356oYrCOfkvfdCeT/eeDGAriADQrBmUBw6hJG8EyX8XD794Mrhhx1Dc5x7t/jXBNeXXInU3SvLPCwyPPXecKTInBw6d5BoiarnFHx1JbWhoaxOklNfYrMzelkITJptcO45W//50CszFs7aHxCWUTgdAjS+CC7CAaSYdZgD0Sa7K1NUmC+EnLhdXLs1ICXH/KN++ZMTw3trZJGjq+SFQYpsiMd+PYehl6QbsgKF//X9wBbWYpC3ESvhECMx633ZliYVC54IcbUz4sJw4SMp75h1kSFJ5Q7/0XitfjNmVVVLpHrq9k8kV40f616cE5BZQpYXmiVQ1A/pf/fZSr4rt9guGH2imnQRsGxsbqZyw8zyUDbQ779a8ZR1ysfvyz0B121o103Xws+VRjdJc56R/VpAFiw+9tX/fLRa+6Mtj8m82T16JLUyc8taDATaHjxib/gCESwF3iIBSxKP6hWsM8llyPX3q5GPjMJNAkyJDbRfx/IA1TgRBgoNnd/Dm2/uUDLKwQ48qZAUGJ5CCHXdOEVga+R4Nx3cR6cwLxVViw1dQiXZVkGc+hQInyC/zjoZMRIuJNsj1a23F83wSK/ezG2aEDZZP6kYXBj9s5lPdbL/rFHuCW4UhqbjIzLpLtlt+UPCenaDGXc73IXwON9InjmGdB32qOCQJw1xWJOiwsJUnai84Pzuzs4tfDJhY0pRLVijfX5bzKkGtJcsu5UqlXVq5ikhrV8KCVyrgMO0ohZmFgWMO5JZl9ruG+Etw2P1fNWWcL7rrUzDVYsI8bk3nAN2tPU2eFCkGayuviuUYBJrVxAyXRH4vyQcksDyY9+yZlUBp75PDdA4zwvNlBUODNflJB6DteGscHxDsr6RldRLxbAFJ8HKo60HD9vSqgJZWcbqvozSeHa4aFALkrWgIOEjqtySC6CoTInYaNsQl6obdm4B3Lc9hC1w3muj1Vzl31jiq/i2bC1Hpc4b7nlY61ipFe/V7US1GiqOdFOugN6QLajXkCE9b8x59IKQTpLEDVU+eVN5V2CtQscFLFfWWfIHE1+m7aI+dNtaPfrOp1MNRj/FBCHWsC3A9P8iwGxUIt1jYIyMwtXkMJiaP2UaUgxpdCagvSneSFhYn55/zdKpWgoIWE/Es7ccomli7c03p2r2j8AcmoepdiyaqSxKHBQT6aznJnAMeGnxvDbxGZdfBqJDguEMLHh07HzXQ95h9tC3S/1xIdkYBsxWFtFcMK3YWdqd0RUssnXDfk7ADj0yoPGL8pwZk0r
Fbn4GXO9ADs1F1uMgjoKtkR5NxWfbBiIHSbqP8BwjM9aNoQoOSDW7j1XoNpQPU3,iv:f88xs6oXcZIKdSnt1Gl8N+nGrMlwVafTCQPxYFcDcGk=,tag:OILo8O7lhj/P0zZyEhkiFw==,type:str]", + "client_email": "ENC[AES256_GCM,data:K6WKQWrsEp4tgFAFv7rqwof0jxuoeaZna6NyAtUmlDEKcXtZF6eqXTo/v4dG,iv:1f5uXU6ttpgriAT5HAh4qcOQE75DMvN5+6+2/K3XLhc=,tag:/ezeER3XQG26jQB/qvJRIg==,type:str]", + "client_id": "ENC[AES256_GCM,data:P3stjpos9BUl3OWjPwEGLvBwG83f,iv:Xnzo8DIMA/WwQOSttYJEpAV2aJxMCmhVMWlXmjJu7Pc=,tag:YhshTtXEs73WVERmkyngxw==,type:str]", + "auth_uri": "ENC[AES256_GCM,data:Wa0dViUcTMjmwnHIk7nbyfJi3XNE0Xy56ZAE41lkYKOCwn5LzJwep60=,iv:GPiqAdsjqaDsc/MM+Ahpjx7/UUax5qqYKf4oRt1TQgw=,tag:i+nLp19XSMip7NLnnRDRLg==,type:str]", + "token_uri": "ENC[AES256_GCM,data:4GAgHNrQS9M1x6hZIl4cnQOG9r3vE3QwY2Y7OWpZgGieTuM=,iv:/arD9sO1MdO4J4tsxm1E3WdnPts+J0gFu/ETtyZtW+U=,tag:VRR3+1PNH0gJSZ79f0upGQ==,type:str]", + "auth_provider_x509_cert_url": "ENC[AES256_GCM,data:ZDwxJ2l7rMIo+L0hRUt4ymZRs4F0OKo4mRuseNNf0PeHkk7hMguobd3g,iv:pzWKW8dUeHSsQInAXOFZR9CknUIt+cT3tKmfVMGGduo=,tag:fuo7uxpYE78i+l6SY4oKiQ==,type:str]", + "client_x509_cert_url": "ENC[AES256_GCM,data:pwR4evh+15goLhuW4ASO0KoHcRm/c2Rd8wkWsQwsHUDk9Y5D/VRCGjpACG9MDLMxah0a9DtBjGGJcxGed/O2xp1RRj8OLB7SK8/CKRwBCszGAXEMIaOyh1XOqsrAQSFOlQ==,iv:c88tLvyZS0FmfH93We3X3GxflL59rBPvq4VD7I4fie4=,tag:mw48WEOuGTwfM5/S4Qgd1Q==,type:str]", + "sops": { + "kms": null, + "gcp_kms": [ + { + "resource_id": "projects/hail-295901/locations/global/keyRings/sops/cryptoKeys/sops-key", + "created_at": "2022-04-01T04:25:20Z", + "enc": "CiQAKYFiUaz5U7Yb9TlP+OYVBdFGHXhyEWcM1/Ux2oGcsjxqvaISSQAB+Y96XZUCqzsHDiJuL6xpSZUtUH1HrQa3JaLuz11cS9L4WtvBoclBUhOSjXBifjoEdDlwaxT/2+mVqPNjTfRvWhK0hS+7Lh0=" + } + ], + "azure_kv": null, + "hc_vault": null, + "age": null, + "lastmodified": "2022-04-01T04:25:21Z", + "mac": "ENC[AES256_GCM,data:fN1JUaOvmgMmjeDYooUAsgv81QAeB8yQ22aLMJH6o49DPCyXBaoXCgoCySpRNxtWUx99qmgd0eIi7h65Q3i96XjBW+Dk1AzK5Kk0IsjvWHoA1ifADBP78kkCsiIkt2NegBbNsazsxGdfzQ2j0ILKijWqFHZo/nq9ywT6U9hty5Q=,iv:efgkwvrsehFt3qNN7Hq5fnyymPj7IECXbYnhUad3pNI=,tag:ldr9MTmZt/RewPPgIDqeUw==,type:str]", + "pgp": null, + "unencrypted_suffix": "_unencrypted", + "version": "3.7.2" + } +} \ No newline at end of file From a93618f0aef34e59d69f1c53d838a31059666b19 Mon Sep 17 00:00:00 2001 From: Vlad Savelyev Date: Mon, 4 Apr 2022 14:21:27 +1000 Subject: [PATCH 354/501] Pass authorisation token to query service backend (#184) --- hail/python/hail/backend/service_backend.py | 5 +++-- hail/python/hail/context.py | 9 ++++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/hail/python/hail/backend/service_backend.py b/hail/python/hail/backend/service_backend.py index 2c802a938c5..d0debbfe48b 100644 --- a/hail/python/hail/backend/service_backend.py +++ b/hail/python/hail/backend/service_backend.py @@ -211,7 +211,8 @@ async def create(*, jar_url: Optional[str] = None, driver_cores: Optional[Union[int, str]] = None, driver_memory: Optional[Union[int, str]] = None, - name_prefix: Optional[str] = None): + name_prefix: Optional[str] = None, + token: Optional[str] = None): if billing_project is None: billing_project = get_user_config().get('batch', 'billing_project', fallback=None) if billing_project is None: @@ -224,7 +225,7 @@ async def create(*, async_fs = RouterAsyncFS('file') sync_fs = RouterFS(async_fs) if batch_client is None: - batch_client = await aiohb.BatchClient.create(billing_project) + batch_client = await aiohb.BatchClient.create(billing_project, _token=token) bc = hb.BatchClient.from_async(batch_client) batch_attributes: Dict[str, str] = dict() 
user_local_reference_cache_dir = Path(get_user_local_cache_dir(), 'references', version()) diff --git a/hail/python/hail/context.py b/hail/python/hail/context.py index a02a71a39f7..e4ad944d9f7 100644 --- a/hail/python/hail/context.py +++ b/hail/python/hail/context.py @@ -398,7 +398,8 @@ def init_spark(sc=None, disable_progress_bar=bool, driver_cores=nullable(oneof(str, int)), driver_memory=nullable(str), - name_prefix=nullable(str) + name_prefix=nullable(str), + token=nullable(str) ) async def init_batch( *, @@ -414,7 +415,8 @@ async def init_batch( disable_progress_bar: bool = True, driver_cores: Optional[Union[str, int]] = None, driver_memory: Optional[str] = None, - name_prefix: Optional[str] = None + name_prefix: Optional[str] = None, + token: Optional[str] = None, ): from hail.backend.service_backend import ServiceBackend # FIXME: pass local_tmpdir and use on worker and driver @@ -423,7 +425,8 @@ async def init_batch( disable_progress_bar=disable_progress_bar, driver_cores=driver_cores, driver_memory=driver_memory, - name_prefix=name_prefix) + name_prefix=name_prefix, + token=token) log = _get_log(log) if tmpdir is None: From ea172ef40693c25168c553cd25b5864c2d0635a1 Mon Sep 17 00:00:00 2001 From: Vlad Savelyev Date: Mon, 4 Apr 2022 14:22:11 +1000 Subject: [PATCH 355/501] [query] Add assertion that methods have fewer than 255 parameters (#183) --- hail/src/main/scala/is/hail/asm4s/ClassBuilder.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hail/src/main/scala/is/hail/asm4s/ClassBuilder.scala b/hail/src/main/scala/is/hail/asm4s/ClassBuilder.scala index 22a2c10253a..8b11e942802 100644 --- a/hail/src/main/scala/is/hail/asm4s/ClassBuilder.scala +++ b/hail/src/main/scala/is/hail/asm4s/ClassBuilder.scala @@ -488,6 +488,8 @@ class MethodBuilder[C]( val returnTypeInfo: TypeInfo[_], val isStatic: Boolean = false ) extends WrappedClassBuilder[C] { + require(parameterTypeInfo.length + isStatic.toInt <= 255, + s"Invalid method, methods may at most 255 arguments, found ${parameterTypeInfo.length + isStatic.toInt}") // very long method names, repeated hundreds of thousands of times can cause memory issues. // If necessary to find the name of a method precisely, this can be set to around the constant // limit of 65535 characters, but usually, this can be much smaller. From 5317b92816f75946bb1d9071c543006269f5497b Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Mon, 4 Apr 2022 18:53:38 +1000 Subject: [PATCH 356/501] Fix global.tfvars path (#185) --- infra/gcp/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infra/gcp/README.md b/infra/gcp/README.md index 8b79bda7c91..17d76f9abd6 100644 --- a/infra/gcp/README.md +++ b/infra/gcp/README.md @@ -42,7 +42,7 @@ Instructions: Download the client secret as `/tmp/auth_oauth2_client_secret.json`. 
-- Create `/tmp/global.tfvars` that looks like this: +- Create `infra/gcp/global.tfvars` that looks like this: ``` # organization_domain is a string that is the domain of the organization From ffa35297b3c17c04fd221a0a9221477277fc170a Mon Sep 17 00:00:00 2001 From: Dan King Date: Thu, 7 Apr 2022 17:42:40 -0400 Subject: [PATCH 357/501] [query/service] fix per-core off-heap memory --- batch/batch/worker/worker.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/batch/batch/worker/worker.py b/batch/batch/worker/worker.py index 1c5e63ea106..32e341e4479 100644 --- a/batch/batch/worker/worker.py +++ b/batch/batch/worker/worker.py @@ -2021,12 +2021,13 @@ async def create_and_start( # We allocate 60% of memory per core to off heap memory memory_per_core_mib = worker_memory_per_core_mib(CLOUD, instance_config.worker_type()) - heap_memory_mb = int(0.4 * n_cores * memory_per_core_mib) - off_heap_memory_per_core_mb = int(0.6 * memory_per_core_mib) + memory_mib = n_cores * memory_per_core_mib + heap_memory_mib = int(0.4 * memory_mib) + off_heap_memory_per_core_mib = memory_mib - heap_memory_mib command = [ 'java', - f'-Xmx{heap_memory_mb}M', + f'-Xmx{heap_memory_mib}M', '-cp', f'/jvm-entryway:/jvm-entryway/junixsocket-selftest-2.3.3-jar-with-dependencies.jar:{JVM.SPARK_HOME}/jars/*', 'is.hail.JVMEntryway', @@ -2076,7 +2077,7 @@ async def create_and_start( command=command, cpu_in_mcpu=n_cores * 1000, memory_in_bytes=total_memory_bytes, - env=[f'HAIL_WORKER_OFF_HEAP_MEMORY_PER_CORE_MB={off_heap_memory_per_core_mb}'], + env=[f'HAIL_WORKER_OFF_HEAP_MEMORY_PER_CORE_MB={off_heap_memory_per_core_mib}'], volume_mounts=volume_mounts, ) From c4686199bd87bddafdde19c73df6fe5830ec5429 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Tue, 12 Apr 2022 12:52:55 +1000 Subject: [PATCH 358/501] Also commit global.tfvars (#187) --- infra/gcp/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infra/gcp/README.md b/infra/gcp/README.md index 17d76f9abd6..a61ccfcc7ec 100644 --- a/infra/gcp/README.md +++ b/infra/gcp/README.md @@ -154,7 +154,7 @@ Instructions: sops --encrypt --gcp-kms projects//locations/global/keyRings/sops/cryptoKeys/sops-key /tmp/terraform_sa_key.json > $HAIL/infra/gcp/terraform_sa_key.enc.json - git add $HAIL/infra/gcp/auth_oauth2_client_secret.enc.json $HAIL/infra/gcp/ci_config.enc.json $HAIL/infra/gcp/terraform_sa_key.enc.json + git add $HAIL/infra/gcp/global.tfvars $HAIL/infra/gcp/auth_oauth2_client_secret.enc.json $HAIL/infra/gcp/ci_config.enc.json $HAIL/infra/gcp/terraform_sa_key.enc.json # git commit and push as desired. 
``` From 2d71abf06b187b72eb8a41077608bd495f8db267 Mon Sep 17 00:00:00 2001 From: Vlad Savelyev Date: Mon, 2 May 2022 12:41:45 +1000 Subject: [PATCH 359/501] avoid use of memory service on the client and the worker (#190) --- .../scala/is/hail/backend/service/ServiceBackend.scala | 9 +++------ hail/src/main/scala/is/hail/backend/service/Worker.scala | 9 +++------ 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala b/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala index 6533df64040..e886b95d7fe 100644 --- a/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala +++ b/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala @@ -107,12 +107,9 @@ class ServiceBackend( val token = tokenUrlSafe(32) val root = s"${ backendContext.remoteTmpDir }parallelizeAndComputeWithIndex/$token" - // FIXME: HACK - val (open, create) = if (n <= 50) { - (fs.openCachedNoCompression _, fs.createCachedNoCompression _) - } else { - (fs.openNoCompression _, fs.createNoCompression _) - } + // FIXME: HACK: working around the memory service until the issue is resolved: + // https://hail.zulipchat.com/#narrow/stream/223457-Hail-Batch-support/topic/Batch.20Query.3A.20possible.20overloading.20of.20.60memory.60.20service/near/280823230 + val (open, create) = (fs.openNoCompression _, fs.createNoCompression _) log.info(s"parallelizeAndComputeWithIndex: $token: nPartitions $n") log.info(s"parallelizeAndComputeWithIndex: $token: writing f and contexts") diff --git a/hail/src/main/scala/is/hail/backend/service/Worker.scala b/hail/src/main/scala/is/hail/backend/service/Worker.scala index 7edbc55d2b8..34718193475 100644 --- a/hail/src/main/scala/is/hail/backend/service/Worker.scala +++ b/hail/src/main/scala/is/hail/backend/service/Worker.scala @@ -92,12 +92,9 @@ object Worker { } } - // FIXME: HACK - val (open, create) = if (n <= 50) { - (fs.openCachedNoCompression _, fs.createCachedNoCompression _) - } else { - (fs.openNoCompression _, fs.createNoCompression _) - } + // FIXME: HACK: working around the memory service until the issue is resolved: + // https://hail.zulipchat.com/#narrow/stream/223457-Hail-Batch-support/topic/Batch.20Query.3A.20possible.20overloading.20of.20.60memory.60.20service/near/280823230 + val (open, create) = (fs.openNoCompression _, fs.createNoCompression _) val fFuture = Future { retryTransientErrors { From be47820f26f725148976442d204d62f24b19c0a9 Mon Sep 17 00:00:00 2001 From: Vlad Savelyev Date: Mon, 2 May 2022 12:41:45 +1000 Subject: [PATCH 360/501] avoid use of memory service on the client and the worker (#190) --- .../scala/is/hail/backend/service/ServiceBackend.scala | 9 +++------ hail/src/main/scala/is/hail/backend/service/Worker.scala | 9 +++------ 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala b/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala index 6533df64040..e886b95d7fe 100644 --- a/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala +++ b/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala @@ -107,12 +107,9 @@ class ServiceBackend( val token = tokenUrlSafe(32) val root = s"${ backendContext.remoteTmpDir }parallelizeAndComputeWithIndex/$token" - // FIXME: HACK - val (open, create) = if (n <= 50) { - (fs.openCachedNoCompression _, fs.createCachedNoCompression _) - } else { - (fs.openNoCompression _, fs.createNoCompression _) - } + // FIXME: HACK: working around the 
memory service until the issue is resolved: + // https://hail.zulipchat.com/#narrow/stream/223457-Hail-Batch-support/topic/Batch.20Query.3A.20possible.20overloading.20of.20.60memory.60.20service/near/280823230 + val (open, create) = (fs.openNoCompression _, fs.createNoCompression _) log.info(s"parallelizeAndComputeWithIndex: $token: nPartitions $n") log.info(s"parallelizeAndComputeWithIndex: $token: writing f and contexts") diff --git a/hail/src/main/scala/is/hail/backend/service/Worker.scala b/hail/src/main/scala/is/hail/backend/service/Worker.scala index c1da2e7d4cc..ebc6f03aaeb 100644 --- a/hail/src/main/scala/is/hail/backend/service/Worker.scala +++ b/hail/src/main/scala/is/hail/backend/service/Worker.scala @@ -92,12 +92,9 @@ object Worker { } } - // FIXME: HACK - val (open, create) = if (n <= 50) { - (fs.openCachedNoCompression _, fs.createCachedNoCompression _) - } else { - (fs.openNoCompression _, fs.createNoCompression _) - } + // FIXME: HACK: working around the memory service until the issue is resolved: + // https://hail.zulipchat.com/#narrow/stream/223457-Hail-Batch-support/topic/Batch.20Query.3A.20possible.20overloading.20of.20.60memory.60.20service/near/280823230 + val (open, create) = (fs.openNoCompression _, fs.createNoCompression _) val fFuture = Future { retryTransientErrors { From cdf68fec339e1140c82a3d471d2b018cb7de580d Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Mon, 2 May 2022 13:25:58 +1000 Subject: [PATCH 361/501] Use apiVersion: policy/v1beta1 for PodDisruptionBudget (https://github.com/kyverno/kyverno/issues/2983) --- auth/deployment.yaml | 2 +- batch/deployment.yaml | 2 +- bootstrap-gateway/deployment.yaml | 2 +- gateway/deployment.yaml | 2 +- internal-gateway/deployment.yaml | 2 +- website/deployment.yaml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/auth/deployment.yaml b/auth/deployment.yaml index f91301e82bb..a790339f89f 100644 --- a/auth/deployment.yaml +++ b/auth/deployment.yaml @@ -244,7 +244,7 @@ spec: name: cpu targetAverageUtilization: 2500 --- -apiVersion: policy/v1 +apiVersion: policy/v1beta1 kind: PodDisruptionBudget metadata: name: auth diff --git a/batch/deployment.yaml b/batch/deployment.yaml index 7f83f8d4d96..2447b3424fe 100644 --- a/batch/deployment.yaml +++ b/batch/deployment.yaml @@ -413,7 +413,7 @@ spec: name: cpu targetAverageUtilization: 2500 --- -apiVersion: policy/v1 +apiVersion: policy/v1beta1 kind: PodDisruptionBudget metadata: name: batch diff --git a/bootstrap-gateway/deployment.yaml b/bootstrap-gateway/deployment.yaml index 0712510a99b..50cbd1d76f2 100644 --- a/bootstrap-gateway/deployment.yaml +++ b/bootstrap-gateway/deployment.yaml @@ -65,7 +65,7 @@ spec: name: cpu targetAverageUtilization: 500 --- -apiVersion: policy/v1 +apiVersion: policy/v1beta1 kind: PodDisruptionBudget metadata: name: gateway diff --git a/gateway/deployment.yaml b/gateway/deployment.yaml index f525792aa7d..d6c78a53cc3 100644 --- a/gateway/deployment.yaml +++ b/gateway/deployment.yaml @@ -79,7 +79,7 @@ spec: name: cpu targetAverageUtilization: 2500 --- -apiVersion: policy/v1 +apiVersion: policy/v1beta1 kind: PodDisruptionBudget metadata: name: gateway diff --git a/internal-gateway/deployment.yaml b/internal-gateway/deployment.yaml index cc371ea6be6..9c9f7504e08 100644 --- a/internal-gateway/deployment.yaml +++ b/internal-gateway/deployment.yaml @@ -56,7 +56,7 @@ spec: optional: false secretName: ssl-config-internal-gateway --- -apiVersion: policy/v1 +apiVersion: policy/v1beta1 kind: PodDisruptionBudget metadata: name: 
internal-gateway diff --git a/website/deployment.yaml b/website/deployment.yaml index 9204085603a..09df6b943a2 100644 --- a/website/deployment.yaml +++ b/website/deployment.yaml @@ -106,7 +106,7 @@ spec: name: cpu targetAverageUtilization: 80 --- -apiVersion: policy/v1 +apiVersion: policy/v1beta1 kind: PodDisruptionBudget metadata: name: website From 411cd980b3105079fe36474f051870fcf91367db Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Mon, 2 May 2022 15:03:49 +1000 Subject: [PATCH 362/501] Revert "Use apiVersion: policy/v1beta1 for PodDisruptionBudget (https://github.com/kyverno/kyverno/issues/2983)" This reverts commit cdf68fec339e1140c82a3d471d2b018cb7de580d. --- auth/deployment.yaml | 2 +- batch/deployment.yaml | 2 +- bootstrap-gateway/deployment.yaml | 2 +- gateway/deployment.yaml | 2 +- internal-gateway/deployment.yaml | 2 +- website/deployment.yaml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/auth/deployment.yaml b/auth/deployment.yaml index a790339f89f..f91301e82bb 100644 --- a/auth/deployment.yaml +++ b/auth/deployment.yaml @@ -244,7 +244,7 @@ spec: name: cpu targetAverageUtilization: 2500 --- -apiVersion: policy/v1beta1 +apiVersion: policy/v1 kind: PodDisruptionBudget metadata: name: auth diff --git a/batch/deployment.yaml b/batch/deployment.yaml index 2447b3424fe..7f83f8d4d96 100644 --- a/batch/deployment.yaml +++ b/batch/deployment.yaml @@ -413,7 +413,7 @@ spec: name: cpu targetAverageUtilization: 2500 --- -apiVersion: policy/v1beta1 +apiVersion: policy/v1 kind: PodDisruptionBudget metadata: name: batch diff --git a/bootstrap-gateway/deployment.yaml b/bootstrap-gateway/deployment.yaml index 50cbd1d76f2..0712510a99b 100644 --- a/bootstrap-gateway/deployment.yaml +++ b/bootstrap-gateway/deployment.yaml @@ -65,7 +65,7 @@ spec: name: cpu targetAverageUtilization: 500 --- -apiVersion: policy/v1beta1 +apiVersion: policy/v1 kind: PodDisruptionBudget metadata: name: gateway diff --git a/gateway/deployment.yaml b/gateway/deployment.yaml index d6c78a53cc3..f525792aa7d 100644 --- a/gateway/deployment.yaml +++ b/gateway/deployment.yaml @@ -79,7 +79,7 @@ spec: name: cpu targetAverageUtilization: 2500 --- -apiVersion: policy/v1beta1 +apiVersion: policy/v1 kind: PodDisruptionBudget metadata: name: gateway diff --git a/internal-gateway/deployment.yaml b/internal-gateway/deployment.yaml index 9c9f7504e08..cc371ea6be6 100644 --- a/internal-gateway/deployment.yaml +++ b/internal-gateway/deployment.yaml @@ -56,7 +56,7 @@ spec: optional: false secretName: ssl-config-internal-gateway --- -apiVersion: policy/v1beta1 +apiVersion: policy/v1 kind: PodDisruptionBudget metadata: name: internal-gateway diff --git a/website/deployment.yaml b/website/deployment.yaml index 09df6b943a2..9204085603a 100644 --- a/website/deployment.yaml +++ b/website/deployment.yaml @@ -106,7 +106,7 @@ spec: name: cpu targetAverageUtilization: 80 --- -apiVersion: policy/v1beta1 +apiVersion: policy/v1 kind: PodDisruptionBudget metadata: name: website From bb0dd360e4d546ff948c1d7b43614667e829be57 Mon Sep 17 00:00:00 2001 From: Michael Franklin Date: Fri, 6 May 2022 14:42:12 +1000 Subject: [PATCH 363/501] Add endpoint for getting aggregated resources (#189) * Add batch billing endpoint * Fix linting issue * Remove name because it doesn't exist * Minor import fix * Why can't I program * Fix arg grouping * Fix query * Fix job ids * Fix quoting of batch resource * Minor clean-up * Revert unintended changes * Remove two unused fields * Add more explicit limit conversion * Add docstring + fix error throwing * 
Update batch/batch/front_end/front_end.py Co-authored-by: Leonhard Gruenschloss * Add limit range (0, 1E4) [exclusive] Co-authored-by: Leonhard Gruenschloss --- batch/batch/front_end/front_end.py | 127 ++++++++++++++++++++++++++++- 1 file changed, 126 insertions(+), 1 deletion(-) diff --git a/batch/batch/front_end/front_end.py b/batch/batch/front_end/front_end.py index c9d9c008ff0..f9c9e1ac3d0 100644 --- a/batch/batch/front_end/front_end.py +++ b/batch/batch/front_end/front_end.py @@ -210,6 +210,90 @@ async def _handle_api_error(f, *args, **kwargs): raise e.http_response() +async def _query_batch_jobs_for_billing(request, batch_id): + db = request.app['db'] + + # batch has already been validated + where_conditions = ['(jobs.batch_id = %s)'] + where_args = [batch_id] + + last_job_id = request.query.get('last_job_id') + query_limit: str = request.query.get('limit') + limit = 300 + if query_limit: + try: + limit = int(query_limit) + except ValueError as e: + raise web.HTTPBadRequest(reason=f'Bad value for "limit": {e}') + if not (0 < limit < 1e4): + raise web.HTTPBadRequest(reason=f'Limit must be between 1 and 10,000 (limit={limit})') + + if last_job_id is not None: + last_job_id = int(last_job_id) + where_conditions.append('(jobs.job_id > %s)') + where_args.append(last_job_id) + + sql = f''' + SELECT + jobs.batch_id as batch_id, + jobs.job_id as job_id, + jobs.state as state, + batches.user AS user + FROM jobs + INNER JOIN batches ON jobs.batch_id = batches.id + WHERE {' AND '.join(where_conditions)} + GROUP BY jobs.batch_id, jobs.job_id + ORDER BY jobs.batch_id, jobs.job_id ASC + LIMIT %s; + ''' + + jobs = [dict(record) async for record in db.select_and_fetchall(sql, (*where_args, limit))] + n_job_ids = len(jobs) + job_ids = [job['job_id'] for job in jobs] + + if n_job_ids == 0: + return [] + if n_job_ids == 1: + job_condition = 'job_id = %s' + else: + placeholders = ', '.join(['%s'] * n_job_ids) + job_condition = f'job_id IN ({placeholders})' + + job_attributes_sql = f''' + SELECT job_id, `key`, `value` + FROM job_attributes + WHERE batch_id = %s AND {job_condition}; + ''' + + job_resources_sql = f''' + SELECT job_id, resource, `usage` + FROM aggregated_job_resources + WHERE batch_id = %s AND {job_condition} + ''' + + attributes_by_job = collections.defaultdict(dict) + async for record in db.select_and_fetchall(job_attributes_sql, (batch_id, *job_ids)): + attributes_by_job[record['job_id']][record['key']] = record['value'] + + resources_by_job = collections.defaultdict(dict) + async for record in db.select_and_fetchall(job_resources_sql, (batch_id, *job_ids)): + resources_by_job[record['job_id']][record['resource']] = record['usage'] + + for j in jobs: + job_id = j['job_id'] + j['resources'] = resources_by_job.get(job_id, []) + j['attributes'] = attributes_by_job.get(job_id, {}) + + if j.get('cost'): + del j['cost'] + + last_job_id = None + if len(jobs) == limit: + last_job_id = jobs[-1]['job_id'] + + return jobs, last_job_id + + async def _query_batch_jobs(request, batch_id): state_query_values = { 'pending': ['Pending'], @@ -330,6 +414,47 @@ async def get_jobs(request, userdata, batch_id): # pylint: disable=unused-argum return web.json_response(resp) +@routes.get('/api/v1alpha/batches/{batch_id}/jobs/resources') +@rest_billing_project_users_only +async def get_jobs_for_billing(request, userdata, batch_id): + """ + Get jobs for batch to check the amount of resources used. + Takes a "last_job_id" and "limit" parameter that can be used to implement paging. 
+ + Returns + ------- + Example response: + { + "jobs": [{ + "batch_id": 1, + "job_id": 1, + "state": "Error", + "user": "", + "resources": { + "compute/n1-preemptible/1": 0, + "disk/local-ssd/1": 0, + "disk/pd-ssd/1": 0, + "ip-fee/1024/1": 0, + "memory/n1-preemptible/1": 0, + "service-fee/1": 0 + }, + "attributes": { + "name": "" + } + }] + } + """ + + # just noting the @rest_billing_project_users_only decorator + # does the permission checks for us + jobs, last_job_id = await _query_batch_jobs_for_billing(request, batch_id) + resp = {'jobs': jobs} + if last_job_id: + resp['last_job_id'] = last_job_id + + return web.json_response(resp) + + async def _get_job_log_from_record(app, batch_id, job_id, record): client_session: httpx.ClientSession = app['client_session'] batch_format_version = BatchFormatVersion(record['format_version']) @@ -1593,7 +1718,7 @@ async def ui_get_job(request, userdata, batch_id): resources['actual_memory'] = humanize.naturalsize(resources['memory_bytes'], binary=True) del resources['memory_bytes'] if 'storage_gib' in resources: - resources['actual_storage'] = humanize.naturalsize(resources['storage_gib'] * 1024**3, binary=True) + resources['actual_storage'] = humanize.naturalsize(resources['storage_gib'] * 1024 ** 3, binary=True) del resources['storage_gib'] if 'cores_mcpu' in resources: resources['actual_cpu'] = resources['cores_mcpu'] / 1000 From 531b3e1578f2b4dd2f7b51b08cf3efd768d1169b Mon Sep 17 00:00:00 2001 From: vladsaveliev Date: Thu, 12 May 2022 09:27:28 +1000 Subject: [PATCH 364/501] https timeout 5 -> 20 (part of Dan Kings fix for GCS copy https://github.com/hail-is/hail/pull/11830) --- hail/python/hailtop/httpx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hail/python/hailtop/httpx.py b/hail/python/hailtop/httpx.py index 5a2a4180f30..515b0f02eb2 100644 --- a/hail/python/hailtop/httpx.py +++ b/hail/python/hailtop/httpx.py @@ -101,7 +101,7 @@ def __init__(self, assert 'connector' not in kwargs if timeout is None: - timeout = aiohttp.ClientTimeout(total=5) + timeout = aiohttp.ClientTimeout(total=20) self.raise_for_status = raise_for_status self.client_session = aiohttp.ClientSession( From 17ac9707007935ff6f4a5830268ed01904a97eec Mon Sep 17 00:00:00 2001 From: Tim Poterba Date: Thu, 5 May 2022 14:57:06 -0400 Subject: [PATCH 365/501] [query] Execute single-partition scatters on driver (#11819) * [query] Execute single-partition scatters on driver * bleh --- .../main/scala/is/hail/backend/Backend.scala | 2 ++ .../scala/is/hail/backend/BackendUtils.scala | 24 +++++++++++++------ .../is/hail/backend/spark/SparkBackend.scala | 2 ++ 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/hail/src/main/scala/is/hail/backend/Backend.scala b/hail/src/main/scala/is/hail/backend/Backend.scala index 7049ea4a83d..9713f4e6c90 100644 --- a/hail/src/main/scala/is/hail/backend/Backend.scala +++ b/hail/src/main/scala/is/hail/backend/Backend.scala @@ -27,6 +27,8 @@ abstract class BackendContext abstract class Backend { def defaultParallelism: Int + def canExecuteParallelTasksOnDriver: Boolean = true + def broadcast[T: ClassTag](value: T): BroadcastValue[T] def persist(backendContext: BackendContext, id: String, value: BlockMatrix, storageLevel: String): Unit diff --git a/hail/src/main/scala/is/hail/backend/BackendUtils.scala b/hail/src/main/scala/is/hail/backend/BackendUtils.scala index 506f6bbcf9d..796ed7bf270 100644 --- a/hail/src/main/scala/is/hail/backend/BackendUtils.scala +++ b/hail/src/main/scala/is/hail/backend/BackendUtils.scala @@ -22,15 
+22,25 @@ class BackendUtils(mods: Array[(String, (HailClassLoader, FS, Int, Region) => Ba if (contexts.isEmpty) return Array() val backend = HailContext.backend - val globalsBC = backend.broadcast(globals) val f = getModule(modID) - backend.parallelizeAndComputeWithIndex(backendContext, fs, contexts, tsd)({ (ctx, htc, theHailClassLoader, fs) => - val gs = globalsBC.value - htc.getRegionPool().scopedRegion { region => - val res = f(theHailClassLoader, fs, htc.partitionId(), region)(region, ctx, gs) - res + if (contexts.length == 0) + Array.empty[Array[Byte]] + else if (contexts.length == 1 && backend.canExecuteParallelTasksOnDriver) { + RegionPool.scoped { rp => + rp.scopedRegion { r => + Array(f(theDriverHailClassLoader, fs, 0, r)(r, contexts(0), globals)) + } } - }) + } else { + val globalsBC = backend.broadcast(globals) + backend.parallelizeAndComputeWithIndex(backendContext, fs, contexts, tsd)({ (ctx, htc, theHailClassLoader, fs) => + val gs = globalsBC.value + htc.getRegionPool().scopedRegion { region => + val res = f(theHailClassLoader, fs, htc.partitionId(), region)(region, ctx, gs) + res + } + }) + } } } diff --git a/hail/src/main/scala/is/hail/backend/spark/SparkBackend.scala b/hail/src/main/scala/is/hail/backend/spark/SparkBackend.scala index 62b92c30766..579074ce308 100644 --- a/hail/src/main/scala/is/hail/backend/spark/SparkBackend.scala +++ b/hail/src/main/scala/is/hail/backend/spark/SparkBackend.scala @@ -256,6 +256,8 @@ class SparkBackend( lazy val sparkSession: SparkSession = SparkSession.builder().config(sc.getConf).getOrCreate() private[this] val theHailClassLoader: HailClassLoader = new HailClassLoader(getClass().getClassLoader()) + override def canExecuteParallelTasksOnDriver: Boolean = false + val fs: HadoopFS = new HadoopFS(new SerializableHadoopConfiguration(sc.hadoopConfiguration)) private[this] val longLifeTempFileManager: TempFileManager = new OwningTempFileManager(fs) From 33b8a7bdcc74f5612d917d586c67257c9d5480ef Mon Sep 17 00:00:00 2001 From: Daniel King Date: Fri, 6 May 2022 16:47:20 -0400 Subject: [PATCH 366/501] [copy] fix the TimeoutError and ServerDisconnected issues in copy OK, there were two problems: 1. A timeout of 5s appears to be now too short for Google Cloud Storage. I am not sure why but we timeout substantially more frequently. I have observed this myself on my laptop. Just this morning I saw it happen to Daniel. 2. When using an `aiohttp.AsyncIterablePayload`, it is *critical* to always check if the coroutine which actually writes to GCS (which is stashed in the variable `request_task`) is still alive. In the current `main`, we do not do this which causes hangs (in particular the timeout exceptions are never thrown ergo we never retry). To understand the second problem, you must first recall how writing works in aiogoogle. There are two Tasks and an `asyncio.Queue`. The terms "writer" and "reader" are somewhat confusing, so let's use left and right. The left Task has the owning reference to both the source "file" and the destination "file". In particular, it is the *left* Task which closes both "files". Moreover, the left Task reads chunks from the source file and places those chunks on the `asyncio.Queue`. The right Task takes chunks off the queue and writes those chunks to the destination file. This situation can go awry in two ways. First, if the right Task encounters any kind of failure, it will stop taking chunks off of the queue. When the queue (which has a size limit of one) is full, the left Task will hang. The system is stuck. 
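(As an aside, here is a minimal, self-contained sketch — not the aiogoogle code itself, and using hypothetical `feed`/`consumer` names — of the guard that avoids this hang: the left Task waits on the queue operation *and* on the right Task at once, so a dead right Task surfaces as an exception instead of a deadlock.)

```python
import asyncio


async def consumer(queue: asyncio.Queue) -> None:
    # Stand-in for the right Task: pulls chunks off the queue and "writes" them.
    while True:
        chunk = await queue.get()
        if chunk is None:  # the "stop" message
            return
        if chunk == b"bad":  # simulate the write to GCS failing mid-stream
            raise RuntimeError("upload failed")


async def feed(queue: asyncio.Queue, consumer_task: asyncio.Task, item) -> None:
    # Stand-in for the left Task: enqueue one chunk, but never outlive the consumer.
    put = asyncio.ensure_future(queue.put(item))
    try:
        await asyncio.wait([put, consumer_task], return_when=asyncio.FIRST_COMPLETED)
        if put.done():
            return
        # The consumer died while the queue was full: surface its exception now
        # rather than waiting forever for space in the queue.
        consumer_task.result()
        raise RuntimeError("consumer exited before accepting the chunk")
    finally:
        put.cancel()


async def main() -> None:
    queue: asyncio.Queue = asyncio.Queue(maxsize=1)
    right = asyncio.create_task(consumer(queue))
    try:
        for chunk in (b"ok", b"bad", b"another chunk", None):
            await feed(queue, right, chunk)
    finally:
        if not right.done():
            right.cancel()


# Raises RuntimeError once the consumer has died and the queue fills up,
# instead of hanging forever on queue.put.
asyncio.run(main())
```

The same `asyncio.wait(..., return_when=FIRST_COMPLETED)` shape is what the diff below applies to `InsertObjectStream.write` and `_wait_closed`.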
The left Task will wait forever for the right Task to empty the queue. The second scenario is exactly the same except that the left Task is trying to add the "stop" message to the queue rather than a chunk. In either case, it is critical that the left Task waits simultaneously on the queue operation *and* on the right Task completing. If the right Task has died, no further writes can occur and the left Task must raise an exception. In the first scenario, we do not observe the right Task's exception because that will be done when we close the `InsertObjectStream` (which represents the destination "file"). --- I also added several types, assertions, and a few missing `async with ... as resp:` blocks. --- .../aiogoogle/client/storage_client.py | 122 +++++++++++------- .../python/hailtop/aiocloud/common/session.py | 15 ++- hail/python/hailtop/utils/utils.py | 24 +++- 3 files changed, 101 insertions(+), 60 deletions(-) diff --git a/hail/python/hailtop/aiocloud/aiogoogle/client/storage_client.py b/hail/python/hailtop/aiocloud/aiogoogle/client/storage_client.py index 18217ad88ad..0415464e305 100644 --- a/hail/python/hailtop/aiocloud/aiogoogle/client/storage_client.py +++ b/hail/python/hailtop/aiocloud/aiogoogle/client/storage_client.py @@ -1,6 +1,6 @@ import os from typing import (Tuple, Any, Set, Optional, MutableMapping, Dict, AsyncIterator, cast, Type, - List) + List, Awaitable, Coroutine) from types import TracebackType from multidict import CIMultiDictProxy # pylint: disable=unused-import import sys @@ -55,7 +55,9 @@ async def __anext__(self): class InsertObjectStream(WritableStream): - def __init__(self, it, request_task): + def __init__(self, + it: FeedableAsyncIterable[bytes], + request_task: asyncio.Future): # in Python 3.9: asyncio.Future[aiohttp.ClientResponse] super().__init__() self._it = it self._request_task = request_task @@ -63,24 +65,31 @@ def __init__(self, it, request_task): async def write(self, b): assert not self.closed - await self._it.feed(b) - return len(b) + + fut = asyncio.ensure_future(self._it.feed(b)) + try: + await asyncio.wait([fut, self._request_task], return_when=asyncio.FIRST_COMPLETED) + if fut.done(): + return len(b) + raise ValueError(f'request task finished early') + finally: + fut.cancel() async def _wait_closed(self): + fut = asyncio.ensure_future(self._it.stop()) try: - await self._it.stop() - except: - await self._request_task # retrieve exceptions - raise - else: + await asyncio.wait([fut, self._request_task], return_when=asyncio.FIRST_COMPLETED) async with await self._request_task as resp: self._value = await resp.json() + finally: + fut.cancel() class _TaskManager: - def __init__(self, coro): + def __init__(self, coro: Coroutine[Any, Any, Any], closable: bool = False): self._coro = coro - self._task = None + self._task: Optional[asyncio.Task[Any]] = None + self._closable = closable async def __aenter__(self) -> asyncio.Task: self._task = asyncio.create_task(self._coro) @@ -90,17 +99,27 @@ async def __aexit__(self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]) -> None: + assert self._task is not None + if not self._task.done(): if exc_val: self._task.cancel() try: - await self._task + value = await self._task + if self._closable: + value.close() except: _, exc, _ = sys.exc_info() if exc is not exc_val: log.warning('dropping preempted task exception', exc_info=True) else: - await self._task + value = await self._task + if self._closable: + value.close() + else: + value = await self._task + 
if self._closable: + value.close() class ResumableInsertObjectStream(WritableStream): @@ -138,31 +157,31 @@ async def _write_chunk_1(self): # https://cloud.google.com/storage/docs/performing-resumable-uploads#status-check # note: this retries - resp = await self._session.put(self._session_url, - headers={ - 'Content-Length': '0', - 'Content-Range': f'bytes */{total_size_str}' - }, - raise_for_status=False) - if resp.status >= 200 and resp.status < 300: - assert self._closed - assert total_size is not None - self._write_buffer.advance_offset(total_size) - assert self._write_buffer.size() == 0 - self._done = True - return - if resp.status == 308: - range = resp.headers.get('Range') - if range is not None: - new_offset = self._range_upper(range) + 1 + async with await self._session.put(self._session_url, + headers={ + 'Content-Length': '0', + 'Content-Range': f'bytes */{total_size_str}' + }, + raise_for_status=False) as resp: + if resp.status >= 200 and resp.status < 300: + assert self._closed + assert total_size is not None + self._write_buffer.advance_offset(total_size) + assert self._write_buffer.size() == 0 + self._done = True + return + if resp.status == 308: + range = resp.headers.get('Range') + if range is not None: + new_offset = self._range_upper(range) + 1 + else: + new_offset = 0 + self._write_buffer.advance_offset(new_offset) + self._broken = False else: - new_offset = 0 - self._write_buffer.advance_offset(new_offset) - self._broken = False - else: - assert resp.status >= 400 - resp.raise_for_status() - assert False + assert resp.status >= 400 + resp.raise_for_status() + assert False assert not self._broken self._broken = True @@ -192,7 +211,8 @@ async def _write_chunk_1(self): 'Content-Range': range }, raise_for_status=False, - retry=False)) as put_task: + retry=False), + closable=True) as put_task: for chunk in self._write_buffer.chunks(n): async with _TaskManager(it.feed(chunk)) as feed_task: done, _ = await asyncio.wait([put_task, feed_task], return_when=asyncio.FIRST_COMPLETED) @@ -243,7 +263,7 @@ async def _wait_closed(self): class GetObjectStream(ReadableStream): - def __init__(self, resp): + def __init__(self, resp: aiohttp.ClientResponse): super().__init__() self._resp = resp self._content = resp.content @@ -252,22 +272,28 @@ def __init__(self, resp): # Read up to n bytes. If n is not provided, or set to -1, read until EOF # and return all read bytes. 
async def read(self, n: int = -1) -> bytes: - assert not self._closed + assert not self._closed and self._content is not None return await self._content.read(n) async def readexactly(self, n: int) -> bytes: - assert not self._closed and n >= 0 + assert not self._closed and n >= 0 and self._content is not None try: return await self._content.readexactly(n) except asyncio.IncompleteReadError as e: raise UnexpectedEOFError() from e def headers(self) -> 'CIMultiDictProxy[str]': + assert self._resp is not None + return self._resp.headers async def _wait_closed(self) -> None: + assert self._resp is not None + assert self._content is not None + self._content = None self._resp.release() + self._resp.close() self._resp = None @@ -290,10 +316,7 @@ async def insert_object(self, bucket: str, name: str, **kwargs) -> WritableStrea assert 'name' not in params params['name'] = name - if 'data' in params: - return await self._session.post( - f'https://storage.googleapis.com/upload/storage/v1/b/{bucket}/o', - **kwargs) + assert 'data' not in params upload_type = params.get('uploadType') if not upload_type: @@ -303,7 +326,7 @@ async def insert_object(self, bucket: str, name: str, **kwargs) -> WritableStrea if upload_type == 'media': it: FeedableAsyncIterable[bytes] = FeedableAsyncIterable() kwargs['data'] = aiohttp.AsyncIterablePayload(it) - request_task = asyncio.ensure_future(self._session.post( + request_task: asyncio.Future = asyncio.ensure_future(self._session.post( f'https://storage.googleapis.com/upload/storage/v1/b/{bucket}/o', retry=False, **kwargs)) @@ -314,10 +337,11 @@ async def insert_object(self, bucket: str, name: str, **kwargs) -> WritableStrea assert upload_type == 'resumable' chunk_size = kwargs.get('bufsize', 256 * 1024) - resp = await self._session.post( + async with await self._session.post( f'https://storage.googleapis.com/upload/storage/v1/b/{bucket}/o', - **kwargs) - session_url = resp.headers['Location'] + **kwargs + ) as resp: + session_url = resp.headers['Location'] return ResumableInsertObjectStream(self._session, session_url, chunk_size) async def get_object(self, bucket: str, name: str, **kwargs) -> GetObjectStream: diff --git a/hail/python/hailtop/aiocloud/common/session.py b/hail/python/hailtop/aiocloud/common/session.py index 5da934ffaf4..71c38b8b178 100644 --- a/hail/python/hailtop/aiocloud/common/session.py +++ b/hail/python/hailtop/aiocloud/common/session.py @@ -1,5 +1,6 @@ from types import TracebackType from typing import Optional, Type, TypeVar, Mapping +import aiohttp import abc from hailtop import httpx from hailtop.utils import request_retry_transient_errors, RateLimit, RateLimiter @@ -10,22 +11,22 @@ class BaseSession(abc.ABC): @abc.abstractmethod - async def request(self, method: str, url: str, **kwargs): + async def request(self, method: str, url: str, **kwargs) -> aiohttp.ClientResponse: pass - async def get(self, url: str, **kwargs): + async def get(self, url: str, **kwargs) -> aiohttp.ClientResponse: return await self.request('GET', url, **kwargs) - async def post(self, url: str, **kwargs): + async def post(self, url: str, **kwargs) -> aiohttp.ClientResponse: return await self.request('POST', url, **kwargs) - async def put(self, url: str, **kwargs): + async def put(self, url: str, **kwargs) -> aiohttp.ClientResponse: return await self.request('PUT', url, **kwargs) - async def delete(self, url: str, **kwargs): + async def delete(self, url: str, **kwargs) -> aiohttp.ClientResponse: return await self.request('DELETE', url, **kwargs) - async def head(self, url: 
str, **kwargs): + async def head(self, url: str, **kwargs) -> aiohttp.ClientResponse: return await self.request('HEAD', url, **kwargs) async def close(self) -> None: @@ -78,7 +79,7 @@ def __init__(self, self._http_session = httpx.ClientSession(**kwargs) self._credentials = credentials - async def request(self, method: str, url: str, **kwargs): + async def request(self, method: str, url: str, **kwargs) -> aiohttp.ClientResponse: auth_headers = await self._credentials.auth_headers() if auth_headers: if 'headers' in kwargs: diff --git a/hail/python/hailtop/utils/utils.py b/hail/python/hailtop/utils/utils.py index d654590d35d..829be472d00 100644 --- a/hail/python/hailtop/utils/utils.py +++ b/hail/python/hailtop/utils/utils.py @@ -731,9 +731,15 @@ async def retry_transient_errors(f: Callable[..., Awaitable[T]], *args, **kwargs if not is_transient_error(e): raise errors += 1 - if errors == 2 or errors % 10 == 0: + if errors == 2: + log.warning(f'A transient error occured. We will automatically retry. Do not be alarmed. ' + f'We have thus far seen {errors} transient errors (current delay: ' + f'{delay}). The most recent error was {type(e)} {e}') + elif errors % 10 == 0: st = ''.join(traceback.format_stack()) - log.warning(f'Encountered {errors} errors (current delay: {delay}). My stack trace is {st}. Most recent error was {e}', exc_info=True) + log.warning(f'A transient error occured. We will automatically retry. ' + f'We have thus far seen {errors} transient errors (current delay: ' + f'{delay}). The stack trace for this call is {st}. The most recent error was {type(e)} {e}', exc_info=True) delay = await sleep_and_backoff(delay) @@ -755,11 +761,21 @@ def sync_retry_transient_errors(f, *args, **kwargs): delay = sync_sleep_and_backoff(delay) -async def request_retry_transient_errors(session, method, url, **kwargs): +async def request_retry_transient_errors( + session, # : Union[httpx.ClientSession, aiohttp.ClientSession] + method: str, + url, + **kwargs +) -> aiohttp.ClientResponse: return await retry_transient_errors(session.request, method, url, **kwargs) -async def request_raise_transient_errors(session, method, url, **kwargs): +async def request_raise_transient_errors( + session, # : Union[httpx.ClientSession, aiohttp.ClientSession] + method: str, + url, + **kwargs +) -> aiohttp.ClientResponse: try: return await session.request(method, url, **kwargs) except Exception as e: From ab6696092ddc445483b292119f8afd9efbcb25e3 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Wed, 11 May 2022 17:17:39 -0400 Subject: [PATCH 367/501] remove pylints --- .../hailtop/aiocloud/aiogoogle/client/storage_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hail/python/hailtop/aiocloud/aiogoogle/client/storage_client.py b/hail/python/hailtop/aiocloud/aiogoogle/client/storage_client.py index 0415464e305..97edaccca74 100644 --- a/hail/python/hailtop/aiocloud/aiogoogle/client/storage_client.py +++ b/hail/python/hailtop/aiocloud/aiogoogle/client/storage_client.py @@ -1,6 +1,6 @@ import os from typing import (Tuple, Any, Set, Optional, MutableMapping, Dict, AsyncIterator, cast, Type, - List, Awaitable, Coroutine) + List, Coroutine) from types import TracebackType from multidict import CIMultiDictProxy # pylint: disable=unused-import import sys @@ -71,7 +71,7 @@ async def write(self, b): await asyncio.wait([fut, self._request_task], return_when=asyncio.FIRST_COMPLETED) if fut.done(): return len(b) - raise ValueError(f'request task finished early') + raise ValueError('request task finished 
early') finally: fut.cancel() From af2a9d5131eb89b1d832cfe7cc099a81483d4c22 Mon Sep 17 00:00:00 2001 From: Daniel King Date: Wed, 11 May 2022 17:17:57 -0400 Subject: [PATCH 368/501] more pylints --- hail/python/hailtop/utils/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hail/python/hailtop/utils/utils.py b/hail/python/hailtop/utils/utils.py index 829be472d00..d42567ce895 100644 --- a/hail/python/hailtop/utils/utils.py +++ b/hail/python/hailtop/utils/utils.py @@ -762,7 +762,7 @@ def sync_retry_transient_errors(f, *args, **kwargs): async def request_retry_transient_errors( - session, # : Union[httpx.ClientSession, aiohttp.ClientSession] + session, # : Union[httpx.ClientSession, aiohttp.ClientSession] method: str, url, **kwargs @@ -771,7 +771,7 @@ async def request_retry_transient_errors( async def request_raise_transient_errors( - session, # : Union[httpx.ClientSession, aiohttp.ClientSession] + session, # : Union[httpx.ClientSession, aiohttp.ClientSession] method: str, url, **kwargs From 068f803f9fa0f9069c6cb3f9e567d8ec576e6f43 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Thu, 26 May 2022 16:48:39 +1000 Subject: [PATCH 369/501] Allow specifying the pool name explicitly --- batch/batch/front_end/front_end.py | 6 +++++- batch/batch/inst_coll_config.py | 13 ++++++++++--- hail/python/hailtop/batch/backend.py | 2 ++ hail/python/hailtop/batch/job.py | 1 + 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/batch/batch/front_end/front_end.py b/batch/batch/front_end/front_end.py index f9c9e1ac3d0..000185598db 100644 --- a/batch/batch/front_end/front_end.py +++ b/batch/batch/front_end/front_end.py @@ -886,6 +886,7 @@ async def _create_jobs(userdata: dict, job_specs: dict, batch_id: int, app: aioh worker_type = None machine_type = resources.get('machine_type') + pool_name = resources.get('pool_name') preemptible = resources.get('preemptible', BATCH_JOB_DEFAULT_PREEMPTIBLE) if machine_type and machine_type not in valid_machine_types(cloud): @@ -894,6 +895,9 @@ async def _create_jobs(userdata: dict, job_specs: dict, batch_id: int, app: aioh if machine_type and ('cpu' in resources or 'memory' in resources): raise web.HTTPBadRequest(reason='cannot specify cpu and memory with machine_type') + if machine_type and pool_name: + raise web.HTTPBadRequest(reason='cannot specify pool name with machine_type') + if spec['process']['type'] == 'jvm': jvm_requested_cpu = parse_cpu_in_mcpu(resources.get('cpu', BATCH_JOB_DEFAULT_CPU)) if 'cpu' in resources and jvm_requested_cpu not in (1000, 8000): @@ -961,7 +965,7 @@ async def _create_jobs(userdata: dict, job_specs: dict, batch_id: int, app: aioh inst_coll_configs: InstanceCollectionConfigs = app['inst_coll_configs'] result, exc = inst_coll_configs.select_inst_coll( - cloud, machine_type, preemptible, worker_type, req_cores_mcpu, req_memory_bytes, req_storage_bytes + cloud, machine_type, pool_name, preemptible, worker_type, req_cores_mcpu, req_memory_bytes, req_storage_bytes ) if exc: diff --git a/batch/batch/inst_coll_config.py b/batch/batch/inst_coll_config.py index 53d014da949..9f43da5a972 100644 --- a/batch/batch/inst_coll_config.py +++ b/batch/batch/inst_coll_config.py @@ -273,12 +273,15 @@ async def refresh(self, db: Database): self.resource_rates = resource_rates self.product_versions.update(product_versions_data) - def select_pool_from_cost(self, cloud, cores_mcpu, memory_bytes, storage_bytes, preemptible): + def select_pool_from_cost(self, cloud, pool_name, cores_mcpu, memory_bytes, storage_bytes, 
preemptible): assert self.resource_rates is not None optimal_result = None optimal_cost = None for pool in self.name_pool_config.values(): + if pool_name and pool.name != pool_name: + continue + if pool.cloud != cloud or pool.preemptible != preemptible: continue @@ -304,8 +307,10 @@ def select_pool_from_cost(self, cloud, cores_mcpu, memory_bytes, storage_bytes, optimal_result = (pool.name, maybe_cores_mcpu, maybe_memory_bytes, maybe_storage_gib) return optimal_result - def select_pool_from_worker_type(self, cloud, worker_type, cores_mcpu, memory_bytes, storage_bytes, preemptible): + def select_pool_from_worker_type(self, cloud, pool_name, worker_type, cores_mcpu, memory_bytes, storage_bytes, preemptible): for pool in self.name_pool_config.values(): + if pool_name and pool.name != pool_name: + continue if pool.cloud == cloud and pool.worker_type == worker_type and pool.preemptible == preemptible: result = pool.convert_requests_to_resources(cores_mcpu, memory_bytes, storage_bytes) if result: @@ -319,11 +324,12 @@ def select_job_private(self, cloud, machine_type, storage_bytes): return self.jpim_config.convert_requests_to_resources(machine_type, storage_bytes) def select_inst_coll( - self, cloud, machine_type, preemptible, worker_type, req_cores_mcpu, req_memory_bytes, req_storage_bytes + self, cloud, machine_type, pool_name, preemptible, worker_type, req_cores_mcpu, req_memory_bytes, req_storage_bytes ): if worker_type is not None and machine_type is None: result = self.select_pool_from_worker_type( cloud=cloud, + pool_name=pool_name, worker_type=worker_type, cores_mcpu=req_cores_mcpu, memory_bytes=req_memory_bytes, @@ -333,6 +339,7 @@ def select_inst_coll( elif worker_type is None and machine_type is None: result = self.select_pool_from_cost( cloud=cloud, + pool_name=pool_name, cores_mcpu=req_cores_mcpu, memory_bytes=req_memory_bytes, storage_bytes=req_storage_bytes, diff --git a/hail/python/hailtop/batch/backend.py b/hail/python/hailtop/batch/backend.py index 9d6c0d75de4..cf939bdbb18 100644 --- a/hail/python/hailtop/batch/backend.py +++ b/hail/python/hailtop/batch/backend.py @@ -657,6 +657,8 @@ async def compile_job(job): resources['storage'] = job._storage if job._machine_type: resources['machine_type'] = job._machine_type + if job._pool_name: + resources['pool_name'] = job._pool_name if job._preemptible is not None: resources['preemptible'] = job._preemptible diff --git a/hail/python/hailtop/batch/job.py b/hail/python/hailtop/batch/job.py index b13d960843a..8cb1f98a933 100644 --- a/hail/python/hailtop/batch/job.py +++ b/hail/python/hailtop/batch/job.py @@ -79,6 +79,7 @@ def __init__(self, self._always_run: bool = False self._preemptible: Optional[bool] = None self._machine_type: Optional[str] = None + self._pool_name: Optional[str] = None self._timeout: Optional[Union[int, float]] = None self._cloudfuse: List[Tuple[str, str, bool]] = [] self._env: Dict[str, str] = {} From d328bcfc73864cdf2152e87e6ebdfa70246679ba Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Thu, 26 May 2022 16:55:49 +1000 Subject: [PATCH 370/501] Add SQL for adding seqr-specific pools --- batch/sql/add-seqr-pools.sql | 42 ++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 batch/sql/add-seqr-pools.sql diff --git a/batch/sql/add-seqr-pools.sql b/batch/sql/add-seqr-pools.sql new file mode 100644 index 00000000000..3bcaa5ed952 --- /dev/null +++ b/batch/sql/add-seqr-pools.sql @@ -0,0 +1,42 @@ + +INSERT INTO inst_colls (`name`, `is_pool`, `boot_disk_size_gb`, `max_instances`, 
`max_live_instances`, `cloud`) +SELECT 'seqr-standard', 1, boot_disk_size_gb, max_instances, max_live_instances, cloud +FROM inst_colls +WHERE name = 'standard'; + +INSERT INTO inst_colls (`name`, `is_pool`, `boot_disk_size_gb`, `max_instances`, `max_live_instances`, `cloud`) +SELECT 'seqr-highmem', 1, boot_disk_size_gb, max_instances, max_live_instances, cloud +FROM inst_colls +WHERE name = 'highmem'; + +INSERT INTO inst_colls (`name`, `is_pool`, `boot_disk_size_gb`, `max_instances`, `max_live_instances`, `cloud`) +SELECT 'seqr-highcpu', 1, boot_disk_size_gb, max_instances, max_live_instances, cloud +FROM inst_colls +WHERE name = 'highcpu'; + +INSERT INTO pools (`name`, `worker_type`, `worker_cores`, `worker_local_ssd_data_disk`, + `worker_external_ssd_data_disk_size_gb`, `enable_standing_worker`, `standing_worker_cores`, + `preemptible`) +SELECT 'seqr-standard', worker_type, worker_cores, worker_local_ssd_data_disk, + worker_external_ssd_data_disk_size_gb, enable_standing_worker, standing_worker_cores, + TRUE +FROM pools +WHERE name = 'standard'; + +INSERT INTO pools (`name`, `worker_type`, `worker_cores`, `worker_local_ssd_data_disk`, + `worker_external_ssd_data_disk_size_gb`, `enable_standing_worker`, `standing_worker_cores`, + `preemptible`) +SELECT 'seqr-highmem', worker_type, worker_cores, worker_local_ssd_data_disk, + worker_external_ssd_data_disk_size_gb, enable_standing_worker, standing_worker_cores, + TRUE +FROM pools +WHERE name = 'highmem'; + +INSERT INTO pools (`name`, `worker_type`, `worker_cores`, `worker_local_ssd_data_disk`, + `worker_external_ssd_data_disk_size_gb`, `enable_standing_worker`, `standing_worker_cores`, + `preemptible`) +SELECT 'seqr-highcpu', worker_type, worker_cores, worker_local_ssd_data_disk, + worker_external_ssd_data_disk_size_gb, enable_standing_worker, standing_worker_cores, + TRUE +FROM pools +WHERE name = 'highcpu'; From ecfc93e18600fd64ddca260018008f3f0732c4e1 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Thu, 26 May 2022 17:51:51 +1000 Subject: [PATCH 371/501] Allow pool name prefix --- batch/batch/front_end/front_end.py | 6 +++--- batch/batch/inst_coll_config.py | 14 +++++++------- batch/sql/add-seqr-pools.sql | 6 +++--- hail/python/hailtop/batch/backend.py | 4 ++-- hail/python/hailtop/batch/job.py | 2 +- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/batch/batch/front_end/front_end.py b/batch/batch/front_end/front_end.py index 000185598db..0629ee1d505 100644 --- a/batch/batch/front_end/front_end.py +++ b/batch/batch/front_end/front_end.py @@ -886,7 +886,7 @@ async def _create_jobs(userdata: dict, job_specs: dict, batch_id: int, app: aioh worker_type = None machine_type = resources.get('machine_type') - pool_name = resources.get('pool_name') + pool_name_prefix = resources.get('pool_name_prefix') preemptible = resources.get('preemptible', BATCH_JOB_DEFAULT_PREEMPTIBLE) if machine_type and machine_type not in valid_machine_types(cloud): @@ -895,7 +895,7 @@ async def _create_jobs(userdata: dict, job_specs: dict, batch_id: int, app: aioh if machine_type and ('cpu' in resources or 'memory' in resources): raise web.HTTPBadRequest(reason='cannot specify cpu and memory with machine_type') - if machine_type and pool_name: + if machine_type and pool_name_prefix: raise web.HTTPBadRequest(reason='cannot specify pool name with machine_type') if spec['process']['type'] == 'jvm': @@ -965,7 +965,7 @@ async def _create_jobs(userdata: dict, job_specs: dict, batch_id: int, app: aioh inst_coll_configs: InstanceCollectionConfigs = 
app['inst_coll_configs'] result, exc = inst_coll_configs.select_inst_coll( - cloud, machine_type, pool_name, preemptible, worker_type, req_cores_mcpu, req_memory_bytes, req_storage_bytes + cloud, machine_type, pool_name_prefix, preemptible, worker_type, req_cores_mcpu, req_memory_bytes, req_storage_bytes ) if exc: diff --git a/batch/batch/inst_coll_config.py b/batch/batch/inst_coll_config.py index 9f43da5a972..74cf88f0679 100644 --- a/batch/batch/inst_coll_config.py +++ b/batch/batch/inst_coll_config.py @@ -273,13 +273,13 @@ async def refresh(self, db: Database): self.resource_rates = resource_rates self.product_versions.update(product_versions_data) - def select_pool_from_cost(self, cloud, pool_name, cores_mcpu, memory_bytes, storage_bytes, preemptible): + def select_pool_from_cost(self, cloud, pool_name_prefix, cores_mcpu, memory_bytes, storage_bytes, preemptible): assert self.resource_rates is not None optimal_result = None optimal_cost = None for pool in self.name_pool_config.values(): - if pool_name and pool.name != pool_name: + if pool_name_prefix and not pool.name.startswith(pool_name_prefix): continue if pool.cloud != cloud or pool.preemptible != preemptible: @@ -307,9 +307,9 @@ def select_pool_from_cost(self, cloud, pool_name, cores_mcpu, memory_bytes, stor optimal_result = (pool.name, maybe_cores_mcpu, maybe_memory_bytes, maybe_storage_gib) return optimal_result - def select_pool_from_worker_type(self, cloud, pool_name, worker_type, cores_mcpu, memory_bytes, storage_bytes, preemptible): + def select_pool_from_worker_type(self, cloud, pool_name_prefix, worker_type, cores_mcpu, memory_bytes, storage_bytes, preemptible): for pool in self.name_pool_config.values(): - if pool_name and pool.name != pool_name: + if pool_name_prefix and not pool.name.startswith(pool_name_prefix): continue if pool.cloud == cloud and pool.worker_type == worker_type and pool.preemptible == preemptible: result = pool.convert_requests_to_resources(cores_mcpu, memory_bytes, storage_bytes) @@ -324,12 +324,12 @@ def select_job_private(self, cloud, machine_type, storage_bytes): return self.jpim_config.convert_requests_to_resources(machine_type, storage_bytes) def select_inst_coll( - self, cloud, machine_type, pool_name, preemptible, worker_type, req_cores_mcpu, req_memory_bytes, req_storage_bytes + self, cloud, machine_type, pool_name_prefix, preemptible, worker_type, req_cores_mcpu, req_memory_bytes, req_storage_bytes ): if worker_type is not None and machine_type is None: result = self.select_pool_from_worker_type( cloud=cloud, - pool_name=pool_name, + pool_name_prefix=pool_name_prefix, worker_type=worker_type, cores_mcpu=req_cores_mcpu, memory_bytes=req_memory_bytes, @@ -339,7 +339,7 @@ def select_inst_coll( elif worker_type is None and machine_type is None: result = self.select_pool_from_cost( cloud=cloud, - pool_name=pool_name, + pool_name_prefix=pool_name_prefix, cores_mcpu=req_cores_mcpu, memory_bytes=req_memory_bytes, storage_bytes=req_storage_bytes, diff --git a/batch/sql/add-seqr-pools.sql b/batch/sql/add-seqr-pools.sql index 3bcaa5ed952..7a4de2a0664 100644 --- a/batch/sql/add-seqr-pools.sql +++ b/batch/sql/add-seqr-pools.sql @@ -18,7 +18,7 @@ INSERT INTO pools (`name`, `worker_type`, `worker_cores`, `worker_local_ssd_data `worker_external_ssd_data_disk_size_gb`, `enable_standing_worker`, `standing_worker_cores`, `preemptible`) SELECT 'seqr-standard', worker_type, worker_cores, worker_local_ssd_data_disk, - worker_external_ssd_data_disk_size_gb, enable_standing_worker, standing_worker_cores, + 
worker_external_ssd_data_disk_size_gb, FALSE, standing_worker_cores, TRUE FROM pools WHERE name = 'standard'; @@ -27,7 +27,7 @@ INSERT INTO pools (`name`, `worker_type`, `worker_cores`, `worker_local_ssd_data `worker_external_ssd_data_disk_size_gb`, `enable_standing_worker`, `standing_worker_cores`, `preemptible`) SELECT 'seqr-highmem', worker_type, worker_cores, worker_local_ssd_data_disk, - worker_external_ssd_data_disk_size_gb, enable_standing_worker, standing_worker_cores, + worker_external_ssd_data_disk_size_gb, FALSE, standing_worker_cores, TRUE FROM pools WHERE name = 'highmem'; @@ -36,7 +36,7 @@ INSERT INTO pools (`name`, `worker_type`, `worker_cores`, `worker_local_ssd_data `worker_external_ssd_data_disk_size_gb`, `enable_standing_worker`, `standing_worker_cores`, `preemptible`) SELECT 'seqr-highcpu', worker_type, worker_cores, worker_local_ssd_data_disk, - worker_external_ssd_data_disk_size_gb, enable_standing_worker, standing_worker_cores, + worker_external_ssd_data_disk_size_gb, FALSE, standing_worker_cores, TRUE FROM pools WHERE name = 'highcpu'; diff --git a/hail/python/hailtop/batch/backend.py b/hail/python/hailtop/batch/backend.py index cf939bdbb18..05a51ecfae5 100644 --- a/hail/python/hailtop/batch/backend.py +++ b/hail/python/hailtop/batch/backend.py @@ -657,8 +657,8 @@ async def compile_job(job): resources['storage'] = job._storage if job._machine_type: resources['machine_type'] = job._machine_type - if job._pool_name: - resources['pool_name'] = job._pool_name + if job._pool_name_prefix: + resources['pool_name_prefix'] = job._pool_name_prefix if job._preemptible is not None: resources['preemptible'] = job._preemptible diff --git a/hail/python/hailtop/batch/job.py b/hail/python/hailtop/batch/job.py index 8cb1f98a933..dbf986b8623 100644 --- a/hail/python/hailtop/batch/job.py +++ b/hail/python/hailtop/batch/job.py @@ -79,7 +79,7 @@ def __init__(self, self._always_run: bool = False self._preemptible: Optional[bool] = None self._machine_type: Optional[str] = None - self._pool_name: Optional[str] = None + self._pool_name_prefix: Optional[str] = None self._timeout: Optional[Union[int, float]] = None self._cloudfuse: List[Tuple[str, str, bool]] = [] self._env: Dict[str, str] = {} From b3564a26352d729a256d314d67f457f9e0b1c190 Mon Sep 17 00:00:00 2001 From: Leonhard Gruenschloss Date: Thu, 26 May 2022 19:07:10 +1000 Subject: [PATCH 372/501] Use pool labels --- .../batch/driver/instance_collection/pool.py | 3 +++ batch/batch/driver/main.py | 1 + batch/batch/driver/templates/pool.html | 1 + batch/batch/front_end/front_end.py | 8 +++--- batch/batch/inst_coll_config.py | 26 +++++++++---------- batch/sql/add-pool-label.sql | 1 + batch/sql/add-seqr-pools.sql | 13 +++++----- build.yaml | 2 ++ hail/python/hailtop/batch/backend.py | 4 +-- hail/python/hailtop/batch/job.py | 2 +- 10 files changed, 35 insertions(+), 26 deletions(-) create mode 100644 batch/sql/add-pool-label.sql diff --git a/batch/batch/driver/instance_collection/pool.py b/batch/batch/driver/instance_collection/pool.py index 98c2549b837..8fb519881d9 100644 --- a/batch/batch/driver/instance_collection/pool.py +++ b/batch/batch/driver/instance_collection/pool.py @@ -109,6 +109,7 @@ def __init__( self.data_disk_size_gb = config.data_disk_size_gb self.data_disk_size_standing_gb = config.data_disk_size_standing_gb self.preemptible = config.preemptible + self.label = config.label @property def local_ssd_data_disk(self) -> bool: @@ -130,6 +131,7 @@ def config(self): 'max_instances': self.max_instances, 'max_live_instances': 
self.max_live_instances, 'preemptible': self.preemptible, + 'label': self.label, } def configure(self, pool_config: PoolConfig): @@ -148,6 +150,7 @@ def configure(self, pool_config: PoolConfig): self.max_instances = pool_config.max_instances self.max_live_instances = pool_config.max_live_instances self.preemptible = pool_config.preemptible + self.label = pool_config.label def adjust_for_remove_instance(self, instance): super().adjust_for_remove_instance(instance) diff --git a/batch/batch/driver/main.py b/batch/batch/driver/main.py index e80b726072e..4084aa726d0 100644 --- a/batch/batch/driver/main.py +++ b/batch/batch/driver/main.py @@ -537,6 +537,7 @@ async def pool_config_update(request, userdata): # pylint: disable=unused-argum max_instances, max_live_instances, pool.preemptible, + pool.label, ) await pool_config.update_database(db) pool.configure(pool_config) diff --git a/batch/batch/driver/templates/pool.html b/batch/batch/driver/templates/pool.html index 19c84302980..23a5d706362 100644 --- a/batch/batch/driver/templates/pool.html +++ b/batch/batch/driver/templates/pool.html @@ -31,6 +31,7 @@

Configuration

Standing worker cores:
Max instances:
Max live instances:
+      Label:
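
Note on patches 369-371: together they let a job steer itself onto a named family of pools. hailtop.batch records `_pool_name_prefix` on the job, the front end forwards it as the `pool_name_prefix` resource, and `select_pool_from_cost` / `select_pool_from_worker_type` skip any pool whose name does not start with that prefix before choosing the cheapest match. The sketch below illustrates only that filter-then-cheapest step; `PoolStub` and `select_pool` are simplified stand-ins for the real `PoolConfig` and `select_pool_from_cost`, not code from the patches.

    from typing import List, Optional

    class PoolStub:
        """Simplified stand-in for batch's PoolConfig (illustration only)."""
        def __init__(self, name: str, cloud: str, preemptible: bool, cost: float):
            self.name = name
            self.cloud = cloud
            self.preemptible = preemptible
            self.cost = cost

    def select_pool(pools: List[PoolStub],
                    cloud: str,
                    preemptible: bool,
                    pool_name_prefix: Optional[str]) -> Optional[PoolStub]:
        # Mirrors the filter added in these patches: pools whose name does not
        # start with the requested prefix are skipped, then the cheapest
        # remaining pool matching cloud and preemptibility wins.
        best: Optional[PoolStub] = None
        for pool in pools:
            if pool_name_prefix and not pool.name.startswith(pool_name_prefix):
                continue
            if pool.cloud != cloud or pool.preemptible != preemptible:
                continue
            if best is None or pool.cost < best.cost:
                best = pool
        return best

    pools = [PoolStub('standard', 'gcp', True, 1.0),
             PoolStub('seqr-standard', 'gcp', True, 1.0),
             PoolStub('seqr-highmem', 'gcp', True, 1.5)]

    assert select_pool(pools, 'gcp', True, 'seqr-').name == 'seqr-standard'
    assert select_pool(pools, 'gcp', True, None).name == 'standard'

With no prefix the ordinary cost-based selection is unchanged; a prefix such as 'seqr-' (added by the add-seqr-pools.sql migration above) simply narrows the candidate set before the same cost comparison runs.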